Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/kernel/Makefile2
-rw-r--r--arch/alpha/kernel/alpha_ksyms.c9
-rw-r--r--arch/alpha/kernel/semaphore.c224
-rw-r--r--arch/arm/kernel/Makefile2
-rw-r--r--arch/arm/kernel/semaphore.c221
-rw-r--r--arch/avr32/kernel/Makefile2
-rw-r--r--arch/avr32/kernel/semaphore.c148
-rw-r--r--arch/blackfin/Kconfig4
-rw-r--r--arch/blackfin/kernel/bfin_ksyms.c5
-rw-r--r--arch/cris/kernel/Makefile3
-rw-r--r--arch/cris/kernel/crisksyms.c7
-rw-r--r--arch/cris/kernel/semaphore.c129
-rw-r--r--arch/frv/kernel/Makefile2
-rw-r--r--arch/frv/kernel/frv_ksyms.c1
-rw-r--r--arch/frv/kernel/semaphore.c155
-rw-r--r--arch/h8300/kernel/Makefile2
-rw-r--r--arch/h8300/kernel/h8300_ksyms.c1
-rw-r--r--arch/h8300/kernel/semaphore.c132
-rw-r--r--arch/ia64/Kconfig14
-rw-r--r--arch/ia64/hp/common/sba_iommu.c56
-rw-r--r--arch/ia64/hp/sim/simscsi.c23
-rw-r--r--arch/ia64/ia32/elfcore32.h14
-rw-r--r--arch/ia64/ia32/sys_ia32.c649
-rw-r--r--arch/ia64/kernel/Makefile2
-rw-r--r--arch/ia64/kernel/acpi.c4
-rw-r--r--arch/ia64/kernel/asm-offsets.c13
-rw-r--r--arch/ia64/kernel/crash.c56
-rw-r--r--arch/ia64/kernel/efi.c46
-rw-r--r--arch/ia64/kernel/entry.S65
-rw-r--r--arch/ia64/kernel/fsys.S88
-rw-r--r--arch/ia64/kernel/head.S20
-rw-r--r--arch/ia64/kernel/ia64_ksyms.c6
-rw-r--r--arch/ia64/kernel/irq_ia64.c2
-rw-r--r--arch/ia64/kernel/ivt.S69
-rw-r--r--arch/ia64/kernel/kprobes.c133
-rw-r--r--arch/ia64/kernel/mca.c60
-rw-r--r--arch/ia64/kernel/mca_asm.S5
-rw-r--r--arch/ia64/kernel/minstate.h14
-rw-r--r--arch/ia64/kernel/numa.c2
-rw-r--r--arch/ia64/kernel/patch.c8
-rw-r--r--arch/ia64/kernel/perfmon.c4
-rw-r--r--arch/ia64/kernel/process.c30
-rw-r--r--arch/ia64/kernel/ptrace.c1217
-rw-r--r--arch/ia64/kernel/semaphore.c165
-rw-r--r--arch/ia64/kernel/setup.c31
-rw-r--r--arch/ia64/kernel/smp.c82
-rw-r--r--arch/ia64/kernel/smpboot.c2
-rw-r--r--arch/ia64/kernel/time.c78
-rw-r--r--arch/ia64/kernel/unaligned.c3
-rw-r--r--arch/ia64/mm/contig.c4
-rw-r--r--arch/ia64/mm/discontig.c17
-rw-r--r--arch/ia64/mm/init.c14
-rw-r--r--arch/ia64/mm/numa.c4
-rw-r--r--arch/ia64/mm/tlb.c357
-rw-r--r--arch/ia64/sn/kernel/xpc_main.c8
-rw-r--r--arch/ia64/sn/kernel/xpc_partition.c2
-rw-r--r--arch/m32r/kernel/Makefile2
-rw-r--r--arch/m32r/kernel/m32r_ksyms.c5
-rw-r--r--arch/m32r/kernel/semaphore.c185
-rw-r--r--arch/m68k/kernel/Makefile2
-rw-r--r--arch/m68k/kernel/m68k_ksyms.c6
-rw-r--r--arch/m68k/kernel/semaphore.c132
-rw-r--r--arch/m68k/lib/Makefile2
-rw-r--r--arch/m68k/lib/semaphore.S53
-rw-r--r--arch/m68knommu/kernel/Makefile2
-rw-r--r--arch/m68knommu/kernel/m68k_ksyms.c6
-rw-r--r--arch/m68knommu/kernel/semaphore.c133
-rw-r--r--arch/m68knommu/lib/Makefile2
-rw-r--r--arch/m68knommu/lib/semaphore.S66
-rw-r--r--arch/mips/kernel/Makefile2
-rw-r--r--arch/mips/kernel/semaphore.c168
-rw-r--r--arch/mn10300/kernel/Makefile2
-rw-r--r--arch/mn10300/kernel/semaphore.c149
-rw-r--r--arch/parisc/kernel/Makefile2
-rw-r--r--arch/parisc/kernel/parisc_ksyms.c5
-rw-r--r--arch/parisc/kernel/semaphore.c102
-rw-r--r--arch/powerpc/kernel/Makefile2
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c1
-rw-r--r--arch/powerpc/kernel/semaphore.c135
-rw-r--r--arch/powerpc/kernel/setup_32.c8
-rw-r--r--arch/powerpc/platforms/powermac/pci.c22
-rw-r--r--arch/powerpc/platforms/powermac/pmac.h5
-rw-r--r--arch/powerpc/platforms/powermac/setup.c8
-rw-r--r--arch/ppc/configs/sandpoint_defconfig2
-rw-r--r--arch/ppc/kernel/ppc_ksyms.c5
-rw-r--r--arch/ppc/kernel/semaphore.c131
-rw-r--r--arch/ppc/kernel/setup.c2
-rw-r--r--arch/ppc/platforms/4xx/bamboo.c1
-rw-r--r--arch/ppc/platforms/4xx/ebony.c1
-rw-r--r--arch/ppc/platforms/4xx/luan.c1
-rw-r--r--arch/ppc/platforms/4xx/ocotea.c1
-rw-r--r--arch/ppc/platforms/4xx/taishan.c1
-rw-r--r--arch/ppc/platforms/4xx/yucca.c1
-rw-r--r--arch/ppc/platforms/chestnut.c1
-rw-r--r--arch/ppc/platforms/cpci690.c1
-rw-r--r--arch/ppc/platforms/ev64260.c1
-rw-r--r--arch/ppc/platforms/hdpu.c36
-rw-r--r--arch/ppc/platforms/lopec.c85
-rw-r--r--arch/ppc/platforms/mvme5100.c1
-rw-r--r--arch/ppc/platforms/powerpmc250.c1
-rw-r--r--arch/ppc/platforms/pplus.c58
-rw-r--r--arch/ppc/platforms/prep_setup.c38
-rw-r--r--arch/ppc/platforms/prpmc750.c1
-rw-r--r--arch/ppc/platforms/prpmc800.c1
-rw-r--r--arch/ppc/platforms/radstone_ppc7d.c1
-rw-r--r--arch/ppc/platforms/residual.c1
-rw-r--r--arch/ppc/platforms/sandpoint.c94
-rw-r--r--arch/ppc/platforms/sandpoint.h3
-rw-r--r--arch/ppc/platforms/spruce.c1
-rw-r--r--arch/ppc/syslib/m8xx_setup.c6
-rw-r--r--arch/ppc/syslib/ppc4xx_setup.c23
-rw-r--r--arch/s390/Kconfig33
-rw-r--r--arch/s390/crypto/aes_s390.c8
-rw-r--r--arch/s390/crypto/des_s390.c8
-rw-r--r--arch/s390/crypto/sha1_s390.c8
-rw-r--r--arch/s390/crypto/sha256_s390.c8
-rw-r--r--arch/s390/defconfig1
-rw-r--r--arch/s390/kernel/Makefile4
-rw-r--r--arch/s390/kernel/compat_linux.h73
-rw-r--r--arch/s390/kernel/compat_signal.c11
-rw-r--r--arch/s390/kernel/debug.c53
-rw-r--r--arch/s390/kernel/early.c1
-rw-r--r--arch/s390/kernel/entry.h60
-rw-r--r--arch/s390/kernel/entry64.S2
-rw-r--r--arch/s390/kernel/ipl.c2
-rw-r--r--arch/s390/kernel/kprobes.c2
-rw-r--r--arch/s390/kernel/process.c77
-rw-r--r--arch/s390/kernel/ptrace.c1
-rw-r--r--arch/s390/kernel/s390_ext.c14
-rw-r--r--arch/s390/kernel/s390_ksyms.c7
-rw-r--r--arch/s390/kernel/semaphore.c108
-rw-r--r--arch/s390/kernel/setup.c15
-rw-r--r--arch/s390/kernel/signal.c16
-rw-r--r--arch/s390/kernel/smp.c91
-rw-r--r--arch/s390/kernel/sys_s390.c2
-rw-r--r--arch/s390/kernel/time.c259
-rw-r--r--arch/s390/kernel/topology.c314
-rw-r--r--arch/s390/kernel/traps.c17
-rw-r--r--arch/s390/lib/delay.c14
-rw-r--r--arch/s390/lib/uaccess_pt.c59
-rw-r--r--arch/s390/mm/extmem.c67
-rw-r--r--arch/s390/mm/fault.c21
-rw-r--r--arch/s390/mm/init.c1
-rw-r--r--arch/sh/kernel/Makefile_322
-rw-r--r--arch/sh/kernel/Makefile_642
-rw-r--r--arch/sh/kernel/semaphore.c139
-rw-r--r--arch/sh/kernel/sh_ksyms_32.c7
-rw-r--r--arch/sh/kernel/sh_ksyms_64.c4
-rw-r--r--arch/sparc/kernel/Makefile2
-rw-r--r--arch/sparc/kernel/semaphore.c155
-rw-r--r--arch/sparc/kernel/sparc_ksyms.c5
-rw-r--r--arch/sparc64/kernel/Makefile2
-rw-r--r--arch/sparc64/kernel/semaphore.c254
-rw-r--r--arch/sparc64/kernel/sparc64_ksyms.c6
-rw-r--r--arch/um/Kconfig.i3864
-rw-r--r--arch/um/Kconfig.x86_644
-rw-r--r--arch/um/sys-i386/ksyms.c12
-rw-r--r--arch/um/sys-ppc/Makefile8
-rw-r--r--arch/um/sys-x86_64/ksyms.c13
-rw-r--r--arch/v850/kernel/Makefile2
-rw-r--r--arch/v850/kernel/semaphore.c166
-rw-r--r--arch/v850/kernel/v850_ksyms.c7
-rw-r--r--arch/x86/Kconfig74
-rw-r--r--arch/x86/Kconfig.cpu2
-rw-r--r--arch/x86/Kconfig.debug26
-rw-r--r--arch/x86/Makefile7
-rw-r--r--arch/x86/boot/Makefile16
-rw-r--r--arch/x86/boot/boot.h5
-rw-r--r--arch/x86/boot/compressed/Makefile2
-rw-r--r--arch/x86/boot/compressed/misc.c202
-rw-r--r--arch/x86/boot/cpucheck.c20
-rw-r--r--arch/x86/boot/header.S6
-rw-r--r--arch/x86/boot/pm.c2
-rw-r--r--arch/x86/boot/tools/build.c88
-rw-r--r--arch/x86/boot/video-bios.c6
-rw-r--r--arch/x86/boot/video-mode.c173
-rw-r--r--arch/x86/boot/video-vesa.c8
-rw-r--r--arch/x86/boot/video-vga.c12
-rw-r--r--arch/x86/boot/video.c157
-rw-r--r--arch/x86/ia32/ia32_signal.c2
-rw-r--r--arch/x86/ia32/ia32entry.S12
-rw-r--r--arch/x86/ia32/sys_ia32.c31
-rw-r--r--arch/x86/kernel/Makefile20
-rw-r--r--arch/x86/kernel/acpi/Makefile9
-rw-r--r--arch/x86/kernel/acpi/boot.c67
-rw-r--r--arch/x86/kernel/acpi/realmode/Makefile57
-rw-r--r--arch/x86/kernel/acpi/realmode/copy.S1
-rw-r--r--arch/x86/kernel/acpi/realmode/video-bios.c1
-rw-r--r--arch/x86/kernel/acpi/realmode/video-mode.c1
-rw-r--r--arch/x86/kernel/acpi/realmode/video-vesa.c1
-rw-r--r--arch/x86/kernel/acpi/realmode/video-vga.c1
-rw-r--r--arch/x86/kernel/acpi/realmode/wakemain.c81
-rw-r--r--arch/x86/kernel/acpi/realmode/wakeup.S113
-rw-r--r--arch/x86/kernel/acpi/realmode/wakeup.h36
-rw-r--r--arch/x86/kernel/acpi/realmode/wakeup.lds.S61
-rw-r--r--arch/x86/kernel/acpi/sleep.c73
-rw-r--r--arch/x86/kernel/acpi/sleep.h16
-rw-r--r--arch/x86/kernel/acpi/sleep_32.c40
-rw-r--r--arch/x86/kernel/acpi/wakeup_32.S247
-rw-r--r--arch/x86/kernel/acpi/wakeup_64.S313
-rw-r--r--arch/x86/kernel/acpi/wakeup_rm.S10
-rw-r--r--arch/x86/kernel/alternative.c103
-rw-r--r--arch/x86/kernel/aperture_64.c6
-rw-r--r--arch/x86/kernel/apic_32.c224
-rw-r--r--arch/x86/kernel/apic_64.c140
-rw-r--r--arch/x86/kernel/apm_32.c5
-rw-r--r--arch/x86/kernel/asm-offsets_32.c2
-rw-r--r--arch/x86/kernel/bugs_64.c14
-rw-r--r--arch/x86/kernel/cpu/Makefile4
-rw-r--r--arch/x86/kernel/cpu/amd.c121
-rw-r--r--arch/x86/kernel/cpu/centaur.c490
-rw-r--r--arch/x86/kernel/cpu/common.c180
-rw-r--r--arch/x86/kernel/cpu/cpu.h26
-rw-r--r--arch/x86/kernel/cpu/cyrix.c136
-rw-r--r--arch/x86/kernel/cpu/feature_names.c2
-rw-r--r--arch/x86/kernel/cpu/intel.c106
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_32.c50
-rw-r--r--arch/x86/kernel/cpu/mcheck/non-fatal.c21
-rw-r--r--arch/x86/kernel/cpu/mcheck/p5.c16
-rw-r--r--arch/x86/kernel/cpu/mcheck/p6.c48
-rw-r--r--arch/x86/kernel/cpu/mcheck/winchip.c8
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c139
-rw-r--r--arch/x86/kernel/cpu/mtrr/if.c7
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/state.c14
-rw-r--r--arch/x86/kernel/cpu/nexgen.c15
-rw-r--r--arch/x86/kernel/cpu/proc.c170
-rw-r--r--arch/x86/kernel/cpu/transmeta.c30
-rw-r--r--arch/x86/kernel/cpu/umc.c19
-rw-r--r--arch/x86/kernel/crash.c4
-rw-r--r--arch/x86/kernel/ds.c8
-rw-r--r--arch/x86/kernel/e820_32.c27
-rw-r--r--arch/x86/kernel/e820_64.c156
-rw-r--r--arch/x86/kernel/early_printk.c24
-rw-r--r--arch/x86/kernel/entry_32.S39
-rw-r--r--arch/x86/kernel/entry_64.S8
-rw-r--r--arch/x86/kernel/genapic_64.c47
-rw-r--r--arch/x86/kernel/genapic_flat_64.c7
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c245
-rw-r--r--arch/x86/kernel/head32.c14
-rw-r--r--arch/x86/kernel/head64.c78
-rw-r--r--arch/x86/kernel/head_32.S2
-rw-r--r--arch/x86/kernel/head_64.S28
-rw-r--r--arch/x86/kernel/i386_ksyms_32.c5
-rw-r--r--arch/x86/kernel/i387.c88
-rw-r--r--arch/x86/kernel/io_apic_32.c164
-rw-r--r--arch/x86/kernel/io_apic_64.c63
-rw-r--r--arch/x86/kernel/ipi.c178
-rw-r--r--arch/x86/kernel/irq_32.c2
-rw-r--r--arch/x86/kernel/kgdb.c571
-rw-r--r--arch/x86/kernel/kprobes.c14
-rw-r--r--arch/x86/kernel/mca_32.c96
-rw-r--r--arch/x86/kernel/microcode.c16
-rw-r--r--arch/x86/kernel/mpparse.c (renamed from arch/x86/kernel/mpparse_32.c)850
-rw-r--r--arch/x86/kernel/mpparse_64.c867
-rw-r--r--arch/x86/kernel/msr.c4
-rw-r--r--arch/x86/kernel/nmi_32.c14
-rw-r--r--arch/x86/kernel/nmi_64.c2
-rw-r--r--arch/x86/kernel/paravirt.c18
-rw-r--r--arch/x86/kernel/pci-dma_64.c20
-rw-r--r--arch/x86/kernel/process_32.c41
-rw-r--r--arch/x86/kernel/process_64.c27
-rw-r--r--arch/x86/kernel/ptrace.c1
-rw-r--r--arch/x86/kernel/reboot.c5
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S30
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S40
-rw-r--r--arch/x86/kernel/rtc.c33
-rw-r--r--arch/x86/kernel/setup.c113
-rw-r--r--arch/x86/kernel/setup64.c97
-rw-r--r--arch/x86/kernel/setup_32.c95
-rw-r--r--arch/x86/kernel/setup_64.c224
-rw-r--r--arch/x86/kernel/sigframe.h (renamed from arch/x86/kernel/sigframe_32.h)14
-rw-r--r--arch/x86/kernel/signal_32.c303
-rw-r--r--arch/x86/kernel/signal_64.c108
-rw-r--r--arch/x86/kernel/smp.c343
-rw-r--r--arch/x86/kernel/smp_32.c712
-rw-r--r--arch/x86/kernel/smpboot.c (renamed from arch/x86/kernel/smpboot_32.c)1345
-rw-r--r--arch/x86/kernel/smpboot_64.c1108
-rw-r--r--arch/x86/kernel/smpcommon.c83
-rw-r--r--arch/x86/kernel/smpcommon_32.c81
-rw-r--r--arch/x86/kernel/srat_32.c10
-rw-r--r--arch/x86/kernel/step.c2
-rw-r--r--arch/x86/kernel/summit_32.c57
-rw-r--r--arch/x86/kernel/syscall_64.c13
-rw-r--r--arch/x86/kernel/test_nx.c2
-rw-r--r--arch/x86/kernel/tlb_32.c243
-rw-r--r--arch/x86/kernel/tlb_64.c (renamed from arch/x86/kernel/smp_64.c)266
-rw-r--r--arch/x86/kernel/trampoline.c18
-rw-r--r--arch/x86/kernel/trampoline_64.S5
-rw-r--r--arch/x86/kernel/traps_32.c614
-rw-r--r--arch/x86/kernel/traps_64.c11
-rw-r--r--arch/x86/kernel/tsc_32.c16
-rw-r--r--arch/x86/kernel/tsc_64.c3
-rw-r--r--arch/x86/kernel/vm86_32.c213
-rw-r--r--arch/x86/kernel/vmlinux_32.lds.S5
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S11
-rw-r--r--arch/x86/kernel/vsmp_64.c131
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c18
-rw-r--r--arch/x86/lguest/boot.c2
-rw-r--r--arch/x86/lib/memcpy_32.c2
-rw-r--r--arch/x86/lib/memmove_64.c8
-rw-r--r--arch/x86/lib/mmx_32.c197
-rw-r--r--arch/x86/lib/semaphore_32.S83
-rw-r--r--arch/x86/lib/string_32.c60
-rw-r--r--arch/x86/lib/strstr_32.c4
-rw-r--r--arch/x86/lib/thunk_64.S5
-rw-r--r--arch/x86/lib/usercopy_32.c122
-rw-r--r--arch/x86/mach-generic/bigsmp.c30
-rw-r--r--arch/x86/mach-generic/default.c8
-rw-r--r--arch/x86/mach-generic/probe.c37
-rw-r--r--arch/x86/mach-generic/summit.c8
-rw-r--r--arch/x86/mach-rdc321x/Makefile2
-rw-r--r--arch/x86/mach-rdc321x/wdt.c275
-rw-r--r--arch/x86/mach-voyager/voyager_smp.c18
-rw-r--r--arch/x86/math-emu/fpu_entry.c1
-rw-r--r--arch/x86/math-emu/reg_ld_str.c13
-rw-r--r--arch/x86/mm/Makefile16
-rw-r--r--arch/x86/mm/Makefile_329
-rw-r--r--arch/x86/mm/Makefile_649
-rw-r--r--arch/x86/mm/discontig_32.c2
-rw-r--r--arch/x86/mm/dump_pagetables.c354
-rw-r--r--arch/x86/mm/fault.c6
-rw-r--r--arch/x86/mm/init_32.c35
-rw-r--r--arch/x86/mm/init_64.c222
-rw-r--r--arch/x86/mm/ioremap.c152
-rw-r--r--arch/x86/mm/numa_64.c6
-rw-r--r--arch/x86/mm/pageattr.c107
-rw-r--r--arch/x86/mm/pat.c421
-rw-r--r--arch/x86/mm/pgtable_32.c8
-rw-r--r--arch/x86/mm/srat_64.c7
-rw-r--r--arch/x86/oprofile/init.c13
-rw-r--r--arch/x86/oprofile/nmi_timer_int.c6
-rw-r--r--arch/x86/oprofile/op_model_athlon.c46
-rw-r--r--arch/x86/oprofile/op_model_ppro.c52
-rw-r--r--arch/x86/pci/i386.c73
-rw-r--r--arch/x86/pci/irq.c12
-rw-r--r--arch/x86/pci/numa.c32
-rw-r--r--arch/x86/power/cpu_32.c41
-rw-r--r--arch/x86/vdso/Makefile2
-rw-r--r--arch/x86/vdso/vdso32-setup.c11
-rw-r--r--arch/x86/xen/enlighten.c7
-rw-r--r--arch/x86/xen/multicalls.c4
-rw-r--r--arch/x86/xen/smp.c6
-rw-r--r--arch/x86/xen/xen-asm.S11
-rw-r--r--arch/x86/xen/xen-ops.h2
-rw-r--r--arch/xtensa/kernel/Makefile2
-rw-r--r--arch/xtensa/kernel/semaphore.c226
-rw-r--r--arch/xtensa/kernel/xtensa_ksyms.c9
348 files changed, 12201 insertions, 13053 deletions
diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile
index dccf05245d4d..ac706c1d7ada 100644
--- a/arch/alpha/kernel/Makefile
+++ b/arch/alpha/kernel/Makefile
@@ -7,7 +7,7 @@ EXTRA_AFLAGS := $(KBUILD_CFLAGS)
EXTRA_CFLAGS := -Werror -Wno-sign-compare
obj-y := entry.o traps.o process.o init_task.o osf_sys.o irq.o \
- irq_alpha.o signal.o setup.o ptrace.o time.o semaphore.o \
+ irq_alpha.o signal.o setup.o ptrace.o time.o \
alpha_ksyms.o systbls.o err_common.o io.o
obj-$(CONFIG_VGA_HOSE) += console.o
diff --git a/arch/alpha/kernel/alpha_ksyms.c b/arch/alpha/kernel/alpha_ksyms.c
index e9762a33b043..d96e742d4dc2 100644
--- a/arch/alpha/kernel/alpha_ksyms.c
+++ b/arch/alpha/kernel/alpha_ksyms.c
@@ -77,15 +77,6 @@ EXPORT_SYMBOL(__do_clear_user);
EXPORT_SYMBOL(__strncpy_from_user);
EXPORT_SYMBOL(__strnlen_user);
-/* Semaphore helper functions. */
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__up_wakeup);
-EXPORT_SYMBOL(down);
-EXPORT_SYMBOL(down_interruptible);
-EXPORT_SYMBOL(down_trylock);
-EXPORT_SYMBOL(up);
-
/*
* SMP-specific symbols.
*/
diff --git a/arch/alpha/kernel/semaphore.c b/arch/alpha/kernel/semaphore.c
deleted file mode 100644
index 8d2982aa1b8d..000000000000
--- a/arch/alpha/kernel/semaphore.c
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Alpha semaphore implementation.
- *
- * (C) Copyright 1996 Linus Torvalds
- * (C) Copyright 1999, 2000 Richard Henderson
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-
-/*
- * This is basically the PPC semaphore scheme ported to use
- * the Alpha ll/sc sequences, so see the PPC code for
- * credits.
- */
-
-/*
- * Atomically update sem->count.
- * This does the equivalent of the following:
- *
- * old_count = sem->count;
- * tmp = MAX(old_count, 0) + incr;
- * sem->count = tmp;
- * return old_count;
- */
-static inline int __sem_update_count(struct semaphore *sem, int incr)
-{
- long old_count, tmp = 0;
-
- __asm__ __volatile__(
- "1: ldl_l %0,%2\n"
- " cmovgt %0,%0,%1\n"
- " addl %1,%3,%1\n"
- " stl_c %1,%2\n"
- " beq %1,2f\n"
- " mb\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
- : "Ir" (incr), "1" (tmp), "m" (sem->count));
-
- return old_count;
-}
-
-/*
- * Perform the "down" function. Return zero for semaphore acquired,
- * return negative for signalled out of the function.
- *
- * If called from down, the return is ignored and the wait loop is
- * not interruptible. This means that a task waiting on a semaphore
- * using "down()" cannot be killed until someone does an "up()" on
- * the semaphore.
- *
- * If called from down_interruptible, the return value gets checked
- * upon return. If the return value is negative then the task continues
- * with the negative value in the return register (it can be tested by
- * the caller).
- *
- * Either form may be used in conjunction with "up()".
- */
-
-void __sched
-__down_failed(struct semaphore *sem)
-{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
-
-#ifdef CONFIG_DEBUG_SEMAPHORE
- printk("%s(%d): down failed(%p)\n",
- tsk->comm, task_pid_nr(tsk), sem);
-#endif
-
- tsk->state = TASK_UNINTERRUPTIBLE;
- wmb();
- add_wait_queue_exclusive(&sem->wait, &wait);
-
- /*
- * Try to get the semaphore. If the count is > 0, then we've
- * got the semaphore; we decrement count and exit the loop.
- * If the count is 0 or negative, we set it to -1, indicating
- * that we are asleep, and then sleep.
- */
- while (__sem_update_count(sem, -1) <= 0) {
- schedule();
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
- }
- remove_wait_queue(&sem->wait, &wait);
- tsk->state = TASK_RUNNING;
-
- /*
- * If there are any more sleepers, wake one of them up so
- * that it can either get the semaphore, or set count to -1
- * indicating that there are still processes sleeping.
- */
- wake_up(&sem->wait);
-
-#ifdef CONFIG_DEBUG_SEMAPHORE
- printk("%s(%d): down acquired(%p)\n",
- tsk->comm, task_pid_nr(tsk), sem);
-#endif
-}
-
-int __sched
-__down_failed_interruptible(struct semaphore *sem)
-{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- long ret = 0;
-
-#ifdef CONFIG_DEBUG_SEMAPHORE
- printk("%s(%d): down failed(%p)\n",
- tsk->comm, task_pid_nr(tsk), sem);
-#endif
-
- tsk->state = TASK_INTERRUPTIBLE;
- wmb();
- add_wait_queue_exclusive(&sem->wait, &wait);
-
- while (__sem_update_count(sem, -1) <= 0) {
- if (signal_pending(current)) {
- /*
- * A signal is pending - give up trying.
- * Set sem->count to 0 if it is negative,
- * since we are no longer sleeping.
- */
- __sem_update_count(sem, 0);
- ret = -EINTR;
- break;
- }
- schedule();
- set_task_state(tsk, TASK_INTERRUPTIBLE);
- }
-
- remove_wait_queue(&sem->wait, &wait);
- tsk->state = TASK_RUNNING;
- wake_up(&sem->wait);
-
-#ifdef CONFIG_DEBUG_SEMAPHORE
- printk("%s(%d): down %s(%p)\n",
- current->comm, task_pid_nr(current),
- (ret < 0 ? "interrupted" : "acquired"), sem);
-#endif
- return ret;
-}
-
-void
-__up_wakeup(struct semaphore *sem)
-{
- /*
- * Note that we incremented count in up() before we came here,
- * but that was ineffective since the result was <= 0, and
- * any negative value of count is equivalent to 0.
- * This ends up setting count to 1, unless count is now > 0
- * (i.e. because some other cpu has called up() in the meantime),
- * in which case we just increment count.
- */
- __sem_update_count(sem, 1);
- wake_up(&sem->wait);
-}
-
-void __sched
-down(struct semaphore *sem)
-{
-#ifdef WAITQUEUE_DEBUG
- CHECK_MAGIC(sem->__magic);
-#endif
-#ifdef CONFIG_DEBUG_SEMAPHORE
- printk("%s(%d): down(%p) <count=%d> from %p\n",
- current->comm, task_pid_nr(current), sem,
- atomic_read(&sem->count), __builtin_return_address(0));
-#endif
- __down(sem);
-}
-
-int __sched
-down_interruptible(struct semaphore *sem)
-{
-#ifdef WAITQUEUE_DEBUG
- CHECK_MAGIC(sem->__magic);
-#endif
-#ifdef CONFIG_DEBUG_SEMAPHORE
- printk("%s(%d): down(%p) <count=%d> from %p\n",
- current->comm, task_pid_nr(current), sem,
- atomic_read(&sem->count), __builtin_return_address(0));
-#endif
- return __down_interruptible(sem);
-}
-
-int
-down_trylock(struct semaphore *sem)
-{
- int ret;
-
-#ifdef WAITQUEUE_DEBUG
- CHECK_MAGIC(sem->__magic);
-#endif
-
- ret = __down_trylock(sem);
-
-#ifdef CONFIG_DEBUG_SEMAPHORE
- printk("%s(%d): down_trylock %s from %p\n",
- current->comm, task_pid_nr(current),
- ret ? "failed" : "acquired",
- __builtin_return_address(0));
-#endif
-
- return ret;
-}
-
-void
-up(struct semaphore *sem)
-{
-#ifdef WAITQUEUE_DEBUG
- CHECK_MAGIC(sem->__magic);
-#endif
-#ifdef CONFIG_DEBUG_SEMAPHORE
- printk("%s(%d): up(%p) <count=%d> from %p\n",
- current->comm, task_pid_nr(current), sem,
- atomic_read(&sem->count), __builtin_return_address(0));
-#endif
- __up(sem);
-}
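
Note: the Alpha routine deleted above encodes the update rule spelled out in its own comment — read the old count, clamp it at zero, add the increment, store, and return the pre-update value — using an ll/sc loop. A minimal sketch of that same rule in portable C11 atomics, purely for illustration (the function name and the use of <stdatomic.h> are mine, not kernel code):

#include <stdatomic.h>

/* Equivalent of the ll/sc loop above:
 *   old = count; count = MAX(old, 0) + incr; return old;
 * The loop retries if another CPU updates the count concurrently. */
static int sem_update_count_sketch(_Atomic int *count, int incr)
{
	int old = atomic_load(count);
	int new;

	do {
		new = (old > 0 ? old : 0) + incr;
	} while (!atomic_compare_exchange_weak(count, &old, new));

	return old;	/* caller inspects the pre-update value */
}
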
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 00d44c6fbfe9..6235f72a14f0 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -7,7 +7,7 @@ AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
# Object file lists.
obj-y := compat.o entry-armv.o entry-common.o irq.o \
- process.o ptrace.o semaphore.o setup.o signal.o \
+ process.o ptrace.o setup.o signal.o \
sys_arm.o stacktrace.o time.o traps.o
obj-$(CONFIG_ISA_DMA_API) += dma.o
diff --git a/arch/arm/kernel/semaphore.c b/arch/arm/kernel/semaphore.c
deleted file mode 100644
index 981fe5c6ccbe..000000000000
--- a/arch/arm/kernel/semaphore.c
+++ /dev/null
@@ -1,221 +0,0 @@
-/*
- * ARM semaphore implementation, taken from
- *
- * i386 semaphore implementation.
- *
- * (C) Copyright 1999 Linus Torvalds
- *
- * Modified for ARM by Russell King
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-
-#include <asm/semaphore.h>
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to acquire the semaphore, while the "sleeping"
- * variable is a count of such acquires.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * "sleeping" and the contention routine ordering is
- * protected by the semaphore spinlock.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-
-/*
- * Logic:
- * - only on a boundary condition do we need to care. When we go
- * from a negative count to a non-negative, we wake people up.
- * - when we go from a non-negative count to a negative do we
- * (a) synchronize with the "sleeper" count and (b) make sure
- * that we're on the wakeup list before we synchronize so that
- * we cannot lose wakeup events.
- */
-
-void __up(struct semaphore *sem)
-{
- wake_up(&sem->wait);
-}
-
-static DEFINE_SPINLOCK(semaphore_lock);
-
-void __sched __down(struct semaphore * sem)
-{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- tsk->state = TASK_UNINTERRUPTIBLE;
- add_wait_queue_exclusive(&sem->wait, &wait);
-
- spin_lock_irq(&semaphore_lock);
- sem->sleepers++;
- for (;;) {
- int sleepers = sem->sleepers;
-
- /*
- * Add "everybody else" into it. They aren't
- * playing, because we own the spinlock.
- */
- if (!atomic_add_negative(sleepers - 1, &sem->count)) {
- sem->sleepers = 0;
- break;
- }
- sem->sleepers = 1; /* us - see -1 above */
- spin_unlock_irq(&semaphore_lock);
-
- schedule();
- tsk->state = TASK_UNINTERRUPTIBLE;
- spin_lock_irq(&semaphore_lock);
- }
- spin_unlock_irq(&semaphore_lock);
- remove_wait_queue(&sem->wait, &wait);
- tsk->state = TASK_RUNNING;
- wake_up(&sem->wait);
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
- int retval = 0;
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- tsk->state = TASK_INTERRUPTIBLE;
- add_wait_queue_exclusive(&sem->wait, &wait);
-
- spin_lock_irq(&semaphore_lock);
- sem->sleepers ++;
- for (;;) {
- int sleepers = sem->sleepers;
-
- /*
- * With signals pending, this turns into
- * the trylock failure case - we won't be
- * sleeping, and we* can't get the lock as
- * it has contention. Just correct the count
- * and exit.
- */
- if (signal_pending(current)) {
- retval = -EINTR;
- sem->sleepers = 0;
- atomic_add(sleepers, &sem->count);
- break;
- }
-
- /*
- * Add "everybody else" into it. They aren't
- * playing, because we own the spinlock. The
- * "-1" is because we're still hoping to get
- * the lock.
- */
- if (!atomic_add_negative(sleepers - 1, &sem->count)) {
- sem->sleepers = 0;
- break;
- }
- sem->sleepers = 1; /* us - see -1 above */
- spin_unlock_irq(&semaphore_lock);
-
- schedule();
- tsk->state = TASK_INTERRUPTIBLE;
- spin_lock_irq(&semaphore_lock);
- }
- spin_unlock_irq(&semaphore_lock);
- tsk->state = TASK_RUNNING;
- remove_wait_queue(&sem->wait, &wait);
- wake_up(&sem->wait);
- return retval;
-}
-
-/*
- * Trylock failed - make sure we correct for
- * having decremented the count.
- *
- * We could have done the trylock with a
- * single "cmpxchg" without failure cases,
- * but then it wouldn't work on a 386.
- */
-int __down_trylock(struct semaphore * sem)
-{
- int sleepers;
- unsigned long flags;
-
- spin_lock_irqsave(&semaphore_lock, flags);
- sleepers = sem->sleepers + 1;
- sem->sleepers = 0;
-
- /*
- * Add "everybody else" and us into it. They aren't
- * playing, because we own the spinlock.
- */
- if (!atomic_add_negative(sleepers, &sem->count))
- wake_up(&sem->wait);
-
- spin_unlock_irqrestore(&semaphore_lock, flags);
- return 1;
-}
-
-/*
- * The semaphore operations have a special calling sequence that
- * allow us to do a simpler in-line version of them. These routines
- * need to convert that sequence back into the C sequence when
- * there is contention on the semaphore.
- *
- * ip contains the semaphore pointer on entry. Save the C-clobbered
- * registers (r0 to r3 and lr), but not ip, as we use it as a return
- * value in some cases..
- * To remain AAPCS compliant (64-bit stack align) we save r4 as well.
- */
-asm(" .section .sched.text,\"ax\",%progbits \n\
- .align 5 \n\
- .globl __down_failed \n\
-__down_failed: \n\
- stmfd sp!, {r0 - r4, lr} \n\
- mov r0, ip \n\
- bl __down \n\
- ldmfd sp!, {r0 - r4, pc} \n\
- \n\
- .align 5 \n\
- .globl __down_interruptible_failed \n\
-__down_interruptible_failed: \n\
- stmfd sp!, {r0 - r4, lr} \n\
- mov r0, ip \n\
- bl __down_interruptible \n\
- mov ip, r0 \n\
- ldmfd sp!, {r0 - r4, pc} \n\
- \n\
- .align 5 \n\
- .globl __down_trylock_failed \n\
-__down_trylock_failed: \n\
- stmfd sp!, {r0 - r4, lr} \n\
- mov r0, ip \n\
- bl __down_trylock \n\
- mov ip, r0 \n\
- ldmfd sp!, {r0 - r4, pc} \n\
- \n\
- .align 5 \n\
- .globl __up_wakeup \n\
-__up_wakeup: \n\
- stmfd sp!, {r0 - r4, lr} \n\
- mov r0, ip \n\
- bl __up \n\
- ldmfd sp!, {r0 - r4, pc} \n\
- ");
-
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_interruptible_failed);
-EXPORT_SYMBOL(__down_trylock_failed);
-EXPORT_SYMBOL(__up_wakeup);
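
Note: the ARM file removed above, like the other per-architecture copies later in this diff, implements the same contended slow path; the point of this series is that it can all be served by one architecture-independent semaphore (the replacement lives outside arch/ and is not shown here). A rough, hedged sketch of the simpler spinlock-plus-wait-list style such a generic implementation can take — illustration only, not the actual replacement code:

/*
 * Hedged sketch of a spinlock-protected counting semaphore.
 * Assumes the usual kernel headers (<linux/spinlock.h>, <linux/list.h>,
 * <linux/sched.h>); init (not shown): spin_lock_init(), INIT_LIST_HEAD(),
 * count = initial number of units.
 */
struct sem_sketch {
	spinlock_t		lock;
	unsigned int		count;
	struct list_head	wait_list;
};

struct sem_sketch_waiter {
	struct list_head	list;
	struct task_struct	*task;
	bool			up;	/* set when up() hands us the unit */
};

static void sketch_down(struct sem_sketch *sem)
{
	struct sem_sketch_waiter waiter;
	unsigned long flags;

	spin_lock_irqsave(&sem->lock, flags);
	if (likely(sem->count > 0)) {
		sem->count--;		/* uncontended: just take a unit */
		spin_unlock_irqrestore(&sem->lock, flags);
		return;
	}

	waiter.task = current;
	waiter.up = false;
	list_add_tail(&waiter.list, &sem->wait_list);

	do {
		__set_current_state(TASK_UNINTERRUPTIBLE);
		spin_unlock_irqrestore(&sem->lock, flags);
		schedule();		/* sleep until up() passes us the unit */
		spin_lock_irqsave(&sem->lock, flags);
	} while (!waiter.up);

	spin_unlock_irqrestore(&sem->lock, flags);
}

static void sketch_up(struct sem_sketch *sem)
{
	unsigned long flags;

	spin_lock_irqsave(&sem->lock, flags);
	if (list_empty(&sem->wait_list)) {
		sem->count++;		/* nobody waiting: bank the unit */
	} else {
		struct sem_sketch_waiter *w =
			list_first_entry(&sem->wait_list,
					 struct sem_sketch_waiter, list);

		list_del(&w->list);
		w->up = true;		/* hand the unit directly to the waiter */
		wake_up_process(w->task);
	}
	spin_unlock_irqrestore(&sem->lock, flags);
}
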
diff --git a/arch/avr32/kernel/Makefile b/arch/avr32/kernel/Makefile
index e4b6d122b033..18229d0d1861 100644
--- a/arch/avr32/kernel/Makefile
+++ b/arch/avr32/kernel/Makefile
@@ -6,7 +6,7 @@ extra-y := head.o vmlinux.lds
obj-$(CONFIG_SUBARCH_AVR32B) += entry-avr32b.o
obj-y += syscall_table.o syscall-stubs.o irq.o
-obj-y += setup.o traps.o semaphore.o ocd.o ptrace.o
+obj-y += setup.o traps.o ocd.o ptrace.o
obj-y += signal.o sys_avr32.o process.o time.o
obj-y += init_task.o switch_to.o cpu.o
obj-$(CONFIG_MODULES) += module.o avr32_ksyms.o
diff --git a/arch/avr32/kernel/semaphore.c b/arch/avr32/kernel/semaphore.c
deleted file mode 100644
index 1e2705a05016..000000000000
--- a/arch/avr32/kernel/semaphore.c
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * AVR32 sempahore implementation.
- *
- * Copyright (C) 2004-2006 Atmel Corporation
- *
- * Based on linux/arch/i386/kernel/semaphore.c
- * Copyright (C) 1999 Linus Torvalds
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/sched.h>
-#include <linux/errno.h>
-#include <linux/module.h>
-
-#include <asm/semaphore.h>
-#include <asm/atomic.h>
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to acquire the semaphore, while the "sleeping"
- * variable is a count of such acquires.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * "sleeping" and the contention routine ordering is protected
- * by the spinlock in the semaphore's waitqueue head.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-
-/*
- * Logic:
- * - only on a boundary condition do we need to care. When we go
- * from a negative count to a non-negative, we wake people up.
- * - when we go from a non-negative count to a negative do we
- * (a) synchronize with the "sleeper" count and (b) make sure
- * that we're on the wakeup list before we synchronize so that
- * we cannot lose wakeup events.
- */
-
-void __up(struct semaphore *sem)
-{
- wake_up(&sem->wait);
-}
-EXPORT_SYMBOL(__up);
-
-void __sched __down(struct semaphore *sem)
-{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- unsigned long flags;
-
- tsk->state = TASK_UNINTERRUPTIBLE;
- spin_lock_irqsave(&sem->wait.lock, flags);
- add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
- sem->sleepers++;
- for (;;) {
- int sleepers = sem->sleepers;
-
- /*
- * Add "everybody else" into it. They aren't
- * playing, because we own the spinlock in
- * the wait_queue_head.
- */
- if (atomic_add_return(sleepers - 1, &sem->count) >= 0) {
- sem->sleepers = 0;
- break;
- }
- sem->sleepers = 1; /* us - see -1 above */
- spin_unlock_irqrestore(&sem->wait.lock, flags);
-
- schedule();
-
- spin_lock_irqsave(&sem->wait.lock, flags);
- tsk->state = TASK_UNINTERRUPTIBLE;
- }
- remove_wait_queue_locked(&sem->wait, &wait);
- wake_up_locked(&sem->wait);
- spin_unlock_irqrestore(&sem->wait.lock, flags);
- tsk->state = TASK_RUNNING;
-}
-EXPORT_SYMBOL(__down);
-
-int __sched __down_interruptible(struct semaphore *sem)
-{
- int retval = 0;
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- unsigned long flags;
-
- tsk->state = TASK_INTERRUPTIBLE;
- spin_lock_irqsave(&sem->wait.lock, flags);
- add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
- sem->sleepers++;
- for (;;) {
- int sleepers = sem->sleepers;
-
- /*
- * With signals pending, this turns into the trylock
- * failure case - we won't be sleeping, and we can't
- * get the lock as it has contention. Just correct the
- * count and exit.
- */
- if (signal_pending(current)) {
- retval = -EINTR;
- sem->sleepers = 0;
- atomic_add(sleepers, &sem->count);
- break;
- }
-
- /*
- * Add "everybody else" into it. They aren't
- * playing, because we own the spinlock in
- * the wait_queue_head.
- */
- if (atomic_add_return(sleepers - 1, &sem->count) >= 0) {
- sem->sleepers = 0;
- break;
- }
- sem->sleepers = 1; /* us - see -1 above */
- spin_unlock_irqrestore(&sem->wait.lock, flags);
-
- schedule();
-
- spin_lock_irqsave(&sem->wait.lock, flags);
- tsk->state = TASK_INTERRUPTIBLE;
- }
- remove_wait_queue_locked(&sem->wait, &wait);
- wake_up_locked(&sem->wait);
- spin_unlock_irqrestore(&sem->wait.lock, flags);
-
- tsk->state = TASK_RUNNING;
- return retval;
-}
-EXPORT_SYMBOL(__down_interruptible);
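
Note: unlike the ARM copy, the AVR32 code above serializes its sleeper bookkeeping with the waitqueue head's own spinlock (sem->wait.lock and the *_locked waitqueue helpers) rather than a separate global lock. A small illustrative fragment of that pattern, with a made-up counter name standing in for sem->sleepers:

/* Illustration only: enqueue a sleeper and bump the sleeper count while
 * holding the waitqueue head's internal lock.  "pending_waiters" is a
 * stand-in name, not a real field. */
static void enqueue_sleeper_locked(wait_queue_head_t *q, wait_queue_t *wait,
				   int *pending_waiters)
{
	unsigned long flags;

	spin_lock_irqsave(&q->lock, flags);
	add_wait_queue_exclusive_locked(q, wait);	/* ok: we hold q->lock */
	(*pending_waiters)++;				/* protected by q->lock */
	spin_unlock_irqrestore(&q->lock, flags);
}
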
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index 589c6aca4803..2dd1f300a5cf 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -31,10 +31,6 @@ config ZONE_DMA
bool
default y
-config SEMAPHORE_SLEEPERS
- bool
- default y
-
config GENERIC_FIND_NEXT_BIT
bool
default y
diff --git a/arch/blackfin/kernel/bfin_ksyms.c b/arch/blackfin/kernel/bfin_ksyms.c
index 0bfbb269e350..053edff6c0d8 100644
--- a/arch/blackfin/kernel/bfin_ksyms.c
+++ b/arch/blackfin/kernel/bfin_ksyms.c
@@ -42,11 +42,6 @@ EXPORT_SYMBOL(ip_fast_csum);
EXPORT_SYMBOL(kernel_thread);
-EXPORT_SYMBOL(__up);
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_trylock);
-EXPORT_SYMBOL(__down_interruptible);
-
EXPORT_SYMBOL(is_in_rom);
EXPORT_SYMBOL(bfin_return_from_exception);
diff --git a/arch/cris/kernel/Makefile b/arch/cris/kernel/Makefile
index c8e8ea570989..ee7bcd4d20b2 100644
--- a/arch/cris/kernel/Makefile
+++ b/arch/cris/kernel/Makefile
@@ -5,8 +5,7 @@
extra-y := vmlinux.lds
-obj-y := process.o traps.o irq.o ptrace.o setup.o \
- time.o sys_cris.o semaphore.o
+obj-y := process.o traps.o irq.o ptrace.o setup.o time.o sys_cris.o
obj-$(CONFIG_MODULES) += crisksyms.o
obj-$(CONFIG_MODULES) += module.o
diff --git a/arch/cris/kernel/crisksyms.c b/arch/cris/kernel/crisksyms.c
index 62f0e752915a..7ac000f6a888 100644
--- a/arch/cris/kernel/crisksyms.c
+++ b/arch/cris/kernel/crisksyms.c
@@ -9,7 +9,6 @@
#include <linux/string.h>
#include <linux/tty.h>
-#include <asm/semaphore.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/checksum.h>
@@ -49,12 +48,6 @@ EXPORT_SYMBOL(__negdi2);
EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(iounmap);
-/* Semaphore functions */
-EXPORT_SYMBOL(__up);
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_interruptible);
-EXPORT_SYMBOL(__down_trylock);
-
/* Userspace access functions */
EXPORT_SYMBOL(__copy_user_zeroing);
EXPORT_SYMBOL(__copy_user);
diff --git a/arch/cris/kernel/semaphore.c b/arch/cris/kernel/semaphore.c
deleted file mode 100644
index f137a439041f..000000000000
--- a/arch/cris/kernel/semaphore.c
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Generic semaphore code. Buyer beware. Do your own
- * specific changes in <asm/semaphore-helper.h>
- */
-
-#include <linux/sched.h>
-#include <asm/semaphore-helper.h>
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to sleep, while the "waking" variable is
- * incremented when the "up()" code goes to wake up waiting
- * processes.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * waking_non_zero() (from asm/semaphore.h) must execute
- * atomically.
- *
- * When __up() is called, the count was negative before
- * incrementing it, and we need to wake up somebody.
- *
- * This routine adds one to the count of processes that need to
- * wake up and exit. ALL waiting processes actually wake up but
- * only the one that gets to the "waking" field first will gate
- * through and acquire the semaphore. The others will go back
- * to sleep.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-void __up(struct semaphore *sem)
-{
- wake_one_more(sem);
- wake_up(&sem->wait);
-}
-
-/*
- * Perform the "down" function. Return zero for semaphore acquired,
- * return negative for signalled out of the function.
- *
- * If called from __down, the return is ignored and the wait loop is
- * not interruptible. This means that a task waiting on a semaphore
- * using "down()" cannot be killed until someone does an "up()" on
- * the semaphore.
- *
- * If called from __down_interruptible, the return value gets checked
- * upon return. If the return value is negative then the task continues
- * with the negative value in the return register (it can be tested by
- * the caller).
- *
- * Either form may be used in conjunction with "up()".
- *
- */
-
-#define DOWN_VAR \
- struct task_struct *tsk = current; \
- wait_queue_t wait; \
- init_waitqueue_entry(&wait, tsk);
-
-#define DOWN_HEAD(task_state) \
- \
- \
- tsk->state = (task_state); \
- add_wait_queue(&sem->wait, &wait); \
- \
- /* \
- * Ok, we're set up. sem->count is known to be less than zero \
- * so we must wait. \
- * \
- * We can let go the lock for purposes of waiting. \
- * We re-acquire it after awaking so as to protect \
- * all semaphore operations. \
- * \
- * If "up()" is called before we call waking_non_zero() then \
- * we will catch it right away. If it is called later then \
- * we will have to go through a wakeup cycle to catch it. \
- * \
- * Multiple waiters contend for the semaphore lock to see \
- * who gets to gate through and who has to wait some more. \
- */ \
- for (;;) {
-
-#define DOWN_TAIL(task_state) \
- tsk->state = (task_state); \
- } \
- tsk->state = TASK_RUNNING; \
- remove_wait_queue(&sem->wait, &wait);
-
-void __sched __down(struct semaphore * sem)
-{
- DOWN_VAR
- DOWN_HEAD(TASK_UNINTERRUPTIBLE)
- if (waking_non_zero(sem))
- break;
- schedule();
- DOWN_TAIL(TASK_UNINTERRUPTIBLE)
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
- int ret = 0;
- DOWN_VAR
- DOWN_HEAD(TASK_INTERRUPTIBLE)
-
- ret = waking_non_zero_interruptible(sem, tsk);
- if (ret)
- {
- if (ret == 1)
- /* ret != 0 only if we get interrupted -arca */
- ret = 0;
- break;
- }
- schedule();
- DOWN_TAIL(TASK_INTERRUPTIBLE)
- return ret;
-}
-
-int __down_trylock(struct semaphore * sem)
-{
- return waking_non_zero_trylock(sem);
-}
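
Note: the CRIS code above (and the h8300 copy later in this diff) relies on helpers from <asm/semaphore-helper.h> built around a separate "waking" count: up() increments it and wakes every sleeper, but only the task that manages to decrement it from non-zero actually takes the semaphore; the rest go back to sleep. A hedged sketch of that gate, with a hypothetical structure layout (the field and lock names are assumptions, not the real header):

/* Hypothetical layout for illustration only. */
struct sem_waking_sketch {
	atomic_t		count;	/* > 0: free units, < 0: waiters */
	int			waking;	/* tokens handed out by up()     */
	wait_queue_head_t	wait;
};

static DEFINE_SPINLOCK(sketch_wake_lock);

/* Returns 1 if we won one of the tokens handed out by up(). */
static int sketch_waking_non_zero(struct sem_waking_sketch *sem)
{
	unsigned long flags;
	int got_it = 0;

	spin_lock_irqsave(&sketch_wake_lock, flags);
	if (sem->waking > 0) {
		sem->waking--;
		got_it = 1;
	}
	spin_unlock_irqrestore(&sketch_wake_lock, flags);
	return got_it;
}
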
diff --git a/arch/frv/kernel/Makefile b/arch/frv/kernel/Makefile
index e8f73ed28b52..c36f70b6699a 100644
--- a/arch/frv/kernel/Makefile
+++ b/arch/frv/kernel/Makefile
@@ -9,7 +9,7 @@ extra-y:= head.o init_task.o vmlinux.lds
obj-y := $(heads-y) entry.o entry-table.o break.o switch_to.o kernel_thread.o \
kernel_execve.o process.o traps.o ptrace.o signal.o dma.o \
- sys_frv.o time.o semaphore.o setup.o frv_ksyms.o \
+ sys_frv.o time.o setup.o frv_ksyms.o \
debug-stub.o irq.o sleep.o uaccess.o
obj-$(CONFIG_GDBSTUB) += gdb-stub.o gdb-io.o
diff --git a/arch/frv/kernel/frv_ksyms.c b/arch/frv/kernel/frv_ksyms.c
index f772704b3d28..0316b3c50eff 100644
--- a/arch/frv/kernel/frv_ksyms.c
+++ b/arch/frv/kernel/frv_ksyms.c
@@ -12,7 +12,6 @@
#include <asm/pgalloc.h>
#include <asm/irq.h>
#include <asm/io.h>
-#include <asm/semaphore.h>
#include <asm/checksum.h>
#include <asm/hardirq.h>
#include <asm/cacheflush.h>
diff --git a/arch/frv/kernel/semaphore.c b/arch/frv/kernel/semaphore.c
deleted file mode 100644
index 7ee3a147b471..000000000000
--- a/arch/frv/kernel/semaphore.c
+++ /dev/null
@@ -1,155 +0,0 @@
-/* semaphore.c: FR-V semaphores
- *
- * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- * - Derived from lib/rwsem-spinlock.c
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/module.h>
-#include <asm/semaphore.h>
-
-struct sem_waiter {
- struct list_head list;
- struct task_struct *task;
-};
-
-#ifdef CONFIG_DEBUG_SEMAPHORE
-void semtrace(struct semaphore *sem, const char *str)
-{
- if (sem->debug)
- printk("[%d] %s({%d,%d})\n",
- current->pid,
- str,
- sem->counter,
- list_empty(&sem->wait_list) ? 0 : 1);
-}
-#else
-#define semtrace(SEM,STR) do { } while(0)
-#endif
-
-/*
- * wait for a token to be granted from a semaphore
- * - entered with lock held and interrupts disabled
- */
-void __down(struct semaphore *sem, unsigned long flags)
-{
- struct task_struct *tsk = current;
- struct sem_waiter waiter;
-
- semtrace(sem, "Entering __down");
-
- /* set up my own style of waitqueue */
- waiter.task = tsk;
- get_task_struct(tsk);
-
- list_add_tail(&waiter.list, &sem->wait_list);
-
- /* we don't need to touch the semaphore struct anymore */
- spin_unlock_irqrestore(&sem->wait_lock, flags);
-
- /* wait to be given the semaphore */
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-
- for (;;) {
- if (list_empty(&waiter.list))
- break;
- schedule();
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
- }
-
- tsk->state = TASK_RUNNING;
- semtrace(sem, "Leaving __down");
-}
-
-EXPORT_SYMBOL(__down);
-
-/*
- * interruptibly wait for a token to be granted from a semaphore
- * - entered with lock held and interrupts disabled
- */
-int __down_interruptible(struct semaphore *sem, unsigned long flags)
-{
- struct task_struct *tsk = current;
- struct sem_waiter waiter;
- int ret;
-
- semtrace(sem,"Entering __down_interruptible");
-
- /* set up my own style of waitqueue */
- waiter.task = tsk;
- get_task_struct(tsk);
-
- list_add_tail(&waiter.list, &sem->wait_list);
-
- /* we don't need to touch the semaphore struct anymore */
- set_task_state(tsk, TASK_INTERRUPTIBLE);
-
- spin_unlock_irqrestore(&sem->wait_lock, flags);
-
- /* wait to be given the semaphore */
- ret = 0;
- for (;;) {
- if (list_empty(&waiter.list))
- break;
- if (unlikely(signal_pending(current)))
- goto interrupted;
- schedule();
- set_task_state(tsk, TASK_INTERRUPTIBLE);
- }
-
- out:
- tsk->state = TASK_RUNNING;
- semtrace(sem, "Leaving __down_interruptible");
- return ret;
-
- interrupted:
- spin_lock_irqsave(&sem->wait_lock, flags);
-
- if (!list_empty(&waiter.list)) {
- list_del(&waiter.list);
- ret = -EINTR;
- }
-
- spin_unlock_irqrestore(&sem->wait_lock, flags);
- if (ret == -EINTR)
- put_task_struct(current);
- goto out;
-}
-
-EXPORT_SYMBOL(__down_interruptible);
-
-/*
- * release a single token back to a semaphore
- * - entered with lock held and interrupts disabled
- */
-void __up(struct semaphore *sem)
-{
- struct task_struct *tsk;
- struct sem_waiter *waiter;
-
- semtrace(sem,"Entering __up");
-
- /* grant the token to the process at the front of the queue */
- waiter = list_entry(sem->wait_list.next, struct sem_waiter, list);
-
- /* We must be careful not to touch 'waiter' after we set ->task = NULL.
- * It is allocated on the waiter's stack and may become invalid at
- * any time after that point (due to a wakeup from another source).
- */
- list_del_init(&waiter->list);
- tsk = waiter->task;
- mb();
- waiter->task = NULL;
- wake_up_process(tsk);
- put_task_struct(tsk);
-
- semtrace(sem,"Leaving __up");
-}
-
-EXPORT_SYMBOL(__up);
diff --git a/arch/h8300/kernel/Makefile b/arch/h8300/kernel/Makefile
index 874f6aefee65..6c248c3c5c3b 100644
--- a/arch/h8300/kernel/Makefile
+++ b/arch/h8300/kernel/Makefile
@@ -5,7 +5,7 @@
extra-y := vmlinux.lds
obj-y := process.o traps.o ptrace.o irq.o \
- sys_h8300.o time.o semaphore.o signal.o \
+ sys_h8300.o time.o signal.o \
setup.o gpio.o init_task.o syscalls.o \
entry.o
diff --git a/arch/h8300/kernel/h8300_ksyms.c b/arch/h8300/kernel/h8300_ksyms.c
index d1b15267ac81..6866bd9c7fb4 100644
--- a/arch/h8300/kernel/h8300_ksyms.c
+++ b/arch/h8300/kernel/h8300_ksyms.c
@@ -12,7 +12,6 @@
#include <asm/pgalloc.h>
#include <asm/irq.h>
#include <asm/io.h>
-#include <asm/semaphore.h>
#include <asm/checksum.h>
#include <asm/current.h>
#include <asm/gpio.h>
diff --git a/arch/h8300/kernel/semaphore.c b/arch/h8300/kernel/semaphore.c
deleted file mode 100644
index d12cbbfe6ebd..000000000000
--- a/arch/h8300/kernel/semaphore.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Generic semaphore code. Buyer beware. Do your own
- * specific changes in <asm/semaphore-helper.h>
- */
-
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <asm/semaphore-helper.h>
-
-#ifndef CONFIG_RMW_INSNS
-spinlock_t semaphore_wake_lock;
-#endif
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to sleep, while the "waking" variable is
- * incremented when the "up()" code goes to wake up waiting
- * processes.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * waking_non_zero() (from asm/semaphore.h) must execute
- * atomically.
- *
- * When __up() is called, the count was negative before
- * incrementing it, and we need to wake up somebody.
- *
- * This routine adds one to the count of processes that need to
- * wake up and exit. ALL waiting processes actually wake up but
- * only the one that gets to the "waking" field first will gate
- * through and acquire the semaphore. The others will go back
- * to sleep.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-void __up(struct semaphore *sem)
-{
- wake_one_more(sem);
- wake_up(&sem->wait);
-}
-
-/*
- * Perform the "down" function. Return zero for semaphore acquired,
- * return negative for signalled out of the function.
- *
- * If called from __down, the return is ignored and the wait loop is
- * not interruptible. This means that a task waiting on a semaphore
- * using "down()" cannot be killed until someone does an "up()" on
- * the semaphore.
- *
- * If called from __down_interruptible, the return value gets checked
- * upon return. If the return value is negative then the task continues
- * with the negative value in the return register (it can be tested by
- * the caller).
- *
- * Either form may be used in conjunction with "up()".
- *
- */
-
-
-#define DOWN_HEAD(task_state) \
- \
- \
- current->state = (task_state); \
- add_wait_queue(&sem->wait, &wait); \
- \
- /* \
- * Ok, we're set up. sem->count is known to be less than zero \
- * so we must wait. \
- * \
- * We can let go the lock for purposes of waiting. \
- * We re-acquire it after awaking so as to protect \
- * all semaphore operations. \
- * \
- * If "up()" is called before we call waking_non_zero() then \
- * we will catch it right away. If it is called later then \
- * we will have to go through a wakeup cycle to catch it. \
- * \
- * Multiple waiters contend for the semaphore lock to see \
- * who gets to gate through and who has to wait some more. \
- */ \
- for (;;) {
-
-#define DOWN_TAIL(task_state) \
- current->state = (task_state); \
- } \
- current->state = TASK_RUNNING; \
- remove_wait_queue(&sem->wait, &wait);
-
-void __sched __down(struct semaphore * sem)
-{
- DECLARE_WAITQUEUE(wait, current);
-
- DOWN_HEAD(TASK_UNINTERRUPTIBLE)
- if (waking_non_zero(sem))
- break;
- schedule();
- DOWN_TAIL(TASK_UNINTERRUPTIBLE)
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
- DECLARE_WAITQUEUE(wait, current);
- int ret = 0;
-
- DOWN_HEAD(TASK_INTERRUPTIBLE)
-
- ret = waking_non_zero_interruptible(sem, current);
- if (ret)
- {
- if (ret == 1)
- /* ret != 0 only if we get interrupted -arca */
- ret = 0;
- break;
- }
- schedule();
- DOWN_TAIL(TASK_INTERRUPTIBLE)
- return ret;
-}
-
-int __down_trylock(struct semaphore * sem)
-{
- return waking_non_zero_trylock(sem);
-}
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 8fa3faf5ef1b..ed21737a00c5 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -283,6 +283,17 @@ config FORCE_MAX_ZONEORDER
default "17" if HUGETLB_PAGE
default "11"
+config VIRT_CPU_ACCOUNTING
+ bool "Deterministic task and CPU time accounting"
+ default n
+ help
+ Select this option to enable more accurate task and CPU time
+ accounting. This is done by reading a CPU counter on each
+ kernel entry and exit and on transitions within the kernel
+ between system, softirq and hardirq state, so there is a
+ small performance impact.
+ If in doubt, say N here.
+
config SMP
bool "Symmetric multi-processing support"
help
@@ -611,6 +622,9 @@ config IRQ_PER_CPU
bool
default y
+config IOMMU_HELPER
+ def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC)
+
source "arch/ia64/hp/sim/Kconfig"
source "arch/ia64/Kconfig.debug"
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
index 523eae6d3e49..9409de5c9441 100644
--- a/arch/ia64/hp/common/sba_iommu.c
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -35,6 +35,7 @@
#include <linux/nodemask.h>
#include <linux/bitops.h> /* hweight64() */
#include <linux/crash_dump.h>
+#include <linux/iommu-helper.h>
#include <asm/delay.h> /* ia64_get_itc() */
#include <asm/io.h>
@@ -460,6 +461,13 @@ get_iovp_order (unsigned long size)
return order;
}
+static unsigned long ptr_to_pide(struct ioc *ioc, unsigned long *res_ptr,
+ unsigned int bitshiftcnt)
+{
+ return (((unsigned long)res_ptr - (unsigned long)ioc->res_map) << 3)
+ + bitshiftcnt;
+}
+
/**
* sba_search_bitmap - find free space in IO PDIR resource bitmap
* @ioc: IO MMU structure which owns the pdir we are interested in.
@@ -471,15 +479,25 @@ get_iovp_order (unsigned long size)
* Cool perf optimization: search for log2(size) bits at a time.
*/
static SBA_INLINE unsigned long
-sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
+sba_search_bitmap(struct ioc *ioc, struct device *dev,
+ unsigned long bits_wanted, int use_hint)
{
unsigned long *res_ptr;
unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]);
- unsigned long flags, pide = ~0UL;
+ unsigned long flags, pide = ~0UL, tpide;
+ unsigned long boundary_size;
+ unsigned long shift;
+ int ret;
ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0);
ASSERT(res_ptr < res_end);
+ boundary_size = (unsigned long long)dma_get_seg_boundary(dev) + 1;
+ boundary_size = ALIGN(boundary_size, 1ULL << iovp_shift) >> iovp_shift;
+
+ BUG_ON(ioc->ibase & ~iovp_mask);
+ shift = ioc->ibase >> iovp_shift;
+
spin_lock_irqsave(&ioc->res_lock, flags);
/* Allow caller to force a search through the entire resource space */
@@ -504,9 +522,7 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
if (likely(*res_ptr != ~0UL)) {
bitshiftcnt = ffz(*res_ptr);
*res_ptr |= (1UL << bitshiftcnt);
- pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
- pide <<= 3; /* convert to bit address */
- pide += bitshiftcnt;
+ pide = ptr_to_pide(ioc, res_ptr, bitshiftcnt);
ioc->res_bitshift = bitshiftcnt + bits_wanted;
goto found_it;
}
@@ -535,11 +551,13 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
DBG_RES(" %p %lx %lx\n", res_ptr, mask, *res_ptr);
ASSERT(0 != mask);
for (; mask ; mask <<= o, bitshiftcnt += o) {
- if(0 == ((*res_ptr) & mask)) {
+ tpide = ptr_to_pide(ioc, res_ptr, bitshiftcnt);
+ ret = iommu_is_span_boundary(tpide, bits_wanted,
+ shift,
+ boundary_size);
+ if ((0 == ((*res_ptr) & mask)) && !ret) {
*res_ptr |= mask; /* mark resources busy! */
- pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
- pide <<= 3; /* convert to bit address */
- pide += bitshiftcnt;
+ pide = tpide;
ioc->res_bitshift = bitshiftcnt + bits_wanted;
goto found_it;
}
@@ -560,6 +578,11 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
end = res_end - qwords;
for (; res_ptr < end; res_ptr++) {
+ tpide = ptr_to_pide(ioc, res_ptr, 0);
+ ret = iommu_is_span_boundary(tpide, bits_wanted,
+ shift, boundary_size);
+ if (ret)
+ goto next_ptr;
for (i = 0 ; i < qwords ; i++) {
if (res_ptr[i] != 0)
goto next_ptr;
@@ -572,8 +595,7 @@ sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted, int use_hint)
res_ptr[i] = ~0UL;
res_ptr[i] |= RESMAP_MASK(bits);
- pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
- pide <<= 3; /* convert to bit address */
+ pide = tpide;
res_ptr += qwords;
ioc->res_bitshift = bits;
goto found_it;
@@ -605,7 +627,7 @@ found_it:
* resource bit map.
*/
static int
-sba_alloc_range(struct ioc *ioc, size_t size)
+sba_alloc_range(struct ioc *ioc, struct device *dev, size_t size)
{
unsigned int pages_needed = size >> iovp_shift;
#ifdef PDIR_SEARCH_TIMING
@@ -622,9 +644,9 @@ sba_alloc_range(struct ioc *ioc, size_t size)
/*
** "seek and ye shall find"...praying never hurts either...
*/
- pide = sba_search_bitmap(ioc, pages_needed, 1);
+ pide = sba_search_bitmap(ioc, dev, pages_needed, 1);
if (unlikely(pide >= (ioc->res_size << 3))) {
- pide = sba_search_bitmap(ioc, pages_needed, 0);
+ pide = sba_search_bitmap(ioc, dev, pages_needed, 0);
if (unlikely(pide >= (ioc->res_size << 3))) {
#if DELAYED_RESOURCE_CNT > 0
unsigned long flags;
@@ -653,7 +675,7 @@ sba_alloc_range(struct ioc *ioc, size_t size)
}
spin_unlock_irqrestore(&ioc->saved_lock, flags);
- pide = sba_search_bitmap(ioc, pages_needed, 0);
+ pide = sba_search_bitmap(ioc, dev, pages_needed, 0);
if (unlikely(pide >= (ioc->res_size << 3)))
panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n",
ioc->ioc_hpa);
@@ -936,7 +958,7 @@ sba_map_single(struct device *dev, void *addr, size_t size, int dir)
spin_unlock_irqrestore(&ioc->res_lock, flags);
#endif
- pide = sba_alloc_range(ioc, size);
+ pide = sba_alloc_range(ioc, dev, size);
iovp = (dma_addr_t) pide << iovp_shift;
@@ -1373,7 +1395,7 @@ sba_coalesce_chunks(struct ioc *ioc, struct device *dev,
dma_len = (dma_len + dma_offset + ~iovp_mask) & iovp_mask;
ASSERT(dma_len <= DMA_CHUNK_SIZE);
dma_sg->dma_address = (dma_addr_t) (PIDE_FLAG
- | (sba_alloc_range(ioc, dma_len) << iovp_shift)
+ | (sba_alloc_range(ioc, dev, dma_len) << iovp_shift)
| dma_offset);
n_mappings++;
}
diff --git a/arch/ia64/hp/sim/simscsi.c b/arch/ia64/hp/sim/simscsi.c
index 7661bb065fa5..3a078ad3aa44 100644
--- a/arch/ia64/hp/sim/simscsi.c
+++ b/arch/ia64/hp/sim/simscsi.c
@@ -201,22 +201,6 @@ simscsi_readwrite10 (struct scsi_cmnd *sc, int mode)
simscsi_sg_readwrite(sc, mode, offset);
}
-static void simscsi_fillresult(struct scsi_cmnd *sc, char *buf, unsigned len)
-{
-
- int i;
- unsigned thislen;
- struct scatterlist *slp;
-
- scsi_for_each_sg(sc, slp, scsi_sg_count(sc), i) {
- if (!len)
- break;
- thislen = min(len, slp->length);
- memcpy(sg_virt(slp), buf, thislen);
- len -= thislen;
- }
-}
-
static int
simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
{
@@ -258,7 +242,7 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
buf[6] = 0; /* reserved */
buf[7] = 0; /* various flags */
memcpy(buf + 8, "HP SIMULATED DISK 0.00", 28);
- simscsi_fillresult(sc, buf, 36);
+ scsi_sg_copy_from_buffer(sc, buf, 36);
sc->result = GOOD;
break;
@@ -306,14 +290,15 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
buf[5] = 0;
buf[6] = 2;
buf[7] = 0;
- simscsi_fillresult(sc, buf, 8);
+ scsi_sg_copy_from_buffer(sc, buf, 8);
sc->result = GOOD;
break;
case MODE_SENSE:
case MODE_SENSE_10:
/* sd.c uses this to determine whether disk does write-caching. */
- simscsi_fillresult(sc, (char *)empty_zero_page, scsi_bufflen(sc));
+ scsi_sg_copy_from_buffer(sc, (char *)empty_zero_page,
+ PAGE_SIZE);
sc->result = GOOD;
break;
diff --git a/arch/ia64/ia32/elfcore32.h b/arch/ia64/ia32/elfcore32.h
index 446c9aac924d..9a3abf58cea3 100644
--- a/arch/ia64/ia32/elfcore32.h
+++ b/arch/ia64/ia32/elfcore32.h
@@ -30,7 +30,19 @@ struct elf_siginfo
int si_errno; /* errno */
};
-#define jiffies_to_timeval(a,b) do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; }while(0)
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+/*
+ * Hacks are here since types between compat_timeval (= pair of s32) and
+ * ia64-native timeval (= pair of s64) are not compatible, at least a file
+ * arch/ia64/ia32/../../../fs/binfmt_elf.c will get warnings from compiler on
+ * use of cputime_to_timeval(), which usually an alias of jiffies_to_timeval().
+ */
+#define cputime_to_timeval(a,b) \
+ do { (b)->tv_usec = 0; (b)->tv_sec = (a)/NSEC_PER_SEC; } while(0)
+#else
+#define jiffies_to_timeval(a,b) \
+ do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; } while(0)
+#endif
struct elf_prstatus
{
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
index b1bf51fe97b4..7e028ceb93ba 100644
--- a/arch/ia64/ia32/sys_ia32.c
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -38,6 +38,7 @@
#include <linux/eventpoll.h>
#include <linux/personality.h>
#include <linux/ptrace.h>
+#include <linux/regset.h>
#include <linux/stat.h>
#include <linux/ipc.h>
#include <linux/capability.h>
@@ -2387,16 +2388,45 @@ get_free_idx (void)
return -ESRCH;
}
+static void set_tls_desc(struct task_struct *p, int idx,
+ const struct ia32_user_desc *info, int n)
+{
+ struct thread_struct *t = &p->thread;
+ struct desc_struct *desc = &t->tls_array[idx - GDT_ENTRY_TLS_MIN];
+ int cpu;
+
+ /*
+ * We must not get preempted while modifying the TLS.
+ */
+ cpu = get_cpu();
+
+ while (n-- > 0) {
+ if (LDT_empty(info)) {
+ desc->a = 0;
+ desc->b = 0;
+ } else {
+ desc->a = LDT_entry_a(info);
+ desc->b = LDT_entry_b(info);
+ }
+
+ ++info;
+ ++desc;
+ }
+
+ if (t == &current->thread)
+ load_TLS(t, cpu);
+
+ put_cpu();
+}
+
/*
* Set a given TLS descriptor:
*/
asmlinkage int
sys32_set_thread_area (struct ia32_user_desc __user *u_info)
{
- struct thread_struct *t = &current->thread;
struct ia32_user_desc info;
- struct desc_struct *desc;
- int cpu, idx;
+ int idx;
if (copy_from_user(&info, u_info, sizeof(info)))
return -EFAULT;
@@ -2416,18 +2446,7 @@ sys32_set_thread_area (struct ia32_user_desc __user *u_info)
if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
return -EINVAL;
- desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN;
-
- cpu = smp_processor_id();
-
- if (LDT_empty(&info)) {
- desc->a = 0;
- desc->b = 0;
- } else {
- desc->a = LDT_entry_a(&info);
- desc->b = LDT_entry_b(&info);
- }
- load_TLS(t, cpu);
+ set_tls_desc(current, idx, &info, 1);
return 0;
}
@@ -2451,6 +2470,20 @@ sys32_set_thread_area (struct ia32_user_desc __user *u_info)
#define GET_PRESENT(desc) (((desc)->b >> 15) & 1)
#define GET_USEABLE(desc) (((desc)->b >> 20) & 1)
+static void fill_user_desc(struct ia32_user_desc *info, int idx,
+ const struct desc_struct *desc)
+{
+ info->entry_number = idx;
+ info->base_addr = GET_BASE(desc);
+ info->limit = GET_LIMIT(desc);
+ info->seg_32bit = GET_32BIT(desc);
+ info->contents = GET_CONTENTS(desc);
+ info->read_exec_only = !GET_WRITABLE(desc);
+ info->limit_in_pages = GET_LIMIT_PAGES(desc);
+ info->seg_not_present = !GET_PRESENT(desc);
+ info->useable = GET_USEABLE(desc);
+}
+
asmlinkage int
sys32_get_thread_area (struct ia32_user_desc __user *u_info)
{
@@ -2464,22 +2497,588 @@ sys32_get_thread_area (struct ia32_user_desc __user *u_info)
return -EINVAL;
desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
-
- info.entry_number = idx;
- info.base_addr = GET_BASE(desc);
- info.limit = GET_LIMIT(desc);
- info.seg_32bit = GET_32BIT(desc);
- info.contents = GET_CONTENTS(desc);
- info.read_exec_only = !GET_WRITABLE(desc);
- info.limit_in_pages = GET_LIMIT_PAGES(desc);
- info.seg_not_present = !GET_PRESENT(desc);
- info.useable = GET_USEABLE(desc);
+ fill_user_desc(&info, idx, desc);
if (copy_to_user(u_info, &info, sizeof(info)))
return -EFAULT;
return 0;
}
+struct regset_get {
+ void *kbuf;
+ void __user *ubuf;
+};
+
+struct regset_set {
+ const void *kbuf;
+ const void __user *ubuf;
+};
+
+struct regset_getset {
+ struct task_struct *target;
+ const struct user_regset *regset;
+ union {
+ struct regset_get get;
+ struct regset_set set;
+ } u;
+ unsigned int pos;
+ unsigned int count;
+ int ret;
+};
+
+static void getfpreg(struct task_struct *task, int regno, int *val)
+{
+ switch (regno / sizeof(int)) {
+ case 0:
+ *val = task->thread.fcr & 0xffff;
+ break;
+ case 1:
+ *val = task->thread.fsr & 0xffff;
+ break;
+ case 2:
+ *val = (task->thread.fsr>>16) & 0xffff;
+ break;
+ case 3:
+ *val = task->thread.fir;
+ break;
+ case 4:
+ *val = (task->thread.fir>>32) & 0xffff;
+ break;
+ case 5:
+ *val = task->thread.fdr;
+ break;
+ case 6:
+ *val = (task->thread.fdr >> 32) & 0xffff;
+ break;
+ }
+}
+
+static void setfpreg(struct task_struct *task, int regno, int val)
+{
+ switch (regno / sizeof(int)) {
+ case 0:
+ task->thread.fcr = (task->thread.fcr & (~0x1f3f))
+ | (val & 0x1f3f);
+ break;
+ case 1:
+ task->thread.fsr = (task->thread.fsr & (~0xffff)) | val;
+ break;
+ case 2:
+ task->thread.fsr = (task->thread.fsr & (~0xffff0000))
+ | (val << 16);
+ break;
+ case 3:
+ task->thread.fir = (task->thread.fir & (~0xffffffff)) | val;
+ break;
+ case 5:
+ task->thread.fdr = (task->thread.fdr & (~0xffffffff)) | val;
+ break;
+ }
+}
+
+static void access_fpreg_ia32(int regno, void *reg,
+ struct pt_regs *pt, struct switch_stack *sw,
+ int tos, int write)
+{
+ void *f;
+
+ if ((regno += tos) >= 8)
+ regno -= 8;
+ if (regno < 4)
+ f = &pt->f8 + regno;
+ else if (regno <= 7)
+ f = &sw->f12 + (regno - 4);
+ else {
+ printk(KERN_ERR "regno must not exceed 7\n");
+ return;
+ }
+
+ if (write)
+ memcpy(f, reg, sizeof(struct _fpreg_ia32));
+ else
+ memcpy(reg, f, sizeof(struct _fpreg_ia32));
+}
+
+static void do_fpregs_get(struct unw_frame_info *info, void *arg)
+{
+ struct regset_getset *dst = arg;
+ struct task_struct *task = dst->target;
+ struct pt_regs *pt;
+ int start, end, tos;
+ char buf[80];
+
+ if (dst->count == 0 || unw_unwind_to_user(info) < 0)
+ return;
+ if (dst->pos < 7 * sizeof(int)) {
+ end = min((dst->pos + dst->count),
+ (unsigned int)(7 * sizeof(int)));
+ for (start = dst->pos; start < end; start += sizeof(int))
+ getfpreg(task, start, (int *)(buf + start));
+ dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+ &dst->u.get.kbuf, &dst->u.get.ubuf, buf,
+ 0, 7 * sizeof(int));
+ if (dst->ret || dst->count == 0)
+ return;
+ }
+ if (dst->pos < sizeof(struct ia32_user_i387_struct)) {
+ pt = task_pt_regs(task);
+ tos = (task->thread.fsr >> 11) & 7;
+ end = min(dst->pos + dst->count,
+ (unsigned int)(sizeof(struct ia32_user_i387_struct)));
+ start = (dst->pos - 7 * sizeof(int)) /
+ sizeof(struct _fpreg_ia32);
+ end = (end - 7 * sizeof(int)) / sizeof(struct _fpreg_ia32);
+ for (; start < end; start++)
+ access_fpreg_ia32(start,
+ (struct _fpreg_ia32 *)buf + start,
+ pt, info->sw, tos, 0);
+ dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+ &dst->u.get.kbuf, &dst->u.get.ubuf,
+ buf, 7 * sizeof(int),
+ sizeof(struct ia32_user_i387_struct));
+ if (dst->ret || dst->count == 0)
+ return;
+ }
+}
+
+static void do_fpregs_set(struct unw_frame_info *info, void *arg)
+{
+ struct regset_getset *dst = arg;
+ struct task_struct *task = dst->target;
+ struct pt_regs *pt;
+ char buf[80];
+ int end, start, tos;
+
+ if (dst->count == 0 || unw_unwind_to_user(info) < 0)
+ return;
+
+ if (dst->pos < 7 * sizeof(int)) {
+ start = dst->pos;
+ dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+ &dst->u.set.kbuf, &dst->u.set.ubuf, buf,
+ 0, 7 * sizeof(int));
+ if (dst->ret)
+ return;
+ for (; start < dst->pos; start += sizeof(int))
+ setfpreg(task, start, *((int *)(buf + start)));
+ if (dst->count == 0)
+ return;
+ }
+ if (dst->pos < sizeof(struct ia32_user_i387_struct)) {
+ start = (dst->pos - 7 * sizeof(int)) /
+ sizeof(struct _fpreg_ia32);
+ dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+ &dst->u.set.kbuf, &dst->u.set.ubuf,
+ buf, 7 * sizeof(int),
+ sizeof(struct ia32_user_i387_struct));
+ if (dst->ret)
+ return;
+ pt = task_pt_regs(task);
+ tos = (task->thread.fsr >> 11) & 7;
+ end = (dst->pos - 7 * sizeof(int)) / sizeof(struct _fpreg_ia32);
+ for (; start < end; start++)
+ access_fpreg_ia32(start,
+ (struct _fpreg_ia32 *)buf + start,
+ pt, info->sw, tos, 1);
+ if (dst->count == 0)
+ return;
+ }
+}
+
+#define OFFSET(member) ((int)(offsetof(struct ia32_user_fxsr_struct, member)))
+static void getfpxreg(struct task_struct *task, int start, int end, char *buf)
+{
+ int min_val;
+
+ min_val = min(end, OFFSET(fop));
+ while (start < min_val) {
+ if (start == OFFSET(cwd))
+ *((short *)buf) = task->thread.fcr & 0xffff;
+ else if (start == OFFSET(swd))
+ *((short *)buf) = task->thread.fsr & 0xffff;
+ else if (start == OFFSET(twd))
+ *((short *)buf) = (task->thread.fsr>>16) & 0xffff;
+ buf += 2;
+ start += 2;
+ }
+ /* skip fop element */
+ if (start == OFFSET(fop)) {
+ start += 2;
+ buf += 2;
+ }
+ while (start < end) {
+ if (start == OFFSET(fip))
+ *((int *)buf) = task->thread.fir;
+ else if (start == OFFSET(fcs))
+ *((int *)buf) = (task->thread.fir>>32) & 0xffff;
+ else if (start == OFFSET(foo))
+ *((int *)buf) = task->thread.fdr;
+ else if (start == OFFSET(fos))
+ *((int *)buf) = (task->thread.fdr>>32) & 0xffff;
+ else if (start == OFFSET(mxcsr))
+ *((int *)buf) = ((task->thread.fcr>>32) & 0xff80)
+ | ((task->thread.fsr>>32) & 0x3f);
+ buf += 4;
+ start += 4;
+ }
+}
+
+static void setfpxreg(struct task_struct *task, int start, int end, char *buf)
+{
+ int min_val, num32;
+ short num;
+ unsigned long num64;
+
+ min_val = min(end, OFFSET(fop));
+ while (start < min_val) {
+ num = *((short *)buf);
+ if (start == OFFSET(cwd)) {
+ task->thread.fcr = (task->thread.fcr & (~0x1f3f))
+ | (num & 0x1f3f);
+ } else if (start == OFFSET(swd)) {
+ task->thread.fsr = (task->thread.fsr & (~0xffff)) | num;
+ } else if (start == OFFSET(twd)) {
+ task->thread.fsr = (task->thread.fsr & (~0xffff0000))
+ | (((int)num) << 16);
+ }
+ buf += 2;
+ start += 2;
+ }
+ /* skip fop element */
+ if (start == OFFSET(fop)) {
+ start += 2;
+ buf += 2;
+ }
+ while (start < end) {
+ num32 = *((int *)buf);
+ if (start == OFFSET(fip))
+ task->thread.fir = (task->thread.fir & (~0xffffffff))
+ | num32;
+ else if (start == OFFSET(foo))
+ task->thread.fdr = (task->thread.fdr & (~0xffffffff))
+ | num32;
+ else if (start == OFFSET(mxcsr)) {
+ num64 = num32 & 0xff10;
+ task->thread.fcr = (task->thread.fcr &
+ (~0xff1000000000UL)) | (num64<<32);
+ num64 = num32 & 0x3f;
+ task->thread.fsr = (task->thread.fsr &
+ (~0x3f00000000UL)) | (num64<<32);
+ }
+ buf += 4;
+ start += 4;
+ }
+}
+
+static void do_fpxregs_get(struct unw_frame_info *info, void *arg)
+{
+ struct regset_getset *dst = arg;
+ struct task_struct *task = dst->target;
+ struct pt_regs *pt;
+ char buf[128];
+ int start, end, tos;
+
+ if (dst->count == 0 || unw_unwind_to_user(info) < 0)
+ return;
+ if (dst->pos < OFFSET(st_space[0])) {
+ end = min(dst->pos + dst->count, (unsigned int)32);
+ getfpxreg(task, dst->pos, end, buf);
+ dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+ &dst->u.get.kbuf, &dst->u.get.ubuf, buf,
+ 0, OFFSET(st_space[0]));
+ if (dst->ret || dst->count == 0)
+ return;
+ }
+ if (dst->pos < OFFSET(xmm_space[0])) {
+ pt = task_pt_regs(task);
+ tos = (task->thread.fsr >> 11) & 7;
+ end = min(dst->pos + dst->count,
+ (unsigned int)OFFSET(xmm_space[0]));
+ start = (dst->pos - OFFSET(st_space[0])) / 16;
+ end = (end - OFFSET(st_space[0])) / 16;
+ for (; start < end; start++)
+ access_fpreg_ia32(start, buf + 16 * start, pt,
+ info->sw, tos, 0);
+ dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+ &dst->u.get.kbuf, &dst->u.get.ubuf,
+ buf, OFFSET(st_space[0]), OFFSET(xmm_space[0]));
+ if (dst->ret || dst->count == 0)
+ return;
+ }
+ if (dst->pos < OFFSET(padding[0]))
+ dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+ &dst->u.get.kbuf, &dst->u.get.ubuf,
+ &info->sw->f16, OFFSET(xmm_space[0]),
+ OFFSET(padding[0]));
+}
+
+static void do_fpxregs_set(struct unw_frame_info *info, void *arg)
+{
+ struct regset_getset *dst = arg;
+ struct task_struct *task = dst->target;
+ char buf[128];
+ int start, end;
+
+ if (dst->count == 0 || unw_unwind_to_user(info) < 0)
+ return;
+
+ if (dst->pos < OFFSET(st_space[0])) {
+ start = dst->pos;
+ dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+ &dst->u.set.kbuf, &dst->u.set.ubuf,
+ buf, 0, OFFSET(st_space[0]));
+ if (dst->ret)
+ return;
+ setfpxreg(task, start, dst->pos, buf);
+ if (dst->count == 0)
+ return;
+ }
+ if (dst->pos < OFFSET(xmm_space[0])) {
+ struct pt_regs *pt;
+ int tos;
+ pt = task_pt_regs(task);
+ tos = (task->thread.fsr >> 11) & 7;
+ start = (dst->pos - OFFSET(st_space[0])) / 16;
+ dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+ &dst->u.set.kbuf, &dst->u.set.ubuf,
+ buf, OFFSET(st_space[0]), OFFSET(xmm_space[0]));
+ if (dst->ret)
+ return;
+ end = (dst->pos - OFFSET(st_space[0])) / 16;
+ for (; start < end; start++)
+ access_fpreg_ia32(start, buf + 16 * start, pt, info->sw,
+ tos, 1);
+ if (dst->count == 0)
+ return;
+ }
+ if (dst->pos < OFFSET(padding[0]))
+ dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+ &dst->u.set.kbuf, &dst->u.set.ubuf,
+ &info->sw->f16, OFFSET(xmm_space[0]),
+ OFFSET(padding[0]));
+}
+#undef OFFSET
+
+static int do_regset_call(void (*call)(struct unw_frame_info *, void *),
+ struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ struct regset_getset info = { .target = target, .regset = regset,
+ .pos = pos, .count = count,
+ .u.set = { .kbuf = kbuf, .ubuf = ubuf },
+ .ret = 0 };
+
+ if (target == current)
+ unw_init_running(call, &info);
+ else {
+ struct unw_frame_info ufi;
+ memset(&ufi, 0, sizeof(ufi));
+ unw_init_from_blocked_task(&ufi, target);
+ (*call)(&ufi, &info);
+ }
+
+ return info.ret;
+}
+
+static int ia32_fpregs_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ return do_regset_call(do_fpregs_get, target, regset, pos, count,
+ kbuf, ubuf);
+}
+
+static int ia32_fpregs_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ return do_regset_call(do_fpregs_set, target, regset, pos, count,
+ kbuf, ubuf);
+}
+
+static int ia32_fpxregs_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ return do_regset_call(do_fpxregs_get, target, regset, pos, count,
+ kbuf, ubuf);
+}
+
+static int ia32_fpxregs_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ return do_regset_call(do_fpxregs_set, target, regset, pos, count,
+ kbuf, ubuf);
+}
+
+static int ia32_genregs_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ if (kbuf) {
+ u32 *kp = kbuf;
+ while (count > 0) {
+ *kp++ = getreg(target, pos);
+ pos += 4;
+ count -= 4;
+ }
+ } else {
+ u32 __user *up = ubuf;
+ while (count > 0) {
+ if (__put_user(getreg(target, pos), up++))
+ return -EFAULT;
+ pos += 4;
+ count -= 4;
+ }
+ }
+ return 0;
+}
+
+static int ia32_genregs_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int ret = 0;
+
+ if (kbuf) {
+ const u32 *kp = kbuf;
+ while (!ret && count > 0) {
+ putreg(target, pos, *kp++);
+ pos += 4;
+ count -= 4;
+ }
+ } else {
+ const u32 __user *up = ubuf;
+ u32 val;
+ while (!ret && count > 0) {
+ ret = __get_user(val, up++);
+ if (!ret)
+ putreg(target, pos, val);
+ pos += 4;
+ count -= 4;
+ }
+ }
+ return ret;
+}
+
+static int ia32_tls_active(struct task_struct *target,
+ const struct user_regset *regset)
+{
+ struct thread_struct *t = &target->thread;
+ int n = GDT_ENTRY_TLS_ENTRIES;
+ while (n > 0 && desc_empty(&t->tls_array[n -1]))
+ --n;
+ return n;
+}
+
+static int ia32_tls_get(struct task_struct *target,
+ const struct user_regset *regset, unsigned int pos,
+ unsigned int count, void *kbuf, void __user *ubuf)
+{
+ const struct desc_struct *tls;
+
+ if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct ia32_user_desc) ||
+ (pos % sizeof(struct ia32_user_desc)) != 0 ||
+ (count % sizeof(struct ia32_user_desc)) != 0)
+ return -EINVAL;
+
+ pos /= sizeof(struct ia32_user_desc);
+ count /= sizeof(struct ia32_user_desc);
+
+ tls = &target->thread.tls_array[pos];
+
+ if (kbuf) {
+ struct ia32_user_desc *info = kbuf;
+ while (count-- > 0)
+ fill_user_desc(info++, GDT_ENTRY_TLS_MIN + pos++,
+ tls++);
+ } else {
+ struct ia32_user_desc __user *u_info = ubuf;
+ while (count-- > 0) {
+ struct ia32_user_desc info;
+ fill_user_desc(&info, GDT_ENTRY_TLS_MIN + pos++, tls++);
+ if (__copy_to_user(u_info++, &info, sizeof(info)))
+ return -EFAULT;
+ }
+ }
+
+ return 0;
+}
+
+static int ia32_tls_set(struct task_struct *target,
+ const struct user_regset *regset, unsigned int pos,
+ unsigned int count, const void *kbuf, const void __user *ubuf)
+{
+ struct ia32_user_desc infobuf[GDT_ENTRY_TLS_ENTRIES];
+ const struct ia32_user_desc *info;
+
+ if (pos > GDT_ENTRY_TLS_ENTRIES * sizeof(struct ia32_user_desc) ||
+ (pos % sizeof(struct ia32_user_desc)) != 0 ||
+ (count % sizeof(struct ia32_user_desc)) != 0)
+ return -EINVAL;
+
+ if (kbuf)
+ info = kbuf;
+ else if (__copy_from_user(infobuf, ubuf, count))
+ return -EFAULT;
+ else
+ info = infobuf;
+
+ set_tls_desc(target,
+ GDT_ENTRY_TLS_MIN + (pos / sizeof(struct ia32_user_desc)),
+ info, count / sizeof(struct ia32_user_desc));
+
+ return 0;
+}
+
+/*
+ * This should match arch/i386/kernel/ptrace.c:native_regsets.
+ * XXX ioperm? vm86?
+ */
+static const struct user_regset ia32_regsets[] = {
+ {
+ .core_note_type = NT_PRSTATUS,
+ .n = sizeof(struct user_regs_struct32)/4,
+ .size = 4, .align = 4,
+ .get = ia32_genregs_get, .set = ia32_genregs_set
+ },
+ {
+ .core_note_type = NT_PRFPREG,
+ .n = sizeof(struct ia32_user_i387_struct) / 4,
+ .size = 4, .align = 4,
+ .get = ia32_fpregs_get, .set = ia32_fpregs_set
+ },
+ {
+ .core_note_type = NT_PRXFPREG,
+ .n = sizeof(struct ia32_user_fxsr_struct) / 4,
+ .size = 4, .align = 4,
+ .get = ia32_fpxregs_get, .set = ia32_fpxregs_set
+ },
+ {
+ .core_note_type = NT_386_TLS,
+ .n = GDT_ENTRY_TLS_ENTRIES,
+ .bias = GDT_ENTRY_TLS_MIN,
+ .size = sizeof(struct ia32_user_desc),
+ .align = sizeof(struct ia32_user_desc),
+ .active = ia32_tls_active,
+ .get = ia32_tls_get, .set = ia32_tls_set,
+ },
+};
+
+const struct user_regset_view user_ia32_view = {
+ .name = "i386", .e_machine = EM_386,
+ .regsets = ia32_regsets, .n = ARRAY_SIZE(ia32_regsets)
+};
+
long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
__u32 len_low, __u32 len_high, int advice)
{
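
The regset table above plugs ia32 tasks into the generic user_regset framework. A minimal sketch of how a consumer (for example a core-dump writer) is assumed to use such a view: pick a regset, consult .active when it is provided, and pull the payload into a kernel buffer through .get. The helper name is hypothetical and not part of the patch:

static int dump_one_regset(struct task_struct *tsk,
			   const struct user_regset *rs, void *buf)
{
	unsigned int size = rs->n * rs->size;	/* total payload in bytes */

	/* .active (when provided) says whether anything is worth dumping */
	if (rs->active && rs->active(tsk, rs) <= 0)
		return 0;

	/* kernel-buffer variant: kbuf != NULL, ubuf == NULL */
	return rs->get(tsk, rs, 0, size, buf, NULL);
}

The pos/count pair lets callers fetch partial payloads, which is why the fpregs/fpxregs getters above walk their buffers by offsets via user_regset_copyout()/copyin().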
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
index 33e5a598672d..13fd10e8699e 100644
--- a/arch/ia64/kernel/Makefile
+++ b/arch/ia64/kernel/Makefile
@@ -6,7 +6,7 @@ extra-y := head.o init_task.o vmlinux.lds
obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \
- salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
+ salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
unwind.o mca.o mca_asm.o topology.o
obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 78f28d825f30..c7467f863c7a 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -423,6 +423,7 @@ static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];
#define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag))
#define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag))
static struct acpi_table_slit __initdata *slit_table;
+cpumask_t early_cpu_possible_map = CPU_MASK_NONE;
static int get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa)
{
@@ -482,6 +483,7 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
(pa->apic_id << 8) | (pa->local_sapic_eid);
/* nid should be overridden as logical node id later */
node_cpuid[srat_num_cpus].nid = pxm;
+ cpu_set(srat_num_cpus, early_cpu_possible_map);
srat_num_cpus++;
}
@@ -559,7 +561,7 @@ void __init acpi_numa_arch_fixup(void)
}
/* set logical node id in cpu structure */
- for (i = 0; i < srat_num_cpus; i++)
+ for_each_possible_early_cpu(i)
node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid);
printk(KERN_INFO "Number of logical nodes in system = %d\n",
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
index 0aebc6f79e95..230a6f92367f 100644
--- a/arch/ia64/kernel/asm-offsets.c
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -7,6 +7,7 @@
#define ASM_OFFSETS_C 1
#include <linux/sched.h>
+#include <linux/pid.h>
#include <linux/clocksource.h>
#include <asm-ia64/processor.h>
@@ -34,17 +35,29 @@ void foo(void)
DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe));
DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info));
+ BUILD_BUG_ON(sizeof(struct upid) != 32);
+ DEFINE(IA64_UPID_SHIFT, 5);
+
BLANK();
DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+ DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp));
+ DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave));
+ DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime));
+ DEFINE(TI_AC_UTIME, offsetof(struct thread_info, ac_utime));
+#endif
BLANK();
DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
+ DEFINE(IA64_TASK_TGIDLINK_OFFSET, offsetof (struct task_struct, pids[PIDTYPE_PID].pid));
+ DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level));
+ DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0]));
DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));
diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c
index fbe742ad2fde..90ef338cf46f 100644
--- a/arch/ia64/kernel/crash.c
+++ b/arch/ia64/kernel/crash.c
@@ -24,6 +24,7 @@ int kdump_status[NR_CPUS];
static atomic_t kdump_cpu_frozen;
atomic_t kdump_in_progress;
static int kdump_on_init = 1;
+static int kdump_on_fatal_mca = 1;
static inline Elf64_Word
*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
@@ -118,6 +119,7 @@ machine_crash_shutdown(struct pt_regs *pt)
static void
machine_kdump_on_init(void)
{
+ crash_save_vmcoreinfo();
local_irq_disable();
kexec_disable_iosapic();
machine_kexec(ia64_kimage);
@@ -148,7 +150,7 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
struct ia64_mca_notify_die *nd;
struct die_args *args = data;
- if (!kdump_on_init)
+ if (!kdump_on_init && !kdump_on_fatal_mca)
return NOTIFY_DONE;
if (!ia64_kimage) {
@@ -173,32 +175,38 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
return NOTIFY_DONE;
switch (val) {
- case DIE_INIT_MONARCH_PROCESS:
+ case DIE_INIT_MONARCH_PROCESS:
+ if (kdump_on_init) {
atomic_set(&kdump_in_progress, 1);
*(nd->monarch_cpu) = -1;
- break;
- case DIE_INIT_MONARCH_LEAVE:
+ }
+ break;
+ case DIE_INIT_MONARCH_LEAVE:
+ if (kdump_on_init)
machine_kdump_on_init();
- break;
- case DIE_INIT_SLAVE_LEAVE:
- if (atomic_read(&kdump_in_progress))
- unw_init_running(kdump_cpu_freeze, NULL);
- break;
- case DIE_MCA_RENDZVOUS_LEAVE:
- if (atomic_read(&kdump_in_progress))
- unw_init_running(kdump_cpu_freeze, NULL);
- break;
- case DIE_MCA_MONARCH_LEAVE:
- /* die_register->signr indicate if MCA is recoverable */
- if (!args->signr)
- machine_kdump_on_init();
- break;
+ break;
+ case DIE_INIT_SLAVE_LEAVE:
+ if (atomic_read(&kdump_in_progress))
+ unw_init_running(kdump_cpu_freeze, NULL);
+ break;
+ case DIE_MCA_RENDZVOUS_LEAVE:
+ if (atomic_read(&kdump_in_progress))
+ unw_init_running(kdump_cpu_freeze, NULL);
+ break;
+ case DIE_MCA_MONARCH_LEAVE:
+ /* die_register->signr indicates whether the MCA is recoverable */
+ if (kdump_on_fatal_mca && !args->signr) {
+ atomic_set(&kdump_in_progress, 1);
+ *(nd->monarch_cpu) = -1;
+ machine_kdump_on_init();
+ }
+ break;
}
return NOTIFY_DONE;
}
#ifdef CONFIG_SYSCTL
-static ctl_table kdump_on_init_table[] = {
+static ctl_table kdump_ctl_table[] = {
{
.ctl_name = CTL_UNNUMBERED,
.procname = "kdump_on_init",
@@ -207,6 +215,14 @@ static ctl_table kdump_on_init_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "kdump_on_fatal_mca",
+ .data = &kdump_on_fatal_mca,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
{ .ctl_name = 0 }
};
@@ -215,7 +231,7 @@ static ctl_table sys_table[] = {
.ctl_name = CTL_KERN,
.procname = "kernel",
.mode = 0555,
- .child = kdump_on_init_table,
+ .child = kdump_ctl_table,
},
{ .ctl_name = 0 }
};
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index 728d7247a1a6..d45f215bc8fc 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -37,6 +37,7 @@
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/mca.h>
+#include <asm/tlbflush.h>
#define EFI_DEBUG 0
@@ -403,6 +404,41 @@ efi_get_pal_addr (void)
return NULL;
}
+
+static u8 __init palo_checksum(u8 *buffer, u32 length)
+{
+ u8 sum = 0;
+ u8 *end = buffer + length;
+
+ while (buffer < end)
+ sum = (u8) (sum + *(buffer++));
+
+ return sum;
+}
+
+/*
+ * Parse and handle the PALO table, which is published at:
+ * http://www.dig64.org/home/DIG64_PALO_R1_0.pdf
+ */
+static void __init handle_palo(unsigned long palo_phys)
+{
+ struct palo_table *palo = __va(palo_phys);
+ u8 checksum;
+
+ if (strncmp(palo->signature, PALO_SIG, sizeof(PALO_SIG) - 1)) {
+ printk(KERN_INFO "PALO signature incorrect.\n");
+ return;
+ }
+
+ checksum = palo_checksum((u8 *)palo, palo->length);
+ if (checksum) {
+ printk(KERN_INFO "PALO checksum incorrect.\n");
+ return;
+ }
+
+ setup_ptcg_sem(palo->max_tlb_purges, NPTCG_FROM_PALO);
+}
+
void
efi_map_pal_code (void)
{
@@ -432,6 +468,7 @@ efi_init (void)
u64 efi_desc_size;
char *cp, vendor[100] = "unknown";
int i;
+ unsigned long palo_phys;
/*
* It's too early to be able to use the standard kernel command line
@@ -496,6 +533,8 @@ efi_init (void)
efi.hcdp = EFI_INVALID_TABLE_ADDR;
efi.uga = EFI_INVALID_TABLE_ADDR;
+ palo_phys = EFI_INVALID_TABLE_ADDR;
+
for (i = 0; i < (int) efi.systab->nr_tables; i++) {
if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
efi.mps = config_tables[i].table;
@@ -515,10 +554,17 @@ efi_init (void)
} else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
efi.hcdp = config_tables[i].table;
printk(" HCDP=0x%lx", config_tables[i].table);
+ } else if (efi_guidcmp(config_tables[i].guid,
+ PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID) == 0) {
+ palo_phys = config_tables[i].table;
+ printk(" PALO=0x%lx", config_tables[i].table);
}
}
printk("\n");
+ if (palo_phys != EFI_INVALID_TABLE_ADDR)
+ handle_palo(palo_phys);
+
runtime = __va(efi.systab->runtime);
efi.get_time = phys_get_time;
efi.set_time = phys_set_time;
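
handle_palo() above relies on the usual firmware-table checksum convention: the byte sum of the entire table, taken modulo 256, must be zero. For illustration only, a sketch of how a table producer could fill in the checksum byte under that convention; the helper name and the existence/offset of a dedicated checksum field are assumptions, not taken from the diff or the PALO specification text:

static u8 palo_fill_checksum(u8 *table, u32 length, u32 checksum_offset)
{
	u8 sum = 0;
	u32 i;

	table[checksum_offset] = 0;		/* neutralize the old value   */
	for (i = 0; i < length; i++)
		sum = (u8)(sum + table[i]);

	table[checksum_offset] = (u8)(0 - sum);	/* whole table now sums to 0 */
	return table[checksum_offset];
}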
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 3c331c464b40..b0be4a280174 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -710,6 +710,16 @@ ENTRY(ia64_leave_syscall)
(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
#endif
.work_processed_syscall:
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+ adds r2=PT(LOADRS)+16,r12
+(pUStk) mov.m r22=ar.itc // fetch time at leave
+ adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
+ ;;
+(p6) ld4 r31=[r18] // load current_thread_info()->flags
+ ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs"
+ adds r3=PT(AR_BSPSTORE)+16,r12 // deferred
+ ;;
+#else
adds r2=PT(LOADRS)+16,r12
adds r3=PT(AR_BSPSTORE)+16,r12
adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
@@ -718,6 +728,7 @@ ENTRY(ia64_leave_syscall)
ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs"
nop.i 0
;;
+#endif
mov r16=ar.bsp // M2 get existing backing store pointer
ld8 r18=[r2],PT(R9)-PT(B6) // load b6
(p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE?
@@ -737,12 +748,21 @@ ENTRY(ia64_leave_syscall)
ld8 r29=[r2],16 // M0|1 load cr.ipsr
ld8 r28=[r3],16 // M0|1 load cr.iip
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13
+ ;;
+ ld8 r30=[r2],16 // M0|1 load cr.ifs
+ ld8 r25=[r3],16 // M0|1 load ar.unat
+(pUStk) add r15=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
+ ;;
+#else
mov r22=r0 // A clear r22
;;
ld8 r30=[r2],16 // M0|1 load cr.ifs
ld8 r25=[r3],16 // M0|1 load ar.unat
(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
;;
+#endif
ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs
(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
nop 0
@@ -759,7 +779,11 @@ ENTRY(ia64_leave_syscall)
ld8.fill r1=[r3],16 // M0|1 load r1
(pUStk) mov r17=1 // A
;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk) st1 [r15]=r17 // M2|3
+#else
(pUStk) st1 [r14]=r17 // M2|3
+#endif
ld8.fill r13=[r3],16 // M0|1
mov f8=f0 // F clear f8
;;
@@ -775,12 +799,22 @@ ENTRY(ia64_leave_syscall)
shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
cover // B add current frame into dirty partition & set cr.ifs
;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+ mov r19=ar.bsp // M2 get new backing store pointer
+ st8 [r14]=r22 // M save time at leave
+ mov f10=f0 // F clear f10
+
+ mov r22=r0 // A clear r22
+ movl r14=__kernel_syscall_via_epc // X
+ ;;
+#else
mov r19=ar.bsp // M2 get new backing store pointer
mov f10=f0 // F clear f10
nop.m 0
movl r14=__kernel_syscall_via_epc // X
;;
+#endif
mov.m ar.csd=r0 // M2 clear ar.csd
mov.m ar.ccv=r0 // M2 clear ar.ccv
mov b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc)
@@ -913,10 +947,18 @@ GLOBAL_ENTRY(ia64_leave_kernel)
adds r16=PT(CR_IPSR)+16,r12
adds r17=PT(CR_IIP)+16,r12
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+ .pred.rel.mutex pUStk,pKStk
+(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
+(pUStk) mov.m r22=ar.itc // M fetch time at leave
+ nop.i 0
+ ;;
+#else
(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
nop.i 0
nop.i 0
;;
+#endif
ld8 r29=[r16],16 // load cr.ipsr
ld8 r28=[r17],16 // load cr.iip
;;
@@ -938,15 +980,37 @@ GLOBAL_ENTRY(ia64_leave_kernel)
;;
ld8.fill r12=[r16],16
ld8.fill r13=[r17],16
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk) adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18
+#else
(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
+#endif
;;
ld8 r20=[r16],16 // ar.fpsr
ld8.fill r15=[r17],16
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 // deferred
+#endif
;;
ld8.fill r14=[r16],16
ld8.fill r2=[r17]
(pUStk) mov r17=1
;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+ // mmi_ : ld8 st1 shr;; mmi_ : st8 st1 shr;;
+ // mib : mov add br -> mib : ld8 add br
+ // bbb_ : br nop cover;; mbb_ : mov br cover;;
+ //
+ // nothing requires bsp in r16 if the (pKStk) branch is taken.
+(pUStk) st8 [r3]=r22 // save time at leave
+(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack
+ shr.u r18=r19,16 // get byte size of existing "dirty" partition
+ ;;
+ ld8.fill r3=[r16] // deferred
+ LOAD_PHYS_STACK_REG_SIZE(r17)
+(pKStk) br.cond.dpnt skip_rbs_switch
+ mov r16=ar.bsp // get existing backing store pointer
+#else
ld8.fill r3=[r16]
(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack
shr.u r18=r19,16 // get byte size of existing "dirty" partition
@@ -954,6 +1018,7 @@ GLOBAL_ENTRY(ia64_leave_kernel)
mov r16=ar.bsp // get existing backing store pointer
LOAD_PHYS_STACK_REG_SIZE(r17)
(pKStk) br.cond.dpnt skip_rbs_switch
+#endif
/*
* Restore user backing store.
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index 44841971f077..c1625c7e1779 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -61,13 +61,29 @@ ENTRY(fsys_getpid)
.prologue
.altrp b6
.body
+ add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
+ ;;
+ ld8 r17=[r17] // r17 = current->group_leader
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
;;
ld4 r9=[r9]
- add r8=IA64_TASK_TGID_OFFSET,r16
+ add r17=IA64_TASK_TGIDLINK_OFFSET,r17
;;
and r9=TIF_ALLWORK_MASK,r9
- ld4 r8=[r8] // r8 = current->tgid
+ ld8 r17=[r17] // r17 = current->group_leader->pids[PIDTYPE_PID].pid
+ ;;
+ add r8=IA64_PID_LEVEL_OFFSET,r17
+ ;;
+ ld4 r8=[r8] // r8 = pid->level
+ add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0]
+ ;;
+ shl r8=r8,IA64_UPID_SHIFT
+ ;;
+ add r17=r17,r8 // r17 = &pid->numbers[pid->level]
+ ;;
+ ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr
+ ;;
+ mov r17=0
;;
cmp.ne p8,p0=0,r9
(p8) br.spnt.many fsys_fallback_syscall
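
In C terms, the new fsys_getpid fast path above follows current->group_leader->pids[PIDTYPE_PID].pid and indexes numbers[] by the pid's level, i.e. it returns the thread-group id as seen from the task's own pid namespace. A rough C equivalent (the function name is hypothetical; the assembly does the numbers[] indexing with IA64_UPID_SHIFT because sizeof(struct upid) == 32, as asserted in asm-offsets.c above):

static inline pid_t fsys_getpid_equiv(struct task_struct *cur)
{
	struct pid *pid = cur->group_leader->pids[PIDTYPE_PID].pid;

	/* numbers[pid->level] is the upid for the deepest (own) namespace */
	return pid->numbers[pid->level].nr;
}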
@@ -126,15 +142,25 @@ ENTRY(fsys_set_tid_address)
.altrp b6
.body
add r9=TI_FLAGS+IA64_TASK_SIZE,r16
+ add r17=IA64_TASK_TGIDLINK_OFFSET,r16
;;
ld4 r9=[r9]
tnat.z p6,p7=r32 // check argument register for being NaT
+ ld8 r17=[r17] // r17 = current->pids[PIDTYPE_PID].pid
;;
and r9=TIF_ALLWORK_MASK,r9
- add r8=IA64_TASK_PID_OFFSET,r16
+ add r8=IA64_PID_LEVEL_OFFSET,r17
add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
;;
- ld4 r8=[r8]
+ ld4 r8=[r8] // r8 = pid->level
+ add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0]
+ ;;
+ shl r8=r8,IA64_UPID_SHIFT
+ ;;
+ add r17=r17,r8 // r17 = &pid->numbers[pid->level]
+ ;;
+ ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr
+ ;;
cmp.ne p8,p0=0,r9
mov r17=-1
;;
@@ -210,27 +236,25 @@ ENTRY(fsys_gettimeofday)
// Note that instructions are optimized for McKinley. McKinley can
// process two bundles simultaneously and therefore we continuously
// try to feed the CPU two bundles and then a stop.
- //
- // Additional note that code has changed a lot. Optimization is TBD.
- // Comments begin with "?" are maybe outdated.
- tnat.nz p6,p0 = r31 // ? branch deferred to fit later bundle
- mov pr = r30,0xc000 // Set predicates according to function
+
add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
+ tnat.nz p6,p0 = r31 // guard against Nat argument
+(p6) br.cond.spnt.few .fail_einval
movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
;;
+ ld4 r2 = [r2] // process work pending flags
movl r29 = itc_jitter_data // itc_jitter
add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time
- ld4 r2 = [r2] // process work pending flags
- ;;
-(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time
add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
- add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
+ mov pr = r30,0xc000 // Set predicates according to function
+ ;;
and r2 = TIF_ALLWORK_MASK,r2
-(p6) br.cond.spnt.few .fail_einval // ? deferred branch
+ add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
+(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time
;;
- add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last
+ add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last
cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled
-(p6) br.cond.spnt.many fsys_fallback_syscall
+(p6) br.cond.spnt.many fsys_fallback_syscall
;;
// Begin critical section
.time_redo:
@@ -258,7 +282,6 @@ ENTRY(fsys_gettimeofday)
(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!!
(p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues..
(p13) ld8 r25 = [r19] // get itc_lastcycle value
- ;; // ? could be removed by moving the last add upward
ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET // tv_sec
;;
ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET // tv_nsec
@@ -285,13 +308,12 @@ ENTRY(fsys_gettimeofday)
EX(.fail_efault, probe.w.fault r31, 3)
xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter)
;;
- // ? simulate tbit.nz.or p7,p0 = r28,0
getf.sig r2 = f8
mf
;;
ld4 r10 = [r20] // gtod_lock.sequence
shr.u r2 = r2,r23 // shift by factor
- ;; // ? overloaded 3 bundles!
+ ;;
add r8 = r8,r2 // Add xtime.nsecs
cmp4.ne p7,p0 = r28,r10
(p7) br.cond.dpnt.few .time_redo // sequence number changed, redo
@@ -319,9 +341,9 @@ EX(.fail_efault, probe.w.fault r31, 3)
EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles
(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it
;;
- mov r8 = r0
(p14) getf.sig r2 = f8
;;
+ mov r8 = r0
(p14) shr.u r21 = r2, 4
;;
EX(.fail_efault, st8 [r31] = r9)
@@ -660,7 +682,11 @@ GLOBAL_ENTRY(fsys_bubble_down)
nop.i 0
;;
mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+ mov.m r30=ar.itc // M get cycle for accounting
+#else
nop.m 0
+#endif
nop.i 0
;;
mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore
@@ -682,6 +708,28 @@ GLOBAL_ENTRY(fsys_bubble_down)
cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1
br.call.sptk.many b7=ia64_syscall_setup // B
;;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+ // mov.m r30=ar.itc is called in advance
+ add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
+ add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
+ ;;
+ ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel
+ ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at leave kernel
+ ;;
+ ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime
+ ld8 r21=[r17] // cumulated utime
+ sub r22=r19,r18 // stime before leave kernel
+ ;;
+ st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // update stamp
+ sub r18=r30,r19 // elapsed time in user mode
+ ;;
+ add r20=r20,r22 // sum stime
+ add r21=r21,r18 // sum utime
+ ;;
+ st8 [r16]=r20 // update stime
+ st8 [r17]=r21 // update utime
+ ;;
+#endif
mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
mov rp=r14 // I0 set the real return addr
and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A
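
The CONFIG_VIRT_CPU_ACCOUNTING blocks added to entry.S, fsys.S and ivt.S all perform the same bookkeeping on the thread_info ac_* fields: the leave paths stamp ac_leave with ar.itc on the way back to user mode, and the entry paths split the elapsed cycles into user and system time. A C sketch of the arithmetic the assembly performs (field names follow the TI_AC_* offsets defined earlier in asm-offsets.c; the helper names are hypothetical):

/* kernel entry, with 'now' freshly read from ar.itc */
static void account_enter_sketch(struct thread_info *ti, unsigned long now)
{
	ti->ac_utime += now - ti->ac_leave;		/* cycles spent in user mode       */
	ti->ac_stime += ti->ac_leave - ti->ac_stamp;	/* kernel cycles before last leave */
	ti->ac_stamp  = now;				/* new reference stamp             */
}

/* kernel exit, just before returning to user mode */
static void account_leave_sketch(struct thread_info *ti, unsigned long now)
{
	ti->ac_leave = now;				/* remember when we left the kernel */
}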
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index d3a41d5f8d12..ddeab4e36fd5 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -1002,6 +1002,26 @@ GLOBAL_ENTRY(sched_clock)
br.ret.sptk.many rp
END(sched_clock)
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+GLOBAL_ENTRY(cycle_to_cputime)
+ alloc r16=ar.pfs,1,0,0,0
+ addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
+ ;;
+ ldf8 f8=[r8]
+ ;;
+ setf.sig f9=r32
+ ;;
+ xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc)
+ xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product
+ ;;
+ getf.sig r8=f10 // (5 cyc)
+ getf.sig r9=f11
+ ;;
+ shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
+ br.ret.sptk.many rp
+END(cycle_to_cputime)
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+
GLOBAL_ENTRY(start_kernel_thread)
.prologue
.save rp, r0 // this is the end of the call-chain
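
cycle_to_cputime() above is a 64x64->128-bit fixed-point multiply: the per-CPU nsec_per_cyc scale factor is applied and the product is shifted down by IA64_NSEC_PER_CYC_SHIFT. A C sketch of the same conversion, assuming a compiler with 128-bit integer support (the real scale factor lives in the per-CPU cpu_info structure, and the helper name is hypothetical):

static inline unsigned long cycle_to_cputime_sketch(unsigned long cyc,
						    unsigned long nsec_per_cyc)
{
	/* (cyc * nsec_per_cyc) is a 128-bit product; keep bits
	 * [SHIFT .. SHIFT+63], which is what the shrp in the assembly does. */
	return (unsigned long)(((unsigned __int128)cyc * nsec_per_cyc)
			       >> IA64_NSEC_PER_CYC_SHIFT);
}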
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index 8e7193d55528..6da1f20d7372 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -19,12 +19,6 @@ EXPORT_SYMBOL_GPL(empty_zero_page);
EXPORT_SYMBOL(ip_fast_csum); /* hand-coded assembly */
EXPORT_SYMBOL(csum_ipv6_magic);
-#include <asm/semaphore.h>
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_interruptible);
-EXPORT_SYMBOL(__down_trylock);
-EXPORT_SYMBOL(__up);
-
#include <asm/page.h>
EXPORT_SYMBOL(clear_page);
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index d8be23fbe6bc..5538471e8d68 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -472,7 +472,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
static unsigned char count;
static long last_time;
- if (jiffies - last_time > 5*HZ)
+ if (time_after(jiffies, last_time + 5 * HZ))
count = 0;
if (++count < 5) {
last_time = jiffies;
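
The switch to time_after() here (and to time_before() in mca.c below) is the standard jiffies-wraparound idiom: direct comparisons such as "timestamp + 30*HZ > jiffies" break when jiffies wraps, while the signed-difference form stays correct as long as the two timestamps are within half the counter range. Stripped of the typecheck() guards found in <linux/jiffies.h>, the macros are essentially:

#define time_after(a, b)	((long)((b) - (a)) < 0)	/* a is later than b   */
#define time_before(a, b)	time_after(b, a)	/* a is earlier than b */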
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 34f44d8be00d..6678c49daba3 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -805,8 +805,13 @@ ENTRY(break_fault)
(p8) adds r28=16,r28 // A switch cr.iip to next bundle
(p9) adds r8=1,r8 // A increment ei to next slot
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+ ;;
+ mov b6=r30 // I0 setup syscall handler branch reg early
+#else
nop.i 0
;;
+#endif
mov.m r25=ar.unat // M2 (5 cyc)
dep r29=r8,r29,41,2 // I0 insert new ei into cr.ipsr
@@ -817,7 +822,11 @@ ENTRY(break_fault)
//
///////////////////////////////////////////////////////////////////////
st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+ mov.m r30=ar.itc // M get cycle for accounting
+#else
mov b6=r30 // I0 setup syscall handler branch reg early
+#endif
cmp.eq pKStk,pUStk=r0,r17 // A were we on kernel stacks already?
and r9=_TIF_SYSCALL_TRACEAUDIT,r9 // A mask trace or audit
@@ -829,6 +838,30 @@ ENTRY(break_fault)
cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited?
br.call.sptk.many b7=ia64_syscall_setup // B
1:
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+ // mov.m r30=ar.itc is called in advance, and r13 is current
+ add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 // A
+ add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 // A
+(pKStk) br.cond.spnt .skip_accounting // B unlikely skip
+ ;;
+ ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // M get last stamp
+ ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // M time at leave
+ ;;
+ ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // M cumulated stime
+ ld8 r21=[r17] // M cumulated utime
+ sub r22=r19,r18 // A stime before leave
+ ;;
+ st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // M update stamp
+ sub r18=r30,r19 // A elapsed time in user
+ ;;
+ add r20=r20,r22 // A sum stime
+ add r21=r21,r18 // A sum utime
+ ;;
+ st8 [r16]=r20 // M update stime
+ st8 [r17]=r21 // M update utime
+ ;;
+.skip_accounting:
+#endif
mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
nop 0
bsw.1 // B (6 cyc) regs are saved, switch to bank 1
@@ -928,6 +961,7 @@ END(interrupt)
* - r27: saved ar.rsc
* - r28: saved cr.iip
* - r29: saved cr.ipsr
+ * - r30: ar.itc for accounting (don't touch)
* - r31: saved pr
* - b0: original contents (to be saved)
* On exit:
@@ -1090,6 +1124,41 @@ END(dispatch_illegal_op_fault)
DBG_FAULT(16)
FAULT(16)
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+ /*
+ * There is no particular reason for this code to be here, other than
+ * that there happens to be space here that would go unused otherwise.
+ * If this fault ever gets "unreserved", simply move the following
+ * code to a more suitable spot...
+ *
+ * account_sys_enter is called from SAVE_MIN* macros if accounting is
+ * enabled and if the macro is entered from user mode.
+ */
+ENTRY(account_sys_enter)
+ // mov.m r20=ar.itc is called in advance, and r13 is current
+ add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13
+ add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13
+ ;;
+ ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel
+ ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time when we left the kernel
+ ;;
+ ld8 r23=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime
+ ld8 r21=[r17] // cumulated utime
+ sub r22=r19,r18 // stime before leave kernel
+ ;;
+ st8 [r16]=r20,TI_AC_STIME-TI_AC_STAMP // update stamp
+ sub r18=r20,r19 // elapsed time in user mode
+ ;;
+ add r23=r23,r22 // sum stime
+ add r21=r21,r18 // sum utime
+ ;;
+ st8 [r16]=r23 // update stime
+ st8 [r17]=r21 // update utime
+ ;;
+ br.ret.sptk.many rp
+END(account_sys_enter)
+#endif
+
.org ia64_ivt+0x4400
/////////////////////////////////////////////////////////////////////////////////////////
// 0x4400 Entry 17 (size 64 bundles) Reserved
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 8d9a446a0d17..233434f4f88f 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -78,6 +78,20 @@ static enum instruction_type bundle_encoding[32][3] = {
{ u, u, u }, /* 1F */
};
+/* Insert a long branch code */
+static void __kprobes set_brl_inst(void *from, void *to)
+{
+ s64 rel = ((s64) to - (s64) from) >> 4;
+ bundle_t *brl;
+ brl = (bundle_t *) ((u64) from & ~0xf);
+ brl->quad0.template = 0x05; /* [MLX](stop) */
+ brl->quad0.slot0 = NOP_M_INST; /* nop.m 0x0 */
+ brl->quad0.slot1_p0 = ((rel >> 20) & 0x7fffffffff) << 2;
+ brl->quad1.slot1_p1 = (((rel >> 20) & 0x7fffffffff) << 2) >> (64 - 46);
+ /* brl.cond.sptk.many.clr rel<<4 (qp=0) */
+ brl->quad1.slot2 = BRL_INST(rel >> 59, rel & 0xfffff);
+}
+
/*
* In this function we check to see if the instruction
* is IP relative instruction and update the kprobe
@@ -496,6 +510,77 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip;
}
+/* Check whether the instruction in the slot is a break */
+static int __kprobes __is_ia64_break_inst(bundle_t *bundle, uint slot)
+{
+ unsigned int major_opcode;
+ unsigned int template = bundle->quad0.template;
+ unsigned long kprobe_inst;
+
+ /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
+ if (slot == 1 && bundle_encoding[template][1] == L)
+ slot++;
+
+ /* Get Kprobe probe instruction at given slot*/
+ get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode);
+
+ /* For break instruction,
+ * Bits 37:40 Major opcode to be zero
+ * Bits 27:32 X6 to be zero
+ * Bits 32:35 X3 to be zero
+ */
+ if (major_opcode || ((kprobe_inst >> 27) & 0x1FF)) {
+ /* Not a break instruction */
+ return 0;
+ }
+
+ /* Is a break instruction */
+ return 1;
+}
+
+/*
+ * Check whether the target bundle modifies IP or may trigger an
+ * exception; if so, it is not boostable.
+ */
+static int __kprobes can_boost(bundle_t *bundle, uint slot,
+ unsigned long bundle_addr)
+{
+ unsigned int template = bundle->quad0.template;
+
+ do {
+ if (search_exception_tables(bundle_addr + slot) ||
+ __is_ia64_break_inst(bundle, slot))
+ return 0; /* exception may occur in this bundle*/
+ } while ((++slot) < 3);
+ template &= 0x1e;
+ if (template >= 0x10 /* including B unit */ ||
+ template == 0x04 /* including X unit */ ||
+ template == 0x06) /* undefined */
+ return 0;
+
+ return 1;
+}
+
+/* Prepare the long-jump bundle and disable other boosters if needed */
+static void __kprobes prepare_booster(struct kprobe *p)
+{
+ unsigned long addr = (unsigned long)p->addr & ~0xFULL;
+ unsigned int slot = (unsigned long)p->addr & 0xf;
+ struct kprobe *other_kp;
+
+ if (can_boost(&p->ainsn.insn[0].bundle, slot, addr)) {
+ set_brl_inst(&p->ainsn.insn[1].bundle, (bundle_t *)addr + 1);
+ p->ainsn.inst_flag |= INST_FLAG_BOOSTABLE;
+ }
+
+ /* disables boosters in previous slots */
+ for (; addr < (unsigned long)p->addr; addr++) {
+ other_kp = get_kprobe((void *)addr);
+ if (other_kp)
+ other_kp->ainsn.inst_flag &= ~INST_FLAG_BOOSTABLE;
+ }
+}
+
int __kprobes arch_prepare_kprobe(struct kprobe *p)
{
unsigned long addr = (unsigned long) p->addr;
@@ -530,6 +615,8 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
prepare_break_inst(template, slot, major_opcode, kprobe_inst, p, qp);
+ prepare_booster(p);
+
return 0;
}
@@ -543,7 +630,9 @@ void __kprobes arch_arm_kprobe(struct kprobe *p)
src = &p->opcode.bundle;
flush_icache_range((unsigned long)p->ainsn.insn,
- (unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
+ (unsigned long)p->ainsn.insn +
+ sizeof(kprobe_opcode_t) * MAX_INSN_SIZE);
+
switch (p->ainsn.slot) {
case 0:
dest->quad0.slot0 = src->quad0.slot0;
@@ -584,13 +673,13 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
void __kprobes arch_remove_kprobe(struct kprobe *p)
{
mutex_lock(&kprobe_mutex);
- free_insn_slot(p->ainsn.insn, 0);
+ free_insn_slot(p->ainsn.insn, p->ainsn.inst_flag & INST_FLAG_BOOSTABLE);
mutex_unlock(&kprobe_mutex);
}
/*
* We are resuming execution after a single step fault, so the pt_regs
* structure reflects the register state after we executed the instruction
- * located in the kprobe (p->ainsn.insn.bundle). We still need to adjust
+ * located in the kprobe (p->ainsn.insn->bundle). We still need to adjust
* the ip to point back to the original stack address. To set the IP address
* to original stack address, handle the case where we need to fixup the
* relative IP address and/or fixup branch register.
@@ -607,7 +696,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
if (slot == 1 && bundle_encoding[template][1] == L)
slot = 2;
- if (p->ainsn.inst_flag) {
+ if (p->ainsn.inst_flag & ~INST_FLAG_BOOSTABLE) {
if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) {
/* Fix relative IP address */
@@ -686,33 +775,12 @@ static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs)
static int __kprobes is_ia64_break_inst(struct pt_regs *regs)
{
unsigned int slot = ia64_psr(regs)->ri;
- unsigned int template, major_opcode;
- unsigned long kprobe_inst;
unsigned long *kprobe_addr = (unsigned long *)regs->cr_iip;
bundle_t bundle;
memcpy(&bundle, kprobe_addr, sizeof(bundle_t));
- template = bundle.quad0.template;
-
- /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
- if (slot == 1 && bundle_encoding[template][1] == L)
- slot++;
- /* Get Kprobe probe instruction at given slot*/
- get_kprobe_inst(&bundle, slot, &kprobe_inst, &major_opcode);
-
- /* For break instruction,
- * Bits 37:40 Major opcode to be zero
- * Bits 27:32 X6 to be zero
- * Bits 32:35 X3 to be zero
- */
- if (major_opcode || ((kprobe_inst >> 27) & 0x1FF) ) {
- /* Not a break instruction */
- return 0;
- }
-
- /* Is a break instruction */
- return 1;
+ return __is_ia64_break_inst(&bundle, slot);
}
static int __kprobes pre_kprobes_handler(struct die_args *args)
@@ -802,6 +870,19 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
return 1;
ss_probe:
+#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM)
+ if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) {
+ /* Boost up -- we can execute copied instructions directly */
+ ia64_psr(regs)->ri = p->ainsn.slot;
+ regs->cr_iip = (unsigned long)&p->ainsn.insn->bundle & ~0xFULL;
+ /* turn single stepping off */
+ ia64_psr(regs)->ss = 0;
+
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ return 1;
+ }
+#endif
prepare_ss(p, regs);
kcb->kprobe_status = KPROBE_HIT_SS;
return 1;
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 6c18221dba36..e51bced3b0fa 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -69,6 +69,7 @@
* 2007-04-27 Russ Anderson <rja@sgi.com>
* Support multiple cpus going through OS_MCA in the same event.
*/
+#include <linux/jiffies.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/sched.h>
@@ -97,6 +98,7 @@
#include <asm/irq.h>
#include <asm/hw_irq.h>
+#include <asm/tlb.h>
#include "mca_drv.h"
#include "entry.h"
@@ -112,6 +114,7 @@ DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */
DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */
DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */
DEFINE_PER_CPU(u64, ia64_mca_pal_base); /* vaddr PAL code granule */
+DEFINE_PER_CPU(u64, ia64_mca_tr_reload); /* Flag for TR reload */
unsigned long __per_cpu_mca[NR_CPUS];
@@ -293,7 +296,8 @@ static void ia64_mlogbuf_dump_from_init(void)
if (mlogbuf_finished)
return;
- if (mlogbuf_timestamp && (mlogbuf_timestamp + 30*HZ > jiffies)) {
+ if (mlogbuf_timestamp &&
+ time_before(jiffies, mlogbuf_timestamp + 30 * HZ)) {
printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT "
" and the system seems to be messed up.\n");
ia64_mlogbuf_finish(0);
@@ -1182,6 +1186,49 @@ all_in:
return;
}
+/* mca_insert_tr
+ *
+ * Re-insert the dynamically registered TR entries after an MCA,
+ * temporarily switching the region register when it differs.
+ * iord: 1: itr, 2: dtr
+ *
+ */
+static void mca_insert_tr(u64 iord)
+{
+
+ int i;
+ u64 old_rr;
+ struct ia64_tr_entry *p;
+ unsigned long psr;
+ int cpu = smp_processor_id();
+
+ psr = ia64_clear_ic();
+ for (i = IA64_TR_ALLOC_BASE; i < IA64_TR_ALLOC_MAX; i++) {
+ p = &__per_cpu_idtrs[cpu][iord-1][i];
+ if (p->pte & 0x1) {
+ old_rr = ia64_get_rr(p->ifa);
+ if (old_rr != p->rr) {
+ ia64_set_rr(p->ifa, p->rr);
+ ia64_srlz_d();
+ }
+ ia64_ptr(iord, p->ifa, p->itir >> 2);
+ ia64_srlz_i();
+ if (iord & 0x1) {
+ ia64_itr(0x1, i, p->ifa, p->pte, p->itir >> 2);
+ ia64_srlz_i();
+ }
+ if (iord & 0x2) {
+ ia64_itr(0x2, i, p->ifa, p->pte, p->itir >> 2);
+ ia64_srlz_i();
+ }
+ if (old_rr != p->rr) {
+ ia64_set_rr(p->ifa, old_rr);
+ ia64_srlz_d();
+ }
+ }
+ }
+ ia64_set_psr(psr);
+}
+
/*
* ia64_mca_handler
*
@@ -1266,16 +1313,17 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
} else {
/* Dump buffered message to console */
ia64_mlogbuf_finish(1);
-#ifdef CONFIG_KEXEC
- atomic_set(&kdump_in_progress, 1);
- monarch_cpu = -1;
-#endif
}
+
+ if (__get_cpu_var(ia64_mca_tr_reload)) {
+ mca_insert_tr(0x1); /* Reload dynamic itrs */
+ mca_insert_tr(0x2); /* Reload dynamic dtrs */
+ }
+
if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover)
== NOTIFY_STOP)
ia64_mca_spin(__func__);
-
if (atomic_dec_return(&mca_count) > 0) {
int i;
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
index 8bc7d259e0c6..a06d46548ff9 100644
--- a/arch/ia64/kernel/mca_asm.S
+++ b/arch/ia64/kernel/mca_asm.S
@@ -219,8 +219,13 @@ ia64_reload_tr:
mov r20=IA64_TR_CURRENT_STACK
;;
itr.d dtr[r20]=r16
+ GET_THIS_PADDR(r2, ia64_mca_tr_reload)
+ mov r18 = 1
;;
srlz.d
+ ;;
+ st8 [r2] =r18
+ ;;
done_tlb_purge_and_reload:
diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h
index c9ac8bada786..7c548ac52bbc 100644
--- a/arch/ia64/kernel/minstate.h
+++ b/arch/ia64/kernel/minstate.h
@@ -3,6 +3,18 @@
#include "entry.h"
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+/* read ar.itc in advance, and use it before leaving bank 0 */
+#define ACCOUNT_GET_STAMP \
+(pUStk) mov.m r20=ar.itc;
+#define ACCOUNT_SYS_ENTER \
+(pUStk) br.call.spnt rp=account_sys_enter \
+ ;;
+#else
+#define ACCOUNT_GET_STAMP
+#define ACCOUNT_SYS_ENTER
+#endif
+
/*
* DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
* the minimum state necessary that allows us to turn psr.ic back
@@ -122,11 +134,13 @@
;; \
.mem.offset 0,0; st8.spill [r16]=r2,16; \
.mem.offset 8,0; st8.spill [r17]=r3,16; \
+ ACCOUNT_GET_STAMP \
adds r2=IA64_PT_REGS_R16_OFFSET,r1; \
;; \
EXTRA; \
movl r1=__gp; /* establish kernel global pointer */ \
;; \
+ ACCOUNT_SYS_ENTER \
bsw.1; /* switch back to bank 1 (must be last in insn group) */ \
;;
diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c
index a78b45f5fe2f..c93420c97409 100644
--- a/arch/ia64/kernel/numa.c
+++ b/arch/ia64/kernel/numa.c
@@ -73,7 +73,7 @@ void __init build_cpu_to_node_map(void)
for(node=0; node < MAX_NUMNODES; node++)
cpus_clear(node_to_cpu_mask[node]);
- for(cpu = 0; cpu < NR_CPUS; ++cpu) {
+ for_each_possible_early_cpu(cpu) {
node = -1;
for (i = 0; i < NR_CPUS; ++i)
if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c
index 2cb9425e0421..e0dca8743dbb 100644
--- a/arch/ia64/kernel/patch.c
+++ b/arch/ia64/kernel/patch.c
@@ -135,10 +135,10 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
while (offp < (s32 *) end) {
wp = (u64 *) ia64_imva((char *) offp + *offp);
- wp[0] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
- wp[1] = 0x0004000000000200UL;
- wp[2] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
- wp[3] = 0x0084006880000200UL;
+ wp[0] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
+ wp[1] = 0x0084006880000200UL;
+ wp[2] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
+ wp[3] = 0x0004000000000200UL;
ia64_fc(wp); ia64_fc(wp + 2);
++offp;
}
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index a2aabfdc80d9..d1d24f4598da 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -4204,10 +4204,10 @@ pfm_check_task_exist(pfm_context_t *ctx)
do_each_thread (g, t) {
if (t->thread.pfm_context == ctx) {
ret = 0;
- break;
+ goto out;
}
} while_each_thread (g, t);
-
+out:
read_unlock(&tasklist_lock);
DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx));
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 49937a383b23..a5ea817cbcbf 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -625,21 +625,6 @@ do_dump_fpu (struct unw_frame_info *info, void *arg)
do_dump_task_fpu(current, info, arg);
}
-int
-dump_task_regs(struct task_struct *task, elf_gregset_t *regs)
-{
- struct unw_frame_info tcore_info;
-
- if (current == task) {
- unw_init_running(do_copy_regs, regs);
- } else {
- memset(&tcore_info, 0, sizeof(tcore_info));
- unw_init_from_blocked_task(&tcore_info, task);
- do_copy_task_regs(task, &tcore_info, regs);
- }
- return 1;
-}
-
void
ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
{
@@ -647,21 +632,6 @@ ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
}
int
-dump_task_fpu (struct task_struct *task, elf_fpregset_t *dst)
-{
- struct unw_frame_info tcore_info;
-
- if (current == task) {
- unw_init_running(do_dump_fpu, dst);
- } else {
- memset(&tcore_info, 0, sizeof(tcore_info));
- unw_init_from_blocked_task(&tcore_info, task);
- do_dump_task_fpu(task, &tcore_info, dst);
- }
- return 1;
-}
-
-int
dump_fpu (struct pt_regs *pt, elf_fpregset_t dst)
{
unw_init_running(do_dump_fpu, dst);
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index ab784ec4319d..2a9943b5947f 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -3,6 +3,9 @@
*
* Copyright (C) 1999-2005 Hewlett-Packard Co
* David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 2006 Intel Co
+ * 2006-08-12 - IA64 Native Utrace implementation support added by
+ * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
*
* Derived from the x86 and Alpha versions.
*/
@@ -17,6 +20,8 @@
#include <linux/security.h>
#include <linux/audit.h>
#include <linux/signal.h>
+#include <linux/regset.h>
+#include <linux/elf.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
@@ -740,25 +745,6 @@ ia64_sync_fph (struct task_struct *task)
psr->dfh = 1;
}
-static int
-access_fr (struct unw_frame_info *info, int regnum, int hi,
- unsigned long *data, int write_access)
-{
- struct ia64_fpreg fpval;
- int ret;
-
- ret = unw_get_fr(info, regnum, &fpval);
- if (ret < 0)
- return ret;
-
- if (write_access) {
- fpval.u.bits[hi] = *data;
- ret = unw_set_fr(info, regnum, fpval);
- } else
- *data = fpval.u.bits[hi];
- return ret;
-}
-
/*
* Change the machine-state of CHILD such that it will return via the normal
* kernel exit-path, rather than the syscall-exit path.
@@ -860,309 +846,7 @@ access_nat_bits (struct task_struct *child, struct pt_regs *pt,
static int
access_uarea (struct task_struct *child, unsigned long addr,
- unsigned long *data, int write_access)
-{
- unsigned long *ptr, regnum, urbs_end, cfm;
- struct switch_stack *sw;
- struct pt_regs *pt;
-# define pt_reg_addr(pt, reg) ((void *) \
- ((unsigned long) (pt) \
- + offsetof(struct pt_regs, reg)))
-
-
- pt = task_pt_regs(child);
- sw = (struct switch_stack *) (child->thread.ksp + 16);
-
- if ((addr & 0x7) != 0) {
- dprintk("ptrace: unaligned register address 0x%lx\n", addr);
- return -1;
- }
-
- if (addr < PT_F127 + 16) {
- /* accessing fph */
- if (write_access)
- ia64_sync_fph(child);
- else
- ia64_flush_fph(child);
- ptr = (unsigned long *)
- ((unsigned long) &child->thread.fph + addr);
- } else if ((addr >= PT_F10) && (addr < PT_F11 + 16)) {
- /* scratch registers untouched by kernel (saved in pt_regs) */
- ptr = pt_reg_addr(pt, f10) + (addr - PT_F10);
- } else if (addr >= PT_F12 && addr < PT_F15 + 16) {
- /*
- * Scratch registers untouched by kernel (saved in
- * switch_stack).
- */
- ptr = (unsigned long *) ((long) sw
- + (addr - PT_NAT_BITS - 32));
- } else if (addr < PT_AR_LC + 8) {
- /* preserved state: */
- struct unw_frame_info info;
- char nat = 0;
- int ret;
-
- unw_init_from_blocked_task(&info, child);
- if (unw_unwind_to_user(&info) < 0)
- return -1;
-
- switch (addr) {
- case PT_NAT_BITS:
- return access_nat_bits(child, pt, &info,
- data, write_access);
-
- case PT_R4: case PT_R5: case PT_R6: case PT_R7:
- if (write_access) {
- /* read NaT bit first: */
- unsigned long dummy;
-
- ret = unw_get_gr(&info, (addr - PT_R4)/8 + 4,
- &dummy, &nat);
- if (ret < 0)
- return ret;
- }
- return unw_access_gr(&info, (addr - PT_R4)/8 + 4, data,
- &nat, write_access);
-
- case PT_B1: case PT_B2: case PT_B3:
- case PT_B4: case PT_B5:
- return unw_access_br(&info, (addr - PT_B1)/8 + 1, data,
- write_access);
-
- case PT_AR_EC:
- return unw_access_ar(&info, UNW_AR_EC, data,
- write_access);
-
- case PT_AR_LC:
- return unw_access_ar(&info, UNW_AR_LC, data,
- write_access);
-
- default:
- if (addr >= PT_F2 && addr < PT_F5 + 16)
- return access_fr(&info, (addr - PT_F2)/16 + 2,
- (addr & 8) != 0, data,
- write_access);
- else if (addr >= PT_F16 && addr < PT_F31 + 16)
- return access_fr(&info,
- (addr - PT_F16)/16 + 16,
- (addr & 8) != 0,
- data, write_access);
- else {
- dprintk("ptrace: rejecting access to register "
- "address 0x%lx\n", addr);
- return -1;
- }
- }
- } else if (addr < PT_F9+16) {
- /* scratch state */
- switch (addr) {
- case PT_AR_BSP:
- /*
- * By convention, we use PT_AR_BSP to refer to
- * the end of the user-level backing store.
- * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof)
- * to get the real value of ar.bsp at the time
- * the kernel was entered.
- *
- * Furthermore, when changing the contents of
- * PT_AR_BSP (or PT_CFM) while the task is
- * blocked in a system call, convert the state
- * so that the non-system-call exit
- * path is used. This ensures that the proper
- * state will be picked up when resuming
- * execution. However, it *also* means that
- * once we write PT_AR_BSP/PT_CFM, it won't be
- * possible to modify the syscall arguments of
- * the pending system call any longer. This
- * shouldn't be an issue because modifying
- * PT_AR_BSP/PT_CFM generally implies that
- * we're either abandoning the pending system
- * call or that we defer it's re-execution
- * (e.g., due to GDB doing an inferior
- * function call).
- */
- urbs_end = ia64_get_user_rbs_end(child, pt, &cfm);
- if (write_access) {
- if (*data != urbs_end) {
- if (in_syscall(pt))
- convert_to_non_syscall(child,
- pt,
- cfm);
- /*
- * Simulate user-level write
- * of ar.bsp:
- */
- pt->loadrs = 0;
- pt->ar_bspstore = *data;
- }
- } else
- *data = urbs_end;
- return 0;
-
- case PT_CFM:
- urbs_end = ia64_get_user_rbs_end(child, pt, &cfm);
- if (write_access) {
- if (((cfm ^ *data) & PFM_MASK) != 0) {
- if (in_syscall(pt))
- convert_to_non_syscall(child,
- pt,
- cfm);
- pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK)
- | (*data & PFM_MASK));
- }
- } else
- *data = cfm;
- return 0;
-
- case PT_CR_IPSR:
- if (write_access) {
- unsigned long tmp = *data;
- /* psr.ri==3 is a reserved value: SDM 2:25 */
- if ((tmp & IA64_PSR_RI) == IA64_PSR_RI)
- tmp &= ~IA64_PSR_RI;
- pt->cr_ipsr = ((tmp & IPSR_MASK)
- | (pt->cr_ipsr & ~IPSR_MASK));
- } else
- *data = (pt->cr_ipsr & IPSR_MASK);
- return 0;
-
- case PT_AR_RSC:
- if (write_access)
- pt->ar_rsc = *data | (3 << 2); /* force PL3 */
- else
- *data = pt->ar_rsc;
- return 0;
-
- case PT_AR_RNAT:
- ptr = pt_reg_addr(pt, ar_rnat);
- break;
- case PT_R1:
- ptr = pt_reg_addr(pt, r1);
- break;
- case PT_R2: case PT_R3:
- ptr = pt_reg_addr(pt, r2) + (addr - PT_R2);
- break;
- case PT_R8: case PT_R9: case PT_R10: case PT_R11:
- ptr = pt_reg_addr(pt, r8) + (addr - PT_R8);
- break;
- case PT_R12: case PT_R13:
- ptr = pt_reg_addr(pt, r12) + (addr - PT_R12);
- break;
- case PT_R14:
- ptr = pt_reg_addr(pt, r14);
- break;
- case PT_R15:
- ptr = pt_reg_addr(pt, r15);
- break;
- case PT_R16: case PT_R17: case PT_R18: case PT_R19:
- case PT_R20: case PT_R21: case PT_R22: case PT_R23:
- case PT_R24: case PT_R25: case PT_R26: case PT_R27:
- case PT_R28: case PT_R29: case PT_R30: case PT_R31:
- ptr = pt_reg_addr(pt, r16) + (addr - PT_R16);
- break;
- case PT_B0:
- ptr = pt_reg_addr(pt, b0);
- break;
- case PT_B6:
- ptr = pt_reg_addr(pt, b6);
- break;
- case PT_B7:
- ptr = pt_reg_addr(pt, b7);
- break;
- case PT_F6: case PT_F6+8: case PT_F7: case PT_F7+8:
- case PT_F8: case PT_F8+8: case PT_F9: case PT_F9+8:
- ptr = pt_reg_addr(pt, f6) + (addr - PT_F6);
- break;
- case PT_AR_BSPSTORE:
- ptr = pt_reg_addr(pt, ar_bspstore);
- break;
- case PT_AR_UNAT:
- ptr = pt_reg_addr(pt, ar_unat);
- break;
- case PT_AR_PFS:
- ptr = pt_reg_addr(pt, ar_pfs);
- break;
- case PT_AR_CCV:
- ptr = pt_reg_addr(pt, ar_ccv);
- break;
- case PT_AR_FPSR:
- ptr = pt_reg_addr(pt, ar_fpsr);
- break;
- case PT_CR_IIP:
- ptr = pt_reg_addr(pt, cr_iip);
- break;
- case PT_PR:
- ptr = pt_reg_addr(pt, pr);
- break;
- /* scratch register */
-
- default:
- /* disallow accessing anything else... */
- dprintk("ptrace: rejecting access to register "
- "address 0x%lx\n", addr);
- return -1;
- }
- } else if (addr <= PT_AR_SSD) {
- ptr = pt_reg_addr(pt, ar_csd) + (addr - PT_AR_CSD);
- } else {
- /* access debug registers */
-
- if (addr >= PT_IBR) {
- regnum = (addr - PT_IBR) >> 3;
- ptr = &child->thread.ibr[0];
- } else {
- regnum = (addr - PT_DBR) >> 3;
- ptr = &child->thread.dbr[0];
- }
-
- if (regnum >= 8) {
- dprintk("ptrace: rejecting access to register "
- "address 0x%lx\n", addr);
- return -1;
- }
-#ifdef CONFIG_PERFMON
- /*
- * Check if debug registers are used by perfmon. This
- * test must be done once we know that we can do the
- * operation, i.e. the arguments are all valid, but
- * before we start modifying the state.
- *
- * Perfmon needs to keep a count of how many processes
- * are trying to modify the debug registers for system
- * wide monitoring sessions.
- *
- * We also include read access here, because they may
- * cause the PMU-installed debug register state
- * (dbr[], ibr[]) to be reset. The two arrays are also
- * used by perfmon, but we do not use
- * IA64_THREAD_DBG_VALID. The registers are restored
- * by the PMU context switch code.
- */
- if (pfm_use_debug_registers(child)) return -1;
-#endif
-
- if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
- child->thread.flags |= IA64_THREAD_DBG_VALID;
- memset(child->thread.dbr, 0,
- sizeof(child->thread.dbr));
- memset(child->thread.ibr, 0,
- sizeof(child->thread.ibr));
- }
-
- ptr += regnum;
-
- if ((regnum & 1) && write_access) {
- /* don't let the user set kernel-level breakpoints: */
- *ptr = *data & ~(7UL << 56);
- return 0;
- }
- }
- if (write_access)
- *ptr = *data;
- else
- *data = *ptr;
- return 0;
-}
+ unsigned long *data, int write_access);
static long
ptrace_getregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
@@ -1626,3 +1310,892 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
if (test_thread_flag(TIF_RESTORE_RSE))
ia64_sync_krbs();
}
+
+/* Utrace implementation starts here */
+struct regset_get {
+ void *kbuf;
+ void __user *ubuf;
+};
+
+struct regset_set {
+ const void *kbuf;
+ const void __user *ubuf;
+};
+
+struct regset_getset {
+ struct task_struct *target;
+ const struct user_regset *regset;
+ union {
+ struct regset_get get;
+ struct regset_set set;
+ } u;
+ unsigned int pos;
+ unsigned int count;
+ int ret;
+};
+
+static int
+access_elf_gpreg(struct task_struct *target, struct unw_frame_info *info,
+ unsigned long addr, unsigned long *data, int write_access)
+{
+ struct pt_regs *pt;
+ unsigned long *ptr = NULL;
+ int ret;
+ char nat = 0;
+
+ pt = task_pt_regs(target);
+ switch (addr) {
+ case ELF_GR_OFFSET(1):
+ ptr = &pt->r1;
+ break;
+ case ELF_GR_OFFSET(2):
+ case ELF_GR_OFFSET(3):
+ ptr = (void *)&pt->r2 + (addr - ELF_GR_OFFSET(2));
+ break;
+ case ELF_GR_OFFSET(4) ... ELF_GR_OFFSET(7):
+ if (write_access) {
+ /* read NaT bit first: */
+ unsigned long dummy;
+
+ ret = unw_get_gr(info, addr/8, &dummy, &nat);
+ if (ret < 0)
+ return ret;
+ }
+ return unw_access_gr(info, addr/8, data, &nat, write_access);
+ case ELF_GR_OFFSET(8) ... ELF_GR_OFFSET(11):
+ ptr = (void *)&pt->r8 + addr - ELF_GR_OFFSET(8);
+ break;
+ case ELF_GR_OFFSET(12):
+ case ELF_GR_OFFSET(13):
+ ptr = (void *)&pt->r12 + addr - ELF_GR_OFFSET(12);
+ break;
+ case ELF_GR_OFFSET(14):
+ ptr = &pt->r14;
+ break;
+ case ELF_GR_OFFSET(15):
+ ptr = &pt->r15;
+ }
+ if (write_access)
+ *ptr = *data;
+ else
+ *data = *ptr;
+ return 0;
+}
+
+static int
+access_elf_breg(struct task_struct *target, struct unw_frame_info *info,
+ unsigned long addr, unsigned long *data, int write_access)
+{
+ struct pt_regs *pt;
+ unsigned long *ptr = NULL;
+
+ pt = task_pt_regs(target);
+ switch (addr) {
+ case ELF_BR_OFFSET(0):
+ ptr = &pt->b0;
+ break;
+ case ELF_BR_OFFSET(1) ... ELF_BR_OFFSET(5):
+ return unw_access_br(info, (addr - ELF_BR_OFFSET(0))/8,
+ data, write_access);
+ case ELF_BR_OFFSET(6):
+ ptr = &pt->b6;
+ break;
+ case ELF_BR_OFFSET(7):
+ ptr = &pt->b7;
+ }
+ if (write_access)
+ *ptr = *data;
+ else
+ *data = *ptr;
+ return 0;
+}
+
+static int
+access_elf_areg(struct task_struct *target, struct unw_frame_info *info,
+ unsigned long addr, unsigned long *data, int write_access)
+{
+ struct pt_regs *pt;
+ unsigned long cfm, urbs_end;
+ unsigned long *ptr = NULL;
+
+ pt = task_pt_regs(target);
+ if (addr >= ELF_AR_RSC_OFFSET && addr <= ELF_AR_SSD_OFFSET) {
+ switch (addr) {
+ case ELF_AR_RSC_OFFSET:
+ /* force PL3 */
+ if (write_access)
+ pt->ar_rsc = *data | (3 << 2);
+ else
+ *data = pt->ar_rsc;
+ return 0;
+ case ELF_AR_BSP_OFFSET:
+ /*
+ * By convention, we use PT_AR_BSP to refer to
+ * the end of the user-level backing store.
+ * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof)
+ * to get the real value of ar.bsp at the time
+ * the kernel was entered.
+ *
+ * Furthermore, when changing the contents of
+ * PT_AR_BSP (or PT_CFM) while the task is
+ * blocked in a system call, convert the state
+ * so that the non-system-call exit
+ * path is used. This ensures that the proper
+ * state will be picked up when resuming
+ * execution. However, it *also* means that
+ * once we write PT_AR_BSP/PT_CFM, it won't be
+ * possible to modify the syscall arguments of
+ * the pending system call any longer. This
+ * shouldn't be an issue because modifying
+ * PT_AR_BSP/PT_CFM generally implies that
+ * we're either abandoning the pending system
+ * call or that we defer its re-execution
+ * (e.g., due to GDB doing an inferior
+ * function call).
+ */
+ urbs_end = ia64_get_user_rbs_end(target, pt, &cfm);
+ if (write_access) {
+ if (*data != urbs_end) {
+ if (in_syscall(pt))
+ convert_to_non_syscall(target,
+ pt,
+ cfm);
+ /*
+ * Simulate user-level write
+ * of ar.bsp:
+ */
+ pt->loadrs = 0;
+ pt->ar_bspstore = *data;
+ }
+ } else
+ *data = urbs_end;
+ return 0;
+ case ELF_AR_BSPSTORE_OFFSET:
+ ptr = &pt->ar_bspstore;
+ break;
+ case ELF_AR_RNAT_OFFSET:
+ ptr = &pt->ar_rnat;
+ break;
+ case ELF_AR_CCV_OFFSET:
+ ptr = &pt->ar_ccv;
+ break;
+ case ELF_AR_UNAT_OFFSET:
+ ptr = &pt->ar_unat;
+ break;
+ case ELF_AR_FPSR_OFFSET:
+ ptr = &pt->ar_fpsr;
+ break;
+ case ELF_AR_PFS_OFFSET:
+ ptr = &pt->ar_pfs;
+ break;
+ case ELF_AR_LC_OFFSET:
+ return unw_access_ar(info, UNW_AR_LC, data,
+ write_access);
+ case ELF_AR_EC_OFFSET:
+ return unw_access_ar(info, UNW_AR_EC, data,
+ write_access);
+ case ELF_AR_CSD_OFFSET:
+ ptr = &pt->ar_csd;
+ break;
+ case ELF_AR_SSD_OFFSET:
+ ptr = &pt->ar_ssd;
+ }
+ } else if (addr >= ELF_CR_IIP_OFFSET && addr <= ELF_CR_IPSR_OFFSET) {
+ switch (addr) {
+ case ELF_CR_IIP_OFFSET:
+ ptr = &pt->cr_iip;
+ break;
+ case ELF_CFM_OFFSET:
+ urbs_end = ia64_get_user_rbs_end(target, pt, &cfm);
+ if (write_access) {
+ if (((cfm ^ *data) & PFM_MASK) != 0) {
+ if (in_syscall(pt))
+ convert_to_non_syscall(target,
+ pt,
+ cfm);
+ pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK)
+ | (*data & PFM_MASK));
+ }
+ } else
+ *data = cfm;
+ return 0;
+ case ELF_CR_IPSR_OFFSET:
+ if (write_access) {
+ unsigned long tmp = *data;
+ /* psr.ri==3 is a reserved value: SDM 2:25 */
+ if ((tmp & IA64_PSR_RI) == IA64_PSR_RI)
+ tmp &= ~IA64_PSR_RI;
+ pt->cr_ipsr = ((tmp & IPSR_MASK)
+ | (pt->cr_ipsr & ~IPSR_MASK));
+ } else
+ *data = (pt->cr_ipsr & IPSR_MASK);
+ return 0;
+ }
+ } else if (addr == ELF_NAT_OFFSET)
+ return access_nat_bits(target, pt, info,
+ data, write_access);
+ else if (addr == ELF_PR_OFFSET)
+ ptr = &pt->pr;
+ else
+ return -1;
+
+ if (write_access)
+ *ptr = *data;
+ else
+ *data = *ptr;
+
+ return 0;
+}
+
+static int
+access_elf_reg(struct task_struct *target, struct unw_frame_info *info,
+ unsigned long addr, unsigned long *data, int write_access)
+{
+ if (addr >= ELF_GR_OFFSET(1) && addr <= ELF_GR_OFFSET(15))
+ return access_elf_gpreg(target, info, addr, data, write_access);
+ else if (addr >= ELF_BR_OFFSET(0) && addr <= ELF_BR_OFFSET(7))
+ return access_elf_breg(target, info, addr, data, write_access);
+ else
+ return access_elf_areg(target, info, addr, data, write_access);
+}
+
+void do_gpregs_get(struct unw_frame_info *info, void *arg)
+{
+ struct pt_regs *pt;
+ struct regset_getset *dst = arg;
+ elf_greg_t tmp[16];
+ unsigned int i, index, min_copy;
+
+ if (unw_unwind_to_user(info) < 0)
+ return;
+
+ /*
+ * coredump format:
+ * r0-r31
+ * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT)
+ * predicate registers (p0-p63)
+ * b0-b7
+ * ip cfm user-mask
+ * ar.rsc ar.bsp ar.bspstore ar.rnat
+ * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec
+ */
+
+
+ /* Skip r0 */
+ if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) {
+ dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count,
+ &dst->u.get.kbuf,
+ &dst->u.get.ubuf,
+ 0, ELF_GR_OFFSET(1));
+ if (dst->ret || dst->count == 0)
+ return;
+ }
+
+ /* gr1 - gr15 */
+ if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) {
+ index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t);
+ min_copy = ELF_GR_OFFSET(16) > (dst->pos + dst->count) ?
+ (dst->pos + dst->count) : ELF_GR_OFFSET(16);
+ for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
+ index++)
+ if (access_elf_reg(dst->target, info, i,
+ &tmp[index], 0) < 0) {
+ dst->ret = -EIO;
+ return;
+ }
+ dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+ &dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+ ELF_GR_OFFSET(1), ELF_GR_OFFSET(16));
+ if (dst->ret || dst->count == 0)
+ return;
+ }
+
+ /* r16-r31 */
+ if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) {
+ pt = task_pt_regs(dst->target);
+ dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+ &dst->u.get.kbuf, &dst->u.get.ubuf, &pt->r16,
+ ELF_GR_OFFSET(16), ELF_NAT_OFFSET);
+ if (dst->ret || dst->count == 0)
+ return;
+ }
+
+ /* nat, pr, b0 - b7 */
+ if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) {
+ index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t);
+ min_copy = ELF_CR_IIP_OFFSET > (dst->pos + dst->count) ?
+ (dst->pos + dst->count) : ELF_CR_IIP_OFFSET;
+ for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
+ index++)
+ if (access_elf_reg(dst->target, info, i,
+ &tmp[index], 0) < 0) {
+ dst->ret = -EIO;
+ return;
+ }
+ dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+ &dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+ ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET);
+ if (dst->ret || dst->count == 0)
+ return;
+ }
+
+ /* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat
+ * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd
+ */
+ if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) {
+ index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t);
+ min_copy = ELF_AR_END_OFFSET > (dst->pos + dst->count) ?
+ (dst->pos + dst->count) : ELF_AR_END_OFFSET;
+ for (i = dst->pos; i < min_copy; i += sizeof(elf_greg_t),
+ index++)
+ if (access_elf_reg(dst->target, info, i,
+ &tmp[index], 0) < 0) {
+ dst->ret = -EIO;
+ return;
+ }
+ dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+ &dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+ ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET);
+ }
+}
+
+void do_gpregs_set(struct unw_frame_info *info, void *arg)
+{
+ struct pt_regs *pt;
+ struct regset_getset *dst = arg;
+ elf_greg_t tmp[16];
+ unsigned int i, index;
+
+ if (unw_unwind_to_user(info) < 0)
+ return;
+
+ /* Skip r0 */
+ if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(1)) {
+ dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count,
+ &dst->u.set.kbuf,
+ &dst->u.set.ubuf,
+ 0, ELF_GR_OFFSET(1));
+ if (dst->ret || dst->count == 0)
+ return;
+ }
+
+ /* gr1-gr15 */
+ if (dst->count > 0 && dst->pos < ELF_GR_OFFSET(16)) {
+ i = dst->pos;
+ index = (dst->pos - ELF_GR_OFFSET(1)) / sizeof(elf_greg_t);
+ dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+ &dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+ ELF_GR_OFFSET(1), ELF_GR_OFFSET(16));
+ if (dst->ret)
+ return;
+ for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++)
+ if (access_elf_reg(dst->target, info, i,
+ &tmp[index], 1) < 0) {
+ dst->ret = -EIO;
+ return;
+ }
+ if (dst->count == 0)
+ return;
+ }
+
+ /* gr16-gr31 */
+ if (dst->count > 0 && dst->pos < ELF_NAT_OFFSET) {
+ pt = task_pt_regs(dst->target);
+ dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+ &dst->u.set.kbuf, &dst->u.set.ubuf, &pt->r16,
+ ELF_GR_OFFSET(16), ELF_NAT_OFFSET);
+ if (dst->ret || dst->count == 0)
+ return;
+ }
+
+ /* nat, pr, b0 - b7 */
+ if (dst->count > 0 && dst->pos < ELF_CR_IIP_OFFSET) {
+ i = dst->pos;
+ index = (dst->pos - ELF_NAT_OFFSET) / sizeof(elf_greg_t);
+ dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+ &dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+ ELF_NAT_OFFSET, ELF_CR_IIP_OFFSET);
+ if (dst->ret)
+ return;
+ for (; i < dst->pos; i += sizeof(elf_greg_t), index++)
+ if (access_elf_reg(dst->target, info, i,
+ &tmp[index], 1) < 0) {
+ dst->ret = -EIO;
+ return;
+ }
+ if (dst->count == 0)
+ return;
+ }
+
+ /* ip cfm psr ar.rsc ar.bsp ar.bspstore ar.rnat
+ * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd
+ */
+ if (dst->count > 0 && dst->pos < (ELF_AR_END_OFFSET)) {
+ i = dst->pos;
+ index = (dst->pos - ELF_CR_IIP_OFFSET) / sizeof(elf_greg_t);
+ dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+ &dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+ ELF_CR_IIP_OFFSET, ELF_AR_END_OFFSET);
+ if (dst->ret)
+ return;
+ for ( ; i < dst->pos; i += sizeof(elf_greg_t), index++)
+ if (access_elf_reg(dst->target, info, i,
+ &tmp[index], 1) < 0) {
+ dst->ret = -EIO;
+ return;
+ }
+ }
+}
+
+#define ELF_FP_OFFSET(i) (i * sizeof(elf_fpreg_t))
+
+void do_fpregs_get(struct unw_frame_info *info, void *arg)
+{
+ struct regset_getset *dst = arg;
+ struct task_struct *task = dst->target;
+ elf_fpreg_t tmp[30];
+ int index, min_copy, i;
+
+ if (unw_unwind_to_user(info) < 0)
+ return;
+
+ /* Skip pos 0 and 1 */
+ if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) {
+ dst->ret = user_regset_copyout_zero(&dst->pos, &dst->count,
+ &dst->u.get.kbuf,
+ &dst->u.get.ubuf,
+ 0, ELF_FP_OFFSET(2));
+ if (dst->count == 0 || dst->ret)
+ return;
+ }
+
+ /* fr2-fr31 */
+ if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) {
+ index = (dst->pos - ELF_FP_OFFSET(2)) / sizeof(elf_fpreg_t);
+
+ min_copy = min(((unsigned int)ELF_FP_OFFSET(32)),
+ dst->pos + dst->count);
+ for (i = dst->pos; i < min_copy; i += sizeof(elf_fpreg_t),
+ index++)
+ if (unw_get_fr(info, i / sizeof(elf_fpreg_t),
+ &tmp[index])) {
+ dst->ret = -EIO;
+ return;
+ }
+ dst->ret = user_regset_copyout(&dst->pos, &dst->count,
+ &dst->u.get.kbuf, &dst->u.get.ubuf, tmp,
+ ELF_FP_OFFSET(2), ELF_FP_OFFSET(32));
+ if (dst->count == 0 || dst->ret)
+ return;
+ }
+
+ /* fph */
+ if (dst->count > 0) {
+ ia64_flush_fph(dst->target);
+ if (task->thread.flags & IA64_THREAD_FPH_VALID)
+ dst->ret = user_regset_copyout(
+ &dst->pos, &dst->count,
+ &dst->u.get.kbuf, &dst->u.get.ubuf,
+ &dst->target->thread.fph,
+ ELF_FP_OFFSET(32), -1);
+ else
+ /* Zero fill instead. */
+ dst->ret = user_regset_copyout_zero(
+ &dst->pos, &dst->count,
+ &dst->u.get.kbuf, &dst->u.get.ubuf,
+ ELF_FP_OFFSET(32), -1);
+ }
+}
+
+void do_fpregs_set(struct unw_frame_info *info, void *arg)
+{
+ struct regset_getset *dst = arg;
+ elf_fpreg_t fpreg, tmp[30];
+ int index, start, end;
+
+ if (unw_unwind_to_user(info) < 0)
+ return;
+
+ /* Skip pos 0 and 1 */
+ if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) {
+ dst->ret = user_regset_copyin_ignore(&dst->pos, &dst->count,
+ &dst->u.set.kbuf,
+ &dst->u.set.ubuf,
+ 0, ELF_FP_OFFSET(2));
+ if (dst->count == 0 || dst->ret)
+ return;
+ }
+
+ /* fr2-fr31 */
+ if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) {
+ start = dst->pos;
+ end = min(((unsigned int)ELF_FP_OFFSET(32)),
+ dst->pos + dst->count);
+ dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+ &dst->u.set.kbuf, &dst->u.set.ubuf, tmp,
+ ELF_FP_OFFSET(2), ELF_FP_OFFSET(32));
+ if (dst->ret)
+ return;
+
+ if (start & 0xF) { /* only write high part */
+ if (unw_get_fr(info, start / sizeof(elf_fpreg_t),
+ &fpreg)) {
+ dst->ret = -EIO;
+ return;
+ }
+ tmp[start / sizeof(elf_fpreg_t) - 2].u.bits[0]
+ = fpreg.u.bits[0];
+ start &= ~0xFUL;
+ }
+ if (end & 0xF) { /* only write low part */
+ if (unw_get_fr(info, end / sizeof(elf_fpreg_t),
+ &fpreg)) {
+ dst->ret = -EIO;
+ return;
+ }
+ tmp[end / sizeof(elf_fpreg_t) - 2].u.bits[1]
+ = fpreg.u.bits[1];
+ end = (end + 0xF) & ~0xFUL;
+ }
+
+ for ( ; start < end ; start += sizeof(elf_fpreg_t)) {
+ index = start / sizeof(elf_fpreg_t);
+ if (unw_set_fr(info, index, tmp[index - 2])) {
+ dst->ret = -EIO;
+ return;
+ }
+ }
+ if (dst->ret || dst->count == 0)
+ return;
+ }
+
+ /* fph */
+ if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(128)) {
+ ia64_sync_fph(dst->target);
+ dst->ret = user_regset_copyin(&dst->pos, &dst->count,
+ &dst->u.set.kbuf,
+ &dst->u.set.ubuf,
+ &dst->target->thread.fph,
+ ELF_FP_OFFSET(32), -1);
+ }
+}
+
+static int
+do_regset_call(void (*call)(struct unw_frame_info *, void *),
+ struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ struct regset_getset info = { .target = target, .regset = regset,
+ .pos = pos, .count = count,
+ .u.set = { .kbuf = kbuf, .ubuf = ubuf },
+ .ret = 0 };
+
+ if (target == current)
+ unw_init_running(call, &info);
+ else {
+ struct unw_frame_info ufi;
+ memset(&ufi, 0, sizeof(ufi));
+ unw_init_from_blocked_task(&ufi, target);
+ (*call)(&ufi, &info);
+ }
+
+ return info.ret;
+}
+
+static int
+gpregs_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ return do_regset_call(do_gpregs_get, target, regset, pos, count,
+ kbuf, ubuf);
+}
+
+static int gpregs_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ return do_regset_call(do_gpregs_set, target, regset, pos, count,
+ kbuf, ubuf);
+}
+
+static void do_gpregs_writeback(struct unw_frame_info *info, void *arg)
+{
+ do_sync_rbs(info, ia64_sync_user_rbs);
+}
+
+/*
+ * This is called to write back the register backing store.
+ * ptrace does this before it stops, so that a tracer reading the user
+ * memory after the thread stops will get the current register data.
+ */
+static int
+gpregs_writeback(struct task_struct *target,
+ const struct user_regset *regset,
+ int now)
+{
+ if (test_and_set_tsk_thread_flag(target, TIF_RESTORE_RSE))
+ return 0;
+ tsk_set_notify_resume(target);
+ return do_regset_call(do_gpregs_writeback, target, regset, 0, 0,
+ NULL, NULL);
+}
+
+static int
+fpregs_active(struct task_struct *target, const struct user_regset *regset)
+{
+ return (target->thread.flags & IA64_THREAD_FPH_VALID) ? 128 : 32;
+}
+
+static int fpregs_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ return do_regset_call(do_fpregs_get, target, regset, pos, count,
+ kbuf, ubuf);
+}
+
+static int fpregs_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ return do_regset_call(do_fpregs_set, target, regset, pos, count,
+ kbuf, ubuf);
+}
+
+static int
+access_uarea(struct task_struct *child, unsigned long addr,
+ unsigned long *data, int write_access)
+{
+ unsigned int pos = -1; /* an invalid value */
+ int ret;
+ unsigned long *ptr, regnum;
+
+ if ((addr & 0x7) != 0) {
+ dprintk("ptrace: unaligned register address 0x%lx\n", addr);
+ return -1;
+ }
+ if ((addr >= PT_NAT_BITS + 8 && addr < PT_F2) ||
+ (addr >= PT_R7 + 8 && addr < PT_B1) ||
+ (addr >= PT_AR_LC + 8 && addr < PT_CR_IPSR) ||
+ (addr >= PT_AR_SSD + 8 && addr < PT_DBR)) {
+ dprintk("ptrace: rejecting access to register "
+ "address 0x%lx\n", addr);
+ return -1;
+ }
+
+ switch (addr) {
+ case PT_F32 ... (PT_F127 + 15):
+ pos = addr - PT_F32 + ELF_FP_OFFSET(32);
+ break;
+ case PT_F2 ... (PT_F5 + 15):
+ pos = addr - PT_F2 + ELF_FP_OFFSET(2);
+ break;
+ case PT_F10 ... (PT_F31 + 15):
+ pos = addr - PT_F10 + ELF_FP_OFFSET(10);
+ break;
+ case PT_F6 ... (PT_F9 + 15):
+ pos = addr - PT_F6 + ELF_FP_OFFSET(6);
+ break;
+ }
+
+ if (pos != -1) {
+ if (write_access)
+ ret = fpregs_set(child, NULL, pos,
+ sizeof(unsigned long), data, NULL);
+ else
+ ret = fpregs_get(child, NULL, pos,
+ sizeof(unsigned long), data, NULL);
+ if (ret != 0)
+ return -1;
+ return 0;
+ }
+
+ switch (addr) {
+ case PT_NAT_BITS:
+ pos = ELF_NAT_OFFSET;
+ break;
+ case PT_R4 ... PT_R7:
+ pos = addr - PT_R4 + ELF_GR_OFFSET(4);
+ break;
+ case PT_B1 ... PT_B5:
+ pos = addr - PT_B1 + ELF_BR_OFFSET(1);
+ break;
+ case PT_AR_EC:
+ pos = ELF_AR_EC_OFFSET;
+ break;
+ case PT_AR_LC:
+ pos = ELF_AR_LC_OFFSET;
+ break;
+ case PT_CR_IPSR:
+ pos = ELF_CR_IPSR_OFFSET;
+ break;
+ case PT_CR_IIP:
+ pos = ELF_CR_IIP_OFFSET;
+ break;
+ case PT_CFM:
+ pos = ELF_CFM_OFFSET;
+ break;
+ case PT_AR_UNAT:
+ pos = ELF_AR_UNAT_OFFSET;
+ break;
+ case PT_AR_PFS:
+ pos = ELF_AR_PFS_OFFSET;
+ break;
+ case PT_AR_RSC:
+ pos = ELF_AR_RSC_OFFSET;
+ break;
+ case PT_AR_RNAT:
+ pos = ELF_AR_RNAT_OFFSET;
+ break;
+ case PT_AR_BSPSTORE:
+ pos = ELF_AR_BSPSTORE_OFFSET;
+ break;
+ case PT_PR:
+ pos = ELF_PR_OFFSET;
+ break;
+ case PT_B6:
+ pos = ELF_BR_OFFSET(6);
+ break;
+ case PT_AR_BSP:
+ pos = ELF_AR_BSP_OFFSET;
+ break;
+ case PT_R1 ... PT_R3:
+ pos = addr - PT_R1 + ELF_GR_OFFSET(1);
+ break;
+ case PT_R12 ... PT_R15:
+ pos = addr - PT_R12 + ELF_GR_OFFSET(12);
+ break;
+ case PT_R8 ... PT_R11:
+ pos = addr - PT_R8 + ELF_GR_OFFSET(8);
+ break;
+ case PT_R16 ... PT_R31:
+ pos = addr - PT_R16 + ELF_GR_OFFSET(16);
+ break;
+ case PT_AR_CCV:
+ pos = ELF_AR_CCV_OFFSET;
+ break;
+ case PT_AR_FPSR:
+ pos = ELF_AR_FPSR_OFFSET;
+ break;
+ case PT_B0:
+ pos = ELF_BR_OFFSET(0);
+ break;
+ case PT_B7:
+ pos = ELF_BR_OFFSET(7);
+ break;
+ case PT_AR_CSD:
+ pos = ELF_AR_CSD_OFFSET;
+ break;
+ case PT_AR_SSD:
+ pos = ELF_AR_SSD_OFFSET;
+ break;
+ }
+
+ if (pos != -1) {
+ if (write_access)
+ ret = gpregs_set(child, NULL, pos,
+ sizeof(unsigned long), data, NULL);
+ else
+ ret = gpregs_get(child, NULL, pos,
+ sizeof(unsigned long), data, NULL);
+ if (ret != 0)
+ return -1;
+ return 0;
+ }
+
+ /* access debug registers */
+ if (addr >= PT_IBR) {
+ regnum = (addr - PT_IBR) >> 3;
+ ptr = &child->thread.ibr[0];
+ } else {
+ regnum = (addr - PT_DBR) >> 3;
+ ptr = &child->thread.dbr[0];
+ }
+
+ if (regnum >= 8) {
+ dprintk("ptrace: rejecting access to register "
+ "address 0x%lx\n", addr);
+ return -1;
+ }
+#ifdef CONFIG_PERFMON
+ /*
+ * Check if debug registers are used by perfmon. This
+ * test must be done once we know that we can do the
+ * operation, i.e. the arguments are all valid, but
+ * before we start modifying the state.
+ *
+ * Perfmon needs to keep a count of how many processes
+ * are trying to modify the debug registers for system
+ * wide monitoring sessions.
+ *
+ * We also include read access here, because they may
+ * cause the PMU-installed debug register state
+ * (dbr[], ibr[]) to be reset. The two arrays are also
+ * used by perfmon, but we do not use
+ * IA64_THREAD_DBG_VALID. The registers are restored
+ * by the PMU context switch code.
+ */
+ if (pfm_use_debug_registers(child))
+ return -1;
+#endif
+
+ if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
+ child->thread.flags |= IA64_THREAD_DBG_VALID;
+ memset(child->thread.dbr, 0,
+ sizeof(child->thread.dbr));
+ memset(child->thread.ibr, 0,
+ sizeof(child->thread.ibr));
+ }
+
+ ptr += regnum;
+
+ if ((regnum & 1) && write_access) {
+ /* don't let the user set kernel-level breakpoints: */
+ *ptr = *data & ~(7UL << 56);
+ return 0;
+ }
+ if (write_access)
+ *ptr = *data;
+ else
+ *data = *ptr;
+ return 0;
+}
+
+static const struct user_regset native_regsets[] = {
+ {
+ .core_note_type = NT_PRSTATUS,
+ .n = ELF_NGREG,
+ .size = sizeof(elf_greg_t), .align = sizeof(elf_greg_t),
+ .get = gpregs_get, .set = gpregs_set,
+ .writeback = gpregs_writeback
+ },
+ {
+ .core_note_type = NT_PRFPREG,
+ .n = ELF_NFPREG,
+ .size = sizeof(elf_fpreg_t), .align = sizeof(elf_fpreg_t),
+ .get = fpregs_get, .set = fpregs_set, .active = fpregs_active
+ },
+};
+
+static const struct user_regset_view user_ia64_view = {
+ .name = "ia64",
+ .e_machine = EM_IA_64,
+ .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets)
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *tsk)
+{
+#ifdef CONFIG_IA32_SUPPORT
+ extern const struct user_regset_view user_ia32_view;
+ if (IS_IA32_PROCESS(task_pt_regs(tsk)))
+ return &user_ia32_view;
+#endif
+ return &user_ia64_view;
+}
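With the regsets in place, the core-dump path and access_uarea() can go through the same byte-window interface: a regset ->get()/->set() handler copies the window [pos, pos + count) between task state and either a kernel buffer or a user buffer. A hedged kernel-context sketch of a caller (the helper name is made up; it assumes the 2.6.25-era regset API used above and <linux/regset.h>):

/*
 * Illustrative only: read one 8-byte slot from this task's NT_PRSTATUS
 * regset, the same way access_uarea() above funnels PT_* offsets into
 * gpregs_get() with a kernel buffer and a NULL user buffer.
 */
static int read_one_greg(struct task_struct *task, unsigned int pos,
			 unsigned long *val)
{
	const struct user_regset_view *view = task_user_regset_view(task);
	const struct user_regset *rs = &view->regsets[0];	/* NT_PRSTATUS */

	if (pos & (sizeof(*val) - 1))
		return -EIO;	/* regset positions here are 8-byte aligned */
	return rs->get(task, rs, pos, sizeof(*val), val, NULL);
}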
diff --git a/arch/ia64/kernel/semaphore.c b/arch/ia64/kernel/semaphore.c
deleted file mode 100644
index 2724ef3fbae2..000000000000
--- a/arch/ia64/kernel/semaphore.c
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * IA-64 semaphore implementation (derived from x86 version).
- *
- * Copyright (C) 1999-2000, 2002 Hewlett-Packard Co
- * David Mosberger-Tang <davidm@hpl.hp.com>
- */
-
-/*
- * Semaphores are implemented using a two-way counter: The "count"
- * variable is decremented for each process that tries to acquire the
- * semaphore, while the "sleepers" variable is a count of such
- * acquires.
- *
- * Notably, the inline "up()" and "down()" functions can efficiently
- * test if they need to do any extra work (up needs to do something
- * only if count was negative before the increment operation.
- *
- * "sleeping" and the contention routine ordering is protected
- * by the spinlock in the semaphore's waitqueue head.
- *
- * Note that these functions are only called when there is contention
- * on the lock, and as such all this is the "non-critical" part of the
- * whole semaphore business. The critical part is the inline stuff in
- * <asm/semaphore.h> where we want to avoid any extra jumps and calls.
- */
-#include <linux/sched.h>
-#include <linux/init.h>
-
-#include <asm/errno.h>
-#include <asm/semaphore.h>
-
-/*
- * Logic:
- * - Only on a boundary condition do we need to care. When we go
- * from a negative count to a non-negative, we wake people up.
- * - When we go from a non-negative count to a negative do we
- * (a) synchronize with the "sleepers" count and (b) make sure
- * that we're on the wakeup list before we synchronize so that
- * we cannot lose wakeup events.
- */
-
-void
-__up (struct semaphore *sem)
-{
- wake_up(&sem->wait);
-}
-
-void __sched __down (struct semaphore *sem)
-{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- unsigned long flags;
-
- tsk->state = TASK_UNINTERRUPTIBLE;
- spin_lock_irqsave(&sem->wait.lock, flags);
- add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
- sem->sleepers++;
- for (;;) {
- int sleepers = sem->sleepers;
-
- /*
- * Add "everybody else" into it. They aren't
- * playing, because we own the spinlock in
- * the wait_queue_head.
- */
- if (!atomic_add_negative(sleepers - 1, &sem->count)) {
- sem->sleepers = 0;
- break;
- }
- sem->sleepers = 1; /* us - see -1 above */
- spin_unlock_irqrestore(&sem->wait.lock, flags);
-
- schedule();
-
- spin_lock_irqsave(&sem->wait.lock, flags);
- tsk->state = TASK_UNINTERRUPTIBLE;
- }
- remove_wait_queue_locked(&sem->wait, &wait);
- wake_up_locked(&sem->wait);
- spin_unlock_irqrestore(&sem->wait.lock, flags);
- tsk->state = TASK_RUNNING;
-}
-
-int __sched __down_interruptible (struct semaphore * sem)
-{
- int retval = 0;
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- unsigned long flags;
-
- tsk->state = TASK_INTERRUPTIBLE;
- spin_lock_irqsave(&sem->wait.lock, flags);
- add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
- sem->sleepers ++;
- for (;;) {
- int sleepers = sem->sleepers;
-
- /*
- * With signals pending, this turns into
- * the trylock failure case - we won't be
- * sleeping, and we* can't get the lock as
- * it has contention. Just correct the count
- * and exit.
- */
- if (signal_pending(current)) {
- retval = -EINTR;
- sem->sleepers = 0;
- atomic_add(sleepers, &sem->count);
- break;
- }
-
- /*
- * Add "everybody else" into it. They aren't
- * playing, because we own the spinlock in
- * wait_queue_head. The "-1" is because we're
- * still hoping to get the semaphore.
- */
- if (!atomic_add_negative(sleepers - 1, &sem->count)) {
- sem->sleepers = 0;
- break;
- }
- sem->sleepers = 1; /* us - see -1 above */
- spin_unlock_irqrestore(&sem->wait.lock, flags);
-
- schedule();
-
- spin_lock_irqsave(&sem->wait.lock, flags);
- tsk->state = TASK_INTERRUPTIBLE;
- }
- remove_wait_queue_locked(&sem->wait, &wait);
- wake_up_locked(&sem->wait);
- spin_unlock_irqrestore(&sem->wait.lock, flags);
-
- tsk->state = TASK_RUNNING;
- return retval;
-}
-
-/*
- * Trylock failed - make sure we correct for having decremented the
- * count.
- */
-int
-__down_trylock (struct semaphore *sem)
-{
- unsigned long flags;
- int sleepers;
-
- spin_lock_irqsave(&sem->wait.lock, flags);
- sleepers = sem->sleepers + 1;
- sem->sleepers = 0;
-
- /*
- * Add "everybody else" and us into it. They aren't
- * playing, because we own the spinlock in the
- * wait_queue_head.
- */
- if (!atomic_add_negative(sleepers, &sem->count)) {
- wake_up_locked(&sem->wait);
- }
-
- spin_unlock_irqrestore(&sem->wait.lock, flags);
- return 1;
-}
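For reference, a worked trace of the count/sleepers scheme this deleted file implemented (the generic semaphore code now takes its place): with count = 1, task A's down() drops count to 0 and proceeds; task B's down() drops it to -1 and enters __down(), bumping sleepers to 1. B's first pass adds sleepers - 1 = 0 to count, still sees a negative result, so it leaves sleepers at 1 and sleeps. When A calls up(), count returns to 0 and B is woken; on its next pass the same add yields a non-negative result, so B zeroes sleepers, breaks out of the loop, and owns the semaphore.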
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 4aa9eaea76c3..5015ca1275ca 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -59,6 +59,7 @@
#include <asm/setup.h>
#include <asm/smp.h>
#include <asm/system.h>
+#include <asm/tlbflush.h>
#include <asm/unistd.h>
#include <asm/hpsim.h>
@@ -176,6 +177,29 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
return 0;
}
+/*
+ * Similar to "filter_rsvd_memory()", but the reserved memory ranges
+ * are not filtered out.
+ */
+int __init
+filter_memory(unsigned long start, unsigned long end, void *arg)
+{
+ void (*func)(unsigned long, unsigned long, int);
+
+#if IGNORE_PFN0
+ if (start == PAGE_OFFSET) {
+ printk(KERN_WARNING "warning: skipping physical page 0\n");
+ start += PAGE_SIZE;
+ if (start >= end)
+ return 0;
+ }
+#endif
+ func = arg;
+ if (start < end)
+ call_pernode_memory(__pa(start), end - start, func);
+ return 0;
+}
+
static void __init
sort_regions (struct rsvd_region *rsvd_region, int max)
{
@@ -493,6 +517,8 @@ setup_arch (char **cmdline_p)
acpi_table_init();
# ifdef CONFIG_ACPI_NUMA
acpi_numa_init();
+ per_cpu_scan_finalize((cpus_weight(early_cpu_possible_map) == 0 ?
+ 32 : cpus_weight(early_cpu_possible_map)), additional_cpus);
# endif
#else
# ifdef CONFIG_SMP
@@ -946,9 +972,10 @@ cpu_init (void)
#endif
/* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */
- if (ia64_pal_vm_summary(NULL, &vmi) == 0)
+ if (ia64_pal_vm_summary(NULL, &vmi) == 0) {
max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1;
- else {
+ setup_ptcg_sem(vmi.pal_vm_info_2_s.max_purges, NPTCG_FROM_PAL);
+ } else {
printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n");
max_ctx = (1U << 15) - 1; /* use architected minimum */
}
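Note that filter_memory() recovers its real consumer from the walker's opaque argument: the void *arg that efi_memmap_walk() passes through is itself a function pointer (register_active_ranges in the contig.c/discontig.c hunks below), picked up via "func = arg". A tiny userspace sketch of that callback-through-arg pattern, with illustrative names (walk_ranges, filter_cb, register_range are not kernel symbols):

#include <stdio.h>

typedef void (*range_fn)(unsigned long start, unsigned long len);

/* The walker hands every discovered range to its filter, passing 'arg'
 * through untouched -- as efi_memmap_walk() does with its second argument. */
static void walk_ranges(int (*filter)(unsigned long, unsigned long, void *),
			void *arg)
{
	filter(0x1000, 0x9000, arg);	/* pretend one usable range was found */
}

static void register_range(unsigned long start, unsigned long len)
{
	printf("register: start=%#lx len=%#lx\n", start, len);
}

static int filter_cb(unsigned long start, unsigned long end, void *arg)
{
	/* 'arg' really carries the consumer; converting between function and
	 * object pointers is acceptable on the platforms this targets */
	range_fn func = (range_fn)arg;

	if (start < end)
		func(start, end - start);
	return 0;
}

int main(void)
{
	/* like efi_memmap_walk(filter_memory, register_active_ranges) */
	walk_ranges(filter_cb, (void *)register_range);
	return 0;
}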
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
index 4e446aa5f4ac..9a9d4c489330 100644
--- a/arch/ia64/kernel/smp.c
+++ b/arch/ia64/kernel/smp.c
@@ -213,6 +213,19 @@ send_IPI_allbutself (int op)
* Called with preemption disabled.
*/
static inline void
+send_IPI_mask(cpumask_t mask, int op)
+{
+ unsigned int cpu;
+
+ for_each_cpu_mask(cpu, mask) {
+ send_IPI_single(cpu, op);
+ }
+}
+
+/*
+ * Called with preemption disabled.
+ */
+static inline void
send_IPI_all (int op)
{
int i;
@@ -401,6 +414,75 @@ smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int
}
EXPORT_SYMBOL(smp_call_function_single);
+/**
+ * smp_call_function_mask(): Run a function on a set of other CPUs.
+ * <mask> The set of cpus to run on. Must not include the current cpu.
+ * <func> The function to run. This must be fast and non-blocking.
+ * <info> An arbitrary pointer to pass to the function.
+ * <wait> If true, wait (atomically) until function
+ * has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code.
+ *
+ * If @wait is true, then returns once @func has returned; otherwise
+ * it returns just before the target cpu calls @func.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ */
+int smp_call_function_mask(cpumask_t mask,
+ void (*func)(void *), void *info,
+ int wait)
+{
+ struct call_data_struct data;
+ cpumask_t allbutself;
+ int cpus;
+
+ spin_lock(&call_lock);
+ allbutself = cpu_online_map;
+ cpu_clear(smp_processor_id(), allbutself);
+
+ cpus_and(mask, mask, allbutself);
+ cpus = cpus_weight(mask);
+ if (!cpus) {
+ spin_unlock(&call_lock);
+ return 0;
+ }
+
+ /* Can deadlock when called with interrupts disabled */
+ WARN_ON(irqs_disabled());
+
+ data.func = func;
+ data.info = info;
+ atomic_set(&data.started, 0);
+ data.wait = wait;
+ if (wait)
+ atomic_set(&data.finished, 0);
+
+ call_data = &data;
+ mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC*/
+
+ /* Send a message to other CPUs */
+ if (cpus_equal(mask, allbutself))
+ send_IPI_allbutself(IPI_CALL_FUNC);
+ else
+ send_IPI_mask(mask, IPI_CALL_FUNC);
+
+ /* Wait for response */
+ while (atomic_read(&data.started) != cpus)
+ cpu_relax();
+
+ if (wait)
+ while (atomic_read(&data.finished) != cpus)
+ cpu_relax();
+ call_data = NULL;
+
+ spin_unlock(&call_lock);
+ return 0;
+
+}
+EXPORT_SYMBOL(smp_call_function_mask);
+
/*
* this function sends a 'generic call function' IPI to all other CPUs
* in the system.
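A hedged usage sketch for the new smp_call_function_mask() (the callback and counter below are illustrative, not part of the patch): callers run with preemption disabled but interrupts enabled, and the current CPU must not be left in the mask.

static atomic_t acks = ATOMIC_INIT(0);

static void ack_ipi(void *info)
{
	atomic_inc(&acks);		/* runs on every CPU in the mask */
}

static void ping_cpus(cpumask_t mask)
{
	preempt_disable();
	cpu_clear(smp_processor_id(), mask);	/* never include ourselves */
	if (smp_call_function_mask(mask, ack_ipi, NULL, 1 /* wait */) == 0)
		printk(KERN_DEBUG "acked by %d cpus\n", atomic_read(&acks));
	preempt_enable();
}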
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 32ee5979a042..16483be18c0b 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -400,9 +400,9 @@ smp_callin (void)
/* Setup the per cpu irq handling data structures */
__setup_vector_irq(cpuid);
cpu_set(cpuid, cpu_online_map);
- unlock_ipi_calllock();
per_cpu(cpu_state, cpuid) = CPU_ONLINE;
spin_unlock(&vector_lock);
+ unlock_ipi_calllock();
smp_setup_percpu_timer();
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 17fda5293c67..48e15a51782f 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -59,6 +59,84 @@ static struct clocksource clocksource_itc = {
};
static struct clocksource *itc_clocksource;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+
+#include <linux/kernel_stat.h>
+
+extern cputime_t cycle_to_cputime(u64 cyc);
+
+/*
+ * Called from the context switch with interrupts disabled, to charge all
+ * accumulated times to the current process, and to prepare accounting on
+ * the next process.
+ */
+void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
+{
+ struct thread_info *pi = task_thread_info(prev);
+ struct thread_info *ni = task_thread_info(next);
+ cputime_t delta_stime, delta_utime;
+ __u64 now;
+
+ now = ia64_get_itc();
+
+ delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp));
+ account_system_time(prev, 0, delta_stime);
+ account_system_time_scaled(prev, delta_stime);
+
+ if (pi->ac_utime) {
+ delta_utime = cycle_to_cputime(pi->ac_utime);
+ account_user_time(prev, delta_utime);
+ account_user_time_scaled(prev, delta_utime);
+ }
+
+ pi->ac_stamp = ni->ac_stamp = now;
+ ni->ac_stime = ni->ac_utime = 0;
+}
+
+/*
+ * Account time for a transition between system, hard irq or soft irq state.
+ * Note that this function is called with interrupts enabled.
+ */
+void account_system_vtime(struct task_struct *tsk)
+{
+ struct thread_info *ti = task_thread_info(tsk);
+ unsigned long flags;
+ cputime_t delta_stime;
+ __u64 now;
+
+ local_irq_save(flags);
+
+ now = ia64_get_itc();
+
+ delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
+ account_system_time(tsk, 0, delta_stime);
+ account_system_time_scaled(tsk, delta_stime);
+ ti->ac_stime = 0;
+
+ ti->ac_stamp = now;
+
+ local_irq_restore(flags);
+}
+
+/*
+ * Called from the timer interrupt handler to charge accumulated user time
+ * to the current process. Must be called with interrupts disabled.
+ */
+void account_process_tick(struct task_struct *p, int user_tick)
+{
+ struct thread_info *ti = task_thread_info(p);
+ cputime_t delta_utime;
+
+ if (ti->ac_utime) {
+ delta_utime = cycle_to_cputime(ti->ac_utime);
+ account_user_time(p, delta_utime);
+ account_user_time_scaled(p, delta_utime);
+ ti->ac_utime = 0;
+ }
+}
+
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
+
static irqreturn_t
timer_interrupt (int irq, void *dev_id)
{
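A worked pass through ia64_account_on_switch() above, with invented numbers: if prev's thread_info holds ac_stamp = 1,000,000, ac_stime = 200,000 and ac_utime = 50,000 when the switch happens at itc = 1,600,000, then prev is charged cycle_to_cputime(200,000 + 600,000) of system time (the 200,000 cycles banked by the low-level entry code plus the 600,000 cycles since the last stamp) and cycle_to_cputime(50,000) of user time; both threads' ac_stamp are then reset to 1,600,000 and next's accumulators are cleared, so the next accounting interval starts cleanly from the switch point.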
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
index 6903361d11a5..ff0e7c10faa7 100644
--- a/arch/ia64/kernel/unaligned.c
+++ b/arch/ia64/kernel/unaligned.c
@@ -13,6 +13,7 @@
* 2001/08/13 Correct size of extended floats (float_fsz) from 16 to 10 bytes.
* 2001/01/17 Add support emulation of unaligned kernel accesses.
*/
+#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/tty.h>
@@ -1290,7 +1291,7 @@ within_logging_rate_limit (void)
{
static unsigned long count, last_time;
- if (jiffies - last_time > 5*HZ)
+ if (time_after(jiffies, last_time + 5 * HZ))
count = 0;
if (count < 5) {
last_time = jiffies;
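This hunk, and the xpc hunks further down, convert open-coded jiffies tests to the time_after()/time_before() helpers. The subtraction form replaced here was already wrap-tolerant in the common case, so this one is mainly for the idiom; the direct ">="/"<" comparisons converted in xpc_main.c and xpc_partition.c are the genuinely wrap-unsafe ones, since once jiffies wraps a deadline that is numerically small looks as if it had already passed. The helpers compare through a signed difference, so ordering survives the wrap. A small userspace sketch, assuming 32-bit jiffies purely for illustration:

#include <stdio.h>

typedef unsigned int u32;
/* same signed-difference idea as the kernel helper */
#define time_after_eq(a, b)	((int)((a) - (b)) >= 0)

int main(void)
{
	u32 now      = 0xfffffff8u;	/* just before the counter wraps */
	u32 deadline = now + 0x18;	/* 24 ticks ahead; wraps to 0x10 */

	/* the naive test claims the deadline has already passed ... */
	printf("naive now >= deadline : %d\n", now >= deadline);	/* 1 (wrong) */
	/* ... the helper still orders the two correctly */
	printf("time_after_eq(now, dl): %d\n",
	       time_after_eq(now, deadline));				/* 0 */
	return 0;
}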
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 344f64eca7a9..798bf9835a51 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -45,8 +45,6 @@ void show_mem(void)
printk(KERN_INFO "Mem-info:\n");
show_free_areas();
- printk(KERN_INFO "Free swap: %6ldkB\n",
- nr_swap_pages<<(PAGE_SHIFT-10));
printk(KERN_INFO "Node memory in pages:\n");
for_each_online_pgdat(pgdat) {
unsigned long present;
@@ -255,7 +253,7 @@ paging_init (void)
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_VIRTUAL_MEM_MAP
- efi_memmap_walk(register_active_ranges, NULL);
+ efi_memmap_walk(filter_memory, register_active_ranges);
efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
if (max_gap < LARGE_GAP) {
vmem_map = (struct page *) 0;
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index ee5e68b2af94..544dc420c65e 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -104,7 +104,7 @@ static int __meminit early_nr_cpus_node(int node)
{
int cpu, n = 0;
- for (cpu = 0; cpu < NR_CPUS; cpu++)
+ for_each_possible_early_cpu(cpu)
if (node == node_cpuid[cpu].nid)
n++;
@@ -124,6 +124,7 @@ static unsigned long __meminit compute_pernodesize(int node)
pernodesize += node * L1_CACHE_BYTES;
pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
+ pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
pernodesize = PAGE_ALIGN(pernodesize);
return pernodesize;
}
@@ -142,7 +143,7 @@ static void *per_cpu_node_setup(void *cpu_data, int node)
#ifdef CONFIG_SMP
int cpu;
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ for_each_possible_early_cpu(cpu) {
if (node == node_cpuid[cpu].nid) {
memcpy(__va(cpu_data), __phys_per_cpu_start,
__per_cpu_end - __per_cpu_start);
@@ -345,7 +346,7 @@ static void __init initialize_pernode_data(void)
#ifdef CONFIG_SMP
/* Set the node_data pointer for each per-cpu struct */
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
+ for_each_possible_early_cpu(cpu) {
node = node_cpuid[cpu].nid;
per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data;
}
@@ -444,7 +445,7 @@ void __init find_memory(void)
mem_data[node].min_pfn = ~0UL;
}
- efi_memmap_walk(register_active_ranges, NULL);
+ efi_memmap_walk(filter_memory, register_active_ranges);
/*
* Initialize the boot memory maps in reverse order since that's
@@ -493,13 +494,9 @@ void __cpuinit *per_cpu_init(void)
int cpu;
static int first_time = 1;
-
- if (smp_processor_id() != 0)
- return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
-
if (first_time) {
first_time = 0;
- for (cpu = 0; cpu < NR_CPUS; cpu++)
+ for_each_possible_early_cpu(cpu)
per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
}
@@ -522,8 +519,6 @@ void show_mem(void)
printk(KERN_INFO "Mem-info:\n");
show_free_areas();
- printk(KERN_INFO "Free swap: %6ldkB\n",
- nr_swap_pages<<(PAGE_SHIFT-10));
printk(KERN_INFO "Node memory in pages:\n");
for_each_online_pgdat(pgdat) {
unsigned long present;
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index a4ca657c72c6..5c1de53c8c1c 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -58,7 +58,6 @@ __ia64_sync_icache_dcache (pte_t pte)
{
unsigned long addr;
struct page *page;
- unsigned long order;
page = pte_page(pte);
addr = (unsigned long) page_address(page);
@@ -66,12 +65,7 @@ __ia64_sync_icache_dcache (pte_t pte)
if (test_bit(PG_arch_1, &page->flags))
return; /* i-cache is already coherent with d-cache */
- if (PageCompound(page)) {
- order = compound_order(page);
- flush_icache_range(addr, addr + (1UL << order << PAGE_SHIFT));
- }
- else
- flush_icache_range(addr, addr + PAGE_SIZE);
+ flush_icache_range(addr, addr + (PAGE_SIZE << compound_order(page)));
set_bit(PG_arch_1, &page->flags); /* mark page as clean */
}
@@ -553,12 +547,10 @@ find_largest_hole (u64 start, u64 end, void *arg)
#endif /* CONFIG_VIRTUAL_MEM_MAP */
int __init
-register_active_ranges(u64 start, u64 end, void *arg)
+register_active_ranges(u64 start, u64 len, int nid)
{
- int nid = paddr_to_nid(__pa(start));
+ u64 end = start + len;
- if (nid < 0)
- nid = 0;
#ifdef CONFIG_KEXEC
if (start > crashk_res.start && start < crashk_res.end)
start = crashk_res.end;
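The two flush branches in the __ia64_sync_icache_dcache() hunk above can collapse because compound_order() is 0 for a non-compound page, so PAGE_SIZE << compound_order(page) degenerates to PAGE_SIZE there, and for a compound page it equals 1UL << order << PAGE_SHIFT exactly: with 16 KB pages (PAGE_SHIFT = 14, a common ia64 configuration) an order-2 page flushes 16 KB << 2 = 64 KB either way.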
diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c
index 7807fc5c0422..b73bf1838e57 100644
--- a/arch/ia64/mm/numa.c
+++ b/arch/ia64/mm/numa.c
@@ -27,7 +27,9 @@
*/
int num_node_memblks;
struct node_memblk_s node_memblk[NR_NODE_MEMBLKS];
-struct node_cpuid_s node_cpuid[NR_CPUS];
+struct node_cpuid_s node_cpuid[NR_CPUS] =
+ { [0 ... NR_CPUS-1] = { .phys_id = 0, .nid = NUMA_NO_NODE } };
+
/*
* This is a matrix with "distances" between nodes, they should be
* proportional to the memory access latency ratios.
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 655da240d13c..d52ec4e83409 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -11,6 +11,9 @@
* Rohit Seth <rohit.seth@intel.com>
* Ken Chen <kenneth.w.chen@intel.com>
* Christophe de Dinechin <ddd@hp.com>: Avoid ptc.e on memory allocation
+ * Copyright (C) 2007 Intel Corp
+ * Fenghua Yu <fenghua.yu@intel.com>
+ * Add multiple ptc.g/ptc.ga instruction support in global tlb purge.
*/
#include <linux/module.h>
#include <linux/init.h>
@@ -26,6 +29,9 @@
#include <asm/pal.h>
#include <asm/tlbflush.h>
#include <asm/dma.h>
+#include <asm/processor.h>
+#include <asm/sal.h>
+#include <asm/tlb.h>
static struct {
unsigned long mask; /* mask of supported purge page-sizes */
@@ -39,6 +45,10 @@ struct ia64_ctx ia64_ctx = {
};
DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
+DEFINE_PER_CPU(u8, ia64_tr_num); /* Number of TR slots in current processor */
+DEFINE_PER_CPU(u8, ia64_tr_used); /* Max slot number used by kernel */
+
+struct ia64_tr_entry __per_cpu_idtrs[NR_CPUS][2][IA64_TR_ALLOC_MAX];
/*
* Initializes the ia64_ctx.bitmap array based on max_ctx+1.
@@ -84,14 +94,140 @@ wrap_mmu_context (struct mm_struct *mm)
local_flush_tlb_all();
}
+/*
+ * Implement "spinaphores" ... like counting semaphores, but they
+ * spin instead of sleeping. If there are ever any other users for
+ * this primitive it can be moved up to a spinaphore.h header.
+ */
+struct spinaphore {
+ atomic_t cur;
+};
+
+static inline void spinaphore_init(struct spinaphore *ss, int val)
+{
+ atomic_set(&ss->cur, val);
+}
+
+static inline void down_spin(struct spinaphore *ss)
+{
+ while (unlikely(!atomic_add_unless(&ss->cur, -1, 0)))
+ while (atomic_read(&ss->cur) == 0)
+ cpu_relax();
+}
+
+static inline void up_spin(struct spinaphore *ss)
+{
+ atomic_add(1, &ss->cur);
+}
+
+static struct spinaphore ptcg_sem;
+static u16 nptcg = 1;
+static int need_ptcg_sem = 1;
+static int toolatetochangeptcgsem = 0;
+
+/*
+ * Kernel parameter "nptcg=" overrides max number of concurrent global TLB
+ * purges which is reported from either PAL or SAL PALO.
+ *
+ * There is no sanity checking of the nptcg value; it is the user's
+ * responsibility to supply a value that is valid for the platform.
+ * Otherwise, the kernel may hang in some cases.
+ */
+static int __init
+set_nptcg(char *str)
+{
+ int value = 0;
+
+ get_option(&str, &value);
+ setup_ptcg_sem(value, NPTCG_FROM_KERNEL_PARAMETER);
+
+ return 1;
+}
+
+__setup("nptcg=", set_nptcg);
+
+/*
+ * Maximum number of simultaneous ptc.g purges in the system can
+ * be defined by PAL_VM_SUMMARY (in which case we should take
+ * the smallest value for any cpu in the system) or by the PAL
+ * override table (in which case we should ignore the value from
+ * PAL_VM_SUMMARY).
+ *
+ * Kernel parameter "nptcg=" overrides maximum number of simultanesous ptc.g
+ * purges defined in either PAL_VM_SUMMARY or PAL override table. In this case,
+ * we should ignore the value from either PAL_VM_SUMMARY or PAL override table.
+ *
+ * Complicating the logic here is the fact that num_possible_cpus()
+ * isn't fully set up until we start bringing cpus online.
+ */
+void
+setup_ptcg_sem(int max_purges, int nptcg_from)
+{
+ static int kp_override;
+ static int palo_override;
+ static int firstcpu = 1;
+
+ if (toolatetochangeptcgsem) {
+ BUG_ON(max_purges < nptcg);
+ return;
+ }
+
+ if (nptcg_from == NPTCG_FROM_KERNEL_PARAMETER) {
+ kp_override = 1;
+ nptcg = max_purges;
+ goto resetsema;
+ }
+ if (kp_override) {
+ need_ptcg_sem = num_possible_cpus() > nptcg;
+ return;
+ }
+
+ if (nptcg_from == NPTCG_FROM_PALO) {
+ palo_override = 1;
+
+ /* In PALO max_purges == 0 really means it! */
+ if (max_purges == 0)
+ panic("Whoa! Platform does not support global TLB purges.\n");
+ nptcg = max_purges;
+ if (nptcg == PALO_MAX_TLB_PURGES) {
+ need_ptcg_sem = 0;
+ return;
+ }
+ goto resetsema;
+ }
+ if (palo_override) {
+ if (nptcg != PALO_MAX_TLB_PURGES)
+ need_ptcg_sem = (num_possible_cpus() > nptcg);
+ return;
+ }
+
+ /* In PAL_VM_SUMMARY max_purges == 0 actually means 1 */
+ if (max_purges == 0) max_purges = 1;
+
+ if (firstcpu) {
+ nptcg = max_purges;
+ firstcpu = 0;
+ }
+ if (max_purges < nptcg)
+ nptcg = max_purges;
+ if (nptcg == PAL_MAX_PURGES) {
+ need_ptcg_sem = 0;
+ return;
+ } else
+ need_ptcg_sem = (num_possible_cpus() > nptcg);
+
+resetsema:
+ spinaphore_init(&ptcg_sem, max_purges);
+}
+
void
ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
unsigned long end, unsigned long nbits)
{
- static DEFINE_SPINLOCK(ptcg_lock);
-
struct mm_struct *active_mm = current->active_mm;
+ toolatetochangeptcgsem = 1;
+
if (mm != active_mm) {
/* Restore region IDs for mm */
if (mm && active_mm) {
@@ -102,19 +238,20 @@ ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
}
}
- /* HW requires global serialization of ptc.ga. */
- spin_lock(&ptcg_lock);
- {
- do {
- /*
- * Flush ALAT entries also.
- */
- ia64_ptcga(start, (nbits<<2));
- ia64_srlz_i();
- start += (1UL << nbits);
- } while (start < end);
- }
- spin_unlock(&ptcg_lock);
+ if (need_ptcg_sem)
+ down_spin(&ptcg_sem);
+
+ do {
+ /*
+ * Flush ALAT entries also.
+ */
+ ia64_ptcga(start, (nbits << 2));
+ ia64_srlz_i();
+ start += (1UL << nbits);
+ } while (start < end);
+
+ if (need_ptcg_sem)
+ up_spin(&ptcg_sem);
if (mm != active_mm) {
activate_context(active_mm);
@@ -190,6 +327,9 @@ ia64_tlb_init (void)
ia64_ptce_info_t uninitialized_var(ptce_info); /* GCC be quiet */
unsigned long tr_pgbits;
long status;
+ pal_vm_info_1_u_t vm_info_1;
+ pal_vm_info_2_u_t vm_info_2;
+ int cpu = smp_processor_id();
if ((status = ia64_pal_vm_page_size(&tr_pgbits, &purge.mask)) != 0) {
printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld; "
@@ -206,4 +346,191 @@ ia64_tlb_init (void)
local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
local_flush_tlb_all(); /* nuke left overs from bootstrapping... */
+ status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2);
+
+ if (status) {
+ printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status);
+ per_cpu(ia64_tr_num, cpu) = 8;
+ return;
+ }
+ per_cpu(ia64_tr_num, cpu) = vm_info_1.pal_vm_info_1_s.max_itr_entry+1;
+ if (per_cpu(ia64_tr_num, cpu) >
+ (vm_info_1.pal_vm_info_1_s.max_dtr_entry+1))
+ per_cpu(ia64_tr_num, cpu) =
+ vm_info_1.pal_vm_info_1_s.max_dtr_entry+1;
+ if (per_cpu(ia64_tr_num, cpu) > IA64_TR_ALLOC_MAX) {
+ per_cpu(ia64_tr_num, cpu) = IA64_TR_ALLOC_MAX;
+ printk(KERN_DEBUG "TR register number exceeds IA64_TR_ALLOC_MAX!"
+ "IA64_TR_ALLOC_MAX should be extended\n");
+ }
+}
+
+/*
+ * is_tr_overlap
+ *
+ * Check overlap with inserted TRs.
+ */
+static int is_tr_overlap(struct ia64_tr_entry *p, u64 va, u64 log_size)
+{
+ u64 tr_log_size;
+ u64 tr_end;
+ u64 va_rr = ia64_get_rr(va);
+ u64 va_rid = RR_TO_RID(va_rr);
+ u64 va_end = va + (1<<log_size) - 1;
+
+ if (va_rid != RR_TO_RID(p->rr))
+ return 0;
+ tr_log_size = (p->itir & 0xff) >> 2;
+ tr_end = p->ifa + (1<<tr_log_size) - 1;
+
+ if (va > tr_end || p->ifa > va_end)
+ return 0;
+ return 1;
+
+}
+
+/*
+ * ia64_insert_tr in virtual mode. Allocate a TR slot
+ *
+ * target_mask : 0x1 : itr, 0x2 : dtr, 0x3 : idtr
+ *
+ * va : virtual address.
+ * pte : pte entries inserted.
+ * log_size: range to be covered.
+ *
+ * Return value: < 0 : error number
+ *
+ * >=0 : slot number allocated for TR.
+ * Must be called with preemption disabled.
+ */
+int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size)
+{
+ int i, r;
+ unsigned long psr;
+ struct ia64_tr_entry *p;
+ int cpu = smp_processor_id();
+
+ r = -EINVAL;
+ /* Check overlap with existing TR entries */
+ if (target_mask & 0x1) {
+ p = &__per_cpu_idtrs[cpu][0][0];
+ for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
+ i++, p++) {
+ if (p->pte & 0x1)
+ if (is_tr_overlap(p, va, log_size)) {
+ printk(KERN_DEBUG "Overlapped Entry"
+ "Inserted for TR Reigster!!\n");
+ goto out;
+ }
+ }
+ }
+ if (target_mask & 0x2) {
+ p = &__per_cpu_idtrs[cpu][1][0];
+ for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
+ i++, p++) {
+ if (p->pte & 0x1)
+ if (is_tr_overlap(p, va, log_size)) {
+ printk(KERN_DEBUG "Overlapped Entry"
+ "Inserted for TR Reigster!!\n");
+ goto out;
+ }
+ }
+ }
+
+ for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) {
+ switch (target_mask & 0x3) {
+ case 1:
+ if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1))
+ goto found;
+ continue;
+ case 2:
+ if (!(__per_cpu_idtrs[cpu][1][i].pte & 0x1))
+ goto found;
+ continue;
+ case 3:
+ if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1) &&
+ !(__per_cpu_idtrs[cpu][1][i].pte & 0x1))
+ goto found;
+ continue;
+ default:
+ r = -EINVAL;
+ goto out;
+ }
+ }
+found:
+ if (i >= per_cpu(ia64_tr_num, cpu))
+ return -EBUSY;
+
+ /* Record TR info for MCA handler use! */
+ if (i > per_cpu(ia64_tr_used, cpu))
+ per_cpu(ia64_tr_used, cpu) = i;
+
+ psr = ia64_clear_ic();
+ if (target_mask & 0x1) {
+ ia64_itr(0x1, i, va, pte, log_size);
+ ia64_srlz_i();
+ p = &__per_cpu_idtrs[cpu][0][i];
+ p->ifa = va;
+ p->pte = pte;
+ p->itir = log_size << 2;
+ p->rr = ia64_get_rr(va);
+ }
+ if (target_mask & 0x2) {
+ ia64_itr(0x2, i, va, pte, log_size);
+ ia64_srlz_i();
+ p = &__per_cpu_idtrs[cpu][1][i];
+ p->ifa = va;
+ p->pte = pte;
+ p->itir = log_size << 2;
+ p->rr = ia64_get_rr(va);
+ }
+ ia64_set_psr(psr);
+ r = i;
+out:
+ return r;
+}
+EXPORT_SYMBOL_GPL(ia64_itr_entry);
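Purely as an illustration of the API documented above (this caller, its names and its constants are hypothetical, not part of the patch): pinning one mapping into an instruction-and-data TR slot and keeping the returned slot for the later purge.

	/* Hedged sketch: example_pin_tr(), "va", "pte" and the page-size
	 * shift 16 (64KB) are invented for illustration.  Preemption is
	 * disabled around the call, as ia64_itr_entry() requires.
	 */
	static int example_pin_tr(u64 va, u64 pte)
	{
		int slot;

		preempt_disable();
		slot = ia64_itr_entry(0x3 /* itr + dtr */, va, pte, 16);
		preempt_enable();

		if (slot < 0)
			printk(KERN_ERR "TR pin failed: %d\n", slot);
		return slot;	/* pass to ia64_ptr_entry() when done */
	}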
+
+/*
+ * ia64_purge_tr
+ *
+ * target_mask: 0x1: purge itr, 0x2 : purge dtr, 0x3 purge idtr.
+ * slot: slot number to be freed.
+ *
+ * Must be called with preemption disabled.
+ */
+void ia64_ptr_entry(u64 target_mask, int slot)
+{
+ int cpu = smp_processor_id();
+ int i;
+ struct ia64_tr_entry *p;
+
+ if (slot < IA64_TR_ALLOC_BASE || slot >= per_cpu(ia64_tr_num, cpu))
+ return;
+
+ if (target_mask & 0x1) {
+ p = &__per_cpu_idtrs[cpu][0][slot];
+ if ((p->pte&0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
+ p->pte = 0;
+ ia64_ptr(0x1, p->ifa, p->itir>>2);
+ ia64_srlz_i();
+ }
+ }
+
+ if (target_mask & 0x2) {
+ p = &__per_cpu_idtrs[cpu][1][slot];
+ if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
+ p->pte = 0;
+ ia64_ptr(0x2, p->ifa, p->itir>>2);
+ ia64_srlz_i();
+ }
+ }
+
+ for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) {
+ if ((__per_cpu_idtrs[cpu][0][i].pte & 0x1) ||
+ (__per_cpu_idtrs[cpu][1][i].pte & 0x1))
+ break;
+ }
+ per_cpu(ia64_tr_used, cpu) = i;
}
+EXPORT_SYMBOL_GPL(ia64_ptr_entry);
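And the matching teardown, equally hypothetical: releasing the slot obtained from the sketch above, using the same target_mask that was used when inserting.

	/* Hedged sketch: example_unpin_tr() is invented for illustration. */
	static void example_unpin_tr(int slot)
	{
		if (slot < 0)
			return;
		preempt_disable();
		ia64_ptr_entry(0x3 /* itr + dtr */, slot);
		preempt_enable();
	}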
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index 81785b78bc1e..9e0b164da9c2 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -199,7 +199,7 @@ xpc_timeout_partition_disengage_request(unsigned long data)
struct xpc_partition *part = (struct xpc_partition *) data;
- DBUG_ON(jiffies < part->disengage_request_timeout);
+ DBUG_ON(time_before(jiffies, part->disengage_request_timeout));
(void) xpc_partition_disengaged(part);
@@ -230,7 +230,7 @@ xpc_hb_beater(unsigned long dummy)
{
xpc_vars->heartbeat++;
- if (jiffies >= xpc_hb_check_timeout) {
+ if (time_after_eq(jiffies, xpc_hb_check_timeout)) {
wake_up_interruptible(&xpc_act_IRQ_wq);
}
@@ -270,7 +270,7 @@ xpc_hb_checker(void *ignore)
/* checking of remote heartbeats is skewed by IRQ handling */
- if (jiffies >= xpc_hb_check_timeout) {
+ if (time_after_eq(jiffies, xpc_hb_check_timeout)) {
dev_dbg(xpc_part, "checking remote heartbeats\n");
xpc_check_remote_hb();
@@ -305,7 +305,7 @@ xpc_hb_checker(void *ignore)
/* wait for IRQ or timeout */
(void) wait_event_interruptible(xpc_act_IRQ_wq,
(last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) ||
- jiffies >= xpc_hb_check_timeout ||
+ time_after_eq(jiffies, xpc_hb_check_timeout) ||
(volatile int) xpc_exiting));
}
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c
index 7ba403232cb8..9e97c2684832 100644
--- a/arch/ia64/sn/kernel/xpc_partition.c
+++ b/arch/ia64/sn/kernel/xpc_partition.c
@@ -877,7 +877,7 @@ xpc_partition_disengaged(struct xpc_partition *part)
disengaged = (xpc_partition_engaged(1UL << partid) == 0);
if (part->disengage_request_timeout) {
if (!disengaged) {
- if (jiffies < part->disengage_request_timeout) {
+ if (time_before(jiffies, part->disengage_request_timeout)) {
/* timelimit hasn't been reached yet */
return 0;
}
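For context on the conversions above (not part of the diff): open-coded tests such as "jiffies >= timeout" give the wrong answer once the jiffies counter wraps, whereas the time_* helpers from <linux/jiffies.h> compare through a signed difference and stay correct across the wrap. Stripped of type checking, they behave like:

	/* simplified sketch of the wrap-safe comparisons used above */
	#define sketch_time_after_eq(a, b)	((long)((a) - (b)) >= 0)
	#define sketch_time_before(a, b)	((long)((b) - (a)) > 0)

so time_after_eq(jiffies, xpc_hb_check_timeout) keeps working even when jiffies has wrapped since the timeout was computed.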
diff --git a/arch/m32r/kernel/Makefile b/arch/m32r/kernel/Makefile
index e97e26e87c9e..09200d4886e3 100644
--- a/arch/m32r/kernel/Makefile
+++ b/arch/m32r/kernel/Makefile
@@ -5,7 +5,7 @@
extra-y := head.o init_task.o vmlinux.lds
obj-y := process.o entry.o traps.o align.o irq.o setup.o time.o \
- m32r_ksyms.o sys_m32r.o semaphore.o signal.o ptrace.o
+ m32r_ksyms.o sys_m32r.o signal.o ptrace.o
obj-$(CONFIG_SMP) += smp.o smpboot.o
obj-$(CONFIG_MODULES) += module.o
diff --git a/arch/m32r/kernel/m32r_ksyms.c b/arch/m32r/kernel/m32r_ksyms.c
index 41a4c95e06d6..e6709fe950ba 100644
--- a/arch/m32r/kernel/m32r_ksyms.c
+++ b/arch/m32r/kernel/m32r_ksyms.c
@@ -7,7 +7,6 @@
#include <linux/interrupt.h>
#include <linux/string.h>
-#include <asm/semaphore.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/checksum.h>
@@ -22,10 +21,6 @@ EXPORT_SYMBOL(dump_fpu);
EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(kernel_thread);
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_interruptible);
-EXPORT_SYMBOL(__up);
-EXPORT_SYMBOL(__down_trylock);
/* Networking helper routines. */
/* Delay loops */
diff --git a/arch/m32r/kernel/semaphore.c b/arch/m32r/kernel/semaphore.c
deleted file mode 100644
index 940c2d37cfd1..000000000000
--- a/arch/m32r/kernel/semaphore.c
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * linux/arch/m32r/semaphore.c
- * orig : i386 2.6.4
- *
- * M32R semaphore implementation.
- *
- * Copyright (c) 2002 - 2004 Hitoshi Yamamoto
- */
-
-/*
- * i386 semaphore implementation.
- *
- * (C) Copyright 1999 Linus Torvalds
- *
- * Portions Copyright 1999 Red Hat, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
- */
-#include <linux/sched.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <asm/semaphore.h>
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to acquire the semaphore, while the "sleeping"
- * variable is a count of such acquires.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * "sleeping" and the contention routine ordering is protected
- * by the spinlock in the semaphore's waitqueue head.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-
-/*
- * Logic:
- * - only on a boundary condition do we need to care. When we go
- * from a negative count to a non-negative, we wake people up.
- * - when we go from a non-negative count to a negative do we
- * (a) synchronize with the "sleeper" count and (b) make sure
- * that we're on the wakeup list before we synchronize so that
- * we cannot lose wakeup events.
- */
-
-asmlinkage void __up(struct semaphore *sem)
-{
- wake_up(&sem->wait);
-}
-
-asmlinkage void __sched __down(struct semaphore * sem)
-{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- unsigned long flags;
-
- tsk->state = TASK_UNINTERRUPTIBLE;
- spin_lock_irqsave(&sem->wait.lock, flags);
- add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
- sem->sleepers++;
- for (;;) {
- int sleepers = sem->sleepers;
-
- /*
- * Add "everybody else" into it. They aren't
- * playing, because we own the spinlock in
- * the wait_queue_head.
- */
- if (!atomic_add_negative(sleepers - 1, &sem->count)) {
- sem->sleepers = 0;
- break;
- }
- sem->sleepers = 1; /* us - see -1 above */
- spin_unlock_irqrestore(&sem->wait.lock, flags);
-
- schedule();
-
- spin_lock_irqsave(&sem->wait.lock, flags);
- tsk->state = TASK_UNINTERRUPTIBLE;
- }
- remove_wait_queue_locked(&sem->wait, &wait);
- wake_up_locked(&sem->wait);
- spin_unlock_irqrestore(&sem->wait.lock, flags);
- tsk->state = TASK_RUNNING;
-}
-
-asmlinkage int __sched __down_interruptible(struct semaphore * sem)
-{
- int retval = 0;
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- unsigned long flags;
-
- tsk->state = TASK_INTERRUPTIBLE;
- spin_lock_irqsave(&sem->wait.lock, flags);
- add_wait_queue_exclusive_locked(&sem->wait, &wait);
-
- sem->sleepers++;
- for (;;) {
- int sleepers = sem->sleepers;
-
- /*
- * With signals pending, this turns into
- * the trylock failure case - we won't be
- * sleeping, and we* can't get the lock as
- * it has contention. Just correct the count
- * and exit.
- */
- if (signal_pending(current)) {
- retval = -EINTR;
- sem->sleepers = 0;
- atomic_add(sleepers, &sem->count);
- break;
- }
-
- /*
- * Add "everybody else" into it. They aren't
- * playing, because we own the spinlock in
- * wait_queue_head. The "-1" is because we're
- * still hoping to get the semaphore.
- */
- if (!atomic_add_negative(sleepers - 1, &sem->count)) {
- sem->sleepers = 0;
- break;
- }
- sem->sleepers = 1; /* us - see -1 above */
- spin_unlock_irqrestore(&sem->wait.lock, flags);
-
- schedule();
-
- spin_lock_irqsave(&sem->wait.lock, flags);
- tsk->state = TASK_INTERRUPTIBLE;
- }
- remove_wait_queue_locked(&sem->wait, &wait);
- wake_up_locked(&sem->wait);
- spin_unlock_irqrestore(&sem->wait.lock, flags);
-
- tsk->state = TASK_RUNNING;
- return retval;
-}
-
-/*
- * Trylock failed - make sure we correct for
- * having decremented the count.
- *
- * We could have done the trylock with a
- * single "cmpxchg" without failure cases,
- * but then it wouldn't work on a 386.
- */
-asmlinkage int __down_trylock(struct semaphore * sem)
-{
- int sleepers;
- unsigned long flags;
-
- spin_lock_irqsave(&sem->wait.lock, flags);
- sleepers = sem->sleepers + 1;
- sem->sleepers = 0;
-
- /*
- * Add "everybody else" and us into it. They aren't
- * playing, because we own the spinlock in the
- * wait_queue_head.
- */
- if (!atomic_add_negative(sleepers, &sem->count)) {
- wake_up_locked(&sem->wait);
- }
-
- spin_unlock_irqrestore(&sem->wait.lock, flags);
- return 1;
-}
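The remaining architectures below follow the same pattern: drop semaphore.o from the Makefile, drop the arch-private __down/__up exports, and delete the arch semaphore.c. Callers are presumably left on a single generic implementation elsewhere in the tree (not shown in these hunks); either way the caller-visible API is unchanged. A hedged reminder of that API:

	/* Sketch only: the backing implementation is assumed, not shown here. */
	static int example_sem_user(void)
	{
		static struct semaphore my_sem;

		sema_init(&my_sem, 1);		/* binary semaphore */
		if (down_interruptible(&my_sem))
			return -EINTR;		/* interrupted while waiting */
		/* ... critical section ... */
		up(&my_sem);
		return 0;
	}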
diff --git a/arch/m68k/kernel/Makefile b/arch/m68k/kernel/Makefile
index a806208c7fb5..7a62a718143b 100644
--- a/arch/m68k/kernel/Makefile
+++ b/arch/m68k/kernel/Makefile
@@ -10,7 +10,7 @@ endif
extra-y += vmlinux.lds
obj-y := entry.o process.o traps.o ints.o signal.o ptrace.o module.o \
- sys_m68k.o time.o semaphore.o setup.o m68k_ksyms.o devres.o
+ sys_m68k.o time.o setup.o m68k_ksyms.o devres.o
devres-y = ../../../kernel/irq/devres.o
diff --git a/arch/m68k/kernel/m68k_ksyms.c b/arch/m68k/kernel/m68k_ksyms.c
index 6fc69c74fe2e..d900e77e5363 100644
--- a/arch/m68k/kernel/m68k_ksyms.c
+++ b/arch/m68k/kernel/m68k_ksyms.c
@@ -1,5 +1,4 @@
#include <linux/module.h>
-#include <asm/semaphore.h>
asmlinkage long long __ashldi3 (long long, int);
asmlinkage long long __ashrdi3 (long long, int);
@@ -15,8 +14,3 @@ EXPORT_SYMBOL(__ashrdi3);
EXPORT_SYMBOL(__lshrdi3);
EXPORT_SYMBOL(__muldi3);
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__down_failed_trylock);
-EXPORT_SYMBOL(__up_wakeup);
-
diff --git a/arch/m68k/kernel/semaphore.c b/arch/m68k/kernel/semaphore.c
deleted file mode 100644
index d12cbbfe6ebd..000000000000
--- a/arch/m68k/kernel/semaphore.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Generic semaphore code. Buyer beware. Do your own
- * specific changes in <asm/semaphore-helper.h>
- */
-
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <asm/semaphore-helper.h>
-
-#ifndef CONFIG_RMW_INSNS
-spinlock_t semaphore_wake_lock;
-#endif
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to sleep, while the "waking" variable is
- * incremented when the "up()" code goes to wake up waiting
- * processes.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * waking_non_zero() (from asm/semaphore.h) must execute
- * atomically.
- *
- * When __up() is called, the count was negative before
- * incrementing it, and we need to wake up somebody.
- *
- * This routine adds one to the count of processes that need to
- * wake up and exit. ALL waiting processes actually wake up but
- * only the one that gets to the "waking" field first will gate
- * through and acquire the semaphore. The others will go back
- * to sleep.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-void __up(struct semaphore *sem)
-{
- wake_one_more(sem);
- wake_up(&sem->wait);
-}
-
-/*
- * Perform the "down" function. Return zero for semaphore acquired,
- * return negative for signalled out of the function.
- *
- * If called from __down, the return is ignored and the wait loop is
- * not interruptible. This means that a task waiting on a semaphore
- * using "down()" cannot be killed until someone does an "up()" on
- * the semaphore.
- *
- * If called from __down_interruptible, the return value gets checked
- * upon return. If the return value is negative then the task continues
- * with the negative value in the return register (it can be tested by
- * the caller).
- *
- * Either form may be used in conjunction with "up()".
- *
- */
-
-
-#define DOWN_HEAD(task_state) \
- \
- \
- current->state = (task_state); \
- add_wait_queue(&sem->wait, &wait); \
- \
- /* \
- * Ok, we're set up. sem->count is known to be less than zero \
- * so we must wait. \
- * \
- * We can let go the lock for purposes of waiting. \
- * We re-acquire it after awaking so as to protect \
- * all semaphore operations. \
- * \
- * If "up()" is called before we call waking_non_zero() then \
- * we will catch it right away. If it is called later then \
- * we will have to go through a wakeup cycle to catch it. \
- * \
- * Multiple waiters contend for the semaphore lock to see \
- * who gets to gate through and who has to wait some more. \
- */ \
- for (;;) {
-
-#define DOWN_TAIL(task_state) \
- current->state = (task_state); \
- } \
- current->state = TASK_RUNNING; \
- remove_wait_queue(&sem->wait, &wait);
-
-void __sched __down(struct semaphore * sem)
-{
- DECLARE_WAITQUEUE(wait, current);
-
- DOWN_HEAD(TASK_UNINTERRUPTIBLE)
- if (waking_non_zero(sem))
- break;
- schedule();
- DOWN_TAIL(TASK_UNINTERRUPTIBLE)
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
- DECLARE_WAITQUEUE(wait, current);
- int ret = 0;
-
- DOWN_HEAD(TASK_INTERRUPTIBLE)
-
- ret = waking_non_zero_interruptible(sem, current);
- if (ret)
- {
- if (ret == 1)
- /* ret != 0 only if we get interrupted -arca */
- ret = 0;
- break;
- }
- schedule();
- DOWN_TAIL(TASK_INTERRUPTIBLE)
- return ret;
-}
-
-int __down_trylock(struct semaphore * sem)
-{
- return waking_non_zero_trylock(sem);
-}
diff --git a/arch/m68k/lib/Makefile b/arch/m68k/lib/Makefile
index 6bbf19f96007..a18af095cd7c 100644
--- a/arch/m68k/lib/Makefile
+++ b/arch/m68k/lib/Makefile
@@ -5,4 +5,4 @@
EXTRA_AFLAGS := -traditional
lib-y := ashldi3.o ashrdi3.o lshrdi3.o muldi3.o \
- checksum.o string.o semaphore.o uaccess.o
+ checksum.o string.o uaccess.o
diff --git a/arch/m68k/lib/semaphore.S b/arch/m68k/lib/semaphore.S
deleted file mode 100644
index 0215624c1602..000000000000
--- a/arch/m68k/lib/semaphore.S
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * linux/arch/m68k/lib/semaphore.S
- *
- * Copyright (C) 1996 Linus Torvalds
- *
- * m68k version by Andreas Schwab
- */
-
-#include <linux/linkage.h>
-#include <asm/semaphore.h>
-
-/*
- * The semaphore operations have a special calling sequence that
- * allow us to do a simpler in-line version of them. These routines
- * need to convert that sequence back into the C sequence when
- * there is contention on the semaphore.
- */
-ENTRY(__down_failed)
- moveml %a0/%d0/%d1,-(%sp)
- movel %a1,-(%sp)
- jbsr __down
- movel (%sp)+,%a1
- moveml (%sp)+,%a0/%d0/%d1
- rts
-
-ENTRY(__down_failed_interruptible)
- movel %a0,-(%sp)
- movel %d1,-(%sp)
- movel %a1,-(%sp)
- jbsr __down_interruptible
- movel (%sp)+,%a1
- movel (%sp)+,%d1
- movel (%sp)+,%a0
- rts
-
-ENTRY(__down_failed_trylock)
- movel %a0,-(%sp)
- movel %d1,-(%sp)
- movel %a1,-(%sp)
- jbsr __down_trylock
- movel (%sp)+,%a1
- movel (%sp)+,%d1
- movel (%sp)+,%a0
- rts
-
-ENTRY(__up_wakeup)
- moveml %a0/%d0/%d1,-(%sp)
- movel %a1,-(%sp)
- jbsr __up
- movel (%sp)+,%a1
- moveml (%sp)+,%a0/%d0/%d1
- rts
-
diff --git a/arch/m68knommu/kernel/Makefile b/arch/m68knommu/kernel/Makefile
index 1524b39ad63f..f0eab3dedb5a 100644
--- a/arch/m68knommu/kernel/Makefile
+++ b/arch/m68knommu/kernel/Makefile
@@ -5,7 +5,7 @@
extra-y := vmlinux.lds
obj-y += dma.o entry.o init_task.o irq.o m68k_ksyms.o process.o ptrace.o \
- semaphore.o setup.o signal.o syscalltable.o sys_m68k.o time.o traps.o
+ setup.o signal.o syscalltable.o sys_m68k.o time.o traps.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_COMEMPCI) += comempci.o
diff --git a/arch/m68knommu/kernel/m68k_ksyms.c b/arch/m68knommu/kernel/m68k_ksyms.c
index 53fad1490282..39fe0a7aec32 100644
--- a/arch/m68knommu/kernel/m68k_ksyms.c
+++ b/arch/m68knommu/kernel/m68k_ksyms.c
@@ -13,7 +13,6 @@
#include <asm/pgalloc.h>
#include <asm/irq.h>
#include <asm/io.h>
-#include <asm/semaphore.h>
#include <asm/checksum.h>
#include <asm/current.h>
@@ -39,11 +38,6 @@ EXPORT_SYMBOL(csum_partial_copy_nocheck);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__down_failed_trylock);
-EXPORT_SYMBOL(__up_wakeup);
-
/*
* libgcc functions - functions that are used internally by the
* compiler... (prototypes are not correct though, but that
diff --git a/arch/m68knommu/kernel/semaphore.c b/arch/m68knommu/kernel/semaphore.c
deleted file mode 100644
index bce2bc7d87c6..000000000000
--- a/arch/m68knommu/kernel/semaphore.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Generic semaphore code. Buyer beware. Do your own
- * specific changes in <asm/semaphore-helper.h>
- */
-
-#include <linux/sched.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <asm/semaphore-helper.h>
-
-#ifndef CONFIG_RMW_INSNS
-spinlock_t semaphore_wake_lock;
-#endif
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to sleep, while the "waking" variable is
- * incremented when the "up()" code goes to wake up waiting
- * processes.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * waking_non_zero() (from asm/semaphore.h) must execute
- * atomically.
- *
- * When __up() is called, the count was negative before
- * incrementing it, and we need to wake up somebody.
- *
- * This routine adds one to the count of processes that need to
- * wake up and exit. ALL waiting processes actually wake up but
- * only the one that gets to the "waking" field first will gate
- * through and acquire the semaphore. The others will go back
- * to sleep.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-void __up(struct semaphore *sem)
-{
- wake_one_more(sem);
- wake_up(&sem->wait);
-}
-
-/*
- * Perform the "down" function. Return zero for semaphore acquired,
- * return negative for signalled out of the function.
- *
- * If called from __down, the return is ignored and the wait loop is
- * not interruptible. This means that a task waiting on a semaphore
- * using "down()" cannot be killed until someone does an "up()" on
- * the semaphore.
- *
- * If called from __down_interruptible, the return value gets checked
- * upon return. If the return value is negative then the task continues
- * with the negative value in the return register (it can be tested by
- * the caller).
- *
- * Either form may be used in conjunction with "up()".
- *
- */
-
-
-#define DOWN_HEAD(task_state) \
- \
- \
- current->state = (task_state); \
- add_wait_queue(&sem->wait, &wait); \
- \
- /* \
- * Ok, we're set up. sem->count is known to be less than zero \
- * so we must wait. \
- * \
- * We can let go the lock for purposes of waiting. \
- * We re-acquire it after awaking so as to protect \
- * all semaphore operations. \
- * \
- * If "up()" is called before we call waking_non_zero() then \
- * we will catch it right away. If it is called later then \
- * we will have to go through a wakeup cycle to catch it. \
- * \
- * Multiple waiters contend for the semaphore lock to see \
- * who gets to gate through and who has to wait some more. \
- */ \
- for (;;) {
-
-#define DOWN_TAIL(task_state) \
- current->state = (task_state); \
- } \
- current->state = TASK_RUNNING; \
- remove_wait_queue(&sem->wait, &wait);
-
-void __sched __down(struct semaphore * sem)
-{
- DECLARE_WAITQUEUE(wait, current);
-
- DOWN_HEAD(TASK_UNINTERRUPTIBLE)
- if (waking_non_zero(sem))
- break;
- schedule();
- DOWN_TAIL(TASK_UNINTERRUPTIBLE)
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
- DECLARE_WAITQUEUE(wait, current);
- int ret = 0;
-
- DOWN_HEAD(TASK_INTERRUPTIBLE)
-
- ret = waking_non_zero_interruptible(sem, current);
- if (ret)
- {
- if (ret == 1)
- /* ret != 0 only if we get interrupted -arca */
- ret = 0;
- break;
- }
- schedule();
- DOWN_TAIL(TASK_INTERRUPTIBLE)
- return ret;
-}
-
-int __down_trylock(struct semaphore * sem)
-{
- return waking_non_zero_trylock(sem);
-}
diff --git a/arch/m68knommu/lib/Makefile b/arch/m68knommu/lib/Makefile
index e051a7913987..d94d709665aa 100644
--- a/arch/m68knommu/lib/Makefile
+++ b/arch/m68knommu/lib/Makefile
@@ -4,4 +4,4 @@
lib-y := ashldi3.o ashrdi3.o lshrdi3.o \
muldi3.o mulsi3.o divsi3.o udivsi3.o modsi3.o umodsi3.o \
- checksum.o semaphore.o memcpy.o memset.o delay.o
+ checksum.o memcpy.o memset.o delay.o
diff --git a/arch/m68knommu/lib/semaphore.S b/arch/m68knommu/lib/semaphore.S
deleted file mode 100644
index 87c746034376..000000000000
--- a/arch/m68knommu/lib/semaphore.S
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * linux/arch/m68k/lib/semaphore.S
- *
- * Copyright (C) 1996 Linus Torvalds
- *
- * m68k version by Andreas Schwab
- *
- * MAR/1999 -- modified to support ColdFire (gerg@snapgear.com)
- */
-
-#include <linux/linkage.h>
-#include <asm/semaphore.h>
-
-/*
- * "down_failed" is called with the eventual return address
- * in %a0, and the address of the semaphore in %a1. We need
- * to increment the number of waiters on the semaphore,
- * call "__down()", and then eventually return to try again.
- */
-ENTRY(__down_failed)
-#ifdef CONFIG_COLDFIRE
- subl #12,%sp
- moveml %a0/%d0/%d1,(%sp)
-#else
- moveml %a0/%d0/%d1,-(%sp)
-#endif
- movel %a1,-(%sp)
- jbsr __down
- movel (%sp)+,%a1
- movel (%sp)+,%d0
- movel (%sp)+,%d1
- rts
-
-ENTRY(__down_failed_interruptible)
- movel %a0,-(%sp)
- movel %d1,-(%sp)
- movel %a1,-(%sp)
- jbsr __down_interruptible
- movel (%sp)+,%a1
- movel (%sp)+,%d1
- rts
-
-ENTRY(__up_wakeup)
-#ifdef CONFIG_COLDFIRE
- subl #12,%sp
- moveml %a0/%d0/%d1,(%sp)
-#else
- moveml %a0/%d0/%d1,-(%sp)
-#endif
- movel %a1,-(%sp)
- jbsr __up
- movel (%sp)+,%a1
- movel (%sp)+,%d0
- movel (%sp)+,%d1
- rts
-
-ENTRY(__down_failed_trylock)
- movel %a0,-(%sp)
- movel %d1,-(%sp)
- movel %a1,-(%sp)
- jbsr __down_trylock
- movel (%sp)+,%a1
- movel (%sp)+,%d1
- movel (%sp)+,%a0
- rts
-
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 9e78e1a4ca17..6fcdb6fda2e2 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -5,7 +5,7 @@
extra-y := head.o init_task.o vmlinux.lds
obj-y += cpu-probe.o branch.o entry.o genex.o irq.o process.o \
- ptrace.o reset.o semaphore.o setup.o signal.o syscall.o \
+ ptrace.o reset.o setup.o signal.o syscall.o \
time.o topology.o traps.o unaligned.o
obj-$(CONFIG_CEVT_BCM1480) += cevt-bcm1480.o
diff --git a/arch/mips/kernel/semaphore.c b/arch/mips/kernel/semaphore.c
deleted file mode 100644
index 1265358cdca1..000000000000
--- a/arch/mips/kernel/semaphore.c
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * MIPS-specific semaphore code.
- *
- * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
- * Copyright (C) 2004 Ralf Baechle <ralf@linux-mips.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * April 2001 - Reworked by Paul Mackerras <paulus@samba.org>
- * to eliminate the SMP races in the old version between the updates
- * of `count' and `waking'. Now we use negative `count' values to
- * indicate that some process(es) are waiting for the semaphore.
- */
-
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <asm/atomic.h>
-#include <asm/cpu-features.h>
-#include <asm/errno.h>
-#include <asm/semaphore.h>
-#include <asm/war.h>
-/*
- * Atomically update sem->count.
- * This does the equivalent of the following:
- *
- * old_count = sem->count;
- * tmp = MAX(old_count, 0) + incr;
- * sem->count = tmp;
- * return old_count;
- *
- * On machines without lld/scd we need a spinlock to make the manipulation of
- * sem->count and sem->waking atomic. Scalability isn't an issue because
- * this lock is used on UP only so it's just an empty variable.
- */
-static inline int __sem_update_count(struct semaphore *sem, int incr)
-{
- int old_count, tmp;
-
- if (cpu_has_llsc && R10000_LLSC_WAR) {
- __asm__ __volatile__(
- " .set mips3 \n"
- "1: ll %0, %2 # __sem_update_count \n"
- " sra %1, %0, 31 \n"
- " not %1 \n"
- " and %1, %0, %1 \n"
- " addu %1, %1, %3 \n"
- " sc %1, %2 \n"
- " beqzl %1, 1b \n"
- " .set mips0 \n"
- : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
- : "r" (incr), "m" (sem->count));
- } else if (cpu_has_llsc) {
- __asm__ __volatile__(
- " .set mips3 \n"
- "1: ll %0, %2 # __sem_update_count \n"
- " sra %1, %0, 31 \n"
- " not %1 \n"
- " and %1, %0, %1 \n"
- " addu %1, %1, %3 \n"
- " sc %1, %2 \n"
- " beqz %1, 1b \n"
- " .set mips0 \n"
- : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
- : "r" (incr), "m" (sem->count));
- } else {
- static DEFINE_SPINLOCK(semaphore_lock);
- unsigned long flags;
-
- spin_lock_irqsave(&semaphore_lock, flags);
- old_count = atomic_read(&sem->count);
- tmp = max_t(int, old_count, 0) + incr;
- atomic_set(&sem->count, tmp);
- spin_unlock_irqrestore(&semaphore_lock, flags);
- }
-
- return old_count;
-}
-
-void __up(struct semaphore *sem)
-{
- /*
- * Note that we incremented count in up() before we came here,
- * but that was ineffective since the result was <= 0, and
- * any negative value of count is equivalent to 0.
- * This ends up setting count to 1, unless count is now > 0
- * (i.e. because some other cpu has called up() in the meantime),
- * in which case we just increment count.
- */
- __sem_update_count(sem, 1);
- wake_up(&sem->wait);
-}
-
-EXPORT_SYMBOL(__up);
-
-/*
- * Note that when we come in to __down or __down_interruptible,
- * we have already decremented count, but that decrement was
- * ineffective since the result was < 0, and any negative value
- * of count is equivalent to 0.
- * Thus it is only when we decrement count from some value > 0
- * that we have actually got the semaphore.
- */
-void __sched __down(struct semaphore *sem)
-{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
-
- __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
- add_wait_queue_exclusive(&sem->wait, &wait);
-
- /*
- * Try to get the semaphore. If the count is > 0, then we've
- * got the semaphore; we decrement count and exit the loop.
- * If the count is 0 or negative, we set it to -1, indicating
- * that we are asleep, and then sleep.
- */
- while (__sem_update_count(sem, -1) <= 0) {
- schedule();
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
- }
- remove_wait_queue(&sem->wait, &wait);
- __set_task_state(tsk, TASK_RUNNING);
-
- /*
- * If there are any more sleepers, wake one of them up so
- * that it can either get the semaphore, or set count to -1
- * indicating that there are still processes sleeping.
- */
- wake_up(&sem->wait);
-}
-
-EXPORT_SYMBOL(__down);
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
- int retval = 0;
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
-
- __set_task_state(tsk, TASK_INTERRUPTIBLE);
- add_wait_queue_exclusive(&sem->wait, &wait);
-
- while (__sem_update_count(sem, -1) <= 0) {
- if (signal_pending(current)) {
- /*
- * A signal is pending - give up trying.
- * Set sem->count to 0 if it is negative,
- * since we are no longer sleeping.
- */
- __sem_update_count(sem, 0);
- retval = -EINTR;
- break;
- }
- schedule();
- set_task_state(tsk, TASK_INTERRUPTIBLE);
- }
- remove_wait_queue(&sem->wait, &wait);
- __set_task_state(tsk, TASK_RUNNING);
-
- wake_up(&sem->wait);
- return retval;
-}
-
-EXPORT_SYMBOL(__down_interruptible);
diff --git a/arch/mn10300/kernel/Makefile b/arch/mn10300/kernel/Makefile
index ef07c956170a..23f2ab67574c 100644
--- a/arch/mn10300/kernel/Makefile
+++ b/arch/mn10300/kernel/Makefile
@@ -3,7 +3,7 @@
#
extra-y := head.o init_task.o vmlinux.lds
-obj-y := process.o semaphore.o signal.o entry.o fpu.o traps.o irq.o \
+obj-y := process.o signal.o entry.o fpu.o traps.o irq.o \
ptrace.o setup.o time.o sys_mn10300.o io.o kthread.o \
switch_to.o mn10300_ksyms.o kernel_execve.o
diff --git a/arch/mn10300/kernel/semaphore.c b/arch/mn10300/kernel/semaphore.c
deleted file mode 100644
index 9153c4039fd2..000000000000
--- a/arch/mn10300/kernel/semaphore.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/* MN10300 Semaphore implementation
- *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-#include <linux/sched.h>
-#include <linux/module.h>
-#include <asm/semaphore.h>
-
-struct sem_waiter {
- struct list_head list;
- struct task_struct *task;
-};
-
-#if SEMAPHORE_DEBUG
-void semtrace(struct semaphore *sem, const char *str)
-{
- if (sem->debug)
- printk(KERN_DEBUG "[%d] %s({%d,%d})\n",
- current->pid,
- str,
- atomic_read(&sem->count),
- list_empty(&sem->wait_list) ? 0 : 1);
-}
-#else
-#define semtrace(SEM, STR) do { } while (0)
-#endif
-
-/*
- * wait for a token to be granted from a semaphore
- * - entered with lock held and interrupts disabled
- */
-void __down(struct semaphore *sem, unsigned long flags)
-{
- struct task_struct *tsk = current;
- struct sem_waiter waiter;
-
- semtrace(sem, "Entering __down");
-
- /* set up my own style of waitqueue */
- waiter.task = tsk;
- get_task_struct(tsk);
-
- list_add_tail(&waiter.list, &sem->wait_list);
-
- /* we don't need to touch the semaphore struct anymore */
- spin_unlock_irqrestore(&sem->wait_lock, flags);
-
- /* wait to be given the semaphore */
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-
- for (;;) {
- if (!waiter.task)
- break;
- schedule();
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
- }
-
- tsk->state = TASK_RUNNING;
- semtrace(sem, "Leaving __down");
-}
-EXPORT_SYMBOL(__down);
-
-/*
- * interruptibly wait for a token to be granted from a semaphore
- * - entered with lock held and interrupts disabled
- */
-int __down_interruptible(struct semaphore *sem, unsigned long flags)
-{
- struct task_struct *tsk = current;
- struct sem_waiter waiter;
- int ret;
-
- semtrace(sem, "Entering __down_interruptible");
-
- /* set up my own style of waitqueue */
- waiter.task = tsk;
- get_task_struct(tsk);
-
- list_add_tail(&waiter.list, &sem->wait_list);
-
- /* we don't need to touch the semaphore struct anymore */
- set_task_state(tsk, TASK_INTERRUPTIBLE);
-
- spin_unlock_irqrestore(&sem->wait_lock, flags);
-
- /* wait to be given the semaphore */
- ret = 0;
- for (;;) {
- if (!waiter.task)
- break;
- if (unlikely(signal_pending(current)))
- goto interrupted;
- schedule();
- set_task_state(tsk, TASK_INTERRUPTIBLE);
- }
-
- out:
- tsk->state = TASK_RUNNING;
- semtrace(sem, "Leaving __down_interruptible");
- return ret;
-
- interrupted:
- spin_lock_irqsave(&sem->wait_lock, flags);
- list_del(&waiter.list);
- spin_unlock_irqrestore(&sem->wait_lock, flags);
-
- ret = 0;
- if (!waiter.task) {
- put_task_struct(current);
- ret = -EINTR;
- }
- goto out;
-}
-EXPORT_SYMBOL(__down_interruptible);
-
-/*
- * release a single token back to a semaphore
- * - entered with lock held and interrupts disabled
- */
-void __up(struct semaphore *sem)
-{
- struct task_struct *tsk;
- struct sem_waiter *waiter;
-
- semtrace(sem, "Entering __up");
-
- /* grant the token to the process at the front of the queue */
- waiter = list_entry(sem->wait_list.next, struct sem_waiter, list);
-
- /* We must be careful not to touch 'waiter' after we set ->task = NULL.
- * It is an allocated on the waiter's stack and may become invalid at
- * any time after that point (due to a wakeup from another source).
- */
- list_del_init(&waiter->list);
- tsk = waiter->task;
- smp_mb();
- waiter->task = NULL;
- wake_up_process(tsk);
- put_task_struct(tsk);
-
- semtrace(sem, "Leaving __up");
-}
-EXPORT_SYMBOL(__up);
diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile
index 27827bc3717e..1f6585a56f97 100644
--- a/arch/parisc/kernel/Makefile
+++ b/arch/parisc/kernel/Makefile
@@ -9,7 +9,7 @@ AFLAGS_pacache.o := -traditional
obj-y := cache.o pacache.o setup.o traps.o time.o irq.o \
pa7300lc.o syscall.o entry.o sys_parisc.o firmware.o \
- ptrace.o hardware.o inventory.o drivers.o semaphore.o \
+ ptrace.o hardware.o inventory.o drivers.o \
signal.o hpmc.o real2.o parisc_ksyms.o unaligned.o \
process.o processor.o pdc_cons.o pdc_chassis.o unwind.o \
topology.o
diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c
index 7aca704e96f0..5b7fc4aa044d 100644
--- a/arch/parisc/kernel/parisc_ksyms.c
+++ b/arch/parisc/kernel/parisc_ksyms.c
@@ -69,11 +69,6 @@ EXPORT_SYMBOL(memcpy_toio);
EXPORT_SYMBOL(memcpy_fromio);
EXPORT_SYMBOL(memset_io);
-#include <asm/semaphore.h>
-EXPORT_SYMBOL(__up);
-EXPORT_SYMBOL(__down_interruptible);
-EXPORT_SYMBOL(__down);
-
extern void $$divI(void);
extern void $$divU(void);
extern void $$remI(void);
diff --git a/arch/parisc/kernel/semaphore.c b/arch/parisc/kernel/semaphore.c
deleted file mode 100644
index ee806bcc3726..000000000000
--- a/arch/parisc/kernel/semaphore.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Semaphore implementation Copyright (c) 2001 Matthew Wilcox, Hewlett-Packard
- */
-
-#include <linux/sched.h>
-#include <linux/spinlock.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-
-/*
- * Semaphores are complex as we wish to avoid using two variables.
- * `count' has multiple roles, depending on its value. If it is positive
- * or zero, there are no waiters. The functions here will never be
- * called; see <asm/semaphore.h>
- *
- * When count is -1 it indicates there is at least one task waiting
- * for the semaphore.
- *
- * When count is less than that, there are '- count - 1' wakeups
- * pending. ie if it has value -3, there are 2 wakeups pending.
- *
- * Note that these functions are only called when there is contention
- * on the lock, and as such all this is the "non-critical" part of the
- * whole semaphore business. The critical part is the inline stuff in
- * <asm/semaphore.h> where we want to avoid any extra jumps and calls.
- */
-void __up(struct semaphore *sem)
-{
- sem->count--;
- wake_up(&sem->wait);
-}
-
-#define wakers(count) (-1 - count)
-
-#define DOWN_HEAD \
- int ret = 0; \
- DECLARE_WAITQUEUE(wait, current); \
- \
- /* Note that someone is waiting */ \
- if (sem->count == 0) \
- sem->count = -1; \
- \
- /* protected by the sentry still -- use unlocked version */ \
- wait.flags = WQ_FLAG_EXCLUSIVE; \
- __add_wait_queue_tail(&sem->wait, &wait); \
- lost_race: \
- spin_unlock_irq(&sem->sentry); \
-
-#define DOWN_TAIL \
- spin_lock_irq(&sem->sentry); \
- if (wakers(sem->count) == 0 && ret == 0) \
- goto lost_race; /* Someone stole our wakeup */ \
- __remove_wait_queue(&sem->wait, &wait); \
- current->state = TASK_RUNNING; \
- if (!waitqueue_active(&sem->wait) && (sem->count < 0)) \
- sem->count = wakers(sem->count);
-
-#define UPDATE_COUNT \
- sem->count += (sem->count < 0) ? 1 : - 1;
-
-
-void __sched __down(struct semaphore * sem)
-{
- DOWN_HEAD
-
- for(;;) {
- set_task_state(current, TASK_UNINTERRUPTIBLE);
- /* we can _read_ this without the sentry */
- if (sem->count != -1)
- break;
- schedule();
- }
-
- DOWN_TAIL
- UPDATE_COUNT
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
- DOWN_HEAD
-
- for(;;) {
- set_task_state(current, TASK_INTERRUPTIBLE);
- /* we can _read_ this without the sentry */
- if (sem->count != -1)
- break;
-
- if (signal_pending(current)) {
- ret = -EINTR;
- break;
- }
- schedule();
- }
-
- DOWN_TAIL
-
- if (!ret) {
- UPDATE_COUNT
- }
-
- return ret;
-}
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index c1baf9d5903f..b9dbfff9afe9 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -12,7 +12,7 @@ CFLAGS_prom_init.o += -fPIC
CFLAGS_btext.o += -fPIC
endif
-obj-y := semaphore.o cputable.o ptrace.o syscalls.o \
+obj-y := cputable.o ptrace.o syscalls.o \
irq.o align.o signal_32.o pmc.o vdso.o \
init_task.o process.o systbl.o idle.o \
signal.o
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 9c98424277a8..65d14e6ddc3c 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -15,7 +15,6 @@
#include <linux/bitops.h>
#include <asm/page.h>
-#include <asm/semaphore.h>
#include <asm/processor.h>
#include <asm/cacheflush.h>
#include <asm/uaccess.h>
diff --git a/arch/powerpc/kernel/semaphore.c b/arch/powerpc/kernel/semaphore.c
deleted file mode 100644
index 2f8c3c951394..000000000000
--- a/arch/powerpc/kernel/semaphore.c
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * PowerPC-specific semaphore code.
- *
- * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * April 2001 - Reworked by Paul Mackerras <paulus@samba.org>
- * to eliminate the SMP races in the old version between the updates
- * of `count' and `waking'. Now we use negative `count' values to
- * indicate that some process(es) are waiting for the semaphore.
- */
-
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/module.h>
-
-#include <asm/atomic.h>
-#include <asm/semaphore.h>
-#include <asm/errno.h>
-
-/*
- * Atomically update sem->count.
- * This does the equivalent of the following:
- *
- * old_count = sem->count;
- * tmp = MAX(old_count, 0) + incr;
- * sem->count = tmp;
- * return old_count;
- */
-static inline int __sem_update_count(struct semaphore *sem, int incr)
-{
- int old_count, tmp;
-
- __asm__ __volatile__("\n"
-"1: lwarx %0,0,%3\n"
-" srawi %1,%0,31\n"
-" andc %1,%0,%1\n"
-" add %1,%1,%4\n"
- PPC405_ERR77(0,%3)
-" stwcx. %1,0,%3\n"
-" bne 1b"
- : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
- : "r" (&sem->count), "r" (incr), "m" (sem->count)
- : "cc");
-
- return old_count;
-}
-
-void __up(struct semaphore *sem)
-{
- /*
- * Note that we incremented count in up() before we came here,
- * but that was ineffective since the result was <= 0, and
- * any negative value of count is equivalent to 0.
- * This ends up setting count to 1, unless count is now > 0
- * (i.e. because some other cpu has called up() in the meantime),
- * in which case we just increment count.
- */
- __sem_update_count(sem, 1);
- wake_up(&sem->wait);
-}
-EXPORT_SYMBOL(__up);
-
-/*
- * Note that when we come in to __down or __down_interruptible,
- * we have already decremented count, but that decrement was
- * ineffective since the result was < 0, and any negative value
- * of count is equivalent to 0.
- * Thus it is only when we decrement count from some value > 0
- * that we have actually got the semaphore.
- */
-void __sched __down(struct semaphore *sem)
-{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
-
- __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
- add_wait_queue_exclusive(&sem->wait, &wait);
-
- /*
- * Try to get the semaphore. If the count is > 0, then we've
- * got the semaphore; we decrement count and exit the loop.
- * If the count is 0 or negative, we set it to -1, indicating
- * that we are asleep, and then sleep.
- */
- while (__sem_update_count(sem, -1) <= 0) {
- schedule();
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
- }
- remove_wait_queue(&sem->wait, &wait);
- __set_task_state(tsk, TASK_RUNNING);
-
- /*
- * If there are any more sleepers, wake one of them up so
- * that it can either get the semaphore, or set count to -1
- * indicating that there are still processes sleeping.
- */
- wake_up(&sem->wait);
-}
-EXPORT_SYMBOL(__down);
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
- int retval = 0;
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
-
- __set_task_state(tsk, TASK_INTERRUPTIBLE);
- add_wait_queue_exclusive(&sem->wait, &wait);
-
- while (__sem_update_count(sem, -1) <= 0) {
- if (signal_pending(current)) {
- /*
- * A signal is pending - give up trying.
- * Set sem->count to 0 if it is negative,
- * since we are no longer sleeping.
- */
- __sem_update_count(sem, 0);
- retval = -EINTR;
- break;
- }
- schedule();
- set_task_state(tsk, TASK_INTERRUPTIBLE);
- }
- remove_wait_queue(&sem->wait, &wait);
- __set_task_state(tsk, TASK_RUNNING);
-
- wake_up(&sem->wait);
- return retval;
-}
-EXPORT_SYMBOL(__down_interruptible);
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index cd870a823d18..06d918d94dd1 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -10,9 +10,6 @@
#include <linux/reboot.h>
#include <linux/delay.h>
#include <linux/initrd.h>
-#if defined(CONFIG_IDE) || defined(CONFIG_IDE_MODULE)
-#include <linux/ide.h>
-#endif
#include <linux/tty.h>
#include <linux/bootmem.h>
#include <linux/seq_file.h>
@@ -51,11 +48,6 @@
extern void bootx_init(unsigned long r4, unsigned long phys);
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
-struct ide_machdep_calls ppc_ide_md;
-EXPORT_SYMBOL(ppc_ide_md);
-#endif
-
int boot_cpuid;
EXPORT_SYMBOL_GPL(boot_cpuid);
int boot_cpuid_phys;
diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
index 1c58db9d42cb..bcf50d7056e9 100644
--- a/arch/powerpc/platforms/powermac/pci.c
+++ b/arch/powerpc/platforms/powermac/pci.c
@@ -1144,28 +1144,6 @@ void __init pmac_pcibios_after_init(void)
{
struct device_node* nd;
-#ifdef CONFIG_BLK_DEV_IDE
- struct pci_dev *dev = NULL;
-
- /* OF fails to initialize IDE controllers on macs
- * (and maybe other machines)
- *
- * Ideally, this should be moved to the IDE layer, but we need
- * to check specifically with Andre Hedrick how to do it cleanly
- * since the common IDE code seem to care about the fact that the
- * BIOS may have disabled a controller.
- *
- * -- BenH
- */
- for_each_pci_dev(dev) {
- if ((dev->class >> 16) != PCI_BASE_CLASS_STORAGE)
- continue;
- if (pci_enable_device(dev))
- printk(KERN_WARNING
- "pci: Failed to enable %s\n", pci_name(dev));
- }
-#endif /* CONFIG_BLK_DEV_IDE */
-
for_each_node_by_name(nd, "firewire") {
if (nd->parent && (of_device_is_compatible(nd, "pci106b,18") ||
of_device_is_compatible(nd, "pci106b,30") ||
diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h
index b3abaaf61eb4..3362e781b6a7 100644
--- a/arch/powerpc/platforms/powermac/pmac.h
+++ b/arch/powerpc/platforms/powermac/pmac.h
@@ -2,7 +2,6 @@
#define __PMAC_H__
#include <linux/pci.h>
-#include <linux/ide.h>
#include <linux/irq.h>
/*
@@ -35,10 +34,6 @@ extern void pmac_check_ht_link(void);
extern void pmac_setup_smp(void);
-extern unsigned long pmac_ide_get_base(int index);
-extern void pmac_ide_init_hwif_ports(hw_regs_t *hw,
- unsigned long data_port, unsigned long ctrl_port, int *irq);
-
extern int pmac_nvram_init(void);
extern void pmac_pic_init(void);
diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 36ff1b6b7fac..2693fc371eab 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -574,14 +574,6 @@ static int __init pmac_probe(void)
ISA_DMA_THRESHOLD = ~0L;
DMA_MODE_READ = 1;
DMA_MODE_WRITE = 2;
-
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
-#ifdef CONFIG_BLK_DEV_IDE_PMAC
- ppc_ide_md.ide_init_hwif = pmac_ide_init_hwif_ports;
- ppc_ide_md.default_io_base = pmac_ide_get_base;
-#endif /* CONFIG_BLK_DEV_IDE_PMAC */
-#endif /* defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) */
-
#endif /* CONFIG_PPC32 */
#ifdef CONFIG_PMAC_SMU
diff --git a/arch/ppc/configs/sandpoint_defconfig b/arch/ppc/configs/sandpoint_defconfig
index fb493a67c60d..9525e34138fc 100644
--- a/arch/ppc/configs/sandpoint_defconfig
+++ b/arch/ppc/configs/sandpoint_defconfig
@@ -189,7 +189,7 @@ CONFIG_IDE_TASKFILE_IO=y
#
# IDE chipset support/bugfixes
#
-CONFIG_IDE_GENERIC=y
+CONFIG_BLK_DEV_SL82C105=y
# CONFIG_BLK_DEV_IDEPCI is not set
# CONFIG_BLK_DEV_IDEDMA is not set
# CONFIG_IDEDMA_AUTO is not set
diff --git a/arch/ppc/kernel/ppc_ksyms.c b/arch/ppc/kernel/ppc_ksyms.c
index c35350250cfc..2ba659f401be 100644
--- a/arch/ppc/kernel/ppc_ksyms.c
+++ b/arch/ppc/kernel/ppc_ksyms.c
@@ -12,7 +12,6 @@
#include <linux/irq.h>
#include <linux/pci.h>
#include <linux/delay.h>
-#include <linux/ide.h>
#include <linux/pm.h>
#include <linux/bitops.h>
@@ -124,10 +123,6 @@ EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
-EXPORT_SYMBOL(ppc_ide_md);
-#endif
-
#ifdef CONFIG_PCI
EXPORT_SYMBOL(isa_io_base);
EXPORT_SYMBOL(isa_mem_base);
diff --git a/arch/ppc/kernel/semaphore.c b/arch/ppc/kernel/semaphore.c
deleted file mode 100644
index 2fe429b27c14..000000000000
--- a/arch/ppc/kernel/semaphore.c
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * PowerPC-specific semaphore code.
- *
- * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * April 2001 - Reworked by Paul Mackerras <paulus@samba.org>
- * to eliminate the SMP races in the old version between the updates
- * of `count' and `waking'. Now we use negative `count' values to
- * indicate that some process(es) are waiting for the semaphore.
- */
-
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <asm/atomic.h>
-#include <asm/semaphore.h>
-#include <asm/errno.h>
-
-/*
- * Atomically update sem->count.
- * This does the equivalent of the following:
- *
- * old_count = sem->count;
- * tmp = MAX(old_count, 0) + incr;
- * sem->count = tmp;
- * return old_count;
- */
-static inline int __sem_update_count(struct semaphore *sem, int incr)
-{
- int old_count, tmp;
-
- __asm__ __volatile__("\n"
-"1: lwarx %0,0,%3\n"
-" srawi %1,%0,31\n"
-" andc %1,%0,%1\n"
-" add %1,%1,%4\n"
- PPC405_ERR77(0,%3)
-" stwcx. %1,0,%3\n"
-" bne 1b"
- : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
- : "r" (&sem->count), "r" (incr), "m" (sem->count)
- : "cc");
-
- return old_count;
-}
-
-void __up(struct semaphore *sem)
-{
- /*
- * Note that we incremented count in up() before we came here,
- * but that was ineffective since the result was <= 0, and
- * any negative value of count is equivalent to 0.
- * This ends up setting count to 1, unless count is now > 0
- * (i.e. because some other cpu has called up() in the meantime),
- * in which case we just increment count.
- */
- __sem_update_count(sem, 1);
- wake_up(&sem->wait);
-}
-
-/*
- * Note that when we come in to __down or __down_interruptible,
- * we have already decremented count, but that decrement was
- * ineffective since the result was < 0, and any negative value
- * of count is equivalent to 0.
- * Thus it is only when we decrement count from some value > 0
- * that we have actually got the semaphore.
- */
-void __sched __down(struct semaphore *sem)
-{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
-
- tsk->state = TASK_UNINTERRUPTIBLE;
- add_wait_queue_exclusive(&sem->wait, &wait);
- smp_wmb();
-
- /*
- * Try to get the semaphore. If the count is > 0, then we've
- * got the semaphore; we decrement count and exit the loop.
- * If the count is 0 or negative, we set it to -1, indicating
- * that we are asleep, and then sleep.
- */
- while (__sem_update_count(sem, -1) <= 0) {
- schedule();
- tsk->state = TASK_UNINTERRUPTIBLE;
- }
- remove_wait_queue(&sem->wait, &wait);
- tsk->state = TASK_RUNNING;
-
- /*
- * If there are any more sleepers, wake one of them up so
- * that it can either get the semaphore, or set count to -1
- * indicating that there are still processes sleeping.
- */
- wake_up(&sem->wait);
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
- int retval = 0;
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
-
- tsk->state = TASK_INTERRUPTIBLE;
- add_wait_queue_exclusive(&sem->wait, &wait);
- smp_wmb();
-
- while (__sem_update_count(sem, -1) <= 0) {
- if (signal_pending(current)) {
- /*
- * A signal is pending - give up trying.
- * Set sem->count to 0 if it is negative,
- * since we are no longer sleeping.
- */
- __sem_update_count(sem, 0);
- retval = -EINTR;
- break;
- }
- schedule();
- tsk->state = TASK_INTERRUPTIBLE;
- }
- tsk->state = TASK_RUNNING;
- remove_wait_queue(&sem->wait, &wait);
- wake_up(&sem->wait);
- return retval;
-}
diff --git a/arch/ppc/kernel/setup.c b/arch/ppc/kernel/setup.c
index 294055902f0c..bfddfdee0b65 100644
--- a/arch/ppc/kernel/setup.c
+++ b/arch/ppc/kernel/setup.c
@@ -10,7 +10,6 @@
#include <linux/reboot.h>
#include <linux/delay.h>
#include <linux/initrd.h>
-#include <linux/ide.h>
#include <linux/screen_info.h>
#include <linux/bootmem.h>
#include <linux/seq_file.h>
@@ -57,7 +56,6 @@ extern void ppc6xx_idle(void);
extern void power4_idle(void);
extern boot_infos_t *boot_infos;
-struct ide_machdep_calls ppc_ide_md;
/* Used with the BI_MEMSIZE bootinfo parameter to store the memory
size value reported by the boot loader. */
diff --git a/arch/ppc/platforms/4xx/bamboo.c b/arch/ppc/platforms/4xx/bamboo.c
index 017623c9bc4b..01f20f4c14fe 100644
--- a/arch/ppc/platforms/4xx/bamboo.c
+++ b/arch/ppc/platforms/4xx/bamboo.c
@@ -22,7 +22,6 @@
#include <linux/blkdev.h>
#include <linux/console.h>
#include <linux/delay.h>
-#include <linux/ide.h>
#include <linux/initrd.h>
#include <linux/seq_file.h>
#include <linux/root_dev.h>
diff --git a/arch/ppc/platforms/4xx/ebony.c b/arch/ppc/platforms/4xx/ebony.c
index 453643a0eeea..8027a36fc5bb 100644
--- a/arch/ppc/platforms/4xx/ebony.c
+++ b/arch/ppc/platforms/4xx/ebony.c
@@ -25,7 +25,6 @@
#include <linux/blkdev.h>
#include <linux/console.h>
#include <linux/delay.h>
-#include <linux/ide.h>
#include <linux/initrd.h>
#include <linux/seq_file.h>
#include <linux/root_dev.h>
diff --git a/arch/ppc/platforms/4xx/luan.c b/arch/ppc/platforms/4xx/luan.c
index b79ebb8a3e6c..f6d8c2e8b6b7 100644
--- a/arch/ppc/platforms/4xx/luan.c
+++ b/arch/ppc/platforms/4xx/luan.c
@@ -23,7 +23,6 @@
#include <linux/blkdev.h>
#include <linux/console.h>
#include <linux/delay.h>
-#include <linux/ide.h>
#include <linux/initrd.h>
#include <linux/seq_file.h>
#include <linux/root_dev.h>
diff --git a/arch/ppc/platforms/4xx/ocotea.c b/arch/ppc/platforms/4xx/ocotea.c
index 28a712cd4800..308386ef6f77 100644
--- a/arch/ppc/platforms/4xx/ocotea.c
+++ b/arch/ppc/platforms/4xx/ocotea.c
@@ -23,7 +23,6 @@
#include <linux/blkdev.h>
#include <linux/console.h>
#include <linux/delay.h>
-#include <linux/ide.h>
#include <linux/initrd.h>
#include <linux/seq_file.h>
#include <linux/root_dev.h>
diff --git a/arch/ppc/platforms/4xx/taishan.c b/arch/ppc/platforms/4xx/taishan.c
index f6a0c6650f33..115694275083 100644
--- a/arch/ppc/platforms/4xx/taishan.c
+++ b/arch/ppc/platforms/4xx/taishan.c
@@ -23,7 +23,6 @@
#include <linux/blkdev.h>
#include <linux/console.h>
#include <linux/delay.h>
-#include <linux/ide.h>
#include <linux/initrd.h>
#include <linux/seq_file.h>
#include <linux/root_dev.h>
diff --git a/arch/ppc/platforms/4xx/yucca.c b/arch/ppc/platforms/4xx/yucca.c
index 66a44ff0d926..f6cfd44281fc 100644
--- a/arch/ppc/platforms/4xx/yucca.c
+++ b/arch/ppc/platforms/4xx/yucca.c
@@ -24,7 +24,6 @@
#include <linux/blkdev.h>
#include <linux/console.h>
#include <linux/delay.h>
-#include <linux/ide.h>
#include <linux/initrd.h>
#include <linux/seq_file.h>
#include <linux/root_dev.h>
diff --git a/arch/ppc/platforms/chestnut.c b/arch/ppc/platforms/chestnut.c
index dcd6070b85eb..27c140f218ed 100644
--- a/arch/ppc/platforms/chestnut.c
+++ b/arch/ppc/platforms/chestnut.c
@@ -22,7 +22,6 @@
#include <linux/initrd.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
-#include <linux/ide.h>
#include <linux/serial.h>
#include <linux/serial_core.h>
#include <linux/serial_8250.h>
diff --git a/arch/ppc/platforms/cpci690.c b/arch/ppc/platforms/cpci690.c
index e78bccf96c9d..07f672d58767 100644
--- a/arch/ppc/platforms/cpci690.c
+++ b/arch/ppc/platforms/cpci690.c
@@ -10,7 +10,6 @@
*/
#include <linux/delay.h>
#include <linux/pci.h>
-#include <linux/ide.h>
#include <linux/irq.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
diff --git a/arch/ppc/platforms/ev64260.c b/arch/ppc/platforms/ev64260.c
index c1f77e1d368e..f522b31c46d7 100644
--- a/arch/ppc/platforms/ev64260.c
+++ b/arch/ppc/platforms/ev64260.c
@@ -23,7 +23,6 @@
#include <linux/delay.h>
#include <linux/pci.h>
-#include <linux/ide.h>
#include <linux/irq.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
diff --git a/arch/ppc/platforms/hdpu.c b/arch/ppc/platforms/hdpu.c
index ca5de13712fd..904b518c152e 100644
--- a/arch/ppc/platforms/hdpu.c
+++ b/arch/ppc/platforms/hdpu.c
@@ -16,7 +16,6 @@
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/irq.h>
-#include <linux/ide.h>
#include <linux/seq_file.h>
#include <linux/platform_device.h>
@@ -604,41 +603,6 @@ static void parse_bootinfo(unsigned long r3,
}
}
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
-static void
-hdpu_ide_request_region(ide_ioreg_t from, unsigned int extent, const char *name)
-{
- request_region(from, extent, name);
- return;
-}
-
-static void hdpu_ide_release_region(ide_ioreg_t from, unsigned int extent)
-{
- release_region(from, extent);
- return;
-}
-
-static void __init
-hdpu_ide_pci_init_hwif_ports(hw_regs_t * hw, ide_ioreg_t data_port,
- ide_ioreg_t ctrl_port, int *irq)
-{
- struct pci_dev *dev;
-
- pci_for_each_dev(dev) {
- if (((dev->class >> 8) == PCI_CLASS_STORAGE_IDE) ||
- ((dev->class >> 8) == PCI_CLASS_STORAGE_RAID)) {
- hw->irq = dev->irq;
-
- if (irq != NULL) {
- *irq = dev->irq;
- }
- }
- }
-
- return;
-}
-#endif
-
void hdpu_heartbeat(void)
{
if (mv64x60_read(&bh, MV64x60_GPP_VALUE) & (1 << 5))
diff --git a/arch/ppc/platforms/lopec.c b/arch/ppc/platforms/lopec.c
index b947c774f524..1e3aa6e9b6c7 100644
--- a/arch/ppc/platforms/lopec.c
+++ b/arch/ppc/platforms/lopec.c
@@ -15,7 +15,6 @@
#include <linux/pci_ids.h>
#include <linux/ioport.h>
#include <linux/init.h>
-#include <linux/ide.h>
#include <linux/seq_file.h>
#include <linux/initrd.h>
#include <linux/console.h>
@@ -168,85 +167,6 @@ lopec_power_off(void)
lopec_halt();
}
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
-int lopec_ide_ports_known = 0;
-static unsigned long lopec_ide_regbase[MAX_HWIFS];
-static unsigned long lopec_ide_ctl_regbase[MAX_HWIFS];
-static unsigned long lopec_idedma_regbase;
-
-static void
-lopec_ide_probe(void)
-{
- struct pci_dev *dev = pci_get_device(PCI_VENDOR_ID_WINBOND,
- PCI_DEVICE_ID_WINBOND_82C105,
- NULL);
- lopec_ide_ports_known = 1;
-
- if (dev) {
- lopec_ide_regbase[0] = dev->resource[0].start;
- lopec_ide_regbase[1] = dev->resource[2].start;
- lopec_ide_ctl_regbase[0] = dev->resource[1].start;
- lopec_ide_ctl_regbase[1] = dev->resource[3].start;
- lopec_idedma_regbase = dev->resource[4].start;
- pci_dev_put(dev);
- }
-}
-
-static int
-lopec_ide_default_irq(unsigned long base)
-{
- if (lopec_ide_ports_known == 0)
- lopec_ide_probe();
-
- if (base == lopec_ide_regbase[0])
- return 14;
- else if (base == lopec_ide_regbase[1])
- return 15;
- else
- return 0;
-}
-
-static unsigned long
-lopec_ide_default_io_base(int index)
-{
- if (lopec_ide_ports_known == 0)
- lopec_ide_probe();
- return lopec_ide_regbase[index];
-}
-
-static void __init
-lopec_ide_init_hwif_ports(hw_regs_t *hw, unsigned long data,
- unsigned long ctl, int *irq)
-{
- unsigned long reg = data;
- uint alt_status_base;
- int i;
-
- for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++)
- hw->io_ports[i] = reg++;
-
- if (data == lopec_ide_regbase[0]) {
- alt_status_base = lopec_ide_ctl_regbase[0] + 2;
- hw->irq = 14;
- } else if (data == lopec_ide_regbase[1]) {
- alt_status_base = lopec_ide_ctl_regbase[1] + 2;
- hw->irq = 15;
- } else {
- alt_status_base = 0;
- hw->irq = 0;
- }
-
- if (ctl)
- hw->io_ports[IDE_CONTROL_OFFSET] = ctl;
- else
- hw->io_ports[IDE_CONTROL_OFFSET] = alt_status_base;
-
- if (irq != NULL)
- *irq = hw->irq;
-
-}
-#endif /* BLK_DEV_IDE */
-
static void __init
lopec_init_IRQ(void)
{
@@ -384,11 +304,6 @@ platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
ppc_md.nvram_read_val = todc_direct_read_val;
ppc_md.nvram_write_val = todc_direct_write_val;
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
- ppc_ide_md.default_irq = lopec_ide_default_irq;
- ppc_ide_md.default_io_base = lopec_ide_default_io_base;
- ppc_ide_md.ide_init_hwif = lopec_ide_init_hwif_ports;
-#endif
#ifdef CONFIG_SERIAL_TEXT_DEBUG
ppc_md.progress = gen550_progress;
#endif
diff --git a/arch/ppc/platforms/mvme5100.c b/arch/ppc/platforms/mvme5100.c
index bb8d4a45437a..053b54ac88f2 100644
--- a/arch/ppc/platforms/mvme5100.c
+++ b/arch/ppc/platforms/mvme5100.c
@@ -17,7 +17,6 @@
#include <linux/initrd.h>
#include <linux/console.h>
#include <linux/delay.h>
-#include <linux/ide.h>
#include <linux/seq_file.h>
#include <linux/kdev_t.h>
#include <linux/root_dev.h>
diff --git a/arch/ppc/platforms/powerpmc250.c b/arch/ppc/platforms/powerpmc250.c
index 4d46650e07fd..162dc85ff7be 100644
--- a/arch/ppc/platforms/powerpmc250.c
+++ b/arch/ppc/platforms/powerpmc250.c
@@ -25,7 +25,6 @@
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
-#include <linux/ide.h>
#include <linux/root_dev.h>
#include <asm/byteorder.h>
diff --git a/arch/ppc/platforms/pplus.c b/arch/ppc/platforms/pplus.c
index 8a1788c48155..cbcac85c7a78 100644
--- a/arch/ppc/platforms/pplus.c
+++ b/arch/ppc/platforms/pplus.c
@@ -19,7 +19,6 @@
#include <linux/ioport.h>
#include <linux/console.h>
#include <linux/pci.h>
-#include <linux/ide.h>
#include <linux/seq_file.h>
#include <linux/root_dev.h>
@@ -668,57 +667,6 @@ static void __init pplus_init_IRQ(void)
ppc_md.progress("init_irq: exit", 0);
}
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
-/*
- * IDE stuff.
- */
-static int pplus_ide_default_irq(unsigned long base)
-{
- switch (base) {
- case 0x1f0:
- return 14;
- case 0x170:
- return 15;
- default:
- return 0;
- }
-}
-
-static unsigned long pplus_ide_default_io_base(int index)
-{
- switch (index) {
- case 0:
- return 0x1f0;
- case 1:
- return 0x170;
- default:
- return 0;
- }
-}
-
-static void __init
-pplus_ide_init_hwif_ports(hw_regs_t * hw, unsigned long data_port,
- unsigned long ctrl_port, int *irq)
-{
- unsigned long reg = data_port;
- int i;
-
- for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) {
- hw->io_ports[i] = reg;
- reg += 1;
- }
-
- if (ctrl_port)
- hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port;
- else
- hw->io_ports[IDE_CONTROL_OFFSET] =
- hw->io_ports[IDE_DATA_OFFSET] + 0x206;
-
- if (irq != NULL)
- *irq = pplus_ide_default_irq(data_port);
-}
-#endif
-
#ifdef CONFIG_SMP
/* PowerPlus (MTX) support */
static int __init smp_pplus_probe(void)
@@ -884,12 +832,6 @@ platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
ppc_md.find_end_of_memory = pplus_find_end_of_memory;
ppc_md.setup_io_mappings = pplus_map_io;
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
- ppc_ide_md.default_irq = pplus_ide_default_irq;
- ppc_ide_md.default_io_base = pplus_ide_default_io_base;
- ppc_ide_md.ide_init_hwif = pplus_ide_init_hwif_ports;
-#endif
-
#ifdef CONFIG_SERIAL_TEXT_DEBUG
ppc_md.progress = gen550_progress;
#endif /* CONFIG_SERIAL_TEXT_DEBUG */
diff --git a/arch/ppc/platforms/prep_setup.c b/arch/ppc/platforms/prep_setup.c
index 38449855d5ff..465b658c927d 100644
--- a/arch/ppc/platforms/prep_setup.c
+++ b/arch/ppc/platforms/prep_setup.c
@@ -33,7 +33,6 @@
#include <linux/console.h>
#include <linux/timex.h>
#include <linux/pci.h>
-#include <linux/ide.h>
#include <linux/seq_file.h>
#include <linux/root_dev.h>
@@ -894,38 +893,6 @@ prep_init_IRQ(void)
i8259_init(MPC10X_MAPA_PCI_INTACK_ADDR, 0);
}
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
-/*
- * IDE stuff.
- */
-static int
-prep_ide_default_irq(unsigned long base)
-{
- switch (base) {
- case 0x1f0: return 13;
- case 0x170: return 13;
- case 0x1e8: return 11;
- case 0x168: return 10;
- case 0xfff0: return 14; /* MCP(N)750 ide0 */
- case 0xffe0: return 15; /* MCP(N)750 ide1 */
- default: return 0;
- }
-}
-
-static unsigned long
-prep_ide_default_io_base(int index)
-{
- switch (index) {
- case 0: return 0x1f0;
- case 1: return 0x170;
- case 2: return 0x1e8;
- case 3: return 0x168;
- default:
- return 0;
- }
-}
-#endif
-
#ifdef CONFIG_SMP
/* PReP (MTX) support */
static int __init
@@ -1070,11 +1037,6 @@ prep_init(unsigned long r3, unsigned long r4, unsigned long r5,
ppc_md.setup_io_mappings = prep_map_io;
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
- ppc_ide_md.default_irq = prep_ide_default_irq;
- ppc_ide_md.default_io_base = prep_ide_default_io_base;
-#endif
-
#ifdef CONFIG_SMP
smp_ops = &prep_smp_ops;
#endif /* CONFIG_SMP */
diff --git a/arch/ppc/platforms/prpmc750.c b/arch/ppc/platforms/prpmc750.c
index fcab513e206d..93bd593cf957 100644
--- a/arch/ppc/platforms/prpmc750.c
+++ b/arch/ppc/platforms/prpmc750.c
@@ -22,7 +22,6 @@
#include <linux/console.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
-#include <linux/ide.h>
#include <linux/root_dev.h>
#include <linux/slab.h>
#include <linux/serial_reg.h>
diff --git a/arch/ppc/platforms/prpmc800.c b/arch/ppc/platforms/prpmc800.c
index f4ade5cd7a88..5bcda7f92cd0 100644
--- a/arch/ppc/platforms/prpmc800.c
+++ b/arch/ppc/platforms/prpmc800.c
@@ -20,7 +20,6 @@
#include <linux/console.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
-#include <linux/ide.h>
#include <linux/root_dev.h>
#include <linux/harrier_defs.h>
diff --git a/arch/ppc/platforms/radstone_ppc7d.c b/arch/ppc/platforms/radstone_ppc7d.c
index 44d4398a36ff..179b4a99b5b5 100644
--- a/arch/ppc/platforms/radstone_ppc7d.c
+++ b/arch/ppc/platforms/radstone_ppc7d.c
@@ -29,7 +29,6 @@
#include <linux/initrd.h>
#include <linux/console.h>
#include <linux/delay.h>
-#include <linux/ide.h>
#include <linux/seq_file.h>
#include <linux/root_dev.h>
#include <linux/serial.h>
diff --git a/arch/ppc/platforms/residual.c b/arch/ppc/platforms/residual.c
index c9911601cfdf..18495e754e30 100644
--- a/arch/ppc/platforms/residual.c
+++ b/arch/ppc/platforms/residual.c
@@ -38,7 +38,6 @@
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/pci.h>
-#include <linux/ide.h>
#include <asm/sections.h>
#include <asm/mmu.h>
diff --git a/arch/ppc/platforms/sandpoint.c b/arch/ppc/platforms/sandpoint.c
index 3352fae1c722..b4897bdb742a 100644
--- a/arch/ppc/platforms/sandpoint.c
+++ b/arch/ppc/platforms/sandpoint.c
@@ -71,7 +71,6 @@
#include <linux/initrd.h>
#include <linux/console.h>
#include <linux/delay.h>
-#include <linux/ide.h>
#include <linux/seq_file.h>
#include <linux/root_dev.h>
#include <linux/serial.h>
@@ -559,93 +558,6 @@ sandpoint_show_cpuinfo(struct seq_file *m)
return 0;
}
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
-/*
- * IDE support.
- */
-static int sandpoint_ide_ports_known = 0;
-static unsigned long sandpoint_ide_regbase[MAX_HWIFS];
-static unsigned long sandpoint_ide_ctl_regbase[MAX_HWIFS];
-static unsigned long sandpoint_idedma_regbase;
-
-static void
-sandpoint_ide_probe(void)
-{
- struct pci_dev *pdev = pci_get_device(PCI_VENDOR_ID_WINBOND,
- PCI_DEVICE_ID_WINBOND_82C105, NULL);
-
- if (pdev) {
- sandpoint_ide_regbase[0]=pdev->resource[0].start;
- sandpoint_ide_regbase[1]=pdev->resource[2].start;
- sandpoint_ide_ctl_regbase[0]=pdev->resource[1].start;
- sandpoint_ide_ctl_regbase[1]=pdev->resource[3].start;
- sandpoint_idedma_regbase=pdev->resource[4].start;
- pci_dev_put(pdev);
- }
-
- sandpoint_ide_ports_known = 1;
-}
-
-static int
-sandpoint_ide_default_irq(unsigned long base)
-{
- if (sandpoint_ide_ports_known == 0)
- sandpoint_ide_probe();
-
- if (base == sandpoint_ide_regbase[0])
- return SANDPOINT_IDE_INT0;
- else if (base == sandpoint_ide_regbase[1])
- return SANDPOINT_IDE_INT1;
- else
- return 0;
-}
-
-static unsigned long
-sandpoint_ide_default_io_base(int index)
-{
- if (sandpoint_ide_ports_known == 0)
- sandpoint_ide_probe();
-
- return sandpoint_ide_regbase[index];
-}
-
-static void __init
-sandpoint_ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port,
- unsigned long ctrl_port, int *irq)
-{
- unsigned long reg = data_port;
- uint alt_status_base;
- int i;
-
- for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; i++) {
- hw->io_ports[i] = reg++;
- }
-
- if (data_port == sandpoint_ide_regbase[0]) {
- alt_status_base = sandpoint_ide_ctl_regbase[0] + 2;
- hw->irq = 14;
- }
- else if (data_port == sandpoint_ide_regbase[1]) {
- alt_status_base = sandpoint_ide_ctl_regbase[1] + 2;
- hw->irq = 15;
- }
- else {
- alt_status_base = 0;
- hw->irq = 0;
- }
-
- if (ctrl_port) {
- hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port;
- } else {
- hw->io_ports[IDE_CONTROL_OFFSET] = alt_status_base;
- }
-
- if (irq != NULL) {
- *irq = hw->irq;
- }
-}
-#endif
-
/*
* Set BAT 3 to map 0xf8000000 to end of physical memory space 1-to-1.
*/
@@ -736,10 +648,4 @@ platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
#ifdef CONFIG_SERIAL_TEXT_DEBUG
ppc_md.progress = gen550_progress;
#endif
-
-#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE)
- ppc_ide_md.default_irq = sandpoint_ide_default_irq;
- ppc_ide_md.default_io_base = sandpoint_ide_default_io_base;
- ppc_ide_md.ide_init_hwif = sandpoint_ide_init_hwif_ports;
-#endif
}
diff --git a/arch/ppc/platforms/sandpoint.h b/arch/ppc/platforms/sandpoint.h
index 3b64e6418489..ed83759e4044 100644
--- a/arch/ppc/platforms/sandpoint.h
+++ b/arch/ppc/platforms/sandpoint.h
@@ -28,9 +28,6 @@
*/
#define SANDPOINT_IDE_INT0 23 /* EPIC 7 */
#define SANDPOINT_IDE_INT1 24 /* EPIC 8 */
-#else
-#define SANDPOINT_IDE_INT0 14 /* 8259 Test */
-#define SANDPOINT_IDE_INT1 15 /* 8259 Test */
#endif
/*
diff --git a/arch/ppc/platforms/spruce.c b/arch/ppc/platforms/spruce.c
index f4de50ba292e..a344134f14b8 100644
--- a/arch/ppc/platforms/spruce.c
+++ b/arch/ppc/platforms/spruce.c
@@ -22,7 +22,6 @@
#include <linux/console.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
-#include <linux/ide.h>
#include <linux/root_dev.h>
#include <linux/serial.h>
#include <linux/tty.h>
diff --git a/arch/ppc/syslib/m8xx_setup.c b/arch/ppc/syslib/m8xx_setup.c
index 9caf850c9b38..19749e9bcf91 100644
--- a/arch/ppc/syslib/m8xx_setup.c
+++ b/arch/ppc/syslib/m8xx_setup.c
@@ -87,8 +87,6 @@ void m8xx_calibrate_decr(void);
unsigned char __res[sizeof(bd_t)];
-extern void m8xx_ide_init(void);
-
extern unsigned long find_available_memory(void);
extern void m8xx_cpm_reset(void);
extern void m8xx_wdt_handler_install(bd_t *bp);
@@ -474,8 +472,4 @@ platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
ppc_md.find_end_of_memory = m8xx_find_end_of_memory;
ppc_md.setup_io_mappings = m8xx_map_io;
-
-#if defined(CONFIG_BLK_DEV_MPC8xx_IDE)
- m8xx_ide_init();
-#endif
}
diff --git a/arch/ppc/syslib/ppc4xx_setup.c b/arch/ppc/syslib/ppc4xx_setup.c
index debe14c083a1..353d746b47e1 100644
--- a/arch/ppc/syslib/ppc4xx_setup.c
+++ b/arch/ppc/syslib/ppc4xx_setup.c
@@ -24,7 +24,6 @@
#include <linux/pci.h>
#include <linux/rtc.h>
#include <linux/console.h>
-#include <linux/ide.h>
#include <linux/serial_reg.h>
#include <linux/seq_file.h>
@@ -189,24 +188,6 @@ ppc4xx_calibrate_decr(void)
mtspr(SPRN_PIT, tb_ticks_per_jiffy);
}
-/*
- * IDE stuff.
- * should be generic for every IDE PCI chipset
- */
-#if defined(CONFIG_PCI) && defined(CONFIG_IDE)
-static void
-ppc4xx_ide_init_hwif_ports(hw_regs_t * hw, unsigned long data_port,
- unsigned long ctrl_port, int *irq)
-{
- int i;
-
- for (i = IDE_DATA_OFFSET; i <= IDE_STATUS_OFFSET; ++i)
- hw->io_ports[i] = data_port + i - IDE_DATA_OFFSET;
-
- hw->io_ports[IDE_CONTROL_OFFSET] = ctrl_port;
-}
-#endif /* defined(CONFIG_PCI) && defined(CONFIG_IDE) */
-
TODC_ALLOC();
/*
@@ -271,10 +252,6 @@ ppc4xx_init(unsigned long r3, unsigned long r4, unsigned long r5,
#ifdef CONFIG_SERIAL_TEXT_DEBUG
ppc_md.progress = gen550_progress;
#endif
-
-#if defined(CONFIG_PCI) && defined(CONFIG_IDE)
- ppc_ide_md.ide_init_hwif = ppc4xx_ide_init_hwif_ports;
-#endif /* defined(CONFIG_PCI) && defined(CONFIG_IDE) */
}
/* Called from machine_check_exception */
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 1831833c430e..f6a68e178fc5 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -3,6 +3,10 @@
# see Documentation/kbuild/kconfig-language.txt.
#
+config SCHED_MC
+ def_bool y
+ depends on SMP
+
config MMU
def_bool y
@@ -39,6 +43,9 @@ config GENERIC_HWEIGHT
config GENERIC_TIME
def_bool y
+config GENERIC_CLOCKEVENTS
+ def_bool y
+
config GENERIC_BUG
bool
depends on BUG
@@ -69,6 +76,8 @@ menu "Base setup"
comment "Processor type and features"
+source "kernel/time/Kconfig"
+
config 64BIT
bool "64 bit kernel"
help
@@ -301,10 +310,7 @@ config QDIO
tristate "QDIO support"
---help---
This driver provides the Queued Direct I/O base support for
- IBM mainframes.
-
- For details please refer to the documentation provided by IBM at
- <http://www10.software.ibm.com/developerworks/opensource/linux390>
+ IBM System z.
To compile this driver as a module, choose M here: the
module will be called qdio.
@@ -486,25 +492,6 @@ config APPLDATA_NET_SUM
source kernel/Kconfig.hz
-config NO_IDLE_HZ
- bool "No HZ timer ticks in idle"
- help
- Switches the regular HZ timer off when the system is going idle.
- This helps z/VM to detect that the Linux system is idle. VM can
- then "swap-out" this guest which reduces memory usage. It also
- reduces the overhead of idle systems.
-
- The HZ timer can be switched on/off via /proc/sys/kernel/hz_timer.
- hz_timer=0 means HZ timer is disabled. hz_timer=1 means HZ
- timer is active.
-
-config NO_IDLE_HZ_INIT
- bool "HZ timer in idle off by default"
- depends on NO_IDLE_HZ
- help
- The HZ timer is switched off in idle by default. That means the
- HZ timer is already disabled at boot time.
-
config S390_HYPFS_FS
bool "s390 hypervisor file system support"
select SYS_HYPERVISOR
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index a3f67f8b5427..e33f32b54c08 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -499,7 +499,7 @@ static struct crypto_alg cbc_aes_alg = {
}
};
-static int __init aes_init(void)
+static int __init aes_s390_init(void)
{
int ret;
@@ -542,15 +542,15 @@ aes_err:
goto out;
}
-static void __exit aes_fini(void)
+static void __exit aes_s390_fini(void)
{
crypto_unregister_alg(&cbc_aes_alg);
crypto_unregister_alg(&ecb_aes_alg);
crypto_unregister_alg(&aes_alg);
}
-module_init(aes_init);
-module_exit(aes_fini);
+module_init(aes_s390_init);
+module_exit(aes_s390_fini);
MODULE_ALIAS("aes");
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index ea22707f435f..4aba83b31596 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -550,7 +550,7 @@ static struct crypto_alg cbc_des3_192_alg = {
}
};
-static int init(void)
+static int des_s390_init(void)
{
int ret = 0;
@@ -612,7 +612,7 @@ des_err:
goto out;
}
-static void __exit fini(void)
+static void __exit des_s390_fini(void)
{
crypto_unregister_alg(&cbc_des3_192_alg);
crypto_unregister_alg(&ecb_des3_192_alg);
@@ -625,8 +625,8 @@ static void __exit fini(void)
crypto_unregister_alg(&des_alg);
}
-module_init(init);
-module_exit(fini);
+module_init(des_s390_init);
+module_exit(des_s390_fini);
MODULE_ALIAS("des");
MODULE_ALIAS("des3_ede");
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index 5a834f6578ab..9cf9eca22747 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -137,7 +137,7 @@ static struct crypto_alg alg = {
.dia_final = sha1_final } }
};
-static int __init init(void)
+static int __init sha1_s390_init(void)
{
if (!crypt_s390_func_available(KIMD_SHA_1))
return -EOPNOTSUPP;
@@ -145,13 +145,13 @@ static int __init init(void)
return crypto_register_alg(&alg);
}
-static void __exit fini(void)
+static void __exit sha1_s390_fini(void)
{
crypto_unregister_alg(&alg);
}
-module_init(init);
-module_exit(fini);
+module_init(sha1_s390_init);
+module_exit(sha1_s390_fini);
MODULE_ALIAS("sha1");
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index ccf8633c4f65..2a3d756b35d4 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -133,7 +133,7 @@ static struct crypto_alg alg = {
.dia_final = sha256_final } }
};
-static int init(void)
+static int sha256_s390_init(void)
{
if (!crypt_s390_func_available(KIMD_SHA_256))
return -EOPNOTSUPP;
@@ -141,13 +141,13 @@ static int init(void)
return crypto_register_alg(&alg);
}
-static void __exit fini(void)
+static void __exit sha256_s390_fini(void)
{
crypto_unregister_alg(&alg);
}
-module_init(init);
-module_exit(fini);
+module_init(sha256_s390_init);
+module_exit(sha256_s390_fini);
MODULE_ALIAS("sha256");
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index cb93bf20bd75..a72f208e62d0 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -3,6 +3,7 @@
# Linux kernel version: 2.6.25-rc4
# Wed Mar 5 11:22:59 2008
#
+CONFIG_SCHED_MC=y
CONFIG_MMU=y
CONFIG_ZONE_DMA=y
CONFIG_LOCKDEP_SUPPORT=y
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 4d3e38392cb1..77051cd27925 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -11,7 +11,7 @@ CFLAGS_smp.o := -Wno-nonnull
obj-y := bitmap.o traps.o time.o process.o base.o early.o \
setup.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \
- semaphore.o s390_ext.o debug.o irq.o ipl.o dis.o diag.o
+ s390_ext.o debug.o irq.o ipl.o dis.o diag.o
obj-y += $(if $(CONFIG_64BIT),entry64.o,entry.o)
obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
@@ -19,7 +19,7 @@ obj-y += $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
extra-y += head.o init_task.o vmlinux.lds
obj-$(CONFIG_MODULES) += s390_ksyms.o module.o
-obj-$(CONFIG_SMP) += smp.o
+obj-$(CONFIG_SMP) += smp.o topology.o
obj-$(CONFIG_AUDIT) += audit.o
compat-obj-$(CONFIG_AUDIT) += compat_audit.o
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
index e89f8c0c42a0..20723a062017 100644
--- a/arch/s390/kernel/compat_linux.h
+++ b/arch/s390/kernel/compat_linux.h
@@ -162,4 +162,77 @@ struct ucontext32 {
compat_sigset_t uc_sigmask; /* mask last for extensibility */
};
+struct __sysctl_args32;
+struct stat64_emu31;
+struct mmap_arg_struct_emu31;
+struct fadvise64_64_args;
+struct old_sigaction32;
+struct old_sigaction32;
+
+long sys32_chown16(const char __user * filename, u16 user, u16 group);
+long sys32_lchown16(const char __user * filename, u16 user, u16 group);
+long sys32_fchown16(unsigned int fd, u16 user, u16 group);
+long sys32_setregid16(u16 rgid, u16 egid);
+long sys32_setgid16(u16 gid);
+long sys32_setreuid16(u16 ruid, u16 euid);
+long sys32_setuid16(u16 uid);
+long sys32_setresuid16(u16 ruid, u16 euid, u16 suid);
+long sys32_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid);
+long sys32_setresgid16(u16 rgid, u16 egid, u16 sgid);
+long sys32_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid);
+long sys32_setfsuid16(u16 uid);
+long sys32_setfsgid16(u16 gid);
+long sys32_getgroups16(int gidsetsize, u16 __user *grouplist);
+long sys32_setgroups16(int gidsetsize, u16 __user *grouplist);
+long sys32_getuid16(void);
+long sys32_geteuid16(void);
+long sys32_getgid16(void);
+long sys32_getegid16(void);
+long sys32_ipc(u32 call, int first, int second, int third, u32 ptr);
+long sys32_truncate64(const char __user * path, unsigned long high,
+ unsigned long low);
+long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low);
+long sys32_sched_rr_get_interval(compat_pid_t pid,
+ struct compat_timespec __user *interval);
+long sys32_rt_sigprocmask(int how, compat_sigset_t __user *set,
+ compat_sigset_t __user *oset, size_t sigsetsize);
+long sys32_rt_sigpending(compat_sigset_t __user *set, size_t sigsetsize);
+long sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo);
+long sys32_execve(void);
+long sys32_init_module(void __user *umod, unsigned long len,
+ const char __user *uargs);
+long sys32_delete_module(const char __user *name_user, unsigned int flags);
+long sys32_gettimeofday(struct compat_timeval __user *tv,
+ struct timezone __user *tz);
+long sys32_settimeofday(struct compat_timeval __user *tv,
+ struct timezone __user *tz);
+long sys32_pause(void);
+long sys32_pread64(unsigned int fd, char __user *ubuf, size_t count,
+ u32 poshi, u32 poslo);
+long sys32_pwrite64(unsigned int fd, const char __user *ubuf,
+ size_t count, u32 poshi, u32 poslo);
+compat_ssize_t sys32_readahead(int fd, u32 offhi, u32 offlo, s32 count);
+long sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset,
+ size_t count);
+long sys32_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset,
+ s32 count);
+long sys32_sysctl(struct __sysctl_args32 __user *args);
+long sys32_stat64(char __user * filename, struct stat64_emu31 __user * statbuf);
+long sys32_lstat64(char __user * filename,
+ struct stat64_emu31 __user * statbuf);
+long sys32_fstat64(unsigned long fd, struct stat64_emu31 __user * statbuf);
+long sys32_fstatat64(unsigned int dfd, char __user *filename,
+ struct stat64_emu31 __user* statbuf, int flag);
+unsigned long old32_mmap(struct mmap_arg_struct_emu31 __user *arg);
+long sys32_mmap2(struct mmap_arg_struct_emu31 __user *arg);
+long sys32_read(unsigned int fd, char __user * buf, size_t count);
+long sys32_write(unsigned int fd, char __user * buf, size_t count);
+long sys32_clone(void);
+long sys32_fadvise64(int fd, loff_t offset, size_t len, int advise);
+long sys32_fadvise64_64(struct fadvise64_64_args __user *args);
+long sys32_sigaction(int sig, const struct old_sigaction32 __user *act,
+ struct old_sigaction32 __user *oact);
+long sys32_rt_sigaction(int sig, const struct sigaction32 __user *act,
+ struct sigaction32 __user *oact, size_t sigsetsize);
+long sys32_sigaltstack(const stack_t32 __user *uss, stack_t32 __user *uoss);
#endif /* _ASM_S390X_S390_H */
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index a5692c460bad..c7f02e777af2 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -29,6 +29,7 @@
#include <asm/lowcore.h>
#include "compat_linux.h"
#include "compat_ptrace.h"
+#include "entry.h"
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
@@ -428,6 +429,10 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
/* Default to using normal stack */
sp = (unsigned long) A(regs->gprs[15]);
+ /* Overflow on alternate signal stack gives SIGSEGV. */
+ if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL))
+ return (void __user *) -1UL;
+
/* This is the X/Open sanctioned signal stack switching. */
if (ka->sa.sa_flags & SA_ONSTACK) {
if (! sas_ss_flags(sp))
@@ -461,6 +466,9 @@ static int setup_frame32(int sig, struct k_sigaction *ka,
if (!access_ok(VERIFY_WRITE, frame, sizeof(sigframe32)))
goto give_sigsegv;
+ if (frame == (void __user *) -1UL)
+ goto give_sigsegv;
+
if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32))
goto give_sigsegv;
@@ -514,6 +522,9 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info,
if (!access_ok(VERIFY_WRITE, frame, sizeof(rt_sigframe32)))
goto give_sigsegv;
+ if (frame == (void __user *) -1UL)
+ goto give_sigsegv;
+
if (copy_siginfo_to_user32(&frame->info, info))
goto give_sigsegv;
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 1b2f5ce45320..1e7d4ac7068b 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -73,7 +73,7 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf,
static int debug_open(struct inode *inode, struct file *file);
static int debug_close(struct inode *inode, struct file *file);
static debug_info_t* debug_info_create(char *name, int pages_per_area,
- int nr_areas, int buf_size);
+ int nr_areas, int buf_size, mode_t mode);
static void debug_info_get(debug_info_t *);
static void debug_info_put(debug_info_t *);
static int debug_prolog_level_fn(debug_info_t * id,
@@ -157,7 +157,7 @@ struct debug_view debug_sprintf_view = {
};
/* used by dump analysis tools to determine version of debug feature */
-unsigned int debug_feature_version = __DEBUG_FEATURE_VERSION;
+static unsigned int __used debug_feature_version = __DEBUG_FEATURE_VERSION;
/* static globals */
@@ -327,7 +327,8 @@ debug_info_free(debug_info_t* db_info){
*/
static debug_info_t*
-debug_info_create(char *name, int pages_per_area, int nr_areas, int buf_size)
+debug_info_create(char *name, int pages_per_area, int nr_areas, int buf_size,
+ mode_t mode)
{
debug_info_t* rc;
@@ -336,6 +337,8 @@ debug_info_create(char *name, int pages_per_area, int nr_areas, int buf_size)
if(!rc)
goto out;
+ rc->mode = mode & ~S_IFMT;
+
/* create root directory */
rc->debugfs_root_entry = debugfs_create_dir(rc->name,
debug_debugfs_root_entry);
@@ -676,23 +679,30 @@ debug_close(struct inode *inode, struct file *file)
}
/*
- * debug_register:
- * - creates and initializes debug area for the caller
- * - returns handle for debug area
+ * debug_register_mode:
+ * - Creates and initializes debug area for the caller
+ * The mode parameter allows to specify access rights for the s390dbf files
+ * - Returns handle for debug area
*/
-debug_info_t*
-debug_register (char *name, int pages_per_area, int nr_areas, int buf_size)
+debug_info_t *debug_register_mode(char *name, int pages_per_area, int nr_areas,
+ int buf_size, mode_t mode, uid_t uid,
+ gid_t gid)
{
debug_info_t *rc = NULL;
+ /* Since debugfs currently does not support uid/gid other than root, */
+ /* we do not allow gid/uid != 0 until we get support for that. */
+ if ((uid != 0) || (gid != 0))
+ printk(KERN_WARNING "debug: Warning - Currently only uid/gid "
+ "= 0 are supported. Using root as owner now!");
if (!initialized)
BUG();
mutex_lock(&debug_mutex);
/* create new debug_info */
- rc = debug_info_create(name, pages_per_area, nr_areas, buf_size);
+ rc = debug_info_create(name, pages_per_area, nr_areas, buf_size, mode);
if(!rc)
goto out;
debug_register_view(rc, &debug_level_view);
@@ -705,6 +715,20 @@ out:
mutex_unlock(&debug_mutex);
return rc;
}
+EXPORT_SYMBOL(debug_register_mode);
+
+/*
+ * debug_register:
+ * - creates and initializes debug area for the caller
+ * - returns handle for debug area
+ */
+
+debug_info_t *debug_register(char *name, int pages_per_area, int nr_areas,
+ int buf_size)
+{
+ return debug_register_mode(name, pages_per_area, nr_areas, buf_size,
+ S_IRUSR | S_IWUSR, 0, 0);
+}
/*
* debug_unregister:
@@ -1073,15 +1097,16 @@ debug_register_view(debug_info_t * id, struct debug_view *view)
int rc = 0;
int i;
unsigned long flags;
- mode_t mode = S_IFREG;
+ mode_t mode;
struct dentry *pde;
if (!id)
goto out;
- if (view->prolog_proc || view->format_proc || view->header_proc)
- mode |= S_IRUSR;
- if (view->input_proc)
- mode |= S_IWUSR;
+ mode = (id->mode | S_IFREG) & ~S_IXUGO;
+ if (!(view->prolog_proc || view->format_proc || view->header_proc))
+ mode &= ~(S_IRUSR | S_IRGRP | S_IROTH);
+ if (!view->input_proc)
+ mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
pde = debugfs_create_file(view->name, mode, id->debugfs_root_entry,
id , &debug_file_ops);
if (!pde){
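debug_register_mode() is the new s390dbf entry point added above; debug_register() now simply forwards to it with mode S_IRUSR | S_IWUSR and uid/gid 0, and per-view file modes are derived from the debug_info's mode. A minimal caller sketch under the interface shown in this hunk; the driver name, buffer geometry and header location are illustrative assumptions, and, as the hunk itself warns, uid/gid other than 0 are not yet honoured:

    /* Hypothetical caller of the interface added above. */
    #include <linux/init.h>
    #include <linux/module.h>
    #include <linux/errno.h>
    #include <linux/stat.h>
    #include <asm/debug.h>          /* assumed location of the s390dbf declarations */

    static debug_info_t *my_dbf;

    static int __init my_debug_setup(void)
    {
            /* 1 page per area, 4 areas, 16-byte entries, files readable by
             * everyone; uid and gid must stay 0 for now. */
            my_dbf = debug_register_mode("mydrv", 1, 4, 16,
                                         S_IRUSR | S_IRGRP | S_IROTH, 0, 0);
            if (!my_dbf)
                    return -ENOMEM;
            debug_register_view(my_dbf, &debug_sprintf_view);
            return 0;
    }

    static void __exit my_debug_teardown(void)
    {
            debug_unregister(my_dbf);
    }

    module_init(my_debug_setup);
    module_exit(my_debug_teardown);
    MODULE_LICENSE("GPL");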
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 01832c440636..540a67f979b6 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -21,6 +21,7 @@
#include <asm/setup.h>
#include <asm/cpcmd.h>
#include <asm/sclp.h>
+#include "entry.h"
/*
* Create a Kernel NSS if the SAVESYS= parameter is defined
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
new file mode 100644
index 000000000000..6b1896345eda
--- /dev/null
+++ b/arch/s390/kernel/entry.h
@@ -0,0 +1,60 @@
+#ifndef _ENTRY_H
+#define _ENTRY_H
+
+#include <linux/types.h>
+#include <linux/signal.h>
+#include <asm/ptrace.h>
+
+typedef void pgm_check_handler_t(struct pt_regs *, long);
+extern pgm_check_handler_t *pgm_check_table[128];
+pgm_check_handler_t do_protection_exception;
+pgm_check_handler_t do_dat_exception;
+
+extern int sysctl_userprocess_debug;
+
+void do_single_step(struct pt_regs *regs);
+void syscall_trace(struct pt_regs *regs, int entryexit);
+void kernel_stack_overflow(struct pt_regs * regs);
+void do_signal(struct pt_regs *regs);
+int handle_signal32(unsigned long sig, struct k_sigaction *ka,
+ siginfo_t *info, sigset_t *oldset, struct pt_regs *regs);
+
+void do_extint(struct pt_regs *regs, unsigned short code);
+int __cpuinit start_secondary(void *cpuvoid);
+void __init startup_init(void);
+void die(const char * str, struct pt_regs * regs, long err);
+
+struct new_utsname;
+struct mmap_arg_struct;
+struct fadvise64_64_args;
+struct old_sigaction;
+struct sel_arg_struct;
+
+long sys_pipe(unsigned long __user *fildes);
+long sys_mmap2(struct mmap_arg_struct __user *arg);
+long old_mmap(struct mmap_arg_struct __user *arg);
+long sys_ipc(uint call, int first, unsigned long second,
+ unsigned long third, void __user *ptr);
+long s390x_newuname(struct new_utsname __user *name);
+long s390x_personality(unsigned long personality);
+long s390_fadvise64(int fd, u32 offset_high, u32 offset_low,
+ size_t len, int advice);
+long s390_fadvise64_64(struct fadvise64_64_args __user *args);
+long s390_fallocate(int fd, int mode, loff_t offset, u32 len_high, u32 len_low);
+long sys_fork(void);
+long sys_clone(void);
+long sys_vfork(void);
+void execve_tail(void);
+long sys_execve(void);
+int sys_sigsuspend(int history0, int history1, old_sigset_t mask);
+long sys_sigaction(int sig, const struct old_sigaction __user *act,
+ struct old_sigaction __user *oact);
+long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss);
+long sys_sigreturn(void);
+long sys_rt_sigreturn(void);
+long sys32_sigreturn(void);
+long sys32_rt_sigreturn(void);
+long old_select(struct sel_arg_struct __user *arg);
+long sys_ptrace(long request, long pid, long addr, long data);
+
+#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index efde6e178f6c..cd959c0b2e16 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -475,6 +475,7 @@ pgm_check_handler:
pgm_no_vtime:
#endif
lg %r9,__LC_THREAD_INFO # load pointer to thread_info struct
+ mvc SP_ARGS(8,%r15),__LC_LAST_BREAK
TRACE_IRQS_OFF
lgf %r3,__LC_PGM_ILC # load program interruption code
lghi %r8,0x7f
@@ -847,6 +848,7 @@ stack_overflow:
je 0f
la %r1,__LC_SAVE_AREA+32
0: mvc SP_R12(32,%r15),0(%r1) # move %r12-%r15 to stack
+ mvc SP_ARGS(8,%r15),__LC_LAST_BREAK
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # clear back chain
la %r2,SP_PTREGS(%r15) # load pt_regs
jg kernel_stack_overflow
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 375232c46c7a..532542447d66 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -655,7 +655,7 @@ static struct kobj_attribute reipl_type_attr =
static struct kset *reipl_kset;
-void reipl_run(struct shutdown_trigger *trigger)
+static void reipl_run(struct shutdown_trigger *trigger)
{
struct ccw_dev_id devid;
static char buf[100];
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index c5549a206284..ed04d1372d5d 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -360,7 +360,7 @@ no_kprobe:
* - When the probed function returns, this probe
* causes the handlers to fire
*/
-void kretprobe_trampoline_holder(void)
+static void __used kretprobe_trampoline_holder(void)
{
asm volatile(".global kretprobe_trampoline\n"
"kretprobe_trampoline: bcr 0,0\n");
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index ce203154d8ce..c1aff194141d 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -36,6 +36,8 @@
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/utsname.h>
+#include <linux/tick.h>
+#include <linux/elfcore.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
@@ -44,6 +46,7 @@
#include <asm/irq.h>
#include <asm/timer.h>
#include <asm/cpu.h>
+#include "entry.h"
asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
@@ -76,6 +79,7 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
* Need to know about CPUs going idle?
*/
static ATOMIC_NOTIFIER_HEAD(idle_chain);
+DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
int register_idle_notifier(struct notifier_block *nb)
{
@@ -89,9 +93,33 @@ int unregister_idle_notifier(struct notifier_block *nb)
}
EXPORT_SYMBOL(unregister_idle_notifier);
-void do_monitor_call(struct pt_regs *regs, long interruption_code)
+static int s390_idle_enter(void)
+{
+ struct s390_idle_data *idle;
+ int nr_calls = 0;
+ void *hcpu;
+ int rc;
+
+ hcpu = (void *)(long)smp_processor_id();
+ rc = __atomic_notifier_call_chain(&idle_chain, S390_CPU_IDLE, hcpu, -1,
+ &nr_calls);
+ if (rc == NOTIFY_BAD) {
+ nr_calls--;
+ __atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE,
+ hcpu, nr_calls, NULL);
+ return rc;
+ }
+ idle = &__get_cpu_var(s390_idle);
+ spin_lock(&idle->lock);
+ idle->idle_count++;
+ idle->in_idle = 1;
+ idle->idle_enter = get_clock();
+ spin_unlock(&idle->lock);
+ return NOTIFY_OK;
+}
+
+void s390_idle_leave(void)
{
-#ifdef CONFIG_SMP
struct s390_idle_data *idle;
idle = &__get_cpu_var(s390_idle);
@@ -99,10 +127,6 @@ void do_monitor_call(struct pt_regs *regs, long interruption_code)
idle->idle_time += get_clock() - idle->idle_enter;
idle->in_idle = 0;
spin_unlock(&idle->lock);
-#endif
- /* disable monitor call class 0 */
- __ctl_clear_bit(8, 15);
-
atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE,
(void *)(long) smp_processor_id());
}
@@ -113,61 +137,30 @@ extern void s390_handle_mcck(void);
*/
static void default_idle(void)
{
- int cpu, rc;
- int nr_calls = 0;
- void *hcpu;
-#ifdef CONFIG_SMP
- struct s390_idle_data *idle;
-#endif
-
/* CPU is going idle. */
- cpu = smp_processor_id();
- hcpu = (void *)(long)cpu;
local_irq_disable();
if (need_resched()) {
local_irq_enable();
return;
}
-
- rc = __atomic_notifier_call_chain(&idle_chain, S390_CPU_IDLE, hcpu, -1,
- &nr_calls);
- if (rc == NOTIFY_BAD) {
- nr_calls--;
- __atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE,
- hcpu, nr_calls, NULL);
+ if (s390_idle_enter() == NOTIFY_BAD) {
local_irq_enable();
return;
}
-
- /* enable monitor call class 0 */
- __ctl_set_bit(8, 15);
-
#ifdef CONFIG_HOTPLUG_CPU
- if (cpu_is_offline(cpu)) {
+ if (cpu_is_offline(smp_processor_id())) {
preempt_enable_no_resched();
cpu_die();
}
#endif
-
local_mcck_disable();
if (test_thread_flag(TIF_MCCK_PENDING)) {
local_mcck_enable();
- /* disable monitor call class 0 */
- __ctl_clear_bit(8, 15);
- atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE,
- hcpu);
+ s390_idle_leave();
local_irq_enable();
s390_handle_mcck();
return;
}
-#ifdef CONFIG_SMP
- idle = &__get_cpu_var(s390_idle);
- spin_lock(&idle->lock);
- idle->idle_count++;
- idle->in_idle = 1;
- idle->idle_enter = get_clock();
- spin_unlock(&idle->lock);
-#endif
trace_hardirqs_on();
/* Wait for external, I/O or machine check interrupt. */
__load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
@@ -177,9 +170,10 @@ static void default_idle(void)
void cpu_idle(void)
{
for (;;) {
+ tick_nohz_stop_sched_tick();
while (!need_resched())
default_idle();
-
+ tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
schedule();
preempt_disable();
@@ -201,6 +195,7 @@ void show_regs(struct pt_regs *regs)
/* Show stack backtrace if pt_regs is from kernel mode */
if (!(regs->psw.mask & PSW_MASK_PSTATE))
show_trace(NULL, (unsigned long *) regs->gprs[15]);
+ show_last_breaking_event(regs);
}
extern void kernel_thread_starter(void);
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 6e036bae9875..58a064296987 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -41,6 +41,7 @@
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
+#include "entry.h"
#ifdef CONFIG_COMPAT
#include "compat_ptrace.h"
diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c
index acf93dba7727..e019b419efc6 100644
--- a/arch/s390/kernel/s390_ext.c
+++ b/arch/s390/kernel/s390_ext.c
@@ -13,11 +13,12 @@
#include <linux/errno.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
-
+#include <asm/cpu.h>
#include <asm/lowcore.h>
#include <asm/s390_ext.h>
#include <asm/irq_regs.h>
#include <asm/irq.h>
+#include "entry.h"
/*
* ext_int_hash[index] is the start of the list for all external interrupts
@@ -119,13 +120,10 @@ void do_extint(struct pt_regs *regs, unsigned short code)
old_regs = set_irq_regs(regs);
irq_enter();
- asm volatile ("mc 0,0");
- if (S390_lowcore.int_clock >= S390_lowcore.jiffy_timer)
- /**
- * Make sure that the i/o interrupt did not "overtake"
- * the last HZ timer interrupt.
- */
- account_ticks(S390_lowcore.int_clock);
+ s390_idle_check();
+ if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
+ /* Serve timer interrupts first. */
+ clock_comparator_work();
kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++;
index = ext_hash(code);
for (p = ext_int_hash[index]; p; p = p->next) {
diff --git a/arch/s390/kernel/s390_ksyms.c b/arch/s390/kernel/s390_ksyms.c
index 7234c737f825..48238a114ce9 100644
--- a/arch/s390/kernel/s390_ksyms.c
+++ b/arch/s390/kernel/s390_ksyms.c
@@ -27,13 +27,6 @@ EXPORT_SYMBOL(_zb_findmap);
EXPORT_SYMBOL(_sb_findmap);
/*
- * semaphore ops
- */
-EXPORT_SYMBOL(__up);
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_interruptible);
-
-/*
* binfmt_elf loader
*/
extern int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs);
diff --git a/arch/s390/kernel/semaphore.c b/arch/s390/kernel/semaphore.c
deleted file mode 100644
index 191303f6c1d8..000000000000
--- a/arch/s390/kernel/semaphore.c
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * linux/arch/s390/kernel/semaphore.c
- *
- * S390 version
- * Copyright (C) 1998-2000 IBM Corporation
- * Author(s): Martin Schwidefsky
- *
- * Derived from "linux/arch/i386/kernel/semaphore.c
- * Copyright (C) 1999, Linus Torvalds
- *
- */
-#include <linux/sched.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-
-#include <asm/semaphore.h>
-
-/*
- * Atomically update sem->count. Equivalent to:
- * old_val = sem->count.counter;
- * new_val = ((old_val >= 0) ? old_val : 0) + incr;
- * sem->count.counter = new_val;
- * return old_val;
- */
-static inline int __sem_update_count(struct semaphore *sem, int incr)
-{
- int old_val, new_val;
-
- asm volatile(
- " l %0,0(%3)\n"
- "0: ltr %1,%0\n"
- " jhe 1f\n"
- " lhi %1,0\n"
- "1: ar %1,%4\n"
- " cs %0,%1,0(%3)\n"
- " jl 0b\n"
- : "=&d" (old_val), "=&d" (new_val), "=m" (sem->count)
- : "a" (&sem->count), "d" (incr), "m" (sem->count)
- : "cc");
- return old_val;
-}
-
-/*
- * The inline function up() incremented count but the result
- * was <= 0. This indicates that some process is waiting on
- * the semaphore. The semaphore is free and we'll wake the
- * first sleeping process, so we set count to 1 unless some
- * other cpu has called up in the meantime in which case
- * we just increment count by 1.
- */
-void __up(struct semaphore *sem)
-{
- __sem_update_count(sem, 1);
- wake_up(&sem->wait);
-}
-
-/*
- * The inline function down() decremented count and the result
- * was < 0. The wait loop will atomically test and update the
- * semaphore counter following the rules:
- * count > 0: decrement count, wake up queue and exit.
- * count <= 0: set count to -1, go to sleep.
- */
-void __sched __down(struct semaphore * sem)
-{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
-
- __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
- add_wait_queue_exclusive(&sem->wait, &wait);
- while (__sem_update_count(sem, -1) <= 0) {
- schedule();
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
- }
- remove_wait_queue(&sem->wait, &wait);
- __set_task_state(tsk, TASK_RUNNING);
- wake_up(&sem->wait);
-}
-
-/*
- * Same as __down() with an additional test for signals.
- * If a signal is pending the count is updated as follows:
- * count > 0: wake up queue and exit.
- * count <= 0: set count to 0, wake up queue and exit.
- */
-int __sched __down_interruptible(struct semaphore * sem)
-{
- int retval = 0;
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
-
- __set_task_state(tsk, TASK_INTERRUPTIBLE);
- add_wait_queue_exclusive(&sem->wait, &wait);
- while (__sem_update_count(sem, -1) <= 0) {
- if (signal_pending(current)) {
- __sem_update_count(sem, 0);
- retval = -EINTR;
- break;
- }
- schedule();
- set_task_state(tsk, TASK_INTERRUPTIBLE);
- }
- remove_wait_queue(&sem->wait, &wait);
- __set_task_state(tsk, TASK_RUNNING);
- wake_up(&sem->wait);
- return retval;
-}
-
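The deleted __sem_update_count() above implements, as a compare-and-swap loop in s390 assembly, exactly the update described in its comment: old = count; new = (old >= 0 ? old : 0) + incr; return old. A portable sketch of the same logic using C11 atomics, for illustration only and not part of the patch:

    #include <stdatomic.h>

    /* Same semantics as the deleted __sem_update_count(): clamp a negative
     * count to 0, add incr, retry the CAS until it wins, return the old value. */
    static int sem_update_count(atomic_int *count, int incr)
    {
            int old_val = atomic_load(count);
            int new_val;

            do {
                    new_val = (old_val >= 0 ? old_val : 0) + incr;
            } while (!atomic_compare_exchange_weak(count, &old_val, new_val));

            return old_val;
    }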
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 290e504061a3..7141147e6b63 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -39,6 +39,7 @@
#include <linux/pfn.h>
#include <linux/ctype.h>
#include <linux/reboot.h>
+#include <linux/topology.h>
#include <asm/ipl.h>
#include <asm/uaccess.h>
@@ -427,7 +428,7 @@ setup_lowcore(void)
lc->io_new_psw.mask = psw_kernel_bits;
lc->io_new_psw.addr = PSW_ADDR_AMODE | (unsigned long) io_int_handler;
lc->ipl_device = S390_lowcore.ipl_device;
- lc->jiffy_timer = -1LL;
+ lc->clock_comparator = -1ULL;
lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE;
lc->async_stack = (unsigned long)
__alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE;
@@ -687,7 +688,7 @@ static __init unsigned int stfl(void)
return S390_lowcore.stfl_fac_list;
}
-static __init int stfle(unsigned long long *list, int doublewords)
+static int __init __stfle(unsigned long long *list, int doublewords)
{
typedef struct { unsigned long long _[doublewords]; } addrtype;
register unsigned long __nr asm("0") = doublewords - 1;
@@ -697,6 +698,13 @@ static __init int stfle(unsigned long long *list, int doublewords)
return __nr + 1;
}
+int __init stfle(unsigned long long *list, int doublewords)
+{
+ if (!(stfl() & (1UL << 24)))
+ return -EOPNOTSUPP;
+ return __stfle(list, doublewords);
+}
+
/*
* Setup hardware capabilities.
*/
@@ -741,7 +749,7 @@ static void __init setup_hwcaps(void)
* HWCAP_S390_DFP bit 6.
*/
if ((elf_hwcap & (1UL << 2)) &&
- stfle(&facility_list_extended, 1) > 0) {
+ __stfle(&facility_list_extended, 1) > 0) {
if (facility_list_extended & (1ULL << (64 - 43)))
elf_hwcap |= 1UL << 6;
}
@@ -823,6 +831,7 @@ setup_arch(char **cmdline_p)
cpu_init();
__cpu_logical_map[0] = S390_lowcore.cpu_data.cpu_addr;
+ s390_init_cpu_topology();
/*
* Setup capabilities (ELF_HWCAP & ELF_PLATFORM).
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 4449bf32cbf1..b97682040215 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -27,6 +27,7 @@
#include <asm/ucontext.h>
#include <asm/uaccess.h>
#include <asm/lowcore.h>
+#include "entry.h"
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
@@ -235,6 +236,10 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
/* Default to using normal stack */
sp = regs->gprs[15];
+ /* Overflow on alternate signal stack gives SIGSEGV. */
+ if (on_sig_stack(sp) && !on_sig_stack((sp - frame_size) & -8UL))
+ return (void __user *) -1UL;
+
/* This is the X/Open sanctioned signal stack switching. */
if (ka->sa.sa_flags & SA_ONSTACK) {
if (! sas_ss_flags(sp))
@@ -270,6 +275,9 @@ static int setup_frame(int sig, struct k_sigaction *ka,
if (!access_ok(VERIFY_WRITE, frame, sizeof(sigframe)))
goto give_sigsegv;
+ if (frame == (void __user *) -1UL)
+ goto give_sigsegv;
+
if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE))
goto give_sigsegv;
@@ -327,6 +335,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
if (!access_ok(VERIFY_WRITE, frame, sizeof(rt_sigframe)))
goto give_sigsegv;
+ if (frame == (void __user *) -1UL)
+ goto give_sigsegv;
+
if (copy_siginfo_to_user(&frame->info, info))
goto give_sigsegv;
@@ -474,11 +485,6 @@ void do_signal(struct pt_regs *regs)
int ret;
#ifdef CONFIG_COMPAT
if (test_thread_flag(TIF_31BIT)) {
- extern int handle_signal32(unsigned long sig,
- struct k_sigaction *ka,
- siginfo_t *info,
- sigset_t *oldset,
- struct pt_regs *regs);
ret = handle_signal32(signr, &ka, &info, oldset, regs);
}
else
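The check added to get_sigframe() above rounds the prospective frame base down to an 8-byte boundary, (sp - frame_size) & -8UL, and returns -1UL (which the setup_*frame() callers turn into SIGSEGV) if that base no longer lies on the alternate signal stack. A tiny standalone illustration of the rounding step, with made-up numbers:

    #include <stdio.h>

    int main(void)
    {
            unsigned long sp = 0x7fff1007UL, frame_size = 0x20UL;

            /* Round the candidate frame base down to an 8-byte boundary,
             * as (sp - frame_size) & -8UL does in get_sigframe(). */
            unsigned long base = (sp - frame_size) & -8UL;

            printf("%#lx\n", base);         /* prints 0x7fff0fe0 */
            return 0;
    }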
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 8f894d380a62..0dfa988c1b26 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -44,6 +44,7 @@
#include <asm/lowcore.h>
#include <asm/sclp.h>
#include <asm/cpu.h>
+#include "entry.h"
/*
* An array with a pointer the lowcore of every CPU.
@@ -67,13 +68,12 @@ enum s390_cpu_state {
CPU_STATE_CONFIGURED,
};
-#ifdef CONFIG_HOTPLUG_CPU
-static DEFINE_MUTEX(smp_cpu_state_mutex);
-#endif
+DEFINE_MUTEX(smp_cpu_state_mutex);
+int smp_cpu_polarization[NR_CPUS];
static int smp_cpu_state[NR_CPUS];
+static int cpu_management;
static DEFINE_PER_CPU(struct cpu, cpu_devices);
-DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
static void smp_ext_bitcall(int, ec_bit_sig);
@@ -298,7 +298,7 @@ static void smp_ext_bitcall(int cpu, ec_bit_sig sig)
/*
* this function sends a 'purge tlb' signal to another CPU.
*/
-void smp_ptlb_callback(void *info)
+static void smp_ptlb_callback(void *info)
{
__tlb_flush_local();
}
@@ -456,6 +456,7 @@ static int smp_rescan_cpus_sigp(cpumask_t avail)
if (cpu_known(cpu_id))
continue;
__cpu_logical_map[logical_cpu] = cpu_id;
+ smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN;
if (!cpu_stopped(logical_cpu))
continue;
cpu_set(logical_cpu, cpu_present_map);
@@ -489,6 +490,7 @@ static int smp_rescan_cpus_sclp(cpumask_t avail)
if (cpu_known(cpu_id))
continue;
__cpu_logical_map[logical_cpu] = cpu_id;
+ smp_cpu_polarization[logical_cpu] = POLARIZATION_UNKNWN;
cpu_set(logical_cpu, cpu_present_map);
if (cpu >= info->configured)
smp_cpu_state[logical_cpu] = CPU_STATE_STANDBY;
@@ -846,6 +848,7 @@ void __init smp_prepare_boot_cpu(void)
S390_lowcore.percpu_offset = __per_cpu_offset[0];
current_set[0] = current;
smp_cpu_state[0] = CPU_STATE_CONFIGURED;
+ smp_cpu_polarization[0] = POLARIZATION_UNKNWN;
spin_lock_init(&(&__get_cpu_var(s390_idle))->lock);
}
@@ -897,15 +900,19 @@ static ssize_t cpu_configure_store(struct sys_device *dev, const char *buf,
case 0:
if (smp_cpu_state[cpu] == CPU_STATE_CONFIGURED) {
rc = sclp_cpu_deconfigure(__cpu_logical_map[cpu]);
- if (!rc)
+ if (!rc) {
smp_cpu_state[cpu] = CPU_STATE_STANDBY;
+ smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN;
+ }
}
break;
case 1:
if (smp_cpu_state[cpu] == CPU_STATE_STANDBY) {
rc = sclp_cpu_configure(__cpu_logical_map[cpu]);
- if (!rc)
+ if (!rc) {
smp_cpu_state[cpu] = CPU_STATE_CONFIGURED;
+ smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN;
+ }
}
break;
default:
@@ -919,6 +926,34 @@ out:
static SYSDEV_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
#endif /* CONFIG_HOTPLUG_CPU */
+static ssize_t cpu_polarization_show(struct sys_device *dev, char *buf)
+{
+ int cpu = dev->id;
+ ssize_t count;
+
+ mutex_lock(&smp_cpu_state_mutex);
+ switch (smp_cpu_polarization[cpu]) {
+ case POLARIZATION_HRZ:
+ count = sprintf(buf, "horizontal\n");
+ break;
+ case POLARIZATION_VL:
+ count = sprintf(buf, "vertical:low\n");
+ break;
+ case POLARIZATION_VM:
+ count = sprintf(buf, "vertical:medium\n");
+ break;
+ case POLARIZATION_VH:
+ count = sprintf(buf, "vertical:high\n");
+ break;
+ default:
+ count = sprintf(buf, "unknown\n");
+ break;
+ }
+ mutex_unlock(&smp_cpu_state_mutex);
+ return count;
+}
+static SYSDEV_ATTR(polarization, 0444, cpu_polarization_show, NULL);
+
static ssize_t show_cpu_address(struct sys_device *dev, char *buf)
{
return sprintf(buf, "%d\n", __cpu_logical_map[dev->id]);
@@ -931,6 +966,7 @@ static struct attribute *cpu_common_attrs[] = {
&attr_configure.attr,
#endif
&attr_address.attr,
+ &attr_polarization.attr,
NULL,
};
@@ -1075,11 +1111,48 @@ static ssize_t __ref rescan_store(struct sys_device *dev,
out:
put_online_cpus();
mutex_unlock(&smp_cpu_state_mutex);
+ if (!cpus_empty(newcpus))
+ topology_schedule_update();
return rc ? rc : count;
}
static SYSDEV_ATTR(rescan, 0200, NULL, rescan_store);
#endif /* CONFIG_HOTPLUG_CPU */
+static ssize_t dispatching_show(struct sys_device *dev, char *buf)
+{
+ ssize_t count;
+
+ mutex_lock(&smp_cpu_state_mutex);
+ count = sprintf(buf, "%d\n", cpu_management);
+ mutex_unlock(&smp_cpu_state_mutex);
+ return count;
+}
+
+static ssize_t dispatching_store(struct sys_device *dev, const char *buf,
+ size_t count)
+{
+ int val, rc;
+ char delim;
+
+ if (sscanf(buf, "%d %c", &val, &delim) != 1)
+ return -EINVAL;
+ if (val != 0 && val != 1)
+ return -EINVAL;
+ rc = 0;
+ mutex_lock(&smp_cpu_state_mutex);
+ get_online_cpus();
+ if (cpu_management == val)
+ goto out;
+ rc = topology_set_cpu_management(val);
+ if (!rc)
+ cpu_management = val;
+out:
+ put_online_cpus();
+ mutex_unlock(&smp_cpu_state_mutex);
+ return rc ? rc : count;
+}
+static SYSDEV_ATTR(dispatching, 0644, dispatching_show, dispatching_store);
+
static int __init topology_init(void)
{
int cpu;
@@ -1093,6 +1166,10 @@ static int __init topology_init(void)
if (rc)
return rc;
#endif
+ rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+ &attr_dispatching.attr);
+ if (rc)
+ return rc;
for_each_present_cpu(cpu) {
rc = smp_add_present_cpu(cpu);
if (rc)
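dispatching_store() above validates its sysfs input with sscanf(buf, "%d %c", &val, &delim) != 1, i.e. it accepts exactly one integer with nothing but trailing whitespace. A small userspace sketch of that parsing idiom, for illustration only:

    #include <stdio.h>

    /* Accept exactly one integer, optionally followed by whitespace; reject
     * anything with trailing junk (the "%d %c" trick used by dispatching_store). */
    static int parse_single_int(const char *buf, int *val)
    {
            char delim;

            if (sscanf(buf, "%d %c", val, &delim) != 1)
                    return -1;
            return 0;
    }

    int main(void)
    {
            int v;

            printf("%d\n", parse_single_int("1\n", &v));    /* 0  (accepted) */
            printf("%d\n", parse_single_int("1 x\n", &v));  /* -1 (rejected) */
            return 0;
    }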
diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c
index fefee99f28aa..988d0d64c2c8 100644
--- a/arch/s390/kernel/sys_s390.c
+++ b/arch/s390/kernel/sys_s390.c
@@ -29,8 +29,8 @@
#include <linux/personality.h>
#include <linux/unistd.h>
#include <linux/ipc.h>
-
#include <asm/uaccess.h>
+#include "entry.h"
/*
* sys_pipe() is the normal C calling standard for creating
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index cb232c155360..7aec676fefd5 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -30,7 +30,7 @@
#include <linux/timex.h>
#include <linux/notifier.h>
#include <linux/clocksource.h>
-
+#include <linux/clockchips.h>
#include <asm/uaccess.h>
#include <asm/delay.h>
#include <asm/s390_ext.h>
@@ -39,6 +39,7 @@
#include <asm/irq_regs.h>
#include <asm/timer.h>
#include <asm/etr.h>
+#include <asm/cio.h>
/* change this if you have some constant time drift */
#define USECS_PER_JIFFY ((unsigned long) 1000000/HZ)
@@ -57,16 +58,16 @@
static ext_int_info_t ext_int_info_cc;
static ext_int_info_t ext_int_etr_cc;
-static u64 init_timer_cc;
static u64 jiffies_timer_cc;
-static u64 xtime_cc;
+
+static DEFINE_PER_CPU(struct clock_event_device, comparators);
/*
* Scheduler clock - returns current time in nanosec units.
*/
unsigned long long sched_clock(void)
{
- return ((get_clock() - jiffies_timer_cc) * 125) >> 9;
+ return ((get_clock_xt() - jiffies_timer_cc) * 125) >> 9;
}
/*
@@ -95,162 +96,40 @@ void tod_to_timeval(__u64 todval, struct timespec *xtime)
#define s390_do_profile() do { ; } while(0)
#endif /* CONFIG_PROFILING */
-/*
- * Advance the per cpu tick counter up to the time given with the
- * "time" argument. The per cpu update consists of accounting
- * the virtual cpu time, calling update_process_times and calling
- * the profiling hook. If xtime is before time it is advanced as well.
- */
-void account_ticks(u64 time)
+void clock_comparator_work(void)
{
- __u32 ticks;
- __u64 tmp;
-
- /* Calculate how many ticks have passed. */
- if (time < S390_lowcore.jiffy_timer)
- return;
- tmp = time - S390_lowcore.jiffy_timer;
- if (tmp >= 2*CLK_TICKS_PER_JIFFY) { /* more than two ticks ? */
- ticks = __div(tmp, CLK_TICKS_PER_JIFFY) + 1;
- S390_lowcore.jiffy_timer +=
- CLK_TICKS_PER_JIFFY * (__u64) ticks;
- } else if (tmp >= CLK_TICKS_PER_JIFFY) {
- ticks = 2;
- S390_lowcore.jiffy_timer += 2*CLK_TICKS_PER_JIFFY;
- } else {
- ticks = 1;
- S390_lowcore.jiffy_timer += CLK_TICKS_PER_JIFFY;
- }
-
-#ifdef CONFIG_SMP
- /*
- * Do not rely on the boot cpu to do the calls to do_timer.
- * Spread it over all cpus instead.
- */
- write_seqlock(&xtime_lock);
- if (S390_lowcore.jiffy_timer > xtime_cc) {
- __u32 xticks;
- tmp = S390_lowcore.jiffy_timer - xtime_cc;
- if (tmp >= 2*CLK_TICKS_PER_JIFFY) {
- xticks = __div(tmp, CLK_TICKS_PER_JIFFY);
- xtime_cc += (__u64) xticks * CLK_TICKS_PER_JIFFY;
- } else {
- xticks = 1;
- xtime_cc += CLK_TICKS_PER_JIFFY;
- }
- do_timer(xticks);
- }
- write_sequnlock(&xtime_lock);
-#else
- do_timer(ticks);
-#endif
-
- while (ticks--)
- update_process_times(user_mode(get_irq_regs()));
+ struct clock_event_device *cd;
+ S390_lowcore.clock_comparator = -1ULL;
+ set_clock_comparator(S390_lowcore.clock_comparator);
+ cd = &__get_cpu_var(comparators);
+ cd->event_handler(cd);
s390_do_profile();
}
-#ifdef CONFIG_NO_IDLE_HZ
-
-#ifdef CONFIG_NO_IDLE_HZ_INIT
-int sysctl_hz_timer = 0;
-#else
-int sysctl_hz_timer = 1;
-#endif
-
-/*
- * Stop the HZ tick on the current CPU.
- * Only cpu_idle may call this function.
- */
-static void stop_hz_timer(void)
-{
- unsigned long flags;
- unsigned long seq, next;
- __u64 timer, todval;
- int cpu = smp_processor_id();
-
- if (sysctl_hz_timer != 0)
- return;
-
- cpu_set(cpu, nohz_cpu_mask);
-
- /*
- * Leave the clock comparator set up for the next timer
- * tick if either rcu or a softirq is pending.
- */
- if (rcu_needs_cpu(cpu) || local_softirq_pending()) {
- cpu_clear(cpu, nohz_cpu_mask);
- return;
- }
-
- /*
- * This cpu is going really idle. Set up the clock comparator
- * for the next event.
- */
- next = next_timer_interrupt();
- do {
- seq = read_seqbegin_irqsave(&xtime_lock, flags);
- timer = ((__u64) next) - ((__u64) jiffies) + jiffies_64;
- } while (read_seqretry_irqrestore(&xtime_lock, seq, flags));
- todval = -1ULL;
- /* Be careful about overflows. */
- if (timer < (-1ULL / CLK_TICKS_PER_JIFFY)) {
- timer = jiffies_timer_cc + timer * CLK_TICKS_PER_JIFFY;
- if (timer >= jiffies_timer_cc)
- todval = timer;
- }
- set_clock_comparator(todval);
-}
-
/*
- * Start the HZ tick on the current CPU.
- * Only cpu_idle may call this function.
+ * Fixup the clock comparator.
*/
-static void start_hz_timer(void)
+static void fixup_clock_comparator(unsigned long long delta)
{
- if (!cpu_isset(smp_processor_id(), nohz_cpu_mask))
+ /* If nobody is waiting there's nothing to fix. */
+ if (S390_lowcore.clock_comparator == -1ULL)
return;
- account_ticks(get_clock());
- set_clock_comparator(S390_lowcore.jiffy_timer + CPU_DEVIATION);
- cpu_clear(smp_processor_id(), nohz_cpu_mask);
-}
-
-static int nohz_idle_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
-{
- switch (action) {
- case S390_CPU_IDLE:
- stop_hz_timer();
- break;
- case S390_CPU_NOT_IDLE:
- start_hz_timer();
- break;
- }
- return NOTIFY_OK;
+ S390_lowcore.clock_comparator += delta;
+ set_clock_comparator(S390_lowcore.clock_comparator);
}
-static struct notifier_block nohz_idle_nb = {
- .notifier_call = nohz_idle_notify,
-};
-
-static void __init nohz_init(void)
+static int s390_next_event(unsigned long delta,
+ struct clock_event_device *evt)
{
- if (register_idle_notifier(&nohz_idle_nb))
- panic("Couldn't register idle notifier");
+ S390_lowcore.clock_comparator = get_clock() + delta;
+ set_clock_comparator(S390_lowcore.clock_comparator);
+ return 0;
}
-#endif
-
-/*
- * Set up per cpu jiffy timer and set the clock comparator.
- */
-static void setup_jiffy_timer(void)
+static void s390_set_mode(enum clock_event_mode mode,
+ struct clock_event_device *evt)
{
- /* Set up clock comparator to next jiffy. */
- S390_lowcore.jiffy_timer =
- jiffies_timer_cc + (jiffies_64 + 1) * CLK_TICKS_PER_JIFFY;
- set_clock_comparator(S390_lowcore.jiffy_timer + CPU_DEVIATION);
}
/*
@@ -259,7 +138,26 @@ static void setup_jiffy_timer(void)
*/
void init_cpu_timer(void)
{
- setup_jiffy_timer();
+ struct clock_event_device *cd;
+ int cpu;
+
+ S390_lowcore.clock_comparator = -1ULL;
+ set_clock_comparator(S390_lowcore.clock_comparator);
+
+ cpu = smp_processor_id();
+ cd = &per_cpu(comparators, cpu);
+ cd->name = "comparator";
+ cd->features = CLOCK_EVT_FEAT_ONESHOT;
+ cd->mult = 16777;
+ cd->shift = 12;
+ cd->min_delta_ns = 1;
+ cd->max_delta_ns = LONG_MAX;
+ cd->rating = 400;
+ cd->cpumask = cpumask_of_cpu(cpu);
+ cd->set_next_event = s390_next_event;
+ cd->set_mode = s390_set_mode;
+
+ clockevents_register_device(cd);
/* Enable clock comparator timer interrupt. */
__ctl_set_bit(0,11);
@@ -270,8 +168,6 @@ void init_cpu_timer(void)
static void clock_comparator_interrupt(__u16 code)
{
- /* set clock comparator for next tick */
- set_clock_comparator(S390_lowcore.jiffy_timer + CPU_DEVIATION);
}
static void etr_reset(void);
@@ -316,8 +212,9 @@ static struct clocksource clocksource_tod = {
*/
void __init time_init(void)
{
+ u64 init_timer_cc;
+
init_timer_cc = reset_tod_clock();
- xtime_cc = init_timer_cc + CLK_TICKS_PER_JIFFY;
jiffies_timer_cc = init_timer_cc - jiffies_64 * CLK_TICKS_PER_JIFFY;
/* set xtime */
@@ -342,10 +239,6 @@ void __init time_init(void)
/* Enable TOD clock interrupts on the boot cpu. */
init_cpu_timer();
-#ifdef CONFIG_NO_IDLE_HZ
- nohz_init();
-#endif
-
#ifdef CONFIG_VIRT_TIMER
vtime_init();
#endif
@@ -699,53 +592,49 @@ static int etr_aib_follows(struct etr_aib *a1, struct etr_aib *a2, int p)
}
/*
- * The time is "clock". xtime is what we think the time is.
+ * The time is "clock". old is what we think the time is.
* Adjust the value by a multiple of jiffies and add the delta to ntp.
* "delay" is an approximation how long the synchronization took. If
* the time correction is positive, then "delay" is subtracted from
* the time difference and only the remaining part is passed to ntp.
*/
-static void etr_adjust_time(unsigned long long clock, unsigned long long delay)
+static unsigned long long etr_adjust_time(unsigned long long old,
+ unsigned long long clock,
+ unsigned long long delay)
{
unsigned long long delta, ticks;
struct timex adjust;
- /*
- * We don't have to take the xtime lock because the cpu
- * executing etr_adjust_time is running disabled in
- * tasklet context and all other cpus are looping in
- * etr_sync_cpu_start.
- */
- if (clock > xtime_cc) {
+ if (clock > old) {
/* It is later than we thought. */
- delta = ticks = clock - xtime_cc;
+ delta = ticks = clock - old;
delta = ticks = (delta < delay) ? 0 : delta - delay;
delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
- init_timer_cc = init_timer_cc + delta;
- jiffies_timer_cc = jiffies_timer_cc + delta;
- xtime_cc = xtime_cc + delta;
adjust.offset = ticks * (1000000 / HZ);
} else {
/* It is earlier than we thought. */
- delta = ticks = xtime_cc - clock;
+ delta = ticks = old - clock;
delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
- init_timer_cc = init_timer_cc - delta;
- jiffies_timer_cc = jiffies_timer_cc - delta;
- xtime_cc = xtime_cc - delta;
+ delta = -delta;
adjust.offset = -ticks * (1000000 / HZ);
}
+ jiffies_timer_cc += delta;
if (adjust.offset != 0) {
printk(KERN_NOTICE "etr: time adjusted by %li micro-seconds\n",
adjust.offset);
adjust.modes = ADJ_OFFSET_SINGLESHOT;
do_adjtimex(&adjust);
}
+ return delta;
}
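
etr_adjust_time() rounds the measured TOD difference down to whole jiffies: do_div() leaves the jiffy count in ticks and returns the sub-jiffy remainder, so the delta folded into jiffies_timer_cc (and returned for the clock comparator fixup) is always jiffy-aligned, while the whole jiffies are reported to ntp in microseconds. A small sketch of that split, with HZ and CLK_TICKS_PER_JIFFY values assumed for illustration:

#include <stdio.h>
#include <stdint.h>

/*
 * Illustrative only: assumes HZ = 100 and a TOD clock of 4096 units per
 * microsecond, i.e. CLK_TICKS_PER_JIFFY = 4096 * 1000000 / HZ.
 */
int main(void)
{
        const uint64_t hz = 100;
        const uint64_t clk_ticks_per_jiffy = 4096ULL * 1000000 / hz;
        uint64_t diff = 123456789;              /* measured TOD difference */

        uint64_t ticks = diff / clk_ticks_per_jiffy;         /* whole jiffies */
        uint64_t delta = diff - diff % clk_ticks_per_jiffy;  /* jiffy aligned */
        long offset_us = (long) (ticks * (1000000 / hz));    /* handed to ntp */

        printf("delta=%llu TOD units, ntp offset=%ld us\n",
               (unsigned long long) delta, offset_us);
        return 0;
}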
+static struct {
+ int in_sync;
+ unsigned long long fixup_cc;
+} etr_sync;
+
static void etr_sync_cpu_start(void *dummy)
{
- int *in_sync = dummy;
-
etr_enable_sync_clock();
/*
* This looks like a busy wait loop but it isn't. etr_sync_cpus
@@ -753,7 +642,7 @@ static void etr_sync_cpu_start(void *dummy)
* __udelay will stop the cpu on an enabled wait psw until the
* TOD is running again.
*/
- while (*in_sync == 0) {
+ while (etr_sync.in_sync == 0) {
__udelay(1);
/*
* A different cpu changes *in_sync. Therefore use
@@ -761,14 +650,14 @@ static void etr_sync_cpu_start(void *dummy)
*/
barrier();
}
- if (*in_sync != 1)
+ if (etr_sync.in_sync != 1)
/* Didn't work. Clear per-cpu in sync bit again. */
etr_disable_sync_clock(NULL);
/*
* This round of TOD syncing is done. Set the clock comparator
* to the next tick and let the processor continue.
*/
- setup_jiffy_timer();
+ fixup_clock_comparator(etr_sync.fixup_cc);
}
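
etr_sync_cpu_start() keeps the other cpus spinning on etr_sync.in_sync until the syncing cpu publishes a result; barrier() only stops the compiler from caching the flag in a register. A rough userspace analogue of that publish/poll pattern (a volatile qualifier stands in for barrier(); all names are illustrative):

#include <pthread.h>
#include <stdio.h>

/*
 * Illustrative analogue only: the kernel spins on a plain int and uses
 * barrier() to force a reload; volatile plays that role in this demo.
 */
static volatile int in_sync;

static void *waiter(void *arg)
{
        (void) arg;
        while (in_sync == 0)
                ;                       /* spin until the result is published */
        puts(in_sync == 1 ? "synced" : "sync failed");
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, waiter, NULL);
        in_sync = 1;                    /* the syncing side publishes success */
        pthread_join(t, NULL);
        return 0;
}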
static void etr_sync_cpu_end(void *dummy)
@@ -783,8 +672,8 @@ static void etr_sync_cpu_end(void *dummy)
static int etr_sync_clock(struct etr_aib *aib, int port)
{
struct etr_aib *sync_port;
- unsigned long long clock, delay;
- int in_sync, follows;
+ unsigned long long clock, old_clock, delay, delta;
+ int follows;
int rc;
/* Check if the current aib is adjacent to the sync port aib. */
@@ -799,9 +688,9 @@ static int etr_sync_clock(struct etr_aib *aib, int port)
* successfully synced the clock. smp_call_function will
* return after all other cpus are in etr_sync_cpu_start.
*/
- in_sync = 0;
+ memset(&etr_sync, 0, sizeof(etr_sync));
preempt_disable();
- smp_call_function(etr_sync_cpu_start,&in_sync,0,0);
+ smp_call_function(etr_sync_cpu_start, NULL, 0, 0);
local_irq_disable();
etr_enable_sync_clock();
@@ -809,6 +698,7 @@ static int etr_sync_clock(struct etr_aib *aib, int port)
__ctl_set_bit(14, 21);
__ctl_set_bit(0, 29);
clock = ((unsigned long long) (aib->edf2.etv + 1)) << 32;
+ old_clock = get_clock();
if (set_clock(clock) == 0) {
__udelay(1); /* Wait for the clock to start. */
__ctl_clear_bit(0, 29);
@@ -817,16 +707,17 @@ static int etr_sync_clock(struct etr_aib *aib, int port)
/* Adjust Linux timing variables. */
delay = (unsigned long long)
(aib->edf2.etv - sync_port->edf2.etv) << 32;
- etr_adjust_time(clock, delay);
- setup_jiffy_timer();
+ delta = etr_adjust_time(old_clock, clock, delay);
+ etr_sync.fixup_cc = delta;
+ fixup_clock_comparator(delta);
/* Verify that the clock is properly set. */
if (!etr_aib_follows(sync_port, aib, port)) {
/* Didn't work. */
etr_disable_sync_clock(NULL);
- in_sync = -EAGAIN;
+ etr_sync.in_sync = -EAGAIN;
rc = -EAGAIN;
} else {
- in_sync = 1;
+ etr_sync.in_sync = 1;
rc = 0;
}
} else {
@@ -834,7 +725,7 @@ static int etr_sync_clock(struct etr_aib *aib, int port)
__ctl_clear_bit(0, 29);
__ctl_clear_bit(14, 21);
etr_disable_sync_clock(NULL);
- in_sync = -EAGAIN;
+ etr_sync.in_sync = -EAGAIN;
rc = -EAGAIN;
}
local_irq_enable();
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
new file mode 100644
index 000000000000..12b39b3d9c38
--- /dev/null
+++ b/arch/s390/kernel/topology.c
@@ -0,0 +1,314 @@
+/*
+ * Copyright IBM Corp. 2007
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/bootmem.h>
+#include <linux/sched.h>
+#include <linux/workqueue.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <asm/delay.h>
+#include <asm/s390_ext.h>
+#include <asm/sysinfo.h>
+
+#define CPU_BITS 64
+#define NR_MAG 6
+
+#define PTF_HORIZONTAL (0UL)
+#define PTF_VERTICAL (1UL)
+#define PTF_CHECK (2UL)
+
+struct tl_cpu {
+ unsigned char reserved0[4];
+ unsigned char :6;
+ unsigned char pp:2;
+ unsigned char reserved1;
+ unsigned short origin;
+ unsigned long mask[CPU_BITS / BITS_PER_LONG];
+};
+
+struct tl_container {
+ unsigned char reserved[8];
+};
+
+union tl_entry {
+ unsigned char nl;
+ struct tl_cpu cpu;
+ struct tl_container container;
+};
+
+struct tl_info {
+ unsigned char reserved0[2];
+ unsigned short length;
+ unsigned char mag[NR_MAG];
+ unsigned char reserved1;
+ unsigned char mnest;
+ unsigned char reserved2[4];
+ union tl_entry tle[0];
+};
+
+struct core_info {
+ struct core_info *next;
+ cpumask_t mask;
+};
+
+static void topology_work_fn(struct work_struct *work);
+static struct tl_info *tl_info;
+static struct core_info core_info;
+static int machine_has_topology;
+static int machine_has_topology_irq;
+static struct timer_list topology_timer;
+static void set_topology_timer(void);
+static DECLARE_WORK(topology_work, topology_work_fn);
+
+cpumask_t cpu_coregroup_map(unsigned int cpu)
+{
+ struct core_info *core = &core_info;
+ cpumask_t mask;
+
+ cpus_clear(mask);
+ if (!machine_has_topology)
+ return cpu_present_map;
+ mutex_lock(&smp_cpu_state_mutex);
+ while (core) {
+ if (cpu_isset(cpu, core->mask)) {
+ mask = core->mask;
+ break;
+ }
+ core = core->next;
+ }
+ mutex_unlock(&smp_cpu_state_mutex);
+ if (cpus_empty(mask))
+ mask = cpumask_of_cpu(cpu);
+ return mask;
+}
+
+static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core)
+{
+ unsigned int cpu;
+
+ for (cpu = find_first_bit(&tl_cpu->mask[0], CPU_BITS);
+ cpu < CPU_BITS;
+ cpu = find_next_bit(&tl_cpu->mask[0], CPU_BITS, cpu + 1))
+ {
+ unsigned int rcpu, lcpu;
+
+ rcpu = CPU_BITS - 1 - cpu + tl_cpu->origin;
+ for_each_present_cpu(lcpu) {
+ if (__cpu_logical_map[lcpu] == rcpu) {
+ cpu_set(lcpu, core->mask);
+ smp_cpu_polarization[lcpu] = tl_cpu->pp;
+ }
+ }
+ }
+}
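
The mask in a tl_cpu entry appears to be stored with the cpu whose address equals origin in the most significant bit, which is why a bit found at position cpu maps to the address CPU_BITS - 1 - cpu + origin. A small sketch of that index arithmetic (origin and the mask value are invented sample data, not real STSI output):

#include <stdio.h>

#define CPU_BITS 64

/*
 * Illustrative only: shows the bit-position -> cpu-address arithmetic
 * used in add_cpus_to_core() above.
 */
int main(void)
{
        unsigned long long mask = 0xC000000000000000ULL; /* two MSBs set */
        unsigned short origin = 8;
        int bit;

        for (bit = 0; bit < CPU_BITS; bit++) {
                if (!(mask & (1ULL << bit)))
                        continue;
                printf("mask bit %d -> physical cpu address %d\n",
                       bit, CPU_BITS - 1 - bit + origin);
        }
        return 0;
}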
+
+static void clear_cores(void)
+{
+ struct core_info *core = &core_info;
+
+ while (core) {
+ cpus_clear(core->mask);
+ core = core->next;
+ }
+}
+
+static union tl_entry *next_tle(union tl_entry *tle)
+{
+ if (tle->nl)
+ return (union tl_entry *)((struct tl_container *)tle + 1);
+ else
+ return (union tl_entry *)((struct tl_cpu *)tle + 1);
+}
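
next_tle() steps through the topology block by entry size: a container entry is 8 bytes and a cpu entry 16 bytes on a 64-bit build, and the nl byte at the start of every entry says which kind follows. A compile-and-run sketch of those sizes under the structure definitions above (assuming an LP64 compiler):

#include <stdio.h>

#define CPU_BITS        64
#define BITS_PER_LONG   64      /* assumption: 64-bit build, as on s390x */

struct tl_cpu {
        unsigned char reserved0[4];
        unsigned char :6;
        unsigned char pp:2;
        unsigned char reserved1;
        unsigned short origin;
        unsigned long mask[CPU_BITS / BITS_PER_LONG];
};

struct tl_container {
        unsigned char reserved[8];
};

int main(void)
{
        /* next_tle() advances by one of these two sizes depending on nl. */
        printf("container entry: %zu bytes, cpu entry: %zu bytes\n",
               sizeof(struct tl_container), sizeof(struct tl_cpu));
        return 0;
}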
+
+static void tl_to_cores(struct tl_info *info)
+{
+ union tl_entry *tle, *end;
+ struct core_info *core = &core_info;
+
+ mutex_lock(&smp_cpu_state_mutex);
+ clear_cores();
+ tle = info->tle;
+ end = (union tl_entry *)((unsigned long)info + info->length);
+ while (tle < end) {
+ switch (tle->nl) {
+ case 5:
+ case 4:
+ case 3:
+ case 2:
+ break;
+ case 1:
+ core = core->next;
+ break;
+ case 0:
+ add_cpus_to_core(&tle->cpu, core);
+ break;
+ default:
+ clear_cores();
+ machine_has_topology = 0;
+ return;
+ }
+ tle = next_tle(tle);
+ }
+ mutex_unlock(&smp_cpu_state_mutex);
+}
+
+static void topology_update_polarization_simple(void)
+{
+ int cpu;
+
+ mutex_lock(&smp_cpu_state_mutex);
+ for_each_present_cpu(cpu)
+ smp_cpu_polarization[cpu] = POLARIZATION_HRZ;
+ mutex_unlock(&smp_cpu_state_mutex);
+}
+
+static int ptf(unsigned long fc)
+{
+ int rc;
+
+ asm volatile(
+ " .insn rre,0xb9a20000,%1,%1\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (rc)
+ : "d" (fc) : "cc");
+ return rc;
+}
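
ptf() executes the PTF instruction (opcode 0xb9a2) and turns its condition code into the return value: ipm deposits the condition code in bits 28-29 of the register (counting from the least significant bit), and shifting right by 28 leaves it as a small integer 0-3. A plain C sketch of that extraction step (the register value is a made-up example):

#include <stdio.h>

/*
 * Illustrative only: after "ipm" the condition code sits in bits 28-29
 * of the 32-bit register, so "srl %0,28" leaves it as a value 0-3.
 */
int main(void)
{
        unsigned int reg_after_ipm = 0x10000000;        /* example: cc = 1 */
        unsigned int cc = reg_after_ipm >> 28;          /* the srl step */

        printf("condition code = %u\n", cc);
        return 0;
}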
+
+int topology_set_cpu_management(int fc)
+{
+ int cpu;
+ int rc;
+
+ if (!machine_has_topology)
+ return -EOPNOTSUPP;
+ if (fc)
+ rc = ptf(PTF_VERTICAL);
+ else
+ rc = ptf(PTF_HORIZONTAL);
+ if (rc)
+ return -EBUSY;
+ for_each_present_cpu(cpu)
+ smp_cpu_polarization[cpu] = POLARIZATION_UNKNWN;
+ return rc;
+}
+
+void arch_update_cpu_topology(void)
+{
+ struct tl_info *info = tl_info;
+ struct sys_device *sysdev;
+ int cpu;
+
+ if (!machine_has_topology) {
+ topology_update_polarization_simple();
+ return;
+ }
+ stsi(info, 15, 1, 2);
+ tl_to_cores(info);
+ for_each_online_cpu(cpu) {
+ sysdev = get_cpu_sysdev(cpu);
+ kobject_uevent(&sysdev->kobj, KOBJ_CHANGE);
+ }
+}
+
+static void topology_work_fn(struct work_struct *work)
+{
+ arch_reinit_sched_domains();
+}
+
+void topology_schedule_update(void)
+{
+ schedule_work(&topology_work);
+}
+
+static void topology_timer_fn(unsigned long ignored)
+{
+ if (ptf(PTF_CHECK))
+ topology_schedule_update();
+ set_topology_timer();
+}
+
+static void set_topology_timer(void)
+{
+ topology_timer.function = topology_timer_fn;
+ topology_timer.data = 0;
+ topology_timer.expires = jiffies + 60 * HZ;
+ add_timer(&topology_timer);
+}
+
+static void topology_interrupt(__u16 code)
+{
+ schedule_work(&topology_work);
+}
+
+static int __init init_topology_update(void)
+{
+ int rc;
+
+ if (!machine_has_topology) {
+ topology_update_polarization_simple();
+ return 0;
+ }
+ init_timer_deferrable(&topology_timer);
+ if (machine_has_topology_irq) {
+ rc = register_external_interrupt(0x2005, topology_interrupt);
+ if (rc)
+ return rc;
+ ctl_set_bit(0, 8);
+ }
+ else
+ set_topology_timer();
+ return 0;
+}
+__initcall(init_topology_update);
+
+void __init s390_init_cpu_topology(void)
+{
+ unsigned long long facility_bits;
+ struct tl_info *info;
+ struct core_info *core;
+ int nr_cores;
+ int i;
+
+ if (stfle(&facility_bits, 1) <= 0)
+ return;
+ if (!(facility_bits & (1ULL << 52)) || !(facility_bits & (1ULL << 61)))
+ return;
+ machine_has_topology = 1;
+
+ if (facility_bits & (1ULL << 51))
+ machine_has_topology_irq = 1;
+
+ tl_info = alloc_bootmem_pages(PAGE_SIZE);
+ if (!tl_info)
+ goto error;
+ info = tl_info;
+ stsi(info, 15, 1, 2);
+
+ nr_cores = info->mag[NR_MAG - 2];
+ for (i = 0; i < info->mnest - 2; i++)
+ nr_cores *= info->mag[NR_MAG - 3 - i];
+
+ printk(KERN_INFO "CPU topology:");
+ for (i = 0; i < NR_MAG; i++)
+ printk(" %d", info->mag[i]);
+ printk(" / %d\n", info->mnest);
+
+ core = &core_info;
+ for (i = 0; i < nr_cores; i++) {
+ core->next = alloc_bootmem(sizeof(struct core_info));
+ core = core->next;
+ if (!core)
+ goto error;
+ }
+ return;
+error:
+ machine_has_topology = 0;
+ machine_has_topology_irq = 0;
+}
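
The nr_cores value computed above is an upper bound on the number of core containers the topology block can describe: the cores-per-container magnitude multiplied by one more mag[] factor for each additional nesting level. A worked sketch with invented mag/mnest values (real numbers come from stsi 15.1.2):

#include <stdio.h>

#define NR_MAG 6

/* Illustrative only: mag[] and mnest below are made-up sample data. */
int main(void)
{
        unsigned char mag[NR_MAG] = { 0, 0, 0, 4, 8, 6 };
        unsigned char mnest = 3;        /* books -> cores -> cpus */
        int nr_cores, i;

        nr_cores = mag[NR_MAG - 2];             /* 8 cores per book */
        for (i = 0; i < mnest - 2; i++)
                nr_cores *= mag[NR_MAG - 3 - i];/* times 4 books */

        printf("allocate %d core_info entries\n", nr_cores);   /* prints 32 */
        return 0;
}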
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 60f728aeaf12..57b607b61100 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -42,11 +42,8 @@
#include <asm/s390_ext.h>
#include <asm/lowcore.h>
#include <asm/debug.h>
+#include "entry.h"
-/* Called from entry.S only */
-extern void handle_per_exception(struct pt_regs *regs);
-
-typedef void pgm_check_handler_t(struct pt_regs *, long);
pgm_check_handler_t *pgm_check_table[128];
#ifdef CONFIG_SYSCTL
@@ -59,7 +56,6 @@ int sysctl_userprocess_debug = 0;
extern pgm_check_handler_t do_protection_exception;
extern pgm_check_handler_t do_dat_exception;
-extern pgm_check_handler_t do_monitor_call;
extern pgm_check_handler_t do_asce_exception;
#define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; })
@@ -138,7 +134,6 @@ void show_trace(struct task_struct *task, unsigned long *stack)
else
__show_trace(sp, S390_lowcore.thread_info,
S390_lowcore.thread_info + THREAD_SIZE);
- printk("\n");
if (!task)
task = current;
debug_show_held_locks(task);
@@ -166,6 +161,15 @@ void show_stack(struct task_struct *task, unsigned long *sp)
show_trace(task, sp);
}
+#ifdef CONFIG_64BIT
+void show_last_breaking_event(struct pt_regs *regs)
+{
+ printk("Last Breaking-Event-Address:\n");
+ printk(" [<%016lx>] ", regs->args[0] & PSW_ADDR_INSN);
+ print_symbol("%s\n", regs->args[0] & PSW_ADDR_INSN);
+}
+#endif
+
/*
* The architecture-independent dump_stack generator
*/
@@ -739,6 +743,5 @@ void __init trap_init(void)
pgm_check_table[0x15] = &operand_exception;
pgm_check_table[0x1C] = &space_switch_exception;
pgm_check_table[0x1D] = &hfp_sqrt_exception;
- pgm_check_table[0x40] = &do_monitor_call;
pfault_irq_init();
}
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index 70f2a862b670..eae21a8ac72d 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -34,7 +34,7 @@ void __delay(unsigned long loops)
*/
void __udelay(unsigned long usecs)
{
- u64 end, time, jiffy_timer = 0;
+ u64 end, time, old_cc = 0;
unsigned long flags, cr0, mask, dummy;
int irq_context;
@@ -43,8 +43,8 @@ void __udelay(unsigned long usecs)
local_bh_disable();
local_irq_save(flags);
if (raw_irqs_disabled_flags(flags)) {
- jiffy_timer = S390_lowcore.jiffy_timer;
- S390_lowcore.jiffy_timer = -1ULL - (4096 << 12);
+ old_cc = S390_lowcore.clock_comparator;
+ S390_lowcore.clock_comparator = -1ULL;
__ctl_store(cr0, 0, 0);
dummy = (cr0 & 0xffff00e0) | 0x00000800;
__ctl_load(dummy , 0, 0);
@@ -55,8 +55,8 @@ void __udelay(unsigned long usecs)
end = get_clock() + ((u64) usecs << 12);
do {
- time = end < S390_lowcore.jiffy_timer ?
- end : S390_lowcore.jiffy_timer;
+ time = end < S390_lowcore.clock_comparator ?
+ end : S390_lowcore.clock_comparator;
set_clock_comparator(time);
trace_hardirqs_on();
__load_psw_mask(mask);
@@ -65,10 +65,10 @@ void __udelay(unsigned long usecs)
if (raw_irqs_disabled_flags(flags)) {
__ctl_load(cr0, 0, 0);
- S390_lowcore.jiffy_timer = jiffy_timer;
+ S390_lowcore.clock_comparator = old_cc;
}
if (!irq_context)
_local_bh_enable();
- set_clock_comparator(S390_lowcore.jiffy_timer);
+ set_clock_comparator(S390_lowcore.clock_comparator);
local_irq_restore(flags);
}
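
Since the clock comparator now carries pending clockevents rather than the old jiffy timer, __udelay() must not push an earlier wakeup back: every loop iteration programs the comparator to whichever of the delay's end time and the already-programmed value comes first. A trivial sketch of that choice (the TOD values are arbitrary sample numbers):

#include <stdio.h>
#include <stdint.h>

/* Illustrative only: mirrors the min() choice inside the __udelay loop. */
int main(void)
{
        uint64_t end = 5000;                    /* when the udelay expires */
        uint64_t clock_comparator = 3000;       /* already-programmed event */
        uint64_t next = end < clock_comparator ? end : clock_comparator;

        printf("program comparator to %llu\n", (unsigned long long) next);
        return 0;
}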
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index 5efdfe9f5e76..d66215b0fde9 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -302,6 +302,10 @@ static size_t copy_in_user_pt(size_t n, void __user *to,
pte_t *pte_from, *pte_to;
int write_user;
+ if (segment_eq(get_fs(), KERNEL_DS)) {
+ memcpy((void __force *) to, (void __force *) from, n);
+ return 0;
+ }
done = 0;
retry:
spin_lock(&mm->page_table_lock);
@@ -361,18 +365,10 @@ fault:
: "0" (-EFAULT), "d" (oparg), "a" (uaddr), \
"m" (*uaddr) : "cc" );
-int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
+static int __futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
{
int oldval = 0, newval, ret;
- spin_lock(&current->mm->page_table_lock);
- uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
- if (!uaddr) {
- spin_unlock(&current->mm->page_table_lock);
- return -EFAULT;
- }
- get_page(virt_to_page(uaddr));
- spin_unlock(&current->mm->page_table_lock);
switch (op) {
case FUTEX_OP_SET:
__futex_atomic_op("lr %2,%5\n",
@@ -397,17 +393,17 @@ int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
default:
ret = -ENOSYS;
}
- put_page(virt_to_page(uaddr));
- *old = oldval;
+ if (ret == 0)
+ *old = oldval;
return ret;
}
-int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
+int futex_atomic_op_pt(int op, int __user *uaddr, int oparg, int *old)
{
int ret;
- if (!current->mm)
- return -EFAULT;
+ if (segment_eq(get_fs(), KERNEL_DS))
+ return __futex_atomic_op_pt(op, uaddr, oparg, old);
spin_lock(&current->mm->page_table_lock);
uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
if (!uaddr) {
@@ -416,13 +412,40 @@ int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
}
get_page(virt_to_page(uaddr));
spin_unlock(&current->mm->page_table_lock);
- asm volatile(" cs %1,%4,0(%5)\n"
- "0: lr %0,%1\n"
- "1:\n"
- EX_TABLE(0b,1b)
+ ret = __futex_atomic_op_pt(op, uaddr, oparg, old);
+ put_page(virt_to_page(uaddr));
+ return ret;
+}
+
+static int __futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
+{
+ int ret;
+
+ asm volatile("0: cs %1,%4,0(%5)\n"
+ "1: lr %0,%1\n"
+ "2:\n"
+ EX_TABLE(0b,2b) EX_TABLE(1b,2b)
: "=d" (ret), "+d" (oldval), "=m" (*uaddr)
: "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr)
: "cc", "memory" );
+ return ret;
+}
+
+int futex_atomic_cmpxchg_pt(int __user *uaddr, int oldval, int newval)
+{
+ int ret;
+
+ if (segment_eq(get_fs(), KERNEL_DS))
+ return __futex_atomic_cmpxchg_pt(uaddr, oldval, newval);
+ spin_lock(&current->mm->page_table_lock);
+ uaddr = (int __user *) __dat_user_addr((unsigned long) uaddr);
+ if (!uaddr) {
+ spin_unlock(&current->mm->page_table_lock);
+ return -EFAULT;
+ }
+ get_page(virt_to_page(uaddr));
+ spin_unlock(&current->mm->page_table_lock);
+ ret = __futex_atomic_cmpxchg_pt(uaddr, oldval, newval);
put_page(virt_to_page(uaddr));
return ret;
}
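
Both futex helpers are now split into a raw __futex_atomic_* operation and a wrapper that either runs it directly under KERNEL_DS or pins and translates the user page first, as before. The raw cmpxchg behaves like the userspace analogue below (a GCC builtin stands in for the "cs" instruction; the kernel path additionally recovers from faults through the EX_TABLE entries added above):

#include <stdio.h>

/*
 * Illustrative analogue only: __sync_val_compare_and_swap stands in for
 * the s390 compare-and-swap instruction used by the kernel code.
 */
static int futex_cmpxchg_demo(int *uaddr, int oldval, int newval)
{
        return __sync_val_compare_and_swap(uaddr, oldval, newval);
}

int main(void)
{
        int word = 7;
        int prev;

        prev = futex_cmpxchg_demo(&word, 7, 9); /* succeeds: 7 -> 9 */
        printf("prev=%d word=%d\n", prev, word);

        prev = futex_cmpxchg_demo(&word, 7, 1); /* fails: word is 9 */
        printf("prev=%d word=%d\n", prev, word);
        return 0;
}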
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 880b0ebf894b..ed2af0a3303b 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -289,22 +289,8 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
rc = add_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1);
- switch (rc) {
- case 0:
- break;
- case -ENOSPC:
- PRINT_WARN("segment_load: not loading segment %s - overlaps "
- "storage/segment\n", name);
- goto out_free;
- case -ERANGE:
- PRINT_WARN("segment_load: not loading segment %s - exceeds "
- "kernel mapping range\n", name);
- goto out_free;
- default:
- PRINT_WARN("segment_load: not loading segment %s (rc: %d)\n",
- name, rc);
+ if (rc)
goto out_free;
- }
seg->res = kzalloc(sizeof(struct resource), GFP_KERNEL);
if (seg->res == NULL) {
@@ -582,8 +568,59 @@ out:
mutex_unlock(&dcss_lock);
}
+/*
+ * print appropriate error message for segment_load()/segment_type()
+ * return code
+ */
+void segment_warning(int rc, char *seg_name)
+{
+ switch (rc) {
+ case -ENOENT:
+ PRINT_WARN("cannot load/query segment %s, "
+ "does not exist\n", seg_name);
+ break;
+ case -ENOSYS:
+ PRINT_WARN("cannot load/query segment %s, "
+ "not running on VM\n", seg_name);
+ break;
+ case -EIO:
+ PRINT_WARN("cannot load/query segment %s, "
+ "hardware error\n", seg_name);
+ break;
+ case -ENOTSUPP:
+ PRINT_WARN("cannot load/query segment %s, "
+ "is a multi-part segment\n", seg_name);
+ break;
+ case -ENOSPC:
+ PRINT_WARN("cannot load/query segment %s, "
+ "overlaps with storage\n", seg_name);
+ break;
+ case -EBUSY:
+ PRINT_WARN("cannot load/query segment %s, "
+ "overlaps with already loaded dcss\n", seg_name);
+ break;
+ case -EPERM:
+ PRINT_WARN("cannot load/query segment %s, "
+ "already loaded in incompatible mode\n", seg_name);
+ break;
+ case -ENOMEM:
+ PRINT_WARN("cannot load/query segment %s, "
+ "out of memory\n", seg_name);
+ break;
+ case -ERANGE:
+ PRINT_WARN("cannot load/query segment %s, "
+ "exceeds kernel mapping range\n", seg_name);
+ break;
+ default:
+ PRINT_WARN("cannot load/query segment %s, "
+ "return value %i\n", seg_name, rc);
+ break;
+ }
+}
+
EXPORT_SYMBOL(segment_load);
EXPORT_SYMBOL(segment_unload);
EXPORT_SYMBOL(segment_save);
EXPORT_SYMBOL(segment_type);
EXPORT_SYMBOL(segment_modify_shared);
+EXPORT_SYMBOL(segment_warning);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index ed13d429a487..2650f46001d0 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -28,11 +28,11 @@
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
-
#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/s390_ext.h>
#include <asm/mmu_context.h>
+#include "../kernel/entry.h"
#ifndef CONFIG_64BIT
#define __FAIL_ADDR_MASK 0x7ffff000
@@ -50,8 +50,6 @@
extern int sysctl_userprocess_debug;
#endif
-extern void die(const char *,struct pt_regs *,long);
-
#ifdef CONFIG_KPROBES
static inline int notify_page_fault(struct pt_regs *regs, long err)
{
@@ -245,11 +243,6 @@ static void do_sigbus(struct pt_regs *regs, unsigned long error_code,
}
#ifdef CONFIG_S390_EXEC_PROTECT
-extern long sys_sigreturn(struct pt_regs *regs);
-extern long sys_rt_sigreturn(struct pt_regs *regs);
-extern long sys32_sigreturn(struct pt_regs *regs);
-extern long sys32_rt_sigreturn(struct pt_regs *regs);
-
static int signal_return(struct mm_struct *mm, struct pt_regs *regs,
unsigned long address, unsigned long error_code)
{
@@ -270,15 +263,15 @@ static int signal_return(struct mm_struct *mm, struct pt_regs *regs,
#ifdef CONFIG_COMPAT
compat = test_tsk_thread_flag(current, TIF_31BIT);
if (compat && instruction == 0x0a77)
- sys32_sigreturn(regs);
+ sys32_sigreturn();
else if (compat && instruction == 0x0aad)
- sys32_rt_sigreturn(regs);
+ sys32_rt_sigreturn();
else
#endif
if (instruction == 0x0a77)
- sys_sigreturn(regs);
+ sys_sigreturn();
else if (instruction == 0x0aad)
- sys_rt_sigreturn(regs);
+ sys_rt_sigreturn();
else {
current->thread.prot_addr = address;
current->thread.trap_no = error_code;
@@ -424,7 +417,7 @@ no_context:
}
void __kprobes do_protection_exception(struct pt_regs *regs,
- unsigned long error_code)
+ long error_code)
{
/* Protection exception is suppressing, decrement psw address. */
regs->psw.addr -= (error_code >> 16);
@@ -440,7 +433,7 @@ void __kprobes do_protection_exception(struct pt_regs *regs,
do_exception(regs, 4, 1);
}
-void __kprobes do_dat_exception(struct pt_regs *regs, unsigned long error_code)
+void __kprobes do_dat_exception(struct pt_regs *regs, long error_code)
{
do_exception(regs, error_code & 0xff, 0);
}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 8053245fe259..202c952a29b4 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -50,7 +50,6 @@ void show_mem(void)
printk("Mem-info:\n");
show_free_areas();
- printk("Free swap: %6ldkB\n", nr_swap_pages << (PAGE_SHIFT - 10));
i = max_mapnr;
while (i-- > 0) {
if (!pfn_valid(i))
diff --git a/arch/sh/kernel/Makefile_32 b/arch/sh/kernel/Makefile_32
index 62bf373266f7..4bbdce36b92b 100644
--- a/arch/sh/kernel/Makefile_32
+++ b/arch/sh/kernel/Makefile_32
@@ -5,7 +5,7 @@
extra-y := head_32.o init_task.o vmlinux.lds
obj-y := debugtraps.o io.o io_generic.o irq.o machvec.o process_32.o \
- ptrace_32.o semaphore.o setup.o signal_32.o sys_sh.o sys_sh32.o \
+ ptrace_32.o setup.o signal_32.o sys_sh.o sys_sh32.o \
syscalls_32.o time_32.o topology.o traps.o traps_32.o
obj-y += cpu/ timers/
diff --git a/arch/sh/kernel/Makefile_64 b/arch/sh/kernel/Makefile_64
index e01283d49cbf..6edf53b93d94 100644
--- a/arch/sh/kernel/Makefile_64
+++ b/arch/sh/kernel/Makefile_64
@@ -1,7 +1,7 @@
extra-y := head_64.o init_task.o vmlinux.lds
obj-y := debugtraps.o io.o io_generic.o irq.o machvec.o process_64.o \
- ptrace_64.o semaphore.o setup.o signal_64.o sys_sh.o sys_sh64.o \
+ ptrace_64.o setup.o signal_64.o sys_sh.o sys_sh64.o \
syscalls_64.o time_64.o topology.o traps.o traps_64.o
obj-y += cpu/ timers/
diff --git a/arch/sh/kernel/semaphore.c b/arch/sh/kernel/semaphore.c
deleted file mode 100644
index 184119eeae56..000000000000
--- a/arch/sh/kernel/semaphore.c
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Just taken from alpha implementation.
- * This can't work well, perhaps.
- */
-/*
- * Generic semaphore code. Buyer beware. Do your own
- * specific changes in <asm/semaphore-helper.h>
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/wait.h>
-#include <linux/init.h>
-#include <asm/semaphore.h>
-#include <asm/semaphore-helper.h>
-
-DEFINE_SPINLOCK(semaphore_wake_lock);
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to sleep, while the "waking" variable is
- * incremented when the "up()" code goes to wake up waiting
- * processes.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * waking_non_zero() (from asm/semaphore.h) must execute
- * atomically.
- *
- * When __up() is called, the count was negative before
- * incrementing it, and we need to wake up somebody.
- *
- * This routine adds one to the count of processes that need to
- * wake up and exit. ALL waiting processes actually wake up but
- * only the one that gets to the "waking" field first will gate
- * through and acquire the semaphore. The others will go back
- * to sleep.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-void __up(struct semaphore *sem)
-{
- wake_one_more(sem);
- wake_up(&sem->wait);
-}
-
-/*
- * Perform the "down" function. Return zero for semaphore acquired,
- * return negative for signalled out of the function.
- *
- * If called from __down, the return is ignored and the wait loop is
- * not interruptible. This means that a task waiting on a semaphore
- * using "down()" cannot be killed until someone does an "up()" on
- * the semaphore.
- *
- * If called from __down_interruptible, the return value gets checked
- * upon return. If the return value is negative then the task continues
- * with the negative value in the return register (it can be tested by
- * the caller).
- *
- * Either form may be used in conjunction with "up()".
- *
- */
-
-#define DOWN_VAR \
- struct task_struct *tsk = current; \
- wait_queue_t wait; \
- init_waitqueue_entry(&wait, tsk);
-
-#define DOWN_HEAD(task_state) \
- \
- \
- tsk->state = (task_state); \
- add_wait_queue(&sem->wait, &wait); \
- \
- /* \
- * Ok, we're set up. sem->count is known to be less than zero \
- * so we must wait. \
- * \
- * We can let go the lock for purposes of waiting. \
- * We re-acquire it after awaking so as to protect \
- * all semaphore operations. \
- * \
- * If "up()" is called before we call waking_non_zero() then \
- * we will catch it right away. If it is called later then \
- * we will have to go through a wakeup cycle to catch it. \
- * \
- * Multiple waiters contend for the semaphore lock to see \
- * who gets to gate through and who has to wait some more. \
- */ \
- for (;;) {
-
-#define DOWN_TAIL(task_state) \
- tsk->state = (task_state); \
- } \
- tsk->state = TASK_RUNNING; \
- remove_wait_queue(&sem->wait, &wait);
-
-void __sched __down(struct semaphore * sem)
-{
- DOWN_VAR
- DOWN_HEAD(TASK_UNINTERRUPTIBLE)
- if (waking_non_zero(sem))
- break;
- schedule();
- DOWN_TAIL(TASK_UNINTERRUPTIBLE)
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
- int ret = 0;
- DOWN_VAR
- DOWN_HEAD(TASK_INTERRUPTIBLE)
-
- ret = waking_non_zero_interruptible(sem, tsk);
- if (ret)
- {
- if (ret == 1)
- /* ret != 0 only if we get interrupted -arca */
- ret = 0;
- break;
- }
- schedule();
- DOWN_TAIL(TASK_INTERRUPTIBLE)
- return ret;
-}
-
-int __down_trylock(struct semaphore * sem)
-{
- return waking_non_zero_trylock(sem);
-}
diff --git a/arch/sh/kernel/sh_ksyms_32.c b/arch/sh/kernel/sh_ksyms_32.c
index 45bb333fd9ec..6d405462cee8 100644
--- a/arch/sh/kernel/sh_ksyms_32.c
+++ b/arch/sh/kernel/sh_ksyms_32.c
@@ -9,7 +9,6 @@
#include <linux/pci.h>
#include <linux/irq.h>
#include <asm/sections.h>
-#include <asm/semaphore.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/checksum.h>
@@ -48,12 +47,6 @@ EXPORT_SYMBOL(__copy_user);
EXPORT_SYMBOL(get_vm_area);
#endif
-/* semaphore exports */
-EXPORT_SYMBOL(__up);
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_interruptible);
-EXPORT_SYMBOL(__down_trylock);
-
EXPORT_SYMBOL(__udelay);
EXPORT_SYMBOL(__ndelay);
EXPORT_SYMBOL(__const_udelay);
diff --git a/arch/sh/kernel/sh_ksyms_64.c b/arch/sh/kernel/sh_ksyms_64.c
index b6410ce4bd1d..a310c9707f03 100644
--- a/arch/sh/kernel/sh_ksyms_64.c
+++ b/arch/sh/kernel/sh_ksyms_64.c
@@ -16,7 +16,6 @@
#include <linux/in6.h>
#include <linux/interrupt.h>
#include <linux/screen_info.h>
-#include <asm/semaphore.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/checksum.h>
@@ -37,9 +36,6 @@ EXPORT_SYMBOL(csum_partial_copy_nocheck);
EXPORT_SYMBOL(screen_info);
#endif
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_trylock);
-EXPORT_SYMBOL(__up);
EXPORT_SYMBOL(__put_user_asm_l);
EXPORT_SYMBOL(__get_user_asm_l);
EXPORT_SYMBOL(copy_page);
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile
index bf1b15d3f6f5..2712bb166f6f 100644
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -12,7 +12,7 @@ obj-y := entry.o wof.o wuf.o etrap.o rtrap.o traps.o $(IRQ_OBJS) \
sys_sparc.o sunos_asm.o systbls.o \
time.o windows.o cpu.o devices.o sclow.o \
tadpole.o tick14.o ptrace.o sys_solaris.o \
- unaligned.o una_asm.o muldiv.o semaphore.o \
+ unaligned.o una_asm.o muldiv.o \
prom.o of_device.o devres.o
devres-y = ../../../kernel/irq/devres.o
diff --git a/arch/sparc/kernel/semaphore.c b/arch/sparc/kernel/semaphore.c
deleted file mode 100644
index 0c37c1a7cd7e..000000000000
--- a/arch/sparc/kernel/semaphore.c
+++ /dev/null
@@ -1,155 +0,0 @@
-/* $Id: semaphore.c,v 1.7 2001/04/18 21:06:05 davem Exp $ */
-
-/* sparc32 semaphore implementation, based on i386 version */
-
-#include <linux/sched.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-
-#include <asm/semaphore.h>
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to acquire the semaphore, while the "sleeping"
- * variable is a count of such acquires.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * "sleeping" and the contention routine ordering is
- * protected by the semaphore spinlock.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-
-/*
- * Logic:
- * - only on a boundary condition do we need to care. When we go
- * from a negative count to a non-negative, we wake people up.
- * - when we go from a non-negative count to a negative do we
- * (a) synchronize with the "sleeper" count and (b) make sure
- * that we're on the wakeup list before we synchronize so that
- * we cannot lose wakeup events.
- */
-
-void __up(struct semaphore *sem)
-{
- wake_up(&sem->wait);
-}
-
-static DEFINE_SPINLOCK(semaphore_lock);
-
-void __sched __down(struct semaphore * sem)
-{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- tsk->state = TASK_UNINTERRUPTIBLE;
- add_wait_queue_exclusive(&sem->wait, &wait);
-
- spin_lock_irq(&semaphore_lock);
- sem->sleepers++;
- for (;;) {
- int sleepers = sem->sleepers;
-
- /*
- * Add "everybody else" into it. They aren't
- * playing, because we own the spinlock.
- */
- if (!atomic24_add_negative(sleepers - 1, &sem->count)) {
- sem->sleepers = 0;
- break;
- }
- sem->sleepers = 1; /* us - see -1 above */
- spin_unlock_irq(&semaphore_lock);
-
- schedule();
- tsk->state = TASK_UNINTERRUPTIBLE;
- spin_lock_irq(&semaphore_lock);
- }
- spin_unlock_irq(&semaphore_lock);
- remove_wait_queue(&sem->wait, &wait);
- tsk->state = TASK_RUNNING;
- wake_up(&sem->wait);
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
- int retval = 0;
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- tsk->state = TASK_INTERRUPTIBLE;
- add_wait_queue_exclusive(&sem->wait, &wait);
-
- spin_lock_irq(&semaphore_lock);
- sem->sleepers ++;
- for (;;) {
- int sleepers = sem->sleepers;
-
- /*
- * With signals pending, this turns into
- * the trylock failure case - we won't be
- * sleeping, and we* can't get the lock as
- * it has contention. Just correct the count
- * and exit.
- */
- if (signal_pending(current)) {
- retval = -EINTR;
- sem->sleepers = 0;
- atomic24_add(sleepers, &sem->count);
- break;
- }
-
- /*
- * Add "everybody else" into it. They aren't
- * playing, because we own the spinlock. The
- * "-1" is because we're still hoping to get
- * the lock.
- */
- if (!atomic24_add_negative(sleepers - 1, &sem->count)) {
- sem->sleepers = 0;
- break;
- }
- sem->sleepers = 1; /* us - see -1 above */
- spin_unlock_irq(&semaphore_lock);
-
- schedule();
- tsk->state = TASK_INTERRUPTIBLE;
- spin_lock_irq(&semaphore_lock);
- }
- spin_unlock_irq(&semaphore_lock);
- tsk->state = TASK_RUNNING;
- remove_wait_queue(&sem->wait, &wait);
- wake_up(&sem->wait);
- return retval;
-}
-
-/*
- * Trylock failed - make sure we correct for
- * having decremented the count.
- */
-int __down_trylock(struct semaphore * sem)
-{
- int sleepers;
- unsigned long flags;
-
- spin_lock_irqsave(&semaphore_lock, flags);
- sleepers = sem->sleepers + 1;
- sem->sleepers = 0;
-
- /*
- * Add "everybody else" and us into it. They aren't
- * playing, because we own the spinlock.
- */
- if (!atomic24_add_negative(sleepers, &sem->count))
- wake_up(&sem->wait);
-
- spin_unlock_irqrestore(&semaphore_lock, flags);
- return 1;
-}
diff --git a/arch/sparc/kernel/sparc_ksyms.c b/arch/sparc/kernel/sparc_ksyms.c
index c1025e551650..97b1de0e9094 100644
--- a/arch/sparc/kernel/sparc_ksyms.c
+++ b/arch/sparc/kernel/sparc_ksyms.c
@@ -107,11 +107,6 @@ EXPORT_SYMBOL(___rw_read_try);
EXPORT_SYMBOL(___rw_read_exit);
EXPORT_SYMBOL(___rw_write_enter);
#endif
-/* semaphores */
-EXPORT_SYMBOL(__up);
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_trylock);
-EXPORT_SYMBOL(__down_interruptible);
EXPORT_SYMBOL(sparc_valid_addr_bitmap);
EXPORT_SYMBOL(phys_base);
diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile
index 1bf5b187de49..459462e80a12 100644
--- a/arch/sparc64/kernel/Makefile
+++ b/arch/sparc64/kernel/Makefile
@@ -10,7 +10,7 @@ extra-y := head.o init_task.o vmlinux.lds
obj-y := process.o setup.o cpu.o idprom.o \
traps.o auxio.o una_asm.o sysfs.o iommu.o \
irq.o ptrace.o time.o sys_sparc.o signal.o \
- unaligned.o central.o pci.o starfire.o semaphore.o \
+ unaligned.o central.o pci.o starfire.o \
power.o sbus.o sparc64_ksyms.o chmc.o \
visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o
diff --git a/arch/sparc64/kernel/semaphore.c b/arch/sparc64/kernel/semaphore.c
deleted file mode 100644
index 9974a6899551..000000000000
--- a/arch/sparc64/kernel/semaphore.c
+++ /dev/null
@@ -1,254 +0,0 @@
-/* semaphore.c: Sparc64 semaphore implementation.
- *
- * This is basically the PPC semaphore scheme ported to use
- * the sparc64 atomic instructions, so see the PPC code for
- * credits.
- */
-
-#include <linux/sched.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-
-/*
- * Atomically update sem->count.
- * This does the equivalent of the following:
- *
- * old_count = sem->count;
- * tmp = MAX(old_count, 0) + incr;
- * sem->count = tmp;
- * return old_count;
- */
-static inline int __sem_update_count(struct semaphore *sem, int incr)
-{
- int old_count, tmp;
-
- __asm__ __volatile__("\n"
-" ! __sem_update_count old_count(%0) tmp(%1) incr(%4) &sem->count(%3)\n"
-"1: ldsw [%3], %0\n"
-" mov %0, %1\n"
-" cmp %0, 0\n"
-" movl %%icc, 0, %1\n"
-" add %1, %4, %1\n"
-" cas [%3], %0, %1\n"
-" cmp %0, %1\n"
-" membar #StoreLoad | #StoreStore\n"
-" bne,pn %%icc, 1b\n"
-" nop\n"
- : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
- : "r" (&sem->count), "r" (incr), "m" (sem->count)
- : "cc");
-
- return old_count;
-}
-
-static void __up(struct semaphore *sem)
-{
- __sem_update_count(sem, 1);
- wake_up(&sem->wait);
-}
-
-void up(struct semaphore *sem)
-{
- /* This atomically does:
- * old_val = sem->count;
- * new_val = sem->count + 1;
- * sem->count = new_val;
- * if (old_val < 0)
- * __up(sem);
- *
- * The (old_val < 0) test is equivalent to
- * the more straightforward (new_val <= 0),
- * but it is easier to test the former because
- * of how the CAS instruction works.
- */
-
- __asm__ __volatile__("\n"
-" ! up sem(%0)\n"
-" membar #StoreLoad | #LoadLoad\n"
-"1: lduw [%0], %%g1\n"
-" add %%g1, 1, %%g7\n"
-" cas [%0], %%g1, %%g7\n"
-" cmp %%g1, %%g7\n"
-" bne,pn %%icc, 1b\n"
-" addcc %%g7, 1, %%g0\n"
-" membar #StoreLoad | #StoreStore\n"
-" ble,pn %%icc, 3f\n"
-" nop\n"
-"2:\n"
-" .subsection 2\n"
-"3: mov %0, %%g1\n"
-" save %%sp, -160, %%sp\n"
-" call %1\n"
-" mov %%g1, %%o0\n"
-" ba,pt %%xcc, 2b\n"
-" restore\n"
-" .previous\n"
- : : "r" (sem), "i" (__up)
- : "g1", "g2", "g3", "g7", "memory", "cc");
-}
-
-static void __sched __down(struct semaphore * sem)
-{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
-
- tsk->state = TASK_UNINTERRUPTIBLE;
- add_wait_queue_exclusive(&sem->wait, &wait);
-
- while (__sem_update_count(sem, -1) <= 0) {
- schedule();
- tsk->state = TASK_UNINTERRUPTIBLE;
- }
- remove_wait_queue(&sem->wait, &wait);
- tsk->state = TASK_RUNNING;
-
- wake_up(&sem->wait);
-}
-
-void __sched down(struct semaphore *sem)
-{
- might_sleep();
- /* This atomically does:
- * old_val = sem->count;
- * new_val = sem->count - 1;
- * sem->count = new_val;
- * if (old_val < 1)
- * __down(sem);
- *
- * The (old_val < 1) test is equivalent to
- * the more straightforward (new_val < 0),
- * but it is easier to test the former because
- * of how the CAS instruction works.
- */
-
- __asm__ __volatile__("\n"
-" ! down sem(%0)\n"
-"1: lduw [%0], %%g1\n"
-" sub %%g1, 1, %%g7\n"
-" cas [%0], %%g1, %%g7\n"
-" cmp %%g1, %%g7\n"
-" bne,pn %%icc, 1b\n"
-" cmp %%g7, 1\n"
-" membar #StoreLoad | #StoreStore\n"
-" bl,pn %%icc, 3f\n"
-" nop\n"
-"2:\n"
-" .subsection 2\n"
-"3: mov %0, %%g1\n"
-" save %%sp, -160, %%sp\n"
-" call %1\n"
-" mov %%g1, %%o0\n"
-" ba,pt %%xcc, 2b\n"
-" restore\n"
-" .previous\n"
- : : "r" (sem), "i" (__down)
- : "g1", "g2", "g3", "g7", "memory", "cc");
-}
-
-int down_trylock(struct semaphore *sem)
-{
- int ret;
-
- /* This atomically does:
- * old_val = sem->count;
- * new_val = sem->count - 1;
- * if (old_val < 1) {
- * ret = 1;
- * } else {
- * sem->count = new_val;
- * ret = 0;
- * }
- *
- * The (old_val < 1) test is equivalent to
- * the more straightforward (new_val < 0),
- * but it is easier to test the former because
- * of how the CAS instruction works.
- */
-
- __asm__ __volatile__("\n"
-" ! down_trylock sem(%1) ret(%0)\n"
-"1: lduw [%1], %%g1\n"
-" sub %%g1, 1, %%g7\n"
-" cmp %%g1, 1\n"
-" bl,pn %%icc, 2f\n"
-" mov 1, %0\n"
-" cas [%1], %%g1, %%g7\n"
-" cmp %%g1, %%g7\n"
-" bne,pn %%icc, 1b\n"
-" mov 0, %0\n"
-" membar #StoreLoad | #StoreStore\n"
-"2:\n"
- : "=&r" (ret)
- : "r" (sem)
- : "g1", "g7", "memory", "cc");
-
- return ret;
-}
-
-static int __sched __down_interruptible(struct semaphore * sem)
-{
- int retval = 0;
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
-
- tsk->state = TASK_INTERRUPTIBLE;
- add_wait_queue_exclusive(&sem->wait, &wait);
-
- while (__sem_update_count(sem, -1) <= 0) {
- if (signal_pending(current)) {
- __sem_update_count(sem, 0);
- retval = -EINTR;
- break;
- }
- schedule();
- tsk->state = TASK_INTERRUPTIBLE;
- }
- tsk->state = TASK_RUNNING;
- remove_wait_queue(&sem->wait, &wait);
- wake_up(&sem->wait);
- return retval;
-}
-
-int __sched down_interruptible(struct semaphore *sem)
-{
- int ret = 0;
-
- might_sleep();
- /* This atomically does:
- * old_val = sem->count;
- * new_val = sem->count - 1;
- * sem->count = new_val;
- * if (old_val < 1)
- * ret = __down_interruptible(sem);
- *
- * The (old_val < 1) test is equivalent to
- * the more straightforward (new_val < 0),
- * but it is easier to test the former because
- * of how the CAS instruction works.
- */
-
- __asm__ __volatile__("\n"
-" ! down_interruptible sem(%2) ret(%0)\n"
-"1: lduw [%2], %%g1\n"
-" sub %%g1, 1, %%g7\n"
-" cas [%2], %%g1, %%g7\n"
-" cmp %%g1, %%g7\n"
-" bne,pn %%icc, 1b\n"
-" cmp %%g7, 1\n"
-" membar #StoreLoad | #StoreStore\n"
-" bl,pn %%icc, 3f\n"
-" nop\n"
-"2:\n"
-" .subsection 2\n"
-"3: mov %2, %%g1\n"
-" save %%sp, -160, %%sp\n"
-" call %3\n"
-" mov %%g1, %%o0\n"
-" ba,pt %%xcc, 2b\n"
-" restore\n"
-" .previous\n"
- : "=r" (ret)
- : "0" (ret), "r" (sem), "i" (__down_interruptible)
- : "g1", "g2", "g3", "g7", "memory", "cc");
- return ret;
-}
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index 51fa773f38c9..051b8d9cb989 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -130,12 +130,6 @@ EXPORT_SYMBOL(_mcount);
EXPORT_SYMBOL(sparc64_get_clock_tick);
-/* semaphores */
-EXPORT_SYMBOL(down);
-EXPORT_SYMBOL(down_trylock);
-EXPORT_SYMBOL(down_interruptible);
-EXPORT_SYMBOL(up);
-
/* RW semaphores */
EXPORT_SYMBOL(__down_read);
EXPORT_SYMBOL(__down_read_trylock);
diff --git a/arch/um/Kconfig.i386 b/arch/um/Kconfig.i386
index 3cd8a04d66d8..e09edfa560da 100644
--- a/arch/um/Kconfig.i386
+++ b/arch/um/Kconfig.i386
@@ -19,10 +19,6 @@ config 64BIT
bool
default n
-config SEMAPHORE_SLEEPERS
- bool
- default y
-
config 3_LEVEL_PGTABLES
bool "Three-level pagetables (EXPERIMENTAL)"
default n
diff --git a/arch/um/Kconfig.x86_64 b/arch/um/Kconfig.x86_64
index 6533b349f061..3fbe69e359ed 100644
--- a/arch/um/Kconfig.x86_64
+++ b/arch/um/Kconfig.x86_64
@@ -11,10 +11,6 @@ config RWSEM_GENERIC_SPINLOCK
bool
default y
-config SEMAPHORE_SLEEPERS
- bool
- default y
-
config 3_LEVEL_PGTABLES
bool
default y
diff --git a/arch/um/sys-i386/ksyms.c b/arch/um/sys-i386/ksyms.c
index 2a1eac1859ce..bfbefd30db8f 100644
--- a/arch/um/sys-i386/ksyms.c
+++ b/arch/um/sys-i386/ksyms.c
@@ -1,17 +1,5 @@
#include "linux/module.h"
-#include "linux/in6.h"
-#include "linux/rwsem.h"
-#include "asm/byteorder.h"
-#include "asm/delay.h"
-#include "asm/semaphore.h"
-#include "asm/uaccess.h"
#include "asm/checksum.h"
-#include "asm/errno.h"
-
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__down_failed_trylock);
-EXPORT_SYMBOL(__up_wakeup);
/* Networking helper routines. */
EXPORT_SYMBOL(csum_partial);
diff --git a/arch/um/sys-ppc/Makefile b/arch/um/sys-ppc/Makefile
index 08901526e893..b8bc844fd2c4 100644
--- a/arch/um/sys-ppc/Makefile
+++ b/arch/um/sys-ppc/Makefile
@@ -3,7 +3,7 @@ OBJ = built-in.o
.S.o:
$(CC) $(KBUILD_AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o
-OBJS = ptrace.o sigcontext.o semaphore.o checksum.o miscthings.o misc.o \
+OBJS = ptrace.o sigcontext.o checksum.o miscthings.o misc.o \
ptrace_user.o sysrq.o
EXTRA_AFLAGS := -DCONFIG_PPC32 -I. -I$(srctree)/arch/ppc/kernel
@@ -20,10 +20,6 @@ ptrace_user.o: ptrace_user.c
sigcontext.o: sigcontext.c
$(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $<
-semaphore.c:
- rm -f $@
- ln -s $(srctree)/arch/ppc/kernel/$@ $@
-
checksum.S:
rm -f $@
ln -s $(srctree)/arch/ppc/lib/$@ $@
@@ -66,4 +62,4 @@ misc.o: misc.S ppc_defs.h
$(CC) $(EXTRA_AFLAGS) $(KBUILD_AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o
rm -f asm
-clean-files := $(OBJS) ppc_defs.h checksum.S semaphore.c mk_defs.c
+clean-files := $(OBJS) ppc_defs.h checksum.S mk_defs.c
diff --git a/arch/um/sys-x86_64/ksyms.c b/arch/um/sys-x86_64/ksyms.c
index 12c593607c59..4d7d1a812d8f 100644
--- a/arch/um/sys-x86_64/ksyms.c
+++ b/arch/um/sys-x86_64/ksyms.c
@@ -1,16 +1,5 @@
#include "linux/module.h"
-#include "linux/in6.h"
-#include "linux/rwsem.h"
-#include "asm/byteorder.h"
-#include "asm/semaphore.h"
-#include "asm/uaccess.h"
-#include "asm/checksum.h"
-#include "asm/errno.h"
-
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__down_failed_trylock);
-EXPORT_SYMBOL(__up_wakeup);
+#include "asm/string.h"
/*XXX: we need them because they would be exported by x86_64 */
EXPORT_SYMBOL(__memcpy);
diff --git a/arch/v850/kernel/Makefile b/arch/v850/kernel/Makefile
index 3930482bddc4..da5889c53576 100644
--- a/arch/v850/kernel/Makefile
+++ b/arch/v850/kernel/Makefile
@@ -11,7 +11,7 @@
extra-y := head.o init_task.o vmlinux.lds
-obj-y += intv.o entry.o process.o syscalls.o time.o semaphore.o setup.o \
+obj-y += intv.o entry.o process.o syscalls.o time.o setup.o \
signal.o irq.o mach.o ptrace.o bug.o
obj-$(CONFIG_MODULES) += module.o v850_ksyms.o
# chip-specific code
diff --git a/arch/v850/kernel/semaphore.c b/arch/v850/kernel/semaphore.c
deleted file mode 100644
index fc89fd661c99..000000000000
--- a/arch/v850/kernel/semaphore.c
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * arch/v850/kernel/semaphore.c -- Semaphore support
- *
- * Copyright (C) 1998-2000 IBM Corporation
- * Copyright (C) 1999 Linus Torvalds
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License. See the file COPYING in the main directory of this
- * archive for more details.
- *
- * This file is a copy of the s390 version, arch/s390/kernel/semaphore.c
- * Author(s): Martin Schwidefsky
- * which was derived from the i386 version, linux/arch/i386/kernel/semaphore.c
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-
-#include <asm/semaphore.h>
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to acquire the semaphore, while the "sleeping"
- * variable is a count of such acquires.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * "sleeping" and the contention routine ordering is
- * protected by the semaphore spinlock.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-
-/*
- * Logic:
- * - only on a boundary condition do we need to care. When we go
- * from a negative count to a non-negative, we wake people up.
- * - when we go from a non-negative count to a negative do we
- * (a) synchronize with the "sleeper" count and (b) make sure
- * that we're on the wakeup list before we synchronize so that
- * we cannot lose wakeup events.
- */
-
-void __up(struct semaphore *sem)
-{
- wake_up(&sem->wait);
-}
-
-static DEFINE_SPINLOCK(semaphore_lock);
-
-void __sched __down(struct semaphore * sem)
-{
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- tsk->state = TASK_UNINTERRUPTIBLE;
- add_wait_queue_exclusive(&sem->wait, &wait);
-
- spin_lock_irq(&semaphore_lock);
- sem->sleepers++;
- for (;;) {
- int sleepers = sem->sleepers;
-
- /*
- * Add "everybody else" into it. They aren't
- * playing, because we own the spinlock.
- */
- if (!atomic_add_negative(sleepers - 1, &sem->count)) {
- sem->sleepers = 0;
- break;
- }
- sem->sleepers = 1; /* us - see -1 above */
- spin_unlock_irq(&semaphore_lock);
-
- schedule();
- tsk->state = TASK_UNINTERRUPTIBLE;
- spin_lock_irq(&semaphore_lock);
- }
- spin_unlock_irq(&semaphore_lock);
- remove_wait_queue(&sem->wait, &wait);
- tsk->state = TASK_RUNNING;
- wake_up(&sem->wait);
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
- int retval = 0;
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- tsk->state = TASK_INTERRUPTIBLE;
- add_wait_queue_exclusive(&sem->wait, &wait);
-
- spin_lock_irq(&semaphore_lock);
- sem->sleepers ++;
- for (;;) {
- int sleepers = sem->sleepers;
-
- /*
- * With signals pending, this turns into
- * the trylock failure case - we won't be
- * sleeping, and we* can't get the lock as
- * it has contention. Just correct the count
- * and exit.
- */
- if (signal_pending(current)) {
- retval = -EINTR;
- sem->sleepers = 0;
- atomic_add(sleepers, &sem->count);
- break;
- }
-
- /*
- * Add "everybody else" into it. They aren't
- * playing, because we own the spinlock. The
- * "-1" is because we're still hoping to get
- * the lock.
- */
- if (!atomic_add_negative(sleepers - 1, &sem->count)) {
- sem->sleepers = 0;
- break;
- }
- sem->sleepers = 1; /* us - see -1 above */
- spin_unlock_irq(&semaphore_lock);
-
- schedule();
- tsk->state = TASK_INTERRUPTIBLE;
- spin_lock_irq(&semaphore_lock);
- }
- spin_unlock_irq(&semaphore_lock);
- tsk->state = TASK_RUNNING;
- remove_wait_queue(&sem->wait, &wait);
- wake_up(&sem->wait);
- return retval;
-}
-
-/*
- * Trylock failed - make sure we correct for
- * having decremented the count.
- */
-int __down_trylock(struct semaphore * sem)
-{
- unsigned long flags;
- int sleepers;
-
- spin_lock_irqsave(&semaphore_lock, flags);
- sleepers = sem->sleepers + 1;
- sem->sleepers = 0;
-
- /*
- * Add "everybody else" and us into it. They aren't
- * playing, because we own the spinlock.
- */
- if (!atomic_add_negative(sleepers, &sem->count))
- wake_up(&sem->wait);
-
- spin_unlock_irqrestore(&semaphore_lock, flags);
- return 1;
-}
diff --git a/arch/v850/kernel/v850_ksyms.c b/arch/v850/kernel/v850_ksyms.c
index 93575fdc874d..8d386a5dbc4a 100644
--- a/arch/v850/kernel/v850_ksyms.c
+++ b/arch/v850/kernel/v850_ksyms.c
@@ -11,7 +11,6 @@
#include <asm/pgalloc.h>
#include <asm/irq.h>
#include <asm/io.h>
-#include <asm/semaphore.h>
#include <asm/checksum.h>
#include <asm/current.h>
@@ -34,12 +33,6 @@ EXPORT_SYMBOL (memset);
EXPORT_SYMBOL (memcpy);
EXPORT_SYMBOL (memmove);
-/* semaphores */
-EXPORT_SYMBOL (__down);
-EXPORT_SYMBOL (__down_interruptible);
-EXPORT_SYMBOL (__down_trylock);
-EXPORT_SYMBOL (__up);
-
/*
* libgcc functions - functions that are used internally by the
* compiler... (prototypes are not correct though, but that
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6c70fed0f9a0..2a59dbb28248 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -23,6 +23,7 @@ config X86
select HAVE_KPROBES
select HAVE_KRETPROBES
select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
+ select HAVE_ARCH_KGDB
config GENERIC_LOCKBREAK
@@ -53,9 +54,6 @@ config STACKTRACE_SUPPORT
config HAVE_LATENCYTOP_SUPPORT
def_bool y
-config SEMAPHORE_SLEEPERS
- def_bool y
-
config FAST_CMPXCHG_LOCAL
bool
default y
@@ -117,7 +115,7 @@ config ARCH_HAS_CPU_RELAX
def_bool y
config HAVE_SETUP_PER_CPU_AREA
- def_bool X86_64
+ def_bool X86_64 || (X86_SMP && !X86_VOYAGER)
config ARCH_HIBERNATION_POSSIBLE
def_bool y
@@ -171,7 +169,7 @@ config X86_64_SMP
config X86_HT
bool
depends on SMP
- depends on (X86_32 && !(X86_VISWS || X86_VOYAGER)) || (X86_64 && !MK8)
+ depends on (X86_32 && !(X86_VISWS || X86_VOYAGER)) || X86_64
default y
config X86_BIOS_REBOOT
@@ -181,7 +179,7 @@ config X86_BIOS_REBOOT
config X86_TRAMPOLINE
bool
- depends on X86_SMP || (X86_VOYAGER && SMP)
+ depends on X86_SMP || (X86_VOYAGER && SMP) || (64BIT && ACPI_SLEEP)
default y
config KTIME_SCALAR
@@ -241,8 +239,7 @@ config X86_ELAN
config X86_VOYAGER
bool "Voyager (NCR)"
- depends on X86_32
- select SMP if !BROKEN
+ depends on X86_32 && (SMP || BROKEN)
help
Voyager is an MCA-based 32-way capable SMP architecture proprietary
to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based.
@@ -254,9 +251,8 @@ config X86_VOYAGER
config X86_NUMAQ
bool "NUMAQ (IBM/Sequent)"
- select SMP
+ depends on SMP && X86_32
select NUMA
- depends on X86_32
help
This option is used for getting Linux to run on a (IBM/Sequent) NUMA
multiquad box. This changes the way that processors are bootstrapped,
@@ -327,8 +323,9 @@ config X86_RDC321X
config X86_VSMP
bool "Support for ScaleMP vSMP"
- depends on X86_64 && PCI
- help
+ select PARAVIRT
+ depends on X86_64
+ help
Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is
supposed to run on these EM64T-based machines. Only choose this option
if you have one of these machines.
@@ -383,6 +380,35 @@ config PARAVIRT
endif
+config MEMTEST_BOOTPARAM
+ bool "Memtest boot parameter"
+ depends on X86_64
+ default y
+ help
+ This option adds a kernel parameter 'memtest', which allows memtest
+ to be disabled at boot. If this option is selected, memtest
+ functionality can be disabled with memtest=0 on the kernel
+ command line. The purpose of this option is to allow a single
+ kernel image to be distributed with memtest built in, but not
+ necessarily enabled.
+
+ If you are unsure how to answer this question, answer Y.
+
+config MEMTEST_BOOTPARAM_VALUE
+ int "Memtest boot parameter default value (0-4)"
+ depends on MEMTEST_BOOTPARAM
+ range 0 4
+ default 0
+ help
+ This option sets the default value for the kernel parameter
+ 'memtest', which allows memtest to be disabled at boot. If this
+ option is set to 0 (zero), the memtest kernel parameter will
+ default to 0, disabling memtest at bootup. If this option is
+ set to 4, the memtest kernel parameter will default to 4,
+ enabling memtest at bootup and using that as the pattern number.
+
+ If you are unsure how to answer this question, answer 0.
+
config ACPI_SRAT
def_bool y
depends on X86_32 && ACPI && NUMA && (X86_SUMMIT || X86_GENERICARCH)
@@ -507,7 +533,7 @@ config NR_CPUS
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
- depends on (X86_64 && SMP) || (X86_32 && X86_HT)
+ depends on X86_HT
help
SMT scheduler support improves the CPU scheduler's decision making
when dealing with Intel Pentium 4 chips with HyperThreading at a
@@ -517,7 +543,7 @@ config SCHED_SMT
config SCHED_MC
def_bool y
prompt "Multi-core scheduler support"
- depends on (X86_64 && SMP) || (X86_32 && X86_HT)
+ depends on X86_HT
help
Multi-core scheduler support improves the CPU scheduler's decision
making when dealing with multi-core CPU chips at a cost of slightly
@@ -886,7 +912,7 @@ config NUMA_EMU
number of nodes. This is only useful for debugging.
config NODES_SHIFT
- int
+ int "Max num nodes shift(1-15)"
range 1 15 if X86_64
default "6" if X86_64
default "4" if X86_NUMAQ
@@ -1010,6 +1036,21 @@ config MTRR
See <file:Documentation/mtrr.txt> for more information.
+config X86_PAT
+ def_bool y
+ prompt "x86 PAT support"
+ depends on MTRR && NONPROMISC_DEVMEM
+ help
+ Use PAT attributes to setup page level cache control.
+
+ PATs are the modern equivalents of MTRRs and are much more
+ flexible than MTRRs.
+
+ Say N here if you see bootup problems (boot crash, boot hang,
+ spontaneous reboots) or a non-working video driver.
+
+ If unsure, say Y.
+
config EFI
def_bool n
prompt "EFI runtime service support"
@@ -1078,6 +1119,7 @@ source kernel/Kconfig.hz
config KEXEC
bool "kexec system call"
+ depends on X86_64 || X86_BIOS_REBOOT
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
@@ -1379,7 +1421,7 @@ endmenu
menu "Bus options (PCI etc.)"
config PCI
- bool "PCI support" if !X86_VISWS
+ bool "PCI support" if !X86_VISWS && !X86_VSMP
depends on !X86_VOYAGER
default y
select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC)
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 9304bfba7d45..57072f2716f9 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -388,7 +388,7 @@ config X86_OOSTORE
#
config X86_P6_NOP
def_bool y
- depends on (X86_64 || !X86_GENERIC) && (M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MCORE2 || MPENTIUM4)
+ depends on (X86_64 || !X86_GENERIC) && (M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MCORE2 || MPENTIUM4 || MPSC)
config X86_TSC
def_bool y
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 702eb39901ca..610aaecc19f8 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -54,6 +54,18 @@ config DEBUG_PER_CPU_MAPS
Say N if unsure.
+config X86_PTDUMP
+ bool "Export kernel pagetable layout to userspace via debugfs"
+ depends on DEBUG_KERNEL
+ select DEBUG_FS
+ help
+ Say Y here if you want to show the kernel pagetable layout in a
+ debugfs file. This information is only useful for kernel developers
+ who are working in architecture specific areas of the kernel.
+ It is probably not a good idea to enable this feature in a production
+ kernel.
+ If in doubt, say "N"
+
config DEBUG_RODATA
bool "Write protect kernel read-only data structures"
default y
@@ -64,6 +76,18 @@ config DEBUG_RODATA
data. This is recommended so that we can catch kernel bugs sooner.
If in doubt, say "Y".
+config DIRECT_GBPAGES
+ bool "Enable gbpages-mapped kernel pagetables"
+ depends on DEBUG_KERNEL && EXPERIMENTAL && X86_64
+ help
+ Enable gigabyte pages support (if the CPU supports it). This can
+ improve the kernel's performance a tiny bit by reducing TLB
+ pressure.
+
+ This is experimental code.
+
+ If in doubt, say "N".
+
config DEBUG_RODATA_TEST
bool "Testcase for the DEBUG_RODATA feature"
depends on DEBUG_RODATA
@@ -82,8 +106,8 @@ config DEBUG_NX_TEST
config 4KSTACKS
bool "Use 4Kb for kernel stacks instead of 8Kb"
- depends on DEBUG_KERNEL
depends on X86_32
+ default y
help
If you say Y here the kernel will use a 4Kb stacksize for the
kernel stack attached to each process/thread. This facilitates
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index f1e739a43d41..3cff3c894cf3 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -151,7 +151,6 @@ mflags-y += -Iinclude/asm-x86/mach-default
# 64 bit does not support subarch support - clear sub arch variables
fcore-$(CONFIG_X86_64) :=
mcore-$(CONFIG_X86_64) :=
-mflags-$(CONFIG_X86_64) :=
KBUILD_CFLAGS += $(mflags-y)
KBUILD_AFLAGS += $(mflags-y)
@@ -159,9 +158,9 @@ KBUILD_AFLAGS += $(mflags-y)
###
# Kernel objects
-head-y := arch/x86/kernel/head_$(BITS).o
-head-$(CONFIG_X86_64) += arch/x86/kernel/head64.o
-head-y += arch/x86/kernel/init_task.o
+head-y := arch/x86/kernel/head_$(BITS).o
+head-y += arch/x86/kernel/head$(BITS).o
+head-y += arch/x86/kernel/init_task.o
libs-y += arch/x86/lib/
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index f88458e83ef0..7ee102f9c4f8 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -30,7 +30,7 @@ subdir- := compressed
setup-y += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o
setup-y += header.o main.o mca.o memory.o pm.o pmjump.o
-setup-y += printf.o string.o tty.o video.o version.o
+setup-y += printf.o string.o tty.o video.o video-mode.o version.o
setup-$(CONFIG_X86_APM_BOOT) += apm.o
setup-$(CONFIG_X86_VOYAGER) += voyager.o
@@ -94,6 +94,20 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))
+sed-offsets := -e 's/^00*/0/' \
+ -e 's/^\([0-9a-fA-F]*\) . \(input_data\|input_data_end\)$$/\#define \2 0x\1/p'
+
+quiet_cmd_offsets = OFFSETS $@
+ cmd_offsets = $(NM) $< | sed -n $(sed-offsets) > $@
+
+$(obj)/offsets.h: $(obj)/compressed/vmlinux FORCE
+ $(call if_changed,offsets)
+
+targets += offsets.h
+
+AFLAGS_header.o += -I$(obj)
+$(obj)/header.o: $(obj)/offsets.h
+
LDFLAGS_setup.elf := -T
$(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
$(call if_changed,ld)
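The sed-offsets rule above filters the nm output of the compressed vmlinux down to the two symbols header.S needs. A hypothetical offsets.h produced by this rule (addresses invented purely for illustration) would contain just:

	#define input_data 0x179f
	#define input_data_end 0x4a2b0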
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 7822a4983da2..09578070bfba 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -286,6 +286,11 @@ int getchar_timeout(void);
/* video.c */
void set_video(void);
+/* video-mode.c */
+int set_mode(u16 mode);
+int mode_defined(u16 mode);
+void probe_cards(int unsafe);
+
/* video-vesa.c */
void vesa_store_edid(void);
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index d2b9f3bb87c0..92fdd35bd93e 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -22,7 +22,7 @@ $(obj)/vmlinux: $(src)/vmlinux_$(BITS).lds $(obj)/head_$(BITS).o $(obj)/misc.o $
$(call if_changed,ld)
@:
-OBJCOPYFLAGS_vmlinux.bin := -O binary -R .note -R .comment -S
+OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
$(obj)/vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 8182e32c1b42..dad4e699f5a3 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -15,6 +15,10 @@
* we just keep it from happening
*/
#undef CONFIG_PARAVIRT
+#ifdef CONFIG_X86_32
+#define _ASM_DESC_H_ 1
+#endif
+
#ifdef CONFIG_X86_64
#define _LINUX_STRING_H_ 1
#define __LINUX_BITMAP_H 1
@@ -22,6 +26,7 @@
#include <linux/linkage.h>
#include <linux/screen_info.h>
+#include <linux/elf.h>
#include <asm/io.h>
#include <asm/page.h>
#include <asm/boot.h>
@@ -53,8 +58,8 @@
* 1 bit (last block flag)
* 2 bits (block type)
*
- * 1 block occurs every 32K -1 bytes or when there 50% compression has been achieved.
- * The smallest block type encoding is always used.
+ * 1 block occurs every 32K -1 bytes or when 50% compression
+ * has been achieved. The smallest block type encoding is always used.
*
* stored:
* 32 bits length in bytes.
@@ -90,9 +95,9 @@
*
* All of which is enough to compute an amount of extra data that is required
* to be safe. To avoid problems at the block level allocating 5 extra bytes
- * per 32767 bytes of data is sufficient. To avoind problems internal to a block
- * adding an extra 32767 bytes (the worst case uncompressed block size) is
- * sufficient, to ensure that in the worst case the decompressed data for
+ * per 32767 bytes of data is sufficient. To avoid problems internal to a
+ * block adding an extra 32767 bytes (the worst case uncompressed block size)
+ * is sufficient, to ensure that in the worst case the decompressed data for
* block will stop the byte before the compressed data for a block begins.
* To avoid problems with the compressed data's meta information an extra 18
* bytes are needed. Leading to the formula:
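A quick worked example of the slack computation described above: for 4 MiB (4194304 bytes) of data there are at most ceil(4194304 / 32767) = 129 deflate blocks, so the block-level overhead is bounded by 129 * 5 = 645 bytes; adding one worst-case uncompressed block (32767 bytes) and 18 bytes of gzip metadata gives roughly 645 + 32767 + 18 = 33430 bytes of extra space.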
@@ -111,58 +116,66 @@
* gzip declarations
*/
-#define OF(args) args
-#define STATIC static
+#define OF(args) args
+#define STATIC static
#undef memset
#undef memcpy
-#define memzero(s, n) memset ((s), 0, (n))
+#define memzero(s, n) memset((s), 0, (n))
+
+typedef unsigned char uch;
+typedef unsigned short ush;
+typedef unsigned long ulg;
+
+/*
+ * Window size must be at least 32k, and a power of two.
+ * We don't actually have a window just a huge output buffer,
+ * so we report a 2G window size, as that should always be
+ * larger than our output buffer:
+ */
+#define WSIZE 0x80000000
+
+/* Input buffer: */
+static unsigned char *inbuf;
-typedef unsigned char uch;
-typedef unsigned short ush;
-typedef unsigned long ulg;
+/* Sliding window buffer (and final output buffer): */
+static unsigned char *window;
-#define WSIZE 0x80000000 /* Window size must be at least 32k,
- * and a power of two
- * We don't actually have a window just
- * a huge output buffer so I report
- * a 2G windows size, as that should
- * always be larger than our output buffer.
- */
+/* Valid bytes in inbuf: */
+static unsigned insize;
-static uch *inbuf; /* input buffer */
-static uch *window; /* Sliding window buffer, (and final output buffer) */
+/* Index of next byte to be processed in inbuf: */
+static unsigned inptr;
-static unsigned insize; /* valid bytes in inbuf */
-static unsigned inptr; /* index of next byte to be processed in inbuf */
-static unsigned outcnt; /* bytes in output buffer */
+/* Bytes in output buffer: */
+static unsigned outcnt;
/* gzip flag byte */
-#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */
-#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */
-#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */
-#define ORIG_NAME 0x08 /* bit 3 set: original file name present */
-#define COMMENT 0x10 /* bit 4 set: file comment present */
-#define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */
-#define RESERVED 0xC0 /* bit 6,7: reserved */
-
-#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf())
-
+#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */
+#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gz file */
+#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */
+#define ORIG_NAME 0x08 /* bit 3 set: original file name present */
+#define COMMENT 0x10 /* bit 4 set: file comment present */
+#define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */
+#define RESERVED 0xC0 /* bit 6, 7: reserved */
+
+#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf())
+
/* Diagnostic functions */
#ifdef DEBUG
-# define Assert(cond,msg) {if(!(cond)) error(msg);}
-# define Trace(x) fprintf x
-# define Tracev(x) {if (verbose) fprintf x ;}
-# define Tracevv(x) {if (verbose>1) fprintf x ;}
-# define Tracec(c,x) {if (verbose && (c)) fprintf x ;}
-# define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;}
+# define Assert(cond, msg) do { if (!(cond)) error(msg); } while (0)
+# define Trace(x) do { fprintf x; } while (0)
+# define Tracev(x) do { if (verbose) fprintf x ; } while (0)
+# define Tracevv(x) do { if (verbose > 1) fprintf x ; } while (0)
+# define Tracec(c, x) do { if (verbose && (c)) fprintf x ; } while (0)
+# define Tracecv(c, x) do { if (verbose > 1 && (c)) fprintf x ; } while (0)
#else
-# define Assert(cond,msg)
+# define Assert(cond, msg)
# define Trace(x)
# define Tracev(x)
# define Tracevv(x)
-# define Tracec(c,x)
-# define Tracecv(c,x)
+# define Tracec(c, x)
+# define Tracecv(c, x)
#endif
static int fill_inbuf(void);
@@ -170,7 +183,7 @@ static void flush_window(void);
static void error(char *m);
static void gzip_mark(void **);
static void gzip_release(void **);
-
+
/*
* This is set up by the setup-routine at boot-time
*/
@@ -185,7 +198,7 @@ static unsigned char *real_mode; /* Pointer to real-mode data */
extern unsigned char input_data[];
extern int input_len;
-static long bytes_out = 0;
+static long bytes_out;
static void *malloc(int size);
static void free(void *where);
@@ -210,7 +223,7 @@ static memptr free_mem_end_ptr;
#define HEAP_SIZE 0x4000
#endif
-static char *vidmem = (char *)0xb8000;
+static char *vidmem;
static int vidport;
static int lines, cols;
@@ -224,8 +237,10 @@ static void *malloc(int size)
{
void *p;
- if (size <0) error("Malloc error");
- if (free_mem_ptr <= 0) error("Memory error");
+ if (size < 0)
+ error("Malloc error");
+ if (free_mem_ptr <= 0)
+ error("Memory error");
free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */
@@ -251,19 +266,19 @@ static void gzip_release(void **ptr)
{
free_mem_ptr = (memptr) *ptr;
}
-
+
static void scroll(void)
{
int i;
- memcpy ( vidmem, vidmem + cols * 2, ( lines - 1 ) * cols * 2 );
- for ( i = ( lines - 1 ) * cols * 2; i < lines * cols * 2; i += 2 )
+ memcpy(vidmem, vidmem + cols * 2, (lines - 1) * cols * 2);
+ for (i = (lines - 1) * cols * 2; i < lines * cols * 2; i += 2)
vidmem[i] = ' ';
}
static void putstr(const char *s)
{
- int x,y,pos;
+ int x, y, pos;
char c;
#ifdef CONFIG_X86_32
@@ -274,18 +289,18 @@ static void putstr(const char *s)
x = RM_SCREEN_INFO.orig_x;
y = RM_SCREEN_INFO.orig_y;
- while ( ( c = *s++ ) != '\0' ) {
- if ( c == '\n' ) {
+ while ((c = *s++) != '\0') {
+ if (c == '\n') {
x = 0;
- if ( ++y >= lines ) {
+ if (++y >= lines) {
scroll();
y--;
}
} else {
vidmem [(x + cols * y) * 2] = c;
- if ( ++x >= cols ) {
+ if (++x >= cols) {
x = 0;
- if ( ++y >= lines ) {
+ if (++y >= lines) {
scroll();
y--;
}
@@ -303,22 +318,22 @@ static void putstr(const char *s)
outb(0xff & (pos >> 1), vidport+1);
}
-static void* memset(void* s, int c, unsigned n)
+static void *memset(void *s, int c, unsigned n)
{
int i;
char *ss = s;
- for (i=0;i<n;i++) ss[i] = c;
+ for (i = 0; i < n; i++) ss[i] = c;
return s;
}
-static void* memcpy(void* dest, const void* src, unsigned n)
+static void *memcpy(void *dest, const void *src, unsigned n)
{
int i;
const char *s = src;
char *d = dest;
- for (i=0;i<n;i++) d[i] = s[i];
+ for (i = 0; i < n; i++) d[i] = s[i];
return dest;
}
@@ -341,9 +356,9 @@ static void flush_window(void)
/* With my window equal to my output buffer
* I only need to compute the crc here.
*/
- ulg c = crc; /* temporary variable */
+ unsigned long c = crc; /* temporary variable */
unsigned n;
- uch *in, ch;
+ unsigned char *in, ch;
in = window;
for (n = 0; n < outcnt; n++) {
@@ -351,7 +366,7 @@ static void flush_window(void)
c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
}
crc = c;
- bytes_out += (ulg)outcnt;
+ bytes_out += (unsigned long)outcnt;
outcnt = 0;
}
@@ -365,9 +380,59 @@ static void error(char *x)
asm("hlt");
}
+static void parse_elf(void *output)
+{
+#ifdef CONFIG_X86_64
+ Elf64_Ehdr ehdr;
+ Elf64_Phdr *phdrs, *phdr;
+#else
+ Elf32_Ehdr ehdr;
+ Elf32_Phdr *phdrs, *phdr;
+#endif
+ void *dest;
+ int i;
+
+ memcpy(&ehdr, output, sizeof(ehdr));
+ if (ehdr.e_ident[EI_MAG0] != ELFMAG0 ||
+ ehdr.e_ident[EI_MAG1] != ELFMAG1 ||
+ ehdr.e_ident[EI_MAG2] != ELFMAG2 ||
+ ehdr.e_ident[EI_MAG3] != ELFMAG3) {
+ error("Kernel is not a valid ELF file");
+ return;
+ }
+
+ putstr("Parsing ELF... ");
+
+ phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum);
+ if (!phdrs)
+ error("Failed to allocate space for phdrs");
+
+ memcpy(phdrs, output + ehdr.e_phoff, sizeof(*phdrs) * ehdr.e_phnum);
+
+ for (i = 0; i < ehdr.e_phnum; i++) {
+ phdr = &phdrs[i];
+
+ switch (phdr->p_type) {
+ case PT_LOAD:
+#ifdef CONFIG_RELOCATABLE
+ dest = output;
+ dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR);
+#else
+ dest = (void *)(phdr->p_paddr);
+#endif
+ memcpy(dest,
+ output + phdr->p_offset,
+ phdr->p_filesz);
+ break;
+ default: /* Ignore other PT_* */ break;
+ }
+ }
+}
+
asmlinkage void decompress_kernel(void *rmode, memptr heap,
- uch *input_data, unsigned long input_len,
- uch *output)
+ unsigned char *input_data,
+ unsigned long input_len,
+ unsigned char *output)
{
real_mode = rmode;
@@ -390,12 +455,12 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
inptr = 0;
#ifdef CONFIG_X86_64
- if ((ulg)output & (__KERNEL_ALIGN - 1))
+ if ((unsigned long)output & (__KERNEL_ALIGN - 1))
error("Destination address not 2M aligned");
- if ((ulg)output >= 0xffffffffffUL)
+ if ((unsigned long)output >= 0xffffffffffUL)
error("Destination address too large");
#else
- if ((u32)output & (CONFIG_PHYSICAL_ALIGN -1))
+ if ((u32)output & (CONFIG_PHYSICAL_ALIGN - 1))
error("Destination address not CONFIG_PHYSICAL_ALIGN aligned");
if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff))
error("Destination address too large");
@@ -408,6 +473,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
makecrc();
putstr("\nDecompressing Linux... ");
gunzip();
+ parse_elf(output);
putstr("done.\nBooting the kernel.\n");
return;
}
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c
index 769065bd23d7..2462c88689ed 100644
--- a/arch/x86/boot/cpucheck.c
+++ b/arch/x86/boot/cpucheck.c
@@ -56,27 +56,27 @@ static const u32 req_flags[NCAPINTS] =
REQUIRED_MASK7,
};
-#define A32(a,b,c,d) (((d) << 24)+((c) << 16)+((b) << 8)+(a))
+#define A32(a, b, c, d) (((d) << 24)+((c) << 16)+((b) << 8)+(a))
static int is_amd(void)
{
- return cpu_vendor[0] == A32('A','u','t','h') &&
- cpu_vendor[1] == A32('e','n','t','i') &&
- cpu_vendor[2] == A32('c','A','M','D');
+ return cpu_vendor[0] == A32('A', 'u', 't', 'h') &&
+ cpu_vendor[1] == A32('e', 'n', 't', 'i') &&
+ cpu_vendor[2] == A32('c', 'A', 'M', 'D');
}
static int is_centaur(void)
{
- return cpu_vendor[0] == A32('C','e','n','t') &&
- cpu_vendor[1] == A32('a','u','r','H') &&
- cpu_vendor[2] == A32('a','u','l','s');
+ return cpu_vendor[0] == A32('C', 'e', 'n', 't') &&
+ cpu_vendor[1] == A32('a', 'u', 'r', 'H') &&
+ cpu_vendor[2] == A32('a', 'u', 'l', 's');
}
static int is_transmeta(void)
{
- return cpu_vendor[0] == A32('G','e','n','u') &&
- cpu_vendor[1] == A32('i','n','e','T') &&
- cpu_vendor[2] == A32('M','x','8','6');
+ return cpu_vendor[0] == A32('G', 'e', 'n', 'u') &&
+ cpu_vendor[1] == A32('i', 'n', 'e', 'T') &&
+ cpu_vendor[2] == A32('M', 'x', '8', '6');
}
static int has_fpu(void)
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 64ad9016585a..6d2df8d61c54 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -22,6 +22,7 @@
#include <asm/page.h>
#include <asm/setup.h>
#include "boot.h"
+#include "offsets.h"
SETUPSECTS = 4 /* default nr of setup-sectors */
BOOTSEG = 0x07C0 /* original address of boot-sector */
@@ -119,7 +120,7 @@ _start:
# Part 2 of the header, from the old setup.S
.ascii "HdrS" # header signature
- .word 0x0207 # header version number (>= 0x0105)
+ .word 0x0208 # header version number (>= 0x0105)
# or else old loadlin-1.5 will fail)
.globl realmode_swtch
realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
@@ -223,6 +224,9 @@ hardware_subarch: .long 0 # subarchitecture, added with 2.07
hardware_subarch_data: .quad 0
+payload_offset: .long input_data
+payload_length: .long input_data_end-input_data
+
# End of setup header #####################################################
.section ".inittext", "ax"
diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c
index 1a0f936c160b..a93cb8bded4d 100644
--- a/arch/x86/boot/pm.c
+++ b/arch/x86/boot/pm.c
@@ -100,7 +100,7 @@ static void reset_coprocessor(void)
/*
* Set up the GDT
*/
-#define GDT_ENTRY(flags,base,limit) \
+#define GDT_ENTRY(flags, base, limit) \
(((u64)(base & 0xff000000) << 32) | \
((u64)flags << 40) | \
((u64)(limit & 0x00ff0000) << 32) | \
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index b4248740ff0d..44dc1923c0e3 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -50,6 +50,75 @@ typedef unsigned long u32;
u8 buf[SETUP_SECT_MAX*512];
int is_big_kernel;
+/*----------------------------------------------------------------------*/
+
+static const u32 crctab32[] = {
+ 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
+ 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
+ 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
+ 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
+ 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
+ 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
+ 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
+ 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
+ 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
+ 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
+ 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
+ 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+ 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
+ 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
+ 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
+ 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
+ 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
+ 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
+ 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
+ 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
+ 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
+ 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
+ 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
+ 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+ 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
+ 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
+ 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
+ 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
+ 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
+ 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
+ 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
+ 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
+ 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
+ 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
+ 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
+ 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+ 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
+ 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
+ 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
+ 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
+ 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
+ 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
+ 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
+ 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
+ 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
+ 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
+ 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
+ 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+ 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
+ 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
+ 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
+ 0x2d02ef8d
+};
+
+static u32 partial_crc32_one(u8 c, u32 crc)
+{
+ return crctab32[(crc ^ c) & 0xff] ^ (crc >> 8);
+}
+
+static u32 partial_crc32(const u8 *s, int len, u32 crc)
+{
+ while (len--)
+ crc = partial_crc32_one(*s++, crc);
+ return crc;
+}
+
static void die(const char * str, ...)
{
va_list args;
@@ -74,6 +143,7 @@ int main(int argc, char ** argv)
FILE *file;
int fd;
void *kernel;
+ u32 crc = 0xffffffffUL;
if (argc > 2 && !strcmp(argv[1], "-b"))
{
@@ -144,7 +214,8 @@ int main(int argc, char ** argv)
kernel = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0);
if (kernel == MAP_FAILED)
die("Unable to mmap '%s': %m", argv[2]);
- sys_size = (sz + 15) / 16;
+ /* Number of 16-byte paragraphs, including space for a 4-byte CRC */
+ sys_size = (sz + 15 + 4) / 16;
if (!is_big_kernel && sys_size > DEF_SYSSIZE)
die("System is too big. Try using bzImage or modules.");
@@ -155,12 +226,27 @@ int main(int argc, char ** argv)
buf[0x1f6] = sys_size >> 16;
buf[0x1f7] = sys_size >> 24;
+ crc = partial_crc32(buf, i, crc);
if (fwrite(buf, 1, i, stdout) != i)
die("Writing setup failed");
/* Copy the kernel code */
+ crc = partial_crc32(kernel, sz, crc);
if (fwrite(kernel, 1, sz, stdout) != sz)
die("Writing kernel failed");
+
+ /* Add padding leaving 4 bytes for the checksum */
+ while (sz++ < (sys_size*16) - 4) {
+ crc = partial_crc32_one('\0', crc);
+ if (fwrite("\0", 1, 1, stdout) != 1)
+ die("Writing padding failed");
+ }
+
+ /* Write the CRC */
+ fprintf(stderr, "CRC %lx\n", crc);
+ if (fwrite(&crc, 1, 4, stdout) != 4)
+ die("Writing CRC failed");
+
close(fd);
/* Everything is OK */
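Because the tool now pads the image to a whole number of 16-byte paragraphs and stores the running CRC in the final four bytes, a consumer can check integrity by recomputing the same table-driven CRC over everything except the trailing word. A minimal sketch of such a check, reusing partial_crc32() from above and assuming a little-endian host (the tool writes the raw in-memory value with no byte swapping):

	static int image_crc_ok(const u8 *image, unsigned long len)
	{
		u32 crc = 0xffffffffUL;
		u32 stored = 0;

		if (len < 4)
			return 0;			/* too short to carry a CRC */
		crc = partial_crc32(image, (int)(len - 4), crc);
		memcpy(&stored, image + len - 4, 4);	/* little-endian load */
		return crc == stored;			/* non-zero when intact */
	}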
diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c
index ff664a117096..39e247e96172 100644
--- a/arch/x86/boot/video-bios.c
+++ b/arch/x86/boot/video-bios.c
@@ -50,6 +50,7 @@ static int set_bios_mode(u8 mode)
if (new_mode == mode)
return 0; /* Mode change OK */
+#ifndef _WAKEUP
if (new_mode != boot_params.screen_info.orig_video_mode) {
/* Mode setting failed, but we didn't end up where we
started. That's bad. Try to revert to the original
@@ -59,13 +60,18 @@ static int set_bios_mode(u8 mode)
: "+a" (ax)
: : "ebx", "ecx", "edx", "esi", "edi");
}
+#endif
return -1;
}
static int bios_probe(void)
{
u8 mode;
+#ifdef _WAKEUP
+ u8 saved_mode = 0x03;
+#else
u8 saved_mode = boot_params.screen_info.orig_video_mode;
+#endif
u16 crtc;
struct mode_info *mi;
int nmodes = 0;
diff --git a/arch/x86/boot/video-mode.c b/arch/x86/boot/video-mode.c
new file mode 100644
index 000000000000..748e8d06290a
--- /dev/null
+++ b/arch/x86/boot/video-mode.c
@@ -0,0 +1,173 @@
+/* -*- linux-c -*- ------------------------------------------------------- *
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright 2007-2008 rPath, Inc. - All Rights Reserved
+ *
+ * This file is part of the Linux kernel, and is made available under
+ * the terms of the GNU General Public License version 2.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * arch/i386/boot/video-mode.c
+ *
+ * Set the video mode. This is separated out into a different
+ * file in order to be shared with the ACPI wakeup code.
+ */
+
+#include "boot.h"
+#include "video.h"
+#include "vesa.h"
+
+/*
+ * Common variables
+ */
+int adapter; /* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */
+u16 video_segment;
+int force_x, force_y; /* Don't query the BIOS for cols/rows */
+
+int do_restore; /* Screen contents changed during mode flip */
+int graphic_mode; /* Graphic mode with linear frame buffer */
+
+/* Probe the video drivers and have them generate their mode lists. */
+void probe_cards(int unsafe)
+{
+ struct card_info *card;
+ static u8 probed[2];
+
+ if (probed[unsafe])
+ return;
+
+ probed[unsafe] = 1;
+
+ for (card = video_cards; card < video_cards_end; card++) {
+ if (card->unsafe == unsafe) {
+ if (card->probe)
+ card->nmodes = card->probe();
+ else
+ card->nmodes = 0;
+ }
+ }
+}
+
+/* Test if a mode is defined */
+int mode_defined(u16 mode)
+{
+ struct card_info *card;
+ struct mode_info *mi;
+ int i;
+
+ for (card = video_cards; card < video_cards_end; card++) {
+ mi = card->modes;
+ for (i = 0; i < card->nmodes; i++, mi++) {
+ if (mi->mode == mode)
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/* Set mode (without recalc) */
+static int raw_set_mode(u16 mode, u16 *real_mode)
+{
+ int nmode, i;
+ struct card_info *card;
+ struct mode_info *mi;
+
+ /* Drop the recalc bit if set */
+ mode &= ~VIDEO_RECALC;
+
+ /* Scan for mode based on fixed ID, position, or resolution */
+ nmode = 0;
+ for (card = video_cards; card < video_cards_end; card++) {
+ mi = card->modes;
+ for (i = 0; i < card->nmodes; i++, mi++) {
+ int visible = mi->x || mi->y;
+
+ if ((mode == nmode && visible) ||
+ mode == mi->mode ||
+ mode == (mi->y << 8)+mi->x) {
+ *real_mode = mi->mode;
+ return card->set_mode(mi);
+ }
+
+ if (visible)
+ nmode++;
+ }
+ }
+
+ /* Nothing found? Is it an "exceptional" (unprobed) mode? */
+ for (card = video_cards; card < video_cards_end; card++) {
+ if (mode >= card->xmode_first &&
+ mode < card->xmode_first+card->xmode_n) {
+ struct mode_info mix;
+ *real_mode = mix.mode = mode;
+ mix.x = mix.y = 0;
+ return card->set_mode(&mix);
+ }
+ }
+
+ /* Otherwise, failure... */
+ return -1;
+}
+
+/*
+ * Recalculate the vertical video cutoff (hack!)
+ */
+static void vga_recalc_vertical(void)
+{
+ unsigned int font_size, rows;
+ u16 crtc;
+ u8 pt, ov;
+
+ set_fs(0);
+ font_size = rdfs8(0x485); /* BIOS: font size (pixels) */
+ rows = force_y ? force_y : rdfs8(0x484)+1; /* Text rows */
+
+ rows *= font_size; /* Visible scan lines */
+ rows--; /* ... minus one */
+
+ crtc = vga_crtc();
+
+ pt = in_idx(crtc, 0x11);
+ pt &= ~0x80; /* Unlock CR0-7 */
+ out_idx(pt, crtc, 0x11);
+
+ out_idx((u8)rows, crtc, 0x12); /* Lower height register */
+
+ ov = in_idx(crtc, 0x07); /* Overflow register */
+ ov &= 0xbd;
+ ov |= (rows >> (8-1)) & 0x02;
+ ov |= (rows >> (9-6)) & 0x40;
+ out_idx(ov, crtc, 0x07);
+}
+
+/* Set mode (with recalc if specified) */
+int set_mode(u16 mode)
+{
+ int rv;
+ u16 real_mode;
+
+ /* Very special mode numbers... */
+ if (mode == VIDEO_CURRENT_MODE)
+ return 0; /* Nothing to do... */
+ else if (mode == NORMAL_VGA)
+ mode = VIDEO_80x25;
+ else if (mode == EXTENDED_VGA)
+ mode = VIDEO_8POINT;
+
+ rv = raw_set_mode(mode, &real_mode);
+ if (rv)
+ return rv;
+
+ if (mode & VIDEO_RECALC)
+ vga_recalc_vertical();
+
+ /* Save the canonical mode number for the kernel, not
+ an alias, size specification or menu position */
+#ifndef _WAKEUP
+ boot_params.hdr.vid_mode = real_mode;
+#endif
+ return 0;
+}
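The ACPI wakeup code added later in this patch is the second consumer of this interface; the expected calling sequence is probe first, then set. A minimal sketch of a caller, using mode constants video.h already defines:

	probe_cards(0);			/* build mode lists of the "safe" drivers */
	if (mode_defined(VIDEO_80x25))
		set_mode(VIDEO_80x25);	/* canonical 80x25 text mode */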
diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c
index 419b5c273374..5d5a3f6e8b5c 100644
--- a/arch/x86/boot/video-vesa.c
+++ b/arch/x86/boot/video-vesa.c
@@ -24,7 +24,11 @@ static struct vesa_mode_info vminfo;
__videocard video_vesa;
+#ifndef _WAKEUP
static void vesa_store_mode_params_graphics(void);
+#else /* _WAKEUP */
+static inline void vesa_store_mode_params_graphics(void) {}
+#endif /* _WAKEUP */
static int vesa_probe(void)
{
@@ -165,6 +169,8 @@ static int vesa_set_mode(struct mode_info *mode)
}
+#ifndef _WAKEUP
+
/* Switch DAC to 8-bit mode */
static void vesa_dac_set_8bits(void)
{
@@ -288,6 +294,8 @@ void vesa_store_edid(void)
#endif /* CONFIG_FIRMWARE_EDID */
}
+#endif /* not _WAKEUP */
+
__videocard video_vesa =
{
.card_name = "VESA",
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c
index 7259387b7d19..330d6589a2ad 100644
--- a/arch/x86/boot/video-vga.c
+++ b/arch/x86/boot/video-vga.c
@@ -210,6 +210,8 @@ static int vga_set_mode(struct mode_info *mode)
*/
static int vga_probe(void)
{
+ u16 ega_bx;
+
static const char *card_name[] = {
"CGA/MDA/HGC", "EGA", "VGA"
};
@@ -226,12 +228,16 @@ static int vga_probe(void)
u8 vga_flag;
asm(INT10
- : "=b" (boot_params.screen_info.orig_video_ega_bx)
+ : "=b" (ega_bx)
: "a" (0x1200), "b" (0x10) /* Check EGA/VGA */
: "ecx", "edx", "esi", "edi");
+#ifndef _WAKEUP
+ boot_params.screen_info.orig_video_ega_bx = ega_bx;
+#endif
+
/* If we have MDA/CGA/HGC then BL will be unchanged at 0x10 */
- if ((u8)boot_params.screen_info.orig_video_ega_bx != 0x10) {
+ if ((u8)ega_bx != 0x10) {
/* EGA/VGA */
asm(INT10
: "=a" (vga_flag)
@@ -240,7 +246,9 @@ static int vga_probe(void)
if (vga_flag == 0x1a) {
adapter = ADAPTER_VGA;
+#ifndef _WAKEUP
boot_params.screen_info.orig_video_isVGA = 1;
+#endif
} else {
adapter = ADAPTER_EGA;
}
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index 696d08f3843c..c1c47ba069ef 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -18,21 +18,6 @@
#include "video.h"
#include "vesa.h"
-/*
- * Mode list variables
- */
-static struct card_info cards[]; /* List of cards to probe for */
-
-/*
- * Common variables
- */
-int adapter; /* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */
-u16 video_segment;
-int force_x, force_y; /* Don't query the BIOS for cols/rows */
-
-int do_restore = 0; /* Screen contents changed during mode flip */
-int graphic_mode; /* Graphic mode with linear frame buffer */
-
static void store_cursor_position(void)
{
u16 curpos;
@@ -107,147 +92,6 @@ static void store_mode_params(void)
boot_params.screen_info.orig_video_lines = y;
}
-/* Probe the video drivers and have them generate their mode lists. */
-static void probe_cards(int unsafe)
-{
- struct card_info *card;
- static u8 probed[2];
-
- if (probed[unsafe])
- return;
-
- probed[unsafe] = 1;
-
- for (card = video_cards; card < video_cards_end; card++) {
- if (card->unsafe == unsafe) {
- if (card->probe)
- card->nmodes = card->probe();
- else
- card->nmodes = 0;
- }
- }
-}
-
-/* Test if a mode is defined */
-int mode_defined(u16 mode)
-{
- struct card_info *card;
- struct mode_info *mi;
- int i;
-
- for (card = video_cards; card < video_cards_end; card++) {
- mi = card->modes;
- for (i = 0; i < card->nmodes; i++, mi++) {
- if (mi->mode == mode)
- return 1;
- }
- }
-
- return 0;
-}
-
-/* Set mode (without recalc) */
-static int raw_set_mode(u16 mode, u16 *real_mode)
-{
- int nmode, i;
- struct card_info *card;
- struct mode_info *mi;
-
- /* Drop the recalc bit if set */
- mode &= ~VIDEO_RECALC;
-
- /* Scan for mode based on fixed ID, position, or resolution */
- nmode = 0;
- for (card = video_cards; card < video_cards_end; card++) {
- mi = card->modes;
- for (i = 0; i < card->nmodes; i++, mi++) {
- int visible = mi->x || mi->y;
-
- if ((mode == nmode && visible) ||
- mode == mi->mode ||
- mode == (mi->y << 8)+mi->x) {
- *real_mode = mi->mode;
- return card->set_mode(mi);
- }
-
- if (visible)
- nmode++;
- }
- }
-
- /* Nothing found? Is it an "exceptional" (unprobed) mode? */
- for (card = video_cards; card < video_cards_end; card++) {
- if (mode >= card->xmode_first &&
- mode < card->xmode_first+card->xmode_n) {
- struct mode_info mix;
- *real_mode = mix.mode = mode;
- mix.x = mix.y = 0;
- return card->set_mode(&mix);
- }
- }
-
- /* Otherwise, failure... */
- return -1;
-}
-
-/*
- * Recalculate the vertical video cutoff (hack!)
- */
-static void vga_recalc_vertical(void)
-{
- unsigned int font_size, rows;
- u16 crtc;
- u8 pt, ov;
-
- set_fs(0);
- font_size = rdfs8(0x485); /* BIOS: font size (pixels) */
- rows = force_y ? force_y : rdfs8(0x484)+1; /* Text rows */
-
- rows *= font_size; /* Visible scan lines */
- rows--; /* ... minus one */
-
- crtc = vga_crtc();
-
- pt = in_idx(crtc, 0x11);
- pt &= ~0x80; /* Unlock CR0-7 */
- out_idx(pt, crtc, 0x11);
-
- out_idx((u8)rows, crtc, 0x12); /* Lower height register */
-
- ov = in_idx(crtc, 0x07); /* Overflow register */
- ov &= 0xbd;
- ov |= (rows >> (8-1)) & 0x02;
- ov |= (rows >> (9-6)) & 0x40;
- out_idx(ov, crtc, 0x07);
-}
-
-/* Set mode (with recalc if specified) */
-static int set_mode(u16 mode)
-{
- int rv;
- u16 real_mode;
-
- /* Very special mode numbers... */
- if (mode == VIDEO_CURRENT_MODE)
- return 0; /* Nothing to do... */
- else if (mode == NORMAL_VGA)
- mode = VIDEO_80x25;
- else if (mode == EXTENDED_VGA)
- mode = VIDEO_8POINT;
-
- rv = raw_set_mode(mode, &real_mode);
- if (rv)
- return rv;
-
- if (mode & VIDEO_RECALC)
- vga_recalc_vertical();
-
- /* Save the canonical mode number for the kernel, not
- an alias, size specification or menu position */
- boot_params.hdr.vid_mode = real_mode;
- return 0;
-}
-
static unsigned int get_entry(void)
{
char entry_buf[4];
@@ -486,6 +330,7 @@ void set_video(void)
printf("Undefined video mode number: %x\n", mode);
mode = ASK_VGA;
}
+ boot_params.hdr.vid_mode = mode;
vesa_store_edid();
store_mode_params();
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 5e7771a3ba2f..05e155d3fb6c 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -468,7 +468,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
restorer = ka->sa.sa_restorer;
} else {
/* Return stub is in 32bit vsyscall page */
- if (current->binfmt->hasvdso)
+ if (current->mm->context.vdso)
restorer = VDSO32_SYMBOL(current->mm->context.vdso,
sigreturn);
else
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 8022d3c695c0..ae7158bce4d6 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -162,12 +162,14 @@ sysenter_tracesys:
SAVE_REST
CLEAR_RREGS
movq %r9,R9(%rsp)
- movq $-ENOSYS,RAX(%rsp) /* really needed? */
+ movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
xchgl %ebp,%r9d
+ cmpl $(IA32_NR_syscalls-1),%eax
+ ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
jmp sysenter_do_call
CFI_ENDPROC
ENDPROC(ia32_sysenter_target)
@@ -261,13 +263,15 @@ cstar_tracesys:
SAVE_REST
CLEAR_RREGS
movq %r9,R9(%rsp)
- movq $-ENOSYS,RAX(%rsp) /* really needed? */
+ movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
xchgl %ebp,%r9d
movl RSP-ARGOFFSET(%rsp), %r8d
+ cmpl $(IA32_NR_syscalls-1),%eax
+ ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
jmp cstar_do_call
END(ia32_cstar_target)
@@ -325,7 +329,7 @@ ENTRY(ia32_syscall)
jnz ia32_tracesys
ia32_do_syscall:
cmpl $(IA32_NR_syscalls-1),%eax
- ja ia32_badsys
+ ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
IA32_ARG_FIXUP
call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
ia32_sysret:
@@ -335,7 +339,7 @@ ia32_sysret:
ia32_tracesys:
SAVE_REST
CLEAR_RREGS
- movq $-ENOSYS,RAX(%rsp) /* really needed? */
+ movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index abf71d26fc2a..7cede7a9e0dc 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -26,51 +26,27 @@
#include <linux/file.h>
#include <linux/signal.h>
#include <linux/syscalls.h>
-#include <linux/resource.h>
#include <linux/times.h>
#include <linux/utsname.h>
-#include <linux/smp.h>
#include <linux/smp_lock.h>
-#include <linux/sem.h>
-#include <linux/msg.h>
#include <linux/mm.h>
-#include <linux/shm.h>
-#include <linux/slab.h>
#include <linux/uio.h>
-#include <linux/nfs_fs.h>
-#include <linux/quota.h>
-#include <linux/module.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
-#include <linux/nfsd/xdr.h>
-#include <linux/nfsd/syscall.h>
#include <linux/poll.h>
#include <linux/personality.h>
#include <linux/stat.h>
-#include <linux/ipc.h>
#include <linux/rwsem.h>
-#include <linux/binfmts.h>
-#include <linux/init.h>
-#include <linux/aio_abi.h>
-#include <linux/aio.h>
#include <linux/compat.h>
#include <linux/vfs.h>
#include <linux/ptrace.h>
#include <linux/highuid.h>
-#include <linux/vmalloc.h>
-#include <linux/fsnotify.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/types.h>
#include <asm/uaccess.h>
#include <asm/semaphore.h>
#include <asm/atomic.h>
-#include <asm/ldt.h>
-
-#include <net/scm.h>
-#include <net/sock.h>
#include <asm/ia32.h>
+#include <asm/vgtod.h>
#define AA(__x) ((unsigned long)(__x))
@@ -804,11 +780,6 @@ asmlinkage long sys32_execve(char __user *name, compat_uptr_t __user *argv,
if (IS_ERR(filename))
return error;
error = compat_do_execve(filename, argv, envp, regs);
- if (error == 0) {
- task_lock(current);
- current->ptrace &= ~PT_DTRACE;
- task_unlock(current);
- }
putname(filename);
return error;
}
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 4eb5ce841106..c3920ea8ac56 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -2,8 +2,7 @@
# Makefile for the linux kernel.
#
-extra-y := head_$(BITS).o init_task.o vmlinux.lds
-extra-$(CONFIG_X86_64) += head64.o
+extra-y := head_$(BITS).o head$(BITS).o init_task.o vmlinux.lds
CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
@@ -19,7 +18,7 @@ CFLAGS_tsc_64.o := $(nostackp)
obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
obj-y += traps_$(BITS).o irq_$(BITS).o
obj-y += time_$(BITS).o ioport.o ldt.o
-obj-y += setup_$(BITS).o i8259_$(BITS).o
+obj-y += setup_$(BITS).o i8259_$(BITS).o setup.o
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o
@@ -29,6 +28,7 @@ obj-y += alternative.o i8253.o
obj-$(CONFIG_X86_64) += pci-nommu_64.o bugs_64.o
obj-y += tsc_$(BITS).o io_delay.o rtc.o
+obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
obj-y += i387.o
obj-y += ptrace.o
obj-y += ds.o
@@ -47,11 +47,12 @@ obj-$(CONFIG_MICROCODE) += microcode.o
obj-$(CONFIG_PCI) += early-quirks.o
apm-y := apm_32.o
obj-$(CONFIG_APM) += apm.o
-obj-$(CONFIG_X86_SMP) += smp_$(BITS).o smpboot_$(BITS).o tsc_sync.o
-obj-$(CONFIG_X86_32_SMP) += smpcommon_32.o
-obj-$(CONFIG_X86_64_SMP) += smp_64.o smpboot_64.o tsc_sync.o
+obj-$(CONFIG_X86_SMP) += smp.o
+obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o
+obj-$(CONFIG_X86_32_SMP) += smpcommon.o
+obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
-obj-$(CONFIG_X86_MPPARSE) += mpparse_$(BITS).o
+obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi_$(BITS).o
obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o
obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
@@ -60,12 +61,13 @@ obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o
-obj-$(CONFIG_X86_VSMP) += vsmp_64.o
+obj-y += vsmp_64.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_MODULES) += module_$(BITS).o
obj-$(CONFIG_ACPI_SRAT) += srat_32.o
obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o
obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
+obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_VM86) += vm86_32.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
@@ -89,7 +91,7 @@ scx200-y += scx200_32.o
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
- obj-y += genapic_64.o genapic_flat_64.o
+ obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
obj-$(CONFIG_AUDIT) += audit_64.o
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index 19d3d6e9d09b..7335959b6aff 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -1,7 +1,14 @@
+subdir- := realmode
+
obj-$(CONFIG_ACPI) += boot.o
-obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o
+obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_rm.o wakeup_$(BITS).o
ifneq ($(CONFIG_ACPI_PROCESSOR),)
obj-y += cstate.o processor.o
endif
+$(obj)/wakeup_rm.o: $(obj)/realmode/wakeup.bin
+
+$(obj)/realmode/wakeup.bin: FORCE
+ $(Q)$(MAKE) $(build)=$(obj)/realmode $@
+
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 2cdc9de9371d..057ccf1d5ad4 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -39,6 +39,11 @@
#include <asm/apic.h>
#include <asm/io.h>
#include <asm/mpspec.h>
+#include <asm/smp.h>
+
+#ifdef CONFIG_X86_LOCAL_APIC
+# include <mach_apic.h>
+#endif
static int __initdata acpi_force = 0;
@@ -52,9 +57,7 @@ EXPORT_SYMBOL(acpi_disabled);
#ifdef CONFIG_X86_64
#include <asm/proto.h>
-
-static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; }
-
+#include <asm/genapic.h>
#else /* X86 */
@@ -111,7 +114,7 @@ char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size)
if (!phys_addr || !size)
return NULL;
- if (phys_addr+size <= (end_pfn_map << PAGE_SHIFT) + PAGE_SIZE)
+ if (phys_addr+size <= (max_pfn_mapped << PAGE_SHIFT) + PAGE_SIZE)
return __va(phys_addr);
return NULL;
@@ -237,6 +240,16 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
return 0;
}
+static void __cpuinit acpi_register_lapic(int id, u8 enabled)
+{
+ if (!enabled) {
+ ++disabled_cpus;
+ return;
+ }
+
+ generic_processor_info(id, 0);
+}
+
static int __init
acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end)
{
@@ -256,8 +269,26 @@ acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end)
* to not preallocating memory for all NR_CPUS
* when we use CPU hotplug.
*/
- mp_register_lapic(processor->id, /* APIC ID */
- processor->lapic_flags & ACPI_MADT_ENABLED); /* Enabled? */
+ acpi_register_lapic(processor->id, /* APIC ID */
+ processor->lapic_flags & ACPI_MADT_ENABLED);
+
+ return 0;
+}
+
+static int __init
+acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end)
+{
+ struct acpi_madt_local_sapic *processor = NULL;
+
+ processor = (struct acpi_madt_local_sapic *)header;
+
+ if (BAD_MADT_ENTRY(processor, end))
+ return -EINVAL;
+
+ acpi_table_print_madt_entry(header);
+
+ acpi_register_lapic((processor->id << 8) | processor->eid,/* APIC ID */
+ processor->lapic_flags & ACPI_MADT_ENABLED);
return 0;
}
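For example, a local SAPIC entry with id = 0x01 and eid = 0x02 is registered under the composed APIC ID (0x01 << 8) | 0x02 = 0x0102, keeping the SAPIC extended ID in the low byte.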
@@ -300,6 +331,8 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
#ifdef CONFIG_X86_IO_APIC
+struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
+
static int __init
acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
{
@@ -532,7 +565,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
buffer.pointer = NULL;
tmp_map = cpu_present_map;
- mp_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED);
+ acpi_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED);
/*
* If mp_register_lapic successfully generates a new logical cpu
@@ -732,6 +765,16 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table)
* Parse LAPIC entries in MADT
* returns 0 on success, < 0 on error
*/
+
+static void __init acpi_register_lapic_address(unsigned long address)
+{
+ mp_lapic_addr = address;
+
+ set_fixmap_nocache(FIX_APIC_BASE, address);
+ if (boot_cpu_physical_apicid == -1U)
+ boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+}
+
static int __init acpi_parse_madt_lapic_entries(void)
{
int count;
@@ -753,10 +796,14 @@ static int __init acpi_parse_madt_lapic_entries(void)
return count;
}
- mp_register_lapic_address(acpi_lapic_addr);
+ acpi_register_lapic_address(acpi_lapic_addr);
+
+ count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC,
+ acpi_parse_sapic, MAX_APICS);
- count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, acpi_parse_lapic,
- MAX_APICS);
+ if (!count)
+ count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
+ acpi_parse_lapic, MAX_APICS);
if (!count) {
printk(KERN_ERR PREFIX "No LAPIC entries present\n");
/* TBD: Cleanup to allow fallback to MPS */
diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile
new file mode 100644
index 000000000000..092900854acc
--- /dev/null
+++ b/arch/x86/kernel/acpi/realmode/Makefile
@@ -0,0 +1,57 @@
+#
+# arch/x86/kernel/acpi/realmode/Makefile
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License. See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+
+targets := wakeup.bin wakeup.elf
+
+wakeup-y += wakeup.o wakemain.o video-mode.o copy.o
+
+# The link order of the video-*.o modules can matter. In particular,
+# video-vga.o *must* be listed first, followed by video-vesa.o.
+# Hardware-specific drivers should follow in the order they should be
+# probed, and video-bios.o should typically be last.
+wakeup-y += video-vga.o
+wakeup-y += video-vesa.o
+wakeup-y += video-bios.o
+
+targets += $(wakeup-y)
+
+bootsrc := $(src)/../../../boot
+
+# ---------------------------------------------------------------------------
+
+# How to compile the 16-bit code. Note we always compile for -march=i386,
+# that way we can complain to the user if the CPU is insufficient.
+# Compile with _SETUP since this is similar to the boot-time setup code.
+KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D_WAKEUP -D__KERNEL__ \
+ -I$(srctree)/$(bootsrc) \
+ $(cflags-y) \
+ -Wall -Wstrict-prototypes \
+ -march=i386 -mregparm=3 \
+ -include $(srctree)/$(bootsrc)/code16gcc.h \
+ -fno-strict-aliasing -fomit-frame-pointer \
+ $(call cc-option, -ffreestanding) \
+ $(call cc-option, -fno-toplevel-reorder,\
+ $(call cc-option, -fno-unit-at-a-time)) \
+ $(call cc-option, -fno-stack-protector) \
+ $(call cc-option, -mpreferred-stack-boundary=2)
+KBUILD_CFLAGS += $(call cc-option, -m32)
+KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
+
+WAKEUP_OBJS = $(addprefix $(obj)/,$(wakeup-y))
+
+LDFLAGS_wakeup.elf := -T
+
+CPPFLAGS_wakeup.lds += -P -C
+
+$(obj)/wakeup.elf: $(src)/wakeup.lds $(WAKEUP_OBJS) FORCE
+ $(call if_changed,ld)
+
+OBJCOPYFLAGS_wakeup.bin := -O binary
+
+$(obj)/wakeup.bin: $(obj)/wakeup.elf FORCE
+ $(call if_changed,objcopy)
diff --git a/arch/x86/kernel/acpi/realmode/copy.S b/arch/x86/kernel/acpi/realmode/copy.S
new file mode 100644
index 000000000000..dc59ebee69d8
--- /dev/null
+++ b/arch/x86/kernel/acpi/realmode/copy.S
@@ -0,0 +1 @@
+#include "../../../boot/copy.S"
diff --git a/arch/x86/kernel/acpi/realmode/video-bios.c b/arch/x86/kernel/acpi/realmode/video-bios.c
new file mode 100644
index 000000000000..7deabc144a27
--- /dev/null
+++ b/arch/x86/kernel/acpi/realmode/video-bios.c
@@ -0,0 +1 @@
+#include "../../../boot/video-bios.c"
diff --git a/arch/x86/kernel/acpi/realmode/video-mode.c b/arch/x86/kernel/acpi/realmode/video-mode.c
new file mode 100644
index 000000000000..328ad209f113
--- /dev/null
+++ b/arch/x86/kernel/acpi/realmode/video-mode.c
@@ -0,0 +1 @@
+#include "../../../boot/video-mode.c"
diff --git a/arch/x86/kernel/acpi/realmode/video-vesa.c b/arch/x86/kernel/acpi/realmode/video-vesa.c
new file mode 100644
index 000000000000..9dbb9672226a
--- /dev/null
+++ b/arch/x86/kernel/acpi/realmode/video-vesa.c
@@ -0,0 +1 @@
+#include "../../../boot/video-vesa.c"
diff --git a/arch/x86/kernel/acpi/realmode/video-vga.c b/arch/x86/kernel/acpi/realmode/video-vga.c
new file mode 100644
index 000000000000..bcc81255f374
--- /dev/null
+++ b/arch/x86/kernel/acpi/realmode/video-vga.c
@@ -0,0 +1 @@
+#include "../../../boot/video-vga.c"
diff --git a/arch/x86/kernel/acpi/realmode/wakemain.c b/arch/x86/kernel/acpi/realmode/wakemain.c
new file mode 100644
index 000000000000..883962d9eef2
--- /dev/null
+++ b/arch/x86/kernel/acpi/realmode/wakemain.c
@@ -0,0 +1,81 @@
+#include "wakeup.h"
+#include "boot.h"
+
+static void udelay(int loops)
+{
+ while (loops--)
+ io_delay(); /* Approximately 1 us */
+}
+
+static void beep(unsigned int hz)
+{
+ u8 enable;
+
+ if (!hz) {
+ enable = 0x00; /* Turn off speaker */
+ } else {
+ u16 div = 1193181/hz;
+
+ outb(0xb6, 0x43); /* Ctr 2, squarewave, load, binary */
+ io_delay();
+ outb(div, 0x42); /* LSB of counter */
+ io_delay();
+ outb(div >> 8, 0x42); /* MSB of counter */
+ io_delay();
+
+ enable = 0x03; /* Turn on speaker */
+ }
+ inb(0x61); /* Dummy read of System Control Port B */
+ io_delay();
+ outb(enable, 0x61); /* Enable timer 2 output to speaker */
+ io_delay();
+}
+
+#define DOT_HZ 880
+#define DASH_HZ 587
+#define US_PER_DOT 125000
+
+/* Okay, this is totally silly, but it's kind of fun. */
+static void send_morse(const char *pattern)
+{
+ char s;
+
+ while ((s = *pattern++)) {
+ switch (s) {
+ case '.':
+ beep(DOT_HZ);
+ udelay(US_PER_DOT);
+ beep(0);
+ udelay(US_PER_DOT);
+ break;
+ case '-':
+ beep(DASH_HZ);
+ udelay(US_PER_DOT * 3);
+ beep(0);
+ udelay(US_PER_DOT);
+ break;
+ default: /* Assume it's a space */
+ udelay(US_PER_DOT * 3);
+ break;
+ }
+ }
+}
+
+void main(void)
+{
+ /* Kill machine if structures are wrong */
+ if (wakeup_header.real_magic != 0x12345678)
+ while (1);
+
+ if (wakeup_header.realmode_flags & 4)
+ send_morse("...-");
+
+ if (wakeup_header.realmode_flags & 1)
+ asm volatile("lcallw $0xc000,$3");
+
+ if (wakeup_header.realmode_flags & 2) {
+ /* Need to call BIOS */
+ probe_cards(0);
+ set_mode(wakeup_header.video_mode);
+ }
+}
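As a sanity check on the arithmetic in beep(): with the 1193181 Hz PIT input clock used above, the 880 Hz dot tone programs a divisor of 1193181/880 = 1355 and the 587 Hz dash tone a divisor of 1193181/587 = 2032 (integer division), giving actual output frequencies of roughly 880.6 Hz and 587.2 Hz.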
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S
new file mode 100644
index 000000000000..f9b77fb37e5b
--- /dev/null
+++ b/arch/x86/kernel/acpi/realmode/wakeup.S
@@ -0,0 +1,113 @@
+/*
+ * ACPI wakeup real mode startup stub
+ */
+#include <asm/segment.h>
+#include <asm/msr-index.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+ .code16
+ .section ".header", "a"
+
+/* This should match the structure in wakeup.h */
+ .globl wakeup_header
+wakeup_header:
+video_mode: .short 0 /* Video mode number */
+pmode_return: .byte 0x66, 0xea /* ljmpl */
+ .long 0 /* offset goes here */
+ .short __KERNEL_CS
+pmode_cr0: .long 0 /* Saved %cr0 */
+pmode_cr3: .long 0 /* Saved %cr3 */
+pmode_cr4: .long 0 /* Saved %cr4 */
+pmode_efer: .quad 0 /* Saved EFER */
+pmode_gdt: .quad 0
+realmode_flags: .long 0
+real_magic: .long 0
+trampoline_segment: .word 0
+signature: .long 0x51ee1111
+
+ .text
+ .globl _start
+ .code16
+wakeup_code:
+_start:
+ cli
+ cld
+
+ /* Set up segments */
+ movw %cs, %ax
+ movw %ax, %ds
+ movw %ax, %es
+ movw %ax, %ss
+
+ movl $wakeup_stack_end, %esp
+
+ /* Clear the EFLAGS */
+ pushl $0
+ popfl
+
+ /* Check header signature... */
+ movl signature, %eax
+ cmpl $0x51ee1111, %eax
+ jne bogus_real_magic
+
+ /* Check we really have everything... */
+ movl end_signature, %eax
+ cmpl $0x65a22c82, %eax
+ jne bogus_real_magic
+
+ /* Call the C code */
+ calll main
+
+ /* Do any other stuff... */
+
+#ifndef CONFIG_64BIT
+ /* This could also be done in C code... */
+ movl pmode_cr3, %eax
+ movl %eax, %cr3
+
+ movl pmode_cr4, %ecx
+ jecxz 1f
+ movl %ecx, %cr4
+1:
+ movl pmode_efer, %eax
+ movl pmode_efer + 4, %edx
+ movl %eax, %ecx
+ orl %edx, %ecx
+ jz 1f
+ movl $0xc0000080, %ecx
+ wrmsr
+1:
+
+ lgdtl pmode_gdt
+
+ /* This really couldn't... */
+ movl pmode_cr0, %eax
+ movl %eax, %cr0
+ jmp pmode_return
+#else
+ pushw $0
+ pushw trampoline_segment
+ pushw $0
+ lret
+#endif
+
+bogus_real_magic:
+1:
+ hlt
+ jmp 1b
+
+ .data
+ .balign 4
+ .globl HEAP, heap_end
+HEAP:
+ .long wakeup_heap
+heap_end:
+ .long wakeup_stack
+
+ .bss
+wakeup_heap:
+ .space 2048
+wakeup_stack:
+ .space 2048
+wakeup_stack_end:
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.h b/arch/x86/kernel/acpi/realmode/wakeup.h
new file mode 100644
index 000000000000..ef8166fe8020
--- /dev/null
+++ b/arch/x86/kernel/acpi/realmode/wakeup.h
@@ -0,0 +1,36 @@
+/*
+ * Definitions for the wakeup data structure at the head of the
+ * wakeup code.
+ */
+
+#ifndef ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H
+#define ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+
+/* This must match data at wakeup.S */
+struct wakeup_header {
+ u16 video_mode; /* Video mode number */
+ u16 _jmp1; /* ljmpl opcode, 32-bit only */
+ u32 pmode_entry; /* Protected mode resume point, 32-bit only */
+ u16 _jmp2; /* CS value, 32-bit only */
+ u32 pmode_cr0; /* Protected mode cr0 */
+ u32 pmode_cr3; /* Protected mode cr3 */
+ u32 pmode_cr4; /* Protected mode cr4 */
+ u32 pmode_efer_low; /* Protected mode EFER */
+ u32 pmode_efer_high;
+ u64 pmode_gdt;
+ u32 realmode_flags;
+ u32 real_magic;
+ u16 trampoline_segment; /* segment with trampoline code, 64-bit only */
+ u32 signature; /* To check we have correct structure */
+} __attribute__((__packed__));
+
+extern struct wakeup_header wakeup_header;
+#endif
+
+#define HEADER_OFFSET 0x3f00
+#define WAKEUP_SIZE 0x4000
+
+#endif /* ARCH_X86_KERNEL_ACPI_RM_WAKEUP_H */
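Since this structure must stay byte-for-byte in sync with the hand-written layout in wakeup.S, a compile-time assertion is a cheap guard against drift. This is only an illustrative sketch, not part of the patch; with the packing shown, the fields total 52 bytes:

	/* Hypothetical layout check (kernel side, needs BUILD_BUG_ON from
	 * <linux/kernel.h>); fails the build if the struct stops matching
	 * the 52-byte assembly layout in wakeup.S. */
	static inline void wakeup_header_layout_check(void)
	{
		BUILD_BUG_ON(sizeof(struct wakeup_header) != 52);
	}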
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
new file mode 100644
index 000000000000..22fab6c4be15
--- /dev/null
+++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
@@ -0,0 +1,61 @@
+/*
+ * wakeup.ld
+ *
+ * Linker script for the real-mode wakeup code
+ */
+#undef i386
+#include "wakeup.h"
+
+OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(_start)
+
+SECTIONS
+{
+ . = HEADER_OFFSET;
+ .header : {
+ *(.header)
+ }
+
+ . = 0;
+ .text : {
+ *(.text*)
+ }
+
+ . = ALIGN(16);
+ .rodata : {
+ *(.rodata*)
+ }
+
+ .videocards : {
+ video_cards = .;
+ *(.videocards)
+ video_cards_end = .;
+ }
+
+ . = ALIGN(16);
+ .data : {
+ *(.data*)
+ }
+
+ .signature : {
+ end_signature = .;
+ LONG(0x65a22c82)
+ }
+
+ . = ALIGN(16);
+ .bss : {
+ __bss_start = .;
+ *(.bss)
+ __bss_end = .;
+ }
+
+ . = ALIGN(16);
+ _end = .;
+
+ /DISCARD/ : {
+ *(.note*)
+ }
+
+ . = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!");
+}
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 6bc815cd8cb3..afc25ee9964b 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -10,30 +10,72 @@
#include <linux/dmi.h>
#include <linux/cpumask.h>
-#include <asm/smp.h>
+#include "realmode/wakeup.h"
+#include "sleep.h"
-/* address in low memory of the wakeup routine. */
-unsigned long acpi_wakeup_address = 0;
+unsigned long acpi_wakeup_address;
unsigned long acpi_realmode_flags;
-extern char wakeup_start, wakeup_end;
-extern unsigned long acpi_copy_wakeup_routine(unsigned long);
+/* address in low memory of the wakeup routine. */
+static unsigned long acpi_realmode;
+
+#ifdef CONFIG_64BIT
+static char temp_stack[10240];
+#endif
/**
* acpi_save_state_mem - save kernel state
*
* Create an identity mapped page table and copy the wakeup routine to
* low memory.
+ *
+ * Note that this is too late to change acpi_wakeup_address.
*/
int acpi_save_state_mem(void)
{
- if (!acpi_wakeup_address) {
- printk(KERN_ERR "Could not allocate memory during boot, S3 disabled\n");
+ struct wakeup_header *header;
+
+ if (!acpi_realmode) {
+ printk(KERN_ERR "Could not allocate memory during boot, "
+ "S3 disabled\n");
return -ENOMEM;
}
- memcpy((void *)acpi_wakeup_address, &wakeup_start,
- &wakeup_end - &wakeup_start);
- acpi_copy_wakeup_routine(acpi_wakeup_address);
+ memcpy((void *)acpi_realmode, &wakeup_code_start, WAKEUP_SIZE);
+
+ header = (struct wakeup_header *)(acpi_realmode + HEADER_OFFSET);
+ if (header->signature != 0x51ee1111) {
+ printk(KERN_ERR "wakeup header does not match\n");
+ return -EINVAL;
+ }
+
+ header->video_mode = saved_video_mode;
+
+#ifndef CONFIG_64BIT
+ store_gdt((struct desc_ptr *)&header->pmode_gdt);
+
+ header->pmode_efer_low = nx_enabled;
+ if (header->pmode_efer_low & 1) {
+ /* This is strange, why not save efer, always? */
+ rdmsr(MSR_EFER, header->pmode_efer_low,
+ header->pmode_efer_high);
+ }
+#endif /* !CONFIG_64BIT */
+
+ header->pmode_cr0 = read_cr0();
+ header->pmode_cr4 = read_cr4();
+ header->realmode_flags = acpi_realmode_flags;
+ header->real_magic = 0x12345678;
+
+#ifndef CONFIG_64BIT
+ header->pmode_entry = (u32)&wakeup_pmode_return;
+ header->pmode_cr3 = (u32)(swsusp_pg_dir - __PAGE_OFFSET);
+ saved_magic = 0x12345678;
+#else /* CONFIG_64BIT */
+ header->trampoline_segment = setup_trampoline() >> 4;
+ init_rsp = (unsigned long)temp_stack + 4096;
+ initial_code = (unsigned long)wakeup_long64;
+ saved_magic = 0x123456789abcdef0;
+#endif /* CONFIG_64BIT */
return 0;
}
@@ -56,15 +98,20 @@ void acpi_restore_state_mem(void)
*/
void __init acpi_reserve_bootmem(void)
{
- if ((&wakeup_end - &wakeup_start) > PAGE_SIZE*2) {
+ if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) {
printk(KERN_ERR
"ACPI: Wakeup code way too big, S3 disabled.\n");
return;
}
- acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2);
- if (!acpi_wakeup_address)
+ acpi_realmode = (unsigned long)alloc_bootmem_low(WAKEUP_SIZE);
+
+ if (!acpi_realmode) {
printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
+ return;
+ }
+
+ acpi_wakeup_address = acpi_realmode;
}
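A minimal sketch of the header handshake used above, assuming a hypothetical field layout (the real definition lives in arch/x86/kernel/acpi/realmode/wakeup.h, which is not part of this hunk); only the magic values and the overall flow match what the patch shows:

struct wakeup_header_sketch {
	u32 signature;		/* reads 0x51ee1111 once the blob is copied   */
	u32 video_mode;		/* consumed when realmode flag bit 1 is set   */
	u32 real_magic;		/* 0x12345678, re-checked by the 16-bit stub  */
	u32 realmode_flags;	/* copy of acpi_realmode_flags                */
};

static int sketch_fill_header(unsigned long realmode_base)
{
	struct wakeup_header_sketch *h;

	h = (struct wakeup_header_sketch *)(realmode_base + HEADER_OFFSET);
	if (h->signature != 0x51ee1111)
		return -EINVAL;		/* kernel and blob out of sync */

	h->video_mode     = saved_video_mode;
	h->realmode_flags = acpi_realmode_flags;
	h->real_magic     = 0x12345678;
	return 0;
}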
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
new file mode 100644
index 000000000000..adbcbaa6f1df
--- /dev/null
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -0,0 +1,16 @@
+/*
+ * Variables and functions used by the code in sleep.c
+ */
+
+#include <asm/trampoline.h>
+
+extern char wakeup_code_start, wakeup_code_end;
+
+extern unsigned long saved_video_mode;
+extern long saved_magic;
+
+extern int wakeup_pmode_return;
+extern char swsusp_pg_dir[PAGE_SIZE];
+
+extern unsigned long acpi_copy_wakeup_routine(unsigned long);
+extern void wakeup_long64(void);
diff --git a/arch/x86/kernel/acpi/sleep_32.c b/arch/x86/kernel/acpi/sleep_32.c
deleted file mode 100644
index 63fe5525e026..000000000000
--- a/arch/x86/kernel/acpi/sleep_32.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * sleep.c - x86-specific ACPI sleep support.
- *
- * Copyright (C) 2001-2003 Patrick Mochel
- * Copyright (C) 2001-2003 Pavel Machek <pavel@suse.cz>
- */
-
-#include <linux/acpi.h>
-#include <linux/bootmem.h>
-#include <linux/dmi.h>
-#include <linux/cpumask.h>
-
-#include <asm/smp.h>
-
-/* Ouch, we want to delete this. We already have better version in userspace, in
- s2ram from suspend.sf.net project */
-static __init int reset_videomode_after_s3(const struct dmi_system_id *d)
-{
- acpi_realmode_flags |= 2;
- return 0;
-}
-
-static __initdata struct dmi_system_id acpisleep_dmi_table[] = {
- { /* Reset video mode after returning from ACPI S3 sleep */
- .callback = reset_videomode_after_s3,
- .ident = "Toshiba Satellite 4030cdt",
- .matches = {
- DMI_MATCH(DMI_PRODUCT_NAME, "S4030CDT/4.3"),
- },
- },
- {}
-};
-
-static int __init acpisleep_dmi_init(void)
-{
- dmi_check_system(acpisleep_dmi_table);
- return 0;
-}
-
-core_initcall(acpisleep_dmi_init);
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index f53e3277f8e5..a12e6a9fb659 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -3,178 +3,12 @@
#include <asm/segment.h>
#include <asm/page.h>
-#
-# wakeup_code runs in real mode, and at unknown address (determined at run-time).
-# Therefore it must only use relative jumps/calls.
-#
-# Do we need to deal with A20? It is okay: ACPI specs says A20 must be enabled
-#
-# If physical address of wakeup_code is 0x12345, BIOS should call us with
-# cs = 0x1234, eip = 0x05
-#
-
-#define BEEP \
- inb $97, %al; \
- outb %al, $0x80; \
- movb $3, %al; \
- outb %al, $97; \
- outb %al, $0x80; \
- movb $-74, %al; \
- outb %al, $67; \
- outb %al, $0x80; \
- movb $-119, %al; \
- outb %al, $66; \
- outb %al, $0x80; \
- movb $15, %al; \
- outb %al, $66;
-
-ALIGN
- .align 4096
-ENTRY(wakeup_start)
-wakeup_code:
- wakeup_code_start = .
- .code16
-
- cli
- cld
-
- # setup data segment
- movw %cs, %ax
- movw %ax, %ds # Make ds:0 point to wakeup_start
- movw %ax, %ss
-
- testl $4, realmode_flags - wakeup_code
- jz 1f
- BEEP
-1:
- mov $(wakeup_stack - wakeup_code), %sp # Private stack is needed for ASUS board
-
- pushl $0 # Kill any dangerous flags
- popfl
-
- movl real_magic - wakeup_code, %eax
- cmpl $0x12345678, %eax
- jne bogus_real_magic
-
- testl $1, realmode_flags - wakeup_code
- jz 1f
- lcall $0xc000,$3
- movw %cs, %ax
- movw %ax, %ds # Bios might have played with that
- movw %ax, %ss
-1:
-
- testl $2, realmode_flags - wakeup_code
- jz 1f
- mov video_mode - wakeup_code, %ax
- call mode_set
-1:
-
- # set up page table
- movl $swsusp_pg_dir-__PAGE_OFFSET, %eax
- movl %eax, %cr3
-
- testl $1, real_efer_save_restore - wakeup_code
- jz 4f
- # restore efer setting
- movl real_save_efer_edx - wakeup_code, %edx
- movl real_save_efer_eax - wakeup_code, %eax
- mov $0xc0000080, %ecx
- wrmsr
-4:
- # make sure %cr4 is set correctly (features, etc)
- movl real_save_cr4 - wakeup_code, %eax
- movl %eax, %cr4
-
- # need a gdt -- use lgdtl to force 32-bit operands, in case
- # the GDT is located past 16 megabytes.
- lgdtl real_save_gdt - wakeup_code
-
- movl real_save_cr0 - wakeup_code, %eax
- movl %eax, %cr0
- jmp 1f
-1:
- movl real_magic - wakeup_code, %eax
- cmpl $0x12345678, %eax
- jne bogus_real_magic
-
- testl $8, realmode_flags - wakeup_code
- jz 1f
- BEEP
-1:
- ljmpl $__KERNEL_CS, $wakeup_pmode_return
-
-real_save_gdt: .word 0
- .long 0
-real_save_cr0: .long 0
-real_save_cr3: .long 0
-real_save_cr4: .long 0
-real_magic: .long 0
-video_mode: .long 0
-realmode_flags: .long 0
-real_efer_save_restore: .long 0
-real_save_efer_edx: .long 0
-real_save_efer_eax: .long 0
-
-bogus_real_magic:
- jmp bogus_real_magic
-
-/* This code uses an extended set of video mode numbers. These include:
- * Aliases for standard modes
- * NORMAL_VGA (-1)
- * EXTENDED_VGA (-2)
- * ASK_VGA (-3)
- * Video modes numbered by menu position -- NOT RECOMMENDED because of lack
- * of compatibility when extending the table. These are between 0x00 and 0xff.
- */
-#define VIDEO_FIRST_MENU 0x0000
-
-/* Standard BIOS video modes (BIOS number + 0x0100) */
-#define VIDEO_FIRST_BIOS 0x0100
-
-/* VESA BIOS video modes (VESA number + 0x0200) */
-#define VIDEO_FIRST_VESA 0x0200
-
-/* Video7 special modes (BIOS number + 0x0900) */
-#define VIDEO_FIRST_V7 0x0900
-
-# Setting of user mode (AX=mode ID) => CF=success
-
-# For now, we only handle VESA modes (0x0200..0x03ff). To handle other
-# modes, we should probably compile in the video code from the boot
-# directory.
-mode_set:
- movw %ax, %bx
- subb $VIDEO_FIRST_VESA>>8, %bh
- cmpb $2, %bh
- jb check_vesa
-
-setbad:
- clc
- ret
-
-check_vesa:
- orw $0x4000, %bx # Use linear frame buffer
- movw $0x4f02, %ax # VESA BIOS mode set call
- int $0x10
- cmpw $0x004f, %ax # AL=4f if implemented
- jnz setbad # AH=0 if OK
-
- stc
- ret
+# Copyright 2003, 2008 Pavel Machek <pavel@suse.cz>, distribute under GPLv2
.code32
ALIGN
-.org 0x800
-wakeup_stack_begin: # Stack grows down
-
-.org 0xff0 # Just below end of page
-wakeup_stack:
-ENTRY(wakeup_end)
-
-.org 0x1000
-
+ENTRY(wakeup_pmode_return)
wakeup_pmode_return:
movw $__KERNEL_DS, %ax
movw %ax, %ss
@@ -187,7 +21,7 @@ wakeup_pmode_return:
lgdt saved_gdt
lidt saved_idt
lldt saved_ldt
- ljmp $(__KERNEL_CS),$1f
+ ljmp $(__KERNEL_CS), $1f
1:
movl %cr3, %eax
movl %eax, %cr3
@@ -201,82 +35,41 @@ wakeup_pmode_return:
jne bogus_magic
# jump to place where we left off
- movl saved_eip,%eax
+ movl saved_eip, %eax
jmp *%eax
bogus_magic:
jmp bogus_magic
-##
-# acpi_copy_wakeup_routine
-#
-# Copy the above routine to low memory.
-#
-# Parameters:
-# %eax: place to copy wakeup routine to
-#
-# Returned address is location of code in low memory (past data and stack)
-#
-ENTRY(acpi_copy_wakeup_routine)
- pushl %ebx
+save_registers:
sgdt saved_gdt
sidt saved_idt
sldt saved_ldt
str saved_tss
- movl nx_enabled, %edx
- movl %edx, real_efer_save_restore - wakeup_start (%eax)
- testl $1, real_efer_save_restore - wakeup_start (%eax)
- jz 2f
- # save efer setting
- pushl %eax
- movl %eax, %ebx
- mov $0xc0000080, %ecx
- rdmsr
- movl %edx, real_save_efer_edx - wakeup_start (%ebx)
- movl %eax, real_save_efer_eax - wakeup_start (%ebx)
- popl %eax
-2:
-
- movl %cr3, %edx
- movl %edx, real_save_cr3 - wakeup_start (%eax)
- movl %cr4, %edx
- movl %edx, real_save_cr4 - wakeup_start (%eax)
- movl %cr0, %edx
- movl %edx, real_save_cr0 - wakeup_start (%eax)
- sgdt real_save_gdt - wakeup_start (%eax)
-
- movl saved_videomode, %edx
- movl %edx, video_mode - wakeup_start (%eax)
- movl acpi_realmode_flags, %edx
- movl %edx, realmode_flags - wakeup_start (%eax)
- movl $0x12345678, real_magic - wakeup_start (%eax)
- movl $0x12345678, saved_magic
- popl %ebx
- ret
-
-save_registers:
leal 4(%esp), %eax
movl %eax, saved_context_esp
- movl %ebx, saved_context_ebx
- movl %ebp, saved_context_ebp
- movl %esi, saved_context_esi
- movl %edi, saved_context_edi
- pushfl ; popl saved_context_eflags
-
- movl $ret_point, saved_eip
+ movl %ebx, saved_context_ebx
+ movl %ebp, saved_context_ebp
+ movl %esi, saved_context_esi
+ movl %edi, saved_context_edi
+ pushfl
+ popl saved_context_eflags
+
+ movl $ret_point, saved_eip
ret
restore_registers:
- movl saved_context_ebp, %ebp
- movl saved_context_ebx, %ebx
- movl saved_context_esi, %esi
- movl saved_context_edi, %edi
- pushl saved_context_eflags ; popfl
- ret
+ movl saved_context_ebp, %ebp
+ movl saved_context_ebx, %ebx
+ movl saved_context_esi, %esi
+ movl saved_context_edi, %edi
+ pushl saved_context_eflags
+ popfl
+ ret
ENTRY(do_suspend_lowlevel)
call save_processor_state
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 2e1b9e0d0767..bcc293423a70 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -7,191 +7,18 @@
#include <asm/asm-offsets.h>
# Copyright 2003 Pavel Machek <pavel@suse.cz>, distribute under GPLv2
-#
-# wakeup_code runs in real mode, and at unknown address (determined at run-time).
-# Therefore it must only use relative jumps/calls.
-#
-# Do we need to deal with A20? It is okay: ACPI specs says A20 must be enabled
-#
-# If physical address of wakeup_code is 0x12345, BIOS should call us with
-# cs = 0x1234, eip = 0x05
-#
-
-#define BEEP \
- inb $97, %al; \
- outb %al, $0x80; \
- movb $3, %al; \
- outb %al, $97; \
- outb %al, $0x80; \
- movb $-74, %al; \
- outb %al, $67; \
- outb %al, $0x80; \
- movb $-119, %al; \
- outb %al, $66; \
- outb %al, $0x80; \
- movb $15, %al; \
- outb %al, $66;
-
-
-ALIGN
- .align 16
-ENTRY(wakeup_start)
-wakeup_code:
- wakeup_code_start = .
- .code16
-
-# Running in *copy* of this code, somewhere in low 1MB.
-
- cli
- cld
- # setup data segment
- movw %cs, %ax
- movw %ax, %ds # Make ds:0 point to wakeup_start
- movw %ax, %ss
-
- # Data segment must be set up before we can see whether to beep.
- testl $4, realmode_flags - wakeup_code
- jz 1f
- BEEP
-1:
-
- # Private stack is needed for ASUS board
- mov $(wakeup_stack - wakeup_code), %sp
-
- pushl $0 # Kill any dangerous flags
- popfl
-
- movl real_magic - wakeup_code, %eax
- cmpl $0x12345678, %eax
- jne bogus_real_magic
-
- testl $1, realmode_flags - wakeup_code
- jz 1f
- lcall $0xc000,$3
- movw %cs, %ax
- movw %ax, %ds # Bios might have played with that
- movw %ax, %ss
-1:
-
- testl $2, realmode_flags - wakeup_code
- jz 1f
- mov video_mode - wakeup_code, %ax
- call mode_set
-1:
-
- mov %ds, %ax # Find 32bit wakeup_code addr
- movzx %ax, %esi # (Convert %ds:gdt to a liner ptr)
- shll $4, %esi
- # Fix up the vectors
- addl %esi, wakeup_32_vector - wakeup_code
- addl %esi, wakeup_long64_vector - wakeup_code
- addl %esi, gdt_48a + 2 - wakeup_code # Fixup the gdt pointer
-
- lidtl %ds:idt_48a - wakeup_code
- lgdtl %ds:gdt_48a - wakeup_code # load gdt with whatever is
- # appropriate
-
- movl $1, %eax # protected mode (PE) bit
- lmsw %ax # This is it!
- jmp 1f
-1:
-
- ljmpl *(wakeup_32_vector - wakeup_code)
-
- .balign 4
-wakeup_32_vector:
- .long wakeup_32 - wakeup_code
- .word __KERNEL32_CS, 0
-
- .code32
-wakeup_32:
-# Running in this code, but at low address; paging is not yet turned on.
-
- movl $__KERNEL_DS, %eax
- movl %eax, %ds
-
- /*
- * Prepare for entering 64bits mode
- */
-
- /* Enable PAE */
- xorl %eax, %eax
- btsl $5, %eax
- movl %eax, %cr4
-
- /* Setup early boot stage 4 level pagetables */
- leal (wakeup_level4_pgt - wakeup_code)(%esi), %eax
- movl %eax, %cr3
-
- /* Check if nx is implemented */
- movl $0x80000001, %eax
- cpuid
- movl %edx,%edi
-
- /* Enable Long Mode */
- xorl %eax, %eax
- btsl $_EFER_LME, %eax
-
- /* No Execute supported? */
- btl $20,%edi
- jnc 1f
- btsl $_EFER_NX, %eax
-
- /* Make changes effective */
-1: movl $MSR_EFER, %ecx
- xorl %edx, %edx
- wrmsr
-
- xorl %eax, %eax
- btsl $31, %eax /* Enable paging and in turn activate Long Mode */
- btsl $0, %eax /* Enable protected mode */
-
- /* Make changes effective */
- movl %eax, %cr0
-
- /* At this point:
- CR4.PAE must be 1
- CS.L must be 0
- CR3 must point to PML4
- Next instruction must be a branch
- This must be on identity-mapped page
- */
- /*
- * At this point we're in long mode but in 32bit compatibility mode
- * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn
- * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we load
- * the new gdt/idt that has __KERNEL_CS with CS.L = 1.
- */
-
- /* Finally jump in 64bit mode */
- ljmp *(wakeup_long64_vector - wakeup_code)(%esi)
-
- .balign 4
-wakeup_long64_vector:
- .long wakeup_long64 - wakeup_code
- .word __KERNEL_CS, 0
.code64
-
- /* Hooray, we are in Long 64-bit mode (but still running in
- * low memory)
- */
-wakeup_long64:
/*
- * We must switch to a new descriptor in kernel space for the GDT
- * because soon the kernel won't have access anymore to the userspace
- * addresses where we're currently running on. We have to do that here
- * because in 32bit we couldn't load a 64bit linear address.
+ * Hooray, we are in Long 64-bit mode (but still running in low memory)
*/
- lgdt cpu_gdt_descr
-
- movq saved_magic, %rax
- movq $0x123456789abcdef0, %rdx
- cmpq %rdx, %rax
- jne bogus_64_magic
+ENTRY(wakeup_long64)
+wakeup_long64:
+ movq saved_magic, %rax
+ movq $0x123456789abcdef0, %rdx
+ cmpq %rdx, %rax
+ jne bogus_64_magic
- nop
- nop
movw $__KERNEL_DS, %ax
movw %ax, %ss
movw %ax, %ds
@@ -208,130 +35,8 @@ wakeup_long64:
movq saved_rip, %rax
jmp *%rax
-.code32
-
- .align 64
-gdta:
- /* Its good to keep gdt in sync with one in trampoline.S */
- .word 0, 0, 0, 0 # dummy
- /* ??? Why I need the accessed bit set in order for this to work? */
- .quad 0x00cf9b000000ffff # __KERNEL32_CS
- .quad 0x00af9b000000ffff # __KERNEL_CS
- .quad 0x00cf93000000ffff # __KERNEL_DS
-
-idt_48a:
- .word 0 # idt limit = 0
- .word 0, 0 # idt base = 0L
-
-gdt_48a:
- .word 0x800 # gdt limit=2048,
- # 256 GDT entries
- .long gdta - wakeup_code # gdt base (relocated in later)
-
-real_magic: .quad 0
-video_mode: .quad 0
-realmode_flags: .quad 0
-
-.code16
-bogus_real_magic:
- jmp bogus_real_magic
-
-.code64
bogus_64_magic:
- jmp bogus_64_magic
-
-/* This code uses an extended set of video mode numbers. These include:
- * Aliases for standard modes
- * NORMAL_VGA (-1)
- * EXTENDED_VGA (-2)
- * ASK_VGA (-3)
- * Video modes numbered by menu position -- NOT RECOMMENDED because of lack
- * of compatibility when extending the table. These are between 0x00 and 0xff.
- */
-#define VIDEO_FIRST_MENU 0x0000
-
-/* Standard BIOS video modes (BIOS number + 0x0100) */
-#define VIDEO_FIRST_BIOS 0x0100
-
-/* VESA BIOS video modes (VESA number + 0x0200) */
-#define VIDEO_FIRST_VESA 0x0200
-
-/* Video7 special modes (BIOS number + 0x0900) */
-#define VIDEO_FIRST_V7 0x0900
-
-# Setting of user mode (AX=mode ID) => CF=success
-
-# For now, we only handle VESA modes (0x0200..0x03ff). To handle other
-# modes, we should probably compile in the video code from the boot
-# directory.
-.code16
-mode_set:
- movw %ax, %bx
- subb $VIDEO_FIRST_VESA>>8, %bh
- cmpb $2, %bh
- jb check_vesa
-
-setbad:
- clc
- ret
-
-check_vesa:
- orw $0x4000, %bx # Use linear frame buffer
- movw $0x4f02, %ax # VESA BIOS mode set call
- int $0x10
- cmpw $0x004f, %ax # AL=4f if implemented
- jnz setbad # AH=0 if OK
-
- stc
- ret
-
-wakeup_stack_begin: # Stack grows down
-
-.org 0xff0
-wakeup_stack: # Just below end of page
-
-.org 0x1000
-ENTRY(wakeup_level4_pgt)
- .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
- .fill 510,8,0
- /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
- .quad level3_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
-
-ENTRY(wakeup_end)
-
-##
-# acpi_copy_wakeup_routine
-#
-# Copy the above routine to low memory.
-#
-# Parameters:
-# %rdi: place to copy wakeup routine to
-#
-# Returned address is location of code in low memory (past data and stack)
-#
- .code64
-ENTRY(acpi_copy_wakeup_routine)
- pushq %rax
- pushq %rdx
-
- movl saved_video_mode, %edx
- movl %edx, video_mode - wakeup_start (,%rdi)
- movl acpi_realmode_flags, %edx
- movl %edx, realmode_flags - wakeup_start (,%rdi)
- movq $0x12345678, real_magic - wakeup_start (,%rdi)
- movq $0x123456789abcdef0, %rdx
- movq %rdx, saved_magic
-
- movq saved_magic, %rax
- movq $0x123456789abcdef0, %rdx
- cmpq %rdx, %rax
- jne bogus_64_magic
-
- # restore the regs we used
- popq %rdx
- popq %rax
-ENTRY(do_suspend_lowlevel_s4bios)
- ret
+ jmp bogus_64_magic
.align 2
.p2align 4,,15
@@ -414,7 +119,7 @@ do_suspend_lowlevel:
jmp restore_processor_state
.LFE5:
.Lfe5:
- .size do_suspend_lowlevel,.Lfe5-do_suspend_lowlevel
+ .size do_suspend_lowlevel, .Lfe5-do_suspend_lowlevel
.data
ALIGN
diff --git a/arch/x86/kernel/acpi/wakeup_rm.S b/arch/x86/kernel/acpi/wakeup_rm.S
new file mode 100644
index 000000000000..6ff3b5730575
--- /dev/null
+++ b/arch/x86/kernel/acpi/wakeup_rm.S
@@ -0,0 +1,10 @@
+/*
+ * Wrapper script for the realmode binary as a transport object
+ * before copying to low memory.
+ */
+ .section ".rodata","a"
+ .globl wakeup_code_start, wakeup_code_end
+wakeup_code_start:
+ .incbin "arch/x86/kernel/acpi/realmode/wakeup.bin"
+wakeup_code_end:
+ .size wakeup_code_start, .-wakeup_code_start
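The wrapper above publishes the embedded blob's bounds as wakeup_code_start/wakeup_code_end; a hedged sketch of the consuming side follows, where the destination buffer is whatever low-memory reservation the caller owns (as in sleep.c):

extern char wakeup_code_start, wakeup_code_end;

static int sketch_copy_blob(void *lowmem, size_t max)
{
	size_t len = &wakeup_code_end - &wakeup_code_start;

	if (len > max)
		return -ENOSPC;		/* blob outgrew the reservation */
	memcpy(lowmem, &wakeup_code_start, len);
	return 0;
}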
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5fed98ca0e1f..df4099dc1c68 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -11,6 +11,8 @@
#include <asm/mce.h>
#include <asm/nmi.h>
#include <asm/vsyscall.h>
+#include <asm/cacheflush.h>
+#include <asm/io.h>
#define MAX_PATCH_LEN (255-1)
@@ -177,7 +179,7 @@ static const unsigned char*const * find_nop_table(void)
#endif /* CONFIG_X86_64 */
/* Use this to add nops to a buffer, then text_poke the whole buffer. */
-static void add_nops(void *insns, unsigned int len)
+void add_nops(void *insns, unsigned int len)
{
const unsigned char *const *noptable = find_nop_table();
@@ -190,6 +192,7 @@ static void add_nops(void *insns, unsigned int len)
len -= noplen;
}
}
+EXPORT_SYMBOL_GPL(add_nops);
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern u8 *__smp_locks[], *__smp_locks_end[];
@@ -205,7 +208,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
struct alt_instr *a;
char insnbuf[MAX_PATCH_LEN];
- DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
+ DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
for (a = start; a < end; a++) {
u8 *instr = a->instr;
BUG_ON(a->replacementlen > a->instrlen);
@@ -217,13 +220,13 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
DPRINTK("%s: vsyscall fixup: %p => %p\n",
- __FUNCTION__, a->instr, instr);
+ __func__, a->instr, instr);
}
#endif
memcpy(insnbuf, a->replacement, a->replacementlen);
add_nops(insnbuf + a->replacementlen,
a->instrlen - a->replacementlen);
- text_poke(instr, insnbuf, a->instrlen);
+ text_poke_early(instr, insnbuf, a->instrlen);
}
}
@@ -284,7 +287,6 @@ void alternatives_smp_module_add(struct module *mod, char *name,
void *text, void *text_end)
{
struct smp_alt_module *smp;
- unsigned long flags;
if (noreplace_smp)
return;
@@ -307,42 +309,40 @@ void alternatives_smp_module_add(struct module *mod, char *name,
smp->text = text;
smp->text_end = text_end;
DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
- __FUNCTION__, smp->locks, smp->locks_end,
+ __func__, smp->locks, smp->locks_end,
smp->text, smp->text_end, smp->name);
- spin_lock_irqsave(&smp_alt, flags);
+ spin_lock(&smp_alt);
list_add_tail(&smp->next, &smp_alt_modules);
if (boot_cpu_has(X86_FEATURE_UP))
alternatives_smp_unlock(smp->locks, smp->locks_end,
smp->text, smp->text_end);
- spin_unlock_irqrestore(&smp_alt, flags);
+ spin_unlock(&smp_alt);
}
void alternatives_smp_module_del(struct module *mod)
{
struct smp_alt_module *item;
- unsigned long flags;
if (smp_alt_once || noreplace_smp)
return;
- spin_lock_irqsave(&smp_alt, flags);
+ spin_lock(&smp_alt);
list_for_each_entry(item, &smp_alt_modules, next) {
if (mod != item->mod)
continue;
list_del(&item->next);
- spin_unlock_irqrestore(&smp_alt, flags);
- DPRINTK("%s: %s\n", __FUNCTION__, item->name);
+ spin_unlock(&smp_alt);
+ DPRINTK("%s: %s\n", __func__, item->name);
kfree(item);
return;
}
- spin_unlock_irqrestore(&smp_alt, flags);
+ spin_unlock(&smp_alt);
}
void alternatives_smp_switch(int smp)
{
struct smp_alt_module *mod;
- unsigned long flags;
#ifdef CONFIG_LOCKDEP
/*
@@ -359,7 +359,7 @@ void alternatives_smp_switch(int smp)
return;
BUG_ON(!smp && (num_online_cpus() > 1));
- spin_lock_irqsave(&smp_alt, flags);
+ spin_lock(&smp_alt);
/*
* Avoid unnecessary switches because it forces JIT based VMs to
@@ -383,7 +383,7 @@ void alternatives_smp_switch(int smp)
mod->text, mod->text_end);
}
smp_mode = smp;
- spin_unlock_irqrestore(&smp_alt, flags);
+ spin_unlock(&smp_alt);
}
#endif
@@ -411,7 +411,7 @@ void apply_paravirt(struct paravirt_patch_site *start,
/* Pad the rest with nops */
add_nops(insnbuf + used, p->len - used);
- text_poke(p->instr, insnbuf, p->len);
+ text_poke_early(p->instr, insnbuf, p->len);
}
}
extern struct paravirt_patch_site __start_parainstructions[],
@@ -420,8 +420,6 @@ extern struct paravirt_patch_site __start_parainstructions[],
void __init alternative_instructions(void)
{
- unsigned long flags;
-
/* The patching is not fully atomic, so try to avoid local interruptions
that might execute the to be patched code.
Other CPUs are not running. */
@@ -430,7 +428,6 @@ void __init alternative_instructions(void)
stop_mce();
#endif
- local_irq_save(flags);
apply_alternatives(__alt_instructions, __alt_instructions_end);
/* switch to patch-once-at-boottime-only mode and free the
@@ -462,7 +459,6 @@ void __init alternative_instructions(void)
}
#endif
apply_paravirt(__parainstructions, __parainstructions_end);
- local_irq_restore(flags);
if (smp_alt_once)
free_init_pages("SMP alternatives",
@@ -475,18 +471,71 @@ void __init alternative_instructions(void)
#endif
}
-/*
- * Warning:
+/**
+ * text_poke_early - Update instructions on a live kernel at boot time
+ * @addr: address to modify
+ * @opcode: source of the copy
+ * @len: length to copy
+ *
* When you use this code to patch more than one byte of an instruction
* you need to make sure that other CPUs cannot execute this code in parallel.
- * Also no thread must be currently preempted in the middle of these instructions.
- * And on the local CPU you need to be protected again NMI or MCE handlers
- * seeing an inconsistent instruction while you patch.
+ * Also no thread must be currently preempted in the middle of these
+ * instructions. And on the local CPU you need to be protected against NMI or MCE
+ * handlers seeing an inconsistent instruction while you patch.
*/
-void __kprobes text_poke(void *addr, unsigned char *opcode, int len)
+void *text_poke_early(void *addr, const void *opcode, size_t len)
{
+ unsigned long flags;
+ local_irq_save(flags);
memcpy(addr, opcode, len);
+ local_irq_restore(flags);
+ sync_core();
+ /* Could also do a CLFLUSH here to speed up CPU recovery; but
+ that causes hangs on some VIA CPUs. */
+ return addr;
+}
+
+/**
+ * text_poke - Update instructions on a live kernel
+ * @addr: address to modify
+ * @opcode: source of the copy
+ * @len: length to copy
+ *
+ * Only atomic text poke/set should be allowed when not doing early patching.
+ * It means the size must be writable atomically and the address must be aligned
+ * in a way that permits an atomic write. It also makes sure we fit on a single
+ * page.
+ */
+void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
+{
+ unsigned long flags;
+ char *vaddr;
+ int nr_pages = 2;
+
+ BUG_ON(len > sizeof(long));
+ BUG_ON((((long)addr + len - 1) & ~(sizeof(long) - 1))
+ - ((long)addr & ~(sizeof(long) - 1)));
+ if (kernel_text_address((unsigned long)addr)) {
+ struct page *pages[2] = { virt_to_page(addr),
+ virt_to_page(addr + PAGE_SIZE) };
+ if (!pages[1])
+ nr_pages = 1;
+ vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
+ BUG_ON(!vaddr);
+ local_irq_save(flags);
+ memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
+ local_irq_restore(flags);
+ vunmap(vaddr);
+ } else {
+ /*
+ * modules are in vmalloc'ed memory, always writable.
+ */
+ local_irq_save(flags);
+ memcpy(addr, opcode, len);
+ local_irq_restore(flags);
+ }
sync_core();
/* Could also do a CLFLUSH here to speed up CPU recovery; but
that causes hangs on some VIA CPUs. */
+ return addr;
}
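The reworked text_poke() above only accepts writes of at most sizeof(long) bytes that do not straddle that alignment; a minimal sketch of a conforming caller (the target address here is purely hypothetical):

static void sketch_poke_int3(void *addr)
{
	unsigned char int3 = 0xcc;	/* one-byte breakpoint opcode */

	/* a single byte can never straddle a sizeof(long) boundary */
	text_poke(addr, &int3, 1);
}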
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index 00df126169b4..479926d9e004 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -27,11 +27,11 @@
#include <asm/k8.h>
int gart_iommu_aperture;
-int gart_iommu_aperture_disabled __initdata = 0;
-int gart_iommu_aperture_allowed __initdata = 0;
+int gart_iommu_aperture_disabled __initdata;
+int gart_iommu_aperture_allowed __initdata;
int fallback_aper_order __initdata = 1; /* 64MB */
-int fallback_aper_force __initdata = 0;
+int fallback_aper_force __initdata;
int fix_aperture __initdata = 1;
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 35a568ea8400..687208190b06 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -50,6 +50,11 @@
# error SPURIOUS_APIC_VECTOR definition error
#endif
+unsigned long mp_lapic_addr;
+
+DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
+EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
+
/*
* Knob to control our willingness to enable the local APIC.
*
@@ -621,6 +626,35 @@ int setup_profiling_timer(unsigned int multiplier)
}
/*
+ * Setup extended LVT, AMD specific (K8, family 10h)
+ *
+ * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
+ * MCE interrupts are supported. Thus MCE offset must be set to 0.
+ */
+
+#define APIC_EILVT_LVTOFF_MCE 0
+#define APIC_EILVT_LVTOFF_IBS 1
+
+static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
+{
+ unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
+ unsigned int v = (mask << 16) | (msg_type << 8) | vector;
+ apic_write(reg, v);
+}
+
+u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
+{
+ setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
+ return APIC_EILVT_LVTOFF_MCE;
+}
+
+u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
+{
+ setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
+ return APIC_EILVT_LVTOFF_IBS;
+}
+
+/*
* Local APIC start and shutdown
*/
@@ -868,12 +902,50 @@ void __init init_bsp_APIC(void)
apic_write_around(APIC_LVT1, value);
}
+void __cpuinit lapic_setup_esr(void)
+{
+ unsigned long oldvalue, value, maxlvt;
+ if (lapic_is_integrated() && !esr_disable) {
+ /* !82489DX */
+ maxlvt = lapic_get_maxlvt();
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
+ apic_write(APIC_ESR, 0);
+ oldvalue = apic_read(APIC_ESR);
+
+ /* enables sending errors */
+ value = ERROR_APIC_VECTOR;
+ apic_write_around(APIC_LVTERR, value);
+ /*
+ * spec says clear errors after enabling vector.
+ */
+ if (maxlvt > 3)
+ apic_write(APIC_ESR, 0);
+ value = apic_read(APIC_ESR);
+ if (value != oldvalue)
+ apic_printk(APIC_VERBOSE, "ESR value before enabling "
+ "vector: 0x%08lx after: 0x%08lx\n",
+ oldvalue, value);
+ } else {
+ if (esr_disable)
+ /*
+ * Something untraceable is creating bad interrupts on
+ * secondary quads ... for the moment, just leave the
+ * ESR disabled - we can't do anything useful with the
+ * errors anyway - mbligh
+ */
+ printk(KERN_INFO "Leaving ESR disabled.\n");
+ else
+ printk(KERN_INFO "No ESR for 82489DX.\n");
+ }
+}
+
+
/**
* setup_local_APIC - setup the local APIC
*/
void __cpuinit setup_local_APIC(void)
{
- unsigned long oldvalue, value, maxlvt, integrated;
+ unsigned long value, integrated;
int i, j;
/* Pound the ESR really hard over the head with a big hammer - mbligh */
@@ -997,40 +1069,13 @@ void __cpuinit setup_local_APIC(void)
if (!integrated) /* 82489DX */
value |= APIC_LVT_LEVEL_TRIGGER;
apic_write_around(APIC_LVT1, value);
+}
- if (integrated && !esr_disable) {
- /* !82489DX */
- maxlvt = lapic_get_maxlvt();
- if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
- apic_write(APIC_ESR, 0);
- oldvalue = apic_read(APIC_ESR);
-
- /* enables sending errors */
- value = ERROR_APIC_VECTOR;
- apic_write_around(APIC_LVTERR, value);
- /*
- * spec says clear errors after enabling vector.
- */
- if (maxlvt > 3)
- apic_write(APIC_ESR, 0);
- value = apic_read(APIC_ESR);
- if (value != oldvalue)
- apic_printk(APIC_VERBOSE, "ESR value before enabling "
- "vector: 0x%08lx after: 0x%08lx\n",
- oldvalue, value);
- } else {
- if (esr_disable)
- /*
- * Something untraceable is creating bad interrupts on
- * secondary quads ... for the moment, just leave the
- * ESR disabled - we can't do anything useful with the
- * errors anyway - mbligh
- */
- printk(KERN_INFO "Leaving ESR disabled.\n");
- else
- printk(KERN_INFO "No ESR for 82489DX.\n");
- }
+void __cpuinit end_local_APIC_setup(void)
+{
+ unsigned long value;
+ lapic_setup_esr();
/* Disable the local apic timer */
value = apic_read(APIC_LVTT);
value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
@@ -1147,7 +1192,7 @@ void __init init_apic_mappings(void)
* default configuration (or the MP table is broken).
*/
if (boot_cpu_physical_apicid == -1U)
- boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+ boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
#ifdef CONFIG_X86_IO_APIC
{
@@ -1185,6 +1230,9 @@ fake_ioapic_page:
* This initializes the IO-APIC and APIC hardware if this is
* a UP kernel.
*/
+
+int apic_version[MAX_APICS];
+
int __init APIC_init_uniprocessor(void)
{
if (enable_local_apic < 0)
@@ -1214,12 +1262,13 @@ int __init APIC_init_uniprocessor(void)
* might be zero if read from MP tables. Get it from LAPIC.
*/
#ifdef CONFIG_CRASH_DUMP
- boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
+ boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
#endif
phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
setup_local_APIC();
+ end_local_APIC_setup();
#ifdef CONFIG_X86_IO_APIC
if (smp_found_config)
if (!skip_ioapic_setup && nr_ioapics)
@@ -1288,6 +1337,29 @@ void smp_error_interrupt(struct pt_regs *regs)
irq_exit();
}
+#ifdef CONFIG_SMP
+void __init smp_intr_init(void)
+{
+ /*
+ * IRQ0 must be given a fixed assignment and initialized,
+ * because it's used before the IO-APIC is set up.
+ */
+ set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
+
+ /*
+ * The reschedule interrupt is a CPU-to-CPU reschedule-helper
+ * IPI, driven by wakeup.
+ */
+ set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
+
+ /* IPI for invalidation */
+ set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+
+ /* IPI for generic function call */
+ set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+}
+#endif
+
/*
* Initialize APIC interrupts
*/
@@ -1394,6 +1466,88 @@ void disconnect_bsp_APIC(int virt_wire_setup)
}
}
+unsigned int __cpuinitdata maxcpus = NR_CPUS;
+
+void __cpuinit generic_processor_info(int apicid, int version)
+{
+ int cpu;
+ cpumask_t tmp_map;
+ physid_mask_t phys_cpu;
+
+ /*
+ * Validate version
+ */
+ if (version == 0x0) {
+ printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
+ "fixing up to 0x10. (tell your hw vendor)\n",
+ version);
+ version = 0x10;
+ }
+ apic_version[apicid] = version;
+
+ phys_cpu = apicid_to_cpu_present(apicid);
+ physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
+
+ if (num_processors >= NR_CPUS) {
+ printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+ " Processor ignored.\n", NR_CPUS);
+ return;
+ }
+
+ if (num_processors >= maxcpus) {
+ printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
+ " Processor ignored.\n", maxcpus);
+ return;
+ }
+
+ num_processors++;
+ cpus_complement(tmp_map, cpu_present_map);
+ cpu = first_cpu(tmp_map);
+
+ if (apicid == boot_cpu_physical_apicid)
+ /*
+ * x86_bios_cpu_apicid is required to have processors listed
+ * in the same order as logical cpu numbers. Hence the first
+ * entry is the BSP, and so on.
+ */
+ cpu = 0;
+
+ /*
+ * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
+ * but we need to work out other dependencies like SMP_SUSPEND etc
+ * before this can be done without some confusion.
+ * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
+ * - Ashok Raj <ashok.raj@intel.com>
+ */
+ if (num_processors > 8) {
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_INTEL:
+ if (!APIC_XAPIC(version)) {
+ def_to_bigsmp = 0;
+ break;
+ }
+ /* If P4 and above fall through */
+ case X86_VENDOR_AMD:
+ def_to_bigsmp = 1;
+ }
+ }
+#ifdef CONFIG_SMP
+ /* are we being called early in kernel startup? */
+ if (x86_cpu_to_apicid_early_ptr) {
+ u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
+ u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
+
+ cpu_to_apicid[cpu] = apicid;
+ bios_cpu_apicid[cpu] = apicid;
+ } else {
+ per_cpu(x86_cpu_to_apicid, cpu) = apicid;
+ per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
+ }
+#endif
+ cpu_set(cpu, cpu_possible_map);
+ cpu_set(cpu, cpu_present_map);
+}
+
/*
* Power management
*/
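The extended-LVT helpers added above hard-code the AMD offsets (0 for MCE, 1 for IBS); the sketch below shows how a client might program the IBS entry -- the vector and message type are placeholders, not values taken from any real driver:

static void sketch_enable_ibs_lvt(void)
{
	u8 vector   = 0xf8;	/* hypothetical free vector     */
	u8 msg_type = 0;	/* 0 = fixed interrupt delivery */

	setup_APIC_eilvt_ibs(vector, msg_type, 0 /* unmasked */);
}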
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index d8d03e09dea2..9e8e5c050c55 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -34,13 +34,15 @@
#include <asm/mpspec.h>
#include <asm/hpet.h>
#include <asm/pgalloc.h>
-#include <asm/mach_apic.h>
#include <asm/nmi.h>
#include <asm/idle.h>
#include <asm/proto.h>
#include <asm/timex.h>
#include <asm/apic.h>
+#include <mach_ipi.h>
+#include <mach_apic.h>
+
int disable_apic_timer __cpuinitdata;
static int apic_calibrate_pmtmr __initdata;
int disable_apic;
@@ -83,6 +85,12 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
static unsigned long apic_phys;
+unsigned long mp_lapic_addr;
+
+DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
+EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
+
+unsigned int __cpuinitdata maxcpus = NR_CPUS;
/*
* Get the LAPIC version
*/
@@ -431,7 +439,8 @@ void __cpuinit check_boot_apic_timer_broadcast(void)
lapic_clockevent.features |= CLOCK_EVT_FEAT_DUMMY;
local_irq_enable();
- clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, &boot_cpu_id);
+ clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
+ &boot_cpu_physical_apicid);
local_irq_disable();
}
@@ -640,10 +649,10 @@ int __init verify_local_APIC(void)
/*
* The ID register is read/write in a real APIC.
*/
- reg0 = apic_read(APIC_ID);
+ reg0 = read_apic_id();
apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
- reg1 = apic_read(APIC_ID);
+ reg1 = read_apic_id();
apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
apic_write(APIC_ID, reg0);
if (reg1 != (reg0 ^ APIC_ID_MASK))
@@ -728,6 +737,7 @@ void __cpuinit setup_local_APIC(void)
unsigned int value;
int i, j;
+ preempt_disable();
value = apic_read(APIC_LVR);
BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f);
@@ -821,6 +831,7 @@ void __cpuinit setup_local_APIC(void)
else
value = APIC_DM_NMI | APIC_LVT_MASKED;
apic_write(APIC_LVT1, value);
+ preempt_enable();
}
void __cpuinit lapic_setup_esr(void)
@@ -857,10 +868,34 @@ static int __init detect_init_APIC(void)
}
mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
- boot_cpu_id = 0;
+ boot_cpu_physical_apicid = 0;
return 0;
}
+void __init early_init_lapic_mapping(void)
+{
+ unsigned long apic_phys;
+
+ /*
+ * If no local APIC can be found then bail out:
+ * it means there is neither an MP table nor an MADT.
+ */
+ if (!smp_found_config)
+ return;
+
+ apic_phys = mp_lapic_addr;
+
+ set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
+ apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+ APIC_BASE, apic_phys);
+
+ /*
+ * Fetch the APIC ID of the BSP in case we have a
+ * default configuration (or the MP table is broken).
+ */
+ boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+}
+
/**
* init_apic_mappings - initialize APIC mappings
*/
@@ -881,16 +916,11 @@ void __init init_apic_mappings(void)
apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
APIC_BASE, apic_phys);
- /* Put local APIC into the resource map. */
- lapic_resource.start = apic_phys;
- lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
- insert_resource(&iomem_resource, &lapic_resource);
-
/*
* Fetch the APIC ID of the BSP in case we have a
* default configuration (or the MP table is broken).
*/
- boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
+ boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
}
/*
@@ -911,8 +941,8 @@ int __init APIC_init_uniprocessor(void)
verify_local_APIC();
- phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
- apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id));
+ phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
+ apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
setup_local_APIC();
@@ -1029,6 +1059,52 @@ void disconnect_bsp_APIC(int virt_wire_setup)
apic_write(APIC_LVT1, value);
}
+void __cpuinit generic_processor_info(int apicid, int version)
+{
+ int cpu;
+ cpumask_t tmp_map;
+
+ if (num_processors >= NR_CPUS) {
+ printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+ " Processor ignored.\n", NR_CPUS);
+ return;
+ }
+
+ if (num_processors >= maxcpus) {
+ printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
+ " Processor ignored.\n", maxcpus);
+ return;
+ }
+
+ num_processors++;
+ cpus_complement(tmp_map, cpu_present_map);
+ cpu = first_cpu(tmp_map);
+
+ physid_set(apicid, phys_cpu_present_map);
+ if (apicid == boot_cpu_physical_apicid) {
+ /*
+ * x86_bios_cpu_apicid is required to have processors listed
+ * in the same order as logical cpu numbers. Hence the first
+ * entry is the BSP, and so on.
+ */
+ cpu = 0;
+ }
+ /* are we being called early in kernel startup? */
+ if (x86_cpu_to_apicid_early_ptr) {
+ u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
+ u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
+
+ cpu_to_apicid[cpu] = apicid;
+ bios_cpu_apicid[cpu] = apicid;
+ } else {
+ per_cpu(x86_cpu_to_apicid, cpu) = apicid;
+ per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
+ }
+
+ cpu_set(cpu, cpu_possible_map);
+ cpu_set(cpu, cpu_present_map);
+}
+
/*
* Power management
*/
@@ -1065,7 +1141,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
maxlvt = lapic_get_maxlvt();
- apic_pm_state.apic_id = apic_read(APIC_ID);
+ apic_pm_state.apic_id = read_apic_id();
apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
apic_pm_state.apic_ldr = apic_read(APIC_LDR);
apic_pm_state.apic_dfr = apic_read(APIC_DFR);
@@ -1180,9 +1256,19 @@ __cpuinit int apic_is_clustered_box(void)
{
int i, clusters, zeros;
unsigned id;
- u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
+ u16 *bios_cpu_apicid;
DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
+ /*
+ * There are no boxes like this with AMD CPUs yet.  Some AMD boxes
+ * with quad-core CPUs and 8 sockets have APIC IDs of [4, 0x23] or
+ * [8, 0x27] and could be mistaken for a vSMP box; this still needs
+ * checking...
+ */
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
+ return 0;
+
+ bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
for (i = 0; i < NR_CPUS; i++) {
@@ -1219,6 +1305,12 @@ __cpuinit int apic_is_clustered_box(void)
++zeros;
}
+ /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
+ * not guaranteed to be synced between boards
+ */
+ if (is_vsmp_box() && clusters > 1)
+ return 1;
+
/*
* If clusters > 2, then should be multi-chassis.
* May have to revisit this when multi-core + hyperthreaded CPUs come
@@ -1290,3 +1382,21 @@ static __init int setup_apicpmtimer(char *s)
}
__setup("apicpmtimer", setup_apicpmtimer);
+static int __init lapic_insert_resource(void)
+{
+ if (!apic_phys)
+ return -1;
+
+ /* Put local APIC into the resource map. */
+ lapic_resource.start = apic_phys;
+ lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
+ insert_resource(&iomem_resource, &lapic_resource);
+
+ return 0;
+}
+
+/*
+ * This needs to be called after e820_reserve_resources(), which
+ * uses request_resource().
+ */
+late_initcall(lapic_insert_resource);
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index d4438ef296d8..f0030a0999c7 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -2217,7 +2217,6 @@ static struct dmi_system_id __initdata apm_dmi_table[] = {
*/
static int __init apm_init(void)
{
- struct proc_dir_entry *apm_proc;
struct desc_struct *gdt;
int err;
@@ -2322,9 +2321,7 @@ static int __init apm_init(void)
set_base(gdt[APM_DS >> 3],
__va((unsigned long)apm_info.bios.dseg << 4));
- apm_proc = create_proc_entry("apm", 0, NULL);
- if (apm_proc)
- apm_proc->proc_fops = &apm_file_ops;
+ proc_create("apm", 0, NULL, &apm_file_ops);
kapmd_task = kthread_create(apm, NULL, "kapmd");
if (IS_ERR(kapmd_task)) {
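The apm_init() change above collapses the old two-step /proc registration into a single proc_create() call; shown side by side as a sketch (apm_file_ops is the file's real fops table, the rest is illustrative):

	/* old style: create the entry, then patch in the fops by hand */
	struct proc_dir_entry *pde = create_proc_entry("apm", 0, NULL);
	if (pde)
		pde->proc_fops = &apm_file_ops;

	/* new style: registration and fops wiring in one call; the returned
	 * proc_dir_entry pointer (NULL on failure) can be checked if needed */
	proc_create("apm", 0, NULL, &apm_file_ops);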
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 8ea040124f7d..670c3c311289 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -10,7 +10,7 @@
#include <linux/personality.h>
#include <linux/suspend.h>
#include <asm/ucontext.h>
-#include "sigframe_32.h"
+#include "sigframe.h"
#include <asm/pgtable.h>
#include <asm/fixmap.h>
#include <asm/processor.h>
diff --git a/arch/x86/kernel/bugs_64.c b/arch/x86/kernel/bugs_64.c
index 8f520f93ffd4..9a3ed0649d4e 100644
--- a/arch/x86/kernel/bugs_64.c
+++ b/arch/x86/kernel/bugs_64.c
@@ -9,13 +9,25 @@
#include <asm/bugs.h>
#include <asm/processor.h>
#include <asm/mtrr.h>
+#include <asm/cacheflush.h>
void __init check_bugs(void)
{
- identify_cpu(&boot_cpu_data);
+ identify_boot_cpu();
#if !defined(CONFIG_SMP)
printk("CPU: ");
print_cpu_info(&boot_cpu_data);
#endif
alternative_instructions();
+
+ /*
+ * Make sure the first 2MB area is not mapped by huge pages.
+ * There are typically fixed-size MTRRs in there and overlapping
+ * MTRRs into large pages causes slowdowns.
+ *
+ * Right now we don't do that with gbpages because there seems
+ * to be very little benefit for that case.
+ */
+ if (!direct_gbpages)
+ set_memory_4k((unsigned long)__va(0), 1);
}
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index a0c4d7c5dbd7..ee7c45235e54 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -3,9 +3,9 @@
#
obj-y := intel_cacheinfo.o addon_cpuid_features.o
-obj-y += feature_names.o
+obj-y += proc.o feature_names.o
-obj-$(CONFIG_X86_32) += common.o proc.o bugs.o
+obj-$(CONFIG_X86_32) += common.o bugs.o
obj-$(CONFIG_X86_32) += amd.o
obj-$(CONFIG_X86_32) += cyrix.o
obj-$(CONFIG_X86_32) += centaur.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 693e353999cd..0173065dc3b7 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -4,8 +4,8 @@
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/apic.h>
-#include <asm/mach_apic.h>
+#include <mach_apic.h>
#include "cpu.h"
/*
@@ -20,7 +20,7 @@
* the chip setting when fixing the bug but they also tweaked some
* performance at the same time..
*/
-
+
extern void vide(void);
__asm__(".align 4\nvide: ret");
@@ -63,12 +63,12 @@ static __cpuinit int amd_apic_timer_broken(void)
int force_mwait __cpuinitdata;
-void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
+static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
{
if (cpuid_eax(0x80000000) >= 0x80000007) {
c->x86_power = cpuid_edx(0x80000007);
if (c->x86_power & (1<<8))
- set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
}
}
@@ -81,7 +81,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
#ifdef CONFIG_SMP
unsigned long long value;
- /* Disable TLB flush filter by setting HWCR.FFDIS on K8
+ /*
+ * Disable TLB flush filter by setting HWCR.FFDIS on K8
* bit 6 of msr C001_0015
*
* Errata 63 for SH-B3 steppings
@@ -102,15 +103,16 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
* no bus pipeline)
*/
- /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
- 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
- clear_bit(0*32+31, c->x86_capability);
-
+ /*
+ * Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+ * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
+ */
+ clear_cpu_cap(c, 0*32+31);
+
r = get_model_name(c);
- switch(c->x86)
- {
- case 4:
+ switch (c->x86) {
+ case 4:
/*
* General Systems BIOSen alias the cpu frequency registers
* of the Elan at 0x000df000. Unfortuantly, one of the Linux
@@ -120,61 +122,60 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */
#define CBAR_ENB (0x80000000)
#define CBAR_KEY (0X000000CB)
- if (c->x86_model==9 || c->x86_model == 10) {
+ if (c->x86_model == 9 || c->x86_model == 10) {
if (inl (CBAR) & CBAR_ENB)
outl (0 | CBAR_KEY, CBAR);
}
break;
- case 5:
- if( c->x86_model < 6 )
- {
+ case 5:
+ if (c->x86_model < 6) {
/* Based on AMD doc 20734R - June 2000 */
- if ( c->x86_model == 0 ) {
- clear_bit(X86_FEATURE_APIC, c->x86_capability);
- set_bit(X86_FEATURE_PGE, c->x86_capability);
+ if (c->x86_model == 0) {
+ clear_cpu_cap(c, X86_FEATURE_APIC);
+ set_cpu_cap(c, X86_FEATURE_PGE);
}
break;
}
-
- if ( c->x86_model == 6 && c->x86_mask == 1 ) {
+
+ if (c->x86_model == 6 && c->x86_mask == 1) {
const int K6_BUG_LOOP = 1000000;
int n;
void (*f_vide)(void);
unsigned long d, d2;
-
+
printk(KERN_INFO "AMD K6 stepping B detected - ");
-
+
/*
- * It looks like AMD fixed the 2.6.2 bug and improved indirect
+ * It looks like AMD fixed the 2.6.2 bug and improved indirect
* calls at the same time.
*/
n = K6_BUG_LOOP;
f_vide = vide;
rdtscl(d);
- while (n--)
+ while (n--)
f_vide();
rdtscl(d2);
d = d2-d;
- if (d > 20*K6_BUG_LOOP)
+ if (d > 20*K6_BUG_LOOP)
printk("system stability may be impaired when more than 32 MB are used.\n");
- else
+ else
printk("probably OK (after B9730xxxx).\n");
printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
}
/* K6 with old style WHCR */
if (c->x86_model < 8 ||
- (c->x86_model== 8 && c->x86_mask < 8)) {
+ (c->x86_model == 8 && c->x86_mask < 8)) {
/* We can only write allocate on the low 508Mb */
- if(mbytes>508)
- mbytes=508;
+ if (mbytes > 508)
+ mbytes = 508;
rdmsr(MSR_K6_WHCR, l, h);
- if ((l&0x0000FFFF)==0) {
+ if ((l&0x0000FFFF) == 0) {
unsigned long flags;
- l=(1<<0)|((mbytes/4)<<1);
+ l = (1<<0)|((mbytes/4)<<1);
local_irq_save(flags);
wbinvd();
wrmsr(MSR_K6_WHCR, l, h);
@@ -185,17 +186,17 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
break;
}
- if ((c->x86_model == 8 && c->x86_mask >7) ||
+ if ((c->x86_model == 8 && c->x86_mask > 7) ||
c->x86_model == 9 || c->x86_model == 13) {
/* The more serious chips .. */
- if(mbytes>4092)
- mbytes=4092;
+ if (mbytes > 4092)
+ mbytes = 4092;
rdmsr(MSR_K6_WHCR, l, h);
- if ((l&0xFFFF0000)==0) {
+ if ((l&0xFFFF0000) == 0) {
unsigned long flags;
- l=((mbytes>>2)<<22)|(1<<16);
+ l = ((mbytes>>2)<<22)|(1<<16);
local_irq_save(flags);
wbinvd();
wrmsr(MSR_K6_WHCR, l, h);
@@ -207,7 +208,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
/* Set MTRR capability flag if appropriate */
if (c->x86_model == 13 || c->x86_model == 9 ||
(c->x86_model == 8 && c->x86_mask >= 8))
- set_bit(X86_FEATURE_K6_MTRR, c->x86_capability);
+ set_cpu_cap(c, X86_FEATURE_K6_MTRR);
break;
}
@@ -217,10 +218,11 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
break;
}
break;
- case 6: /* An Athlon/Duron */
-
- /* Bit 15 of Athlon specific MSR 15, needs to be 0
- * to enable SSE on Palomino/Morgan/Barton CPU's.
+ case 6: /* An Athlon/Duron */
+
+ /*
+ * Bit 15 of Athlon specific MSR 15, needs to be 0
+ * to enable SSE on Palomino/Morgan/Barton CPU's.
* If the BIOS didn't enable it already, enable it here.
*/
if (c->x86_model >= 6 && c->x86_model <= 10) {
@@ -229,15 +231,16 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
rdmsr(MSR_K7_HWCR, l, h);
l &= ~0x00008000;
wrmsr(MSR_K7_HWCR, l, h);
- set_bit(X86_FEATURE_XMM, c->x86_capability);
+ set_cpu_cap(c, X86_FEATURE_XMM);
}
}
- /* It's been determined by AMD that Athlons since model 8 stepping 1
+ /*
+ * It's been determined by AMD that Athlons since model 8 stepping 1
* are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
* As per AMD technical note 27212 0.2
*/
- if ((c->x86_model == 8 && c->x86_mask>=1) || (c->x86_model > 8)) {
+ if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
rdmsr(MSR_K7_CLK_CTL, l, h);
if ((l & 0xfff00000) != 0x20000000) {
printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l,
@@ -253,20 +256,19 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
/* Use K8 tuning for Fam10h and Fam11h */
case 0x10:
case 0x11:
- set_bit(X86_FEATURE_K8, c->x86_capability);
+ set_cpu_cap(c, X86_FEATURE_K8);
break;
case 6:
- set_bit(X86_FEATURE_K7, c->x86_capability);
+ set_cpu_cap(c, X86_FEATURE_K7);
break;
}
if (c->x86 >= 6)
- set_bit(X86_FEATURE_FXSAVE_LEAK, c->x86_capability);
+ set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
display_cacheinfo(c);
- if (cpuid_eax(0x80000000) >= 0x80000008) {
+ if (cpuid_eax(0x80000000) >= 0x80000008)
c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
- }
#ifdef CONFIG_X86_HT
/*
@@ -302,20 +304,20 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
/* K6s reports MCEs but don't actually have all the MSRs */
if (c->x86 < 6)
- clear_bit(X86_FEATURE_MCE, c->x86_capability);
+ clear_cpu_cap(c, X86_FEATURE_MCE);
if (cpu_has_xmm2)
- set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability);
+ set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
}
-static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
+static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
/* AMD errata T13 (order #21922) */
if ((c->x86 == 6)) {
if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */
size = 64;
if (c->x86_model == 4 &&
- (c->x86_mask==0 || c->x86_mask==1)) /* Tbird rev A1/A2 */
+ (c->x86_mask == 0 || c->x86_mask == 1)) /* Tbird rev A1/A2 */
size = 256;
}
return size;
@@ -323,19 +325,20 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned in
static struct cpu_dev amd_cpu_dev __cpuinitdata = {
.c_vendor = "AMD",
- .c_ident = { "AuthenticAMD" },
+ .c_ident = { "AuthenticAMD" },
.c_models = {
{ .vendor = X86_VENDOR_AMD, .family = 4, .model_names =
{
[3] = "486 DX/2",
[7] = "486 DX/2-WB",
- [8] = "486 DX/4",
- [9] = "486 DX/4-WB",
+ [8] = "486 DX/4",
+ [9] = "486 DX/4-WB",
[14] = "Am5x86-WT",
- [15] = "Am5x86-WB"
+ [15] = "Am5x86-WB"
}
},
},
+ .c_early_init = early_init_amd,
.c_init = init_amd,
.c_size_cache = amd_size_cache,
};
@@ -345,3 +348,5 @@ int __init amd_init_cpu(void)
cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev;
return 0;
}
+
+cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev);
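The amd.c cleanup above switches from raw set_bit()/clear_bit() on x86_capability to the set_cpu_cap()/clear_cpu_cap() accessors; roughly (the real definitions live in <asm/cpufeature.h> and may differ in detail) they reduce to:

#define sketch_set_cpu_cap(c, bit)	\
	set_bit(bit, (unsigned long *)((c)->x86_capability))
#define sketch_clear_cpu_cap(c, bit)	\
	clear_bit(bit, (unsigned long *)((c)->x86_capability))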
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index 9681fa15ddf0..e0f45edd6a55 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -1,31 +1,34 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/bitops.h>
+
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
+
#include "cpu.h"
#ifdef CONFIG_X86_OOSTORE
static u32 __cpuinit power2(u32 x)
{
- u32 s=1;
- while(s<=x)
- s<<=1;
- return s>>=1;
+ u32 s = 1;
+
+ while (s <= x)
+ s <<= 1;
+
+ return s >>= 1;
}
/*
- * Set up an actual MCR
+ * Set up an actual MCR
*/
-
static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key)
{
u32 lo, hi;
-
+
hi = base & ~0xFFF;
lo = ~(size-1); /* Size is a power of 2 so this makes a mask */
lo &= ~0xFFF; /* Remove the ctrl value bits */
@@ -35,30 +38,28 @@ static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key)
}
/*
- * Figure what we can cover with MCR's
+ * Figure what we can cover with MCR's
*
- * Shortcut: We know you can't put 4Gig of RAM on a winchip
+ * Shortcut: We know you can't put 4Gig of RAM on a winchip
*/
-
-static u32 __cpuinit ramtop(void) /* 16388 */
+static u32 __cpuinit ramtop(void)
{
- int i;
- u32 top = 0;
u32 clip = 0xFFFFFFFFUL;
-
+ u32 top = 0;
+ int i;
+
for (i = 0; i < e820.nr_map; i++) {
unsigned long start, end;
if (e820.map[i].addr > 0xFFFFFFFFUL)
continue;
/*
- * Don't MCR over reserved space. Ignore the ISA hole
- * we frob around that catastrophe already
+ * Don't MCR over reserved space. Ignore the ISA hole
+ * we frob around that catastrophe already
*/
-
- if (e820.map[i].type == E820_RESERVED)
- {
- if(e820.map[i].addr >= 0x100000UL && e820.map[i].addr < clip)
+ if (e820.map[i].type == E820_RESERVED) {
+ if (e820.map[i].addr >= 0x100000UL &&
+ e820.map[i].addr < clip)
clip = e820.map[i].addr;
continue;
}
@@ -69,28 +70,27 @@ static u32 __cpuinit ramtop(void) /* 16388 */
if (end > top)
top = end;
}
- /* Everything below 'top' should be RAM except for the ISA hole.
- Because of the limited MCR's we want to map NV/ACPI into our
- MCR range for gunk in RAM
-
- Clip might cause us to MCR insufficient RAM but that is an
- acceptable failure mode and should only bite obscure boxes with
- a VESA hole at 15Mb
-
- The second case Clip sometimes kicks in is when the EBDA is marked
- as reserved. Again we fail safe with reasonable results
- */
-
- if(top>clip)
- top=clip;
-
+ /*
+ * Everything below 'top' should be RAM except for the ISA hole.
+ * Because of the limited MCR's we want to map NV/ACPI into our
+ * MCR range for gunk in RAM
+ *
+ * Clip might cause us to MCR insufficient RAM but that is an
+ * acceptable failure mode and should only bite obscure boxes with
+ * a VESA hole at 15Mb
+ *
+ * The second case Clip sometimes kicks in is when the EBDA is marked
+ * as reserved. Again we fail safe with reasonable results
+ */
+ if (top > clip)
+ top = clip;
+
return top;
}
/*
- * Compute a set of MCR's to give maximum coverage
+ * Compute a set of MCR's to give maximum coverage
*/
-
static int __cpuinit centaur_mcr_compute(int nr, int key)
{
u32 mem = ramtop();
@@ -99,141 +99,131 @@ static int __cpuinit centaur_mcr_compute(int nr, int key)
u32 top = root;
u32 floor = 0;
int ct = 0;
-
- while(ct<nr)
- {
+
+ while (ct < nr) {
u32 fspace = 0;
+ u32 high;
+ u32 low;
/*
- * Find the largest block we will fill going upwards
+ * Find the largest block we will fill going upwards
*/
-
- u32 high = power2(mem-top);
+ high = power2(mem-top);
/*
- * Find the largest block we will fill going downwards
+ * Find the largest block we will fill going downwards
*/
-
- u32 low = base/2;
+ low = base/2;
/*
- * Don't fill below 1Mb going downwards as there
- * is an ISA hole in the way.
- */
-
- if(base <= 1024*1024)
+ * Don't fill below 1Mb going downwards as there
+ * is an ISA hole in the way.
+ */
+ if (base <= 1024*1024)
low = 0;
-
+
/*
- * See how much space we could cover by filling below
- * the ISA hole
+ * See how much space we could cover by filling below
+ * the ISA hole
*/
-
- if(floor == 0)
+
+ if (floor == 0)
fspace = 512*1024;
- else if(floor ==512*1024)
+ else if (floor == 512*1024)
fspace = 128*1024;
/* And forget ROM space */
-
+
/*
- * Now install the largest coverage we get
+ * Now install the largest coverage we get
*/
-
- if(fspace > high && fspace > low)
- {
+ if (fspace > high && fspace > low) {
centaur_mcr_insert(ct, floor, fspace, key);
floor += fspace;
- }
- else if(high > low)
- {
+ } else if (high > low) {
centaur_mcr_insert(ct, top, high, key);
top += high;
- }
- else if(low > 0)
- {
+ } else if (low > 0) {
base -= low;
centaur_mcr_insert(ct, base, low, key);
- }
- else break;
+ } else
+ break;
ct++;
}
/*
- * We loaded ct values. We now need to set the mask. The caller
- * must do this bit.
+ * We loaded ct values. We now need to set the mask. The caller
+ * must do this bit.
*/
-
return ct;
}
static void __cpuinit centaur_create_optimal_mcr(void)
{
+ int used;
int i;
+
/*
- * Allocate up to 6 mcrs to mark as much of ram as possible
- * as write combining and weak write ordered.
+ * Allocate up to 6 mcrs to mark as much of ram as possible
+ * as write combining and weak write ordered.
*
- * To experiment with: Linux never uses stack operations for
- * mmio spaces so we could globally enable stack operation wc
+ * To experiment with: Linux never uses stack operations for
+ * mmio spaces so we could globally enable stack operation wc
*
- * Load the registers with type 31 - full write combining, all
- * writes weakly ordered.
+ * Load the registers with type 31 - full write combining, all
+ * writes weakly ordered.
*/
- int used = centaur_mcr_compute(6, 31);
+ used = centaur_mcr_compute(6, 31);
/*
- * Wipe unused MCRs
+ * Wipe unused MCRs
*/
-
- for(i=used;i<8;i++)
+ for (i = used; i < 8; i++)
wrmsr(MSR_IDT_MCR0+i, 0, 0);
}
static void __cpuinit winchip2_create_optimal_mcr(void)
{
u32 lo, hi;
+ int used;
int i;
/*
- * Allocate up to 6 mcrs to mark as much of ram as possible
- * as write combining, weak store ordered.
+ * Allocate up to 6 mcrs to mark as much of ram as possible
+ * as write combining, weak store ordered.
*
- * Load the registers with type 25
- * 8 - weak write ordering
- * 16 - weak read ordering
- * 1 - write combining
+ * Load the registers with type 25
+ * 8 - weak write ordering
+ * 16 - weak read ordering
+ * 1 - write combining
*/
+ used = centaur_mcr_compute(6, 25);
- int used = centaur_mcr_compute(6, 25);
-
/*
- * Mark the registers we are using.
+ * Mark the registers we are using.
*/
-
rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
- for(i=0;i<used;i++)
- lo|=1<<(9+i);
+ for (i = 0; i < used; i++)
+ lo |= 1<<(9+i);
wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
-
+
/*
- * Wipe unused MCRs
+ * Wipe unused MCRs
*/
-
- for(i=used;i<8;i++)
+
+ for (i = used; i < 8; i++)
wrmsr(MSR_IDT_MCR0+i, 0, 0);
}
/*
- * Handle the MCR key on the Winchip 2.
+ * Handle the MCR key on the Winchip 2.
*/
-
static void __cpuinit winchip2_unprotect_mcr(void)
{
u32 lo, hi;
u32 key;
-
+
rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
- lo&=~0x1C0; /* blank bits 8-6 */
+ lo &= ~0x1C0; /* blank bits 8-6 */
key = (lo>>17) & 7;
lo |= key<<6; /* replace with unlock key */
wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
@@ -242,9 +232,9 @@ static void __cpuinit winchip2_unprotect_mcr(void)
static void __cpuinit winchip2_protect_mcr(void)
{
u32 lo, hi;
-
+
rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
- lo&=~0x1C0; /* blank bits 8-6 */
+ lo &= ~0x1C0; /* blank bits 8-6 */
wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
}
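Both helpers above rewrite bits 8-6 of MSR_IDT_MCR_CTRL: winchip2_unprotect_mcr() copies the lock key held in bits 19-17 down into bits 8-6 to unlock the MCRs, while winchip2_protect_mcr() simply blanks those bits again. A worked example with a made-up register value (the real key is board-specific):

/*
 * Illustration only: if MSR_IDT_MCR_CTRL reads lo = 0x000A0000,
 * then key = (lo >> 17) & 7 = 5, and after
 *	lo &= ~0x1C0;   lo |= key << 6;
 * the value written back is 0x000A0140, i.e. the key mirrored into
 * the unlock field; protect leaves bits 8-6 cleared instead.
 */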
#endif /* CONFIG_X86_OOSTORE */
@@ -267,17 +257,17 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
/* enable ACE unit, if present and disabled */
if ((tmp & (ACE_PRESENT | ACE_ENABLED)) == ACE_PRESENT) {
- rdmsr (MSR_VIA_FCR, lo, hi);
+ rdmsr(MSR_VIA_FCR, lo, hi);
lo |= ACE_FCR; /* enable ACE unit */
- wrmsr (MSR_VIA_FCR, lo, hi);
+ wrmsr(MSR_VIA_FCR, lo, hi);
printk(KERN_INFO "CPU: Enabled ACE h/w crypto\n");
}
/* enable RNG unit, if present and disabled */
if ((tmp & (RNG_PRESENT | RNG_ENABLED)) == RNG_PRESENT) {
- rdmsr (MSR_VIA_RNG, lo, hi);
+ rdmsr(MSR_VIA_RNG, lo, hi);
lo |= RNG_ENABLE; /* enable RNG unit */
- wrmsr (MSR_VIA_RNG, lo, hi);
+ wrmsr(MSR_VIA_RNG, lo, hi);
printk(KERN_INFO "CPU: Enabled h/w RNG\n");
}
@@ -288,171 +278,183 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
}
/* Cyrix III family needs CX8 & PGE explicitly enabled. */
- if (c->x86_model >=6 && c->x86_model <= 9) {
- rdmsr (MSR_VIA_FCR, lo, hi);
+ if (c->x86_model >= 6 && c->x86_model <= 9) {
+ rdmsr(MSR_VIA_FCR, lo, hi);
lo |= (1<<1 | 1<<7);
- wrmsr (MSR_VIA_FCR, lo, hi);
- set_bit(X86_FEATURE_CX8, c->x86_capability);
+ wrmsr(MSR_VIA_FCR, lo, hi);
+ set_cpu_cap(c, X86_FEATURE_CX8);
}
/* Before Nehemiah, the C3's had 3dNOW! */
- if (c->x86_model >=6 && c->x86_model <9)
- set_bit(X86_FEATURE_3DNOW, c->x86_capability);
+ if (c->x86_model >= 6 && c->x86_model < 9)
+ set_cpu_cap(c, X86_FEATURE_3DNOW);
get_model_name(c);
display_cacheinfo(c);
}
+enum {
+ ECX8 = 1<<1,
+ EIERRINT = 1<<2,
+ DPM = 1<<3,
+ DMCE = 1<<4,
+ DSTPCLK = 1<<5,
+ ELINEAR = 1<<6,
+ DSMC = 1<<7,
+ DTLOCK = 1<<8,
+ EDCTLB = 1<<8,
+ EMMX = 1<<9,
+ DPDC = 1<<11,
+ EBRPRED = 1<<12,
+ DIC = 1<<13,
+ DDC = 1<<14,
+ DNA = 1<<15,
+ ERETSTK = 1<<16,
+ E2MMX = 1<<19,
+ EAMD3D = 1<<20,
+};
+
static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
{
- enum {
- ECX8=1<<1,
- EIERRINT=1<<2,
- DPM=1<<3,
- DMCE=1<<4,
- DSTPCLK=1<<5,
- ELINEAR=1<<6,
- DSMC=1<<7,
- DTLOCK=1<<8,
- EDCTLB=1<<8,
- EMMX=1<<9,
- DPDC=1<<11,
- EBRPRED=1<<12,
- DIC=1<<13,
- DDC=1<<14,
- DNA=1<<15,
- ERETSTK=1<<16,
- E2MMX=1<<19,
- EAMD3D=1<<20,
- };
char *name;
- u32 fcr_set=0;
- u32 fcr_clr=0;
- u32 lo,hi,newlo;
- u32 aa,bb,cc,dd;
+ u32 fcr_set = 0;
+ u32 fcr_clr = 0;
+ u32 lo, hi, newlo;
+ u32 aa, bb, cc, dd;
- /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
- 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
- clear_bit(0*32+31, c->x86_capability);
+ /*
+ * Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+ * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
+ */
+ clear_cpu_cap(c, 0*32+31);
switch (c->x86) {
-
- case 5:
- switch(c->x86_model) {
- case 4:
- name="C6";
- fcr_set=ECX8|DSMC|EDCTLB|EMMX|ERETSTK;
- fcr_clr=DPDC;
- printk(KERN_NOTICE "Disabling bugged TSC.\n");
- clear_bit(X86_FEATURE_TSC, c->x86_capability);
+ case 5:
+ switch (c->x86_model) {
+ case 4:
+ name = "C6";
+ fcr_set = ECX8|DSMC|EDCTLB|EMMX|ERETSTK;
+ fcr_clr = DPDC;
+ printk(KERN_NOTICE "Disabling bugged TSC.\n");
+ clear_cpu_cap(c, X86_FEATURE_TSC);
#ifdef CONFIG_X86_OOSTORE
- centaur_create_optimal_mcr();
- /* Enable
- write combining on non-stack, non-string
- write combining on string, all types
- weak write ordering
-
- The C6 original lacks weak read order
-
- Note 0x120 is write only on Winchip 1 */
-
- wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0);
-#endif
+ centaur_create_optimal_mcr();
+ /*
+ * Enable:
+ * write combining on non-stack, non-string
+ * write combining on string, all types
+ * weak write ordering
+ *
+ * The C6 original lacks weak read order
+ *
+ * Note 0x120 is write only on Winchip 1
+ */
+ wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0);
+#endif
+ break;
+ case 8:
+ switch (c->x86_mask) {
+ default:
+ name = "2";
+ break;
+ case 7 ... 9:
+ name = "2A";
break;
- case 8:
- switch(c->x86_mask) {
- default:
- name="2";
- break;
- case 7 ... 9:
- name="2A";
- break;
- case 10 ... 15:
- name="2B";
- break;
- }
- fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D;
- fcr_clr=DPDC;
+ case 10 ... 15:
+ name = "2B";
+ break;
+ }
+ fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|
+ E2MMX|EAMD3D;
+ fcr_clr = DPDC;
#ifdef CONFIG_X86_OOSTORE
- winchip2_unprotect_mcr();
- winchip2_create_optimal_mcr();
- rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
- /* Enable
- write combining on non-stack, non-string
- write combining on string, all types
- weak write ordering
- */
- lo|=31;
- wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
- winchip2_protect_mcr();
+ winchip2_unprotect_mcr();
+ winchip2_create_optimal_mcr();
+ rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
+ /*
+ * Enable:
+ * write combining on non-stack, non-string
+ * write combining on string, all types
+ * weak write ordering
+ */
+ lo |= 31;
+ wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
+ winchip2_protect_mcr();
#endif
- break;
- case 9:
- name="3";
- fcr_set=ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|E2MMX|EAMD3D;
- fcr_clr=DPDC;
+ break;
+ case 9:
+ name = "3";
+ fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|
+ E2MMX|EAMD3D;
+ fcr_clr = DPDC;
#ifdef CONFIG_X86_OOSTORE
- winchip2_unprotect_mcr();
- winchip2_create_optimal_mcr();
- rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
- /* Enable
- write combining on non-stack, non-string
- write combining on string, all types
- weak write ordering
- */
- lo|=31;
- wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
- winchip2_protect_mcr();
+ winchip2_unprotect_mcr();
+ winchip2_create_optimal_mcr();
+ rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
+ /*
+ * Enable:
+ * write combining on non-stack, non-string
+ * write combining on string, all types
+ * weak write ordering
+ */
+ lo |= 31;
+ wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
+ winchip2_protect_mcr();
#endif
- break;
- default:
- name="??";
- }
+ break;
+ default:
+ name = "??";
+ }
- rdmsr(MSR_IDT_FCR1, lo, hi);
- newlo=(lo|fcr_set) & (~fcr_clr);
+ rdmsr(MSR_IDT_FCR1, lo, hi);
+ newlo = (lo|fcr_set) & (~fcr_clr);
- if (newlo!=lo) {
- printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n", lo, newlo );
- wrmsr(MSR_IDT_FCR1, newlo, hi );
- } else {
- printk(KERN_INFO "Centaur FCR is 0x%X\n",lo);
- }
- /* Emulate MTRRs using Centaur's MCR. */
- set_bit(X86_FEATURE_CENTAUR_MCR, c->x86_capability);
- /* Report CX8 */
- set_bit(X86_FEATURE_CX8, c->x86_capability);
- /* Set 3DNow! on Winchip 2 and above. */
- if (c->x86_model >=8)
- set_bit(X86_FEATURE_3DNOW, c->x86_capability);
- /* See if we can find out some more. */
- if ( cpuid_eax(0x80000000) >= 0x80000005 ) {
- /* Yes, we can. */
- cpuid(0x80000005,&aa,&bb,&cc,&dd);
- /* Add L1 data and code cache sizes. */
- c->x86_cache_size = (cc>>24)+(dd>>24);
- }
- sprintf( c->x86_model_id, "WinChip %s", name );
- break;
+ if (newlo != lo) {
+ printk(KERN_INFO "Centaur FCR was 0x%X now 0x%X\n",
+ lo, newlo);
+ wrmsr(MSR_IDT_FCR1, newlo, hi);
+ } else {
+ printk(KERN_INFO "Centaur FCR is 0x%X\n", lo);
+ }
+ /* Emulate MTRRs using Centaur's MCR. */
+ set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
+ /* Report CX8 */
+ set_cpu_cap(c, X86_FEATURE_CX8);
+ /* Set 3DNow! on Winchip 2 and above. */
+ if (c->x86_model >= 8)
+ set_cpu_cap(c, X86_FEATURE_3DNOW);
+ /* See if we can find out some more. */
+ if (cpuid_eax(0x80000000) >= 0x80000005) {
+ /* Yes, we can. */
+ cpuid(0x80000005, &aa, &bb, &cc, &dd);
+ /* Add L1 data and code cache sizes. */
+ c->x86_cache_size = (cc>>24)+(dd>>24);
+ }
+ sprintf(c->x86_model_id, "WinChip %s", name);
+ break;
- case 6:
- init_c3(c);
- break;
+ case 6:
+ init_c3(c);
+ break;
}
}
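The FCR update in the family-5 path above is a straightforward read-modify-write: every bit in fcr_set is forced on, every bit in fcr_clr is forced off, the rest of MSR_IDT_FCR1 is preserved, and the MSR is only rewritten when the value actually changes. As a one-line sketch:

/* Illustrative helper mirroring the newlo computation above. */
static inline u32 fcr_apply(u32 lo, u32 set, u32 clr)
{
	return (lo | set) & ~clr;	/* force 'set' on, 'clr' off */
}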
-static unsigned int __cpuinit centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size)
+static unsigned int __cpuinit
+centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
/* VIA C3 CPUs (670-68F) need further shifting. */
if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8)))
size >>= 8;
- /* VIA also screwed up Nehemiah stepping 1, and made
- it return '65KB' instead of '64KB'
- - Note, it seems this may only be in engineering samples. */
- if ((c->x86==6) && (c->x86_model==9) && (c->x86_mask==1) && (size==65))
- size -=1;
+ /*
+ * There's also an erratum in Nehemiah stepping 1, which
+ * returns '65KB' instead of '64KB'
+ * - Note, it seems this may only be in engineering samples.
+ */
+ if ((c->x86 == 6) && (c->x86_model == 9) &&
+ (c->x86_mask == 1) && (size == 65))
+ size -= 1;
return size;
}
@@ -464,8 +466,4 @@ static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
.c_size_cache = centaur_size_cache,
};
-int __init centaur_init_cpu(void)
-{
- cpu_devs[X86_VENDOR_CENTAUR] = &centaur_cpu_dev;
- return 0;
-}
+cpu_vendor_dev_register(X86_VENDOR_CENTAUR, &centaur_cpu_dev);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a38aafaefc23..d999d7833bc2 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -62,9 +62,9 @@ __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
static int cachesize_override __cpuinitdata = -1;
static int disable_x86_serial_nr __cpuinitdata = 1;
-struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
+struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
-static void __cpuinit default_init(struct cpuinfo_x86 * c)
+static void __cpuinit default_init(struct cpuinfo_x86 *c)
{
/* Not much we can do here... */
/* Check if at least it has cpuid */
@@ -81,11 +81,11 @@ static struct cpu_dev __cpuinitdata default_cpu = {
.c_init = default_init,
.c_vendor = "Unknown",
};
-static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu;
+static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
static int __init cachesize_setup(char *str)
{
- get_option (&str, &cachesize_override);
+ get_option(&str, &cachesize_override);
return 1;
}
__setup("cachesize=", cachesize_setup);
@@ -107,12 +107,12 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c)
/* Intel chips right-justify this string for some dumb reason;
undo that brain damage */
p = q = &c->x86_model_id[0];
- while ( *p == ' ' )
+ while (*p == ' ')
p++;
- if ( p != q ) {
- while ( *p )
+ if (p != q) {
+ while (*p)
*q++ = *p++;
- while ( q <= &c->x86_model_id[48] )
+ while (q <= &c->x86_model_id[48])
*q++ = '\0'; /* Zero-pad the rest */
}
@@ -130,7 +130,7 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
- c->x86_cache_size=(ecx>>24)+(edx>>24);
+ c->x86_cache_size = (ecx>>24)+(edx>>24);
}
if (n < 0x80000006) /* Some chips just has a large L1. */
@@ -138,16 +138,16 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
ecx = cpuid_ecx(0x80000006);
l2size = ecx >> 16;
-
+
/* do processor-specific cache resizing */
if (this_cpu->c_size_cache)
- l2size = this_cpu->c_size_cache(c,l2size);
+ l2size = this_cpu->c_size_cache(c, l2size);
/* Allow user to override all this if necessary. */
if (cachesize_override != -1)
l2size = cachesize_override;
- if ( l2size == 0 )
+ if (l2size == 0)
return; /* Again, no L2 cache is possible */
c->x86_cache_size = l2size;
@@ -156,16 +156,19 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
l2size, ecx & 0xFF);
}
-/* Naming convention should be: <Name> [(<Codename>)] */
-/* This table only is used unless init_<vendor>() below doesn't set it; */
-/* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */
+/*
+ * Naming convention should be: <Name> [(<Codename>)]
+ * This table only is used unless init_<vendor>() below doesn't set it;
+ * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
+ *
+ */
/* Look up CPU names by table lookup. */
static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
{
struct cpu_model_info *info;
- if ( c->x86_model >= 16 )
+ if (c->x86_model >= 16)
return NULL; /* Range check */
if (!this_cpu)
@@ -190,9 +193,9 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
for (i = 0; i < X86_VENDOR_NUM; i++) {
if (cpu_devs[i]) {
- if (!strcmp(v,cpu_devs[i]->c_ident[0]) ||
- (cpu_devs[i]->c_ident[1] &&
- !strcmp(v,cpu_devs[i]->c_ident[1]))) {
+ if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
+ (cpu_devs[i]->c_ident[1] &&
+ !strcmp(v, cpu_devs[i]->c_ident[1]))) {
c->x86_vendor = i;
if (!early)
this_cpu = cpu_devs[i];
@@ -210,7 +213,7 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
}
-static int __init x86_fxsr_setup(char * s)
+static int __init x86_fxsr_setup(char *s)
{
setup_clear_cpu_cap(X86_FEATURE_FXSR);
setup_clear_cpu_cap(X86_FEATURE_XMM);
@@ -219,7 +222,7 @@ static int __init x86_fxsr_setup(char * s)
__setup("nofxsr", x86_fxsr_setup);
-static int __init x86_sep_setup(char * s)
+static int __init x86_sep_setup(char *s)
{
setup_clear_cpu_cap(X86_FEATURE_SEP);
return 1;
@@ -306,14 +309,30 @@ static void __cpuinit early_get_cap(struct cpuinfo_x86 *c)
}
-}
+ clear_cpu_cap(c, X86_FEATURE_PAT);
+
+ switch (c->x86_vendor) {
+ case X86_VENDOR_AMD:
+ if (c->x86 >= 0xf && c->x86 <= 0x11)
+ set_cpu_cap(c, X86_FEATURE_PAT);
+ break;
+ case X86_VENDOR_INTEL:
+ if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15))
+ set_cpu_cap(c, X86_FEATURE_PAT);
+ break;
+ }
-/* Do minimum CPU detection early.
- Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
- The others are not touched to avoid unwanted side effects.
+}
- WARNING: this function is only called on the BP. Don't add code here
- that is supposed to run on all CPUs. */
+/*
+ * Do minimum CPU detection early.
+ * Fields really needed: vendor, cpuid_level, family, model, mask,
+ * cache alignment.
+ * The others are not touched to avoid unwanted side effects.
+ *
+ * WARNING: this function is only called on the BP. Don't add code here
+ * that is supposed to run on all CPUs.
+ */
static void __init early_cpu_detect(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
@@ -328,19 +347,14 @@ static void __init early_cpu_detect(void)
get_cpu_vendor(c, 1);
- switch (c->x86_vendor) {
- case X86_VENDOR_AMD:
- early_init_amd(c);
- break;
- case X86_VENDOR_INTEL:
- early_init_intel(c);
- break;
- }
+ if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
+ cpu_devs[c->x86_vendor]->c_early_init)
+ cpu_devs[c->x86_vendor]->c_early_init(c);
early_get_cap(c);
}
-static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
+static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
{
u32 tfms, xlvl;
unsigned int ebx;
@@ -351,13 +365,12 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
(unsigned int *)&c->x86_vendor_id[0],
(unsigned int *)&c->x86_vendor_id[8],
(unsigned int *)&c->x86_vendor_id[4]);
-
+
get_cpu_vendor(c, 0);
/* Initialize the standard set of capabilities */
/* Note that the vendor-specific code below might override */
-
/* Intel-defined flags: level 0x00000001 */
- if ( c->cpuid_level >= 0x00000001 ) {
+ if (c->cpuid_level >= 0x00000001) {
u32 capability, excap;
cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
c->x86_capability[0] = capability;
@@ -369,12 +382,14 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
if (c->x86 >= 0x6)
c->x86_model += ((tfms >> 16) & 0xF) << 4;
c->x86_mask = tfms & 15;
+ c->initial_apicid = (ebx >> 24) & 0xFF;
#ifdef CONFIG_X86_HT
- c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
+ c->apicid = phys_pkg_id(c->initial_apicid, 0);
+ c->phys_proc_id = c->initial_apicid;
#else
- c->apicid = (ebx >> 24) & 0xFF;
+ c->apicid = c->initial_apicid;
#endif
- if (c->x86_capability[0] & (1<<19))
+ if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
} else {
/* Have CPUID level 0 only - unheard of */
@@ -383,33 +398,42 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
/* AMD-defined flags: level 0x80000001 */
xlvl = cpuid_eax(0x80000000);
- if ( (xlvl & 0xffff0000) == 0x80000000 ) {
- if ( xlvl >= 0x80000001 ) {
+ if ((xlvl & 0xffff0000) == 0x80000000) {
+ if (xlvl >= 0x80000001) {
c->x86_capability[1] = cpuid_edx(0x80000001);
c->x86_capability[6] = cpuid_ecx(0x80000001);
}
- if ( xlvl >= 0x80000004 )
+ if (xlvl >= 0x80000004)
get_model_name(c); /* Default name */
}
init_scattered_cpuid_features(c);
}
-#ifdef CONFIG_X86_HT
- c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
-#endif
+ clear_cpu_cap(c, X86_FEATURE_PAT);
+
+ switch (c->x86_vendor) {
+ case X86_VENDOR_AMD:
+ if (c->x86 >= 0xf && c->x86 <= 0x11)
+ set_cpu_cap(c, X86_FEATURE_PAT);
+ break;
+ case X86_VENDOR_INTEL:
+ if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15))
+ set_cpu_cap(c, X86_FEATURE_PAT);
+ break;
+ }
}
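early_get_cap() and generic_identify() now apply the same PAT policy: the bit reported by CPUID is cleared unconditionally and re-granted only for the AMD (family 0xf-0x11) and Intel (family 15, or family 6 model >= 15) parts where PAT is trusted, so later code only has to test the capability bit. A sketch of such a consumer (illustrative; in this series the real PAT setup is driven from the MTRR code further down):

	/* Sketch: gate PAT use on the whitelisted capability bit. */
	if (cpu_has(&boot_cpu_data, X86_FEATURE_PAT))
		pat_init();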
static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
{
- if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) {
+ if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) {
/* Disable processor serial number */
- unsigned long lo,hi;
- rdmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
+ unsigned long lo, hi;
+ rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
lo |= 0x200000;
- wrmsr(MSR_IA32_BBL_CR_CTL,lo,hi);
+ wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
printk(KERN_NOTICE "CPU serial number disabled.\n");
- clear_bit(X86_FEATURE_PN, c->x86_capability);
+ clear_cpu_cap(c, X86_FEATURE_PN);
/* Disabling the serial number may affect the cpuid level */
c->cpuid_level = cpuid_eax(0);
@@ -444,9 +468,11 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
memset(&c->x86_capability, 0, sizeof c->x86_capability);
if (!have_cpuid_p()) {
- /* First of all, decide if this is a 486 or higher */
- /* It's a 486 if we can modify the AC flag */
- if ( flag_is_changeable_p(X86_EFLAGS_AC) )
+ /*
+ * First of all, decide if this is a 486 or higher
+ * It's a 486 if we can modify the AC flag
+ */
+ if (flag_is_changeable_p(X86_EFLAGS_AC))
c->x86 = 4;
else
c->x86 = 3;
@@ -479,10 +505,10 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
*/
/* If the model name is still unset, do table lookup. */
- if ( !c->x86_model_id[0] ) {
+ if (!c->x86_model_id[0]) {
char *p;
p = table_lookup_model(c);
- if ( p )
+ if (p)
strcpy(c->x86_model_id, p);
else
/* Last resort... */
@@ -496,9 +522,9 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
* common between the CPUs. The first time this routine gets
* executed, c == &boot_cpu_data.
*/
- if ( c != &boot_cpu_data ) {
+ if (c != &boot_cpu_data) {
/* AND the already accumulated flags with these */
- for ( i = 0 ; i < NCAPINTS ; i++ )
+ for (i = 0 ; i < NCAPINTS ; i++)
boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
}
@@ -542,7 +568,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
if (smp_num_siblings == 1) {
printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
- } else if (smp_num_siblings > 1 ) {
+ } else if (smp_num_siblings > 1) {
if (smp_num_siblings > NR_CPUS) {
printk(KERN_WARNING "CPU: Unsupported number of the "
@@ -552,7 +578,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
}
index_msb = get_count_order(smp_num_siblings);
- c->phys_proc_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
+ c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb);
printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
c->phys_proc_id);
@@ -563,7 +589,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
core_bits = get_count_order(c->x86_max_cores);
- c->cpu_core_id = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
+ c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
((1 << core_bits) - 1);
if (c->x86_max_cores > 1)
@@ -597,7 +623,7 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
else
printk("%s", c->x86_model_id);
- if (c->x86_mask || c->cpuid_level >= 0)
+ if (c->x86_mask || c->cpuid_level >= 0)
printk(" stepping %02x\n", c->x86_mask);
else
printk("\n");
@@ -616,23 +642,15 @@ __setup("clearcpuid=", setup_disablecpuid);
cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
-/* This is hacky. :)
- * We're emulating future behavior.
- * In the future, the cpu-specific init functions will be called implicitly
- * via the magic of initcalls.
- * They will insert themselves into the cpu_devs structure.
- * Then, when cpu_init() is called, we can just iterate over that array.
- */
void __init early_cpu_init(void)
{
- intel_cpu_init();
- cyrix_init_cpu();
- nsc_init_cpu();
- amd_init_cpu();
- centaur_init_cpu();
- transmeta_init_cpu();
- nexgen_init_cpu();
- umc_init_cpu();
+ struct cpu_vendor_dev *cvdev;
+
+ for (cvdev = __x86cpuvendor_start ;
+ cvdev < __x86cpuvendor_end ;
+ cvdev++)
+ cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
+
early_cpu_detect();
}
@@ -666,7 +684,7 @@ void __cpuinit cpu_init(void)
{
int cpu = smp_processor_id();
struct task_struct *curr = current;
- struct tss_struct * t = &per_cpu(init_tss, cpu);
+ struct tss_struct *t = &per_cpu(init_tss, cpu);
struct thread_struct *thread = &curr->thread;
if (cpu_test_and_set(cpu, cpu_initialized)) {
@@ -692,7 +710,7 @@ void __cpuinit cpu_init(void)
enter_lazy_tlb(&init_mm, curr);
load_sp0(t, thread);
- set_tss_desc(cpu,t);
+ set_tss_desc(cpu, t);
load_TR_desc();
load_LDT(&init_mm.context);
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index e0b38c33d842..783691b2a738 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -14,6 +14,7 @@ struct cpu_dev {
struct cpu_model_info c_models[4];
+ void (*c_early_init)(struct cpuinfo_x86 *c);
void (*c_init)(struct cpuinfo_x86 * c);
void (*c_identify)(struct cpuinfo_x86 * c);
unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size);
@@ -21,18 +22,17 @@ struct cpu_dev {
extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM];
+struct cpu_vendor_dev {
+ int vendor;
+ struct cpu_dev *cpu_dev;
+};
+
+#define cpu_vendor_dev_register(cpu_vendor_id, cpu_dev) \
+ static struct cpu_vendor_dev __cpu_vendor_dev_##cpu_vendor_id __used \
+ __attribute__((__section__(".x86cpuvendor.init"))) = \
+ { cpu_vendor_id, cpu_dev }
+
+extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[];
+
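The macro above plants one struct cpu_vendor_dev per vendor into the dedicated .x86cpuvendor.init section; __x86cpuvendor_start[] and __x86cpuvendor_end[] are expected to be provided by the linker script (that change is not part of this hunk), and early_cpu_init() in common.c walks the resulting array to fill cpu_devs[]. For a concrete vendor the registration expands to roughly:

/* cpu_vendor_dev_register(X86_VENDOR_CYRIX, &cyrix_cpu_dev) becomes: */
static struct cpu_vendor_dev __cpu_vendor_dev_X86_VENDOR_CYRIX __used
	__attribute__((__section__(".x86cpuvendor.init"))) =
	{ X86_VENDOR_CYRIX, &cyrix_cpu_dev };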
extern int get_model_name(struct cpuinfo_x86 *c);
extern void display_cacheinfo(struct cpuinfo_x86 *c);
-
-extern void early_init_intel(struct cpuinfo_x86 *c);
-extern void early_init_amd(struct cpuinfo_x86 *c);
-
-/* Specific CPU type init functions */
-int intel_cpu_init(void);
-int amd_init_cpu(void);
-int cyrix_init_cpu(void);
-int nsc_init_cpu(void);
-int centaur_init_cpu(void);
-int transmeta_init_cpu(void);
-int nexgen_init_cpu(void);
-int umc_init_cpu(void);
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 7139b0262703..3fd7a67bb06a 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -19,7 +19,7 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
{
unsigned char ccr2, ccr3;
unsigned long flags;
-
+
/* we test for DEVID by checking whether CCR3 is writable */
local_irq_save(flags);
ccr3 = getCx86(CX86_CCR3);
@@ -37,8 +37,7 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
setCx86(CX86_CCR2, ccr2);
*dir0 = 0xfe;
}
- }
- else {
+ } else {
setCx86(CX86_CCR3, ccr3); /* restore CCR3 */
/* read DIR0 and DIR1 CPU registers */
@@ -86,7 +85,7 @@ static char cyrix_model_mult2[] __cpuinitdata = "12233445";
static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c)
{
unsigned long flags;
-
+
if (Cx86_dir0_msb == 3) {
unsigned char ccr3, ccr5;
@@ -132,7 +131,7 @@ static void __cpuinit set_cx86_memwb(void)
/* set 'Not Write-through' */
write_cr0(read_cr0() | X86_CR0_NW);
/* CCR2 bit 2: lock NW bit and set WT1 */
- setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 );
+ setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14);
}
static void __cpuinit set_cx86_inc(void)
@@ -148,7 +147,7 @@ static void __cpuinit set_cx86_inc(void)
setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02);
/* PCR0 -- Performance Control */
/* Incrementor Margin 10 */
- setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04);
+ setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04);
setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
}
@@ -167,16 +166,16 @@ static void __cpuinit geode_configure(void)
ccr3 = getCx86(CX86_CCR3);
setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
-
+
/* FPU fast, DTE cache, Mem bypass */
setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38);
setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
-
+
set_cx86_memwb();
- set_cx86_reorder();
+ set_cx86_reorder();
set_cx86_inc();
-
+
local_irq_restore(flags);
}
@@ -187,14 +186,16 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
char *buf = c->x86_model_id;
const char *p = NULL;
- /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
- 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
- clear_bit(0*32+31, c->x86_capability);
+ /*
+ * Bit 31 in normal CPUID used for nonstandard 3DNow ID;
+ * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
+ */
+ clear_cpu_cap(c, 0*32+31);
/* Cyrix used bit 24 in extended (AMD) CPUID for Cyrix MMX extensions */
- if ( test_bit(1*32+24, c->x86_capability) ) {
- clear_bit(1*32+24, c->x86_capability);
- set_bit(X86_FEATURE_CXMMX, c->x86_capability);
+ if (test_cpu_cap(c, 1*32+24)) {
+ clear_cpu_cap(c, 1*32+24);
+ set_cpu_cap(c, X86_FEATURE_CXMMX);
}
do_cyrix_devid(&dir0, &dir1);
@@ -213,7 +214,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
* the model, multiplier and stepping. Black magic included,
* to make the silicon step/rev numbers match the printed ones.
*/
-
+
switch (dir0_msn) {
unsigned char tmp;
@@ -241,7 +242,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
} else /* 686 */
p = Cx86_cb+1;
/* Emulate MTRRs using Cyrix's ARRs. */
- set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability);
+ set_cpu_cap(c, X86_FEATURE_CYRIX_ARR);
/* 6x86's contain this bug */
c->coma_bug = 1;
break;
@@ -250,17 +251,18 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
#ifdef CONFIG_PCI
{
u32 vendor, device;
- /* It isn't really a PCI quirk directly, but the cure is the
- same. The MediaGX has deep magic SMM stuff that handles the
- SB emulation. It throws away the fifo on disable_dma() which
- is wrong and ruins the audio.
-
- Bug2: VSA1 has a wrap bug so that using maximum sized DMA
- causes bad things. According to NatSemi VSA2 has another
- bug to do with 'hlt'. I've not seen any boards using VSA2
- and X doesn't seem to support it either so who cares 8).
- VSA1 we work around however.
- */
+ /*
+ * It isn't really a PCI quirk directly, but the cure is the
+ * same. The MediaGX has deep magic SMM stuff that handles the
+ * SB emulation. It throws away the fifo on disable_dma() which
+ * is wrong and ruins the audio.
+ *
+ * Bug2: VSA1 has a wrap bug so that using maximum sized DMA
+ * causes bad things. According to NatSemi VSA2 has another
+ * bug to do with 'hlt'. I've not seen any boards using VSA2
+ * and X doesn't seem to support it either so who cares 8).
+ * VSA1 we work around however.
+ */
printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n");
isa_dma_bridge_buggy = 2;
@@ -273,55 +275,51 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
/*
* The 5510/5520 companion chips have a funky PIT.
- */
+ */
if (vendor == PCI_VENDOR_ID_CYRIX &&
(device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520))
mark_tsc_unstable("cyrix 5510/5520 detected");
}
#endif
- c->x86_cache_size=16; /* Yep 16K integrated cache thats it */
+ c->x86_cache_size = 16; /* Yep 16K integrated cache thats it */
/* GXm supports extended cpuid levels 'ala' AMD */
if (c->cpuid_level == 2) {
/* Enable cxMMX extensions (GX1 Datasheet 54) */
setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1);
-
+
/*
* GXm : 0x30 ... 0x5f GXm datasheet 51
* GXlv: 0x6x GXlv datasheet 54
* ? : 0x7x
* GX1 : 0x8x GX1 datasheet 56
*/
- if((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <=dir1 && dir1 <= 0x8f))
+ if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f))
geode_configure();
get_model_name(c); /* get CPU marketing name */
return;
- }
- else { /* MediaGX */
+ } else { /* MediaGX */
Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4';
p = Cx86_cb+2;
c->x86_model = (dir1 & 0x20) ? 1 : 2;
}
break;
- case 5: /* 6x86MX/M II */
- if (dir1 > 7)
- {
+ case 5: /* 6x86MX/M II */
+ if (dir1 > 7) {
dir0_msn++; /* M II */
/* Enable MMX extensions (App note 108) */
setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1);
- }
- else
- {
+ } else {
c->coma_bug = 1; /* 6x86MX, it has the bug. */
}
tmp = (!(dir0_lsn & 7) || dir0_lsn & 1) ? 2 : 0;
Cx86_cb[tmp] = cyrix_model_mult2[dir0_lsn & 7];
p = Cx86_cb+tmp;
- if (((dir1 & 0x0f) > 4) || ((dir1 & 0xf0) == 0x20))
+ if (((dir1 & 0x0f) > 4) || ((dir1 & 0xf0) == 0x20))
(c->x86_model)++;
/* Emulate MTRRs using Cyrix's ARRs. */
- set_bit(X86_FEATURE_CYRIX_ARR, c->x86_capability);
+ set_cpu_cap(c, X86_FEATURE_CYRIX_ARR);
break;
case 0xf: /* Cyrix 486 without DEVID registers */
@@ -343,7 +341,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
break;
}
strcpy(buf, Cx86_model[dir0_msn & 7]);
- if (p) strcat(buf, p);
+ if (p)
+ strcat(buf, p);
return;
}
@@ -352,7 +351,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
*/
static void __cpuinit init_nsc(struct cpuinfo_x86 *c)
{
- /* There may be GX1 processors in the wild that are branded
+ /*
+ * There may be GX1 processors in the wild that are branded
* NSC and not Cyrix.
*
* This function only handles the GX processor, and kicks every
@@ -377,7 +377,7 @@ static void __cpuinit init_nsc(struct cpuinfo_x86 *c)
* by the fact that they preserve the flags across the division of 5/2.
* PII and PPro exhibit this behavior too, but they have cpuid available.
*/
-
+
/*
* Perform the Cyrix 5/2 test. A Cyrix won't change
* the flags, while other 486 chips will.
@@ -398,27 +398,26 @@ static inline int test_cyrix_52div(void)
return (unsigned char) (test >> 8) == 0x02;
}
-static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c)
+static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
{
/* Detect Cyrix with disabled CPUID */
- if ( c->x86 == 4 && test_cyrix_52div() ) {
+ if (c->x86 == 4 && test_cyrix_52div()) {
unsigned char dir0, dir1;
-
+
strcpy(c->x86_vendor_id, "CyrixInstead");
- c->x86_vendor = X86_VENDOR_CYRIX;
-
- /* Actually enable cpuid on the older cyrix */
-
- /* Retrieve CPU revisions */
-
+ c->x86_vendor = X86_VENDOR_CYRIX;
+
+ /* Actually enable cpuid on the older cyrix */
+
+ /* Retrieve CPU revisions */
+
do_cyrix_devid(&dir0, &dir1);
- dir0>>=4;
-
+ dir0 >>= 4;
+
/* Check it is an affected model */
-
- if (dir0 == 5 || dir0 == 3)
- {
+
+ if (dir0 == 5 || dir0 == 3) {
unsigned char ccr3;
unsigned long flags;
printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n");
@@ -434,26 +433,17 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c)
static struct cpu_dev cyrix_cpu_dev __cpuinitdata = {
.c_vendor = "Cyrix",
- .c_ident = { "CyrixInstead" },
+ .c_ident = { "CyrixInstead" },
.c_init = init_cyrix,
.c_identify = cyrix_identify,
};
-int __init cyrix_init_cpu(void)
-{
- cpu_devs[X86_VENDOR_CYRIX] = &cyrix_cpu_dev;
- return 0;
-}
+cpu_vendor_dev_register(X86_VENDOR_CYRIX, &cyrix_cpu_dev);
static struct cpu_dev nsc_cpu_dev __cpuinitdata = {
.c_vendor = "NSC",
- .c_ident = { "Geode by NSC" },
+ .c_ident = { "Geode by NSC" },
.c_init = init_nsc,
};
-int __init nsc_init_cpu(void)
-{
- cpu_devs[X86_VENDOR_NSC] = &nsc_cpu_dev;
- return 0;
-}
-
+cpu_vendor_dev_register(X86_VENDOR_NSC, &nsc_cpu_dev);
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c
index ee975ac6bbcb..e43ad4ad4cba 100644
--- a/arch/x86/kernel/cpu/feature_names.c
+++ b/arch/x86/kernel/cpu/feature_names.c
@@ -4,7 +4,7 @@
* This file must not contain any executable code.
*/
-#include "asm/cpufeature.h"
+#include <asm/cpufeature.h>
/*
* These flag bits must match the definitions in <asm/cpufeature.h>.
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fae31ce747bd..fe9224c51d37 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -30,7 +30,7 @@
struct movsl_mask movsl_mask __read_mostly;
#endif
-void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
+static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
{
/* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
if (c->x86 == 15 && c->x86_cache_alignment == 64)
@@ -45,7 +45,7 @@ void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
*
* This is called before we do cpu ident work
*/
-
+
int __cpuinit ppro_with_ram_bug(void)
{
/* Uses data from early_cpu_detect now */
@@ -58,7 +58,7 @@ int __cpuinit ppro_with_ram_bug(void)
}
return 0;
}
-
+
/*
* P4 Xeon errata 037 workaround.
@@ -69,7 +69,7 @@ static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
unsigned long lo, hi;
if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
- rdmsr (MSR_IA32_MISC_ENABLE, lo, hi);
+ rdmsr(MSR_IA32_MISC_ENABLE, lo, hi);
if ((lo & (1<<9)) == 0) {
printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n");
printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n");
@@ -127,10 +127,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
*/
c->f00f_bug = 0;
if (!paravirt_enabled() && c->x86 == 5) {
- static int f00f_workaround_enabled = 0;
+ static int f00f_workaround_enabled;
c->f00f_bug = 1;
- if ( !f00f_workaround_enabled ) {
+ if (!f00f_workaround_enabled) {
trap_init_f00f_bug();
printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
f00f_workaround_enabled = 1;
@@ -139,20 +139,22 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
#endif
l2 = init_intel_cacheinfo(c);
- if (c->cpuid_level > 9 ) {
+ if (c->cpuid_level > 9) {
unsigned eax = cpuid_eax(10);
/* Check for version and the number of counters */
if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
- set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability);
+ set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
}
/* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */
if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
- clear_bit(X86_FEATURE_SEP, c->x86_capability);
+ clear_cpu_cap(c, X86_FEATURE_SEP);
- /* Names for the Pentium II/Celeron processors
- detectable only by also checking the cache size.
- Dixon is NOT a Celeron. */
+ /*
+ * Names for the Pentium II/Celeron processors
+ * detectable only by also checking the cache size.
+ * Dixon is NOT a Celeron.
+ */
if (c->x86 == 6) {
switch (c->x86_model) {
case 5:
@@ -163,14 +165,14 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
p = "Mobile Pentium II (Dixon)";
}
break;
-
+
case 6:
if (l2 == 128)
p = "Celeron (Mendocino)";
else if (c->x86_mask == 0 || c->x86_mask == 5)
p = "Celeron-A";
break;
-
+
case 8:
if (l2 == 128)
p = "Celeron (Coppermine)";
@@ -178,9 +180,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
}
}
- if ( p )
+ if (p)
strcpy(c->x86_model_id, p);
-
+
c->x86_max_cores = num_cpu_cores(c);
detect_ht(c);
@@ -207,28 +209,29 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
#endif
if (cpu_has_xmm2)
- set_bit(X86_FEATURE_LFENCE_RDTSC, c->x86_capability);
+ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
if (c->x86 == 15) {
- set_bit(X86_FEATURE_P4, c->x86_capability);
+ set_cpu_cap(c, X86_FEATURE_P4);
}
- if (c->x86 == 6)
- set_bit(X86_FEATURE_P3, c->x86_capability);
+ if (c->x86 == 6)
+ set_cpu_cap(c, X86_FEATURE_P3);
if (cpu_has_ds) {
unsigned int l1;
rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
if (!(l1 & (1<<11)))
- set_bit(X86_FEATURE_BTS, c->x86_capability);
+ set_cpu_cap(c, X86_FEATURE_BTS);
if (!(l1 & (1<<12)))
- set_bit(X86_FEATURE_PEBS, c->x86_capability);
+ set_cpu_cap(c, X86_FEATURE_PEBS);
}
if (cpu_has_bts)
ds_init_intel(c);
}
-static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
+static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
{
- /* Intel PIII Tualatin. This comes in two flavours.
+ /*
+ * Intel PIII Tualatin. This comes in two flavours.
* One has 256kb of cache, the other 512. We have no way
* to determine which, so we use a boottime override
* for the 512kb model, and assume 256 otherwise.
@@ -240,42 +243,42 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned
static struct cpu_dev intel_cpu_dev __cpuinitdata = {
.c_vendor = "Intel",
- .c_ident = { "GenuineIntel" },
+ .c_ident = { "GenuineIntel" },
.c_models = {
- { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names =
- {
- [0] = "486 DX-25/33",
- [1] = "486 DX-50",
- [2] = "486 SX",
- [3] = "486 DX/2",
- [4] = "486 SL",
- [5] = "486 SX/2",
- [7] = "486 DX/2-WB",
- [8] = "486 DX/4",
+ { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names =
+ {
+ [0] = "486 DX-25/33",
+ [1] = "486 DX-50",
+ [2] = "486 SX",
+ [3] = "486 DX/2",
+ [4] = "486 SL",
+ [5] = "486 SX/2",
+ [7] = "486 DX/2-WB",
+ [8] = "486 DX/4",
[9] = "486 DX/4-WB"
}
},
{ .vendor = X86_VENDOR_INTEL, .family = 5, .model_names =
- {
- [0] = "Pentium 60/66 A-step",
- [1] = "Pentium 60/66",
+ {
+ [0] = "Pentium 60/66 A-step",
+ [1] = "Pentium 60/66",
[2] = "Pentium 75 - 200",
- [3] = "OverDrive PODP5V83",
+ [3] = "OverDrive PODP5V83",
[4] = "Pentium MMX",
- [7] = "Mobile Pentium 75 - 200",
+ [7] = "Mobile Pentium 75 - 200",
[8] = "Mobile Pentium MMX"
}
},
{ .vendor = X86_VENDOR_INTEL, .family = 6, .model_names =
- {
+ {
[0] = "Pentium Pro A-step",
- [1] = "Pentium Pro",
- [3] = "Pentium II (Klamath)",
- [4] = "Pentium II (Deschutes)",
- [5] = "Pentium II (Deschutes)",
+ [1] = "Pentium Pro",
+ [3] = "Pentium II (Klamath)",
+ [4] = "Pentium II (Deschutes)",
+ [5] = "Pentium II (Deschutes)",
[6] = "Mobile Pentium II",
- [7] = "Pentium III (Katmai)",
- [8] = "Pentium III (Coppermine)",
+ [7] = "Pentium III (Katmai)",
+ [8] = "Pentium III (Coppermine)",
[10] = "Pentium III (Cascades)",
[11] = "Pentium III (Tualatin)",
}
@@ -290,15 +293,12 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = {
}
},
},
+ .c_early_init = early_init_intel,
.c_init = init_intel,
.c_size_cache = intel_size_cache,
};
-__init int intel_cpu_init(void)
-{
- cpu_devs[X86_VENDOR_INTEL] = &intel_cpu_dev;
- return 0;
-}
+cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev);
#ifndef CONFIG_X86_CMPXCHG
unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new)
@@ -364,5 +364,5 @@ unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new)
EXPORT_SYMBOL(cmpxchg_486_u64);
#endif
-// arch_initcall(intel_cpu_init);
+/* arch_initcall(intel_cpu_init); */
diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c
index a5182dcd94ae..774d87cfd8cd 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_32.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_32.c
@@ -10,20 +10,20 @@
#include <linux/smp.h>
#include <linux/thread_info.h>
-#include <asm/processor.h>
+#include <asm/processor.h>
#include <asm/system.h>
#include <asm/mce.h>
#include "mce.h"
-int mce_disabled = 0;
+int mce_disabled;
int nr_mce_banks;
EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
/* Handle unconfigured int18 (should never happen) */
-static void unexpected_machine_check(struct pt_regs * regs, long error_code)
-{
+static void unexpected_machine_check(struct pt_regs *regs, long error_code)
+{
printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id());
}
@@ -33,30 +33,30 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_mac
/* This has to be run for each processor */
void mcheck_init(struct cpuinfo_x86 *c)
{
- if (mce_disabled==1)
+ if (mce_disabled == 1)
return;
switch (c->x86_vendor) {
- case X86_VENDOR_AMD:
- amd_mcheck_init(c);
- break;
-
- case X86_VENDOR_INTEL:
- if (c->x86==5)
- intel_p5_mcheck_init(c);
- if (c->x86==6)
- intel_p6_mcheck_init(c);
- if (c->x86==15)
- intel_p4_mcheck_init(c);
- break;
-
- case X86_VENDOR_CENTAUR:
- if (c->x86==5)
- winchip_mcheck_init(c);
- break;
-
- default:
- break;
+ case X86_VENDOR_AMD:
+ amd_mcheck_init(c);
+ break;
+
+ case X86_VENDOR_INTEL:
+ if (c->x86 == 5)
+ intel_p5_mcheck_init(c);
+ if (c->x86 == 6)
+ intel_p6_mcheck_init(c);
+ if (c->x86 == 15)
+ intel_p4_mcheck_init(c);
+ break;
+
+ case X86_VENDOR_CENTAUR:
+ if (c->x86 == 5)
+ winchip_mcheck_init(c);
+ break;
+
+ default:
+ break;
}
}
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
index bf39409b3838..00ccb6c14ec2 100644
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -16,7 +16,7 @@
#include <linux/smp.h>
#include <linux/module.h>
-#include <asm/processor.h>
+#include <asm/processor.h>
#include <asm/system.h>
#include <asm/msr.h>
@@ -26,23 +26,26 @@ static int firstbank;
#define MCE_RATE 15*HZ /* timer rate is 15s */
-static void mce_checkregs (void *info)
+static void mce_checkregs(void *info)
{
u32 low, high;
int i;
- for (i=firstbank; i<nr_mce_banks; i++) {
- rdmsr (MSR_IA32_MC0_STATUS+i*4, low, high);
+ for (i = firstbank; i < nr_mce_banks; i++) {
+ rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
if (high & (1<<31)) {
printk(KERN_INFO "MCE: The hardware reports a non "
"fatal, correctable incident occurred on "
"CPU %d.\n",
smp_processor_id());
- printk (KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
+ printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
- /* Scrub the error so we don't pick it up in MCE_RATE seconds time. */
- wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
+ /*
+ * Scrub the error so we don't pick it up in MCE_RATE
+ * seconds time.
+ */
+ wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
/* Serialize */
wmb();
@@ -55,10 +58,10 @@ static void mce_work_fn(struct work_struct *work);
static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
static void mce_work_fn(struct work_struct *work)
-{
+{
on_each_cpu(mce_checkregs, NULL, 1, 1);
schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
-}
+}
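For reference, the bank scan in mce_checkregs() above leans on the fixed MCA register layout, which is why bank i's status register sits at MSR_IA32_MC0_STATUS + 4*i and why bit 31 of the high half returned by rdmsr() is the flag of interest:

/*
 * Each MCA bank i owns four consecutive MSRs:
 *   MSR_IA32_MC0_CTL    + 4*i
 *   MSR_IA32_MC0_STATUS + 4*i   (bit 63 = error valid, the bit tested above)
 *   MSR_IA32_MC0_ADDR   + 4*i
 *   MSR_IA32_MC0_MISC   + 4*i
 */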
static int __init init_nonfatal_mce_checker(void)
{
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
index a18310aaae0c..bfa5817afdda 100644
--- a/arch/x86/kernel/cpu/mcheck/p5.c
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -9,20 +9,20 @@
#include <linux/interrupt.h>
#include <linux/smp.h>
-#include <asm/processor.h>
+#include <asm/processor.h>
#include <asm/system.h>
#include <asm/msr.h>
#include "mce.h"
/* Machine check handler for Pentium class Intel */
-static void pentium_machine_check(struct pt_regs * regs, long error_code)
+static void pentium_machine_check(struct pt_regs *regs, long error_code)
{
u32 loaddr, hi, lotype;
rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
printk(KERN_EMERG "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype);
- if(lotype&(1<<5))
+ if (lotype&(1<<5))
printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id());
add_taint(TAINT_MACHINE_CHECK);
}
@@ -31,13 +31,13 @@ static void pentium_machine_check(struct pt_regs * regs, long error_code)
void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
-
+
/*Check for MCE support */
- if( !cpu_has(c, X86_FEATURE_MCE) )
- return;
+ if (!cpu_has(c, X86_FEATURE_MCE))
+ return;
/* Default P5 to off as its often misconnected */
- if(mce_disabled != -1)
+ if (mce_disabled != -1)
return;
machine_check_vector = pentium_machine_check;
wmb();
@@ -47,7 +47,7 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
printk(KERN_INFO "Intel old style machine check architecture supported.\n");
- /* Enable MCE */
+ /* Enable MCE */
set_in_cr4(X86_CR4_MCE);
printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id());
}
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c
index 74342604d30e..62efc9c2b3af 100644
--- a/arch/x86/kernel/cpu/mcheck/p6.c
+++ b/arch/x86/kernel/cpu/mcheck/p6.c
@@ -9,23 +9,23 @@
#include <linux/interrupt.h>
#include <linux/smp.h>
-#include <asm/processor.h>
+#include <asm/processor.h>
#include <asm/system.h>
#include <asm/msr.h>
#include "mce.h"
/* Machine Check Handler For PII/PIII */
-static void intel_machine_check(struct pt_regs * regs, long error_code)
+static void intel_machine_check(struct pt_regs *regs, long error_code)
{
- int recover=1;
+ int recover = 1;
u32 alow, ahigh, high, low;
u32 mcgstl, mcgsth;
int i;
- rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
+ rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
if (mcgstl & (1<<0)) /* Recoverable ? */
- recover=0;
+ recover = 0;
printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
smp_processor_id(), mcgsth, mcgstl);
@@ -55,30 +55,30 @@ static void intel_machine_check(struct pt_regs * regs, long error_code)
}
if (recover & 2)
- panic ("CPU context corrupt");
+ panic("CPU context corrupt");
if (recover & 1)
- panic ("Unable to continue");
+ panic("Unable to continue");
- printk (KERN_EMERG "Attempting to continue.\n");
- /*
- * Do not clear the MSR_IA32_MCi_STATUS if the error is not
+ printk(KERN_EMERG "Attempting to continue.\n");
+ /*
+ * Do not clear the MSR_IA32_MCi_STATUS if the error is not
* recoverable/continuable.This will allow BIOS to look at the MSRs
* for errors if the OS could not log the error.
*/
- for (i=0; i<nr_mce_banks; i++) {
+ for (i = 0; i < nr_mce_banks; i++) {
unsigned int msr;
msr = MSR_IA32_MC0_STATUS+i*4;
- rdmsr (msr,low, high);
+ rdmsr(msr, low, high);
if (high & (1<<31)) {
/* Clear it */
- wrmsr (msr, 0UL, 0UL);
+ wrmsr(msr, 0UL, 0UL);
/* Serialize */
wmb();
add_taint(TAINT_MACHINE_CHECK);
}
}
mcgstl &= ~(1<<2);
- wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
+ wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
}
/* Set up machine check reporting for processors with Intel style MCE */
@@ -86,21 +86,21 @@ void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;
-
+
/* Check for MCE support */
if (!cpu_has(c, X86_FEATURE_MCE))
return;
/* Check for PPro style MCA */
- if (!cpu_has(c, X86_FEATURE_MCA))
+ if (!cpu_has(c, X86_FEATURE_MCA))
return;
/* Ok machine check is available */
machine_check_vector = intel_machine_check;
wmb();
- printk (KERN_INFO "Intel machine check architecture supported.\n");
- rdmsr (MSR_IA32_MCG_CAP, l, h);
+ printk(KERN_INFO "Intel machine check architecture supported.\n");
+ rdmsr(MSR_IA32_MCG_CAP, l, h);
if (l & (1<<8)) /* Control register present ? */
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
nr_mce_banks = l & 0xff;
@@ -110,13 +110,13 @@ void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
* - MC0_CTL should not be written
* - Status registers on all banks should be cleared on reset
*/
- for (i=1; i<nr_mce_banks; i++)
- wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
+ for (i = 1; i < nr_mce_banks; i++)
+ wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
- for (i=0; i<nr_mce_banks; i++)
- wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
+ for (i = 0; i < nr_mce_banks; i++)
+ wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
- set_in_cr4 (X86_CR4_MCE);
- printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
+ set_in_cr4(X86_CR4_MCE);
+ printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
smp_processor_id());
}
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
index 3d428d5afc52..f2be3e190c6b 100644
--- a/arch/x86/kernel/cpu/mcheck/winchip.c
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -8,14 +8,14 @@
#include <linux/kernel.h>
#include <linux/interrupt.h>
-#include <asm/processor.h>
+#include <asm/processor.h>
#include <asm/system.h>
#include <asm/msr.h>
#include "mce.h"
/* Machine check handler for WinChip C6 */
-static void winchip_machine_check(struct pt_regs * regs, long error_code)
+static void winchip_machine_check(struct pt_regs *regs, long error_code)
{
printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
add_taint(TAINT_MACHINE_CHECK);
@@ -28,8 +28,8 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c)
machine_check_vector = winchip_machine_check;
wmb();
rdmsr(MSR_IDT_FCR1, lo, hi);
- lo|= (1<<2); /* Enable EIERRINT (int 18 MCE) */
- lo&= ~(1<<4); /* Enable MCE */
+ lo |= (1<<2); /* Enable EIERRINT (int 18 MCE) */
+ lo &= ~(1<<4); /* Enable MCE */
wrmsr(MSR_IDT_FCR1, lo, hi);
set_in_cr4(X86_CR4_MCE);
printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n");
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 3e18db4cefee..353efe4f5017 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -11,6 +11,7 @@
#include <asm/cpufeature.h>
#include <asm/processor-flags.h>
#include <asm/tlbflush.h>
+#include <asm/pat.h>
#include "mtrr.h"
struct mtrr_state {
@@ -35,6 +36,8 @@ static struct fixed_range_block fixed_range_blocks[] = {
static unsigned long smp_changes_mask;
static struct mtrr_state mtrr_state = {};
+static int mtrr_state_set;
+static u64 tom2;
#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "mtrr."
@@ -42,6 +45,111 @@ static struct mtrr_state mtrr_state = {};
static int mtrr_show;
module_param_named(show, mtrr_show, bool, 0);
+/*
+ * Returns the effective MTRR type for the region
+ * Error returns:
+ * - 0xFE - when the range is "not entirely covered" by _any_ var range MTRR
+ * - 0xFF - when MTRR is not enabled
+ */
+u8 mtrr_type_lookup(u64 start, u64 end)
+{
+ int i;
+ u64 base, mask;
+ u8 prev_match, curr_match;
+
+ if (!mtrr_state_set)
+ return 0xFF;
+
+ if (!mtrr_state.enabled)
+ return 0xFF;
+
+ /* Make end inclusive end, instead of exclusive */
+ end--;
+
+ /* Look in fixed ranges. Just return the type as per start */
+ if (mtrr_state.have_fixed && (start < 0x100000)) {
+ int idx;
+
+ if (start < 0x80000) {
+ idx = 0;
+ idx += (start >> 16);
+ return mtrr_state.fixed_ranges[idx];
+ } else if (start < 0xC0000) {
+ idx = 1 * 8;
+ idx += ((start - 0x80000) >> 14);
+ return mtrr_state.fixed_ranges[idx];
+ } else if (start < 0x1000000) {
+ idx = 3 * 8;
+ idx += ((start - 0xC0000) >> 12);
+ return mtrr_state.fixed_ranges[idx];
+ }
+ }
+
+ /*
+ * Look in variable ranges
+ * Look of multiple ranges matching this address and pick type
+ * as per MTRR precedence
+ */
+	if (!(mtrr_state.enabled & 2)) {
+ return mtrr_state.def_type;
+ }
+
+ prev_match = 0xFF;
+ for (i = 0; i < num_var_ranges; ++i) {
+ unsigned short start_state, end_state;
+
+ if (!(mtrr_state.var_ranges[i].mask_lo & (1 << 11)))
+ continue;
+
+ base = (((u64)mtrr_state.var_ranges[i].base_hi) << 32) +
+ (mtrr_state.var_ranges[i].base_lo & PAGE_MASK);
+ mask = (((u64)mtrr_state.var_ranges[i].mask_hi) << 32) +
+ (mtrr_state.var_ranges[i].mask_lo & PAGE_MASK);
+
+ start_state = ((start & mask) == (base & mask));
+ end_state = ((end & mask) == (base & mask));
+ if (start_state != end_state)
+ return 0xFE;
+
+ if ((start & mask) != (base & mask)) {
+ continue;
+ }
+
+ curr_match = mtrr_state.var_ranges[i].base_lo & 0xff;
+ if (prev_match == 0xFF) {
+ prev_match = curr_match;
+ continue;
+ }
+
+ if (prev_match == MTRR_TYPE_UNCACHABLE ||
+ curr_match == MTRR_TYPE_UNCACHABLE) {
+ return MTRR_TYPE_UNCACHABLE;
+ }
+
+ if ((prev_match == MTRR_TYPE_WRBACK &&
+ curr_match == MTRR_TYPE_WRTHROUGH) ||
+ (prev_match == MTRR_TYPE_WRTHROUGH &&
+ curr_match == MTRR_TYPE_WRBACK)) {
+ prev_match = MTRR_TYPE_WRTHROUGH;
+ curr_match = MTRR_TYPE_WRTHROUGH;
+ }
+
+ if (prev_match != curr_match) {
+ return MTRR_TYPE_UNCACHABLE;
+ }
+ }
+
+ if (tom2) {
+ if (start >= (1ULL<<32) && (end < tom2))
+ return MTRR_TYPE_WRBACK;
+ }
+
+ if (prev_match != 0xFF)
+ return prev_match;
+
+ return mtrr_state.def_type;
+}
+
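A worked example for the fixed-range branch of mtrr_type_lookup(): start = 0xB8000 (the legacy VGA text buffer) falls in the 0x80000-0xBFFFF window, so idx = 1*8 + ((0xB8000 - 0x80000) >> 14) = 8 + 14 = 22 and the type comes straight out of fixed_ranges[22], the 16K range covering 0xB8000-0xBBFFF. For the variable-range walk, the usual MTRR precedence applies: UC beats everything and WT beats WB, while the two reserved return codes mean "MTRRs disabled" (0xFF) and "range straddles a variable-range boundary" (0xFE).

/*
 * Fixed-range index layout assumed above:
 *   0x00000-0x7FFFF : 8  x 64K  -> fixed_ranges[0..7]
 *   0x80000-0xBFFFF : 16 x 16K  -> fixed_ranges[8..23]
 *   0xC0000-0xFFFFF : 64 x 4K   -> fixed_ranges[24..87]
 */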
/* Get the MSR pair relating to a var range */
static void
get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
@@ -79,12 +187,16 @@ static void print_fixed(unsigned base, unsigned step, const mtrr_type*types)
base, base + step - 1, mtrr_attrib_to_str(*types));
}
+static void prepare_set(void);
+static void post_set(void);
+
/* Grab all of the MTRR state for this CPU into *state */
void __init get_mtrr_state(void)
{
unsigned int i;
struct mtrr_var_range *vrs;
unsigned lo, dummy;
+ unsigned long flags;
vrs = mtrr_state.var_ranges;
@@ -100,6 +212,15 @@ void __init get_mtrr_state(void)
mtrr_state.def_type = (lo & 0xff);
mtrr_state.enabled = (lo & 0xc00) >> 10;
+ if (amd_special_default_mtrr()) {
+ unsigned lo, hi;
+ /* TOP_MEM2 */
+ rdmsr(MSR_K8_TOP_MEM2, lo, hi);
+ tom2 = hi;
+ tom2 <<= 32;
+ tom2 |= lo;
+ tom2 &= 0xffffff8000000ULL;
+ }
if (mtrr_show) {
int high_width;
@@ -130,7 +251,22 @@ void __init get_mtrr_state(void)
else
printk(KERN_INFO "MTRR %u disabled\n", i);
}
+ if (tom2) {
+ printk(KERN_INFO "TOM2: %016llx aka %lldM\n",
+ tom2, tom2>>20);
+ }
}
+ mtrr_state_set = 1;
+
+ /* PAT setup for BP. We need to go through sync steps here */
+ local_irq_save(flags);
+ prepare_set();
+
+ pat_init();
+
+ post_set();
+ local_irq_restore(flags);
+
}
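The TOM2 handling above stitches the two 32-bit halves returned by rdmsr() into one 64-bit top-of-memory-2 address and masks it down; any range lying wholly between 4 GB and that boundary is then reported as write-back by mtrr_type_lookup(). A worked example with made-up MSR contents:

/*
 * rdmsr(MSR_K8_TOP_MEM2) returning hi = 0x00000001, lo = 0x40000000:
 *   tom2 = ((u64)hi << 32) | lo = 0x140000000  (5 GB)
 * The mask 0xffffff8000000ULL keeps bits 51:27, i.e. TOM2 is aligned
 * down to a 128 MB boundary.
 */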
/* Some BIOS's are fucked and don't set all MTRRs the same! */
@@ -397,6 +533,9 @@ static void generic_set_all(void)
/* Actually set the state */
mask = set_mtrr_state();
+ /* also set PAT */
+ pat_init();
+
post_set();
local_irq_restore(flags);
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 91e150acb46c..1960f1985e5e 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -424,11 +424,10 @@ static int __init mtrr_if_init(void)
return -ENODEV;
proc_root_mtrr =
- create_proc_entry("mtrr", S_IWUSR | S_IRUGO, &proc_root);
- if (proc_root_mtrr) {
+ proc_create("mtrr", S_IWUSR | S_IRUGO, &proc_root, &mtrr_fops);
+
+ if (proc_root_mtrr)
proc_root_mtrr->owner = THIS_MODULE;
- proc_root_mtrr->proc_fops = &mtrr_fops;
- }
return 0;
}
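The switch to proc_create() above attaches the file_operations at creation time, closing the window in which the old create_proc_entry()-then-assign-proc_fops sequence could expose /proc/mtrr before its fops were set. Generic usage looks like this (a sketch; the error handling shown is illustrative, not this file's):

	struct proc_dir_entry *pde;

	pde = proc_create("mtrr", S_IWUSR | S_IRUGO, &proc_root, &mtrr_fops);
	if (!pde)
		return -ENOMEM;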
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index a6450b3ae759..6a1e278d9323 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -627,7 +627,7 @@ early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
#define Tom2Enabled (1U << 21)
#define Tom2ForceMemTypeWB (1U << 22)
-static __init int amd_special_default_mtrr(void)
+int __init amd_special_default_mtrr(void)
{
u32 l, h;
diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c
index 9f8ba923d1c9..7f7e2753685b 100644
--- a/arch/x86/kernel/cpu/mtrr/state.c
+++ b/arch/x86/kernel/cpu/mtrr/state.c
@@ -19,13 +19,15 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
if (use_intel() || is_cpu(CYRIX)) {
/* Save value of CR4 and clear Page Global Enable (bit 7) */
- if ( cpu_has_pge ) {
+ if (cpu_has_pge) {
ctxt->cr4val = read_cr4();
write_cr4(ctxt->cr4val & ~X86_CR4_PGE);
}
- /* Disable and flush caches. Note that wbinvd flushes the TLBs as
- a side-effect */
+ /*
+ * Disable and flush caches. Note that wbinvd flushes the TLBs
+ * as a side-effect
+ */
cr0 = read_cr0() | X86_CR0_CD;
wbinvd();
write_cr0(cr0);
@@ -42,7 +44,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
void set_mtrr_cache_disable(struct set_mtrr_context *ctxt)
{
- if (use_intel())
+ if (use_intel())
/* Disable MTRRs, and set the default type to uncached */
mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL,
ctxt->deftype_hi);
@@ -66,12 +68,12 @@ void set_mtrr_done(struct set_mtrr_context *ctxt)
else
/* Cyrix ARRs - everything else was excluded at the top */
setCx86(CX86_CCR3, ctxt->ccr3);
-
+
/* Enable caches */
write_cr0(read_cr0() & 0xbfffffff);
/* Restore value of CR4 */
- if ( cpu_has_pge )
+ if (cpu_has_pge)
write_cr4(ctxt->cr4val);
}
/* Re-enable interrupts locally (if enabled previously) */
diff --git a/arch/x86/kernel/cpu/nexgen.c b/arch/x86/kernel/cpu/nexgen.c
index 961fbe1a748f..5d5e1c134123 100644
--- a/arch/x86/kernel/cpu/nexgen.c
+++ b/arch/x86/kernel/cpu/nexgen.c
@@ -9,11 +9,11 @@
* Detect a NexGen CPU running without BIOS hypercode new enough
* to have CPUID. (Thanks to Herbert Oppmann)
*/
-
+
static int __cpuinit deep_magic_nexgen_probe(void)
{
int ret;
-
+
__asm__ __volatile__ (
" movw $0x5555, %%ax\n"
" xorw %%dx,%%dx\n"
@@ -22,22 +22,21 @@ static int __cpuinit deep_magic_nexgen_probe(void)
" movl $0, %%eax\n"
" jnz 1f\n"
" movl $1, %%eax\n"
- "1:\n"
- : "=a" (ret) : : "cx", "dx" );
+ "1:\n"
+ : "=a" (ret) : : "cx", "dx");
return ret;
}
-static void __cpuinit init_nexgen(struct cpuinfo_x86 * c)
+static void __cpuinit init_nexgen(struct cpuinfo_x86 *c)
{
c->x86_cache_size = 256; /* A few had 1 MB... */
}
-static void __cpuinit nexgen_identify(struct cpuinfo_x86 * c)
+static void __cpuinit nexgen_identify(struct cpuinfo_x86 *c)
{
/* Detect NexGen with old hypercode */
- if ( deep_magic_nexgen_probe() ) {
+ if (deep_magic_nexgen_probe())
strcpy(c->x86_vendor_id, "NexGenDriven");
- }
}
static struct cpu_dev nexgen_cpu_dev __cpuinitdata = {
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index af11d31dce0a..0978a4a39418 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -8,78 +8,139 @@
/*
* Get CPU information for use by the procfs.
*/
+#ifdef CONFIG_X86_32
+static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
+ unsigned int cpu)
+{
+#ifdef CONFIG_X86_HT
+ if (c->x86_max_cores * smp_num_siblings > 1) {
+ seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
+ seq_printf(m, "siblings\t: %d\n",
+ cpus_weight(per_cpu(cpu_core_map, cpu)));
+ seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
+ seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
+ seq_printf(m, "apicid\t\t: %d\n", c->apicid);
+ seq_printf(m, "initial apicid\t: %d\n", c->initial_apicid);
+ }
+#endif
+}
+
+static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
+{
+ /*
+ * We use exception 16 if we have hardware math and we've either seen
+ * it or the CPU claims it is internal
+ */
+ int fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu);
+ seq_printf(m,
+ "fdiv_bug\t: %s\n"
+ "hlt_bug\t\t: %s\n"
+ "f00f_bug\t: %s\n"
+ "coma_bug\t: %s\n"
+ "fpu\t\t: %s\n"
+ "fpu_exception\t: %s\n"
+ "cpuid level\t: %d\n"
+ "wp\t\t: %s\n",
+ c->fdiv_bug ? "yes" : "no",
+ c->hlt_works_ok ? "no" : "yes",
+ c->f00f_bug ? "yes" : "no",
+ c->coma_bug ? "yes" : "no",
+ c->hard_math ? "yes" : "no",
+ fpu_exception ? "yes" : "no",
+ c->cpuid_level,
+ c->wp_works_ok ? "yes" : "no");
+}
+#else
+static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
+ unsigned int cpu)
+{
+#ifdef CONFIG_SMP
+ if (c->x86_max_cores * smp_num_siblings > 1) {
+ seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
+ seq_printf(m, "siblings\t: %d\n",
+ cpus_weight(per_cpu(cpu_core_map, cpu)));
+ seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
+ seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
+ seq_printf(m, "apicid\t\t: %d\n", c->apicid);
+ seq_printf(m, "initial apicid\t: %d\n", c->initial_apicid);
+ }
+#endif
+}
+
+static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
+{
+ seq_printf(m,
+ "fpu\t\t: yes\n"
+ "fpu_exception\t: yes\n"
+ "cpuid level\t: %d\n"
+ "wp\t\t: yes\n",
+ c->cpuid_level);
+}
+#endif
+
static int show_cpuinfo(struct seq_file *m, void *v)
{
struct cpuinfo_x86 *c = v;
- int i, n = 0;
- int fpu_exception;
+ unsigned int cpu = 0;
+ int i;
#ifdef CONFIG_SMP
- n = c->cpu_index;
+ cpu = c->cpu_index;
#endif
- seq_printf(m, "processor\t: %d\n"
- "vendor_id\t: %s\n"
- "cpu family\t: %d\n"
- "model\t\t: %d\n"
- "model name\t: %s\n",
- n,
- c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
- c->x86,
- c->x86_model,
- c->x86_model_id[0] ? c->x86_model_id : "unknown");
+ seq_printf(m, "processor\t: %u\n"
+ "vendor_id\t: %s\n"
+ "cpu family\t: %d\n"
+ "model\t\t: %u\n"
+ "model name\t: %s\n",
+ cpu,
+ c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
+ c->x86,
+ c->x86_model,
+ c->x86_model_id[0] ? c->x86_model_id : "unknown");
if (c->x86_mask || c->cpuid_level >= 0)
seq_printf(m, "stepping\t: %d\n", c->x86_mask);
else
seq_printf(m, "stepping\t: unknown\n");
- if ( cpu_has(c, X86_FEATURE_TSC) ) {
- unsigned int freq = cpufreq_quick_get(n);
+ if (cpu_has(c, X86_FEATURE_TSC)) {
+ unsigned int freq = cpufreq_quick_get(cpu);
+
if (!freq)
freq = cpu_khz;
seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
- freq / 1000, (freq % 1000));
+ freq / 1000, (freq % 1000));
}
/* Cache size */
if (c->x86_cache_size >= 0)
seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
-#ifdef CONFIG_X86_HT
- if (c->x86_max_cores * smp_num_siblings > 1) {
- seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
- seq_printf(m, "siblings\t: %d\n",
- cpus_weight(per_cpu(cpu_core_map, n)));
- seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
- seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
- }
-#endif
-
- /* We use exception 16 if we have hardware math and we've either seen it or the CPU claims it is internal */
- fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu);
- seq_printf(m, "fdiv_bug\t: %s\n"
- "hlt_bug\t\t: %s\n"
- "f00f_bug\t: %s\n"
- "coma_bug\t: %s\n"
- "fpu\t\t: %s\n"
- "fpu_exception\t: %s\n"
- "cpuid level\t: %d\n"
- "wp\t\t: %s\n"
- "flags\t\t:",
- c->fdiv_bug ? "yes" : "no",
- c->hlt_works_ok ? "no" : "yes",
- c->f00f_bug ? "yes" : "no",
- c->coma_bug ? "yes" : "no",
- c->hard_math ? "yes" : "no",
- fpu_exception ? "yes" : "no",
- c->cpuid_level,
- c->wp_works_ok ? "yes" : "no");
-
- for ( i = 0 ; i < 32*NCAPINTS ; i++ )
- if ( test_bit(i, c->x86_capability) &&
- x86_cap_flags[i] != NULL )
+
+ show_cpuinfo_core(m, c, cpu);
+ show_cpuinfo_misc(m, c);
+
+ seq_printf(m, "flags\t\t:");
+ for (i = 0; i < 32*NCAPINTS; i++)
+ if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
seq_printf(m, " %s", x86_cap_flags[i]);
- for (i = 0; i < 32; i++)
+ seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
+ c->loops_per_jiffy/(500000/HZ),
+ (c->loops_per_jiffy/(5000/HZ)) % 100);
+
+#ifdef CONFIG_X86_64
+ if (c->x86_tlbsize > 0)
+ seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
+#endif
+ seq_printf(m, "clflush size\t: %u\n", c->x86_clflush_size);
+#ifdef CONFIG_X86_64
+ seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
+ seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
+ c->x86_phys_bits, c->x86_virt_bits);
+#endif
+
+ seq_printf(m, "power management:");
+ for (i = 0; i < 32; i++) {
if (c->x86_power & (1 << i)) {
if (i < ARRAY_SIZE(x86_power_flags) &&
x86_power_flags[i])
@@ -89,11 +150,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
else
seq_printf(m, " [%d]", i);
}
+ }
- seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
- c->loops_per_jiffy/(500000/HZ),
- (c->loops_per_jiffy/(5000/HZ)) % 100);
- seq_printf(m, "clflush size\t: %u\n\n", c->x86_clflush_size);
+ seq_printf(m, "\n\n");
return 0;
}
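The cpu MHz and bogomips lines above are plain integer formatting of freq (in kHz) and loops_per_jiffy; a small userspace sketch of the same arithmetic, with HZ and the input values assumed purely for illustration:

#include <stdio.h>

#define HZ 250	/* assumed tick rate, for illustration only */

int main(void)
{
	unsigned int freq = 2394123;			/* kHz, e.g. from cpufreq */
	unsigned long loops_per_jiffy = 4789032;	/* sample calibration value */

	/* "cpu MHz": split kHz into whole MHz and the three fractional digits */
	printf("cpu MHz\t\t: %u.%03u\n", freq / 1000, freq % 1000);

	/* "bogomips": loops_per_jiffy rescaled to loops per 500000 microseconds */
	printf("bogomips\t: %lu.%02lu\n",
	       loops_per_jiffy / (500000 / HZ),
	       (loops_per_jiffy / (5000 / HZ)) % 100);
	return 0;
}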
@@ -106,14 +165,17 @@ static void *c_start(struct seq_file *m, loff_t *pos)
return &cpu_data(*pos);
return NULL;
}
+
static void *c_next(struct seq_file *m, void *v, loff_t *pos)
{
*pos = next_cpu(*pos, cpu_online_map);
return c_start(m, pos);
}
+
static void c_stop(struct seq_file *m, void *v)
{
}
+
const struct seq_operations cpuinfo_op = {
.start = c_start,
.next = c_next,
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index e8b422c1c512..b911a2c61b8f 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -18,8 +18,8 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
/* Print CMS and CPU revision */
max = cpuid_eax(0x80860000);
cpu_rev = 0;
- if ( max >= 0x80860001 ) {
- cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags);
+ if (max >= 0x80860001) {
+ cpuid(0x80860001, &dummy, &cpu_rev, &cpu_freq, &cpu_flags);
if (cpu_rev != 0x02000000) {
printk(KERN_INFO "CPU: Processor revision %u.%u.%u.%u, %u MHz\n",
(cpu_rev >> 24) & 0xff,
@@ -29,7 +29,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
cpu_freq);
}
}
- if ( max >= 0x80860002 ) {
+ if (max >= 0x80860002) {
cpuid(0x80860002, &new_cpu_rev, &cms_rev1, &cms_rev2, &dummy);
if (cpu_rev == 0x02000000) {
printk(KERN_INFO "CPU: Processor revision %08X, %u MHz\n",
@@ -42,7 +42,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
cms_rev1 & 0xff,
cms_rev2);
}
- if ( max >= 0x80860006 ) {
+ if (max >= 0x80860006) {
cpuid(0x80860003,
(void *)&cpu_info[0],
(void *)&cpu_info[4],
@@ -74,23 +74,25 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
wrmsr(0x80860004, cap_mask, uk);
/* All Transmeta CPUs have a constant TSC */
- set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
-
+ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+
#ifdef CONFIG_SYSCTL
- /* randomize_va_space slows us down enormously;
- it probably triggers retranslation of x86->native bytecode */
+ /*
+ * randomize_va_space slows us down enormously;
+ * it probably triggers retranslation of x86->native bytecode
+ */
randomize_va_space = 0;
#endif
}
-static void __cpuinit transmeta_identify(struct cpuinfo_x86 * c)
+static void __cpuinit transmeta_identify(struct cpuinfo_x86 *c)
{
u32 xlvl;
/* Transmeta-defined flags: level 0x80860001 */
xlvl = cpuid_eax(0x80860000);
- if ( (xlvl & 0xffff0000) == 0x80860000 ) {
- if ( xlvl >= 0x80860001 )
+ if ((xlvl & 0xffff0000) == 0x80860000) {
+ if (xlvl >= 0x80860001)
c->x86_capability[2] = cpuid_edx(0x80860001);
}
}
@@ -102,8 +104,4 @@ static struct cpu_dev transmeta_cpu_dev __cpuinitdata = {
.c_identify = transmeta_identify,
};
-int __init transmeta_init_cpu(void)
-{
- cpu_devs[X86_VENDOR_TRANSMETA] = &transmeta_cpu_dev;
- return 0;
-}
+cpu_vendor_dev_register(X86_VENDOR_TRANSMETA, &transmeta_cpu_dev);
diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c
index a7a4e75bdcd7..b1fc90989d75 100644
--- a/arch/x86/kernel/cpu/umc.c
+++ b/arch/x86/kernel/cpu/umc.c
@@ -3,24 +3,23 @@
#include <asm/processor.h>
#include "cpu.h"
-/* UMC chips appear to be only either 386 or 486, so no special init takes place.
+/*
+ * UMC chips appear to be only either 386 or 486,
+ * so no special init takes place.
*/
static struct cpu_dev umc_cpu_dev __cpuinitdata = {
.c_vendor = "UMC",
- .c_ident = { "UMC UMC UMC" },
+ .c_ident = { "UMC UMC UMC" },
.c_models = {
{ .vendor = X86_VENDOR_UMC, .family = 4, .model_names =
- {
- [1] = "U5D",
- [2] = "U5S",
+ {
+ [1] = "U5D",
+ [2] = "U5S",
}
},
},
};
-int __init umc_init_cpu(void)
-{
- cpu_devs[X86_VENDOR_UMC] = &umc_cpu_dev;
- return 0;
-}
+cpu_vendor_dev_register(X86_VENDOR_UMC, &umc_cpu_dev);
+
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 9a5fa0abfcc7..2251d0ae9570 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -26,11 +26,7 @@
#include <linux/kdebug.h>
#include <asm/smp.h>
-#ifdef CONFIG_X86_32
#include <mach_ipi.h>
-#else
-#include <asm/mach_apic.h>
-#endif
/* This keeps a track of which one is crashing cpu. */
static int crashing_cpu;
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index dcd918c1580d..11c11b8ec48d 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -220,11 +220,11 @@ int ds_allocate(void **dsp, size_t bts_size_in_bytes)
int ds_free(void **dsp)
{
- if (*dsp)
+ if (*dsp) {
kfree((void *)get_bts_buffer_base(*dsp));
- kfree(*dsp);
- *dsp = NULL;
-
+ kfree(*dsp);
+ *dsp = NULL;
+ }
return 0;
}
diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c
index 80444c5c9b14..0240cd778365 100644
--- a/arch/x86/kernel/e820_32.c
+++ b/arch/x86/kernel/e820_32.c
@@ -450,38 +450,25 @@ int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
* thinkpad 560x, for example, does not cooperate with the memory
* detection code.)
*/
-int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
+int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
{
/* Only one memory region (or negative)? Ignore it */
if (nr_map < 2)
return -1;
do {
- unsigned long long start = biosmap->addr;
- unsigned long long size = biosmap->size;
- unsigned long long end = start + size;
- unsigned long type = biosmap->type;
+ u64 start = biosmap->addr;
+ u64 size = biosmap->size;
+ u64 end = start + size;
+ u32 type = biosmap->type;
/* Overflow in 64 bits? Ignore the memory map. */
if (start > end)
return -1;
- /*
- * Some BIOSes claim RAM in the 640k - 1M region.
- * Not right. Fix it up.
- */
- if (type == E820_RAM) {
- if (start < 0x100000ULL && end > 0xA0000ULL) {
- if (start < 0xA0000ULL)
- add_memory_region(start, 0xA0000ULL-start, type);
- if (end <= 0x100000ULL)
- continue;
- start = 0x100000ULL;
- size = end - start;
- }
- }
add_memory_region(start, size, type);
- } while (biosmap++,--nr_map);
+ } while (biosmap++, --nr_map);
+
return 0;
}
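The start > end check above relies on unsigned 64-bit wraparound to reject a bogus BIOS entry whose base plus size overflows; a tiny standalone demonstration of the same test (the entry values are made up):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* hypothetical bogus e820 entry near the top of the address space */
	uint64_t start = 0xfffffffffffff000ULL;
	uint64_t size  = 0x2000;
	uint64_t end   = start + size;	/* wraps around to 0x1000 */

	if (start > end)
		puts("overflow in 64 bits - ignore the memory map");
	else
		puts("entry accepted");
	return 0;
}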
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 9be697126013..7f6c0c85c8f6 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -27,6 +27,7 @@
#include <asm/setup.h>
#include <asm/sections.h>
#include <asm/kdebug.h>
+#include <asm/trampoline.h>
struct e820map e820;
@@ -36,11 +37,11 @@ struct e820map e820;
unsigned long end_pfn;
/*
- * end_pfn only includes RAM, while end_pfn_map includes all e820 entries.
- * The direct mapping extends to end_pfn_map, so that we can directly access
+ * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
+ * The direct mapping extends to max_pfn_mapped, so that we can directly access
* apertures, ACPI and other tables without having to play with fixmaps.
*/
-unsigned long end_pfn_map;
+unsigned long max_pfn_mapped;
/*
* Last pfn which the user wants to use.
@@ -58,8 +59,8 @@ struct early_res {
};
static struct early_res early_res[MAX_EARLY_RES] __initdata = {
{ 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
-#ifdef CONFIG_SMP
- { SMP_TRAMPOLINE_BASE, SMP_TRAMPOLINE_BASE + 2*PAGE_SIZE, "SMP_TRAMPOLINE" },
+#ifdef CONFIG_X86_TRAMPOLINE
+ { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
#endif
{}
};
@@ -95,7 +96,8 @@ void __init early_res_to_bootmem(void)
}
/* Check for already reserved areas */
-static inline int bad_addr(unsigned long *addrp, unsigned long size)
+static inline int
+bad_addr(unsigned long *addrp, unsigned long size, unsigned long align)
{
int i;
unsigned long addr = *addrp, last;
@@ -105,7 +107,7 @@ again:
for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
struct early_res *r = &early_res[i];
if (last >= r->start && addr < r->end) {
- *addrp = addr = r->end;
+ *addrp = addr = round_up(r->end, align);
changed = 1;
goto again;
}
@@ -113,6 +115,40 @@ again:
return changed;
}
+/* Check for already reserved areas */
+static inline int
+bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align)
+{
+ int i;
+ unsigned long addr = *addrp, last;
+ unsigned long size = *sizep;
+ int changed = 0;
+again:
+ last = addr + size;
+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+ struct early_res *r = &early_res[i];
+ if (last > r->start && addr < r->start) {
+ size = r->start - addr;
+ changed = 1;
+ goto again;
+ }
+ if (last > r->end && addr < r->end) {
+ addr = round_up(r->end, align);
+ size = last - addr;
+ changed = 1;
+ goto again;
+ }
+ if (last <= r->end && addr >= r->start) {
+ (*sizep)++;
+ return 0;
+ }
+ }
+ if (changed) {
+ *addrp = addr;
+ *sizep = size;
+ }
+ return changed;
+}
/*
* This function checks if any part of the range <start,end> is mapped
* with type.
@@ -174,26 +210,27 @@ int __init e820_all_mapped(unsigned long start, unsigned long end,
* Find a free area with specified alignment in a specific range.
*/
unsigned long __init find_e820_area(unsigned long start, unsigned long end,
- unsigned size, unsigned long align)
+ unsigned long size, unsigned long align)
{
int i;
- unsigned long mask = ~(align - 1);
for (i = 0; i < e820.nr_map; i++) {
struct e820entry *ei = &e820.map[i];
- unsigned long addr = ei->addr, last;
+ unsigned long addr, last;
+ unsigned long ei_last;
if (ei->type != E820_RAM)
continue;
+ addr = round_up(ei->addr, align);
+ ei_last = ei->addr + ei->size;
if (addr < start)
- addr = start;
- if (addr > ei->addr + ei->size)
+ addr = round_up(start, align);
+ if (addr >= ei_last)
continue;
- while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
+ while (bad_addr(&addr, size, align) && addr+size <= ei_last)
;
- addr = (addr + align - 1) & mask;
last = addr + size;
- if (last > ei->addr + ei->size)
+ if (last > ei_last)
continue;
if (last > end)
continue;
@@ -203,6 +240,40 @@ unsigned long __init find_e820_area(unsigned long start, unsigned long end,
}
/*
+ * Find next free range after *start
+ */
+unsigned long __init find_e820_area_size(unsigned long start,
+ unsigned long *sizep,
+ unsigned long align)
+{
+ int i;
+
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ unsigned long addr, last;
+ unsigned long ei_last;
+
+ if (ei->type != E820_RAM)
+ continue;
+ addr = round_up(ei->addr, align);
+ ei_last = ei->addr + ei->size;
+ if (addr < start)
+ addr = round_up(start, align);
+ if (addr >= ei_last)
+ continue;
+ *sizep = ei_last - addr;
+ while (bad_addr_size(&addr, sizep, align) &&
+ addr + *sizep <= ei_last)
+ ;
+ last = addr + *sizep;
+ if (last > ei_last)
+ continue;
+ return addr;
+ }
+ return -1UL;
+
+}
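The round_up() calls above keep the candidate address aligned each time it is bumped past a reserved range; a minimal userspace sketch of that search loop over a made-up reservation table (ROUND_UP and the ranges are assumptions for illustration):

#include <stdio.h>

#define ROUND_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))	/* a must be a power of two */

struct range { unsigned long start, end; };	/* [start, end) */

/* hypothetical early reservations; the loop rescans after every bump */
static struct range reserved[] = {
	{ 0x0000, 0x1000 },	/* BIOS data page */
	{ 0x6000, 0x8000 },	/* trampoline     */
};

int main(void)
{
	unsigned long addr = 0, size = 0x6000, align = 0x1000;
	size_t i;
	int moved;

	do {	/* bump addr past any reservation the candidate area overlaps */
		moved = 0;
		for (i = 0; i < sizeof(reserved) / sizeof(reserved[0]); i++) {
			if (addr + size > reserved[i].start && addr < reserved[i].end) {
				addr = ROUND_UP(reserved[i].end, align);
				moved = 1;
			}
		}
	} while (moved);

	printf("free aligned area at 0x%lx\n", addr);	/* expect 0x8000 */
	return 0;
}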
+/*
* Find the highest page frame number we have available
*/
unsigned long __init e820_end_of_ram(void)
@@ -211,29 +282,29 @@ unsigned long __init e820_end_of_ram(void)
end_pfn = find_max_pfn_with_active_regions();
- if (end_pfn > end_pfn_map)
- end_pfn_map = end_pfn;
- if (end_pfn_map > MAXMEM>>PAGE_SHIFT)
- end_pfn_map = MAXMEM>>PAGE_SHIFT;
+ if (end_pfn > max_pfn_mapped)
+ max_pfn_mapped = end_pfn;
+ if (max_pfn_mapped > MAXMEM>>PAGE_SHIFT)
+ max_pfn_mapped = MAXMEM>>PAGE_SHIFT;
if (end_pfn > end_user_pfn)
end_pfn = end_user_pfn;
- if (end_pfn > end_pfn_map)
- end_pfn = end_pfn_map;
+ if (end_pfn > max_pfn_mapped)
+ end_pfn = max_pfn_mapped;
- printk(KERN_INFO "end_pfn_map = %lu\n", end_pfn_map);
+ printk(KERN_INFO "max_pfn_mapped = %lu\n", max_pfn_mapped);
return end_pfn;
}
/*
* Mark e820 reserved areas as busy for the resource manager.
*/
-void __init e820_reserve_resources(struct resource *code_resource,
- struct resource *data_resource, struct resource *bss_resource)
+void __init e820_reserve_resources(void)
{
int i;
+ struct resource *res;
+
+ res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
for (i = 0; i < e820.nr_map; i++) {
- struct resource *res;
- res = alloc_bootmem_low(sizeof(struct resource));
switch (e820.map[i].type) {
case E820_RAM: res->name = "System RAM"; break;
case E820_ACPI: res->name = "ACPI Tables"; break;
@@ -243,21 +314,8 @@ void __init e820_reserve_resources(struct resource *code_resource,
res->start = e820.map[i].addr;
res->end = res->start + e820.map[i].size - 1;
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- request_resource(&iomem_resource, res);
- if (e820.map[i].type == E820_RAM) {
- /*
- * We don't know which RAM region contains kernel data,
- * so we try it repeatedly and let the resource manager
- * test it.
- */
- request_resource(res, code_resource);
- request_resource(res, data_resource);
- request_resource(res, bss_resource);
-#ifdef CONFIG_KEXEC
- if (crashk_res.start != crashk_res.end)
- request_resource(res, &crashk_res);
-#endif
- }
+ insert_resource(&iomem_resource, res);
+ res++;
}
}
@@ -309,9 +367,9 @@ static int __init e820_find_active_region(const struct e820entry *ei,
if (*ei_startpfn >= *ei_endpfn)
return 0;
- /* Check if end_pfn_map should be updated */
- if (ei->type != E820_RAM && *ei_endpfn > end_pfn_map)
- end_pfn_map = *ei_endpfn;
+ /* Check if max_pfn_mapped should be updated */
+ if (ei->type != E820_RAM && *ei_endpfn > max_pfn_mapped)
+ max_pfn_mapped = *ei_endpfn;
/* Skip if map is outside the node */
if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
@@ -634,10 +692,10 @@ static int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
return -1;
do {
- unsigned long start = biosmap->addr;
- unsigned long size = biosmap->size;
- unsigned long end = start + size;
- unsigned long type = biosmap->type;
+ u64 start = biosmap->addr;
+ u64 size = biosmap->size;
+ u64 end = start + size;
+ u32 type = biosmap->type;
/* Overflow in 64 bits? Ignore the memory map. */
if (start > end)
@@ -702,7 +760,7 @@ static int __init parse_memmap_opt(char *p)
saved_max_pfn = e820_end_of_ram();
remove_all_active_ranges();
#endif
- end_pfn_map = 0;
+ max_pfn_mapped = 0;
e820.nr_map = 0;
userdef = 1;
return 0;
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index cff84cd9987f..643fd861b724 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -13,7 +13,7 @@
#define VGABASE (__ISA_IO_base + 0xb8000)
static int max_ypos = 25, max_xpos = 80;
-static int current_ypos = 25, current_xpos = 0;
+static int current_ypos = 25, current_xpos;
static void early_vga_write(struct console *con, const char *str, unsigned n)
{
@@ -108,12 +108,12 @@ static __init void early_serial_init(char *s)
if (*s) {
unsigned port;
- if (!strncmp(s,"0x",2)) {
+ if (!strncmp(s, "0x", 2)) {
early_serial_base = simple_strtoul(s, &e, 16);
} else {
static int bases[] = { 0x3f8, 0x2f8 };
- if (!strncmp(s,"ttyS",4))
+ if (!strncmp(s, "ttyS", 4))
s += 4;
port = simple_strtoul(s, &e, 10);
if (port > 1 || s == e)
@@ -194,7 +194,7 @@ static struct console simnow_console = {
/* Direct interface for emergencies */
static struct console *early_console = &early_vga_console;
-static int early_console_initialized = 0;
+static int early_console_initialized;
void early_printk(const char *fmt, ...)
{
@@ -202,9 +202,9 @@ void early_printk(const char *fmt, ...)
int n;
va_list ap;
- va_start(ap,fmt);
- n = vscnprintf(buf,512,fmt,ap);
- early_console->write(early_console,buf,n);
+ va_start(ap, fmt);
+ n = vscnprintf(buf, 512, fmt, ap);
+ early_console->write(early_console, buf, n);
va_end(ap);
}
@@ -229,15 +229,15 @@ static int __init setup_early_printk(char *buf)
early_serial_init(buf);
early_console = &early_serial_console;
} else if (!strncmp(buf, "vga", 3)
- && boot_params.screen_info.orig_video_isVGA == 1) {
+ && boot_params.screen_info.orig_video_isVGA == 1) {
max_xpos = boot_params.screen_info.orig_video_cols;
max_ypos = boot_params.screen_info.orig_video_lines;
current_ypos = boot_params.screen_info.orig_y;
early_console = &early_vga_console;
- } else if (!strncmp(buf, "simnow", 6)) {
- simnow_init(buf + 6);
- early_console = &simnow_console;
- keep_early = 1;
+ } else if (!strncmp(buf, "simnow", 6)) {
+ simnow_init(buf + 6);
+ early_console = &simnow_console;
+ keep_early = 1;
#ifdef CONFIG_HVC_XEN
} else if (!strncmp(buf, "xen", 3)) {
early_console = &xenboot_console;
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 4b87c32b639f..9ba49a26dff8 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -51,6 +51,7 @@
#include <asm/desc.h>
#include <asm/percpu.h>
#include <asm/dwarf2.h>
+#include <asm/processor-flags.h>
#include "irq_vectors.h"
/*
@@ -68,13 +69,6 @@
#define nr_syscalls ((syscall_table_size)/4)
-CF_MASK = 0x00000001
-TF_MASK = 0x00000100
-IF_MASK = 0x00000200
-DF_MASK = 0x00000400
-NT_MASK = 0x00004000
-VM_MASK = 0x00020000
-
#ifdef CONFIG_PREEMPT
#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
@@ -84,7 +78,7 @@ VM_MASK = 0x00020000
.macro TRACE_IRQS_IRET
#ifdef CONFIG_TRACE_IRQFLAGS
- testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off?
+ testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off?
jz 1f
TRACE_IRQS_ON
1:
@@ -246,7 +240,7 @@ ret_from_intr:
check_userspace:
movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
movb PT_CS(%esp), %al
- andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
+ andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
cmpl $USER_RPL, %eax
jb resume_kernel # not returning to v8086 or userspace
@@ -271,7 +265,7 @@ need_resched:
movl TI_flags(%ebp), %ecx # need_resched set ?
testb $_TIF_NEED_RESCHED, %cl
jz restore_all
- testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off (exception path) ?
+ testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ?
jz restore_all
call preempt_schedule_irq
jmp need_resched
@@ -291,10 +285,10 @@ ENTRY(ia32_sysenter_target)
movl TSS_sysenter_sp0(%esp),%esp
sysenter_past_esp:
/*
- * No need to follow this irqs on/off section: the syscall
- * disabled irqs and here we enable it straight after entry:
+ * Interrupts are disabled here, but we cannot trace that fact until
+ * enough kernel state has been set up for TRACE_IRQS_OFF to be safe -
+ * and we enable interrupts again right after that point anyway.
*/
- ENABLE_INTERRUPTS(CLBR_NONE)
pushl $(__USER_DS)
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET ss, 0*/
@@ -302,6 +296,7 @@ sysenter_past_esp:
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET esp, 0
pushfl
+ orl $X86_EFLAGS_IF, (%esp)
CFI_ADJUST_CFA_OFFSET 4
pushl $(__USER_CS)
CFI_ADJUST_CFA_OFFSET 4
@@ -315,6 +310,11 @@ sysenter_past_esp:
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET eip, 0
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET 4
+ SAVE_ALL
+ ENABLE_INTERRUPTS(CLBR_NONE)
+
/*
* Load the potential sixth argument from user stack.
* Careful about security.
@@ -322,14 +322,12 @@ sysenter_past_esp:
cmpl $__PAGE_OFFSET-3,%ebp
jae syscall_fault
1: movl (%ebp),%ebp
+ movl %ebp,PT_EBP(%esp)
.section __ex_table,"a"
.align 4
.long 1b,syscall_fault
.previous
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
- SAVE_ALL
GET_THREAD_INFO(%ebp)
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
@@ -384,7 +382,7 @@ syscall_exit:
# setting need_resched or sigpending
# between sampling and the iret
TRACE_IRQS_OFF
- testl $TF_MASK,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit
+ testl $X86_EFLAGS_TF,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit
jz no_singlestep
orl $_TIF_SINGLESTEP,TI_flags(%ebp)
no_singlestep:
@@ -399,7 +397,7 @@ restore_all:
# See comments in process.c:copy_thread() for details.
movb PT_OLDSS(%esp), %ah
movb PT_CS(%esp), %al
- andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
+ andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
CFI_REMEMBER_STATE
je ldt_ss # returning to user-space with LDT SS
@@ -486,7 +484,7 @@ work_resched:
work_notifysig: # deal with pending signals and
# notify-resume requests
#ifdef CONFIG_VM86
- testl $VM_MASK, PT_EFLAGS(%esp)
+ testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
movl %esp, %eax
jne work_notifysig_v86 # returning to kernel-space or
# vm86-space
@@ -543,9 +541,6 @@ END(syscall_exit_work)
RING0_INT_FRAME # can't unwind into user space anyway
syscall_fault:
- pushl %eax # save orig_eax
- CFI_ADJUST_CFA_OFFSET 4
- SAVE_ALL
GET_THREAD_INFO(%ebp)
movl $-EFAULT,PT_EAX(%esp)
jmp resume_userspace
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c20c9e7e08dd..556a8df522a7 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -319,19 +319,17 @@ badsys:
/* Do syscall tracing */
tracesys:
SAVE_REST
- movq $-ENOSYS,RAX(%rsp)
+ movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
FIXUP_TOP_OF_STACK %rdi
movq %rsp,%rdi
call syscall_trace_enter
LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
cmpq $__NR_syscall_max,%rax
- movq $-ENOSYS,%rcx
- cmova %rcx,%rax
- ja 1f
+ ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
movq %r10,%rcx /* fixup for C */
call *sys_call_table(,%rax,8)
-1: movq %rax,RAX-ARGOFFSET(%rsp)
+ movq %rax,RAX-ARGOFFSET(%rsp)
/* Use IRET because user could have changed frame */
/*
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 4ae7b6440260..9546ef408b92 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -15,6 +15,7 @@
#include <linux/kernel.h>
#include <linux/ctype.h>
#include <linux/init.h>
+#include <linux/hardirq.h>
#include <asm/smp.h>
#include <asm/ipi.h>
@@ -24,20 +25,20 @@
#include <acpi/acpi_bus.h>
#endif
-/* which logical CPU number maps to which CPU (physical APIC ID) */
-u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata
- = { [0 ... NR_CPUS-1] = BAD_APICID };
-void *x86_cpu_to_apicid_early_ptr;
-DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
-EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
+DEFINE_PER_CPU(int, x2apic_extra_bits);
struct genapic __read_mostly *genapic = &apic_flat;
+static enum uv_system_type uv_system_type;
+
/*
* Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
*/
void __init setup_apic_routing(void)
{
+ if (uv_system_type == UV_NON_UNIQUE_APIC)
+ genapic = &apic_x2apic_uv_x;
+ else
#ifdef CONFIG_ACPI
/*
* Quirk: some x86_64 machines can only use physical APIC mode
@@ -64,3 +65,37 @@ void send_IPI_self(int vector)
{
__send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
}
+
+int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ if (!strcmp(oem_id, "SGI")) {
+ if (!strcmp(oem_table_id, "UVL"))
+ uv_system_type = UV_LEGACY_APIC;
+ else if (!strcmp(oem_table_id, "UVX"))
+ uv_system_type = UV_X2APIC;
+ else if (!strcmp(oem_table_id, "UVH"))
+ uv_system_type = UV_NON_UNIQUE_APIC;
+ }
+ return 0;
+}
+
+unsigned int read_apic_id(void)
+{
+ unsigned int id;
+
+ WARN_ON(preemptible());
+ id = apic_read(APIC_ID);
+ if (uv_system_type >= UV_X2APIC)
+ id |= __get_cpu_var(x2apic_extra_bits);
+ return id;
+}
+
+enum uv_system_type get_uv_system_type(void)
+{
+ return uv_system_type;
+}
+
+int is_uv_system(void)
+{
+ return uv_system_type != UV_NONE;
+}
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 07352b74bda6..1a9c68845ee8 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -97,7 +97,7 @@ static void flat_send_IPI_all(int vector)
static int flat_apic_id_registered(void)
{
- return physid_isset(GET_APIC_ID(apic_read(APIC_ID)), phys_cpu_present_map);
+ return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map);
}
static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
@@ -138,12 +138,9 @@ static cpumask_t physflat_target_cpus(void)
static cpumask_t physflat_vector_allocation_domain(int cpu)
{
- cpumask_t domain = CPU_MASK_NONE;
- cpu_set(cpu, domain);
- return domain;
+ return cpumask_of_cpu(cpu);
}
-
static void physflat_send_IPI_mask(cpumask_t cpumask, int vector)
{
send_IPI_mask_sequence(cpumask, vector);
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
new file mode 100644
index 000000000000..5d77c9cd8e15
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -0,0 +1,245 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV APIC functions (note: not an Intel compatible APIC)
+ *
+ * Copyright (C) 2007 Silicon Graphics, Inc. All rights reserved.
+ */
+
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/bootmem.h>
+#include <linux/module.h>
+#include <asm/smp.h>
+#include <asm/ipi.h>
+#include <asm/genapic.h>
+#include <asm/uv/uv_mmrs.h>
+#include <asm/uv/uv_hub.h>
+
+DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
+EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
+
+struct uv_blade_info *uv_blade_info;
+EXPORT_SYMBOL_GPL(uv_blade_info);
+
+short *uv_node_to_blade;
+EXPORT_SYMBOL_GPL(uv_node_to_blade);
+
+short *uv_cpu_to_blade;
+EXPORT_SYMBOL_GPL(uv_cpu_to_blade);
+
+short uv_possible_blades;
+EXPORT_SYMBOL_GPL(uv_possible_blades);
+
+/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
+
+static cpumask_t uv_target_cpus(void)
+{
+ return cpumask_of_cpu(0);
+}
+
+static cpumask_t uv_vector_allocation_domain(int cpu)
+{
+ cpumask_t domain = CPU_MASK_NONE;
+ cpu_set(cpu, domain);
+ return domain;
+}
+
+int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
+{
+ unsigned long val;
+ int nasid;
+
+ nasid = uv_apicid_to_nasid(phys_apicid);
+ val = (1UL << UVH_IPI_INT_SEND_SHFT) |
+ (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
+ (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
+ (6 << UVH_IPI_INT_DELIVERY_MODE_SHFT);
+ uv_write_global_mmr64(nasid, UVH_IPI_INT, val);
+ return 0;
+}
+
+static void uv_send_IPI_one(int cpu, int vector)
+{
+ unsigned long val, apicid;
+ int nasid;
+
+ apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */
+ nasid = uv_apicid_to_nasid(apicid);
+ val =
+ (1UL << UVH_IPI_INT_SEND_SHFT) | (apicid <<
+ UVH_IPI_INT_APIC_ID_SHFT) |
+ (vector << UVH_IPI_INT_VECTOR_SHFT);
+ uv_write_global_mmr64(nasid, UVH_IPI_INT, val);
+ printk(KERN_DEBUG
+ "UV: IPI to cpu %d, apicid 0x%lx, vec %d, nasid%d, val 0x%lx\n",
+ cpu, apicid, vector, nasid, val);
+}
+
+static void uv_send_IPI_mask(cpumask_t mask, int vector)
+{
+ unsigned int cpu;
+
+ for (cpu = 0; cpu < NR_CPUS; ++cpu)
+ if (cpu_isset(cpu, mask))
+ uv_send_IPI_one(cpu, vector);
+}
+
+static void uv_send_IPI_allbutself(int vector)
+{
+ cpumask_t mask = cpu_online_map;
+
+ cpu_clear(smp_processor_id(), mask);
+
+ if (!cpus_empty(mask))
+ uv_send_IPI_mask(mask, vector);
+}
+
+static void uv_send_IPI_all(int vector)
+{
+ uv_send_IPI_mask(cpu_online_map, vector);
+}
+
+static int uv_apic_id_registered(void)
+{
+ return 1;
+}
+
+static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ cpu = first_cpu(cpumask);
+ if ((unsigned)cpu < NR_CPUS)
+ return per_cpu(x86_cpu_to_apicid, cpu);
+ else
+ return BAD_APICID;
+}
+
+static unsigned int phys_pkg_id(int index_msb)
+{
+ return GET_APIC_ID(read_apic_id()) >> index_msb;
+}
+
+#ifdef ZZZ /* Needs x2apic patch */
+static void uv_send_IPI_self(int vector)
+{
+ apic_write(APIC_SELF_IPI, vector);
+}
+#endif
+
+struct genapic apic_x2apic_uv_x = {
+ .name = "UV large system",
+ .int_delivery_mode = dest_Fixed,
+ .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
+ .target_cpus = uv_target_cpus,
+ .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */
+ .apic_id_registered = uv_apic_id_registered,
+ .send_IPI_all = uv_send_IPI_all,
+ .send_IPI_allbutself = uv_send_IPI_allbutself,
+ .send_IPI_mask = uv_send_IPI_mask,
+ /* ZZZ.send_IPI_self = uv_send_IPI_self, */
+ .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
+ .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */
+};
+
+static __cpuinit void set_x2apic_extra_bits(int nasid)
+{
+ __get_cpu_var(x2apic_extra_bits) = ((nasid >> 1) << 6);
+}
+
+/*
+ * Called on boot cpu.
+ */
+static __init void uv_system_init(void)
+{
+ union uvh_si_addr_map_config_u m_n_config;
+ int bytes, nid, cpu, lcpu, nasid, last_nasid, blade;
+ unsigned long mmr_base;
+
+ m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
+ mmr_base =
+ uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
+ ~UV_MMR_ENABLE;
+ printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
+
+ last_nasid = -1;
+ for_each_possible_cpu(cpu) {
+ nid = cpu_to_node(cpu);
+ nasid = uv_apicid_to_nasid(per_cpu(x86_cpu_to_apicid, cpu));
+ if (nasid != last_nasid)
+ uv_possible_blades++;
+ last_nasid = nasid;
+ }
+ printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades());
+
+ bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
+ uv_blade_info = alloc_bootmem_pages(bytes);
+
+ bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes();
+ uv_node_to_blade = alloc_bootmem_pages(bytes);
+ memset(uv_node_to_blade, 255, bytes);
+
+ bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus();
+ uv_cpu_to_blade = alloc_bootmem_pages(bytes);
+ memset(uv_cpu_to_blade, 255, bytes);
+
+ last_nasid = -1;
+ blade = -1;
+ lcpu = -1;
+ for_each_possible_cpu(cpu) {
+ nid = cpu_to_node(cpu);
+ nasid = uv_apicid_to_nasid(per_cpu(x86_cpu_to_apicid, cpu));
+ if (nasid != last_nasid) {
+ blade++;
+ lcpu = -1;
+ uv_blade_info[blade].nr_posible_cpus = 0;
+ uv_blade_info[blade].nr_online_cpus = 0;
+ }
+ last_nasid = nasid;
+ lcpu++;
+
+ uv_cpu_hub_info(cpu)->m_val = m_n_config.s.m_skt;
+ uv_cpu_hub_info(cpu)->n_val = m_n_config.s.n_skt;
+ uv_cpu_hub_info(cpu)->numa_blade_id = blade;
+ uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
+ uv_cpu_hub_info(cpu)->local_nasid = nasid;
+ uv_cpu_hub_info(cpu)->gnode_upper =
+ nasid & ~((1 << uv_hub_info->n_val) - 1);
+ uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
+ uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */
+ uv_blade_info[blade].nasid = nasid;
+ uv_blade_info[blade].nr_posible_cpus++;
+ uv_node_to_blade[nid] = blade;
+ uv_cpu_to_blade[cpu] = blade;
+
+ printk(KERN_DEBUG "UV cpu %d, apicid 0x%x, nasid %d, nid %d\n",
+ cpu, per_cpu(x86_cpu_to_apicid, cpu), nasid, nid);
+ printk(KERN_DEBUG "UV lcpu %d, blade %d\n", lcpu, blade);
+ }
+}
+
+/*
+ * Called on each cpu to initialize the per_cpu UV data area.
+ */
+void __cpuinit uv_cpu_init(void)
+{
+ if (!uv_node_to_blade)
+ uv_system_init();
+
+ uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
+
+ if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
+ set_x2apic_extra_bits(uv_hub_info->local_nasid);
+}
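On UV systems with non-unique APIC IDs the local APIC ID register alone is ambiguous, so read_apic_id() ORs in the per-cpu x2apic_extra_bits derived from the nasid; a standalone sketch of that composition (the nasid and the raw register value are made-up numbers):

#include <stdio.h>

int main(void)
{
	int nasid = 6;			/* hypothetical node id                */
	unsigned int raw_id = 0x15;	/* hypothetical APIC_ID register value */

	/* same derivation as set_x2apic_extra_bits()/read_apic_id() above */
	unsigned int extra_bits = (unsigned int)(nasid >> 1) << 6;
	unsigned int apic_id = raw_id | extra_bits;

	printf("extra bits 0x%x, effective APIC id 0x%x\n", extra_bits, apic_id);
	return 0;
}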
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
new file mode 100644
index 000000000000..3db059058927
--- /dev/null
+++ b/arch/x86/kernel/head32.c
@@ -0,0 +1,14 @@
+/*
+ * linux/arch/i386/kernel/head32.c -- prepare to run common code
+ *
+ * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
+ * Copyright (C) 2007 Eric Biederman <ebiederm@xmission.com>
+ */
+
+#include <linux/init.h>
+#include <linux/start_kernel.h>
+
+void __init i386_start_kernel(void)
+{
+ start_kernel();
+}
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index ad2440832de0..d6d54faa84df 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -49,39 +49,75 @@ static void __init copy_bootdata(char *real_mode_data)
}
}
-#define EBDA_ADDR_POINTER 0x40E
+#define BIOS_EBDA_SEGMENT 0x40E
+#define BIOS_LOWMEM_KILOBYTES 0x413
-static __init void reserve_ebda(void)
+/*
+ * The BIOS places the EBDA/XBDA at the top of conventional
+ * memory, and usually decreases the reported amount of
+ * conventional memory (int 0x12) too. This also contains a
+ * workaround for Dell systems that neglect to reserve EBDA.
+ * The same workaround also avoids a problem with the AMD768MPX
+ * chipset: reserve a page before VGA to prevent PCI prefetch
+ * into it (errata #56). Usually the page is reserved anyways,
+ * unless you have no PS/2 mouse plugged in.
+ */
+static void __init reserve_ebda_region(void)
{
- unsigned ebda_addr, ebda_size;
+ unsigned int lowmem, ebda_addr;
+
+ /* To determine the position of the EBDA and the */
+ /* end of conventional memory, we need to look at */
+ /* the BIOS data area. In a paravirtual environment */
+ /* that area is absent. We'll just have to assume */
+ /* that the paravirt case can handle memory setup */
+ /* correctly, without our help. */
+ if (paravirt_enabled())
+ return;
- /*
- * there is a real-mode segmented pointer pointing to the
- * 4K EBDA area at 0x40E
- */
- ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
+ /* end of low (conventional) memory */
+ lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
+ lowmem <<= 10;
+
+ /* start of EBDA area */
+ ebda_addr = *(unsigned short *)__va(BIOS_EBDA_SEGMENT);
ebda_addr <<= 4;
- if (!ebda_addr)
- return;
+ /* Fixup: bios puts an EBDA in the top 64K segment */
+ /* of conventional memory, but does not adjust lowmem. */
+ if ((lowmem - ebda_addr) <= 0x10000)
+ lowmem = ebda_addr;
- ebda_size = *(unsigned short *)__va(ebda_addr);
+ /* Fixup: bios does not report an EBDA at all. */
+ /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
+ if ((ebda_addr == 0) && (lowmem >= 0x9f000))
+ lowmem = 0x9f000;
- /* Round EBDA up to pages */
- if (ebda_size == 0)
- ebda_size = 1;
- ebda_size <<= 10;
- ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE);
- if (ebda_size > 64*1024)
- ebda_size = 64*1024;
+ /* Paranoia: should never happen, but... */
+ if ((lowmem == 0) || (lowmem >= 0x100000))
+ lowmem = 0x9f000;
- reserve_early(ebda_addr, ebda_addr + ebda_size, "EBDA");
+ /* reserve all memory between lowmem and the 1MB mark */
+ reserve_early(lowmem, 0x100000, "BIOS reserved");
}
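The word at 0x40E is a real-mode segment (hence the shift left by 4) and the word at 0x413 is the conventional-memory size in kilobytes (hence the shift left by 10); a standalone illustration of the same arithmetic and the two fixups, using invented BIOS data area contents:

#include <stdio.h>

int main(void)
{
	/* hypothetical BIOS data area words, not read from real memory */
	unsigned short bios_lowmem_kb = 0x027f;	/* word at 0x413: 639 KB       */
	unsigned short bios_ebda_seg  = 0x9fc0;	/* word at 0x40E: EBDA segment */

	unsigned int lowmem    = (unsigned int)bios_lowmem_kb << 10;	/* bytes */
	unsigned int ebda_addr = (unsigned int)bios_ebda_seg  << 4;	/* bytes */

	/* EBDA in the top 64K of conventional memory but lowmem not adjusted */
	if (lowmem - ebda_addr <= 0x10000)
		lowmem = ebda_addr;

	/* no EBDA reported at all: fall back to 0x9f000 */
	if (ebda_addr == 0 && lowmem >= 0x9f000)
		lowmem = 0x9f000;

	/* paranoia: implausible value */
	if (lowmem == 0 || lowmem >= 0x100000)
		lowmem = 0x9f000;

	printf("reserve [0x%x, 0x100000) for the BIOS\n", lowmem);
	return 0;
}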
void __init x86_64_start_kernel(char * real_mode_data)
{
int i;
+ /*
+ * Build-time sanity checks on the kernel image and module
+ * area mappings. (these are purely build-time and produce no code)
+ */
+ BUILD_BUG_ON(MODULES_VADDR < KERNEL_IMAGE_START);
+ BUILD_BUG_ON(MODULES_VADDR-KERNEL_IMAGE_START < KERNEL_IMAGE_SIZE);
+ BUILD_BUG_ON(MODULES_LEN + KERNEL_IMAGE_SIZE > 2*PUD_SIZE);
+ BUILD_BUG_ON((KERNEL_IMAGE_START & ~PMD_MASK) != 0);
+ BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
+ BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
+ BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
+ (__START_KERNEL & PGDIR_MASK)));
+
/* clear bss before set_intr_gate with early_idt_handler */
clear_bss();
@@ -91,7 +127,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
/* Cleanup the over mapped high alias */
cleanup_highmap();
- for (i = 0; i < IDT_ENTRIES; i++) {
+ for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
#ifdef CONFIG_EARLY_PRINTK
set_intr_gate(i, &early_idt_handlers[i]);
#else
@@ -118,7 +154,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
}
- reserve_ebda();
+ reserve_ebda_region();
/*
* At this point everything still needed from the boot loader
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 74d87ea85b5c..826988a6e964 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -450,7 +450,7 @@ is386: movl $2,%ecx # set MP
jmp initialize_secondary # all other CPUs call initialize_secondary
1:
#endif /* CONFIG_SMP */
- jmp start_kernel
+ jmp i386_start_kernel
/*
* We depend on ET to be correct. This checks for 287/387.
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index a007454133a3..10a1955bb1d1 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -132,10 +132,6 @@ ident_complete:
addq %rbp, trampoline_level4_pgt + 0(%rip)
addq %rbp, trampoline_level4_pgt + (511*8)(%rip)
#endif
-#ifdef CONFIG_ACPI_SLEEP
- addq %rbp, wakeup_level4_pgt + 0(%rip)
- addq %rbp, wakeup_level4_pgt + (511*8)(%rip)
-#endif
/* Due to ENTRY(), sometimes the empty space gets filled with
* zeros. Better take a jmp than relying on empty space being
@@ -267,21 +263,16 @@ ENTRY(secondary_startup_64)
bad_address:
jmp bad_address
+ .section ".init.text","ax"
#ifdef CONFIG_EARLY_PRINTK
-.macro early_idt_tramp first, last
- .ifgt \last-\first
- early_idt_tramp \first, \last-1
- .endif
- movl $\last,%esi
- jmp early_idt_handler
-.endm
-
.globl early_idt_handlers
early_idt_handlers:
- early_idt_tramp 0, 63
- early_idt_tramp 64, 127
- early_idt_tramp 128, 191
- early_idt_tramp 192, 255
+ i = 0
+ .rept NUM_EXCEPTION_VECTORS
+ movl $i, %esi
+ jmp early_idt_handler
+ i = i + 1
+ .endr
#endif
ENTRY(early_idt_handler)
@@ -327,6 +318,7 @@ early_idt_msg:
early_idt_ripmsg:
.asciz "RIP %s\n"
#endif /* CONFIG_EARLY_PRINTK */
+ .previous
.balign PAGE_SIZE
@@ -383,12 +375,12 @@ NEXT_PAGE(level2_ident_pgt)
NEXT_PAGE(level2_kernel_pgt)
/*
- * 128 MB kernel mapping. We spend a full page on this pagetable
+ * 512 MB kernel mapping. We spend a full page on this pagetable
* anyway.
*
* The kernel code+data+bss must not be bigger than that.
*
- * (NOTE: at +128MB starts the module area, see MODULES_VADDR.
+ * (NOTE: at +512MB starts the module area, see MODULES_VADDR.
* If you want to increase this then increase MODULES_VADDR
* too.)
*/
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index 061627806a2d..deb43785e923 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -1,13 +1,8 @@
#include <linux/module.h>
-#include <asm/semaphore.h>
#include <asm/checksum.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__down_failed_trylock);
-EXPORT_SYMBOL(__up_wakeup);
/* Networking helper routines. */
EXPORT_SYMBOL(csum_partial_copy_generic);
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index d2e39e69aaf8..8f8102d967b3 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -5,45 +5,41 @@
* General FPU state handling cleanups
* Gareth Hughes <gareth@valinux.com>, May 2000
*/
-
-#include <linux/sched.h>
#include <linux/module.h>
#include <linux/regset.h>
+#include <linux/sched.h>
+
+#include <asm/sigcontext.h>
#include <asm/processor.h>
-#include <asm/i387.h>
#include <asm/math_emu.h>
-#include <asm/sigcontext.h>
-#include <asm/user.h>
-#include <asm/ptrace.h>
#include <asm/uaccess.h>
+#include <asm/ptrace.h>
+#include <asm/i387.h>
+#include <asm/user.h>
#ifdef CONFIG_X86_64
-
-#include <asm/sigcontext32.h>
-#include <asm/user32.h>
-
+# include <asm/sigcontext32.h>
+# include <asm/user32.h>
#else
-
-#define save_i387_ia32 save_i387
-#define restore_i387_ia32 restore_i387
-
-#define _fpstate_ia32 _fpstate
-#define user_i387_ia32_struct user_i387_struct
-#define user32_fxsr_struct user_fxsr_struct
-
+# define save_i387_ia32 save_i387
+# define restore_i387_ia32 restore_i387
+# define _fpstate_ia32 _fpstate
+# define user_i387_ia32_struct user_i387_struct
+# define user32_fxsr_struct user_fxsr_struct
#endif
#ifdef CONFIG_MATH_EMULATION
-#define HAVE_HWFP (boot_cpu_data.hard_math)
+# define HAVE_HWFP (boot_cpu_data.hard_math)
#else
-#define HAVE_HWFP 1
+# define HAVE_HWFP 1
#endif
-static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
+static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
void mxcsr_feature_mask_init(void)
{
unsigned long mask = 0;
+
clts();
if (cpu_has_fxsr) {
memset(&current->thread.i387.fxsave, 0,
@@ -69,10 +65,11 @@ void __cpuinit fpu_init(void)
if (offsetof(struct task_struct, thread.i387.fxsave) & 15)
__bad_fxsave_alignment();
+
set_in_cr4(X86_CR4_OSFXSR);
set_in_cr4(X86_CR4_OSXMMEXCPT);
- write_cr0(oldcr0 & ~((1UL<<3)|(1UL<<2))); /* clear TS and EM */
+ write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */
mxcsr_feature_mask_init();
/* clean state in init */
@@ -178,6 +175,7 @@ static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
+
return tmp;
}
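The shift-and-mask cascade above compacts one valid bit per FPU register (spread as 0V0V...) down into the low byte; a standalone sketch of the full i387-to-FXSR tag-word conversion, assuming the usual encoding in which tag value 11 means the register is empty:

#include <stdio.h>

/* i387 tag word: 2 bits per register, 11 = empty.	*/
/* FXSR tag byte: 1 bit per register, 1 = valid.	*/
static unsigned short twd_i387_to_fxsr(unsigned short twd)
{
	unsigned int tmp;

	tmp = ~twd;				/* empty (11) pairs become 00 */
	tmp = (tmp | (tmp >> 1)) & 0x5555;	/* 0V0V0V0V0V0V0V0V */
	tmp = (tmp | (tmp >> 1)) & 0x3333;	/* 00VV00VV00VV00VV */
	tmp = (tmp | (tmp >> 2)) & 0x0f0f;	/* 0000VVVV0000VVVV */
	tmp = (tmp | (tmp >> 4)) & 0x00ff;	/* 00000000VVVVVVVV */
	return (unsigned short)tmp;
}

int main(void)
{
	/* st0 and st1 in use (tag 00), everything else empty (tag 11) */
	unsigned short twd = 0xfff0;

	printf("fxsr tag byte: 0x%02x\n", twd_i387_to_fxsr(twd));	/* 0x03 */
	return 0;
}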
@@ -232,8 +230,8 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
* FXSR floating point environment conversions.
*/
-static void convert_from_fxsr(struct user_i387_ia32_struct *env,
- struct task_struct *tsk)
+static void
+convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
{
struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave;
struct _fpreg *to = (struct _fpreg *) &env->st_space[0];
@@ -252,10 +250,11 @@ static void convert_from_fxsr(struct user_i387_ia32_struct *env,
* should be actually ds/cs at fpu exception time, but
* that information is not available in 64bit mode.
*/
- asm("mov %%ds,%0" : "=r" (env->fos));
- asm("mov %%cs,%0" : "=r" (env->fcs));
+ asm("mov %%ds, %[fos]" : [fos] "=r" (env->fos));
+ asm("mov %%cs, %[fcs]" : [fcs] "=r" (env->fcs));
} else {
struct pt_regs *regs = task_pt_regs(tsk);
+
env->fos = 0xffff0000 | tsk->thread.ds;
env->fcs = regs->cs;
}
@@ -309,9 +308,10 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
init_fpu(target);
- if (!cpu_has_fxsr)
+ if (!cpu_has_fxsr) {
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&target->thread.i387.fsave, 0, -1);
+ }
if (kbuf && pos == 0 && count == sizeof(env)) {
convert_from_fxsr(kbuf, target);
@@ -319,6 +319,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
}
convert_from_fxsr(&env, target);
+
return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
}
@@ -335,9 +336,10 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
init_fpu(target);
set_stopped_child_used_math(target);
- if (!cpu_has_fxsr)
+ if (!cpu_has_fxsr) {
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&target->thread.i387.fsave, 0, -1);
+ }
if (pos > 0 || count < sizeof(env))
convert_from_fxsr(&env, target);
@@ -392,28 +394,28 @@ int save_i387_ia32(struct _fpstate_ia32 __user *buf)
{
if (!used_math())
return 0;
-
- /* This will cause a "finit" to be triggered by the next
+ /*
+ * This will cause a "finit" to be triggered by the next
* attempted FPU operation by the 'current' process.
*/
clear_used_math();
- if (HAVE_HWFP) {
- if (cpu_has_fxsr) {
- return save_i387_fxsave(buf);
- } else {
- return save_i387_fsave(buf);
- }
- } else {
+ if (!HAVE_HWFP) {
return fpregs_soft_get(current, NULL,
0, sizeof(struct user_i387_ia32_struct),
NULL, buf) ? -1 : 1;
}
+
+ if (cpu_has_fxsr)
+ return save_i387_fxsave(buf);
+ else
+ return save_i387_fsave(buf);
}
static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
{
struct task_struct *tsk = current;
+
clear_fpu(tsk);
return __copy_from_user(&tsk->thread.i387.fsave, buf,
sizeof(struct i387_fsave_struct));
@@ -421,9 +423,10 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
{
- int err;
struct task_struct *tsk = current;
struct user_i387_ia32_struct env;
+ int err;
+
clear_fpu(tsk);
err = __copy_from_user(&tsk->thread.i387.fxsave, &buf->_fxsr_env[0],
sizeof(struct i387_fxsave_struct));
@@ -432,6 +435,7 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
if (err || __copy_from_user(&env, buf, sizeof(env)))
return 1;
convert_to_fxsr(tsk, &env);
+
return 0;
}
@@ -440,17 +444,17 @@ int restore_i387_ia32(struct _fpstate_ia32 __user *buf)
int err;
if (HAVE_HWFP) {
- if (cpu_has_fxsr) {
+ if (cpu_has_fxsr)
err = restore_i387_fxsave(buf);
- } else {
+ else
err = restore_i387_fsave(buf);
- }
} else {
err = fpregs_soft_set(current, NULL,
0, sizeof(struct user_i387_ia32_struct),
NULL, buf) != 0;
}
set_used_math();
+
return err;
}
@@ -463,8 +467,8 @@ int restore_i387_ia32(struct _fpstate_ia32 __user *buf)
*/
int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu)
{
- int fpvalid;
struct task_struct *tsk = current;
+ int fpvalid;
fpvalid = !!used_math();
if (fpvalid)
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 4ca548632c8d..2e2f42074e18 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -71,6 +71,16 @@ int sis_apic_bug = -1;
*/
int nr_ioapic_registers[MAX_IO_APICS];
+/* I/O APIC entries */
+struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+int nr_ioapics;
+
+/* MP IRQ source entries */
+struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+/* # of MP IRQ source entries */
+int mp_irq_entries;
+
static int disable_timer_pin_1 __initdata;
/*
@@ -810,10 +820,7 @@ static int __init find_isa_irq_pin(int irq, int type)
for (i = 0; i < mp_irq_entries; i++) {
int lbus = mp_irqs[i].mpc_srcbus;
- if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
- mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
- mp_bus_id_to_type[lbus] == MP_BUS_MCA
- ) &&
+ if (test_bit(lbus, mp_bus_not_pci) &&
(mp_irqs[i].mpc_irqtype == type) &&
(mp_irqs[i].mpc_srcbusirq == irq))
@@ -829,10 +836,7 @@ static int __init find_isa_irq_apic(int irq, int type)
for (i = 0; i < mp_irq_entries; i++) {
int lbus = mp_irqs[i].mpc_srcbus;
- if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
- mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
- mp_bus_id_to_type[lbus] == MP_BUS_MCA
- ) &&
+ if (test_bit(lbus, mp_bus_not_pci) &&
(mp_irqs[i].mpc_irqtype == type) &&
(mp_irqs[i].mpc_srcbusirq == irq))
break;
@@ -872,7 +876,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
break;
- if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
+ if (!test_bit(lbus, mp_bus_not_pci) &&
!mp_irqs[i].mpc_irqtype &&
(bus == lbus) &&
(slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
@@ -921,6 +925,7 @@ void __init setup_ioapic_dest(void)
}
#endif
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
/*
* EISA Edge/Level control register, ELCR
*/
@@ -934,6 +939,13 @@ static int EISA_ELCR(unsigned int irq)
"Broken MPtable reports ISA irq %d\n", irq);
return 0;
}
+#endif
+
+/* ISA interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_ISA_trigger(idx) (0)
+#define default_ISA_polarity(idx) (0)
/* EISA interrupts are always polarity zero and can be edge or level
* trigger depending on the ELCR value. If an interrupt is listed as
@@ -941,13 +953,7 @@ static int EISA_ELCR(unsigned int irq)
* be read in from the ELCR */
#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
-#define default_EISA_polarity(idx) (0)
-
-/* ISA interrupts are always polarity zero edge triggered,
- * when listed as conforming in the MP table. */
-
-#define default_ISA_trigger(idx) (0)
-#define default_ISA_polarity(idx) (0)
+#define default_EISA_polarity(idx) default_ISA_polarity(idx)
/* PCI interrupts are always polarity one level triggered,
* when listed as conforming in the MP table. */
@@ -959,7 +965,7 @@ static int EISA_ELCR(unsigned int irq)
* when listed as conforming in the MP table. */
#define default_MCA_trigger(idx) (1)
-#define default_MCA_polarity(idx) (0)
+#define default_MCA_polarity(idx) default_ISA_polarity(idx)
static int MPBIOS_polarity(int idx)
{
@@ -973,35 +979,9 @@ static int MPBIOS_polarity(int idx)
{
case 0: /* conforms, ie. bus-type dependent polarity */
{
- switch (mp_bus_id_to_type[bus])
- {
- case MP_BUS_ISA: /* ISA pin */
- {
- polarity = default_ISA_polarity(idx);
- break;
- }
- case MP_BUS_EISA: /* EISA pin */
- {
- polarity = default_EISA_polarity(idx);
- break;
- }
- case MP_BUS_PCI: /* PCI pin */
- {
- polarity = default_PCI_polarity(idx);
- break;
- }
- case MP_BUS_MCA: /* MCA pin */
- {
- polarity = default_MCA_polarity(idx);
- break;
- }
- default:
- {
- printk(KERN_WARNING "broken BIOS!!\n");
- polarity = 1;
- break;
- }
- }
+ polarity = test_bit(bus, mp_bus_not_pci)?
+ default_ISA_polarity(idx):
+ default_PCI_polarity(idx);
break;
}
case 1: /* high active */
@@ -1042,11 +1022,15 @@ static int MPBIOS_trigger(int idx)
{
case 0: /* conforms, ie. bus-type dependent */
{
+ trigger = test_bit(bus, mp_bus_not_pci)?
+ default_ISA_trigger(idx):
+ default_PCI_trigger(idx);
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
switch (mp_bus_id_to_type[bus])
{
case MP_BUS_ISA: /* ISA pin */
{
- trigger = default_ISA_trigger(idx);
+ /* set before the switch */
break;
}
case MP_BUS_EISA: /* EISA pin */
@@ -1056,7 +1040,7 @@ static int MPBIOS_trigger(int idx)
}
case MP_BUS_PCI: /* PCI pin */
{
- trigger = default_PCI_trigger(idx);
+ /* set before the switch */
break;
}
case MP_BUS_MCA: /* MCA pin */
@@ -1071,6 +1055,7 @@ static int MPBIOS_trigger(int idx)
break;
}
}
+#endif
break;
}
case 1: /* edge */
@@ -1120,39 +1105,22 @@ static int pin_2_irq(int idx, int apic, int pin)
if (mp_irqs[idx].mpc_dstirq != pin)
printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
- switch (mp_bus_id_to_type[bus])
- {
- case MP_BUS_ISA: /* ISA pin */
- case MP_BUS_EISA:
- case MP_BUS_MCA:
- {
- irq = mp_irqs[idx].mpc_srcbusirq;
- break;
- }
- case MP_BUS_PCI: /* PCI pin */
- {
- /*
- * PCI IRQs are mapped in order
- */
- i = irq = 0;
- while (i < apic)
- irq += nr_ioapic_registers[i++];
- irq += pin;
-
- /*
- * For MPS mode, so far only needed by ES7000 platform
- */
- if (ioapic_renumber_irq)
- irq = ioapic_renumber_irq(apic, irq);
+ if (test_bit(bus, mp_bus_not_pci))
+ irq = mp_irqs[idx].mpc_srcbusirq;
+ else {
+ /*
+ * PCI IRQs are mapped in order
+ */
+ i = irq = 0;
+ while (i < apic)
+ irq += nr_ioapic_registers[i++];
+ irq += pin;
- break;
- }
- default:
- {
- printk(KERN_ERR "unknown bus type %d.\n",bus);
- irq = 0;
- break;
- }
+ /*
+ * For MPS mode, so far only needed by ES7000 platform
+ */
+ if (ioapic_renumber_irq)
+ irq = ioapic_renumber_irq(apic, irq);
}
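"PCI IRQs are mapped in order" means the global IRQ number for a PCI pin is simply the pin index offset by the redirection-entry counts of all earlier I/O APICs; a small standalone illustration with assumed register counts:

#include <stdio.h>

/* hypothetical redirection-entry counts for three I/O APICs */
static int nr_ioapic_registers[] = { 24, 24, 16 };

/* global IRQ for (apic, pin), mirroring the PCI branch above */
static int pci_pin_to_irq(int apic, int pin)
{
	int i = 0, irq = 0;

	while (i < apic)
		irq += nr_ioapic_registers[i++];
	return irq + pin;
}

int main(void)
{
	printf("apic 0, pin 5 -> IRQ %d\n", pci_pin_to_irq(0, 5));	/* 5  */
	printf("apic 2, pin 3 -> IRQ %d\n", pci_pin_to_irq(2, 3));	/* 51 */
	return 0;
}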
/*
@@ -1260,7 +1228,6 @@ static void __init setup_IO_APIC_irqs(void)
{
struct IO_APIC_route_entry entry;
int apic, pin, idx, irq, first_notcon = 1, vector;
- unsigned long flags;
apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
@@ -1326,9 +1293,7 @@ static void __init setup_IO_APIC_irqs(void)
if (!apic && (irq < 16))
disable_8259A_irq(irq);
}
- spin_lock_irqsave(&ioapic_lock, flags);
- __ioapic_write_entry(apic, pin, entry);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ ioapic_write_entry(apic, pin, entry);
}
}
@@ -1524,8 +1489,8 @@ void /*__init*/ print_local_APIC(void * dummy)
printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
smp_processor_id(), hard_smp_processor_id());
- v = apic_read(APIC_ID);
- printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v));
+ printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v,
+ GET_APIC_ID(read_apic_id()));
v = apic_read(APIC_LVR);
printk(KERN_INFO "... APIC VERSION: %08x\n", v);
ver = GET_APIC_VERSION(v);
@@ -1734,7 +1699,7 @@ void disable_IO_APIC(void)
entry.delivery_mode = dest_ExtINT; /* ExtInt */
entry.vector = 0;
entry.dest.physical.physical_dest =
- GET_APIC_ID(apic_read(APIC_ID));
+ GET_APIC_ID(read_apic_id());
/*
* Add it to the IO-APIC irq-routing table:
@@ -2031,8 +1996,7 @@ static inline void init_IO_APIC_traps(void)
* 0x80, because int 0x80 is hm, kind of importantish. ;)
*/
for (irq = 0; irq < NR_IRQS ; irq++) {
- int tmp = irq;
- if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
+ if (IO_APIC_IRQ(irq) && !irq_vector[irq]) {
/*
* Hmm.. We don't have an entry for this,
* so default to an old-fashioned 8259
@@ -2156,8 +2120,6 @@ static inline void unlock_ExtINT_logic(void)
ioapic_write_entry(apic, pin, entry0);
}
-int timer_uses_ioapic_pin_0;
-
/*
* This code may look a bit paranoid, but it's supposed to cooperate with
* a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
@@ -2168,10 +2130,14 @@ static inline void __init check_timer(void)
{
int apic1, pin1, apic2, pin2;
int vector;
+ unsigned int ver;
unsigned long flags;
local_irq_save(flags);
+ ver = apic_read(APIC_LVR);
+ ver = GET_APIC_VERSION(ver);
+
/*
* get/set the timer IRQ vector:
*/
@@ -2184,11 +2150,15 @@ static inline void __init check_timer(void)
* mode for the 8259A whenever interrupts are routed
* through I/O APICs. Also IRQ0 has to be enabled in
* the 8259A which implies the virtual wire has to be
- * disabled in the local APIC.
+ * disabled in the local APIC. Finally timer interrupts
+ * need to be acknowledged manually in the 8259A for
+ * timer_interrupt() and for the i82489DX when using
+ * the NMI watchdog.
*/
apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
init_8259A(1);
- timer_ack = 1;
+ timer_ack = !cpu_has_tsc;
+ timer_ack |= (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
if (timer_over_8254 > 0)
enable_8259A_irq(0);
@@ -2197,9 +2167,6 @@ static inline void __init check_timer(void)
pin2 = ioapic_i8259.pin;
apic2 = ioapic_i8259.apic;
- if (pin1 == 0)
- timer_uses_ioapic_pin_0 = 1;
-
printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
vector, apic1, pin1, apic2, pin2);
@@ -2789,7 +2756,6 @@ int __init io_apic_get_redir_entries (int ioapic)
int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
{
struct IO_APIC_route_entry entry;
- unsigned long flags;
if (!IO_APIC_IRQ(irq)) {
printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
@@ -2830,9 +2796,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
if (!ioapic && (irq < 16))
disable_8259A_irq(irq);
- spin_lock_irqsave(&ioapic_lock, flags);
- __ioapic_write_entry(ioapic, pin, entry);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ ioapic_write_entry(ioapic, pin, entry);
return 0;
}
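
Both hunks above replace the open-coded lock/write/unlock sequence with ioapic_write_entry(). The helper itself is not shown in this diff; a sketch of the shape the call sites presumably rely on, inferred only from the removed lines rather than from the helper's actual definition:

static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
	unsigned long flags;

	spin_lock_irqsave(&ioapic_lock, flags);
	__ioapic_write_entry(apic, pin, e);	/* writes both 32-bit halves of the entry */
	spin_unlock_irqrestore(&ioapic_lock, flags);
}
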
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 1627c0d53e0b..b54464b26658 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -43,13 +43,15 @@
#include <asm/smp.h>
#include <asm/desc.h>
#include <asm/proto.h>
-#include <asm/mach_apic.h>
#include <asm/acpi.h>
#include <asm/dma.h>
#include <asm/nmi.h>
#include <asm/msidef.h>
#include <asm/hypertransport.h>
+#include <mach_ipi.h>
+#include <mach_apic.h>
+
struct irq_cfg {
cpumask_t domain;
cpumask_t old_domain;
@@ -101,6 +103,16 @@ DEFINE_SPINLOCK(vector_lock);
*/
int nr_ioapic_registers[MAX_IO_APICS];
+/* I/O APIC entries */
+struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+int nr_ioapics;
+
+/* MP IRQ source entries */
+struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+/* # of MP IRQ source entries */
+int mp_irq_entries;
+
/*
* Rough estimation of how many shared IRQs there are, can
* be changed anytime.
@@ -155,11 +167,10 @@ static inline void io_apic_modify(unsigned int apic, unsigned int value)
writel(value, &io_apic->data);
}
-static int io_apic_level_ack_pending(unsigned int irq)
+static bool io_apic_level_ack_pending(unsigned int irq)
{
struct irq_pin_list *entry;
unsigned long flags;
- int pending = 0;
spin_lock_irqsave(&ioapic_lock, flags);
entry = irq_2_pin + irq;
@@ -172,13 +183,17 @@ static int io_apic_level_ack_pending(unsigned int irq)
break;
reg = io_apic_read(entry->apic, 0x10 + pin*2);
/* Is the remote IRR bit set? */
- pending |= (reg >> 14) & 1;
+ if ((reg >> 14) & 1) {
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ return true;
+ }
if (!entry->next)
break;
entry = irq_2_pin + entry->next;
}
spin_unlock_irqrestore(&ioapic_lock, flags);
- return pending;
+
+ return false;
}
/*
@@ -902,9 +917,8 @@ static void __init setup_IO_APIC_irqs(void)
static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
{
struct IO_APIC_route_entry entry;
- unsigned long flags;
- memset(&entry,0,sizeof(entry));
+ memset(&entry, 0, sizeof(entry));
disable_8259A_irq(0);
@@ -932,10 +946,7 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
/*
* Add it to the IO-APIC irq-routing table:
*/
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ ioapic_write_entry(apic, pin, entry);
enable_8259A_irq(0);
}
@@ -1066,8 +1077,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
smp_processor_id(), hard_smp_processor_id());
- v = apic_read(APIC_ID);
- printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(v));
+ printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
v = apic_read(APIC_LVR);
printk(KERN_INFO "... APIC VERSION: %08x\n", v);
ver = GET_APIC_VERSION(v);
@@ -1261,7 +1271,7 @@ void disable_IO_APIC(void)
entry.dest_mode = 0; /* Physical */
entry.delivery_mode = dest_ExtINT; /* ExtInt */
entry.vector = 0;
- entry.dest = GET_APIC_ID(apic_read(APIC_ID));
+ entry.dest = GET_APIC_ID(read_apic_id());
/*
* Add it to the IO-APIC irq-routing table:
@@ -1352,9 +1362,7 @@ static int ioapic_retrigger_irq(unsigned int irq)
unsigned long flags;
spin_lock_irqsave(&vector_lock, flags);
- cpus_clear(mask);
- cpu_set(first_cpu(cfg->domain), mask);
-
+ mask = cpumask_of_cpu(first_cpu(cfg->domain));
send_IPI_mask(mask, cfg->vector);
spin_unlock_irqrestore(&vector_lock, flags);
@@ -1517,8 +1525,7 @@ static inline void init_IO_APIC_traps(void)
* 0x80, because int 0x80 is hm, kind of importantish. ;)
*/
for (irq = 0; irq < NR_IRQS ; irq++) {
- int tmp = irq;
- if (IO_APIC_IRQ(tmp) && !irq_cfg[tmp].vector) {
+ if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) {
/*
* Hmm.. We don't have an entry for this,
* so default to an old-fashioned 8259
@@ -1597,17 +1604,14 @@ static inline void unlock_ExtINT_logic(void)
int apic, pin, i;
struct IO_APIC_route_entry entry0, entry1;
unsigned char save_control, save_freq_select;
- unsigned long flags;
pin = find_isa_irq_pin(8, mp_INT);
apic = find_isa_irq_apic(8, mp_INT);
if (pin == -1)
return;
- spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
- *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ entry0 = ioapic_read_entry(apic, pin);
+
clear_IO_APIC_pin(apic, pin);
memset(&entry1, 0, sizeof(entry1));
@@ -1620,10 +1624,7 @@ static inline void unlock_ExtINT_logic(void)
entry1.trigger = 0;
entry1.vector = 0;
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ ioapic_write_entry(apic, pin, entry1);
save_control = CMOS_READ(RTC_CONTROL);
save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
@@ -1642,10 +1643,7 @@ static inline void unlock_ExtINT_logic(void)
CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
clear_IO_APIC_pin(apic, pin);
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ ioapic_write_entry(apic, pin, entry0);
}
/*
@@ -2314,7 +2312,6 @@ static struct resource * __init ioapic_setup_resources(void)
res = (void *)mem;
if (mem != NULL) {
- memset(mem, 0, n);
mem += sizeof(struct resource) * nr_ioapics;
for (i = 0; i < nr_ioapics; i++) {
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
new file mode 100644
index 000000000000..c0df7b89ca23
--- /dev/null
+++ b/arch/x86/kernel/ipi.c
@@ -0,0 +1,178 @@
+#include <linux/cpumask.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/cache.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+
+#include <asm/smp.h>
+#include <asm/mtrr.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
+#include <asm/apic.h>
+#include <asm/proto.h>
+
+#ifdef CONFIG_X86_32
+#include <mach_apic.h>
+/*
+ * the following functions deal with sending IPIs between CPUs.
+ *
+ * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
+ */
+
+static inline int __prepare_ICR(unsigned int shortcut, int vector)
+{
+ unsigned int icr = shortcut | APIC_DEST_LOGICAL;
+
+ switch (vector) {
+ default:
+ icr |= APIC_DM_FIXED | vector;
+ break;
+ case NMI_VECTOR:
+ icr |= APIC_DM_NMI;
+ break;
+ }
+ return icr;
+}
+
+static inline int __prepare_ICR2(unsigned int mask)
+{
+ return SET_APIC_DEST_FIELD(mask);
+}
+
+void __send_IPI_shortcut(unsigned int shortcut, int vector)
+{
+ /*
+ * Subtle. In the case of the 'never do double writes' workaround
+ * we have to lock out interrupts to be safe. As we don't care
+ * about the value read, we use an atomic rmw access to avoid costly
+ * cli/sti. Otherwise we use an even cheaper single atomic write
+ * to the APIC.
+ */
+ unsigned int cfg;
+
+ /*
+ * Wait for idle.
+ */
+ apic_wait_icr_idle();
+
+ /*
+ * No need to touch the target chip field
+ */
+ cfg = __prepare_ICR(shortcut, vector);
+
+ /*
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
+ apic_write_around(APIC_ICR, cfg);
+}
+
+void send_IPI_self(int vector)
+{
+ __send_IPI_shortcut(APIC_DEST_SELF, vector);
+}
+
+/*
+ * This is used to send an IPI with no shorthand notation (the destination is
+ * specified in bits 56 to 63 of the ICR).
+ */
+static inline void __send_IPI_dest_field(unsigned long mask, int vector)
+{
+ unsigned long cfg;
+
+ /*
+ * Wait for idle.
+ */
+ if (unlikely(vector == NMI_VECTOR))
+ safe_apic_wait_icr_idle();
+ else
+ apic_wait_icr_idle();
+
+ /*
+ * prepare target chip field
+ */
+ cfg = __prepare_ICR2(mask);
+ apic_write_around(APIC_ICR2, cfg);
+
+ /*
+ * program the ICR
+ */
+ cfg = __prepare_ICR(0, vector);
+
+ /*
+ * Send the IPI. The write to APIC_ICR fires this off.
+ */
+ apic_write_around(APIC_ICR, cfg);
+}
+
+/*
+ * This is only used on smaller machines.
+ */
+void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
+{
+ unsigned long mask = cpus_addr(cpumask)[0];
+ unsigned long flags;
+
+ local_irq_save(flags);
+ WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
+ __send_IPI_dest_field(mask, vector);
+ local_irq_restore(flags);
+}
+
+void send_IPI_mask_sequence(cpumask_t mask, int vector)
+{
+ unsigned long flags;
+ unsigned int query_cpu;
+
+ /*
+ * Hack. The clustered APIC addressing mode doesn't allow us to send
+ * to an arbitrary mask, so we do unicasts to each CPU instead. This
+ * should be modified to do 1 message per cluster ID - mbligh
+ */
+
+ local_irq_save(flags);
+ for_each_possible_cpu(query_cpu) {
+ if (cpu_isset(query_cpu, mask)) {
+ __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
+ vector);
+ }
+ }
+ local_irq_restore(flags);
+}
+
+/* must come after the send_IPI functions above for inlining */
+#include <mach_ipi.h>
+static int convert_apicid_to_cpu(int apic_id)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ if (per_cpu(x86_cpu_to_apicid, i) == apic_id)
+ return i;
+ }
+ return -1;
+}
+
+int safe_smp_processor_id(void)
+{
+ int apicid, cpuid;
+
+ if (!boot_cpu_has(X86_FEATURE_APIC))
+ return 0;
+
+ apicid = hard_smp_processor_id();
+ if (apicid == BAD_APICID)
+ return 0;
+
+ cpuid = convert_apicid_to_cpu(apicid);
+
+ return cpuid >= 0 ? cpuid : 0;
+}
+#endif
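
The __prepare_ICR()/__prepare_ICR2() helpers above only assemble register values; the IPI is actually triggered by the final write to APIC_ICR. A user-space sketch of the resulting bit layout, with the usual apicdef.h constants copied in by hand (treat the exact numbers as illustrative):

#include <stdio.h>

#define APIC_DEST_LOGICAL	0x00800	/* logical destination mode */
#define APIC_DM_FIXED		0x00000
#define APIC_DM_NMI		0x00400
#define NMI_VECTOR		0x02
#define SET_APIC_DEST_FIELD(x)	((x) << 24)

static unsigned int prepare_icr(unsigned int shortcut, int vector)
{
	unsigned int icr = shortcut | APIC_DEST_LOGICAL;

	if (vector == NMI_VECTOR)
		icr |= APIC_DM_NMI;
	else
		icr |= APIC_DM_FIXED | vector;
	return icr;
}

int main(void)
{
	/* fixed IPI, vector 0xfd, to logical CPUs 0 and 1 (mask 0x3) */
	printf("ICR2 = 0x%08x\n", SET_APIC_DEST_FIELD(0x3));
	printf("ICR  = 0x%08x\n", prepare_icr(0, 0xfd));
	return 0;
}
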
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index cef054b09d27..6ea67b76a214 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -79,7 +79,7 @@ unsigned int do_IRQ(struct pt_regs *regs)
if (unlikely((unsigned)irq >= NR_IRQS)) {
printk(KERN_EMERG "%s: cannot handle IRQ %d\n",
- __FUNCTION__, irq);
+ __func__, irq);
BUG();
}
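
The only change here swaps the GCC-specific __FUNCTION__ for the standard C99 __func__, which expands to the same function name; a short stand-alone illustration:

#include <stdio.h>

static void do_IRQ_demo(void)
{
	/* prints "do_IRQ_demo: cannot handle IRQ 13" */
	printf("%s: cannot handle IRQ %d\n", __func__, 13);
}

int main(void)
{
	do_IRQ_demo();
	return 0;
}
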
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
new file mode 100644
index 000000000000..24362ecf5f9a
--- /dev/null
+++ b/arch/x86/kernel/kgdb.c
@@ -0,0 +1,571 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ */
+
+/*
+ * Copyright (C) 2004 Amit S. Kale <amitkale@linsyssoft.com>
+ * Copyright (C) 2000-2001 VERITAS Software Corporation.
+ * Copyright (C) 2002 Andi Kleen, SuSE Labs
+ * Copyright (C) 2004 LinSysSoft Technologies Pvt. Ltd.
+ * Copyright (C) 2007 MontaVista Software, Inc.
+ * Copyright (C) 2007-2008 Jason Wessel, Wind River Systems, Inc.
+ */
+/****************************************************************************
+ * Contributor: Lake Stevens Instrument Division
+ * Written by: Glenn Engel
+ * Updated by: Amit Kale <akale@veritas.com>
+ * Updated by: Tom Rini <trini@kernel.crashing.org>
+ * Updated by: Jason Wessel <jason.wessel@windriver.com>
+ * Modified for 386 by Jim Kingdon, Cygnus Support.
+ * Original kgdb, compatibility with 2.1.xx kernel by
+ * David Grothe <dave@gcom.com>
+ * Integrated into 2.2.5 kernel by Tigran Aivazian <tigran@sco.com>
+ * X86_64 changes from Andi Kleen's patch merged by Jim Houston
+ */
+#include <linux/spinlock.h>
+#include <linux/kdebug.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/kgdb.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/nmi.h>
+
+#include <asm/apicdef.h>
+#include <asm/system.h>
+
+#ifdef CONFIG_X86_32
+# include <mach_ipi.h>
+#else
+# include <asm/mach_apic.h>
+#endif
+
+/*
+ * Put the error code here just in case the user cares:
+ */
+static int gdb_x86errcode;
+
+/*
+ * Likewise, the vector number here (since GDB only gets the signal
+ * number through the usual means, and that's not very specific):
+ */
+static int gdb_x86vector = -1;
+
+/**
+ * pt_regs_to_gdb_regs - Convert ptrace regs to GDB regs
+ * @gdb_regs: A pointer to hold the registers in the order GDB wants.
+ * @regs: The &struct pt_regs of the current process.
+ *
+ * Convert the pt_regs in @regs into the format for registers that
+ * GDB expects, stored in @gdb_regs.
+ */
+void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
+{
+ gdb_regs[GDB_AX] = regs->ax;
+ gdb_regs[GDB_BX] = regs->bx;
+ gdb_regs[GDB_CX] = regs->cx;
+ gdb_regs[GDB_DX] = regs->dx;
+ gdb_regs[GDB_SI] = regs->si;
+ gdb_regs[GDB_DI] = regs->di;
+ gdb_regs[GDB_BP] = regs->bp;
+ gdb_regs[GDB_PS] = regs->flags;
+ gdb_regs[GDB_PC] = regs->ip;
+#ifdef CONFIG_X86_32
+ gdb_regs[GDB_DS] = regs->ds;
+ gdb_regs[GDB_ES] = regs->es;
+ gdb_regs[GDB_CS] = regs->cs;
+ gdb_regs[GDB_SS] = __KERNEL_DS;
+ gdb_regs[GDB_FS] = 0xFFFF;
+ gdb_regs[GDB_GS] = 0xFFFF;
+#else
+ gdb_regs[GDB_R8] = regs->r8;
+ gdb_regs[GDB_R9] = regs->r9;
+ gdb_regs[GDB_R10] = regs->r10;
+ gdb_regs[GDB_R11] = regs->r11;
+ gdb_regs[GDB_R12] = regs->r12;
+ gdb_regs[GDB_R13] = regs->r13;
+ gdb_regs[GDB_R14] = regs->r14;
+ gdb_regs[GDB_R15] = regs->r15;
+#endif
+ gdb_regs[GDB_SP] = regs->sp;
+}
+
+/**
+ * sleeping_thread_to_gdb_regs - Convert ptrace regs to GDB regs
+ * @gdb_regs: A pointer to hold the registers in the order GDB wants.
+ * @p: The &struct task_struct of the desired process.
+ *
+ * Convert the register values of the sleeping process in @p to
+ * the format that GDB expects.
+ * This function is called when kgdb does not have access to the
+ * &struct pt_regs and therefore it should fill the gdb registers
+ * @gdb_regs with what has been saved in &struct thread_struct
+ * thread field during switch_to.
+ */
+void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
+{
+ gdb_regs[GDB_AX] = 0;
+ gdb_regs[GDB_BX] = 0;
+ gdb_regs[GDB_CX] = 0;
+ gdb_regs[GDB_DX] = 0;
+ gdb_regs[GDB_SI] = 0;
+ gdb_regs[GDB_DI] = 0;
+ gdb_regs[GDB_BP] = *(unsigned long *)p->thread.sp;
+#ifdef CONFIG_X86_32
+ gdb_regs[GDB_DS] = __KERNEL_DS;
+ gdb_regs[GDB_ES] = __KERNEL_DS;
+ gdb_regs[GDB_PS] = 0;
+ gdb_regs[GDB_CS] = __KERNEL_CS;
+ gdb_regs[GDB_PC] = p->thread.ip;
+ gdb_regs[GDB_SS] = __KERNEL_DS;
+ gdb_regs[GDB_FS] = 0xFFFF;
+ gdb_regs[GDB_GS] = 0xFFFF;
+#else
+ gdb_regs[GDB_PS] = *(unsigned long *)(p->thread.sp + 8);
+ gdb_regs[GDB_PC] = 0;
+ gdb_regs[GDB_R8] = 0;
+ gdb_regs[GDB_R9] = 0;
+ gdb_regs[GDB_R10] = 0;
+ gdb_regs[GDB_R11] = 0;
+ gdb_regs[GDB_R12] = 0;
+ gdb_regs[GDB_R13] = 0;
+ gdb_regs[GDB_R14] = 0;
+ gdb_regs[GDB_R15] = 0;
+#endif
+ gdb_regs[GDB_SP] = p->thread.sp;
+}
+
+/**
+ * gdb_regs_to_pt_regs - Convert GDB regs to ptrace regs.
+ * @gdb_regs: A pointer to hold the registers we've received from GDB.
+ * @regs: A pointer to a &struct pt_regs to hold these values in.
+ *
+ * Convert the GDB regs in @gdb_regs into the pt_regs, and store them
+ * in @regs.
+ */
+void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
+{
+ regs->ax = gdb_regs[GDB_AX];
+ regs->bx = gdb_regs[GDB_BX];
+ regs->cx = gdb_regs[GDB_CX];
+ regs->dx = gdb_regs[GDB_DX];
+ regs->si = gdb_regs[GDB_SI];
+ regs->di = gdb_regs[GDB_DI];
+ regs->bp = gdb_regs[GDB_BP];
+ regs->flags = gdb_regs[GDB_PS];
+ regs->ip = gdb_regs[GDB_PC];
+#ifdef CONFIG_X86_32
+ regs->ds = gdb_regs[GDB_DS];
+ regs->es = gdb_regs[GDB_ES];
+ regs->cs = gdb_regs[GDB_CS];
+#else
+ regs->r8 = gdb_regs[GDB_R8];
+ regs->r9 = gdb_regs[GDB_R9];
+ regs->r10 = gdb_regs[GDB_R10];
+ regs->r11 = gdb_regs[GDB_R11];
+ regs->r12 = gdb_regs[GDB_R12];
+ regs->r13 = gdb_regs[GDB_R13];
+ regs->r14 = gdb_regs[GDB_R14];
+ regs->r15 = gdb_regs[GDB_R15];
+#endif
+}
+
+static struct hw_breakpoint {
+ unsigned enabled;
+ unsigned type;
+ unsigned len;
+ unsigned long addr;
+} breakinfo[4];
+
+static void kgdb_correct_hw_break(void)
+{
+ unsigned long dr7;
+ int correctit = 0;
+ int breakbit;
+ int breakno;
+
+ get_debugreg(dr7, 7);
+ for (breakno = 0; breakno < 4; breakno++) {
+ breakbit = 2 << (breakno << 1);
+ if (!(dr7 & breakbit) && breakinfo[breakno].enabled) {
+ correctit = 1;
+ dr7 |= breakbit;
+ dr7 &= ~(0xf0000 << (breakno << 2));
+ dr7 |= ((breakinfo[breakno].len << 2) |
+ breakinfo[breakno].type) <<
+ ((breakno << 2) + 16);
+ if (breakno >= 0 && breakno <= 3)
+ set_debugreg(breakinfo[breakno].addr, breakno);
+
+ } else {
+ if ((dr7 & breakbit) && !breakinfo[breakno].enabled) {
+ correctit = 1;
+ dr7 &= ~breakbit;
+ dr7 &= ~(0xf0000 << (breakno << 2));
+ }
+ }
+ }
+ if (correctit)
+ set_debugreg(dr7, 7);
+}
+
+static int
+kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ if (breakinfo[i].addr == addr && breakinfo[i].enabled)
+ break;
+ if (i == 4)
+ return -1;
+
+ breakinfo[i].enabled = 0;
+
+ return 0;
+}
+
+static void kgdb_remove_all_hw_break(void)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ memset(&breakinfo[i], 0, sizeof(struct hw_breakpoint));
+}
+
+static int
+kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
+{
+ unsigned type;
+ int i;
+
+ for (i = 0; i < 4; i++)
+ if (!breakinfo[i].enabled)
+ break;
+ if (i == 4)
+ return -1;
+
+ switch (bptype) {
+ case BP_HARDWARE_BREAKPOINT:
+ type = 0;
+ len = 1;
+ break;
+ case BP_WRITE_WATCHPOINT:
+ type = 1;
+ break;
+ case BP_ACCESS_WATCHPOINT:
+ type = 3;
+ break;
+ default:
+ return -1;
+ }
+
+ if (len == 1 || len == 2 || len == 4)
+ breakinfo[i].len = len - 1;
+ else
+ return -1;
+
+ breakinfo[i].enabled = 1;
+ breakinfo[i].addr = addr;
+ breakinfo[i].type = type;
+
+ return 0;
+}
+
+/**
+ * kgdb_disable_hw_debug - Disable hardware debugging while we are in kgdb.
+ * @regs: Current &struct pt_regs.
+ *
+ * This function will be called if the particular architecture must
+ * disable hardware debugging while it is processing gdb packets or
+ * handling an exception.
+ */
+void kgdb_disable_hw_debug(struct pt_regs *regs)
+{
+ /* Disable hardware debugging while we are in kgdb: */
+ set_debugreg(0UL, 7);
+}
+
+/**
+ * kgdb_post_primary_code - Save error vector/code numbers.
+ * @regs: Original pt_regs.
+ * @e_vector: Original error vector.
+ * @err_code: Original error code.
+ *
+ * This is needed on architectures which support SMP and KGDB.
+ * This function is called after all the slave cpus have been put
+ * to a known spin state and the primary CPU has control over KGDB.
+ */
+void kgdb_post_primary_code(struct pt_regs *regs, int e_vector, int err_code)
+{
+ /* primary processor is completely in the debugger */
+ gdb_x86vector = e_vector;
+ gdb_x86errcode = err_code;
+}
+
+#ifdef CONFIG_SMP
+/**
+ * kgdb_roundup_cpus - Get other CPUs into a holding pattern
+ * @flags: Current IRQ state
+ *
+ * On SMP systems, we need to get the attention of the other CPUs
+ * and get them into a known state. This should do what is needed
+ * to get the other CPUs to call kgdb_wait(). Note that on some arches,
+ * the NMI approach is not used for rounding up all the CPUs. For example,
+ * in case of MIPS, smp_call_function() is used to roundup CPUs. In
+ * this case, we have to make sure that interrupts are enabled before
+ * calling smp_call_function(). The argument to this function is
+ * the flags that will be used when restoring the interrupts. There is a
+ * local_irq_save() call before kgdb_roundup_cpus().
+ *
+ * On non-SMP systems, this is not called.
+ */
+void kgdb_roundup_cpus(unsigned long flags)
+{
+ send_IPI_allbutself(APIC_DM_NMI);
+}
+#endif
+
+/**
+ * kgdb_arch_handle_exception - Handle architecture specific GDB packets.
+ * @vector: The error vector of the exception that happened.
+ * @signo: The signal number of the exception that happened.
+ * @err_code: The error code of the exception that happened.
+ * @remcom_in_buffer: The buffer of the packet we have read.
+ * @remcom_out_buffer: The buffer of %BUFMAX bytes to write a packet into.
+ * @regs: The &struct pt_regs of the current process.
+ *
+ * This function MUST handle the 'c' and 's' command packets,
+ * as well as packets to set / remove a hardware breakpoint, if used.
+ * If there are additional packets which the hardware needs to handle,
+ * they are handled here. The code should return -1 if it wants to
+ * process more packets, and a %0 or %1 if it wants to exit from the
+ * kgdb callback.
+ */
+int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
+ char *remcomInBuffer, char *remcomOutBuffer,
+ struct pt_regs *linux_regs)
+{
+ unsigned long addr;
+ unsigned long dr6;
+ char *ptr;
+ int newPC;
+
+ switch (remcomInBuffer[0]) {
+ case 'c':
+ case 's':
+ /* try to read optional parameter, pc unchanged if no parm */
+ ptr = &remcomInBuffer[1];
+ if (kgdb_hex2long(&ptr, &addr))
+ linux_regs->ip = addr;
+ case 'D':
+ case 'k':
+ newPC = linux_regs->ip;
+
+ /* clear the trace bit */
+ linux_regs->flags &= ~X86_EFLAGS_TF;
+ atomic_set(&kgdb_cpu_doing_single_step, -1);
+
+ /* set the trace bit if we're stepping */
+ if (remcomInBuffer[0] == 's') {
+ linux_regs->flags |= X86_EFLAGS_TF;
+ kgdb_single_step = 1;
+ if (kgdb_contthread) {
+ atomic_set(&kgdb_cpu_doing_single_step,
+ raw_smp_processor_id());
+ }
+ }
+
+ get_debugreg(dr6, 6);
+ if (!(dr6 & 0x4000)) {
+ int breakno;
+
+ for (breakno = 0; breakno < 4; breakno++) {
+ if (dr6 & (1 << breakno) &&
+ breakinfo[breakno].type == 0) {
+ /* Set restore flag: */
+ linux_regs->flags |= X86_EFLAGS_RF;
+ break;
+ }
+ }
+ }
+ set_debugreg(0UL, 6);
+ kgdb_correct_hw_break();
+
+ return 0;
+ }
+
+ /* this means that we do not want to exit from the handler: */
+ return -1;
+}
+
+static inline int
+single_step_cont(struct pt_regs *regs, struct die_args *args)
+{
+ /*
+ * Single step exception from kernel space to user space so
+ * eat the exception and continue the process:
+ */
+ printk(KERN_ERR "KGDB: trap/step from kernel to user space, "
+ "resuming...\n");
+ kgdb_arch_handle_exception(args->trapnr, args->signr,
+ args->err, "c", "", regs);
+
+ return NOTIFY_STOP;
+}
+
+static int was_in_debug_nmi[NR_CPUS];
+
+static int __kgdb_notify(struct die_args *args, unsigned long cmd)
+{
+ struct pt_regs *regs = args->regs;
+
+ switch (cmd) {
+ case DIE_NMI:
+ if (atomic_read(&kgdb_active) != -1) {
+ /* KGDB CPU roundup */
+ kgdb_nmicallback(raw_smp_processor_id(), regs);
+ was_in_debug_nmi[raw_smp_processor_id()] = 1;
+ touch_nmi_watchdog();
+ return NOTIFY_STOP;
+ }
+ return NOTIFY_DONE;
+
+ case DIE_NMI_IPI:
+ if (atomic_read(&kgdb_active) != -1) {
+ /* KGDB CPU roundup */
+ kgdb_nmicallback(raw_smp_processor_id(), regs);
+ was_in_debug_nmi[raw_smp_processor_id()] = 1;
+ touch_nmi_watchdog();
+ }
+ return NOTIFY_DONE;
+
+ case DIE_NMIUNKNOWN:
+ if (was_in_debug_nmi[raw_smp_processor_id()]) {
+ was_in_debug_nmi[raw_smp_processor_id()] = 0;
+ return NOTIFY_STOP;
+ }
+ return NOTIFY_DONE;
+
+ case DIE_NMIWATCHDOG:
+ if (atomic_read(&kgdb_active) != -1) {
+ /* KGDB CPU roundup: */
+ kgdb_nmicallback(raw_smp_processor_id(), regs);
+ return NOTIFY_STOP;
+ }
+ /* Enter debugger: */
+ break;
+
+ case DIE_DEBUG:
+ if (atomic_read(&kgdb_cpu_doing_single_step) ==
+ raw_smp_processor_id() &&
+ user_mode(regs))
+ return single_step_cont(regs, args);
+ /* fall through */
+ default:
+ if (user_mode(regs))
+ return NOTIFY_DONE;
+ }
+
+ if (kgdb_handle_exception(args->trapnr, args->signr, args->err, regs))
+ return NOTIFY_DONE;
+
+ /* Must touch watchdog before return to normal operation */
+ touch_nmi_watchdog();
+ return NOTIFY_STOP;
+}
+
+static int
+kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
+{
+ unsigned long flags;
+ int ret;
+
+ local_irq_save(flags);
+ ret = __kgdb_notify(ptr, cmd);
+ local_irq_restore(flags);
+
+ return ret;
+}
+
+static struct notifier_block kgdb_notifier = {
+ .notifier_call = kgdb_notify,
+
+ /*
+ * Lowest-prio notifier priority, we want to be notified last:
+ */
+ .priority = -INT_MAX,
+};
+
+/**
+ * kgdb_arch_init - Perform any architecture specific initialization.
+ *
+ * This function will handle the initialization of any architecture
+ * specific callbacks.
+ */
+int kgdb_arch_init(void)
+{
+ return register_die_notifier(&kgdb_notifier);
+}
+
+/**
+ * kgdb_arch_exit - Perform any architecture specific uninitialization.
+ *
+ * This function will handle the uninitialization of any architecture
+ * specific callbacks, for dynamic registration and unregistration.
+ */
+void kgdb_arch_exit(void)
+{
+ unregister_die_notifier(&kgdb_notifier);
+}
+
+/**
+ *
+ * kgdb_skipexception - Bail out of KGDB when we've been triggered.
+ * @exception: Exception vector number
+ * @regs: Current &struct pt_regs.
+ *
+ * On some architectures we need to skip a breakpoint exception when
+ * it occurs after a breakpoint has been removed.
+ *
+ * Skip an int3 exception when it occurs after a breakpoint has been
+ * removed. Backtrack eip by 1 since the int3 would have caused it to
+ * increment by 1.
+ */
+int kgdb_skipexception(int exception, struct pt_regs *regs)
+{
+ if (exception == 3 && kgdb_isremovedbreak(regs->ip - 1)) {
+ regs->ip -= 1;
+ return 1;
+ }
+ return 0;
+}
+
+unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs)
+{
+ if (exception == 3)
+ return instruction_pointer(regs) - 1;
+ return instruction_pointer(regs);
+}
+
+struct kgdb_arch arch_kgdb_ops = {
+ /* Breakpoint instruction: */
+ .gdb_bpt_instr = { 0xcc },
+ .flags = KGDB_HW_BREAKPOINT,
+ .set_hw_breakpoint = kgdb_set_hw_break,
+ .remove_hw_breakpoint = kgdb_remove_hw_break,
+ .remove_all_hw_break = kgdb_remove_all_hw_break,
+ .correct_hw_break = kgdb_correct_hw_break,
+};
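
For reference when reading kgdb_correct_hw_break() above: slot n owns the global-enable bit 2 << (n << 1) and a four-bit length/type field at bit 16 + 4*n of DR7. A stand-alone sketch of that same encoding with example values (not taken from a real debug session):

#include <stdio.h>

struct hw_breakpoint {
	unsigned type;	/* 0 = execute, 1 = write, 3 = read/write */
	unsigned len;	/* stored as length - 1, as in kgdb_set_hw_break() */
};

static unsigned long dr7_bits(int breakno, const struct hw_breakpoint *bp)
{
	unsigned long dr7 = 0;

	dr7 |= 2UL << (breakno << 1);				/* global enable */
	dr7 |= (unsigned long)((bp->len << 2) | bp->type)
			<< ((breakno << 2) + 16);		/* len/type field */
	return dr7;
}

int main(void)
{
	struct hw_breakpoint bp = { .type = 1, .len = 3 };	/* 4-byte write watchpoint */

	/* slot 1: enable bit 0x8, field 0xd << 20 -> 0x00d00008 */
	printf("dr7 |= 0x%08lx\n", dr7_bits(1, &bp));
	return 0;
}
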
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 34a591283f5d..b8c6743a13da 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -410,13 +410,13 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
static void __kprobes clear_btf(void)
{
if (test_thread_flag(TIF_DEBUGCTLMSR))
- wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
+ update_debugctlmsr(0);
}
static void __kprobes restore_btf(void)
{
if (test_thread_flag(TIF_DEBUGCTLMSR))
- wrmsrl(MSR_IA32_DEBUGCTLMSR, current->thread.debugctlmsr);
+ update_debugctlmsr(current->thread.debugctlmsr);
}
static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
@@ -489,7 +489,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
break;
case KPROBE_HIT_SS:
if (p == kprobe_running()) {
- regs->flags &= ~TF_MASK;
+ regs->flags &= ~X86_EFLAGS_TF;
regs->flags |= kcb->kprobe_saved_flags;
return 0;
} else {
@@ -858,15 +858,15 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs)
if (!cur)
return 0;
+ resume_execution(cur, regs, kcb);
+ regs->flags |= kcb->kprobe_saved_flags;
+ trace_hardirqs_fixup_flags(regs->flags);
+
if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
kcb->kprobe_status = KPROBE_HIT_SSDONE;
cur->post_handler(cur, regs, 0);
}
- resume_execution(cur, regs, kcb);
- regs->flags |= kcb->kprobe_saved_flags;
- trace_hardirqs_fixup_flags(regs->flags);
-
/* Restore back the original saved kprobes variables and continue. */
if (kcb->kprobe_status == KPROBE_REENTER) {
restore_previous_kprobe(kcb);
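
clear_btf()/restore_btf() now go through update_debugctlmsr() instead of a raw wrmsrl(). That helper is not part of this diff; presumably it is a thin guard around the same MSR write that skips CPUs without MSR_IA32_DEBUGCTLMSR, roughly along these lines (a hedged sketch, not the actual definition):

static inline void update_debugctlmsr(unsigned long debugctlmsr)
{
	/* assumed guard: very old CPUs have no DEBUGCTL MSR at all */
	if (boot_cpu_data.x86 < 6)
		return;
	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
}
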
diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c
index 9482033ed0fe..2dc183758be3 100644
--- a/arch/x86/kernel/mca_32.c
+++ b/arch/x86/kernel/mca_32.c
@@ -53,9 +53,9 @@
#include <linux/init.h>
#include <asm/arch_hooks.h>
-static unsigned char which_scsi = 0;
+static unsigned char which_scsi;
-int MCA_bus = 0;
+int MCA_bus;
EXPORT_SYMBOL(MCA_bus);
/*
@@ -68,15 +68,17 @@ static DEFINE_SPINLOCK(mca_lock);
/* Build the status info for the adapter */
-static void mca_configure_adapter_status(struct mca_device *mca_dev) {
+static void mca_configure_adapter_status(struct mca_device *mca_dev)
+{
mca_dev->status = MCA_ADAPTER_NONE;
mca_dev->pos_id = mca_dev->pos[0]
+ (mca_dev->pos[1] << 8);
- if(!mca_dev->pos_id && mca_dev->slot < MCA_MAX_SLOT_NR) {
+ if (!mca_dev->pos_id && mca_dev->slot < MCA_MAX_SLOT_NR) {
- /* id = 0x0000 usually indicates hardware failure,
+ /*
+ * id = 0x0000 usually indicates hardware failure,
* however, ZP Gu (zpg@castle.net> reports that his 9556
* has 0x0000 as id and everything still works. There
* also seem to be an adapter with id = 0x0000; the
@@ -87,9 +89,10 @@ static void mca_configure_adapter_status(struct mca_device *mca_dev) {
mca_dev->status = MCA_ADAPTER_ERROR;
return;
- } else if(mca_dev->pos_id != 0xffff) {
+ } else if (mca_dev->pos_id != 0xffff) {
- /* 0xffff usually indicates that there's no adapter,
+ /*
+ * 0xffff usually indicates that there's no adapter,
* however, some integrated adapters may have 0xffff as
* their id and still be valid. Examples are on-board
* VGA of the 55sx, the integrated SCSI of the 56 & 57,
@@ -99,19 +102,19 @@ static void mca_configure_adapter_status(struct mca_device *mca_dev) {
mca_dev->status = MCA_ADAPTER_NORMAL;
}
- if((mca_dev->pos_id == 0xffff ||
+ if ((mca_dev->pos_id == 0xffff ||
mca_dev->pos_id == 0x0000) && mca_dev->slot >= MCA_MAX_SLOT_NR) {
int j;
- for(j = 2; j < 8; j++) {
- if(mca_dev->pos[j] != 0xff) {
+ for (j = 2; j < 8; j++) {
+ if (mca_dev->pos[j] != 0xff) {
mca_dev->status = MCA_ADAPTER_NORMAL;
break;
}
}
}
- if(!(mca_dev->pos[2] & MCA_ENABLED)) {
+ if (!(mca_dev->pos[2] & MCA_ENABLED)) {
/* enabled bit is in POS 2 */
@@ -133,7 +136,7 @@ static struct resource mca_standard_resources[] = {
#define MCA_STANDARD_RESOURCES ARRAY_SIZE(mca_standard_resources)
-/**
+/*
* mca_read_and_store_pos - read the POS registers into a memory buffer
* @pos: a char pointer to 8 bytes, contains the POS register value on
* successful return
@@ -141,12 +144,14 @@ static struct resource mca_standard_resources[] = {
* Returns 1 if a card actually exists (i.e. the pos isn't
* all 0xff) or 0 otherwise
*/
-static int mca_read_and_store_pos(unsigned char *pos) {
+static int mca_read_and_store_pos(unsigned char *pos)
+{
int j;
int found = 0;
- for(j=0; j<8; j++) {
- if((pos[j] = inb_p(MCA_POS_REG(j))) != 0xff) {
+ for (j = 0; j < 8; j++) {
+ pos[j] = inb_p(MCA_POS_REG(j));
+ if (pos[j] != 0xff) {
/* 0xff all across means no device. 0x00 means
* something's broken, but a device is
* probably there. However, if you get 0x00
@@ -167,11 +172,11 @@ static unsigned char mca_pc_read_pos(struct mca_device *mca_dev, int reg)
unsigned char byte;
unsigned long flags;
- if(reg < 0 || reg >= 8)
+ if (reg < 0 || reg >= 8)
return 0;
spin_lock_irqsave(&mca_lock, flags);
- if(mca_dev->pos_register) {
+ if (mca_dev->pos_register) {
/* Disable adapter setup, enable motherboard setup */
outb_p(0, MCA_ADAPTER_SETUP_REG);
@@ -203,7 +208,7 @@ static void mca_pc_write_pos(struct mca_device *mca_dev, int reg,
{
unsigned long flags;
- if(reg < 0 || reg >= 8)
+ if (reg < 0 || reg >= 8)
return;
spin_lock_irqsave(&mca_lock, flags);
@@ -227,17 +232,17 @@ static void mca_pc_write_pos(struct mca_device *mca_dev, int reg,
}
/* for the primary MCA bus, we have identity transforms */
-static int mca_dummy_transform_irq(struct mca_device * mca_dev, int irq)
+static int mca_dummy_transform_irq(struct mca_device *mca_dev, int irq)
{
return irq;
}
-static int mca_dummy_transform_ioport(struct mca_device * mca_dev, int port)
+static int mca_dummy_transform_ioport(struct mca_device *mca_dev, int port)
{
return port;
}
-static void *mca_dummy_transform_memory(struct mca_device * mca_dev, void *mem)
+static void *mca_dummy_transform_memory(struct mca_device *mca_dev, void *mem)
{
return mem;
}
@@ -251,7 +256,8 @@ static int __init mca_init(void)
short mca_builtin_scsi_ports[] = {0xf7, 0xfd, 0x00};
struct mca_bus *bus;
- /* WARNING: Be careful when making changes here. Putting an adapter
+ /*
+ * WARNING: Be careful when making changes here. Putting an adapter
* and the motherboard simultaneously into setup mode may result in
* damage to chips (according to The Indispensible PC Hardware Book
* by Hans-Peter Messmer). Also, we disable system interrupts (so
@@ -283,7 +289,7 @@ static int __init mca_init(void)
/* get the motherboard device */
mca_dev = kzalloc(sizeof(struct mca_device), GFP_KERNEL);
- if(unlikely(!mca_dev))
+ if (unlikely(!mca_dev))
goto out_nomem;
/*
@@ -309,7 +315,7 @@ static int __init mca_init(void)
mca_register_device(MCA_PRIMARY_BUS, mca_dev);
mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC);
- if(unlikely(!mca_dev))
+ if (unlikely(!mca_dev))
goto out_unlock_nomem;
/* Put motherboard into video setup mode, read integrated video
@@ -326,7 +332,8 @@ static int __init mca_init(void)
mca_dev->slot = MCA_INTEGVIDEO;
mca_register_device(MCA_PRIMARY_BUS, mca_dev);
- /* Put motherboard into scsi setup mode, read integrated scsi
+ /*
+ * Put motherboard into scsi setup mode, read integrated scsi
* POS registers, and turn motherboard setup off.
*
* It seems there are two possible SCSI registers. Martin says that
@@ -338,18 +345,18 @@ static int __init mca_init(void)
* machine.
*/
- for(i = 0; (which_scsi = mca_builtin_scsi_ports[i]) != 0; i++) {
+ for (i = 0; (which_scsi = mca_builtin_scsi_ports[i]) != 0; i++) {
outb_p(which_scsi, MCA_MOTHERBOARD_SETUP_REG);
- if(mca_read_and_store_pos(pos))
+ if (mca_read_and_store_pos(pos))
break;
}
- if(which_scsi) {
+ if (which_scsi) {
/* found a scsi card */
mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC);
- if(unlikely(!mca_dev))
+ if (unlikely(!mca_dev))
goto out_unlock_nomem;
- for(j = 0; j < 8; j++)
+ for (j = 0; j < 8; j++)
mca_dev->pos[j] = pos[j];
mca_configure_adapter_status(mca_dev);
@@ -364,21 +371,22 @@ static int __init mca_init(void)
outb_p(0xff, MCA_MOTHERBOARD_SETUP_REG);
- /* Now loop over MCA slots: put each adapter into setup mode, and
+ /*
+ * Now loop over MCA slots: put each adapter into setup mode, and
* read its POS registers. Then put adapter setup off.
*/
- for(i=0; i<MCA_MAX_SLOT_NR; i++) {
+ for (i = 0; i < MCA_MAX_SLOT_NR; i++) {
outb_p(0x8|(i&0xf), MCA_ADAPTER_SETUP_REG);
- if(!mca_read_and_store_pos(pos))
+ if (!mca_read_and_store_pos(pos))
continue;
mca_dev = kzalloc(sizeof(struct mca_device), GFP_ATOMIC);
- if(unlikely(!mca_dev))
+ if (unlikely(!mca_dev))
goto out_unlock_nomem;
- for(j=0; j<8; j++)
- mca_dev->pos[j]=pos[j];
+ for (j = 0; j < 8; j++)
+ mca_dev->pos[j] = pos[j];
mca_dev->driver_loaded = 0;
mca_dev->slot = i;
@@ -414,20 +422,20 @@ mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag)
{
int slot = mca_dev->slot;
- if(slot == MCA_INTEGSCSI) {
+ if (slot == MCA_INTEGSCSI) {
printk(KERN_CRIT "NMI: caused by MCA integrated SCSI adapter (%s)\n",
mca_dev->name);
- } else if(slot == MCA_INTEGVIDEO) {
+ } else if (slot == MCA_INTEGVIDEO) {
printk(KERN_CRIT "NMI: caused by MCA integrated video adapter (%s)\n",
mca_dev->name);
- } else if(slot == MCA_MOTHERBOARD) {
+ } else if (slot == MCA_MOTHERBOARD) {
printk(KERN_CRIT "NMI: caused by motherboard (%s)\n",
mca_dev->name);
}
/* More info available in POS 6 and 7? */
- if(check_flag) {
+ if (check_flag) {
unsigned char pos6, pos7;
pos6 = mca_device_read_pos(mca_dev, 6);
@@ -447,8 +455,9 @@ static int __kprobes mca_handle_nmi_callback(struct device *dev, void *data)
pos5 = mca_device_read_pos(mca_dev, 5);
- if(!(pos5 & 0x80)) {
- /* Bit 7 of POS 5 is reset when this adapter has a hardware
+ if (!(pos5 & 0x80)) {
+ /*
+ * Bit 7 of POS 5 is reset when this adapter has a hardware
* error. Bit 7 it reset if there's error information
* available in POS 6 and 7.
*/
@@ -460,7 +469,8 @@ static int __kprobes mca_handle_nmi_callback(struct device *dev, void *data)
void __kprobes mca_handle_nmi(void)
{
- /* First try - scan the various adapters and see if a specific
+ /*
+ * First try - scan the various adapters and see if a specific
* adapter was responsible for the error.
*/
bus_for_each_dev(&mca_bus_type, NULL, NULL, mca_handle_nmi_callback);
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index f2702d01b8a8..25cf6dee4e56 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -290,7 +290,7 @@ static int get_maching_microcode(void *mc, int cpu)
}
return 0;
find:
- pr_debug("microcode: CPU %d found a matching microcode update with"
+ pr_debug("microcode: CPU%d found a matching microcode update with"
" version 0x%x (current=0x%x)\n", cpu, mc_header->rev,uci->rev);
new_mc = vmalloc(total_size);
if (!new_mc) {
@@ -336,11 +336,11 @@ static void apply_microcode(int cpu)
spin_unlock_irqrestore(&microcode_update_lock, flags);
if (val[1] != uci->mc->hdr.rev) {
- printk(KERN_ERR "microcode: CPU%d updated from revision "
+ printk(KERN_ERR "microcode: CPU%d update from revision "
"0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]);
return;
}
- pr_debug("microcode: CPU%d updated from revision "
+ printk(KERN_INFO "microcode: CPU%d updated from revision "
"0x%x to 0x%x, date = %08x \n",
cpu_num, uci->rev, val[1], uci->mc->hdr.date);
uci->rev = val[1];
@@ -534,7 +534,7 @@ static int cpu_request_microcode(int cpu)
c->x86, c->x86_model, c->x86_mask);
error = request_firmware(&firmware, name, &microcode_pdev->dev);
if (error) {
- pr_debug("ucode data file %s load failed\n", name);
+ pr_debug("microcode: ucode data file %s load failed\n", name);
return error;
}
buf = firmware->data;
@@ -709,7 +709,7 @@ static int __mc_sysdev_add(struct sys_device *sys_dev, int resume)
if (!cpu_online(cpu))
return 0;
- pr_debug("Microcode:CPU %d added\n", cpu);
+ pr_debug("microcode: CPU%d added\n", cpu);
memset(uci, 0, sizeof(*uci));
err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
@@ -733,7 +733,7 @@ static int mc_sysdev_remove(struct sys_device *sys_dev)
if (!cpu_online(cpu))
return 0;
- pr_debug("Microcode:CPU %d removed\n", cpu);
+ pr_debug("microcode: CPU%d removed\n", cpu);
microcode_fini_cpu(cpu);
sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
return 0;
@@ -745,7 +745,7 @@ static int mc_sysdev_resume(struct sys_device *dev)
if (!cpu_online(cpu))
return 0;
- pr_debug("Microcode:CPU %d resumed\n", cpu);
+ pr_debug("microcode: CPU%d resumed\n", cpu);
/* only CPU 0 will apply ucode here */
apply_microcode(0);
return 0;
@@ -783,7 +783,7 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
}
case CPU_DOWN_FAILED_FROZEN:
if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group))
- printk(KERN_ERR "Microcode: Failed to create the sysfs "
+ printk(KERN_ERR "microcode: Failed to create the sysfs "
"group for CPU%d\n", cpu);
break;
case CPU_DOWN_PREPARE:
diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse.c
index f349e68e45a0..70744e344fa1 100644
--- a/arch/x86/kernel/mpparse_32.c
+++ b/arch/x86/kernel/mpparse.c
@@ -4,82 +4,56 @@
*
* (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
* (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
- *
- * Fixes
- * Erich Boleyn : MP v1.4 and additional changes.
- * Alan Cox : Added EBDA scanning
- * Ingo Molnar : various cleanups and rewrites
- * Maciej W. Rozycki: Bits for default MP configurations
- * Paul Diefenbaugh: Added full ACPI support
+ * (c) 2008 Alexey Starikovskiy <astarikovskiy@suse.de>
*/
#include <linux/mm.h>
#include <linux/init.h>
-#include <linux/acpi.h>
#include <linux/delay.h>
#include <linux/bootmem.h>
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/bitops.h>
+#include <linux/acpi.h>
+#include <linux/module.h>
#include <asm/smp.h>
-#include <asm/acpi.h>
#include <asm/mtrr.h>
#include <asm/mpspec.h>
+#include <asm/pgalloc.h>
#include <asm/io_apic.h>
+#include <asm/proto.h>
+#include <asm/acpi.h>
+#include <asm/bios_ebda.h>
#include <mach_apic.h>
+#ifdef CONFIG_X86_32
#include <mach_apicdef.h>
#include <mach_mpparse.h>
-#include <bios_ebda.h>
+#endif
/* Have we found an MP table */
int smp_found_config;
-unsigned int __cpuinitdata maxcpus = NR_CPUS;
/*
* Various Linux-internal data structures created from the
* MP-table.
*/
-int apic_version [MAX_APICS];
-int mp_bus_id_to_type [MAX_MP_BUSSES];
-int mp_bus_id_to_node [MAX_MP_BUSSES];
-int mp_bus_id_to_local [MAX_MP_BUSSES];
-int quad_local_to_mp_bus_id [NR_CPUS/4][4];
-int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
-static int mp_current_pci_id;
-
-/* I/O APIC entries */
-struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
-
-/* # of MP IRQ source entries */
-struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
+int mp_bus_id_to_type[MAX_MP_BUSSES];
+#endif
-/* MP IRQ source entries */
-int mp_irq_entries;
+DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
+int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 };
-int nr_ioapics;
+static int mp_current_pci_id;
int pic_mode;
-unsigned long mp_lapic_addr;
-
-unsigned int def_to_bigsmp = 0;
-
-/* Processor that is doing the boot up */
-unsigned int boot_cpu_physical_apicid = -1U;
-/* Internal processor count */
-unsigned int num_processors;
-
-/* Bitmask of physically existing CPUs */
-physid_mask_t phys_cpu_present_map;
-
-u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
/*
* Intel MP BIOS table parsing routines:
*/
-
/*
* Checksum an MP configuration block.
*/
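
The DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES) added above is what the I/O APIC routing changes earlier in this patch query with test_bit(); the MP_bus_info() hunk further down sets the bus's bit for ISA/EISA/MCA and clears it for PCI. A plain-C stand-in for those bitmap helpers, with an illustrative size rather than the kernel's actual MAX_MP_BUSSES value:

#include <limits.h>
#include <stdio.h>

#define MAX_MP_BUSSES	256	/* illustrative only */
#define BITS_PER_LONG	(sizeof(unsigned long) * CHAR_BIT)
#define BITS_TO_LONGS(n)	(((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

static unsigned long mp_bus_not_pci[BITS_TO_LONGS(MAX_MP_BUSSES)];

static void set_bit(int nr, unsigned long *map)
{
	map[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
}

static int test_bit(int nr, const unsigned long *map)
{
	return (map[nr / BITS_PER_LONG] >> (nr % BITS_PER_LONG)) & 1;
}

int main(void)
{
	set_bit(0, mp_bus_not_pci);	/* bus 0 reported as ISA */
	/* bus 1 left clear: PCI */
	printf("bus 0 not PCI: %d\n", test_bit(0, mp_bus_not_pci));
	printf("bus 1 not PCI: %d\n", test_bit(1, mp_bus_not_pci));
	return 0;
}
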
@@ -94,216 +68,153 @@ static int __init mpf_checksum(unsigned char *mp, int len)
return sum & 0xFF;
}
+#ifdef CONFIG_X86_NUMAQ
/*
* Have to match translation table entries to main table entries by counter
* hence the mpc_record variable .... can't see a less disgusting way of
* doing this ....
*/
-static int mpc_record;
-static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata;
+static int mpc_record;
+static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
+ __cpuinitdata;
+#endif
-static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
+static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
{
- int ver, apicid;
- physid_mask_t phys_cpu;
-
- if (!(m->mpc_cpuflag & CPU_ENABLED))
- return;
+ int apicid;
+ char *bootup_cpu = "";
+ if (!(m->mpc_cpuflag & CPU_ENABLED)) {
+ disabled_cpus++;
+ return;
+ }
+#ifdef CONFIG_X86_NUMAQ
apicid = mpc_apic_id(m, translation_table[mpc_record]);
-
- if (m->mpc_featureflag&(1<<0))
- Dprintk(" Floating point unit present.\n");
- if (m->mpc_featureflag&(1<<7))
- Dprintk(" Machine Exception supported.\n");
- if (m->mpc_featureflag&(1<<8))
- Dprintk(" 64 bit compare & exchange supported.\n");
- if (m->mpc_featureflag&(1<<9))
- Dprintk(" Internal APIC present.\n");
- if (m->mpc_featureflag&(1<<11))
- Dprintk(" SEP present.\n");
- if (m->mpc_featureflag&(1<<12))
- Dprintk(" MTRR present.\n");
- if (m->mpc_featureflag&(1<<13))
- Dprintk(" PGE present.\n");
- if (m->mpc_featureflag&(1<<14))
- Dprintk(" MCA present.\n");
- if (m->mpc_featureflag&(1<<15))
- Dprintk(" CMOV present.\n");
- if (m->mpc_featureflag&(1<<16))
- Dprintk(" PAT present.\n");
- if (m->mpc_featureflag&(1<<17))
- Dprintk(" PSE present.\n");
- if (m->mpc_featureflag&(1<<18))
- Dprintk(" PSN present.\n");
- if (m->mpc_featureflag&(1<<19))
- Dprintk(" Cache Line Flush Instruction present.\n");
- /* 20 Reserved */
- if (m->mpc_featureflag&(1<<21))
- Dprintk(" Debug Trace and EMON Store present.\n");
- if (m->mpc_featureflag&(1<<22))
- Dprintk(" ACPI Thermal Throttle Registers present.\n");
- if (m->mpc_featureflag&(1<<23))
- Dprintk(" MMX present.\n");
- if (m->mpc_featureflag&(1<<24))
- Dprintk(" FXSR present.\n");
- if (m->mpc_featureflag&(1<<25))
- Dprintk(" XMM present.\n");
- if (m->mpc_featureflag&(1<<26))
- Dprintk(" Willamette New Instructions present.\n");
- if (m->mpc_featureflag&(1<<27))
- Dprintk(" Self Snoop present.\n");
- if (m->mpc_featureflag&(1<<28))
- Dprintk(" HT present.\n");
- if (m->mpc_featureflag&(1<<29))
- Dprintk(" Thermal Monitor present.\n");
- /* 30, 31 Reserved */
-
-
+#else
+ apicid = m->mpc_apicid;
+#endif
if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
- Dprintk(" Bootup CPU\n");
+ bootup_cpu = " (Bootup-CPU)";
boot_cpu_physical_apicid = m->mpc_apicid;
}
- ver = m->mpc_apicver;
-
- /*
- * Validate version
- */
- if (ver == 0x0) {
- printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
- "fixing up to 0x10. (tell your hw vendor)\n",
- m->mpc_apicid);
- ver = 0x10;
- }
- apic_version[m->mpc_apicid] = ver;
-
- phys_cpu = apicid_to_cpu_present(apicid);
- physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
-
- if (num_processors >= NR_CPUS) {
- printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
- " Processor ignored.\n", NR_CPUS);
- return;
- }
-
- if (num_processors >= maxcpus) {
- printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
- " Processor ignored.\n", maxcpus);
- return;
- }
-
- cpu_set(num_processors, cpu_possible_map);
- num_processors++;
-
- /*
- * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
- * but we need to work other dependencies like SMP_SUSPEND etc
- * before this can be done without some confusion.
- * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
- * - Ashok Raj <ashok.raj@intel.com>
- */
- if (num_processors > 8) {
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_INTEL:
- if (!APIC_XAPIC(ver)) {
- def_to_bigsmp = 0;
- break;
- }
- /* If P4 and above fall through */
- case X86_VENDOR_AMD:
- def_to_bigsmp = 1;
- }
- }
- bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
+ printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
+ generic_processor_info(apicid, m->mpc_apicver);
}
-static void __init MP_bus_info (struct mpc_config_bus *m)
+static void __init MP_bus_info(struct mpc_config_bus *m)
{
char str[7];
memcpy(str, m->mpc_bustype, 6);
str[6] = 0;
+#ifdef CONFIG_X86_NUMAQ
mpc_oem_bus_info(m, str, translation_table[mpc_record]);
+#else
+ Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
+#endif
#if MAX_MP_BUSSES < 256
if (m->mpc_busid >= MAX_MP_BUSSES) {
printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
- " is too large, max. supported is %d\n",
- m->mpc_busid, str, MAX_MP_BUSSES - 1);
+ " is too large, max. supported is %d\n",
+ m->mpc_busid, str, MAX_MP_BUSSES - 1);
return;
}
#endif
- if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
+ if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) {
+ set_bit(m->mpc_busid, mp_bus_not_pci);
+#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
- } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA)-1) == 0) {
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
- } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI)-1) == 0) {
+#endif
+ } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
+#ifdef CONFIG_X86_NUMAQ
mpc_oem_pci_bus(m, translation_table[mpc_record]);
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
+#endif
+ clear_bit(m->mpc_busid, mp_bus_not_pci);
mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
mp_current_pci_id++;
- } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
+#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
+ } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
+ mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
+ } else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA) - 1) == 0) {
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
- } else {
+#endif
+ } else
printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
+}
+
+#ifdef CONFIG_X86_IO_APIC
+
+static int bad_ioapic(unsigned long address)
+{
+ if (nr_ioapics >= MAX_IO_APICS) {
+ printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
+ "(found %d)\n", MAX_IO_APICS, nr_ioapics);
+ panic("Recompile kernel with bigger MAX_IO_APICS!\n");
+ }
+ if (!address) {
+ printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
+ " found in table, skipping!\n");
+ return 1;
}
+ return 0;
}
-static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
+static void __init MP_ioapic_info(struct mpc_config_ioapic *m)
{
if (!(m->mpc_flags & MPC_APIC_USABLE))
return;
printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
- m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
- if (nr_ioapics >= MAX_IO_APICS) {
- printk(KERN_CRIT "Max # of I/O APICs (%d) exceeded (found %d).\n",
- MAX_IO_APICS, nr_ioapics);
- panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
- }
- if (!m->mpc_apicaddr) {
- printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
- " found in MP table, skipping!\n");
+ m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
+
+ if (bad_ioapic(m->mpc_apicaddr))
return;
- }
+
mp_ioapics[nr_ioapics] = *m;
nr_ioapics++;
}
-static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
+static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
{
- mp_irqs [mp_irq_entries] = *m;
+ mp_irqs[mp_irq_entries] = *m;
Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
" IRQ %02x, APIC ID %x, APIC INT %02x\n",
- m->mpc_irqtype, m->mpc_irqflag & 3,
- (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
- m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
+ m->mpc_irqtype, m->mpc_irqflag & 3,
+ (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
+ m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
if (++mp_irq_entries == MAX_IRQ_SOURCES)
panic("Max # of irq sources exceeded!!\n");
}
-static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
+#endif
+
+static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
{
Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
" IRQ %02x, APIC ID %x, APIC LINT %02x\n",
- m->mpc_irqtype, m->mpc_irqflag & 3,
- (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
- m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
+ m->mpc_irqtype, m->mpc_irqflag & 3,
+ (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid,
+ m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
}
#ifdef CONFIG_X86_NUMAQ
-static void __init MP_translation_info (struct mpc_config_translation *m)
+static void __init MP_translation_info(struct mpc_config_translation *m)
{
- printk(KERN_INFO "Translation: record %d, type %d, quad %d, global %d, local %d\n", mpc_record, m->trans_type, m->trans_quad, m->trans_global, m->trans_local);
+ printk(KERN_INFO
+ "Translation: record %d, type %d, quad %d, global %d, local %d\n",
+ mpc_record, m->trans_type, m->trans_quad, m->trans_global,
+ m->trans_local);
- if (mpc_record >= MAX_MPC_ENTRY)
+ if (mpc_record >= MAX_MPC_ENTRY)
printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
else
- translation_table[mpc_record] = m; /* stash this for later */
+ translation_table[mpc_record] = m; /* stash this for later */
if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
node_set_online(m->trans_quad);
}
@@ -312,118 +223,124 @@ static void __init MP_translation_info (struct mpc_config_translation *m)
* Read/parse the MPC oem tables
*/
-static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, \
- unsigned short oemsize)
+static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
+ unsigned short oemsize)
{
- int count = sizeof (*oemtable); /* the header size */
- unsigned char *oemptr = ((unsigned char *)oemtable)+count;
-
+ int count = sizeof(*oemtable); /* the header size */
+ unsigned char *oemptr = ((unsigned char *)oemtable) + count;
+
mpc_record = 0;
- printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
- if (memcmp(oemtable->oem_signature,MPC_OEM_SIGNATURE,4))
- {
- printk(KERN_WARNING "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
- oemtable->oem_signature[0],
- oemtable->oem_signature[1],
- oemtable->oem_signature[2],
- oemtable->oem_signature[3]);
+ printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
+ oemtable);
+ if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) {
+ printk(KERN_WARNING
+ "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
+ oemtable->oem_signature[0], oemtable->oem_signature[1],
+ oemtable->oem_signature[2], oemtable->oem_signature[3]);
return;
}
- if (mpf_checksum((unsigned char *)oemtable,oemtable->oem_length))
- {
+ if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) {
printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
return;
}
while (count < oemtable->oem_length) {
switch (*oemptr) {
- case MP_TRANSLATION:
+ case MP_TRANSLATION:
{
- struct mpc_config_translation *m=
- (struct mpc_config_translation *)oemptr;
+ struct mpc_config_translation *m =
+ (struct mpc_config_translation *)oemptr;
MP_translation_info(m);
oemptr += sizeof(*m);
count += sizeof(*m);
++mpc_record;
break;
}
- default:
+ default:
{
- printk(KERN_WARNING "Unrecognised OEM table entry type! - %d\n", (int) *oemptr);
+ printk(KERN_WARNING
+ "Unrecognised OEM table entry type! - %d\n",
+ (int)*oemptr);
return;
}
}
- }
+ }
}
static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
- char *productid)
+ char *productid)
{
if (strncmp(oem, "IBM NUMA", 8))
printk("Warning! May not be a NUMA-Q system!\n");
if (mpc->mpc_oemptr)
- smp_read_mpc_oem((struct mp_config_oemtable *) mpc->mpc_oemptr,
- mpc->mpc_oemsize);
+ smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr,
+ mpc->mpc_oemsize);
}
-#endif /* CONFIG_X86_NUMAQ */
+#endif /* CONFIG_X86_NUMAQ */
/*
* Read/parse the MPC
*/
-static int __init smp_read_mpc(struct mp_config_table *mpc)
+static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
{
char str[16];
char oem[10];
- int count=sizeof(*mpc);
- unsigned char *mpt=((unsigned char *)mpc)+count;
+ int count = sizeof(*mpc);
+ unsigned char *mpt = ((unsigned char *)mpc) + count;
- if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
- printk(KERN_ERR "SMP mptable: bad signature [0x%x]!\n",
- *(u32 *)mpc->mpc_signature);
+ if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) {
+ printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n",
+ mpc->mpc_signature[0], mpc->mpc_signature[1],
+ mpc->mpc_signature[2], mpc->mpc_signature[3]);
return 0;
}
- if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
- printk(KERN_ERR "SMP mptable: checksum error!\n");
+ if (mpf_checksum((unsigned char *)mpc, mpc->mpc_length)) {
+ printk(KERN_ERR "MPTABLE: checksum error!\n");
return 0;
}
- if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
- printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
- mpc->mpc_spec);
+ if (mpc->mpc_spec != 0x01 && mpc->mpc_spec != 0x04) {
+ printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
+ mpc->mpc_spec);
return 0;
}
if (!mpc->mpc_lapic) {
- printk(KERN_ERR "SMP mptable: null local APIC address!\n");
+ printk(KERN_ERR "MPTABLE: null local APIC address!\n");
return 0;
}
- memcpy(oem,mpc->mpc_oem,8);
- oem[8]=0;
- printk(KERN_INFO "OEM ID: %s ",oem);
+ memcpy(oem, mpc->mpc_oem, 8);
+ oem[8] = 0;
+ printk(KERN_INFO "MPTABLE: OEM ID: %s ", oem);
- memcpy(str,mpc->mpc_productid,12);
- str[12]=0;
- printk("Product ID: %s ",str);
+ memcpy(str, mpc->mpc_productid, 12);
+ str[12] = 0;
+ printk("Product ID: %s ", str);
+#ifdef CONFIG_X86_32
mps_oem_check(mpc, oem, str);
+#endif
+ printk(KERN_INFO "MPTABLE: Product ID: %s ", str);
- printk("APIC at: 0x%X\n", mpc->mpc_lapic);
+ printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic);
- /*
- * Save the local APIC address (it might be non-default) -- but only
- * if we're not using ACPI.
- */
+ /* save the local APIC address, it might be non-default */
if (!acpi_lapic)
mp_lapic_addr = mpc->mpc_lapic;
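+	/* The early pass only needs the local APIC address; skip the entry scan. */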
+ if (early)
+ return 1;
+
/*
- * Now process the configuration blocks.
+ * Now process the configuration blocks.
*/
+#ifdef CONFIG_X86_NUMAQ
mpc_record = 0;
+#endif
while (count < mpc->mpc_length) {
- switch(*mpt) {
- case MP_PROCESSOR:
+ switch (*mpt) {
+ case MP_PROCESSOR:
{
- struct mpc_config_processor *m=
- (struct mpc_config_processor *)mpt;
+ struct mpc_config_processor *m =
+ (struct mpc_config_processor *)mpt;
/* ACPI may have already provided this data */
if (!acpi_lapic)
MP_processor_info(m);
@@ -431,57 +348,68 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
count += sizeof(*m);
break;
}
- case MP_BUS:
+ case MP_BUS:
{
- struct mpc_config_bus *m=
- (struct mpc_config_bus *)mpt;
+ struct mpc_config_bus *m =
+ (struct mpc_config_bus *)mpt;
MP_bus_info(m);
mpt += sizeof(*m);
count += sizeof(*m);
break;
}
- case MP_IOAPIC:
+ case MP_IOAPIC:
{
- struct mpc_config_ioapic *m=
- (struct mpc_config_ioapic *)mpt;
+#ifdef CONFIG_X86_IO_APIC
+ struct mpc_config_ioapic *m =
+ (struct mpc_config_ioapic *)mpt;
MP_ioapic_info(m);
- mpt+=sizeof(*m);
- count+=sizeof(*m);
+#endif
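+			/* Step over the entry even when IO-APIC support is compiled out. */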
+ mpt += sizeof(struct mpc_config_ioapic);
+ count += sizeof(struct mpc_config_ioapic);
break;
}
- case MP_INTSRC:
+ case MP_INTSRC:
{
- struct mpc_config_intsrc *m=
- (struct mpc_config_intsrc *)mpt;
+#ifdef CONFIG_X86_IO_APIC
+ struct mpc_config_intsrc *m =
+ (struct mpc_config_intsrc *)mpt;
MP_intsrc_info(m);
- mpt+=sizeof(*m);
- count+=sizeof(*m);
+#endif
+ mpt += sizeof(struct mpc_config_intsrc);
+ count += sizeof(struct mpc_config_intsrc);
break;
}
- case MP_LINTSRC:
+ case MP_LINTSRC:
{
- struct mpc_config_lintsrc *m=
- (struct mpc_config_lintsrc *)mpt;
+ struct mpc_config_lintsrc *m =
+ (struct mpc_config_lintsrc *)mpt;
MP_lintsrc_info(m);
- mpt+=sizeof(*m);
- count+=sizeof(*m);
- break;
- }
- default:
- {
- count = mpc->mpc_length;
+ mpt += sizeof(*m);
+ count += sizeof(*m);
break;
}
+ default:
+ /* wrong mptable */
+ printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n");
+ printk(KERN_ERR "type %x\n", *mpt);
+ print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
+ 1, mpc, mpc->mpc_length, 1);
+ count = mpc->mpc_length;
+ break;
}
+#ifdef CONFIG_X86_NUMAQ
++mpc_record;
+#endif
}
setup_apic_routing();
if (!num_processors)
- printk(KERN_ERR "SMP mptable: no processors registered!\n");
+ printk(KERN_ERR "MPTABLE: no processors registered!\n");
return num_processors;
}
+#ifdef CONFIG_X86_IO_APIC
+
static int __init ELCR_trigger(unsigned int irq)
{
unsigned int port;
@@ -497,7 +425,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
int ELCR_fallback = 0;
intsrc.mpc_type = MP_INTSRC;
- intsrc.mpc_irqflag = 0; /* conforming */
+ intsrc.mpc_irqflag = 0; /* conforming */
intsrc.mpc_srcbus = 0;
intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
@@ -512,12 +440,16 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
* If it does, we assume it's valid.
*/
if (mpc_default_type == 5) {
- printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
+ printk(KERN_INFO "ISA/PCI bus type with no IRQ information... "
+ "falling back to ELCR\n");
- if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
- printk(KERN_WARNING "ELCR contains invalid data... not using ELCR\n");
+ if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) ||
+ ELCR_trigger(13))
+ printk(KERN_ERR "ELCR contains invalid data... "
+ "not using ELCR\n");
else {
- printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
+ printk(KERN_INFO
+ "Using ELCR to identify PCI interrupts\n");
ELCR_fallback = 1;
}
}
@@ -546,21 +478,25 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
}
intsrc.mpc_srcbusirq = i;
- intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
+ intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
MP_intsrc_info(&intsrc);
}
intsrc.mpc_irqtype = mp_ExtINT;
intsrc.mpc_srcbusirq = 0;
- intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
+ intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
MP_intsrc_info(&intsrc);
}
+#endif
+
static inline void __init construct_default_ISA_mptable(int mpc_default_type)
{
struct mpc_config_processor processor;
struct mpc_config_bus bus;
+#ifdef CONFIG_X86_IO_APIC
struct mpc_config_ioapic ioapic;
+#endif
struct mpc_config_lintsrc lintsrc;
int linttypes[2] = { mp_ExtINT, mp_NMI };
int i;
@@ -578,8 +514,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
processor.mpc_cpuflag = CPU_ENABLED;
processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
- (boot_cpu_data.x86_model << 4) |
- boot_cpu_data.x86_mask;
+ (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
processor.mpc_reserved[0] = 0;
processor.mpc_reserved[1] = 0;
@@ -591,23 +526,22 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
bus.mpc_type = MP_BUS;
bus.mpc_busid = 0;
switch (mpc_default_type) {
- default:
- printk("???\n");
- printk(KERN_ERR "Unknown standard configuration %d\n",
- mpc_default_type);
- /* fall through */
- case 1:
- case 5:
- memcpy(bus.mpc_bustype, "ISA ", 6);
- break;
- case 2:
- case 6:
- case 3:
- memcpy(bus.mpc_bustype, "EISA ", 6);
- break;
- case 4:
- case 7:
- memcpy(bus.mpc_bustype, "MCA ", 6);
+ default:
+ printk(KERN_ERR "???\nUnknown standard configuration %d\n",
+ mpc_default_type);
+ /* fall through */
+ case 1:
+ case 5:
+ memcpy(bus.mpc_bustype, "ISA ", 6);
+ break;
+ case 2:
+ case 6:
+ case 3:
+ memcpy(bus.mpc_bustype, "EISA ", 6);
+ break;
+ case 4:
+ case 7:
+ memcpy(bus.mpc_bustype, "MCA ", 6);
}
MP_bus_info(&bus);
if (mpc_default_type > 4) {
@@ -616,6 +550,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
MP_bus_info(&bus);
}
+#ifdef CONFIG_X86_IO_APIC
ioapic.mpc_type = MP_IOAPIC;
ioapic.mpc_apicid = 2;
ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
@@ -627,9 +562,9 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
* We set up most of the low 16 IO-APIC pins according to MPS rules.
*/
construct_default_ioirq_mptable(mpc_default_type);
-
+#endif
lintsrc.mpc_type = MP_LINTSRC;
- lintsrc.mpc_irqflag = 0; /* conforming */
+ lintsrc.mpc_irqflag = 0; /* conforming */
lintsrc.mpc_srcbusid = 0;
lintsrc.mpc_srcbusirq = 0;
lintsrc.mpc_destapic = MP_APIC_ALL;
@@ -645,36 +580,49 @@ static struct intel_mp_floating *mpf_found;
/*
* Scan the memory blocks for an SMP configuration block.
*/
-void __init get_smp_config (void)
+static void __init __get_smp_config(unsigned early)
{
struct intel_mp_floating *mpf = mpf_found;
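+	/* If ACPI already supplied the local APIC, the early MPS pass has nothing to add. */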
+ if (acpi_lapic && early)
+ return;
/*
- * ACPI supports both logical (e.g. Hyper-Threading) and physical
+ * ACPI supports both logical (e.g. Hyper-Threading) and physical
* processors, where MPS only supports physical.
*/
if (acpi_lapic && acpi_ioapic) {
- printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
+ printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
+ "information\n");
return;
- }
- else if (acpi_lapic)
- printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
-
- printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
- if (mpf->mpf_feature2 & (1<<7)) {
+ } else if (acpi_lapic)
+ printk(KERN_INFO "Using ACPI for processor (LAPIC) "
+ "configuration information\n");
+
+ printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
+ mpf->mpf_specification);
+#ifdef CONFIG_X86_32
+ if (mpf->mpf_feature2 & (1 << 7)) {
printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
pic_mode = 1;
} else {
printk(KERN_INFO " Virtual Wire compatibility mode.\n");
pic_mode = 0;
}
-
+#endif
/*
* Now see if we need to read further.
*/
if (mpf->mpf_feature1 != 0) {
+ if (early) {
+ /*
+ * local APIC has default address
+ */
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+ return;
+ }
- printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
+ printk(KERN_INFO "Default MP configuration #%d\n",
+ mpf->mpf_feature1);
construct_default_ISA_mptable(mpf->mpf_feature1);
} else if (mpf->mpf_physptr) {
@@ -683,12 +631,18 @@ void __init get_smp_config (void)
* Read the physical hardware table. Anything here will
* override the defaults.
*/
- if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr))) {
+ if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) {
smp_found_config = 0;
- printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
- printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
+ printk(KERN_ERR
+ "BIOS bug, MP table errors detected!...\n");
+ printk(KERN_ERR "... disabling SMP support. "
+ "(tell your hw vendor)\n");
return;
}
+
+ if (early)
+ return;
+#ifdef CONFIG_X86_IO_APIC
/*
* If there are no explicit MP IRQ entries, then we are
* broken. We set up most of the low 16 IO-APIC pins to
@@ -697,7 +651,9 @@ void __init get_smp_config (void)
if (!mp_irq_entries) {
struct mpc_config_bus bus;
- printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
+ printk(KERN_ERR "BIOS bug, no explicit IRQ entries, "
+ "using default mptable. "
+ "(tell your hw vendor)\n");
bus.mpc_type = MP_BUS;
bus.mpc_busid = 0;
@@ -706,36 +662,51 @@ void __init get_smp_config (void)
construct_default_ioirq_mptable(0);
}
-
+#endif
} else
BUG();
- printk(KERN_INFO "Processors: %d\n", num_processors);
+ if (!early)
+ printk(KERN_INFO "Processors: %d\n", num_processors);
/*
* Only use the first configuration found.
*/
}
-static int __init smp_scan_config (unsigned long base, unsigned long length)
+void __init early_get_smp_config(void)
+{
+ __get_smp_config(1);
+}
+
+void __init get_smp_config(void)
{
- unsigned long *bp = phys_to_virt(base);
+ __get_smp_config(0);
+}
+
+static int __init smp_scan_config(unsigned long base, unsigned long length,
+ unsigned reserve)
+{
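+	/* __bad_mpf_size is intentionally undefined: a wrong structure size leaves the call in place and breaks the link. */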
+ extern void __bad_mpf_size(void);
+ unsigned int *bp = phys_to_virt(base);
struct intel_mp_floating *mpf;
- printk(KERN_INFO "Scan SMP from %p for %ld bytes.\n", bp,length);
+ Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length);
if (sizeof(*mpf) != 16)
- printk("Error: MPF size\n");
+ __bad_mpf_size();
while (length > 0) {
mpf = (struct intel_mp_floating *)bp;
if ((*bp == SMP_MAGIC_IDENT) &&
- (mpf->mpf_length == 1) &&
- !mpf_checksum((unsigned char *)bp, 16) &&
- ((mpf->mpf_specification == 1)
- || (mpf->mpf_specification == 4)) ) {
+ (mpf->mpf_length == 1) &&
+ !mpf_checksum((unsigned char *)bp, 16) &&
+ ((mpf->mpf_specification == 1)
+ || (mpf->mpf_specification == 4))) {
smp_found_config = 1;
+ mpf_found = mpf;
+#ifdef CONFIG_X86_32
printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
- mpf, virt_to_phys(mpf));
+ mpf, virt_to_phys(mpf));
reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE,
BOOTMEM_DEFAULT);
if (mpf->mpf_physptr) {
@@ -756,8 +727,16 @@ static int __init smp_scan_config (unsigned long base, unsigned long length)
BOOTMEM_DEFAULT);
}
- mpf_found = mpf;
- return 1;
+#else
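+			/* The early scan only locates the table; bootmem is reserved on the later pass. */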
+ if (!reserve)
+ return 1;
+
+ reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE);
+ if (mpf->mpf_physptr)
+ reserve_bootmem_generic(mpf->mpf_physptr,
+ PAGE_SIZE);
+#endif
+ return 1;
}
bp += 4;
length -= 16;
@@ -765,7 +744,7 @@ static int __init smp_scan_config (unsigned long base, unsigned long length)
return 0;
}
-void __init find_smp_config (void)
+static void __init __find_smp_config(unsigned reserve)
{
unsigned int address;
@@ -777,9 +756,9 @@ void __init find_smp_config (void)
* 2) Scan the top 1K of base RAM
* 3) Scan the 64K of bios
*/
- if (smp_scan_config(0x0,0x400) ||
- smp_scan_config(639*0x400,0x400) ||
- smp_scan_config(0xF0000,0x10000))
+ if (smp_scan_config(0x0, 0x400, reserve) ||
+ smp_scan_config(639 * 0x400, 0x400, reserve) ||
+ smp_scan_config(0xF0000, 0x10000, reserve))
return;
/*
* If it is an SMP machine we should know now, unless the
@@ -800,144 +779,113 @@ void __init find_smp_config (void)
address = get_bios_ebda();
if (address)
- smp_scan_config(address, 0x400);
+ smp_scan_config(address, 0x400, reserve);
}
-int es7000_plat;
-
-/* --------------------------------------------------------------------------
- ACPI-based MP Configuration
- -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_ACPI
-
-void __init mp_register_lapic_address(u64 address)
+void __init early_find_smp_config(void)
{
- mp_lapic_addr = (unsigned long) address;
-
- set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
-
- if (boot_cpu_physical_apicid == -1U)
- boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
-
- Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
+ __find_smp_config(0);
}
-void __cpuinit mp_register_lapic (u8 id, u8 enabled)
+void __init find_smp_config(void)
{
- struct mpc_config_processor processor;
- int boot_cpu = 0;
-
- if (MAX_APICS - id <= 0) {
- printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
- id, MAX_APICS);
- return;
- }
-
- if (id == boot_cpu_physical_apicid)
- boot_cpu = 1;
+ __find_smp_config(1);
+}
- processor.mpc_type = MP_PROCESSOR;
- processor.mpc_apicid = id;
- processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
- processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
- processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
- processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
- (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
- processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
- processor.mpc_reserved[0] = 0;
- processor.mpc_reserved[1] = 0;
+/* --------------------------------------------------------------------------
+ ACPI-based MP Configuration
+ -------------------------------------------------------------------------- */
- MP_processor_info(&processor);
-}
+#ifdef CONFIG_ACPI
#ifdef CONFIG_X86_IO_APIC
#define MP_ISA_BUS 0
#define MP_MAX_IOAPIC_PIN 127
-static struct mp_ioapic_routing {
- int apic_id;
- int gsi_base;
- int gsi_end;
- u32 pin_programmed[4];
-} mp_ioapic_routing[MAX_IO_APICS];
+extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
-static int mp_find_ioapic (int gsi)
+static int mp_find_ioapic(int gsi)
{
int i = 0;
/* Find the IOAPIC that manages this GSI. */
for (i = 0; i < nr_ioapics; i++) {
if ((gsi >= mp_ioapic_routing[i].gsi_base)
- && (gsi <= mp_ioapic_routing[i].gsi_end))
+ && (gsi <= mp_ioapic_routing[i].gsi_end))
return i;
}
printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
-
return -1;
}
-void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
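+/* Pick an I/O APIC id, steering clear of ids that are already registered where we can. */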
+static u8 uniq_ioapic_id(u8 id)
+{
+#ifdef CONFIG_X86_32
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+ !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+ return io_apic_get_unique_id(nr_ioapics, id);
+ else
+ return id;
+#else
+ int i;
+ DECLARE_BITMAP(used, 256);
+ bitmap_zero(used, 256);
+ for (i = 0; i < nr_ioapics; i++) {
+ struct mpc_config_ioapic *ia = &mp_ioapics[i];
+ __set_bit(ia->mpc_apicid, used);
+ }
+ if (!test_bit(id, used))
+ return id;
+ return find_first_zero_bit(used, 256);
+#endif
+}
+
+void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
{
int idx = 0;
- int tmpid;
- if (nr_ioapics >= MAX_IO_APICS) {
- printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
- "(found %d)\n", MAX_IO_APICS, nr_ioapics);
- panic("Recompile kernel with bigger MAX_IO_APICS!\n");
- }
- if (!address) {
- printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
- " found in MADT table, skipping!\n");
+ if (bad_ioapic(address))
return;
- }
- idx = nr_ioapics++;
+ idx = nr_ioapics;
mp_ioapics[idx].mpc_type = MP_IOAPIC;
mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
mp_ioapics[idx].mpc_apicaddr = address;
set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
- if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
- && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
- tmpid = io_apic_get_unique_id(idx, id);
- else
- tmpid = id;
- if (tmpid == -1) {
- nr_ioapics--;
- return;
- }
- mp_ioapics[idx].mpc_apicid = tmpid;
+ mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id);
+#ifdef CONFIG_X86_32
mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
-
- /*
+#else
+ mp_ioapics[idx].mpc_apicver = 0;
+#endif
+ /*
* Build basic GSI lookup table to facilitate gsi->io_apic lookups
* and to prevent reprogramming of IOAPIC pins (PCI GSIs).
*/
mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
mp_ioapic_routing[idx].gsi_base = gsi_base;
mp_ioapic_routing[idx].gsi_end = gsi_base +
- io_apic_get_redir_entries(idx);
+ io_apic_get_redir_entries(idx);
- printk("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
+ printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
"GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
- mp_ioapic_routing[idx].gsi_base,
- mp_ioapic_routing[idx].gsi_end);
+ mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
+
+ nr_ioapics++;
}
-void __init
-mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
+void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
{
struct mpc_config_intsrc intsrc;
- int ioapic = -1;
- int pin = -1;
+ int ioapic = -1;
+ int pin = -1;
- /*
+ /*
* Convert 'gsi' to 'ioapic.pin'.
*/
ioapic = mp_find_ioapic(gsi);
@@ -947,7 +895,7 @@ mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
/*
* TBD: This check is for faulty timer entries, where the override
- * erroneously sets the trigger to level, resulting in a HUGE
+ * erroneously sets the trigger to level, resulting in a HUGE
* increase of timer interrupts!
*/
if ((bus_irq == 0) && (trigger == 3))
@@ -957,13 +905,13 @@ mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
intsrc.mpc_irqtype = mp_INT;
intsrc.mpc_irqflag = (trigger << 2) | polarity;
intsrc.mpc_srcbus = MP_ISA_BUS;
- intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
- intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
- intsrc.mpc_dstirq = pin; /* INTIN# */
+ intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
+ intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
+ intsrc.mpc_dstirq = pin; /* INTIN# */
Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
- intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
- (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
+ intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
+ (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
mp_irqs[mp_irq_entries] = intsrc;
@@ -971,16 +919,21 @@ mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
panic("Max # of irq sources exceeded!\n");
}
-void __init mp_config_acpi_legacy_irqs (void)
+int es7000_plat;
+
+void __init mp_config_acpi_legacy_irqs(void)
{
struct mpc_config_intsrc intsrc;
int i = 0;
int ioapic = -1;
- /*
+#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
+ /*
* Fabricate the legacy ISA bus (bus #31).
*/
mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
+#endif
+ set_bit(MP_ISA_BUS, mp_bus_not_pci);
Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
/*
@@ -989,19 +942,20 @@ void __init mp_config_acpi_legacy_irqs (void)
if (es7000_plat == 1)
return;
- /*
- * Locate the IOAPIC that manages the ISA IRQs (0-15).
+ /*
+ * Locate the IOAPIC that manages the ISA IRQs (0-15).
*/
ioapic = mp_find_ioapic(0);
if (ioapic < 0)
return;
intsrc.mpc_type = MP_INTSRC;
- intsrc.mpc_irqflag = 0; /* Conforming */
+ intsrc.mpc_irqflag = 0; /* Conforming */
intsrc.mpc_srcbus = MP_ISA_BUS;
+#ifdef CONFIG_X86_IO_APIC
intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
-
- /*
+#endif
+ /*
* Use the default configuration for the IRQs 0-15. Unless
* overridden by (MADT) interrupt source override entries.
*/
@@ -1012,28 +966,29 @@ void __init mp_config_acpi_legacy_irqs (void)
struct mpc_config_intsrc *irq = mp_irqs + idx;
/* Do we already have a mapping for this ISA IRQ? */
- if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
+ if (irq->mpc_srcbus == MP_ISA_BUS
+ && irq->mpc_srcbusirq == i)
break;
/* Do we already have a mapping for this IOAPIC pin */
if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
- (irq->mpc_dstirq == i))
+ (irq->mpc_dstirq == i))
break;
}
if (idx != mp_irq_entries) {
printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
- continue; /* IRQ already used */
+ continue; /* IRQ already used */
}
intsrc.mpc_irqtype = mp_INT;
- intsrc.mpc_srcbusirq = i; /* Identity mapped */
+ intsrc.mpc_srcbusirq = i; /* Identity mapped */
intsrc.mpc_dstirq = i;
Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
- "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
- (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
- intsrc.mpc_srcbusirq, intsrc.mpc_dstapic,
+ "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
+ (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
+ intsrc.mpc_srcbusirq, intsrc.mpc_dstapic,
intsrc.mpc_dstirq);
mp_irqs[mp_irq_entries] = intsrc;
@@ -1042,21 +997,27 @@ void __init mp_config_acpi_legacy_irqs (void)
}
}
-#define MAX_GSI_NUM 4096
-#define IRQ_COMPRESSION_START 64
-
int mp_register_gsi(u32 gsi, int triggering, int polarity)
{
int ioapic = -1;
int ioapic_pin = 0;
int idx, bit = 0;
+#ifdef CONFIG_X86_32
+#define MAX_GSI_NUM 4096
+#define IRQ_COMPRESSION_START 64
+
static int pci_irq = IRQ_COMPRESSION_START;
/*
* Mapping between Global System Interrupts, which
* represent all possible interrupts, and IRQs
* assigned to actual devices.
*/
- static int gsi_to_irq[MAX_GSI_NUM];
+ static int gsi_to_irq[MAX_GSI_NUM];
+#else
+
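+	/* Nothing to program unless ACPI is using the IOAPIC interrupt model. */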
+ if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
+ return gsi;
+#endif
/* Don't set up the ACPI SCI because it's already set up */
if (acpi_gbl_FADT.sci_interrupt == gsi)
@@ -1070,11 +1031,13 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+#ifdef CONFIG_X86_32
if (ioapic_renumber_irq)
gsi = ioapic_renumber_irq(ioapic, gsi);
+#endif
- /*
- * Avoid pin reprogramming. PRTs typically include entries
+ /*
+ * Avoid pin reprogramming. PRTs typically include entries
* with redundant pin->gsi mappings (but unique PCI devices);
* we only program the IOAPIC on the first.
*/
@@ -1082,23 +1045,27 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
if (idx > 3) {
printk(KERN_ERR "Invalid reference to IOAPIC pin "
- "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
- ioapic_pin);
+ "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
+ ioapic_pin);
return gsi;
}
- if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
+ if ((1 << bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
+#ifdef CONFIG_X86_32
return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
+#else
+ return gsi;
+#endif
}
- mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
-
+ mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1 << bit);
+#ifdef CONFIG_X86_32
/*
* For GSI >= 64, use IRQ compression
*/
if ((gsi >= IRQ_COMPRESSION_START)
- && (triggering == ACPI_LEVEL_SENSITIVE)) {
+ && (triggering == ACPI_LEVEL_SENSITIVE)) {
/*
* For PCI devices assign IRQs in order, avoiding gaps
* due to unused I/O APIC pins.
@@ -1115,8 +1082,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
* So test for this condition, and if necessary, avoid
* the pin collision.
*/
- if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
- gsi = pci_irq++;
+ gsi = pci_irq++;
/*
* Don't assign IRQ used by ACPI SCI
*/
@@ -1128,10 +1094,10 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
return gsi;
}
}
-
+#endif
io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
- triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
- polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+ triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
+ polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
return gsi;
}
diff --git a/arch/x86/kernel/mpparse_64.c b/arch/x86/kernel/mpparse_64.c
deleted file mode 100644
index 72ab1403fed7..000000000000
--- a/arch/x86/kernel/mpparse_64.c
+++ /dev/null
@@ -1,867 +0,0 @@
-/*
- * Intel Multiprocessor Specification 1.1 and 1.4
- * compliant MP-table parsing routines.
- *
- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
- * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
- *
- * Fixes
- * Erich Boleyn : MP v1.4 and additional changes.
- * Alan Cox : Added EBDA scanning
- * Ingo Molnar : various cleanups and rewrites
- * Maciej W. Rozycki: Bits for default MP configurations
- * Paul Diefenbaugh: Added full ACPI support
- */
-
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/bootmem.h>
-#include <linux/kernel_stat.h>
-#include <linux/mc146818rtc.h>
-#include <linux/acpi.h>
-#include <linux/module.h>
-
-#include <asm/smp.h>
-#include <asm/mtrr.h>
-#include <asm/mpspec.h>
-#include <asm/pgalloc.h>
-#include <asm/io_apic.h>
-#include <asm/proto.h>
-#include <asm/acpi.h>
-
-/* Have we found an MP table */
-int smp_found_config;
-
-/*
- * Various Linux-internal data structures created from the
- * MP-table.
- */
-DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
-int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
-
-static int mp_current_pci_id = 0;
-/* I/O APIC entries */
-struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
-
-/* # of MP IRQ source entries */
-struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
-
-/* MP IRQ source entries */
-int mp_irq_entries;
-
-int nr_ioapics;
-unsigned long mp_lapic_addr = 0;
-
-
-
-/* Processor that is doing the boot up */
-unsigned int boot_cpu_id = -1U;
-EXPORT_SYMBOL(boot_cpu_id);
-
-/* Internal processor count */
-unsigned int num_processors;
-
-unsigned disabled_cpus __cpuinitdata;
-
-/* Bitmask of physically existing CPUs */
-physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
-
-u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata
- = { [0 ... NR_CPUS-1] = BAD_APICID };
-void *x86_bios_cpu_apicid_early_ptr;
-DEFINE_PER_CPU(u16, x86_bios_cpu_apicid) = BAD_APICID;
-EXPORT_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
-
-
-/*
- * Intel MP BIOS table parsing routines:
- */
-
-/*
- * Checksum an MP configuration block.
- */
-
-static int __init mpf_checksum(unsigned char *mp, int len)
-{
- int sum = 0;
-
- while (len--)
- sum += *mp++;
-
- return sum & 0xFF;
-}
-
-static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
-{
- int cpu;
- cpumask_t tmp_map;
- char *bootup_cpu = "";
-
- if (!(m->mpc_cpuflag & CPU_ENABLED)) {
- disabled_cpus++;
- return;
- }
- if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
- bootup_cpu = " (Bootup-CPU)";
- boot_cpu_id = m->mpc_apicid;
- }
-
- printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
-
- if (num_processors >= NR_CPUS) {
- printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
- " Processor ignored.\n", NR_CPUS);
- return;
- }
-
- num_processors++;
- cpus_complement(tmp_map, cpu_present_map);
- cpu = first_cpu(tmp_map);
-
- physid_set(m->mpc_apicid, phys_cpu_present_map);
- if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
- /*
- * x86_bios_cpu_apicid is required to have processors listed
- * in same order as logical cpu numbers. Hence the first
- * entry is BSP, and so on.
- */
- cpu = 0;
- }
- /* are we being called early in kernel startup? */
- if (x86_cpu_to_apicid_early_ptr) {
- u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
- u16 *bios_cpu_apicid = x86_bios_cpu_apicid_early_ptr;
-
- cpu_to_apicid[cpu] = m->mpc_apicid;
- bios_cpu_apicid[cpu] = m->mpc_apicid;
- } else {
- per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid;
- per_cpu(x86_bios_cpu_apicid, cpu) = m->mpc_apicid;
- }
-
- cpu_set(cpu, cpu_possible_map);
- cpu_set(cpu, cpu_present_map);
-}
-
-static void __init MP_bus_info (struct mpc_config_bus *m)
-{
- char str[7];
-
- memcpy(str, m->mpc_bustype, 6);
- str[6] = 0;
- Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
-
- if (strncmp(str, "ISA", 3) == 0) {
- set_bit(m->mpc_busid, mp_bus_not_pci);
- } else if (strncmp(str, "PCI", 3) == 0) {
- clear_bit(m->mpc_busid, mp_bus_not_pci);
- mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
- mp_current_pci_id++;
- } else {
- printk(KERN_ERR "Unknown bustype %s\n", str);
- }
-}
-
-static int bad_ioapic(unsigned long address)
-{
- if (nr_ioapics >= MAX_IO_APICS) {
- printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
- "(found %d)\n", MAX_IO_APICS, nr_ioapics);
- panic("Recompile kernel with bigger MAX_IO_APICS!\n");
- }
- if (!address) {
- printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
- " found in table, skipping!\n");
- return 1;
- }
- return 0;
-}
-
-static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
-{
- if (!(m->mpc_flags & MPC_APIC_USABLE))
- return;
-
- printk("I/O APIC #%d at 0x%X.\n",
- m->mpc_apicid, m->mpc_apicaddr);
-
- if (bad_ioapic(m->mpc_apicaddr))
- return;
-
- mp_ioapics[nr_ioapics] = *m;
- nr_ioapics++;
-}
-
-static void __init MP_intsrc_info (struct mpc_config_intsrc *m)
-{
- mp_irqs [mp_irq_entries] = *m;
- Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
- " IRQ %02x, APIC ID %x, APIC INT %02x\n",
- m->mpc_irqtype, m->mpc_irqflag & 3,
- (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
- m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
- if (++mp_irq_entries >= MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!!\n");
-}
-
-static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
-{
- Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
- " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
- m->mpc_irqtype, m->mpc_irqflag & 3,
- (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
- m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
-}
-
-/*
- * Read/parse the MPC
- */
-
-static int __init smp_read_mpc(struct mp_config_table *mpc)
-{
- char str[16];
- int count=sizeof(*mpc);
- unsigned char *mpt=((unsigned char *)mpc)+count;
-
- if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
- printk("MPTABLE: bad signature [%c%c%c%c]!\n",
- mpc->mpc_signature[0],
- mpc->mpc_signature[1],
- mpc->mpc_signature[2],
- mpc->mpc_signature[3]);
- return 0;
- }
- if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
- printk("MPTABLE: checksum error!\n");
- return 0;
- }
- if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
- printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
- mpc->mpc_spec);
- return 0;
- }
- if (!mpc->mpc_lapic) {
- printk(KERN_ERR "MPTABLE: null local APIC address!\n");
- return 0;
- }
- memcpy(str,mpc->mpc_oem,8);
- str[8] = 0;
- printk(KERN_INFO "MPTABLE: OEM ID: %s ",str);
-
- memcpy(str,mpc->mpc_productid,12);
- str[12] = 0;
- printk("MPTABLE: Product ID: %s ",str);
-
- printk("MPTABLE: APIC at: 0x%X\n",mpc->mpc_lapic);
-
- /* save the local APIC address, it might be non-default */
- if (!acpi_lapic)
- mp_lapic_addr = mpc->mpc_lapic;
-
- /*
- * Now process the configuration blocks.
- */
- while (count < mpc->mpc_length) {
- switch(*mpt) {
- case MP_PROCESSOR:
- {
- struct mpc_config_processor *m=
- (struct mpc_config_processor *)mpt;
- if (!acpi_lapic)
- MP_processor_info(m);
- mpt += sizeof(*m);
- count += sizeof(*m);
- break;
- }
- case MP_BUS:
- {
- struct mpc_config_bus *m=
- (struct mpc_config_bus *)mpt;
- MP_bus_info(m);
- mpt += sizeof(*m);
- count += sizeof(*m);
- break;
- }
- case MP_IOAPIC:
- {
- struct mpc_config_ioapic *m=
- (struct mpc_config_ioapic *)mpt;
- MP_ioapic_info(m);
- mpt += sizeof(*m);
- count += sizeof(*m);
- break;
- }
- case MP_INTSRC:
- {
- struct mpc_config_intsrc *m=
- (struct mpc_config_intsrc *)mpt;
-
- MP_intsrc_info(m);
- mpt += sizeof(*m);
- count += sizeof(*m);
- break;
- }
- case MP_LINTSRC:
- {
- struct mpc_config_lintsrc *m=
- (struct mpc_config_lintsrc *)mpt;
- MP_lintsrc_info(m);
- mpt += sizeof(*m);
- count += sizeof(*m);
- break;
- }
- }
- }
- setup_apic_routing();
- if (!num_processors)
- printk(KERN_ERR "MPTABLE: no processors registered!\n");
- return num_processors;
-}
-
-static int __init ELCR_trigger(unsigned int irq)
-{
- unsigned int port;
-
- port = 0x4d0 + (irq >> 3);
- return (inb(port) >> (irq & 7)) & 1;
-}
-
-static void __init construct_default_ioirq_mptable(int mpc_default_type)
-{
- struct mpc_config_intsrc intsrc;
- int i;
- int ELCR_fallback = 0;
-
- intsrc.mpc_type = MP_INTSRC;
- intsrc.mpc_irqflag = 0; /* conforming */
- intsrc.mpc_srcbus = 0;
- intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
-
- intsrc.mpc_irqtype = mp_INT;
-
- /*
- * If true, we have an ISA/PCI system with no IRQ entries
- * in the MP table. To prevent the PCI interrupts from being set up
- * incorrectly, we try to use the ELCR. The sanity check to see if
- * there is good ELCR data is very simple - IRQ0, 1, 2 and 13 can
- * never be level sensitive, so we simply see if the ELCR agrees.
- * If it does, we assume it's valid.
- */
- if (mpc_default_type == 5) {
- printk(KERN_INFO "ISA/PCI bus type with no IRQ information... falling back to ELCR\n");
-
- if (ELCR_trigger(0) || ELCR_trigger(1) || ELCR_trigger(2) || ELCR_trigger(13))
- printk(KERN_ERR "ELCR contains invalid data... not using ELCR\n");
- else {
- printk(KERN_INFO "Using ELCR to identify PCI interrupts\n");
- ELCR_fallback = 1;
- }
- }
-
- for (i = 0; i < 16; i++) {
- switch (mpc_default_type) {
- case 2:
- if (i == 0 || i == 13)
- continue; /* IRQ0 & IRQ13 not connected */
- /* fall through */
- default:
- if (i == 2)
- continue; /* IRQ2 is never connected */
- }
-
- if (ELCR_fallback) {
- /*
- * If the ELCR indicates a level-sensitive interrupt, we
- * copy that information over to the MP table in the
- * irqflag field (level sensitive, active high polarity).
- */
- if (ELCR_trigger(i))
- intsrc.mpc_irqflag = 13;
- else
- intsrc.mpc_irqflag = 0;
- }
-
- intsrc.mpc_srcbusirq = i;
- intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
- MP_intsrc_info(&intsrc);
- }
-
- intsrc.mpc_irqtype = mp_ExtINT;
- intsrc.mpc_srcbusirq = 0;
- intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
- MP_intsrc_info(&intsrc);
-}
-
-static inline void __init construct_default_ISA_mptable(int mpc_default_type)
-{
- struct mpc_config_processor processor;
- struct mpc_config_bus bus;
- struct mpc_config_ioapic ioapic;
- struct mpc_config_lintsrc lintsrc;
- int linttypes[2] = { mp_ExtINT, mp_NMI };
- int i;
-
- /*
- * local APIC has default address
- */
- mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-
- /*
- * 2 CPUs, numbered 0 & 1.
- */
- processor.mpc_type = MP_PROCESSOR;
- processor.mpc_apicver = 0;
- processor.mpc_cpuflag = CPU_ENABLED;
- processor.mpc_cpufeature = 0;
- processor.mpc_featureflag = 0;
- processor.mpc_reserved[0] = 0;
- processor.mpc_reserved[1] = 0;
- for (i = 0; i < 2; i++) {
- processor.mpc_apicid = i;
- MP_processor_info(&processor);
- }
-
- bus.mpc_type = MP_BUS;
- bus.mpc_busid = 0;
- switch (mpc_default_type) {
- default:
- printk(KERN_ERR "???\nUnknown standard configuration %d\n",
- mpc_default_type);
- /* fall through */
- case 1:
- case 5:
- memcpy(bus.mpc_bustype, "ISA ", 6);
- break;
- }
- MP_bus_info(&bus);
- if (mpc_default_type > 4) {
- bus.mpc_busid = 1;
- memcpy(bus.mpc_bustype, "PCI ", 6);
- MP_bus_info(&bus);
- }
-
- ioapic.mpc_type = MP_IOAPIC;
- ioapic.mpc_apicid = 2;
- ioapic.mpc_apicver = 0;
- ioapic.mpc_flags = MPC_APIC_USABLE;
- ioapic.mpc_apicaddr = 0xFEC00000;
- MP_ioapic_info(&ioapic);
-
- /*
- * We set up most of the low 16 IO-APIC pins according to MPS rules.
- */
- construct_default_ioirq_mptable(mpc_default_type);
-
- lintsrc.mpc_type = MP_LINTSRC;
- lintsrc.mpc_irqflag = 0; /* conforming */
- lintsrc.mpc_srcbusid = 0;
- lintsrc.mpc_srcbusirq = 0;
- lintsrc.mpc_destapic = MP_APIC_ALL;
- for (i = 0; i < 2; i++) {
- lintsrc.mpc_irqtype = linttypes[i];
- lintsrc.mpc_destapiclint = i;
- MP_lintsrc_info(&lintsrc);
- }
-}
-
-static struct intel_mp_floating *mpf_found;
-
-/*
- * Scan the memory blocks for an SMP configuration block.
- */
-void __init get_smp_config (void)
-{
- struct intel_mp_floating *mpf = mpf_found;
-
- /*
- * ACPI supports both logical (e.g. Hyper-Threading) and physical
- * processors, where MPS only supports physical.
- */
- if (acpi_lapic && acpi_ioapic) {
- printk(KERN_INFO "Using ACPI (MADT) for SMP configuration information\n");
- return;
- }
- else if (acpi_lapic)
- printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
-
- printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
-
- /*
- * Now see if we need to read further.
- */
- if (mpf->mpf_feature1 != 0) {
-
- printk(KERN_INFO "Default MP configuration #%d\n", mpf->mpf_feature1);
- construct_default_ISA_mptable(mpf->mpf_feature1);
-
- } else if (mpf->mpf_physptr) {
-
- /*
- * Read the physical hardware table. Anything here will
- * override the defaults.
- */
- if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr))) {
- smp_found_config = 0;
- printk(KERN_ERR "BIOS bug, MP table errors detected!...\n");
- printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n");
- return;
- }
- /*
- * If there are no explicit MP IRQ entries, then we are
- * broken. We set up most of the low 16 IO-APIC pins to
- * ISA defaults and hope it will work.
- */
- if (!mp_irq_entries) {
- struct mpc_config_bus bus;
-
- printk(KERN_ERR "BIOS bug, no explicit IRQ entries, using default mptable. (tell your hw vendor)\n");
-
- bus.mpc_type = MP_BUS;
- bus.mpc_busid = 0;
- memcpy(bus.mpc_bustype, "ISA ", 6);
- MP_bus_info(&bus);
-
- construct_default_ioirq_mptable(0);
- }
-
- } else
- BUG();
-
- printk(KERN_INFO "Processors: %d\n", num_processors);
- /*
- * Only use the first configuration found.
- */
-}
-
-static int __init smp_scan_config (unsigned long base, unsigned long length)
-{
- extern void __bad_mpf_size(void);
- unsigned int *bp = phys_to_virt(base);
- struct intel_mp_floating *mpf;
-
- Dprintk("Scan SMP from %p for %ld bytes.\n", bp,length);
- if (sizeof(*mpf) != 16)
- __bad_mpf_size();
-
- while (length > 0) {
- mpf = (struct intel_mp_floating *)bp;
- if ((*bp == SMP_MAGIC_IDENT) &&
- (mpf->mpf_length == 1) &&
- !mpf_checksum((unsigned char *)bp, 16) &&
- ((mpf->mpf_specification == 1)
- || (mpf->mpf_specification == 4)) ) {
-
- smp_found_config = 1;
- reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE);
- if (mpf->mpf_physptr)
- reserve_bootmem_generic(mpf->mpf_physptr, PAGE_SIZE);
- mpf_found = mpf;
- return 1;
- }
- bp += 4;
- length -= 16;
- }
- return 0;
-}
-
-void __init find_smp_config(void)
-{
- unsigned int address;
-
- /*
- * FIXME: Linux assumes you have 640K of base ram..
- * this continues the error...
- *
- * 1) Scan the bottom 1K for a signature
- * 2) Scan the top 1K of base RAM
- * 3) Scan the 64K of bios
- */
- if (smp_scan_config(0x0,0x400) ||
- smp_scan_config(639*0x400,0x400) ||
- smp_scan_config(0xF0000,0x10000))
- return;
- /*
- * If it is an SMP machine we should know now.
- *
- * there is a real-mode segmented pointer pointing to the
- * 4K EBDA area at 0x40E, calculate and scan it here.
- *
- * NOTE! There are Linux loaders that will corrupt the EBDA
- * area, and as such this kind of SMP config may be less
- * trustworthy, simply because the SMP table may have been
- * stomped on during early boot. These loaders are buggy and
- * should be fixed.
- */
-
- address = *(unsigned short *)phys_to_virt(0x40E);
- address <<= 4;
- if (smp_scan_config(address, 0x1000))
- return;
-
- /* If we have come this far, we did not find an MP table */
- printk(KERN_INFO "No mptable found.\n");
-}
-
-/* --------------------------------------------------------------------------
- ACPI-based MP Configuration
- -------------------------------------------------------------------------- */
-
-#ifdef CONFIG_ACPI
-
-void __init mp_register_lapic_address(u64 address)
-{
- mp_lapic_addr = (unsigned long) address;
- set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
- if (boot_cpu_id == -1U)
- boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
-}
-
-void __cpuinit mp_register_lapic (u8 id, u8 enabled)
-{
- struct mpc_config_processor processor;
- int boot_cpu = 0;
-
- if (id == boot_cpu_id)
- boot_cpu = 1;
-
- processor.mpc_type = MP_PROCESSOR;
- processor.mpc_apicid = id;
- processor.mpc_apicver = 0;
- processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
- processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
- processor.mpc_cpufeature = 0;
- processor.mpc_featureflag = 0;
- processor.mpc_reserved[0] = 0;
- processor.mpc_reserved[1] = 0;
-
- MP_processor_info(&processor);
-}
-
-#define MP_ISA_BUS 0
-#define MP_MAX_IOAPIC_PIN 127
-
-static struct mp_ioapic_routing {
- int apic_id;
- int gsi_start;
- int gsi_end;
- u32 pin_programmed[4];
-} mp_ioapic_routing[MAX_IO_APICS];
-
-static int mp_find_ioapic(int gsi)
-{
- int i = 0;
-
- /* Find the IOAPIC that manages this GSI. */
- for (i = 0; i < nr_ioapics; i++) {
- if ((gsi >= mp_ioapic_routing[i].gsi_start)
- && (gsi <= mp_ioapic_routing[i].gsi_end))
- return i;
- }
-
- printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
- return -1;
-}
-
-static u8 uniq_ioapic_id(u8 id)
-{
- int i;
- DECLARE_BITMAP(used, 256);
- bitmap_zero(used, 256);
- for (i = 0; i < nr_ioapics; i++) {
- struct mpc_config_ioapic *ia = &mp_ioapics[i];
- __set_bit(ia->mpc_apicid, used);
- }
- if (!test_bit(id, used))
- return id;
- return find_first_zero_bit(used, 256);
-}
-
-void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
-{
- int idx = 0;
-
- if (bad_ioapic(address))
- return;
-
- idx = nr_ioapics;
-
- mp_ioapics[idx].mpc_type = MP_IOAPIC;
- mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
- mp_ioapics[idx].mpc_apicaddr = address;
-
- set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
- mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id);
- mp_ioapics[idx].mpc_apicver = 0;
-
- /*
- * Build basic IRQ lookup table to facilitate gsi->io_apic lookups
- * and to prevent reprogramming of IOAPIC pins (PCI IRQs).
- */
- mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
- mp_ioapic_routing[idx].gsi_start = gsi_base;
- mp_ioapic_routing[idx].gsi_end = gsi_base +
- io_apic_get_redir_entries(idx);
-
- printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, "
- "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
- mp_ioapics[idx].mpc_apicaddr,
- mp_ioapic_routing[idx].gsi_start,
- mp_ioapic_routing[idx].gsi_end);
-
- nr_ioapics++;
-}
-
-void __init
-mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
-{
- struct mpc_config_intsrc intsrc;
- int ioapic = -1;
- int pin = -1;
-
- /*
- * Convert 'gsi' to 'ioapic.pin'.
- */
- ioapic = mp_find_ioapic(gsi);
- if (ioapic < 0)
- return;
- pin = gsi - mp_ioapic_routing[ioapic].gsi_start;
-
- /*
- * TBD: This check is for faulty timer entries, where the override
- * erroneously sets the trigger to level, resulting in a HUGE
- * increase of timer interrupts!
- */
- if ((bus_irq == 0) && (trigger == 3))
- trigger = 1;
-
- intsrc.mpc_type = MP_INTSRC;
- intsrc.mpc_irqtype = mp_INT;
- intsrc.mpc_irqflag = (trigger << 2) | polarity;
- intsrc.mpc_srcbus = MP_ISA_BUS;
- intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
- intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
- intsrc.mpc_dstirq = pin; /* INTIN# */
-
- Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
- intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
- (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
- intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
-
- mp_irqs[mp_irq_entries] = intsrc;
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!\n");
-}
-
-void __init mp_config_acpi_legacy_irqs(void)
-{
- struct mpc_config_intsrc intsrc;
- int i = 0;
- int ioapic = -1;
-
- /*
- * Fabricate the legacy ISA bus (bus #31).
- */
- set_bit(MP_ISA_BUS, mp_bus_not_pci);
-
- /*
- * Locate the IOAPIC that manages the ISA IRQs (0-15).
- */
- ioapic = mp_find_ioapic(0);
- if (ioapic < 0)
- return;
-
- intsrc.mpc_type = MP_INTSRC;
- intsrc.mpc_irqflag = 0; /* Conforming */
- intsrc.mpc_srcbus = MP_ISA_BUS;
- intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
-
- /*
- * Use the default configuration for the IRQs 0-15. Unless
- * overridden by (MADT) interrupt source override entries.
- */
- for (i = 0; i < 16; i++) {
- int idx;
-
- for (idx = 0; idx < mp_irq_entries; idx++) {
- struct mpc_config_intsrc *irq = mp_irqs + idx;
-
- /* Do we already have a mapping for this ISA IRQ? */
- if (irq->mpc_srcbus == MP_ISA_BUS && irq->mpc_srcbusirq == i)
- break;
-
- /* Do we already have a mapping for this IOAPIC pin */
- if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
- (irq->mpc_dstirq == i))
- break;
- }
-
- if (idx != mp_irq_entries) {
- printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
- continue; /* IRQ already used */
- }
-
- intsrc.mpc_irqtype = mp_INT;
- intsrc.mpc_srcbusirq = i; /* Identity mapped */
- intsrc.mpc_dstirq = i;
-
- Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
- "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
- (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
- intsrc.mpc_srcbusirq, intsrc.mpc_dstapic,
- intsrc.mpc_dstirq);
-
- mp_irqs[mp_irq_entries] = intsrc;
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!\n");
- }
-}
-
-int mp_register_gsi(u32 gsi, int triggering, int polarity)
-{
- int ioapic = -1;
- int ioapic_pin = 0;
- int idx, bit = 0;
-
- if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
- return gsi;
-
- /* Don't set up the ACPI SCI because it's already set up */
- if (acpi_gbl_FADT.sci_interrupt == gsi)
- return gsi;
-
- ioapic = mp_find_ioapic(gsi);
- if (ioapic < 0) {
- printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
- return gsi;
- }
-
- ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_start;
-
- /*
- * Avoid pin reprogramming. PRTs typically include entries
- * with redundant pin->gsi mappings (but unique PCI devices);
- * we only program the IOAPIC on the first.
- */
- bit = ioapic_pin % 32;
- idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
- if (idx > 3) {
- printk(KERN_ERR "Invalid reference to IOAPIC pin "
- "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
- ioapic_pin);
- return gsi;
- }
- if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
- Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
- mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
- return gsi;
- }
-
- mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
-
- io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
- triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
- polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
- return gsi;
-}
-#endif /*CONFIG_ACPI*/
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index af51ea8400b2..4dfb40530057 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -65,8 +65,8 @@ static loff_t msr_seek(struct file *file, loff_t offset, int orig)
return ret;
}
-static ssize_t msr_read(struct file *file, char __user * buf,
- size_t count, loff_t * ppos)
+static ssize_t msr_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
{
u32 __user *tmp = (u32 __user *) buf;
u32 data[2];
diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c
index 6a0aa7038685..8421d0ac6f22 100644
--- a/arch/x86/kernel/nmi_32.c
+++ b/arch/x86/kernel/nmi_32.c
@@ -22,9 +22,11 @@
#include <linux/cpumask.h>
#include <linux/kernel_stat.h>
#include <linux/kdebug.h>
+#include <linux/slab.h>
#include <asm/smp.h>
#include <asm/nmi.h>
+#include <asm/timer.h>
#include "mach_traps.h"
@@ -67,7 +69,7 @@ static __init void nmi_cpu_busy(void *data)
}
#endif
-static int __init check_nmi_watchdog(void)
+int __init check_nmi_watchdog(void)
{
unsigned int *prev_nmi_count;
int cpu;
@@ -80,7 +82,7 @@ static int __init check_nmi_watchdog(void)
prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
if (!prev_nmi_count)
- return -1;
+ goto error;
printk(KERN_INFO "Testing NMI watchdog ... ");
@@ -117,7 +119,7 @@ static int __init check_nmi_watchdog(void)
if (!atomic_read(&nmi_active)) {
kfree(prev_nmi_count);
atomic_set(&nmi_active, -1);
- return -1;
+ goto error;
}
printk("OK.\n");
@@ -128,9 +130,11 @@ static int __init check_nmi_watchdog(void)
kfree(prev_nmi_count);
return 0;
+error:
+ timer_ack = !cpu_has_tsc;
+
+ return -1;
}
-/* This needs to happen later in boot so counters are working */
-late_initcall(check_nmi_watchdog);
static int __init setup_nmi_watchdog(char *str)
{
diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c
index 9a4fde74bee1..11f9130ac513 100644
--- a/arch/x86/kernel/nmi_64.c
+++ b/arch/x86/kernel/nmi_64.c
@@ -26,6 +26,8 @@
#include <asm/proto.h>
#include <asm/mce.h>
+#include <mach_traps.h>
+
int unknown_nmi_panic;
int nmi_watchdog_enabled;
int panic_on_unrecovered_nmi;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 075962cc75ab..3733412d1357 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -206,13 +206,6 @@ static struct resource reserve_ioports = {
.flags = IORESOURCE_IO | IORESOURCE_BUSY,
};
-static struct resource reserve_iomem = {
- .start = 0,
- .end = -1,
- .name = "paravirt-iomem",
- .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
-};
-
/*
* Reserve the whole legacy IO space to prevent any legacy drivers
* from wasting time probing for their hardware. This is a fairly
@@ -222,16 +215,7 @@ static struct resource reserve_iomem = {
*/
int paravirt_disable_iospace(void)
{
- int ret;
-
- ret = request_resource(&ioport_resource, &reserve_ioports);
- if (ret == 0) {
- ret = request_resource(&iomem_resource, &reserve_iomem);
- if (ret)
- release_resource(&reserve_ioports);
- }
-
- return ret;
+ return request_resource(&ioport_resource, &reserve_ioports);
}
static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;
diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c
index 375cb2bc45be..ada5a0604992 100644
--- a/arch/x86/kernel/pci-dma_64.c
+++ b/arch/x86/kernel/pci-dma_64.c
@@ -232,32 +232,32 @@ static __init int iommu_setup(char *p)
return -EINVAL;
while (*p) {
- if (!strncmp(p,"off",3))
+ if (!strncmp(p, "off", 3))
no_iommu = 1;
/* gart_parse_options has more force support */
- if (!strncmp(p,"force",5))
+ if (!strncmp(p, "force", 5))
force_iommu = 1;
- if (!strncmp(p,"noforce",7)) {
+ if (!strncmp(p, "noforce", 7)) {
iommu_merge = 0;
force_iommu = 0;
}
- if (!strncmp(p, "biomerge",8)) {
+ if (!strncmp(p, "biomerge", 8)) {
iommu_bio_merge = 4096;
iommu_merge = 1;
force_iommu = 1;
}
- if (!strncmp(p, "panic",5))
+ if (!strncmp(p, "panic", 5))
panic_on_overflow = 1;
- if (!strncmp(p, "nopanic",7))
+ if (!strncmp(p, "nopanic", 7))
panic_on_overflow = 0;
- if (!strncmp(p, "merge",5)) {
+ if (!strncmp(p, "merge", 5)) {
iommu_merge = 1;
force_iommu = 1;
}
- if (!strncmp(p, "nomerge",7))
+ if (!strncmp(p, "nomerge", 7))
iommu_merge = 0;
- if (!strncmp(p, "forcesac",8))
+ if (!strncmp(p, "forcesac", 8))
iommu_sac_force = 1;
if (!strncmp(p, "allowdac", 8))
forbid_dac = 0;
@@ -265,7 +265,7 @@ static __init int iommu_setup(char *p)
forbid_dac = -1;
#ifdef CONFIG_SWIOTLB
- if (!strncmp(p, "soft",4))
+ if (!strncmp(p, "soft", 4))
swiotlb = 1;
#endif
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 43930e73f657..3903a8f2eb97 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -113,20 +113,13 @@ void default_idle(void)
local_irq_disable();
if (!need_resched()) {
- ktime_t t0, t1;
- u64 t0n, t1n;
-
- t0 = ktime_get();
- t0n = ktime_to_ns(t0);
safe_halt(); /* enables interrupts racelessly */
local_irq_disable();
- t1 = ktime_get();
- t1n = ktime_to_ns(t1);
- sched_clock_idle_wakeup_event(t1n - t0n);
}
local_irq_enable();
current_thread_info()->status |= TS_POLLING;
} else {
+ local_irq_enable();
/* loop is done by the caller */
cpu_relax();
}
@@ -142,6 +135,7 @@ EXPORT_SYMBOL(default_idle);
*/
static void poll_idle(void)
{
+ local_irq_enable();
cpu_relax();
}
@@ -248,8 +242,11 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
__monitor((void *)&current_thread_info()->flags, 0, 0);
smp_mb();
if (!need_resched())
- __mwait(ax, cx);
- }
+ __sti_mwait(ax, cx);
+ else
+ local_irq_enable();
+ } else
+ local_irq_enable();
}
/* Default MONITOR/MWAIT with no hints, used for default C1 state */
@@ -332,7 +329,7 @@ void __show_registers(struct pt_regs *regs, int all)
init_utsname()->version);
printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
- 0xffff & regs->cs, regs->ip, regs->flags,
+ (u16)regs->cs, regs->ip, regs->flags,
smp_processor_id());
print_symbol("EIP is at %s\n", regs->ip);
@@ -341,8 +338,7 @@ void __show_registers(struct pt_regs *regs, int all)
printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
regs->si, regs->di, regs->bp, sp);
printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
- regs->ds & 0xffff, regs->es & 0xffff,
- regs->fs & 0xffff, gs, ss);
+ (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss);
if (!all)
return;
@@ -513,6 +509,21 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
return err;
}
+void
+start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
+{
+ __asm__("movl %0, %%gs" :: "r"(0));
+ regs->fs = 0;
+ set_fs(USER_DS);
+ regs->ds = __USER_DS;
+ regs->es = __USER_DS;
+ regs->ss = __USER_DS;
+ regs->cs = __USER_CS;
+ regs->ip = new_ip;
+ regs->sp = new_sp;
+}
+EXPORT_SYMBOL_GPL(start_thread);
+
#ifdef CONFIG_SECCOMP
static void hard_disable_TSC(void)
{
@@ -550,12 +561,12 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
/* we clear debugctl to make sure DS
* is not in use when we change it */
debugctl = 0;
- wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
+ update_debugctlmsr(0);
wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
}
if (next->debugctlmsr != debugctl)
- wrmsr(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr, 0);
+ update_debugctlmsr(next->debugctlmsr);
if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
set_debugreg(next->debugreg0, 0);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 46c4c546b499..e75ccc8a2b87 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -107,16 +107,8 @@ void default_idle(void)
smp_mb();
local_irq_disable();
if (!need_resched()) {
- ktime_t t0, t1;
- u64 t0n, t1n;
-
- t0 = ktime_get();
- t0n = ktime_to_ns(t0);
safe_halt(); /* enables interrupts racelessly */
local_irq_disable();
- t1 = ktime_get();
- t1n = ktime_to_ns(t1);
- sched_clock_idle_wakeup_event(t1n - t0n);
}
local_irq_enable();
current_thread_info()->status |= TS_POLLING;
@@ -528,6 +520,21 @@ out:
return err;
}
+void
+start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
+{
+ asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0));
+ load_gs_index(0);
+ regs->ip = new_ip;
+ regs->sp = new_sp;
+ write_pda(oldrsp, new_sp);
+ regs->cs = __USER_CS;
+ regs->ss = __USER_DS;
+ regs->flags = 0x200;
+ set_fs(USER_DS);
+}
+EXPORT_SYMBOL_GPL(start_thread);
+
/*
* This special macro can be used to load a debugging register
*/
@@ -548,12 +555,12 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
/* we clear debugctl to make sure DS
* is not in use when we change it */
debugctl = 0;
- wrmsrl(MSR_IA32_DEBUGCTLMSR, 0);
+ update_debugctlmsr(0);
wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
}
if (next->debugctlmsr != debugctl)
- wrmsrl(MSR_IA32_DEBUGCTLMSR, next->debugctlmsr);
+ update_debugctlmsr(next->debugctlmsr);
if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
loaddebug(next, 0);
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index eb92ccbb3502..559c1b027417 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1456,7 +1456,6 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
/* notification of system call entry/exit
* - triggered by current->work.syscall_trace
*/
-__attribute__((regparm(3)))
int do_syscall_trace(struct pt_regs *regs, int entryexit)
{
int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 484c4a80d38a..9692202d3bfb 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -1,5 +1,4 @@
#include <linux/module.h>
-#include <linux/init.h>
#include <linux/reboot.h>
#include <linux/init.h>
#include <linux/pm.h>
@@ -412,12 +411,12 @@ static void native_machine_shutdown(void)
#ifdef CONFIG_X86_32
/* See if there has been given a command line override */
if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) &&
- cpu_isset(reboot_cpu, cpu_online_map))
+ cpu_online(reboot_cpu))
reboot_cpu_id = reboot_cpu;
#endif
/* Make certain the cpu I'm about to reboot on is online */
- if (!cpu_isset(reboot_cpu_id, cpu_online_map))
+ if (!cpu_online(reboot_cpu_id))
reboot_cpu_id = smp_processor_id();
/* Make certain I only run on the appropriate processor */
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index f151d6fae462..c30fe25d470d 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -9,18 +9,19 @@
#include <linux/linkage.h>
#include <asm/page.h>
#include <asm/kexec.h>
+#include <asm/processor-flags.h>
+#include <asm/pgtable.h>
/*
* Must be relocatable PIC code callable as a C function
*/
#define PTR(x) (x << 2)
-#define PAGE_ALIGNED (1 << PAGE_SHIFT)
-#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
-#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */
+#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+#define PAE_PGD_ATTR (_PAGE_PRESENT)
.text
- .align PAGE_ALIGNED
+ .align PAGE_SIZE
.globl relocate_kernel
relocate_kernel:
movl 8(%esp), %ebp /* list of pages */
@@ -155,7 +156,7 @@ relocate_new_kernel:
movl %eax, %cr3
/* setup a new stack at the end of the physical control page */
- lea 4096(%edi), %esp
+ lea PAGE_SIZE(%edi), %esp
/* jump to identity mapped page */
movl %edi, %eax
@@ -168,16 +169,16 @@ identity_mapped:
pushl %edx
/* Set cr0 to a known state:
- * 31 0 == Paging disabled
- * 18 0 == Alignment check disabled
- * 16 0 == Write protect disabled
- * 3 0 == No task switch
- * 2 0 == Don't do FP software emulation.
- * 0 1 == Proctected mode enabled
+ * - Paging disabled
+ * - Alignment check disabled
+ * - Write protect disabled
+ * - No task switch
+ * - Don't do FP software emulation.
+ * - Protected mode enabled
*/
movl %cr0, %eax
- andl $~((1<<31)|(1<<18)|(1<<16)|(1<<3)|(1<<2)), %eax
- orl $(1<<0), %eax
+ andl $~(X86_CR0_PG | X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %eax
+ orl $(X86_CR0_PE), %eax
movl %eax, %cr0
/* clear cr4 if applicable */
@@ -186,8 +187,7 @@ identity_mapped:
/* Set cr4 to a known state:
* Setting everything to zero seems safe.
*/
- movl %cr4, %eax
- andl $0, %eax
+ xorl %eax, %eax
movl %eax, %cr4
jmp 1f
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 14e95872c6a3..f5afe665a82b 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -9,17 +9,18 @@
#include <linux/linkage.h>
#include <asm/page.h>
#include <asm/kexec.h>
+#include <asm/processor-flags.h>
+#include <asm/pgtable.h>
/*
* Must be relocatable PIC code callable as a C function
*/
#define PTR(x) (x << 3)
-#define PAGE_ALIGNED (1 << PAGE_SHIFT)
-#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
+#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
.text
- .align PAGE_ALIGNED
+ .align PAGE_SIZE
.code64
.globl relocate_kernel
relocate_kernel:
@@ -160,7 +161,7 @@ relocate_new_kernel:
movq %r9, %cr3
/* setup a new stack at the end of the physical control page */
- lea 4096(%r8), %rsp
+ lea PAGE_SIZE(%r8), %rsp
/* jump to identity mapped page */
addq $(identity_mapped - relocate_kernel), %r8
@@ -172,33 +173,22 @@ identity_mapped:
pushq %rdx
/* Set cr0 to a known state:
- * 31 1 == Paging enabled
- * 18 0 == Alignment check disabled
- * 16 0 == Write protect disabled
- * 3 0 == No task switch
- * 2 0 == Don't do FP software emulation.
- * 0 1 == Proctected mode enabled
+ * - Paging enabled
+ * - Alignment check disabled
+ * - Write protect disabled
+ * - No task switch
+ * - Don't do FP software emulation.
+ * - Protected mode enabled
*/
movq %cr0, %rax
- andq $~((1<<18)|(1<<16)|(1<<3)|(1<<2)), %rax
- orl $((1<<31)|(1<<0)), %eax
+ andq $~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax
+ orl $(X86_CR0_PG | X86_CR0_PE), %eax
movq %rax, %cr0
/* Set cr4 to a known state:
- * 10 0 == xmm exceptions disabled
- * 9 0 == xmm registers instructions disabled
- * 8 0 == performance monitoring counter disabled
- * 7 0 == page global disabled
- * 6 0 == machine check exceptions disabled
- * 5 1 == physical address extension enabled
- * 4 0 == page size extensions disabled
- * 3 0 == Debug extensions disabled
- * 2 0 == Time stamp disable (disabled)
- * 1 0 == Protected mode virtual interrupts disabled
- * 0 0 == VME disabled
+ * - physical address extension enabled
*/
-
- movq $((1<<5)), %rax
+ movq $X86_CR4_PAE, %rax
movq %rax, %cr4
jmp 1f
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index eb9b1a198f5e..9615eee9b775 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -9,7 +9,6 @@
#include <asm/vsyscall.h>
#ifdef CONFIG_X86_32
-# define CMOS_YEARS_OFFS 1900
/*
* This is a special lock that is owned by the CPU and holds the index
* register we are working with. It is required for NMI access to the
@@ -17,14 +16,11 @@
*/
volatile unsigned long cmos_lock = 0;
EXPORT_SYMBOL(cmos_lock);
-#else
-/*
- * x86-64 systems only exists since 2002.
- * This will work up to Dec 31, 2100
- */
-# define CMOS_YEARS_OFFS 2000
#endif
+/* For two-digit years, assume the time is always after the year 2000 */
+#define CMOS_YEARS_OFFS 2000
+
DEFINE_SPINLOCK(rtc_lock);
EXPORT_SYMBOL(rtc_lock);
@@ -98,7 +94,7 @@ int mach_set_rtc_mmss(unsigned long nowtime)
unsigned long mach_get_cmos_time(void)
{
- unsigned int year, mon, day, hour, min, sec, century = 0;
+ unsigned int status, year, mon, day, hour, min, sec, century = 0;
/*
* If UIP is clear, then we have >= 244 microseconds before
@@ -116,14 +112,16 @@ unsigned long mach_get_cmos_time(void)
mon = CMOS_READ(RTC_MONTH);
year = CMOS_READ(RTC_YEAR);
-#if defined(CONFIG_ACPI) && defined(CONFIG_X86_64)
- /* CHECKME: Is this really 64bit only ??? */
+#ifdef CONFIG_ACPI
if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
acpi_gbl_FADT.century)
century = CMOS_READ(acpi_gbl_FADT.century);
#endif
- if (RTC_ALWAYS_BCD || !(CMOS_READ(RTC_CONTROL) & RTC_DM_BINARY)) {
+ status = CMOS_READ(RTC_CONTROL);
+ WARN_ON_ONCE(RTC_ALWAYS_BCD && (status & RTC_DM_BINARY));
+
+ if (RTC_ALWAYS_BCD || !(status & RTC_DM_BINARY)) {
BCD_TO_BIN(sec);
BCD_TO_BIN(min);
BCD_TO_BIN(hour);
@@ -136,11 +134,8 @@ unsigned long mach_get_cmos_time(void)
BCD_TO_BIN(century);
year += century * 100;
printk(KERN_INFO "Extended CMOS year: %d\n", century * 100);
- } else {
+ } else
year += CMOS_YEARS_OFFS;
- if (year < 1970)
- year += 100;
- }
return mktime(year, mon, day, hour, min, sec);
}
@@ -151,8 +146,8 @@ unsigned char rtc_cmos_read(unsigned char addr)
unsigned char val;
lock_cmos_prefix(addr);
- outb_p(addr, RTC_PORT(0));
- val = inb_p(RTC_PORT(1));
+ outb(addr, RTC_PORT(0));
+ val = inb(RTC_PORT(1));
lock_cmos_suffix(addr);
return val;
}
@@ -161,8 +156,8 @@ EXPORT_SYMBOL(rtc_cmos_read);
void rtc_cmos_write(unsigned char val, unsigned char addr)
{
lock_cmos_prefix(addr);
- outb_p(addr, RTC_PORT(0));
- outb_p(val, RTC_PORT(1));
+ outb(addr, RTC_PORT(0));
+ outb(val, RTC_PORT(1));
lock_cmos_suffix(addr);
}
EXPORT_SYMBOL(rtc_cmos_write);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
new file mode 100644
index 000000000000..ed157c90412e
--- /dev/null
+++ b/arch/x86/kernel/setup.c
@@ -0,0 +1,113 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/percpu.h>
+#include <asm/smp.h>
+#include <asm/percpu.h>
+#include <asm/sections.h>
+#include <asm/processor.h>
+#include <asm/setup.h>
+#include <asm/topology.h>
+#include <asm/mpspec.h>
+#include <asm/apicdef.h>
+
+unsigned int num_processors;
+unsigned disabled_cpus __cpuinitdata;
+/* Processor that is doing the boot up */
+unsigned int boot_cpu_physical_apicid = -1U;
+EXPORT_SYMBOL(boot_cpu_physical_apicid);
+
+/* Bitmask of physically existing CPUs */
+physid_mask_t phys_cpu_present_map;
+
+DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
+EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
+
+#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_SMP)
+/*
+ * Copy data used in early init routines from the initial arrays to the
+ * per cpu data areas. These arrays then become expendable and the
+ * *_early_ptr's are zeroed indicating that the static arrays are gone.
+ */
+static void __init setup_per_cpu_maps(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ per_cpu(x86_cpu_to_apicid, cpu) = x86_cpu_to_apicid_init[cpu];
+ per_cpu(x86_bios_cpu_apicid, cpu) =
+ x86_bios_cpu_apicid_init[cpu];
+#ifdef CONFIG_NUMA
+ per_cpu(x86_cpu_to_node_map, cpu) =
+ x86_cpu_to_node_map_init[cpu];
+#endif
+ }
+
+ /* indicate the early static arrays will soon be gone */
+ x86_cpu_to_apicid_early_ptr = NULL;
+ x86_bios_cpu_apicid_early_ptr = NULL;
+#ifdef CONFIG_NUMA
+ x86_cpu_to_node_map_early_ptr = NULL;
+#endif
+}
+
+#ifdef CONFIG_X86_32
+/*
+ * Great future not-so-futuristic plan: make i386 and x86_64 do it
+ * the same way
+ */
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(__per_cpu_offset);
+#endif
+
+/*
+ * Great future plan:
+ * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
+ * Always point %gs to its beginning
+ */
+void __init setup_per_cpu_areas(void)
+{
+ int i;
+ unsigned long size;
+
+#ifdef CONFIG_HOTPLUG_CPU
+ prefill_possible_map();
+#endif
+
+ /* Copy section for each CPU (we discard the original) */
+ size = PERCPU_ENOUGH_ROOM;
+ printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n",
+ size);
+
+ for_each_possible_cpu(i) {
+ char *ptr;
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+ ptr = alloc_bootmem_pages(size);
+#else
+ int node = early_cpu_to_node(i);
+ if (!node_online(node) || !NODE_DATA(node)) {
+ ptr = alloc_bootmem_pages(size);
+ printk(KERN_INFO
+ "cpu %d has no node or node-local memory\n", i);
+ } else
+ ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
+#endif
+ if (!ptr)
+ panic("Cannot allocate cpu data for CPU %d\n", i);
+#ifdef CONFIG_X86_64
+ cpu_pda(i)->data_offset = ptr - __per_cpu_start;
+#else
+ __per_cpu_offset[i] = ptr - __per_cpu_start;
+#endif
+ memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+ }
+
+ /* Setup percpu data maps */
+ setup_per_cpu_maps();
+}
+
+#endif
diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c
index e24c45677094..9042fb0e36f5 100644
--- a/arch/x86/kernel/setup64.c
+++ b/arch/x86/kernel/setup64.c
@@ -11,6 +11,7 @@
#include <linux/bootmem.h>
#include <linux/bitops.h>
#include <linux/module.h>
+#include <linux/kgdb.h>
#include <asm/pda.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
@@ -23,6 +24,7 @@
#include <asm/proto.h>
#include <asm/sections.h>
#include <asm/setup.h>
+#include <asm/genapic.h>
#ifndef CONFIG_DEBUG_BOOT_PARAMS
struct boot_params __initdata boot_params;
@@ -85,83 +87,6 @@ static int __init nonx32_setup(char *str)
}
__setup("noexec32=", nonx32_setup);
-/*
- * Copy data used in early init routines from the initial arrays to the
- * per cpu data areas. These arrays then become expendable and the
- * *_early_ptr's are zeroed indicating that the static arrays are gone.
- */
-static void __init setup_per_cpu_maps(void)
-{
- int cpu;
-
- for_each_possible_cpu(cpu) {
-#ifdef CONFIG_SMP
- if (per_cpu_offset(cpu)) {
-#endif
- per_cpu(x86_cpu_to_apicid, cpu) =
- x86_cpu_to_apicid_init[cpu];
- per_cpu(x86_bios_cpu_apicid, cpu) =
- x86_bios_cpu_apicid_init[cpu];
-#ifdef CONFIG_NUMA
- per_cpu(x86_cpu_to_node_map, cpu) =
- x86_cpu_to_node_map_init[cpu];
-#endif
-#ifdef CONFIG_SMP
- }
- else
- printk(KERN_NOTICE "per_cpu_offset zero for cpu %d\n",
- cpu);
-#endif
- }
-
- /* indicate the early static arrays will soon be gone */
- x86_cpu_to_apicid_early_ptr = NULL;
- x86_bios_cpu_apicid_early_ptr = NULL;
-#ifdef CONFIG_NUMA
- x86_cpu_to_node_map_early_ptr = NULL;
-#endif
-}
-
-/*
- * Great future plan:
- * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
- * Always point %gs to its beginning
- */
-void __init setup_per_cpu_areas(void)
-{
- int i;
- unsigned long size;
-
-#ifdef CONFIG_HOTPLUG_CPU
- prefill_possible_map();
-#endif
-
- /* Copy section for each CPU (we discard the original) */
- size = PERCPU_ENOUGH_ROOM;
-
- printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
- for_each_cpu_mask (i, cpu_possible_map) {
- char *ptr;
-#ifndef CONFIG_NEED_MULTIPLE_NODES
- ptr = alloc_bootmem_pages(size);
-#else
- int node = early_cpu_to_node(i);
-
- if (!node_online(node) || !NODE_DATA(node))
- ptr = alloc_bootmem_pages(size);
- else
- ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
-#endif
- if (!ptr)
- panic("Cannot allocate cpu data for CPU %d\n", i);
- cpu_pda(i)->data_offset = ptr - __per_cpu_start;
- memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
- }
-
- /* setup percpu data maps early */
- setup_per_cpu_maps();
-}
-
void pda_init(int cpu)
{
struct x8664_pda *pda = cpu_pda(cpu);
@@ -327,6 +252,17 @@ void __cpuinit cpu_init (void)
load_TR_desc();
load_LDT(&init_mm.context);
+#ifdef CONFIG_KGDB
+ /*
+ * If the kgdb is connected no debug regs should be altered. This
+ * is only applicable when KGDB and a KGDB I/O module are built
+ * into the kernel and you are using early debugging with
+ * kgdbwait. KGDB will control the kernel HW breakpoint registers.
+ */
+ if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
+ arch_kgdb_ops.correct_hw_break();
+ else {
+#endif
/*
* Clear all 6 debug registers:
*/
@@ -337,8 +273,15 @@ void __cpuinit cpu_init (void)
set_debugreg(0UL, 3);
set_debugreg(0UL, 6);
set_debugreg(0UL, 7);
+#ifdef CONFIG_KGDB
+ /* If the kgdb is connected no debug regs should be altered. */
+ }
+#endif
fpu_init();
raw_local_save_flags(kernel_eflags);
+
+ if (is_uv_system())
+ uv_cpu_init();
}
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 2b3e5d45176b..5b0bffb7fcc9 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -62,8 +62,9 @@
#include <asm/io.h>
#include <asm/vmi.h>
#include <setup_arch.h>
-#include <bios_ebda.h>
+#include <asm/bios_ebda.h>
#include <asm/cacheflush.h>
+#include <asm/processor.h>
/* This value is set up by the early boot code to point to the value
immediately after the boot time page tables. It contains a *physical*
@@ -154,6 +155,8 @@ struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
EXPORT_SYMBOL(boot_cpu_data);
+unsigned int def_to_bigsmp;
+
#ifndef CONFIG_X86_PAE
unsigned long mmu_cr4_features;
#else
@@ -189,7 +192,7 @@ EXPORT_SYMBOL(ist_info);
extern void early_cpu_init(void);
extern int root_mountflags;
-unsigned long saved_videomode;
+unsigned long saved_video_mode;
#define RAMDISK_IMAGE_START_MASK 0x07FF
#define RAMDISK_PROMPT_FLAG 0x8000
@@ -227,7 +230,7 @@ static inline void copy_edd(void)
}
#endif
-int __initdata user_defined_memmap = 0;
+int __initdata user_defined_memmap;
/*
* "mem=nopentium" disables the 4MB page tables.
@@ -385,15 +388,56 @@ unsigned long __init find_max_low_pfn(void)
return max_low_pfn;
}
+#define BIOS_EBDA_SEGMENT 0x40E
+#define BIOS_LOWMEM_KILOBYTES 0x413
+
/*
- * workaround for Dell systems that neglect to reserve EBDA
+ * The BIOS places the EBDA/XBDA at the top of conventional
+ * memory, and usually decreases the reported amount of
+ * conventional memory (int 0x12) too. This also contains a
+ * workaround for Dell systems that neglect to reserve EBDA.
+ * The same workaround also avoids a problem with the AMD768MPX
+ * chipset: reserve a page before VGA to prevent PCI prefetch
+ * into it (errata #56). Usually the page is reserved anyway,
+ * unless you have no PS/2 mouse plugged in.
*/
static void __init reserve_ebda_region(void)
{
- unsigned int addr;
- addr = get_bios_ebda();
- if (addr)
- reserve_bootmem(addr, PAGE_SIZE, BOOTMEM_DEFAULT);
+ unsigned int lowmem, ebda_addr;
+
+ /* To determine the position of the EBDA and the */
+ /* end of conventional memory, we need to look at */
+ /* the BIOS data area. In a paravirtual environment */
+ /* that area is absent. We'll just have to assume */
+ /* that the paravirt case can handle memory setup */
+ /* correctly, without our help. */
+ if (paravirt_enabled())
+ return;
+
+ /* end of low (conventional) memory */
+ lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
+ lowmem <<= 10;
+
+ /* start of EBDA area */
+ ebda_addr = *(unsigned short *)__va(BIOS_EBDA_SEGMENT);
+ ebda_addr <<= 4;
+
+ /* Fixup: bios puts an EBDA in the top 64K segment */
+ /* of conventional memory, but does not adjust lowmem. */
+ if ((lowmem - ebda_addr) <= 0x10000)
+ lowmem = ebda_addr;
+
+ /* Fixup: bios does not report an EBDA at all. */
+ /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
+ if ((ebda_addr == 0) && (lowmem >= 0x9f000))
+ lowmem = 0x9f000;
+
+ /* Paranoia: should never happen, but... */
+ if ((lowmem == 0) || (lowmem >= 0x100000))
+ lowmem = 0x9f000;
+
+ /* reserve all memory between lowmem and the 1MB mark */
+ reserve_bootmem(lowmem, 0x100000 - lowmem, BOOTMEM_DEFAULT);
}
#ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -617,16 +661,9 @@ void __init setup_bootmem_allocator(void)
*/
reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT);
- /* reserve EBDA region, it's a 4K region */
+ /* reserve EBDA region */
reserve_ebda_region();
- /* could be an AMD 768MPX chipset. Reserve a page before VGA to prevent
- PCI prefetch into it (errata #56). Usually the page is reserved anyways,
- unless you have no PS/2 mouse plugged in. */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
- boot_cpu_data.x86 == 6)
- reserve_bootmem(0xa0000 - 4096, 4096, BOOTMEM_DEFAULT);
-
#ifdef CONFIG_SMP
/*
* But first pinch a few for the stack/trampoline stuff
@@ -687,6 +724,18 @@ char * __init __attribute__((weak)) memory_setup(void)
return machine_specific_memory_setup();
}
+#ifdef CONFIG_NUMA
+/*
+ * One golden day, when everything between i386 and x86_64 is
+ * integrated, this will not live here.
+ */
+void *x86_cpu_to_node_map_early_ptr;
+int x86_cpu_to_node_map_init[NR_CPUS] = {
+ [0 ... NR_CPUS-1] = NUMA_NO_NODE
+};
+DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
+#endif
+
/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
@@ -714,7 +763,7 @@ void __init setup_arch(char **cmdline_p)
edid_info = boot_params.edid_info;
apm_info.bios = boot_params.apm_bios_info;
ist_info = boot_params.ist_info;
- saved_videomode = boot_params.hdr.vid_mode;
+ saved_video_mode = boot_params.hdr.vid_mode;
if( boot_params.sys_desc_table.length != 0 ) {
set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2);
machine_id = boot_params.sys_desc_table.table[0];
@@ -820,6 +869,18 @@ void __init setup_arch(char **cmdline_p)
io_delay_init();
+#ifdef CONFIG_X86_SMP
+ /*
+ * setup to use the early static init tables during kernel startup
+ * X86_SMP will exclude sub-arches that don't deal well with it.
+ */
+ x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
+ x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
+#ifdef CONFIG_NUMA
+ x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
+#endif
+#endif
+
#ifdef CONFIG_X86_GENERICARCH
generic_apic_probe();
#endif
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index f4f7ecfb898c..674ef3510cdf 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -58,7 +58,6 @@
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/setup.h>
-#include <asm/mach_apic.h>
#include <asm/numa.h>
#include <asm/sections.h>
#include <asm/dmi.h>
@@ -66,7 +65,9 @@
#include <asm/mce.h>
#include <asm/ds.h>
#include <asm/topology.h>
+#include <asm/trampoline.h>
+#include <mach_apic.h>
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
@@ -248,6 +249,7 @@ static void __init reserve_crashkernel(void)
(unsigned long)(total_mem >> 20));
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
+ insert_resource(&iomem_resource, &crashk_res);
}
}
#else
@@ -322,6 +324,11 @@ void __init setup_arch(char **cmdline_p)
finish_e820_parsing();
+ /* after parse_early_param, so could debug it */
+ insert_resource(&iomem_resource, &code_resource);
+ insert_resource(&iomem_resource, &data_resource);
+ insert_resource(&iomem_resource, &bss_resource);
+
early_gart_iommu_check();
e820_register_active_regions(0, 0, -1UL);
@@ -341,10 +348,12 @@ void __init setup_arch(char **cmdline_p)
check_efer();
- init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
+ max_pfn_mapped = init_memory_mapping(0, (max_pfn_mapped << PAGE_SHIFT));
if (efi_enabled)
efi_init();
+ vsmp_init();
+
dmi_scan_machine();
io_delay_init();
@@ -450,7 +459,7 @@ void __init setup_arch(char **cmdline_p)
/*
* We trust e820 completely. No explicit ROM probing in memory.
*/
- e820_reserve_resources(&code_resource, &data_resource, &bss_resource);
+ e820_reserve_resources();
e820_mark_nosave_regions();
/* request I/O space for devices used on all i[345]86 PCs */
@@ -552,9 +561,9 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
bits = c->x86_coreid_bits;
/* Low order bits define the core id (index of core in socket) */
- c->cpu_core_id = c->phys_proc_id & ((1 << bits)-1);
- /* Convert the APIC ID into the socket ID */
- c->phys_proc_id = phys_pkg_id(bits);
+ c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
+ /* Convert the initial APIC ID into the socket ID */
+ c->phys_proc_id = c->initial_apicid >> bits;
#ifdef CONFIG_NUMA
node = c->phys_proc_id;
@@ -571,7 +580,7 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
If that doesn't result in a usable node fall back to the
path for the previous case. */
- int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits);
+ int ht_nodeid = c->initial_apicid;
if (ht_nodeid >= 0 &&
apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
@@ -677,7 +686,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
/* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
- clear_bit(0*32+31, (unsigned long *)&c->x86_capability);
+ clear_cpu_cap(c, 0*32+31);
/* On C+ stepping K8 rep microcode works well for copy/memset */
level = cpuid_eax(1);
@@ -721,6 +730,19 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
if (amd_apic_timer_broken())
disable_apic_timer = 1;
+
+ if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
+ unsigned long long tseg;
+
+ /*
+ * Split up direct mapping around the TSEG SMM area.
+ * Don't do it for gbpages because there seems very little
+ * benefit in doing so.
+ */
+ if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) &&
+ (tseg >> PMD_SHIFT) < (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT)))
+ set_memory_4k((unsigned long)__va(tseg), 1);
+ }
}
void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -813,7 +835,7 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
{
if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
(c->x86 == 0x6 && c->x86_model >= 0x0e))
- set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
+ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
}
static void __cpuinit init_intel(struct cpuinfo_x86 *c)
@@ -856,9 +878,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
if (c->x86 == 15)
c->x86_cache_alignment = c->x86_clflush_size * 2;
- if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
- (c->x86 == 0x6 && c->x86_model >= 0x0e))
- set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
if (c->x86 == 6)
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
@@ -867,6 +886,32 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
srat_detect_node();
}
+static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
+{
+ if (c->x86 == 0x6 && c->x86_model >= 0xf)
+ set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
+}
+
+static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
+{
+ /* Cache sizes */
+ unsigned n;
+
+ n = c->extended_cpuid_level;
+ if (n >= 0x80000008) {
+ unsigned eax = cpuid_eax(0x80000008);
+ c->x86_virt_bits = (eax >> 8) & 0xff;
+ c->x86_phys_bits = eax & 0xff;
+ }
+
+ if (c->x86 == 0x6 && c->x86_model >= 0xf) {
+ c->x86_cache_alignment = c->x86_clflush_size * 2;
+ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+ set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+ }
+ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
+}
+
static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
{
char *v = c->x86_vendor_id;
@@ -875,6 +920,8 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
c->x86_vendor = X86_VENDOR_AMD;
else if (!strcmp(v, "GenuineIntel"))
c->x86_vendor = X86_VENDOR_INTEL;
+ else if (!strcmp(v, "CentaurHauls"))
+ c->x86_vendor = X86_VENDOR_CENTAUR;
else
c->x86_vendor = X86_VENDOR_UNKNOWN;
}
@@ -922,15 +969,16 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
c->x86 += (tfms >> 20) & 0xff;
if (c->x86 >= 0x6)
c->x86_model += ((tfms >> 16) & 0xF) << 4;
- if (c->x86_capability[0] & (1<<19))
+ if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
} else {
/* Have CPUID level 0 only - unheard of */
c->x86 = 4;
}
+ c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
#ifdef CONFIG_SMP
- c->phys_proc_id = (cpuid_ebx(1) >> 24) & 0xff;
+ c->phys_proc_id = c->initial_apicid;
#endif
/* AMD-defined flags: level 0x80000001 */
xlvl = cpuid_eax(0x80000000);
@@ -956,12 +1004,22 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
if (c->extended_cpuid_level >= 0x80000007)
c->x86_power = cpuid_edx(0x80000007);
+
+ clear_cpu_cap(c, X86_FEATURE_PAT);
+
switch (c->x86_vendor) {
case X86_VENDOR_AMD:
early_init_amd(c);
+ if (c->x86 >= 0xf && c->x86 <= 0x11)
+ set_cpu_cap(c, X86_FEATURE_PAT);
break;
case X86_VENDOR_INTEL:
early_init_intel(c);
+ if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15))
+ set_cpu_cap(c, X86_FEATURE_PAT);
+ break;
+ case X86_VENDOR_CENTAUR:
+ early_init_centaur(c);
break;
}
@@ -999,6 +1057,10 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
init_intel(c);
break;
+ case X86_VENDOR_CENTAUR:
+ init_centaur(c);
+ break;
+
case X86_VENDOR_UNKNOWN:
default:
display_cacheinfo(c);
@@ -1028,14 +1090,24 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
#endif
select_idle_routine(c);
- if (c != &boot_cpu_data)
- mtrr_ap_init();
#ifdef CONFIG_NUMA
numa_add_cpu(smp_processor_id());
#endif
}
+void __cpuinit identify_boot_cpu(void)
+{
+ identify_cpu(&boot_cpu_data);
+}
+
+void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
+{
+ BUG_ON(c == &boot_cpu_data);
+ identify_cpu(c);
+ mtrr_ap_init();
+}
+
static __init int setup_noclflush(char *arg)
{
setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
@@ -1064,123 +1136,3 @@ static __init int setup_disablecpuid(char *arg)
return 1;
}
__setup("clearcpuid=", setup_disablecpuid);
-
-/*
- * Get CPU information for use by the procfs.
- */
-
-static int show_cpuinfo(struct seq_file *m, void *v)
-{
- struct cpuinfo_x86 *c = v;
- int cpu = 0, i;
-
-#ifdef CONFIG_SMP
- cpu = c->cpu_index;
-#endif
-
- seq_printf(m, "processor\t: %u\n"
- "vendor_id\t: %s\n"
- "cpu family\t: %d\n"
- "model\t\t: %d\n"
- "model name\t: %s\n",
- (unsigned)cpu,
- c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown",
- c->x86,
- (int)c->x86_model,
- c->x86_model_id[0] ? c->x86_model_id : "unknown");
-
- if (c->x86_mask || c->cpuid_level >= 0)
- seq_printf(m, "stepping\t: %d\n", c->x86_mask);
- else
- seq_printf(m, "stepping\t: unknown\n");
-
- if (cpu_has(c, X86_FEATURE_TSC)) {
- unsigned int freq = cpufreq_quick_get((unsigned)cpu);
-
- if (!freq)
- freq = cpu_khz;
- seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
- freq / 1000, (freq % 1000));
- }
-
- /* Cache size */
- if (c->x86_cache_size >= 0)
- seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
-
-#ifdef CONFIG_SMP
- if (smp_num_siblings * c->x86_max_cores > 1) {
- seq_printf(m, "physical id\t: %d\n", c->phys_proc_id);
- seq_printf(m, "siblings\t: %d\n",
- cpus_weight(per_cpu(cpu_core_map, cpu)));
- seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id);
- seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
- }
-#endif
-
- seq_printf(m,
- "fpu\t\t: yes\n"
- "fpu_exception\t: yes\n"
- "cpuid level\t: %d\n"
- "wp\t\t: yes\n"
- "flags\t\t:",
- c->cpuid_level);
-
- for (i = 0; i < 32*NCAPINTS; i++)
- if (cpu_has(c, i) && x86_cap_flags[i] != NULL)
- seq_printf(m, " %s", x86_cap_flags[i]);
-
- seq_printf(m, "\nbogomips\t: %lu.%02lu\n",
- c->loops_per_jiffy/(500000/HZ),
- (c->loops_per_jiffy/(5000/HZ)) % 100);
-
- if (c->x86_tlbsize > 0)
- seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
- seq_printf(m, "clflush size\t: %d\n", c->x86_clflush_size);
- seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
-
- seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
- c->x86_phys_bits, c->x86_virt_bits);
-
- seq_printf(m, "power management:");
- for (i = 0; i < 32; i++) {
- if (c->x86_power & (1 << i)) {
- if (i < ARRAY_SIZE(x86_power_flags) &&
- x86_power_flags[i])
- seq_printf(m, "%s%s",
- x86_power_flags[i][0]?" ":"",
- x86_power_flags[i]);
- else
- seq_printf(m, " [%d]", i);
- }
- }
-
- seq_printf(m, "\n\n");
-
- return 0;
-}
-
-static void *c_start(struct seq_file *m, loff_t *pos)
-{
- if (*pos == 0) /* just in case, cpu 0 is not the first */
- *pos = first_cpu(cpu_online_map);
- if ((*pos) < NR_CPUS && cpu_online(*pos))
- return &cpu_data(*pos);
- return NULL;
-}
-
-static void *c_next(struct seq_file *m, void *v, loff_t *pos)
-{
- *pos = next_cpu(*pos, cpu_online_map);
- return c_start(m, pos);
-}
-
-static void c_stop(struct seq_file *m, void *v)
-{
-}
-
-const struct seq_operations cpuinfo_op = {
- .start = c_start,
- .next = c_next,
- .stop = c_stop,
- .show = show_cpuinfo,
-};
diff --git a/arch/x86/kernel/sigframe_32.h b/arch/x86/kernel/sigframe.h
index 0b2221711dad..72bbb519d2dc 100644
--- a/arch/x86/kernel/sigframe_32.h
+++ b/arch/x86/kernel/sigframe.h
@@ -1,5 +1,5 @@
-struct sigframe
-{
+#ifdef CONFIG_X86_32
+struct sigframe {
char __user *pretcode;
int sig;
struct sigcontext sc;
@@ -8,8 +8,7 @@ struct sigframe
char retcode[8];
};
-struct rt_sigframe
-{
+struct rt_sigframe {
char __user *pretcode;
int sig;
struct siginfo __user *pinfo;
@@ -19,3 +18,10 @@ struct rt_sigframe
struct _fpstate fpstate;
char retcode[8];
};
+#else
+struct rt_sigframe {
+ char __user *pretcode;
+ struct ucontext uc;
+ struct siginfo info;
+};
+#endif
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 0157a6f0f41f..f1b117930837 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -4,32 +4,44 @@
* 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
* 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes
*/
+#include <linux/list.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
+#include <linux/personality.h>
+#include <linux/binfmts.h>
+#include <linux/suspend.h>
#include <linux/kernel.h>
+#include <linux/ptrace.h>
#include <linux/signal.h>
+#include <linux/stddef.h>
+#include <linux/unistd.h>
#include <linux/errno.h>
+#include <linux/sched.h>
#include <linux/wait.h>
-#include <linux/unistd.h>
-#include <linux/stddef.h>
-#include <linux/personality.h>
-#include <linux/suspend.h>
-#include <linux/ptrace.h>
#include <linux/elf.h>
-#include <linux/binfmts.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+
#include <asm/processor.h>
#include <asm/ucontext.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/vdso.h>
-#include "sigframe_32.h"
-#define DEBUG_SIG 0
+#include "sigframe.h"
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+#define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \
+ X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
+ X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
+ X86_EFLAGS_CF)
+
+#ifdef CONFIG_X86_32
+# define FIX_EFLAGS (__FIX_EFLAGS | X86_EFLAGS_RF)
+#else
+# define FIX_EFLAGS __FIX_EFLAGS
+#endif
+
/*
* Atomically swap in the new signal mask, and wait for a signal.
*/
@@ -46,10 +58,11 @@ sys_sigsuspend(int history0, int history1, old_sigset_t mask)
current->state = TASK_INTERRUPTIBLE;
schedule();
set_thread_flag(TIF_RESTORE_SIGMASK);
+
return -ERESTARTNOHAND;
}
-asmlinkage int
+asmlinkage int
sys_sigaction(int sig, const struct old_sigaction __user *act,
struct old_sigaction __user *oact)
{
@@ -58,10 +71,12 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
if (act) {
old_sigset_t mask;
+
if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
__get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
__get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
return -EFAULT;
+
__get_user(new_ka.sa.sa_flags, &act->sa_flags);
__get_user(mask, &act->sa_mask);
siginitset(&new_ka.sa.sa_mask, mask);
@@ -74,6 +89,7 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
__put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
__put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
return -EFAULT;
+
__put_user(old_ka.sa.sa_flags, &oact->sa_flags);
__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
}
@@ -81,10 +97,12 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
return ret;
}
-asmlinkage int
-sys_sigaltstack(unsigned long bx)
+asmlinkage int sys_sigaltstack(unsigned long bx)
{
- /* This is needed to make gcc realize it doesn't own the "struct pt_regs" */
+ /*
+ * This is needed to make gcc realize it doesn't own the
+ * "struct pt_regs"
+ */
struct pt_regs *regs = (struct pt_regs *)&bx;
const stack_t __user *uss = (const stack_t __user *)bx;
stack_t __user *uoss = (stack_t __user *)regs->cx;
@@ -96,9 +114,9 @@ sys_sigaltstack(unsigned long bx)
/*
* Do a signal return; undo the signal stack.
*/
-
static int
-restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax)
+restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
+ unsigned long *pax)
{
unsigned int err = 0;
@@ -120,37 +138,29 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax
#define GET_SEG(seg) \
{ unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
- loadsegment(seg,tmp); }
-
-#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_RF | \
- X86_EFLAGS_OF | X86_EFLAGS_DF | \
- X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \
- X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF)
+ loadsegment(seg, tmp); }
GET_SEG(gs);
COPY_SEG(fs);
COPY_SEG(es);
COPY_SEG(ds);
- COPY(di);
- COPY(si);
- COPY(bp);
- COPY(sp);
- COPY(bx);
- COPY(dx);
- COPY(cx);
- COPY(ip);
+ COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
+ COPY(dx); COPY(cx); COPY(ip);
COPY_SEG_STRICT(cs);
COPY_SEG_STRICT(ss);
-
+
{
unsigned int tmpflags;
+
err |= __get_user(tmpflags, &sc->flags);
- regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
+ regs->flags = (regs->flags & ~FIX_EFLAGS) |
+ (tmpflags & FIX_EFLAGS);
regs->orig_ax = -1; /* disable syscall checks */
}
{
- struct _fpstate __user * buf;
+ struct _fpstate __user *buf;
+
err |= __get_user(buf, &sc->fpstate);
if (buf) {
if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
@@ -158,6 +168,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax
err |= restore_i387(buf);
} else {
struct task_struct *me = current;
+
if (used_math()) {
clear_fpu(me);
clear_used_math();
@@ -165,24 +176,26 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax
}
}
- err |= __get_user(*peax, &sc->ax);
+ err |= __get_user(*pax, &sc->ax);
return err;
badframe:
return 1;
}
-asmlinkage int sys_sigreturn(unsigned long __unused)
+asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
{
- struct pt_regs *regs = (struct pt_regs *) &__unused;
- struct sigframe __user *frame = (struct sigframe __user *)(regs->sp - 8);
+ struct sigframe __user *frame;
+ struct pt_regs *regs;
+ unsigned long ax;
sigset_t set;
- int ax;
+
+ regs = (struct pt_regs *) &__unused;
+ frame = (struct sigframe __user *)(regs->sp - 8);
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
- if (__get_user(set.sig[0], &frame->sc.oldmask)
- || (_NSIG_WORDS > 1
+ if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1
&& __copy_from_user(&set.sig[1], &frame->extramask,
sizeof(frame->extramask))))
goto badframe;
@@ -192,33 +205,35 @@ asmlinkage int sys_sigreturn(unsigned long __unused)
current->blocked = set;
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
-
+
if (restore_sigcontext(regs, &frame->sc, &ax))
goto badframe;
return ax;
badframe:
if (show_unhandled_signals && printk_ratelimit()) {
- printk("%s%s[%d] bad frame in sigreturn frame:%p ip:%lx"
- " sp:%lx oeax:%lx",
+ printk(KERN_INFO "%s%s[%d] bad frame in sigreturn frame:"
+ "%p ip:%lx sp:%lx oeax:%lx",
task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG,
current->comm, task_pid_nr(current), frame, regs->ip,
regs->sp, regs->orig_ax);
print_vma_addr(" in ", regs->ip);
- printk("\n");
+ printk(KERN_CONT "\n");
}
force_sig(SIGSEGV, current);
+
return 0;
-}
+}
asmlinkage int sys_rt_sigreturn(unsigned long __unused)
{
- struct pt_regs *regs = (struct pt_regs *) &__unused;
- struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(regs->sp - 4);
+ struct pt_regs *regs = (struct pt_regs *)&__unused;
+ struct rt_sigframe __user *frame;
+ unsigned long ax;
sigset_t set;
- int ax;
+ frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
@@ -229,7 +244,7 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused)
current->blocked = set;
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
-
+
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
goto badframe;
@@ -241,12 +256,11 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused)
badframe:
force_sig(SIGSEGV, current);
return 0;
-}
+}
/*
* Set up a signal frame.
*/
-
static int
setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
struct pt_regs *regs, unsigned long mask)
@@ -277,9 +291,9 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
tmp = save_i387(fpstate);
if (tmp < 0)
- err = 1;
+ err = 1;
else
- err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
+ err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
/* non-iBCS2 extensions.. */
err |= __put_user(mask, &sc->oldmask);
@@ -292,7 +306,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
* Determine which stack to use..
*/
static inline void __user *
-get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
+get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
{
unsigned long sp;
@@ -310,32 +324,30 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
if (ka->sa.sa_flags & SA_ONSTACK) {
if (sas_ss_flags(sp) == 0)
sp = current->sas_ss_sp + current->sas_ss_size;
- }
-
- /* This is the legacy signal stack switching. */
- else if ((regs->ss & 0xffff) != __USER_DS &&
- !(ka->sa.sa_flags & SA_RESTORER) &&
- ka->sa.sa_restorer) {
- sp = (unsigned long) ka->sa.sa_restorer;
+ } else {
+ /* This is the legacy signal stack switching. */
+ if ((regs->ss & 0xffff) != __USER_DS &&
+ !(ka->sa.sa_flags & SA_RESTORER) &&
+ ka->sa.sa_restorer)
+ sp = (unsigned long) ka->sa.sa_restorer;
}
sp -= frame_size;
- /* Align the stack pointer according to the i386 ABI,
- * i.e. so that on function entry ((sp + 4) & 15) == 0. */
+ /*
+ * Align the stack pointer according to the i386 ABI,
+ * i.e. so that on function entry ((sp + 4) & 15) == 0.
+ */
sp = ((sp + 4) & -16ul) - 4;
+
return (void __user *) sp;
}
-/* These symbols are defined with the addresses in the vsyscall page.
- See vsyscall-sigreturn.S. */
-extern void __user __kernel_sigreturn;
-extern void __user __kernel_rt_sigreturn;
-
-static int setup_frame(int sig, struct k_sigaction *ka,
- sigset_t *set, struct pt_regs * regs)
+static int
+setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
+ struct pt_regs *regs)
{
- void __user *restorer;
struct sigframe __user *frame;
+ void __user *restorer;
int err = 0;
int usig;
@@ -365,7 +377,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
goto give_sigsegv;
}
- if (current->binfmt->hasvdso)
+ if (current->mm->context.vdso)
restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn);
else
restorer = &frame->retcode;
@@ -374,9 +386,9 @@ static int setup_frame(int sig, struct k_sigaction *ka,
/* Set up to return from userspace. */
err |= __put_user(restorer, &frame->pretcode);
-
+
/*
- * This is popl %eax ; movl $,%eax ; int $0x80
+ * This is popl %eax ; movl $__NR_sigreturn, %eax ; int $0x80
*
* WE DO NOT USE IT ANY MORE! It's only left here for historical
* reasons and because gdb uses it as a signature to notice
@@ -390,11 +402,11 @@ static int setup_frame(int sig, struct k_sigaction *ka,
goto give_sigsegv;
/* Set up registers for signal handler */
- regs->sp = (unsigned long) frame;
- regs->ip = (unsigned long) ka->sa.sa_handler;
- regs->ax = (unsigned long) sig;
- regs->dx = (unsigned long) 0;
- regs->cx = (unsigned long) 0;
+ regs->sp = (unsigned long)frame;
+ regs->ip = (unsigned long)ka->sa.sa_handler;
+ regs->ax = (unsigned long)sig;
+ regs->dx = 0;
+ regs->cx = 0;
regs->ds = __USER_DS;
regs->es = __USER_DS;
@@ -407,15 +419,10 @@ static int setup_frame(int sig, struct k_sigaction *ka,
* The tracer may want to single-step inside the
* handler too.
*/
- regs->flags &= ~(TF_MASK | X86_EFLAGS_DF);
+ regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
if (test_thread_flag(TIF_SINGLESTEP))
ptrace_notify(SIGTRAP);
-#if DEBUG_SIG
- printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
- current->comm, current->pid, frame, regs->ip, frame->pretcode);
-#endif
-
return 0;
give_sigsegv:
@@ -424,10 +431,10 @@ give_sigsegv:
}
static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
- sigset_t *set, struct pt_regs * regs)
+ sigset_t *set, struct pt_regs *regs)
{
- void __user *restorer;
struct rt_sigframe __user *frame;
+ void __user *restorer;
int err = 0;
int usig;
@@ -457,7 +464,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
&frame->uc.uc_stack.ss_flags);
err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
- regs, set->sig[0]);
+ regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
goto give_sigsegv;
@@ -467,9 +474,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
if (ka->sa.sa_flags & SA_RESTORER)
restorer = ka->sa.sa_restorer;
err |= __put_user(restorer, &frame->pretcode);
-
+
/*
- * This is movl $,%ax ; int $0x80
+ * This is movl $__NR_rt_sigreturn, %ax ; int $0x80
*
* WE DO NOT USE IT ANY MORE! It's only left here for historical
* reasons and because gdb uses it as a signature to notice
@@ -483,11 +490,11 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
goto give_sigsegv;
/* Set up registers for signal handler */
- regs->sp = (unsigned long) frame;
- regs->ip = (unsigned long) ka->sa.sa_handler;
- regs->ax = (unsigned long) usig;
- regs->dx = (unsigned long) &frame->info;
- regs->cx = (unsigned long) &frame->uc;
+ regs->sp = (unsigned long)frame;
+ regs->ip = (unsigned long)ka->sa.sa_handler;
+ regs->ax = (unsigned long)usig;
+ regs->dx = (unsigned long)&frame->info;
+ regs->cx = (unsigned long)&frame->uc;
regs->ds = __USER_DS;
regs->es = __USER_DS;
@@ -500,15 +507,10 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
* The tracer may want to single-step inside the
* handler too.
*/
- regs->flags &= ~(TF_MASK | X86_EFLAGS_DF);
+ regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
if (test_thread_flag(TIF_SINGLESTEP))
ptrace_notify(SIGTRAP);
-#if DEBUG_SIG
- printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
- current->comm, current->pid, frame, regs->ip, frame->pretcode);
-#endif
-
return 0;
give_sigsegv:
@@ -517,33 +519,33 @@ give_sigsegv:
}
/*
- * OK, we're invoking a handler
- */
-
+ * OK, we're invoking a handler:
+ */
static int
handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
- sigset_t *oldset, struct pt_regs * regs)
+ sigset_t *oldset, struct pt_regs *regs)
{
int ret;
/* Are we from a system call? */
- if (regs->orig_ax >= 0) {
+ if ((long)regs->orig_ax >= 0) {
/* If so, check system call restarting.. */
switch (regs->ax) {
- case -ERESTART_RESTARTBLOCK:
- case -ERESTARTNOHAND:
+ case -ERESTART_RESTARTBLOCK:
+ case -ERESTARTNOHAND:
+ regs->ax = -EINTR;
+ break;
+
+ case -ERESTARTSYS:
+ if (!(ka->sa.sa_flags & SA_RESTART)) {
regs->ax = -EINTR;
break;
-
- case -ERESTARTSYS:
- if (!(ka->sa.sa_flags & SA_RESTART)) {
- regs->ax = -EINTR;
- break;
- }
- /* fallthrough */
- case -ERESTARTNOINTR:
- regs->ax = regs->orig_ax;
- regs->ip -= 2;
+ }
+ /* fallthrough */
+ case -ERESTARTNOINTR:
+ regs->ax = regs->orig_ax;
+ regs->ip -= 2;
+ break;
}
}
@@ -561,16 +563,17 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
else
ret = setup_frame(sig, ka, oldset, regs);
- if (ret == 0) {
- spin_lock_irq(&current->sighand->siglock);
- sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
- if (!(ka->sa.sa_flags & SA_NODEFER))
- sigaddset(&current->blocked,sig);
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
- }
+ if (ret)
+ return ret;
- return ret;
+ spin_lock_irq(&current->sighand->siglock);
+ sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+ if (!(ka->sa.sa_flags & SA_NODEFER))
+ sigaddset(&current->blocked, sig);
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+
+ return 0;
}
/*
@@ -580,18 +583,17 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
*/
static void do_signal(struct pt_regs *regs)
{
+ struct k_sigaction ka;
siginfo_t info;
int signr;
- struct k_sigaction ka;
sigset_t *oldset;
/*
- * We want the common case to go fast, which
- * is why we may in certain cases get here from
- * kernel mode. Just return without doing anything
- * if so. vm86 regs switched out by assembly code
- * before reaching here, so testing against kernel
- * CS suffices.
+ * We want the common case to go fast, which is why we may in certain
+ * cases get here from kernel mode. Just return without doing anything
+ * if so.
+ * X86_32: vm86 regs switched out by assembly code before reaching
+ * here, so testing against kernel CS suffices.
*/
if (!user_mode(regs))
return;
@@ -603,29 +605,31 @@ static void do_signal(struct pt_regs *regs)
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
if (signr > 0) {
- /* Re-enable any watchpoints before delivering the
+ /*
+ * Re-enable any watchpoints before delivering the
* signal to user space. The processor register will
* have been cleared if the watchpoint triggered
* inside the kernel.
*/
- if (unlikely(current->thread.debugreg7))
+ if (current->thread.debugreg7)
set_debugreg(current->thread.debugreg7, 7);
- /* Whee! Actually deliver the signal. */
+ /* Whee! Actually deliver the signal. */
if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
- /* a signal was successfully delivered; the saved
+ /*
+ * a signal was successfully delivered; the saved
* sigmask will have been stored in the signal frame,
* and will be restored by sigreturn, so we can simply
- * clear the TIF_RESTORE_SIGMASK flag */
+ * clear the TIF_RESTORE_SIGMASK flag
+ */
if (test_thread_flag(TIF_RESTORE_SIGMASK))
clear_thread_flag(TIF_RESTORE_SIGMASK);
}
-
return;
}
/* Did we come from a system call? */
- if (regs->orig_ax >= 0) {
+ if ((long)regs->orig_ax >= 0) {
/* Restart the system call - no handlers present */
switch (regs->ax) {
case -ERESTARTNOHAND:
@@ -642,8 +646,10 @@ static void do_signal(struct pt_regs *regs)
}
}
- /* if there's no signal to deliver, we just put the saved sigmask
- * back */
+ /*
+ * If there's no signal to deliver, we just put the saved sigmask
+ * back.
+ */
if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
clear_thread_flag(TIF_RESTORE_SIGMASK);
sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
@@ -654,13 +660,12 @@ static void do_signal(struct pt_regs *regs)
* notification of userspace execution resumption
* - triggered by the TIF_WORK_MASK flags
*/
-__attribute__((regparm(3)))
-void do_notify_resume(struct pt_regs *regs, void *_unused,
- __u32 thread_info_flags)
+void
+do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
/* Pending single-step? */
if (thread_info_flags & _TIF_SINGLESTEP) {
- regs->flags |= TF_MASK;
+ regs->flags |= X86_EFLAGS_TF;
clear_thread_flag(TIF_SINGLESTEP);
}
@@ -670,6 +675,6 @@ void do_notify_resume(struct pt_regs *regs, void *_unused,
if (thread_info_flags & _TIF_HRTICK_RESCHED)
hrtick_resched();
-
+
clear_thread_flag(TIF_IRET);
}
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 1c83e5124c65..827179c5b32a 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -19,17 +19,28 @@
#include <linux/stddef.h>
#include <linux/personality.h>
#include <linux/compiler.h>
+#include <asm/processor.h>
#include <asm/ucontext.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/proto.h>
#include <asm/ia32_unistd.h>
#include <asm/mce.h>
-
-/* #define DEBUG_SIG 1 */
+#include "sigframe.h"
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
+#define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \
+ X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
+ X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
+ X86_EFLAGS_CF)
+
+#ifdef CONFIG_X86_32
+# define FIX_EFLAGS (__FIX_EFLAGS | X86_EFLAGS_RF)
+#else
+# define FIX_EFLAGS __FIX_EFLAGS
+#endif
+
int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs * regs);
int ia32_setup_frame(int sig, struct k_sigaction *ka,
@@ -46,16 +57,9 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
/*
* Do a signal return; undo the signal stack.
*/
-
-struct rt_sigframe
-{
- char __user *pretcode;
- struct ucontext uc;
- struct siginfo info;
-};
-
static int
-restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned long *prax)
+restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
+ unsigned long *pax)
{
unsigned int err = 0;
@@ -87,7 +91,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned
{
unsigned int tmpflags;
err |= __get_user(tmpflags, &sc->flags);
- regs->flags = (regs->flags & ~0x40DD5) | (tmpflags & 0x40DD5);
+ regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
regs->orig_ax = -1; /* disable syscall checks */
}
@@ -108,7 +112,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned
}
}
- err |= __get_user(*prax, &sc->ax);
+ err |= __get_user(*pax, &sc->ax);
return err;
badframe:
@@ -121,13 +125,11 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
sigset_t set;
unsigned long ax;
- frame = (struct rt_sigframe __user *)(regs->sp - 8);
- if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) {
+ frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
+ if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
- }
- if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) {
+ if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
- }
sigdelsetmask(&set, ~_BLOCKABLE);
spin_lock_irq(&current->sighand->siglock);
@@ -138,10 +140,6 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
goto badframe;
-#ifdef DEBUG_SIG
- printk("%d sigreturn ip:%lx sp:%lx frame:%p ax:%lx\n",current->pid,regs->ip,regs->sp,frame,ax);
-#endif
-
if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
goto badframe;
@@ -270,10 +268,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
if (err)
goto give_sigsegv;
-#ifdef DEBUG_SIG
- printk("%d old ip %lx old sp %lx old ax %lx\n", current->pid,regs->ip,regs->sp,regs->ax);
-#endif
-
/* Set up registers for signal handler */
regs->di = sig;
/* In case the signal handler was declared without prototypes */
@@ -298,10 +292,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
if (test_thread_flag(TIF_SINGLESTEP))
ptrace_notify(SIGTRAP);
-#ifdef DEBUG_SIG
- printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%p\n",
- current->comm, current->pid, frame, regs->ip, frame->pretcode);
-#endif
return 0;
@@ -345,35 +335,29 @@ static long current_syscall_ret(struct pt_regs *regs)
static int
handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
- sigset_t *oldset, struct pt_regs *regs)
+ sigset_t *oldset, struct pt_regs *regs)
{
int ret;
-#ifdef DEBUG_SIG
- printk("handle_signal pid:%d sig:%lu ip:%lx sp:%lx regs=%p\n",
- current->pid, sig,
- regs->ip, regs->sp, regs);
-#endif
-
/* Are we from a system call? */
if (current_syscall(regs) >= 0) {
/* If so, check system call restarting.. */
switch (current_syscall_ret(regs)) {
- case -ERESTART_RESTARTBLOCK:
- case -ERESTARTNOHAND:
- regs->ax = -EINTR;
- break;
+ case -ERESTART_RESTARTBLOCK:
+ case -ERESTARTNOHAND:
+ regs->ax = -EINTR;
+ break;
- case -ERESTARTSYS:
- if (!(ka->sa.sa_flags & SA_RESTART)) {
- regs->ax = -EINTR;
- break;
- }
- /* fallthrough */
- case -ERESTARTNOINTR:
- regs->ax = regs->orig_ax;
- regs->ip -= 2;
+ case -ERESTARTSYS:
+ if (!(ka->sa.sa_flags & SA_RESTART)) {
+ regs->ax = -EINTR;
break;
+ }
+ /* fallthrough */
+ case -ERESTARTNOINTR:
+ regs->ax = regs->orig_ax;
+ regs->ip -= 2;
+ break;
}
}
@@ -420,10 +404,11 @@ static void do_signal(struct pt_regs *regs)
sigset_t *oldset;
/*
- * We want the common case to go fast, which
- * is why we may in certain cases get here from
- * kernel mode. Just return without doing anything
+ * We want the common case to go fast, which is why we may in certain
+ * cases get here from kernel mode. Just return without doing anything
* if so.
+ * X86_32: vm86 regs switched out by assembly code before reaching
+ * here, so testing against kernel CS suffices.
*/
if (!user_mode(regs))
return;
@@ -473,22 +458,19 @@ static void do_signal(struct pt_regs *regs)
}
}
- /* if there's no signal to deliver, we just put the saved sigmask
- back. */
+ /*
+ * If there's no signal to deliver, we just put the saved sigmask
+ * back.
+ */
if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
clear_thread_flag(TIF_RESTORE_SIGMASK);
sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
}
}
-void
-do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
+void do_notify_resume(struct pt_regs *regs, void *unused,
+ __u32 thread_info_flags)
{
-#ifdef DEBUG_SIG
- printk("do_notify_resume flags:%x ip:%lx sp:%lx caller:%p pending:%x\n",
- thread_info_flags, regs->ip, regs->sp, __builtin_return_address(0),signal_pending(current));
-#endif
-
/* Pending single-step? */
if (thread_info_flags & _TIF_SINGLESTEP) {
regs->flags |= X86_EFLAGS_TF;
@@ -502,7 +484,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
#endif /* CONFIG_X86_MCE */
/* deal with pending signal delivery */
- if (thread_info_flags & (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK))
+ if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
do_signal(regs);
if (thread_info_flags & _TIF_HRTICK_RESCHED)
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
new file mode 100644
index 000000000000..8f75893a6467
--- /dev/null
+++ b/arch/x86/kernel/smp.c
@@ -0,0 +1,343 @@
+/*
+ * Intel SMP support routines.
+ *
+ * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
+ * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
+ * (c) 2002,2003 Andi Kleen, SuSE Labs.
+ *
+ * i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com>
+ *
+ * This code is released under the GNU General Public License version 2 or
+ * later.
+ */
+
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/cache.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
+
+#include <asm/mtrr.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
+#include <asm/proto.h>
+#include <mach_ipi.h>
+#include <mach_apic.h>
+/*
+ * Some notes on x86 processor bugs affecting SMP operation:
+ *
+ * Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
+ * The Linux implications for SMP are handled as follows:
+ *
+ * Pentium III / [Xeon]
+ * None of the E1AP-E3AP errata are visible to the user.
+ *
+ * E1AP. see PII A1AP
+ * E2AP. see PII A2AP
+ * E3AP. see PII A3AP
+ *
+ * Pentium II / [Xeon]
+ * None of the A1AP-A3AP errata are visible to the user.
+ *
+ * A1AP. see PPro 1AP
+ * A2AP. see PPro 2AP
+ * A3AP. see PPro 7AP
+ *
+ * Pentium Pro
+ * None of 1AP-9AP errata are visible to the normal user,
+ * except occasional delivery of 'spurious interrupt' as trap #15.
+ * This is very rare and a non-problem.
+ *
+ * 1AP. Linux maps APIC as non-cacheable
+ * 2AP. worked around in hardware
+ * 3AP. fixed in C0 and above steppings microcode update.
+ * Linux does not use excessive STARTUP_IPIs.
+ * 4AP. worked around in hardware
+ * 5AP. symmetric IO mode (normal Linux operation) not affected.
+ * 'noapic' mode has vector 0xf filled out properly.
+ * 6AP. 'noapic' mode might be affected - fixed in later steppings
+ * 7AP. We do not assume writes to the LVT deasserting IRQs
+ * 8AP. We do not enable low power mode (deep sleep) during MP bootup
+ * 9AP. We do not use mixed mode
+ *
+ * Pentium
+ * There is a marginal case where REP MOVS on 100MHz SMP
+ * machines with B stepping processors can fail. XXX should provide
+ * an L1cache=Writethrough or L1cache=off option.
+ *
+ * B stepping CPUs may hang. There are hardware work arounds
+ * for this. We warn about it in case your board doesn't have the work
+ * arounds. Basically that's so I can tell anyone with a B stepping
+ * CPU and SMP problems "tough".
+ *
+ * Specific items [From Pentium Processor Specification Update]
+ *
+ * 1AP. Linux doesn't use remote read
+ * 2AP. Linux doesn't trust APIC errors
+ * 3AP. We work around this
+ * 4AP. Linux never generated 3 interrupts of the same priority
+ * to cause a lost local interrupt.
+ * 5AP. Remote read is never used
+ * 6AP. not affected - worked around in hardware
+ * 7AP. not affected - worked around in hardware
+ * 8AP. worked around in hardware - we get explicit CS errors if not
+ * 9AP. only 'noapic' mode affected. Might generate spurious
+ * interrupts, we log only the first one and count the
+ * rest silently.
+ * 10AP. not affected - worked around in hardware
+ * 11AP. Linux reads the APIC between writes to avoid this, as per
+ * the documentation. Make sure you preserve this as it affects
+ * the C stepping chips too.
+ * 12AP. not affected - worked around in hardware
+ * 13AP. not affected - worked around in hardware
+ * 14AP. we always deassert INIT during bootup
+ * 15AP. not affected - worked around in hardware
+ * 16AP. not affected - worked around in hardware
+ * 17AP. not affected - worked around in hardware
+ * 18AP. not affected - worked around in hardware
+ * 19AP. not affected - worked around in BIOS
+ *
+ * If this sounds worrying believe me these bugs are either ___RARE___,
+ * or are signal timing bugs worked around in hardware and there's
+ * about nothing of note with C stepping upwards.
+ */
+
+/*
+ * this function sends a 'reschedule' IPI to another CPU.
+ * it goes straight through and wastes no time serializing
+ * anything. Worst case is that we lose a reschedule ...
+ */
+static void native_smp_send_reschedule(int cpu)
+{
+ if (unlikely(cpu_is_offline(cpu))) {
+ WARN_ON(1);
+ return;
+ }
+ send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
+}
+
+/*
+ * Structure and data for smp_call_function(). This is designed to minimise
+ * static memory requirements. It also looks cleaner.
+ */
+static DEFINE_SPINLOCK(call_lock);
+
+struct call_data_struct {
+ void (*func) (void *info);
+ void *info;
+ atomic_t started;
+ atomic_t finished;
+ int wait;
+};
+
+void lock_ipi_call_lock(void)
+{
+ spin_lock_irq(&call_lock);
+}
+
+void unlock_ipi_call_lock(void)
+{
+ spin_unlock_irq(&call_lock);
+}
+
+static struct call_data_struct *call_data;
+
+static void __smp_call_function(void (*func) (void *info), void *info,
+ int nonatomic, int wait)
+{
+ struct call_data_struct data;
+ int cpus = num_online_cpus() - 1;
+
+ if (!cpus)
+ return;
+
+ data.func = func;
+ data.info = info;
+ atomic_set(&data.started, 0);
+ data.wait = wait;
+ if (wait)
+ atomic_set(&data.finished, 0);
+
+ call_data = &data;
+ mb();
+
+ /* Send a message to all other CPUs and wait for them to respond */
+ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+
+ /* Wait for response */
+ while (atomic_read(&data.started) != cpus)
+ cpu_relax();
+
+ if (wait)
+ while (atomic_read(&data.finished) != cpus)
+ cpu_relax();
+}
+
+
+/**
+ * smp_call_function_mask(): Run a function on a set of other CPUs.
+ * @mask: The set of cpus to run on. Must not include the current cpu.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @wait: If true, wait (atomically) until function has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code.
+ *
+ * If @wait is true, then returns once @func has returned; otherwise
+ * it returns just before the target cpu calls @func.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ */
+static int
+native_smp_call_function_mask(cpumask_t mask,
+ void (*func)(void *), void *info,
+ int wait)
+{
+ struct call_data_struct data;
+ cpumask_t allbutself;
+ int cpus;
+
+ /* Can deadlock when called with interrupts disabled */
+ WARN_ON(irqs_disabled());
+
+ /* Holding any lock stops cpus from going down. */
+ spin_lock(&call_lock);
+
+ allbutself = cpu_online_map;
+ cpu_clear(smp_processor_id(), allbutself);
+
+ cpus_and(mask, mask, allbutself);
+ cpus = cpus_weight(mask);
+
+ if (!cpus) {
+ spin_unlock(&call_lock);
+ return 0;
+ }
+
+ data.func = func;
+ data.info = info;
+ atomic_set(&data.started, 0);
+ data.wait = wait;
+ if (wait)
+ atomic_set(&data.finished, 0);
+
+ call_data = &data;
+ wmb();
+
+ /* Send a message to other CPUs */
+ if (cpus_equal(mask, allbutself))
+ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
+ else
+ send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
+
+ /* Wait for response */
+ while (atomic_read(&data.started) != cpus)
+ cpu_relax();
+
+ if (wait)
+ while (atomic_read(&data.finished) != cpus)
+ cpu_relax();
+ spin_unlock(&call_lock);
+
+ return 0;
+}
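For callers of the interface documented above, a minimal usage sketch (hypothetical code,
not part of this patch; ping() and ping_all_other_cpus() are invented names) that runs a
fast, non-blocking function on every other online CPU and waits for completion:

#include <linux/smp.h>
#include <linux/cpumask.h>
#include <linux/preempt.h>
#include <asm/atomic.h>

static atomic_t remote_pings = ATOMIC_INIT(0);

/* Runs in IPI context on each target CPU: must be fast and must not sleep. */
static void ping(void *info)
{
	atomic_inc(&remote_pings);
}

static void ping_all_other_cpus(void)
{
	cpumask_t others;

	preempt_disable();
	others = cpu_online_map;
	cpu_clear(smp_processor_id(), others);
	/* wait == 1: return only once every target CPU has executed ping(). */
	smp_call_function_mask(others, ping, NULL, 1);
	preempt_enable();
}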
+
+static void stop_this_cpu(void *dummy)
+{
+ local_irq_disable();
+ /*
+ * Remove this CPU:
+ */
+ cpu_clear(smp_processor_id(), cpu_online_map);
+ disable_local_APIC();
+ if (hlt_works(smp_processor_id()))
+ for (;;) halt();
+ for (;;);
+}
+
+/*
+ * this function calls the 'stop' function on all other CPUs in the system.
+ */
+
+static void native_smp_send_stop(void)
+{
+ int nolock;
+ unsigned long flags;
+
+ if (reboot_force)
+ return;
+
+ /* Don't deadlock on the call lock in panic */
+ nolock = !spin_trylock(&call_lock);
+ local_irq_save(flags);
+ __smp_call_function(stop_this_cpu, NULL, 0, 0);
+ if (!nolock)
+ spin_unlock(&call_lock);
+ disable_local_APIC();
+ local_irq_restore(flags);
+}
+
+/*
+ * Reschedule call back. Nothing to do,
+ * all the work is done automatically when
+ * we return from the interrupt.
+ */
+void smp_reschedule_interrupt(struct pt_regs *regs)
+{
+ ack_APIC_irq();
+#ifdef CONFIG_X86_32
+ __get_cpu_var(irq_stat).irq_resched_count++;
+#else
+ add_pda(irq_resched_count, 1);
+#endif
+}
+
+void smp_call_function_interrupt(struct pt_regs *regs)
+{
+ void (*func) (void *info) = call_data->func;
+ void *info = call_data->info;
+ int wait = call_data->wait;
+
+ ack_APIC_irq();
+ /*
+ * Notify initiating CPU that I've grabbed the data and am
+ * about to execute the function
+ */
+ mb();
+ atomic_inc(&call_data->started);
+ /*
+ * At this point the info structure may be out of scope unless wait==1
+ */
+ irq_enter();
+ (*func)(info);
+#ifdef CONFIG_X86_32
+ __get_cpu_var(irq_stat).irq_call_count++;
+#else
+ add_pda(irq_call_count, 1);
+#endif
+ irq_exit();
+
+ if (wait) {
+ mb();
+ atomic_inc(&call_data->finished);
+ }
+}
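The started/finished counters implement a small rendezvous between the initiating CPU and
the targets; a schematic view of the ordering both sides rely on (descriptive summary of
the code above, not new code):

	/*
	 * Initiator                                  Target CPU(s)
	 * ---------                                  -------------
	 * call_data = &data;
	 * wmb();  (publish data before the IPI)
	 * send_IPI_*(CALL_FUNCTION_VECTOR)  ------>  copy func/info/wait from call_data
	 * spin until data.started == cpus            mb(); atomic_inc(&started)
	 *                                            (*func)(info)
	 * if (wait)                                  if (wait) {
	 *     spin until data.finished == cpus           mb(); atomic_inc(&finished);
	 * spin_unlock(&call_lock)                    }
	 *
	 * Without wait, the initiator's on-stack data may be reused as soon as every
	 * target has bumped "started", which is why the handler copies func/info/wait
	 * before incrementing it.
	 */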
+
+struct smp_ops smp_ops = {
+ .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
+ .smp_prepare_cpus = native_smp_prepare_cpus,
+ .cpu_up = native_cpu_up,
+ .smp_cpus_done = native_smp_cpus_done,
+
+ .smp_send_stop = native_smp_send_stop,
+ .smp_send_reschedule = native_smp_send_reschedule,
+ .smp_call_function_mask = native_smp_call_function_mask,
+};
+EXPORT_SYMBOL_GPL(smp_ops);
+
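The smp_ops indirection exists so that alternative backends (for example paravirtualized
guests) can substitute their own IPI plumbing at boot; a hedged sketch of such an override
(the hvguest_* names are invented for illustration):

/* Hypothetical paravirt backend replacing the native reschedule IPI. */
static void hvguest_send_reschedule(int cpu)
{
	hvguest_kick_vcpu(cpu);	/* invented helper: hypercall instead of an APIC IPI */
}

static void __init hvguest_smp_init(void)
{
	smp_ops.smp_send_reschedule = hvguest_send_reschedule;
}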
diff --git a/arch/x86/kernel/smp_32.c b/arch/x86/kernel/smp_32.c
deleted file mode 100644
index dc0cde9d16fb..000000000000
--- a/arch/x86/kernel/smp_32.c
+++ /dev/null
@@ -1,712 +0,0 @@
-/*
- * Intel SMP support routines.
- *
- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
- * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
- *
- * This code is released under the GNU General Public License version 2 or
- * later.
- */
-
-#include <linux/init.h>
-
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/spinlock.h>
-#include <linux/kernel_stat.h>
-#include <linux/mc146818rtc.h>
-#include <linux/cache.h>
-#include <linux/interrupt.h>
-#include <linux/cpu.h>
-#include <linux/module.h>
-
-#include <asm/mtrr.h>
-#include <asm/tlbflush.h>
-#include <asm/mmu_context.h>
-#include <mach_apic.h>
-
-/*
- * Some notes on x86 processor bugs affecting SMP operation:
- *
- * Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
- * The Linux implications for SMP are handled as follows:
- *
- * Pentium III / [Xeon]
- * None of the E1AP-E3AP errata are visible to the user.
- *
- * E1AP. see PII A1AP
- * E2AP. see PII A2AP
- * E3AP. see PII A3AP
- *
- * Pentium II / [Xeon]
- * None of the A1AP-A3AP errata are visible to the user.
- *
- * A1AP. see PPro 1AP
- * A2AP. see PPro 2AP
- * A3AP. see PPro 7AP
- *
- * Pentium Pro
- * None of 1AP-9AP errata are visible to the normal user,
- * except occasional delivery of 'spurious interrupt' as trap #15.
- * This is very rare and a non-problem.
- *
- * 1AP. Linux maps APIC as non-cacheable
- * 2AP. worked around in hardware
- * 3AP. fixed in C0 and above steppings microcode update.
- * Linux does not use excessive STARTUP_IPIs.
- * 4AP. worked around in hardware
- * 5AP. symmetric IO mode (normal Linux operation) not affected.
- * 'noapic' mode has vector 0xf filled out properly.
- * 6AP. 'noapic' mode might be affected - fixed in later steppings
- * 7AP. We do not assume writes to the LVT deassering IRQs
- * 8AP. We do not enable low power mode (deep sleep) during MP bootup
- * 9AP. We do not use mixed mode
- *
- * Pentium
- * There is a marginal case where REP MOVS on 100MHz SMP
- * machines with B stepping processors can fail. XXX should provide
- * an L1cache=Writethrough or L1cache=off option.
- *
- * B stepping CPUs may hang. There are hardware work arounds
- * for this. We warn about it in case your board doesn't have the work
- * arounds. Basically that's so I can tell anyone with a B stepping
- * CPU and SMP problems "tough".
- *
- * Specific items [From Pentium Processor Specification Update]
- *
- * 1AP. Linux doesn't use remote read
- * 2AP. Linux doesn't trust APIC errors
- * 3AP. We work around this
- * 4AP. Linux never generated 3 interrupts of the same priority
- * to cause a lost local interrupt.
- * 5AP. Remote read is never used
- * 6AP. not affected - worked around in hardware
- * 7AP. not affected - worked around in hardware
- * 8AP. worked around in hardware - we get explicit CS errors if not
- * 9AP. only 'noapic' mode affected. Might generate spurious
- * interrupts, we log only the first one and count the
- * rest silently.
- * 10AP. not affected - worked around in hardware
- * 11AP. Linux reads the APIC between writes to avoid this, as per
- * the documentation. Make sure you preserve this as it affects
- * the C stepping chips too.
- * 12AP. not affected - worked around in hardware
- * 13AP. not affected - worked around in hardware
- * 14AP. we always deassert INIT during bootup
- * 15AP. not affected - worked around in hardware
- * 16AP. not affected - worked around in hardware
- * 17AP. not affected - worked around in hardware
- * 18AP. not affected - worked around in hardware
- * 19AP. not affected - worked around in BIOS
- *
- * If this sounds worrying believe me these bugs are either ___RARE___,
- * or are signal timing bugs worked around in hardware and there's
- * about nothing of note with C stepping upwards.
- */
-
-DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0, };
-
-/*
- * the following functions deal with sending IPIs between CPUs.
- *
- * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
- */
-
-static inline int __prepare_ICR (unsigned int shortcut, int vector)
-{
- unsigned int icr = shortcut | APIC_DEST_LOGICAL;
-
- switch (vector) {
- default:
- icr |= APIC_DM_FIXED | vector;
- break;
- case NMI_VECTOR:
- icr |= APIC_DM_NMI;
- break;
- }
- return icr;
-}
-
-static inline int __prepare_ICR2 (unsigned int mask)
-{
- return SET_APIC_DEST_FIELD(mask);
-}
-
-void __send_IPI_shortcut(unsigned int shortcut, int vector)
-{
- /*
- * Subtle. In the case of the 'never do double writes' workaround
- * we have to lock out interrupts to be safe. As we don't care
- * of the value read we use an atomic rmw access to avoid costly
- * cli/sti. Otherwise we use an even cheaper single atomic write
- * to the APIC.
- */
- unsigned int cfg;
-
- /*
- * Wait for idle.
- */
- apic_wait_icr_idle();
-
- /*
- * No need to touch the target chip field
- */
- cfg = __prepare_ICR(shortcut, vector);
-
- /*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
- apic_write_around(APIC_ICR, cfg);
-}
-
-void send_IPI_self(int vector)
-{
- __send_IPI_shortcut(APIC_DEST_SELF, vector);
-}
-
-/*
- * This is used to send an IPI with no shorthand notation (the destination is
- * specified in bits 56 to 63 of the ICR).
- */
-static inline void __send_IPI_dest_field(unsigned long mask, int vector)
-{
- unsigned long cfg;
-
- /*
- * Wait for idle.
- */
- if (unlikely(vector == NMI_VECTOR))
- safe_apic_wait_icr_idle();
- else
- apic_wait_icr_idle();
-
- /*
- * prepare target chip field
- */
- cfg = __prepare_ICR2(mask);
- apic_write_around(APIC_ICR2, cfg);
-
- /*
- * program the ICR
- */
- cfg = __prepare_ICR(0, vector);
-
- /*
- * Send the IPI. The write to APIC_ICR fires this off.
- */
- apic_write_around(APIC_ICR, cfg);
-}
-
-/*
- * This is only used on smaller machines.
- */
-void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
-{
- unsigned long mask = cpus_addr(cpumask)[0];
- unsigned long flags;
-
- local_irq_save(flags);
- WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
- __send_IPI_dest_field(mask, vector);
- local_irq_restore(flags);
-}
-
-void send_IPI_mask_sequence(cpumask_t mask, int vector)
-{
- unsigned long flags;
- unsigned int query_cpu;
-
- /*
- * Hack. The clustered APIC addressing mode doesn't allow us to send
- * to an arbitrary mask, so I do a unicasts to each CPU instead. This
- * should be modified to do 1 message per cluster ID - mbligh
- */
-
- local_irq_save(flags);
- for_each_possible_cpu(query_cpu) {
- if (cpu_isset(query_cpu, mask)) {
- __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
- vector);
- }
- }
- local_irq_restore(flags);
-}
-
-#include <mach_ipi.h> /* must come after the send_IPI functions above for inlining */
-
-/*
- * Smarter SMP flushing macros.
- * c/o Linus Torvalds.
- *
- * These mean you can really definitely utterly forget about
- * writing to user space from interrupts. (Its not allowed anyway).
- *
- * Optimizations Manfred Spraul <manfred@colorfullife.com>
- */
-
-static cpumask_t flush_cpumask;
-static struct mm_struct * flush_mm;
-static unsigned long flush_va;
-static DEFINE_SPINLOCK(tlbstate_lock);
-
-/*
- * We cannot call mmdrop() because we are in interrupt context,
- * instead update mm->cpu_vm_mask.
- *
- * We need to reload %cr3 since the page tables may be going
- * away from under us..
- */
-void leave_mm(int cpu)
-{
- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
- BUG();
- cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
- load_cr3(swapper_pg_dir);
-}
-EXPORT_SYMBOL_GPL(leave_mm);
-
-/*
- *
- * The flush IPI assumes that a thread switch happens in this order:
- * [cpu0: the cpu that switches]
- * 1) switch_mm() either 1a) or 1b)
- * 1a) thread switch to a different mm
- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
- * Stop ipi delivery for the old mm. This is not synchronized with
- * the other cpus, but smp_invalidate_interrupt ignore flush ipis
- * for the wrong mm, and in the worst case we perform a superfluous
- * tlb flush.
- * 1a2) set cpu_tlbstate to TLBSTATE_OK
- * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
- * was in lazy tlb mode.
- * 1a3) update cpu_tlbstate[].active_mm
- * Now cpu0 accepts tlb flushes for the new mm.
- * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
- * Now the other cpus will send tlb flush ipis.
- * 1a4) change cr3.
- * 1b) thread switch without mm change
- * cpu_tlbstate[].active_mm is correct, cpu0 already handles
- * flush ipis.
- * 1b1) set cpu_tlbstate to TLBSTATE_OK
- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
- * Atomically set the bit [other cpus will start sending flush ipis],
- * and test the bit.
- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
- * 2) switch %%esp, ie current
- *
- * The interrupt must handle 2 special cases:
- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
- * runs in kernel space, the cpu could load tlb entries for user space
- * pages.
- *
- * The good news is that cpu_tlbstate is local to each cpu, no
- * write/read ordering problems.
- */
-
-/*
- * TLB flush IPI:
- *
- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
- * 2) Leave the mm if we are in the lazy tlb mode.
- */
-
-void smp_invalidate_interrupt(struct pt_regs *regs)
-{
- unsigned long cpu;
-
- cpu = get_cpu();
-
- if (!cpu_isset(cpu, flush_cpumask))
- goto out;
- /*
- * This was a BUG() but until someone can quote me the
- * line from the intel manual that guarantees an IPI to
- * multiple CPUs is retried _only_ on the erroring CPUs
- * its staying as a return
- *
- * BUG();
- */
-
- if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
- if (flush_va == TLB_FLUSH_ALL)
- local_flush_tlb();
- else
- __flush_tlb_one(flush_va);
- } else
- leave_mm(cpu);
- }
- ack_APIC_irq();
- smp_mb__before_clear_bit();
- cpu_clear(cpu, flush_cpumask);
- smp_mb__after_clear_bit();
-out:
- put_cpu_no_resched();
- __get_cpu_var(irq_stat).irq_tlb_count++;
-}
-
-void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
- unsigned long va)
-{
- cpumask_t cpumask = *cpumaskp;
-
- /*
- * A couple of (to be removed) sanity checks:
- *
- * - current CPU must not be in mask
- * - mask must exist :)
- */
- BUG_ON(cpus_empty(cpumask));
- BUG_ON(cpu_isset(smp_processor_id(), cpumask));
- BUG_ON(!mm);
-
-#ifdef CONFIG_HOTPLUG_CPU
- /* If a CPU which we ran on has gone down, OK. */
- cpus_and(cpumask, cpumask, cpu_online_map);
- if (unlikely(cpus_empty(cpumask)))
- return;
-#endif
-
- /*
- * i'm not happy about this global shared spinlock in the
- * MM hot path, but we'll see how contended it is.
- * AK: x86-64 has a faster method that could be ported.
- */
- spin_lock(&tlbstate_lock);
-
- flush_mm = mm;
- flush_va = va;
- cpus_or(flush_cpumask, cpumask, flush_cpumask);
- /*
- * We have to send the IPI only to
- * CPUs affected.
- */
- send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
-
- while (!cpus_empty(flush_cpumask))
- /* nothing. lockup detection does not belong here */
- cpu_relax();
-
- flush_mm = NULL;
- flush_va = 0;
- spin_unlock(&tlbstate_lock);
-}
-
-void flush_tlb_current_task(void)
-{
- struct mm_struct *mm = current->mm;
- cpumask_t cpu_mask;
-
- preempt_disable();
- cpu_mask = mm->cpu_vm_mask;
- cpu_clear(smp_processor_id(), cpu_mask);
-
- local_flush_tlb();
- if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
- preempt_enable();
-}
-
-void flush_tlb_mm (struct mm_struct * mm)
-{
- cpumask_t cpu_mask;
-
- preempt_disable();
- cpu_mask = mm->cpu_vm_mask;
- cpu_clear(smp_processor_id(), cpu_mask);
-
- if (current->active_mm == mm) {
- if (current->mm)
- local_flush_tlb();
- else
- leave_mm(smp_processor_id());
- }
- if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
-
- preempt_enable();
-}
-
-void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
-{
- struct mm_struct *mm = vma->vm_mm;
- cpumask_t cpu_mask;
-
- preempt_disable();
- cpu_mask = mm->cpu_vm_mask;
- cpu_clear(smp_processor_id(), cpu_mask);
-
- if (current->active_mm == mm) {
- if(current->mm)
- __flush_tlb_one(va);
- else
- leave_mm(smp_processor_id());
- }
-
- if (!cpus_empty(cpu_mask))
- flush_tlb_others(cpu_mask, mm, va);
-
- preempt_enable();
-}
-EXPORT_SYMBOL(flush_tlb_page);
-
-static void do_flush_tlb_all(void* info)
-{
- unsigned long cpu = smp_processor_id();
-
- __flush_tlb_all();
- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
- leave_mm(cpu);
-}
-
-void flush_tlb_all(void)
-{
- on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
-}
-
-/*
- * this function sends a 'reschedule' IPI to another CPU.
- * it goes straight through and wastes no time serializing
- * anything. Worst case is that we lose a reschedule ...
- */
-static void native_smp_send_reschedule(int cpu)
-{
- WARN_ON(cpu_is_offline(cpu));
- send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
-}
-
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static DEFINE_SPINLOCK(call_lock);
-
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- atomic_t started;
- atomic_t finished;
- int wait;
-};
-
-void lock_ipi_call_lock(void)
-{
- spin_lock_irq(&call_lock);
-}
-
-void unlock_ipi_call_lock(void)
-{
- spin_unlock_irq(&call_lock);
-}
-
-static struct call_data_struct *call_data;
-
-static void __smp_call_function(void (*func) (void *info), void *info,
- int nonatomic, int wait)
-{
- struct call_data_struct data;
- int cpus = num_online_cpus() - 1;
-
- if (!cpus)
- return;
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- mb();
-
- /* Send a message to all other CPUs and wait for them to respond */
- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- cpu_relax();
-
- if (wait)
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
-}
-
-
-/**
- * smp_call_function_mask(): Run a function on a set of other CPUs.
- * @mask: The set of cpus to run on. Must not include the current cpu.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-static int
-native_smp_call_function_mask(cpumask_t mask,
- void (*func)(void *), void *info,
- int wait)
-{
- struct call_data_struct data;
- cpumask_t allbutself;
- int cpus;
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- /* Holding any lock stops cpus from going down. */
- spin_lock(&call_lock);
-
- allbutself = cpu_online_map;
- cpu_clear(smp_processor_id(), allbutself);
-
- cpus_and(mask, mask, allbutself);
- cpus = cpus_weight(mask);
-
- if (!cpus) {
- spin_unlock(&call_lock);
- return 0;
- }
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- mb();
-
- /* Send a message to other CPUs */
- if (cpus_equal(mask, allbutself))
- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
- else
- send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- cpu_relax();
-
- if (wait)
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
- spin_unlock(&call_lock);
-
- return 0;
-}
-
-static void stop_this_cpu (void * dummy)
-{
- local_irq_disable();
- /*
- * Remove this CPU:
- */
- cpu_clear(smp_processor_id(), cpu_online_map);
- disable_local_APIC();
- if (cpu_data(smp_processor_id()).hlt_works_ok)
- for(;;) halt();
- for (;;);
-}
-
-/*
- * this function calls the 'stop' function on all other CPUs in the system.
- */
-
-static void native_smp_send_stop(void)
-{
- /* Don't deadlock on the call lock in panic */
- int nolock = !spin_trylock(&call_lock);
- unsigned long flags;
-
- local_irq_save(flags);
- __smp_call_function(stop_this_cpu, NULL, 0, 0);
- if (!nolock)
- spin_unlock(&call_lock);
- disable_local_APIC();
- local_irq_restore(flags);
-}
-
-/*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
- */
-void smp_reschedule_interrupt(struct pt_regs *regs)
-{
- ack_APIC_irq();
- __get_cpu_var(irq_stat).irq_resched_count++;
-}
-
-void smp_call_function_interrupt(struct pt_regs *regs)
-{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
- int wait = call_data->wait;
-
- ack_APIC_irq();
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- atomic_inc(&call_data->started);
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
- irq_enter();
- (*func)(info);
- __get_cpu_var(irq_stat).irq_call_count++;
- irq_exit();
-
- if (wait) {
- mb();
- atomic_inc(&call_data->finished);
- }
-}
-
-static int convert_apicid_to_cpu(int apic_id)
-{
- int i;
-
- for_each_possible_cpu(i) {
- if (per_cpu(x86_cpu_to_apicid, i) == apic_id)
- return i;
- }
- return -1;
-}
-
-int safe_smp_processor_id(void)
-{
- int apicid, cpuid;
-
- if (!boot_cpu_has(X86_FEATURE_APIC))
- return 0;
-
- apicid = hard_smp_processor_id();
- if (apicid == BAD_APICID)
- return 0;
-
- cpuid = convert_apicid_to_cpu(apicid);
-
- return cpuid >= 0 ? cpuid : 0;
-}
-
-struct smp_ops smp_ops = {
- .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
- .smp_prepare_cpus = native_smp_prepare_cpus,
- .cpu_up = native_cpu_up,
- .smp_cpus_done = native_smp_cpus_done,
-
- .smp_send_stop = native_smp_send_stop,
- .smp_send_reschedule = native_smp_send_reschedule,
- .smp_call_function_mask = native_smp_call_function_mask,
-};
-EXPORT_SYMBOL_GPL(smp_ops);
diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot.c
index 579b9b740c7c..e6abe8a49b1f 100644
--- a/arch/x86/kernel/smpboot_32.c
+++ b/arch/x86/kernel/smpboot.c
@@ -3,6 +3,7 @@
*
* (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
* (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
+ * Copyright 2001 Andi Kleen, SuSE Labs.
*
* Much of the core SMP work is based on previous work by Thomas Radke, to
* whom a great many thanks are extended.
@@ -29,53 +30,90 @@
* Ingo Molnar : various cleanups and rewrites
* Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
* Maciej W. Rozycki : Bits for genuine 82489DX APICs
+ * Andi Kleen : Changed for SMP boot into long mode.
* Martin J. Bligh : Added support for multi-quad systems
* Dave Jones : Report invalid combinations of Athlon CPUs.
-* Rusty Russell : Hacked into shape for new "hotplug" boot process. */
+ * Rusty Russell : Hacked into shape for new "hotplug" boot process.
+ * Andi Kleen : Converted to new state machine.
+ * Ashok Raj : CPU hotplug support
+ * Glauber Costa : i386 and x86_64 integration
+ */
-#include <linux/module.h>
#include <linux/init.h>
-#include <linux/kernel.h>
-
-#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/module.h>
#include <linux/sched.h>
-#include <linux/kernel_stat.h>
-#include <linux/bootmem.h>
-#include <linux/notifier.h>
-#include <linux/cpu.h>
#include <linux/percpu.h>
+#include <linux/bootmem.h>
+#include <linux/err.h>
#include <linux/nmi.h>
-#include <linux/delay.h>
-#include <linux/mc146818rtc.h>
-#include <asm/tlbflush.h>
+#include <asm/acpi.h>
#include <asm/desc.h>
-#include <asm/arch_hooks.h>
#include <asm/nmi.h>
+#include <asm/irq.h>
+#include <asm/smp.h>
+#include <asm/trampoline.h>
+#include <asm/cpu.h>
+#include <asm/numa.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/mtrr.h>
+#include <asm/nmi.h>
+#include <asm/vmi.h>
+#include <linux/mc146818rtc.h>
#include <mach_apic.h>
#include <mach_wakecpu.h>
#include <smpboot_hooks.h>
-#include <asm/vmi.h>
-#include <asm/mtrr.h>
-/* Set if we find a B stepping CPU */
-static int __cpuinitdata smp_b_stepping;
+/*
+ * FIXME: For x86_64, those are defined in other files. But moving them here,
+ * would make the setup areas dependent on smp, which is a loss. When we
+ * integrate apic between arches, we can probably do a better job, but
+ * right now, they'll stay here -- glommer
+ */
+
+/* which logical CPU number maps to which CPU (physical APIC ID) */
+u16 x86_cpu_to_apicid_init[NR_CPUS] __initdata =
+ { [0 ... NR_CPUS-1] = BAD_APICID };
+void *x86_cpu_to_apicid_early_ptr;
+
+u16 x86_bios_cpu_apicid_init[NR_CPUS] __initdata
+ = { [0 ... NR_CPUS-1] = BAD_APICID };
+void *x86_bios_cpu_apicid_early_ptr;
+
+#ifdef CONFIG_X86_32
+u8 apicid_2_node[MAX_APICID];
+#endif
+
+/* State of each CPU */
+DEFINE_PER_CPU(int, cpu_state) = { 0 };
+
+/*
+ * Store all idle threads; these can be reused instead of creating a new
+ * thread. This also avoids complicated thread-destroy functionality for
+ * idle threads.
+ */
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is
+ * removed after init for !CONFIG_HOTPLUG_CPU.
+ */
+static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
+#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x))
+#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p))
+#else
+struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata;
+#define get_idle_for_cpu(x) (idle_thread_array[(x)])
+#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p))
+#endif
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
EXPORT_SYMBOL(smp_num_siblings);
/* Last level cache ID of each logical CPU */
-DEFINE_PER_CPU(u8, cpu_llc_id) = BAD_APICID;
-
-/* representing HT siblings of each logical CPU */
-DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
-EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
-
-/* representing HT and core siblings of each logical CPU */
-DEFINE_PER_CPU(cpumask_t, cpu_core_map);
-EXPORT_PER_CPU_SYMBOL(cpu_core_map);
+DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
/* bitmap of online cpus */
cpumask_t cpu_online_map __read_mostly;
@@ -85,126 +123,94 @@ cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);
-static cpumask_t smp_commenced_mask;
+
+/* representing HT siblings of each logical CPU */
+DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
+
+/* representing HT and core siblings of each logical CPU */
+DEFINE_PER_CPU(cpumask_t, cpu_core_map);
+EXPORT_PER_CPU_SYMBOL(cpu_core_map);
/* Per CPU bogomips and other parameters */
DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
-/* which logical CPU number maps to which CPU (physical APIC ID) */
-u8 x86_cpu_to_apicid_init[NR_CPUS] __initdata =
- { [0 ... NR_CPUS-1] = BAD_APICID };
-void *x86_cpu_to_apicid_early_ptr;
-DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID;
-EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
-
-u8 apicid_2_node[MAX_APICID];
+static atomic_t init_deasserted;
-/*
- * Trampoline 80x86 program as an array.
- */
+static int boot_cpu_logical_apicid;
-extern const unsigned char trampoline_data [];
-extern const unsigned char trampoline_end [];
-static unsigned char *trampoline_base;
+/* representing cpus for which sibling maps can be computed */
+static cpumask_t cpu_sibling_setup_map;
-static void map_cpu_to_logical_apicid(void);
+/* Set if we find a B stepping CPU */
+int __cpuinitdata smp_b_stepping;
-/* State of each CPU. */
-DEFINE_PER_CPU(int, cpu_state) = { 0 };
+#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
-/*
- * Currently trivial. Write the real->protected mode
- * bootstrap into the page concerned. The caller
- * has made sure it's suitably aligned.
- */
+/* which logical CPUs are on which nodes */
+cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly =
+ { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
+EXPORT_SYMBOL(node_to_cpumask_map);
+/* which node each logical CPU is on */
+int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
+EXPORT_SYMBOL(cpu_to_node_map);
-static unsigned long __cpuinit setup_trampoline(void)
+/* set up a mapping between cpu and node. */
+static void map_cpu_to_node(int cpu, int node)
{
- memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
- return virt_to_phys(trampoline_base);
+ printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node);
+ cpu_set(cpu, node_to_cpumask_map[node]);
+ cpu_to_node_map[cpu] = node;
}
-/*
- * We are called very early to get the low memory for the
- * SMP bootup trampoline page.
- */
-void __init smp_alloc_memory(void)
+/* undo a mapping between cpu and node. */
+static void unmap_cpu_to_node(int cpu)
{
- trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE);
- /*
- * Has to be in very low memory so we can execute
- * real-mode AP code.
- */
- if (__pa(trampoline_base) >= 0x9F000)
- BUG();
+ int node;
+
+ printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu);
+ for (node = 0; node < MAX_NUMNODES; node++)
+ cpu_clear(cpu, node_to_cpumask_map[node]);
+ cpu_to_node_map[cpu] = 0;
}
+#else /* !(CONFIG_NUMA && CONFIG_X86_32) */
+#define map_cpu_to_node(cpu, node) ({})
+#define unmap_cpu_to_node(cpu) ({})
+#endif
-/*
- * The bootstrap kernel entry code has set these up. Save them for
- * a given CPU
- */
+#ifdef CONFIG_X86_32
+u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
+ { [0 ... NR_CPUS-1] = BAD_APICID };
-void __cpuinit smp_store_cpu_info(int id)
+void map_cpu_to_logical_apicid(void)
{
- struct cpuinfo_x86 *c = &cpu_data(id);
-
- *c = boot_cpu_data;
- c->cpu_index = id;
- if (id!=0)
- identify_secondary_cpu(c);
- /*
- * Mask B, Pentium, but not Pentium MMX
- */
- if (c->x86_vendor == X86_VENDOR_INTEL &&
- c->x86 == 5 &&
- c->x86_mask >= 1 && c->x86_mask <= 4 &&
- c->x86_model <= 3)
- /*
- * Remember we have B step Pentia with bugs
- */
- smp_b_stepping = 1;
-
- /*
- * Certain Athlons might work (for various values of 'work') in SMP
- * but they are not certified as MP capable.
- */
- if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
-
- if (num_possible_cpus() == 1)
- goto valid_k7;
-
- /* Athlon 660/661 is valid. */
- if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
- goto valid_k7;
-
- /* Duron 670 is valid */
- if ((c->x86_model==7) && (c->x86_mask==0))
- goto valid_k7;
-
- /*
- * Athlon 662, Duron 671, and Athlon >model 7 have capability bit.
- * It's worth noting that the A5 stepping (662) of some Athlon XP's
- * have the MP bit set.
- * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
- */
- if (((c->x86_model==6) && (c->x86_mask>=2)) ||
- ((c->x86_model==7) && (c->x86_mask>=1)) ||
- (c->x86_model> 7))
- if (cpu_has_mp)
- goto valid_k7;
+ int cpu = smp_processor_id();
+ int apicid = logical_smp_processor_id();
+ int node = apicid_to_node(apicid);
- /* If we get here, it's not a certified SMP capable AMD system. */
- add_taint(TAINT_UNSAFE_SMP);
- }
+ if (!node_online(node))
+ node = first_online_node;
-valid_k7:
- ;
+ cpu_2_logical_apicid[cpu] = apicid;
+ map_cpu_to_node(cpu, node);
}
-static atomic_t init_deasserted;
+void unmap_cpu_to_logical_apicid(int cpu)
+{
+ cpu_2_logical_apicid[cpu] = BAD_APICID;
+ unmap_cpu_to_node(cpu);
+}
+#else
+#define unmap_cpu_to_logical_apicid(cpu) do {} while (0)
+#define map_cpu_to_logical_apicid() do {} while (0)
+#endif
-static void __cpuinit smp_callin(void)
+/*
+ * Report back to the Boot Processor.
+ * Running on AP.
+ */
+void __cpuinit smp_callin(void)
{
int cpuid, phys_id;
unsigned long timeout;
@@ -220,12 +226,11 @@ static void __cpuinit smp_callin(void)
/*
* (This works even if the APIC is not enabled.)
*/
- phys_id = GET_APIC_ID(apic_read(APIC_ID));
+ phys_id = GET_APIC_ID(read_apic_id());
cpuid = smp_processor_id();
if (cpu_isset(cpuid, cpu_callin_map)) {
- printk("huh, phys CPU#%d, CPU#%d already present??\n",
+ panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
phys_id, cpuid);
- BUG();
}
Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
@@ -247,13 +252,12 @@ static void __cpuinit smp_callin(void)
*/
if (cpu_isset(cpuid, cpu_callout_map))
break;
- rep_nop();
+ cpu_relax();
}
if (!time_before(jiffies, timeout)) {
- printk("BUG: CPU%d started up but did not get a callout!\n",
- cpuid);
- BUG();
+ panic("%s: CPU%d started up but did not get a callout!\n",
+ __func__, cpuid);
}
/*
@@ -266,13 +270,19 @@ static void __cpuinit smp_callin(void)
Dprintk("CALLIN, before setup_local_APIC().\n");
smp_callin_clear_local_apic();
setup_local_APIC();
+ end_local_APIC_setup();
map_cpu_to_logical_apicid();
/*
* Get our bogomips.
+ *
+ * Need to enable IRQs because it can take longer and then
+ * the NMI watchdog might kill us.
*/
+ local_irq_enable();
calibrate_delay();
- Dprintk("Stack at about %p\n",&cpuid);
+ local_irq_disable();
+ Dprintk("Stack at about %p\n", &cpuid);
/*
* Save our processor parameters
@@ -285,91 +295,10 @@ static void __cpuinit smp_callin(void)
cpu_set(cpuid, cpu_callin_map);
}
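For orientation, the cpu_callout_map/cpu_callin_map bits used above form the boot
handshake between the boot processor and the AP (a descriptive summary of the code in
this file):

	/*
	 * BSP (do_boot_cpu)                         AP (smp_callin)
	 * -----------------                         ---------------
	 * send INIT/STARTUP IPIs
	 * cpu_set(cpu, cpu_callout_map)   ------->  wait for own bit in cpu_callout_map
	 * wait for bit in cpu_callin_map  <-------  set up local APIC, calibrate delay,
	 *                                           smp_store_cpu_info(), then
	 *                                           cpu_set(cpuid, cpu_callin_map)
	 */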
-static int cpucount;
-
-/* maps the cpu to the sched domain representing multi-core */
-cpumask_t cpu_coregroup_map(int cpu)
-{
- struct cpuinfo_x86 *c = &cpu_data(cpu);
- /*
- * For perf, we return last level cache shared map.
- * And for power savings, we return cpu_core_map
- */
- if (sched_mc_power_savings || sched_smt_power_savings)
- return per_cpu(cpu_core_map, cpu);
- else
- return c->llc_shared_map;
-}
-
-/* representing cpus for which sibling maps can be computed */
-static cpumask_t cpu_sibling_setup_map;
-
-void __cpuinit set_cpu_sibling_map(int cpu)
-{
- int i;
- struct cpuinfo_x86 *c = &cpu_data(cpu);
-
- cpu_set(cpu, cpu_sibling_setup_map);
-
- if (smp_num_siblings > 1) {
- for_each_cpu_mask(i, cpu_sibling_setup_map) {
- if (c->phys_proc_id == cpu_data(i).phys_proc_id &&
- c->cpu_core_id == cpu_data(i).cpu_core_id) {
- cpu_set(i, per_cpu(cpu_sibling_map, cpu));
- cpu_set(cpu, per_cpu(cpu_sibling_map, i));
- cpu_set(i, per_cpu(cpu_core_map, cpu));
- cpu_set(cpu, per_cpu(cpu_core_map, i));
- cpu_set(i, c->llc_shared_map);
- cpu_set(cpu, cpu_data(i).llc_shared_map);
- }
- }
- } else {
- cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
- }
-
- cpu_set(cpu, c->llc_shared_map);
-
- if (current_cpu_data.x86_max_cores == 1) {
- per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu);
- c->booted_cores = 1;
- return;
- }
-
- for_each_cpu_mask(i, cpu_sibling_setup_map) {
- if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
- per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
- cpu_set(i, c->llc_shared_map);
- cpu_set(cpu, cpu_data(i).llc_shared_map);
- }
- if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
- cpu_set(i, per_cpu(cpu_core_map, cpu));
- cpu_set(cpu, per_cpu(cpu_core_map, i));
- /*
- * Does this new cpu bringup a new core?
- */
- if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) {
- /*
- * for each core in package, increment
- * the booted_cores for this new cpu
- */
- if (first_cpu(per_cpu(cpu_sibling_map, i)) == i)
- c->booted_cores++;
- /*
- * increment the core count for all
- * the other cpus in this package
- */
- if (i != cpu)
- cpu_data(i).booted_cores++;
- } else if (i != cpu && !c->booted_cores)
- c->booted_cores = cpu_data(i).booted_cores;
- }
- }
-}
-
/*
* Activate a secondary processor.
*/
-static void __cpuinit start_secondary(void *unused)
+void __cpuinit start_secondary(void *unused)
{
/*
* Don't put *anything* before cpu_init(), SMP booting is too
@@ -382,24 +311,19 @@ static void __cpuinit start_secondary(void *unused)
cpu_init();
preempt_disable();
smp_callin();
- while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
- rep_nop();
+
+ /* otherwise gcc will move up smp_processor_id before the cpu_init */
+ barrier();
/*
* Check TSC synchronization with the BP:
*/
check_tsc_sync_target();
- setup_secondary_clock();
if (nmi_watchdog == NMI_IO_APIC) {
disable_8259A_irq(0);
enable_NMI_through_LVT0();
enable_8259A_irq(0);
}
- /*
- * low-memory mappings have been cleared, flush them from
- * the local TLBs too.
- */
- local_flush_tlb();
/* This must be done before setting cpu_online_map */
set_cpu_sibling_map(raw_smp_processor_id());
@@ -414,17 +338,27 @@ static void __cpuinit start_secondary(void *unused)
* smp_call_function().
*/
lock_ipi_call_lock();
+#ifdef CONFIG_X86_64
+ spin_lock(&vector_lock);
+
+ /* Setup the per cpu irq handling data structures */
+ __setup_vector_irq(smp_processor_id());
+ /*
+ * Allow the master to continue.
+ */
+ spin_unlock(&vector_lock);
+#endif
cpu_set(smp_processor_id(), cpu_online_map);
unlock_ipi_call_lock();
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
- /* We can take interrupts now: we're officially "up". */
- local_irq_enable();
+ setup_secondary_clock();
wmb();
cpu_idle();
}
+#ifdef CONFIG_X86_32
/*
* Everything has been set up for the secondary
* CPUs - they just need to reload everything
@@ -442,89 +376,233 @@ void __devinit initialize_secondary(void)
"movl %0,%%esp\n\t"
"jmp *%1"
:
- :"m" (current->thread.sp),"m" (current->thread.ip));
+ :"m" (current->thread.sp), "m" (current->thread.ip));
}
+#endif
-/* Static state in head.S used to set up a CPU */
-extern struct {
- void * sp;
- unsigned short ss;
-} stack_start;
+static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_X86_32
+ /*
+ * Mask B, Pentium, but not Pentium MMX
+ */
+ if (c->x86_vendor == X86_VENDOR_INTEL &&
+ c->x86 == 5 &&
+ c->x86_mask >= 1 && c->x86_mask <= 4 &&
+ c->x86_model <= 3)
+ /*
+ * Remember we have B step Pentia with bugs
+ */
+ smp_b_stepping = 1;
-#ifdef CONFIG_NUMA
+ /*
+ * Certain Athlons might work (for various values of 'work') in SMP
+ * but they are not certified as MP capable.
+ */
+ if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
-/* which logical CPUs are on which nodes */
-cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly =
- { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
-EXPORT_SYMBOL(node_to_cpumask_map);
-/* which node each logical CPU is on */
-int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
-EXPORT_SYMBOL(cpu_to_node_map);
+ if (num_possible_cpus() == 1)
+ goto valid_k7;
-/* set up a mapping between cpu and node. */
-static inline void map_cpu_to_node(int cpu, int node)
-{
- printk("Mapping cpu %d to node %d\n", cpu, node);
- cpu_set(cpu, node_to_cpumask_map[node]);
- cpu_to_node_map[cpu] = node;
+ /* Athlon 660/661 is valid. */
+ if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
+ (c->x86_mask == 1)))
+ goto valid_k7;
+
+ /* Duron 670 is valid */
+ if ((c->x86_model == 7) && (c->x86_mask == 0))
+ goto valid_k7;
+
+ /*
+ * Athlon 662, Duron 671, and Athlon >model 7 have capability
+ * bit. It's worth noting that the A5 stepping (662) of some
+ * Athlon XP's have the MP bit set.
+ * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
+ * more.
+ */
+ if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
+ ((c->x86_model == 7) && (c->x86_mask >= 1)) ||
+ (c->x86_model > 7))
+ if (cpu_has_mp)
+ goto valid_k7;
+
+ /* If we get here, not a certified SMP capable AMD system. */
+ add_taint(TAINT_UNSAFE_SMP);
+ }
+
+valid_k7:
+ ;
+#endif
}
-/* undo a mapping between cpu and node. */
-static inline void unmap_cpu_to_node(int cpu)
+void __cpuinit smp_checks(void)
{
- int node;
+ if (smp_b_stepping)
+ printk(KERN_WARNING "WARNING: SMP operation may be unreliable"
+ "with B stepping processors.\n");
- printk("Unmapping cpu %d from all nodes\n", cpu);
- for (node = 0; node < MAX_NUMNODES; node ++)
- cpu_clear(cpu, node_to_cpumask_map[node]);
- cpu_to_node_map[cpu] = 0;
+ /*
+ * Don't taint if we are running SMP kernel on a single non-MP
+ * approved Athlon
+ */
+ if (tainted & TAINT_UNSAFE_SMP) {
+ if (num_online_cpus())
+ printk(KERN_INFO "WARNING: This combination of AMD"
+ "processors is not suitable for SMP.\n");
+ else
+ tainted &= ~TAINT_UNSAFE_SMP;
+ }
}
-#else /* !CONFIG_NUMA */
-#define map_cpu_to_node(cpu, node) ({})
-#define unmap_cpu_to_node(cpu) ({})
+/*
+ * The bootstrap kernel entry code has set these up. Save them for
+ * a given CPU
+ */
-#endif /* CONFIG_NUMA */
+void __cpuinit smp_store_cpu_info(int id)
+{
+ struct cpuinfo_x86 *c = &cpu_data(id);
+
+ *c = boot_cpu_data;
+ c->cpu_index = id;
+ if (id != 0)
+ identify_secondary_cpu(c);
+ smp_apply_quirks(c);
+}
-u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };
-static void map_cpu_to_logical_apicid(void)
+void __cpuinit set_cpu_sibling_map(int cpu)
{
- int cpu = smp_processor_id();
- int apicid = logical_smp_processor_id();
- int node = apicid_to_node(apicid);
+ int i;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
- if (!node_online(node))
- node = first_online_node;
+ cpu_set(cpu, cpu_sibling_setup_map);
- cpu_2_logical_apicid[cpu] = apicid;
- map_cpu_to_node(cpu, node);
+ if (smp_num_siblings > 1) {
+ for_each_cpu_mask(i, cpu_sibling_setup_map) {
+ if (c->phys_proc_id == cpu_data(i).phys_proc_id &&
+ c->cpu_core_id == cpu_data(i).cpu_core_id) {
+ cpu_set(i, per_cpu(cpu_sibling_map, cpu));
+ cpu_set(cpu, per_cpu(cpu_sibling_map, i));
+ cpu_set(i, per_cpu(cpu_core_map, cpu));
+ cpu_set(cpu, per_cpu(cpu_core_map, i));
+ cpu_set(i, c->llc_shared_map);
+ cpu_set(cpu, cpu_data(i).llc_shared_map);
+ }
+ }
+ } else {
+ cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
+ }
+
+ cpu_set(cpu, c->llc_shared_map);
+
+ if (current_cpu_data.x86_max_cores == 1) {
+ per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu);
+ c->booted_cores = 1;
+ return;
+ }
+
+ for_each_cpu_mask(i, cpu_sibling_setup_map) {
+ if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
+ per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
+ cpu_set(i, c->llc_shared_map);
+ cpu_set(cpu, cpu_data(i).llc_shared_map);
+ }
+ if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
+ cpu_set(i, per_cpu(cpu_core_map, cpu));
+ cpu_set(cpu, per_cpu(cpu_core_map, i));
+ /*
+ * Does this new cpu bringup a new core?
+ */
+ if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) {
+ /*
+ * for each core in package, increment
+ * the booted_cores for this new cpu
+ */
+ if (first_cpu(per_cpu(cpu_sibling_map, i)) == i)
+ c->booted_cores++;
+ /*
+ * increment the core count for all
+ * the other cpus in this package
+ */
+ if (i != cpu)
+ cpu_data(i).booted_cores++;
+ } else if (i != cpu && !c->booted_cores)
+ c->booted_cores = cpu_data(i).booted_cores;
+ }
+ }
}
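To make the resulting topology masks concrete, a worked example with assumed hardware
(one package, two cores, HyperThreading enabled, booted as cpus 0-3 with 0/1 on core 0
and 2/3 on core 1). After all four CPUs have run set_cpu_sibling_map():

	/*
	 * per_cpu(cpu_sibling_map, 0) = { 0, 1 }        same package, same core
	 * per_cpu(cpu_core_map, 0)    = { 0, 1, 2, 3 }  same package
	 * cpu_data(0).booted_cores    = 2               cores seen in this package
	 * llc_shared_map is driven by cpu_llc_id: the CPUs reported to share the
	 * last-level cache (per core or per package, depending on the part).
	 */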
-static void unmap_cpu_to_logical_apicid(int cpu)
+/* maps the cpu to the sched domain representing multi-core */
+cpumask_t cpu_coregroup_map(int cpu)
{
- cpu_2_logical_apicid[cpu] = BAD_APICID;
- unmap_cpu_to_node(cpu);
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+ /*
+ * For perf, we return last level cache shared map.
+ * And for power savings, we return cpu_core_map
+ */
+ if (sched_mc_power_savings || sched_smt_power_savings)
+ return per_cpu(cpu_core_map, cpu);
+ else
+ return c->llc_shared_map;
+}
+
+#ifdef CONFIG_X86_32
+/*
+ * We are called very early to get the low memory for the
+ * SMP bootup trampoline page.
+ */
+void __init smp_alloc_memory(void)
+{
+ trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE);
+ /*
+ * Has to be in very low memory so we can execute
+ * real-mode AP code.
+ */
+ if (__pa(trampoline_base) >= 0x9F000)
+ BUG();
+}
+#endif
+
+void impress_friends(void)
+{
+ int cpu;
+ unsigned long bogosum = 0;
+ /*
+ * Allow the user to impress friends.
+ */
+ Dprintk("Before bogomips.\n");
+ for_each_possible_cpu(cpu)
+ if (cpu_isset(cpu, cpu_callout_map))
+ bogosum += cpu_data(cpu).loops_per_jiffy;
+ printk(KERN_INFO
+ "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
+ num_online_cpus(),
+ bogosum/(500000/HZ),
+ (bogosum/(5000/HZ))%100);
+
+ Dprintk("Before bogocount - setting activated=1.\n");
}
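The arithmetic in the printk above is just loops_per_jiffy rescaled; a worked example
with assumed values:

	/*
	 * BogoMIPS = loops_per_jiffy * HZ / 500000
	 *
	 * With HZ = 250 and four CPUs each reporting loops_per_jiffy = 2,000,000:
	 *   bogosum                      = 8,000,000
	 *   bogosum / (500000/HZ)        = 8,000,000 / 2000 = 4000      (integer part)
	 *   (bogosum / (5000/HZ)) % 100  = (8,000,000 / 20) % 100 = 0   (two decimals)
	 * which prints "Total of 4 processors activated (4000.00 BogoMIPS)."
	 */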
static inline void __inquire_remote_apic(int apicid)
{
- int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
+ unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
char *names[] = { "ID", "VERSION", "SPIV" };
int timeout;
- unsigned long status;
+ u32 status;
- printk("Inquiring remote APIC #%d...\n", apicid);
+ printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);
for (i = 0; i < ARRAY_SIZE(regs); i++) {
- printk("... APIC #%d %s: ", apicid, names[i]);
+ printk(KERN_INFO "... APIC #%d %s: ", apicid, names[i]);
/*
* Wait for idle.
*/
status = safe_apic_wait_icr_idle();
if (status)
- printk("a previous APIC delivery may have failed\n");
+ printk(KERN_CONT
+ "a previous APIC delivery may have failed\n");
apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
@@ -538,16 +616,16 @@ static inline void __inquire_remote_apic(int apicid)
switch (status) {
case APIC_ICR_RR_VALID:
status = apic_read(APIC_RRR);
- printk("%lx\n", status);
+ printk(KERN_CONT "%08x\n", status);
break;
default:
- printk("failed\n");
+ printk(KERN_CONT "failed\n");
}
}
}
#ifdef WAKE_SECONDARY_VIA_NMI
-/*
+/*
* Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
* INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
* won't ... remember to clear down the APIC, etc later.
@@ -584,9 +662,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
Dprintk("NMI sent.\n");
if (send_status)
- printk("APIC never delivered???\n");
+ printk(KERN_ERR "APIC never delivered???\n");
if (accept_status)
- printk("APIC delivery error (%lx).\n", accept_status);
+ printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);
return (send_status | accept_status);
}
@@ -637,6 +715,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
Dprintk("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
+ mb();
atomic_set(&init_deasserted, 1);
/*
@@ -655,7 +734,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
* target processor state.
*/
startup_ipi_hook(phys_apicid, (unsigned long) start_secondary,
- (unsigned long) stack_start.sp);
+#ifdef CONFIG_X86_64
+ (unsigned long)init_rsp);
+#else
+ (unsigned long)stack_start.sp);
+#endif
/*
* Run STARTUP IPI loop.
@@ -665,7 +748,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
maxlvt = lapic_get_maxlvt();
for (j = 1; j <= num_starts; j++) {
- Dprintk("Sending STARTUP #%d.\n",j);
+ Dprintk("Sending STARTUP #%d.\n", j);
apic_read_around(APIC_SPIV);
apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
@@ -711,49 +794,29 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
Dprintk("After Startup.\n");
if (send_status)
- printk("APIC never delivered???\n");
+ printk(KERN_ERR "APIC never delivered???\n");
if (accept_status)
- printk("APIC delivery error (%lx).\n", accept_status);
+ printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);
return (send_status | accept_status);
}
#endif /* WAKE_SECONDARY_VIA_INIT */
-extern cpumask_t cpu_initialized;
-static inline int alloc_cpu_id(void)
-{
- cpumask_t tmp_map;
+struct create_idle {
+ struct work_struct work;
+ struct task_struct *idle;
+ struct completion done;
int cpu;
- cpus_complement(tmp_map, cpu_present_map);
- cpu = first_cpu(tmp_map);
- if (cpu >= NR_CPUS)
- return -ENODEV;
- return cpu;
-}
+};
-#ifdef CONFIG_HOTPLUG_CPU
-static struct task_struct * __cpuinitdata cpu_idle_tasks[NR_CPUS];
-static inline struct task_struct * __cpuinit alloc_idle_task(int cpu)
+static void __cpuinit do_fork_idle(struct work_struct *work)
{
- struct task_struct *idle;
+ struct create_idle *c_idle =
+ container_of(work, struct create_idle, work);
- if ((idle = cpu_idle_tasks[cpu]) != NULL) {
- /* initialize thread_struct. we really want to avoid destroy
- * idle tread
- */
- idle->thread.sp = (unsigned long)task_pt_regs(idle);
- init_idle(idle, cpu);
- return idle;
- }
- idle = fork_idle(cpu);
-
- if (!IS_ERR(idle))
- cpu_idle_tasks[cpu] = idle;
- return idle;
+ c_idle->idle = fork_idle(c_idle->cpu);
+ complete(&c_idle->done);
}
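A note on the indirection above: fork_idle() is funneled through a work item so that,
when keventd is available, the idle task is created from keventd's context rather than
from whatever task triggered the CPU bring-up (presumably to keep the new idle thread
from inheriting attributes of that caller); when keventd is not yet up, do_boot_cpu()
below simply invokes c_idle.work.func() directly.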
-#else
-#define alloc_idle_task(cpu) fork_idle(cpu)
-#endif
static int __cpuinit do_boot_cpu(int apicid, int cpu)
/*
@@ -762,45 +825,92 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
* Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
*/
{
- struct task_struct *idle;
- unsigned long boot_error;
+ unsigned long boot_error = 0;
int timeout;
- unsigned long start_eip;
+ unsigned long start_ip;
unsigned short nmi_high = 0, nmi_low = 0;
+ struct create_idle c_idle = {
+ .cpu = cpu,
+ .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
+ };
+ INIT_WORK(&c_idle.work, do_fork_idle);
+#ifdef CONFIG_X86_64
+ /* allocate memory for gdts of secondary cpus. Hotplug is considered */
+ if (!cpu_gdt_descr[cpu].address &&
+ !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
+ printk(KERN_ERR "Failed to allocate GDT for CPU %d\n", cpu);
+ return -1;
+ }
- /*
- * Save current MTRR state in case it was changed since early boot
- * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
- */
- mtrr_save_state();
+ /* Allocate node local memory for AP pdas */
+ if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) {
+ struct x8664_pda *newpda, *pda;
+ int node = cpu_to_node(cpu);
+ pda = cpu_pda(cpu);
+ newpda = kmalloc_node(sizeof(struct x8664_pda), GFP_ATOMIC,
+ node);
+ if (newpda) {
+ memcpy(newpda, pda, sizeof(struct x8664_pda));
+ cpu_pda(cpu) = newpda;
+ } else
+ printk(KERN_ERR
+ "Could not allocate node local PDA for CPU %d on node %d\n",
+ cpu, node);
+ }
+#endif
+
+ alternatives_smp_switch(1);
+
+ c_idle.idle = get_idle_for_cpu(cpu);
/*
* We can't use kernel_thread since we must avoid to
* reschedule the child.
*/
- idle = alloc_idle_task(cpu);
- if (IS_ERR(idle))
- panic("failed fork for CPU %d", cpu);
+ if (c_idle.idle) {
+ c_idle.idle->thread.sp = (unsigned long) (((struct pt_regs *)
+ (THREAD_SIZE + task_stack_page(c_idle.idle))) - 1);
+ init_idle(c_idle.idle, cpu);
+ goto do_rest;
+ }
+ if (!keventd_up() || current_is_keventd())
+ c_idle.work.func(&c_idle.work);
+ else {
+ schedule_work(&c_idle.work);
+ wait_for_completion(&c_idle.done);
+ }
+
+ if (IS_ERR(c_idle.idle)) {
+ printk("failed fork for CPU %d\n", cpu);
+ return PTR_ERR(c_idle.idle);
+ }
+
+ set_idle_for_cpu(cpu, c_idle.idle);
+do_rest:
+#ifdef CONFIG_X86_32
+ per_cpu(current_task, cpu) = c_idle.idle;
init_gdt(cpu);
- per_cpu(current_task, cpu) = idle;
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
+ c_idle.idle->thread.ip = (unsigned long) start_secondary;
+ /* Stack for startup_32 can be just as for start_secondary onwards */
+ stack_start.sp = (void *) c_idle.idle->thread.sp;
+ irq_ctx_init(cpu);
+#else
+ cpu_pda(cpu)->pcurrent = c_idle.idle;
+ init_rsp = c_idle.idle->thread.sp;
+ load_sp0(&per_cpu(init_tss, cpu), &c_idle.idle->thread);
+ initial_code = (unsigned long)start_secondary;
+ clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
+#endif
- idle->thread.ip = (unsigned long) start_secondary;
- /* start_eip had better be page-aligned! */
- start_eip = setup_trampoline();
-
- ++cpucount;
- alternatives_smp_switch(1);
+ /* start_ip had better be page-aligned! */
+ start_ip = setup_trampoline();
/* So we see what's up */
- printk("Booting processor %d/%d ip %lx\n", cpu, apicid, start_eip);
- /* Stack for startup_32 can be just as for start_secondary onwards */
- stack_start.sp = (void *) idle->thread.sp;
+ printk(KERN_INFO "Booting processor %d/%d ip %lx\n",
+ cpu, apicid, start_ip);
- irq_ctx_init(cpu);
-
- per_cpu(x86_cpu_to_apicid, cpu) = apicid;
/*
* This grunge runs the startup process for
* the targeted processor.
@@ -812,12 +922,17 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
store_NMI_vector(&nmi_high, &nmi_low);
- smpboot_setup_warm_reset_vector(start_eip);
+ smpboot_setup_warm_reset_vector(start_ip);
+ /*
+ * Be paranoid about clearing APIC errors.
+ */
+ apic_write(APIC_ESR, 0);
+ apic_read(APIC_ESR);
/*
* Starting actual IPI sequence...
*/
- boot_error = wakeup_secondary_cpu(apicid, start_eip);
+ boot_error = wakeup_secondary_cpu(apicid, start_ip);
if (!boot_error) {
/*
@@ -839,18 +954,18 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
if (cpu_isset(cpu, cpu_callin_map)) {
/* number CPUs logically, starting from 1 (BSP is 0) */
Dprintk("OK.\n");
- printk("CPU%d: ", cpu);
+ printk(KERN_INFO "CPU%d: ", cpu);
print_cpu_info(&cpu_data(cpu));
Dprintk("CPU has booted.\n");
} else {
- boot_error= 1;
+ boot_error = 1;
if (*((volatile unsigned char *)trampoline_base)
== 0xA5)
/* trampoline started but...? */
- printk("Stuck ??\n");
+ printk(KERN_ERR "Stuck ??\n");
else
/* trampoline code not run */
- printk("Not responding.\n");
+ printk(KERN_ERR "Not responding.\n");
inquire_remote_apic(apicid);
}
}
@@ -858,156 +973,159 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
if (boot_error) {
/* Try to put things back the way they were before ... */
unmap_cpu_to_logical_apicid(cpu);
- cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
+#ifdef CONFIG_X86_64
+ clear_node_cpumask(cpu); /* was set by numa_add_cpu */
+#endif
+ cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */
cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
- cpucount--;
- } else {
- per_cpu(x86_cpu_to_apicid, cpu) = apicid;
- cpu_set(cpu, cpu_present_map);
+ cpu_clear(cpu, cpu_possible_map);
+ cpu_clear(cpu, cpu_present_map);
+ per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
}
/* mark "stuck" area as not stuck */
*((volatile unsigned long *)trampoline_base) = 0;
+ /*
+ * Cleanup possible dangling ends...
+ */
+ smpboot_restore_warm_reset_vector();
+
return boot_error;
}
-#ifdef CONFIG_HOTPLUG_CPU
-void cpu_exit_clear(void)
+int __cpuinit native_cpu_up(unsigned int cpu)
{
- int cpu = raw_smp_processor_id();
-
- idle_task_exit();
-
- cpucount --;
- cpu_uninit();
- irq_ctx_exit(cpu);
-
- cpu_clear(cpu, cpu_callout_map);
- cpu_clear(cpu, cpu_callin_map);
+ int apicid = cpu_present_to_apicid(cpu);
+ unsigned long flags;
+ int err;
- cpu_clear(cpu, smp_commenced_mask);
- unmap_cpu_to_logical_apicid(cpu);
-}
+ WARN_ON(irqs_disabled());
-struct warm_boot_cpu_info {
- struct completion *complete;
- struct work_struct task;
- int apicid;
- int cpu;
-};
+ Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu);
-static void __cpuinit do_warm_boot_cpu(struct work_struct *work)
-{
- struct warm_boot_cpu_info *info =
- container_of(work, struct warm_boot_cpu_info, task);
- do_boot_cpu(info->apicid, info->cpu);
- complete(info->complete);
-}
+ if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
+ !physid_isset(apicid, phys_cpu_present_map)) {
+ printk(KERN_ERR "%s: bad cpu %d\n", __func__, cpu);
+ return -EINVAL;
+ }
-static int __cpuinit __smp_prepare_cpu(int cpu)
-{
- DECLARE_COMPLETION_ONSTACK(done);
- struct warm_boot_cpu_info info;
- int apicid, ret;
-
- apicid = per_cpu(x86_cpu_to_apicid, cpu);
- if (apicid == BAD_APICID) {
- ret = -ENODEV;
- goto exit;
+ /*
+ * Already booted CPU?
+ */
+ if (cpu_isset(cpu, cpu_callin_map)) {
+ Dprintk("do_boot_cpu %d Already started\n", cpu);
+ return -ENOSYS;
}
- info.complete = &done;
- info.apicid = apicid;
- info.cpu = cpu;
- INIT_WORK(&info.task, do_warm_boot_cpu);
+ /*
+ * Save current MTRR state in case it was changed since early boot
+ * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
+ */
+ mtrr_save_state();
+
+ per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
+#ifdef CONFIG_X86_32
/* init low mem mapping */
clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
flush_tlb_all();
- schedule_work(&info.task);
- wait_for_completion(&done);
-
- zap_low_mappings();
- ret = 0;
-exit:
- return ret;
-}
-#endif
-
-/*
- * Cycle through the processors sending APIC IPIs to boot each.
- */
-
-static int boot_cpu_logical_apicid;
-/* Where the IO area was mapped on multiquad, always 0 otherwise */
-void *xquad_portio;
-#ifdef CONFIG_X86_NUMAQ
-EXPORT_SYMBOL(xquad_portio);
#endif
-static void __init smp_boot_cpus(unsigned int max_cpus)
-{
- int apicid, cpu, bit, kicked;
- unsigned long bogosum = 0;
+ err = do_boot_cpu(apicid, cpu);
+ if (err < 0) {
+ Dprintk("do_boot_cpu failed %d\n", err);
+ return err;
+ }
/*
- * Setup boot CPU information
+ * Check TSC synchronization with the AP (keep irqs disabled
+ * while doing so):
*/
- smp_store_cpu_info(0); /* Final full version of the data */
- printk("CPU%d: ", 0);
- print_cpu_info(&cpu_data(0));
+ local_irq_save(flags);
+ check_tsc_sync_source(cpu);
+ local_irq_restore(flags);
- boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
- boot_cpu_logical_apicid = logical_smp_processor_id();
- per_cpu(x86_cpu_to_apicid, 0) = boot_cpu_physical_apicid;
+ while (!cpu_isset(cpu, cpu_online_map)) {
+ cpu_relax();
+ touch_nmi_watchdog();
+ }
- current_thread_info()->cpu = 0;
+ return 0;
+}
- set_cpu_sibling_map(0);
+/*
+ * Fall back to non SMP mode after errors.
+ *
+ * RED-PEN audit/test this more. I bet there is more state messed up here.
+ */
+static __init void disable_smp(void)
+{
+ cpu_present_map = cpumask_of_cpu(0);
+ cpu_possible_map = cpumask_of_cpu(0);
+#ifdef CONFIG_X86_32
+ smpboot_clear_io_apic_irqs();
+#endif
+ if (smp_found_config)
+ phys_cpu_present_map =
+ physid_mask_of_physid(boot_cpu_physical_apicid);
+ else
+ phys_cpu_present_map = physid_mask_of_physid(0);
+ map_cpu_to_logical_apicid();
+ cpu_set(0, per_cpu(cpu_sibling_map, 0));
+ cpu_set(0, per_cpu(cpu_core_map, 0));
+}
+
+/*
+ * Various sanity checks.
+ */
+static int __init smp_sanity_check(unsigned max_cpus)
+{
+ preempt_disable();
+ if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
+ printk(KERN_WARNING "weird, boot CPU (#%d) not listed"
+ "by the BIOS.\n", hard_smp_processor_id());
+ physid_set(hard_smp_processor_id(), phys_cpu_present_map);
+ }
/*
* If we couldn't find an SMP configuration at boot time,
* get out of here now!
*/
if (!smp_found_config && !acpi_lapic) {
+ preempt_enable();
printk(KERN_NOTICE "SMP motherboard not detected.\n");
- smpboot_clear_io_apic_irqs();
- phys_cpu_present_map = physid_mask_of_physid(0);
+ disable_smp();
if (APIC_init_uniprocessor())
printk(KERN_NOTICE "Local APIC not detected."
" Using dummy APIC emulation.\n");
- map_cpu_to_logical_apicid();
- cpu_set(0, per_cpu(cpu_sibling_map, 0));
- cpu_set(0, per_cpu(cpu_core_map, 0));
- return;
+ return -1;
}
/*
* Should not be necessary because the MP table should list the boot
* CPU too, but we do it for the sake of robustness anyway.
- * Makes no sense to do this check in clustered apic mode, so skip it
*/
if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
- printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
- boot_cpu_physical_apicid);
+ printk(KERN_NOTICE
+ "weird, boot CPU (#%d) not listed by the BIOS.\n",
+ boot_cpu_physical_apicid);
physid_set(hard_smp_processor_id(), phys_cpu_present_map);
}
+ preempt_enable();
/*
* If we couldn't find a local APIC, then get out of here now!
*/
- if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
+ if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
+ !cpu_has_apic) {
printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
boot_cpu_physical_apicid);
- printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
- smpboot_clear_io_apic_irqs();
- phys_cpu_present_map = physid_mask_of_physid(0);
- map_cpu_to_logical_apicid();
- cpu_set(0, per_cpu(cpu_sibling_map, 0));
- cpu_set(0, per_cpu(cpu_core_map, 0));
- return;
+ printk(KERN_ERR "... forcing use of dummy APIC emulation."
+ "(tell your hw vendor)\n");
+ smpboot_clear_io_apic();
+ return -1;
}
verify_local_APIC();
@@ -1016,137 +1134,148 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
* If SMP should be disabled, then really disable it!
*/
if (!max_cpus) {
- smp_found_config = 0;
- printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
-
+ printk(KERN_INFO "SMP mode deactivated,"
+ "forcing use of dummy APIC emulation.\n");
+ smpboot_clear_io_apic();
+#ifdef CONFIG_X86_32
if (nmi_watchdog == NMI_LOCAL_APIC) {
- printk(KERN_INFO "activating minimal APIC for NMI watchdog use.\n");
+ printk(KERN_INFO "activating minimal APIC for"
+ "NMI watchdog use.\n");
connect_bsp_APIC();
setup_local_APIC();
+ end_local_APIC_setup();
}
- smpboot_clear_io_apic_irqs();
- phys_cpu_present_map = physid_mask_of_physid(0);
- map_cpu_to_logical_apicid();
- cpu_set(0, per_cpu(cpu_sibling_map, 0));
- cpu_set(0, per_cpu(cpu_core_map, 0));
- return;
+#endif
+ return -1;
}
- connect_bsp_APIC();
- setup_local_APIC();
- map_cpu_to_logical_apicid();
+ return 0;
+}
+static void __init smp_cpu_index_default(void)
+{
+ int i;
+ struct cpuinfo_x86 *c;
- setup_portio_remap();
+ for_each_cpu_mask(i, cpu_possible_map) {
+ c = &cpu_data(i);
+ /* mark all to hotplug */
+ c->cpu_index = NR_CPUS;
+ }
+}
+/*
+ * Prepare for SMP bootup. The MP table or ACPI has been read
+ * earlier. Just do some sanity checking here and enable APIC mode.
+ */
+void __init native_smp_prepare_cpus(unsigned int max_cpus)
+{
+ nmi_watchdog_default();
+ smp_cpu_index_default();
+ current_cpu_data = boot_cpu_data;
+ cpu_callin_map = cpumask_of_cpu(0);
+ mb();
/*
- * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
- *
- * In clustered apic mode, phys_cpu_present_map is a constructed thus:
- * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
- * clustered apic ID.
+ * Setup boot CPU information
*/
- Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
-
- kicked = 1;
- for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) {
- apicid = cpu_present_to_apicid(bit);
- /*
- * Don't even attempt to start the boot CPU!
- */
- if ((apicid == boot_cpu_apicid) || (apicid == BAD_APICID))
- continue;
+ smp_store_cpu_info(0); /* Final full version of the data */
+ boot_cpu_logical_apicid = logical_smp_processor_id();
+ current_thread_info()->cpu = 0; /* needed? */
+ set_cpu_sibling_map(0);
- if (!check_apicid_present(bit))
- continue;
- if (max_cpus <= cpucount+1)
- continue;
+ if (smp_sanity_check(max_cpus) < 0) {
+ printk(KERN_INFO "SMP disabled\n");
+ disable_smp();
+ return;
+ }
- if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu))
- printk("CPU #%d not responding - cannot use it.\n",
- apicid);
- else
- ++kicked;
+ preempt_disable();
+ if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) {
+ panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
+ GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid);
+ /* Or can we switch back to PIC here? */
}
+ preempt_enable();
+#ifdef CONFIG_X86_32
+ connect_bsp_APIC();
+#endif
/*
- * Cleanup possible dangling ends...
+ * Switch from PIC to APIC mode.
*/
- smpboot_restore_warm_reset_vector();
+ setup_local_APIC();
+#ifdef CONFIG_X86_64
/*
- * Allow the user to impress friends.
+ * Enable IO APIC before setting up error vector
*/
- Dprintk("Before bogomips.\n");
- for_each_possible_cpu(cpu)
- if (cpu_isset(cpu, cpu_callout_map))
- bogosum += cpu_data(cpu).loops_per_jiffy;
- printk(KERN_INFO
- "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
- cpucount+1,
- bogosum/(500000/HZ),
- (bogosum/(5000/HZ))%100);
-
- Dprintk("Before bogocount - setting activated=1.\n");
-
- if (smp_b_stepping)
- printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");
+ if (!skip_ioapic_setup && nr_ioapics)
+ enable_IO_APIC();
+#endif
+ end_local_APIC_setup();
- /*
- * Don't taint if we are running SMP kernel on a single non-MP
- * approved Athlon
- */
- if (tainted & TAINT_UNSAFE_SMP) {
- if (cpucount)
- printk (KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
- else
- tainted &= ~TAINT_UNSAFE_SMP;
- }
+ map_cpu_to_logical_apicid();
- Dprintk("Boot done.\n");
+ setup_portio_remap();
+ smpboot_setup_io_apic();
/*
- * construct cpu_sibling_map, so that we can tell sibling CPUs
- * efficiently.
+ * Set up local APIC timer on boot CPU.
*/
- for_each_possible_cpu(cpu) {
- cpus_clear(per_cpu(cpu_sibling_map, cpu));
- cpus_clear(per_cpu(cpu_core_map, cpu));
- }
-
- cpu_set(0, per_cpu(cpu_sibling_map, 0));
- cpu_set(0, per_cpu(cpu_core_map, 0));
-
- smpboot_setup_io_apic();
+ printk(KERN_INFO "CPU%d: ", 0);
+ print_cpu_info(&cpu_data(0));
setup_boot_clock();
}
+/*
+ * Early setup to make printk work.
+ */
+void __init native_smp_prepare_boot_cpu(void)
+{
+ int me = smp_processor_id();
+#ifdef CONFIG_X86_32
+ init_gdt(me);
+ switch_to_new_gdt();
+#endif
+ /* already set me in cpu_online_map in boot_cpu_init() */
+ cpu_set(me, cpu_callout_map);
+ per_cpu(cpu_state, me) = CPU_ONLINE;
+}
-/* These are wrappers to interface to the new boot process. Someone
- who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
-void __init native_smp_prepare_cpus(unsigned int max_cpus)
+void __init native_smp_cpus_done(unsigned int max_cpus)
{
- smp_commenced_mask = cpumask_of_cpu(0);
- cpu_callin_map = cpumask_of_cpu(0);
- mb();
- smp_boot_cpus(max_cpus);
+ Dprintk("Boot done.\n");
+
+ impress_friends();
+ smp_checks();
+#ifdef CONFIG_X86_IO_APIC
+ setup_ioapic_dest();
+#endif
+ check_nmi_watchdog();
+#ifdef CONFIG_X86_32
+ zap_low_mappings();
+#endif
}
-void __init native_smp_prepare_boot_cpu(void)
+#ifdef CONFIG_HOTPLUG_CPU
+
+# ifdef CONFIG_X86_32
+void cpu_exit_clear(void)
{
- unsigned int cpu = smp_processor_id();
+ int cpu = raw_smp_processor_id();
- init_gdt(cpu);
- switch_to_new_gdt();
+ idle_task_exit();
+
+ cpu_uninit();
+ irq_ctx_exit(cpu);
+
+ cpu_clear(cpu, cpu_callout_map);
+ cpu_clear(cpu, cpu_callin_map);
- cpu_set(cpu, cpu_online_map);
- cpu_set(cpu, cpu_callout_map);
- cpu_set(cpu, cpu_present_map);
- cpu_set(cpu, cpu_possible_map);
- __get_cpu_var(cpu_state) = CPU_ONLINE;
+ unmap_cpu_to_logical_apicid(cpu);
}
+# endif /* CONFIG_X86_32 */
-#ifdef CONFIG_HOTPLUG_CPU
void remove_siblinginfo(int cpu)
{
int sibling;
@@ -1160,7 +1289,7 @@ void remove_siblinginfo(int cpu)
if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
cpu_data(sibling).booted_cores--;
}
-
+
for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu))
cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
cpus_clear(per_cpu(cpu_sibling_map, cpu));
@@ -1170,35 +1299,99 @@ void remove_siblinginfo(int cpu)
cpu_clear(cpu, cpu_sibling_setup_map);
}
+int additional_cpus __initdata = -1;
+
+static __init int setup_additional_cpus(char *s)
+{
+ return s && get_option(&s, &additional_cpus) ? 0 : -EINVAL;
+}
+early_param("additional_cpus", setup_additional_cpus);
+
+/*
+ * cpu_possible_map should be static, it cannot change as cpu's
+ * are onlined, or offlined. The reason is per-cpu data-structures
+ * are allocated by some modules at init time, and dont expect to
+ * do this dynamically on cpu arrival/departure.
+ * cpu_present_map on the other hand can change dynamically.
+ * In case when cpu_hotplug is not compiled, then we resort to current
+ * behaviour, which is cpu_possible == cpu_present.
+ * - Ashok Raj
+ *
+ * Three ways to find out the number of additional hotplug CPUs:
+ * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
+ * - The user can overwrite it with additional_cpus=NUM
+ * - Otherwise don't reserve additional CPUs.
+ * We do this because additional CPUs waste a lot of memory.
+ * -AK
+ */
+__init void prefill_possible_map(void)
+{
+ int i;
+ int possible;
+
+ if (additional_cpus == -1) {
+ if (disabled_cpus > 0)
+ additional_cpus = disabled_cpus;
+ else
+ additional_cpus = 0;
+ }
+ possible = num_processors + additional_cpus;
+ if (possible > NR_CPUS)
+ possible = NR_CPUS;
+
+ printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
+ possible, max_t(int, possible - num_processors, 0));
+
+ for (i = 0; i < possible; i++)
+ cpu_set(i, cpu_possible_map);
+}
+
+static void __ref remove_cpu_from_maps(int cpu)
+{
+ cpu_clear(cpu, cpu_online_map);
+#ifdef CONFIG_X86_64
+ cpu_clear(cpu, cpu_callout_map);
+ cpu_clear(cpu, cpu_callin_map);
+ /* was set by cpu_init() */
+ clear_bit(cpu, (unsigned long *)&cpu_initialized);
+ clear_node_cpumask(cpu);
+#endif
+}
+
int __cpu_disable(void)
{
- cpumask_t map = cpu_online_map;
int cpu = smp_processor_id();
/*
* Perhaps use cpufreq to drop frequency, but that could go
* into generic code.
- *
+ *
* We won't take down the boot processor on i386 due to some
* interrupts only being able to be serviced by the BSP.
* Especially so if we're not using an IOAPIC -zwane
*/
if (cpu == 0)
return -EBUSY;
+
if (nmi_watchdog == NMI_LOCAL_APIC)
stop_apic_nmi_watchdog(NULL);
clear_local_APIC();
- /* Allow any queued timer interrupts to get serviced */
+
+ /*
+ * HACK:
+ * Allow any queued timer interrupts to get serviced
+ * This is only a temporary solution until we cleanup
+ * fixup_irqs as we do for IA64.
+ */
local_irq_enable();
mdelay(1);
- local_irq_disable();
+ local_irq_disable();
remove_siblinginfo(cpu);
- cpu_clear(cpu, map);
- fixup_irqs(map);
/* It's now safe to remove this processor from the online map */
- cpu_clear(cpu, cpu_online_map);
+ remove_cpu_from_maps(cpu);
+ fixup_irqs(cpu_online_map);
return 0;
}
@@ -1210,14 +1403,14 @@ void __cpu_die(unsigned int cpu)
for (i = 0; i < 10; i++) {
/* They ack this in play_dead by setting CPU_DEAD */
if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
- printk ("CPU %d is now offline\n", cpu);
+ printk(KERN_INFO "CPU %d is now offline\n", cpu);
if (1 == num_online_cpus())
alternatives_smp_switch(0);
return;
}
msleep(100);
}
- printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+ printk(KERN_ERR "CPU %u didn't die...\n", cpu);
}
#else /* ... !CONFIG_HOTPLUG_CPU */
int __cpu_disable(void)
@@ -1230,81 +1423,7 @@ void __cpu_die(unsigned int cpu)
/* We said "no" in __cpu_disable */
BUG();
}
-#endif /* CONFIG_HOTPLUG_CPU */
-
-int __cpuinit native_cpu_up(unsigned int cpu)
-{
- unsigned long flags;
-#ifdef CONFIG_HOTPLUG_CPU
- int ret = 0;
-
- /*
- * We do warm boot only on cpus that had booted earlier
- * Otherwise cold boot is all handled from smp_boot_cpus().
- * cpu_callin_map is set during AP kickstart process. Its reset
- * when a cpu is taken offline from cpu_exit_clear().
- */
- if (!cpu_isset(cpu, cpu_callin_map))
- ret = __smp_prepare_cpu(cpu);
-
- if (ret)
- return -EIO;
-#endif
-
- /* In case one didn't come up */
- if (!cpu_isset(cpu, cpu_callin_map)) {
- printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
- return -EIO;
- }
-
- per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
- /* Unleash the CPU! */
- cpu_set(cpu, smp_commenced_mask);
-
- /*
- * Check TSC synchronization with the AP (keep irqs disabled
- * while doing so):
- */
- local_irq_save(flags);
- check_tsc_sync_source(cpu);
- local_irq_restore(flags);
-
- while (!cpu_isset(cpu, cpu_online_map)) {
- cpu_relax();
- touch_nmi_watchdog();
- }
-
- return 0;
-}
-
-void __init native_smp_cpus_done(unsigned int max_cpus)
-{
-#ifdef CONFIG_X86_IO_APIC
- setup_ioapic_dest();
#endif
- zap_low_mappings();
-}
-
-void __init smp_intr_init(void)
-{
- /*
- * IRQ0 must be given a fixed assignment and initialized,
- * because it's used before the IO-APIC is set up.
- */
- set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
-
- /*
- * The reschedule interrupt is a CPU-to-CPU reschedule-helper
- * IPI, driven by wakeup.
- */
- set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
-
- /* IPI for invalidation */
- set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
-
- /* IPI for generic function call */
- set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
-}
/*
* If the BIOS enumerates physical processors before logical,
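The unified do_boot_cpu() above caches each CPU's idle task through get_idle_for_cpu()/set_idle_for_cpu(), so a hot-re-added CPU reuses the task forked on its first boot instead of creating a new one. A minimal user-space sketch of that caching pattern follows; the task structure, fork_idle_task() and bring_up_cpu() are illustrative stand-ins, not the kernel's real definitions.

#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS 8

struct task {
	int cpu;
};

/* One cached idle task per CPU, as in idle_thread_array above. */
static struct task *idle_thread_array[NR_CPUS];

static struct task *get_idle_for_cpu(int cpu)
{
	return idle_thread_array[cpu];
}

static void set_idle_for_cpu(int cpu, struct task *t)
{
	idle_thread_array[cpu] = t;
}

/* Stand-in for fork_idle(): only reached the first time a CPU comes up. */
static struct task *fork_idle_task(int cpu)
{
	struct task *t = malloc(sizeof(*t));

	if (t)
		t->cpu = cpu;
	return t;
}

static struct task *bring_up_cpu(int cpu)
{
	struct task *idle = get_idle_for_cpu(cpu);

	if (idle) {
		printf("CPU %d: reusing cached idle task\n", cpu);
		return idle;
	}
	idle = fork_idle_task(cpu);
	if (idle)
		set_idle_for_cpu(cpu, idle);
	return idle;
}

int main(void)
{
	bring_up_cpu(1);	/* cold boot: allocates and caches */
	bring_up_cpu(1);	/* hotplug re-add: hits the cache */
	return 0;
}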
diff --git a/arch/x86/kernel/smpboot_64.c b/arch/x86/kernel/smpboot_64.c
deleted file mode 100644
index 0880f2c388a9..000000000000
--- a/arch/x86/kernel/smpboot_64.c
+++ /dev/null
@@ -1,1108 +0,0 @@
-/*
- * x86 SMP booting functions
- *
- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
- * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
- * Copyright 2001 Andi Kleen, SuSE Labs.
- *
- * Much of the core SMP work is based on previous work by Thomas Radke, to
- * whom a great many thanks are extended.
- *
- * Thanks to Intel for making available several different Pentium,
- * Pentium Pro and Pentium-II/Xeon MP machines.
- * Original development of Linux SMP code supported by Caldera.
- *
- * This code is released under the GNU General Public License version 2
- *
- * Fixes
- * Felix Koop : NR_CPUS used properly
- * Jose Renau : Handle single CPU case.
- * Alan Cox : By repeated request 8) - Total BogoMIP report.
- * Greg Wright : Fix for kernel stacks panic.
- * Erich Boleyn : MP v1.4 and additional changes.
- * Matthias Sattler : Changes for 2.1 kernel map.
- * Michel Lespinasse : Changes for 2.1 kernel map.
- * Michael Chastain : Change trampoline.S to gnu as.
- * Alan Cox : Dumb bug: 'B' step PPro's are fine
- * Ingo Molnar : Added APIC timers, based on code
- * from Jose Renau
- * Ingo Molnar : various cleanups and rewrites
- * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug.
- * Maciej W. Rozycki : Bits for genuine 82489DX APICs
- * Andi Kleen : Changed for SMP boot into long mode.
- * Rusty Russell : Hacked into shape for new "hotplug" boot process.
- * Andi Kleen : Converted to new state machine.
- * Various cleanups.
- * Probably mostly hotplug CPU ready now.
- * Ashok Raj : CPU hotplug support
- */
-
-
-#include <linux/init.h>
-
-#include <linux/mm.h>
-#include <linux/kernel_stat.h>
-#include <linux/bootmem.h>
-#include <linux/thread_info.h>
-#include <linux/module.h>
-#include <linux/delay.h>
-#include <linux/mc146818rtc.h>
-#include <linux/smp.h>
-#include <linux/kdebug.h>
-
-#include <asm/mtrr.h>
-#include <asm/pgalloc.h>
-#include <asm/desc.h>
-#include <asm/tlbflush.h>
-#include <asm/proto.h>
-#include <asm/nmi.h>
-#include <asm/irq.h>
-#include <asm/hw_irq.h>
-#include <asm/numa.h>
-
-/* Number of siblings per CPU package */
-int smp_num_siblings = 1;
-EXPORT_SYMBOL(smp_num_siblings);
-
-/* Last level cache ID of each logical CPU */
-DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID;
-
-/* Bitmask of currently online CPUs */
-cpumask_t cpu_online_map __read_mostly;
-
-EXPORT_SYMBOL(cpu_online_map);
-
-/*
- * Private maps to synchronize booting between AP and BP.
- * Probably not needed anymore, but it makes for easier debugging. -AK
- */
-cpumask_t cpu_callin_map;
-cpumask_t cpu_callout_map;
-cpumask_t cpu_possible_map;
-EXPORT_SYMBOL(cpu_possible_map);
-
-/* Per CPU bogomips and other parameters */
-DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
-EXPORT_PER_CPU_SYMBOL(cpu_info);
-
-/* Set when the idlers are all forked */
-int smp_threads_ready;
-
-/* representing HT siblings of each logical CPU */
-DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
-EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
-
-/* representing HT and core siblings of each logical CPU */
-DEFINE_PER_CPU(cpumask_t, cpu_core_map);
-EXPORT_PER_CPU_SYMBOL(cpu_core_map);
-
-/*
- * Trampoline 80x86 program as an array.
- */
-
-extern const unsigned char trampoline_data[];
-extern const unsigned char trampoline_end[];
-
-/* State of each CPU */
-DEFINE_PER_CPU(int, cpu_state) = { 0 };
-
-/*
- * Store all idle threads, this can be reused instead of creating
- * a new thread. Also avoids complicated thread destroy functionality
- * for idle threads.
- */
-#ifdef CONFIG_HOTPLUG_CPU
-/*
- * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is
- * removed after init for !CONFIG_HOTPLUG_CPU.
- */
-static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
-#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x))
-#define set_idle_for_cpu(x,p) (per_cpu(idle_thread_array, x) = (p))
-#else
-struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
-#define get_idle_for_cpu(x) (idle_thread_array[(x)])
-#define set_idle_for_cpu(x,p) (idle_thread_array[(x)] = (p))
-#endif
-
-
-/*
- * Currently trivial. Write the real->protected mode
- * bootstrap into the page concerned. The caller
- * has made sure it's suitably aligned.
- */
-
-static unsigned long __cpuinit setup_trampoline(void)
-{
- void *tramp = __va(SMP_TRAMPOLINE_BASE);
- memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
- return virt_to_phys(tramp);
-}
-
-/*
- * The bootstrap kernel entry code has set these up. Save them for
- * a given CPU
- */
-
-static void __cpuinit smp_store_cpu_info(int id)
-{
- struct cpuinfo_x86 *c = &cpu_data(id);
-
- *c = boot_cpu_data;
- c->cpu_index = id;
- identify_cpu(c);
- print_cpu_info(c);
-}
-
-static atomic_t init_deasserted __cpuinitdata;
-
-/*
- * Report back to the Boot Processor.
- * Running on AP.
- */
-void __cpuinit smp_callin(void)
-{
- int cpuid, phys_id;
- unsigned long timeout;
-
- /*
- * If waken up by an INIT in an 82489DX configuration
- * we may get here before an INIT-deassert IPI reaches
- * our local APIC. We have to wait for the IPI or we'll
- * lock up on an APIC access.
- */
- while (!atomic_read(&init_deasserted))
- cpu_relax();
-
- /*
- * (This works even if the APIC is not enabled.)
- */
- phys_id = GET_APIC_ID(apic_read(APIC_ID));
- cpuid = smp_processor_id();
- if (cpu_isset(cpuid, cpu_callin_map)) {
- panic("smp_callin: phys CPU#%d, CPU#%d already present??\n",
- phys_id, cpuid);
- }
- Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);
-
- /*
- * STARTUP IPIs are fragile beasts as they might sometimes
- * trigger some glue motherboard logic. Complete APIC bus
- * silence for 1 second, this overestimates the time the
- * boot CPU is spending to send the up to 2 STARTUP IPIs
- * by a factor of two. This should be enough.
- */
-
- /*
- * Waiting 2s total for startup (udelay is not yet working)
- */
- timeout = jiffies + 2*HZ;
- while (time_before(jiffies, timeout)) {
- /*
- * Has the boot CPU finished it's STARTUP sequence?
- */
- if (cpu_isset(cpuid, cpu_callout_map))
- break;
- cpu_relax();
- }
-
- if (!time_before(jiffies, timeout)) {
- panic("smp_callin: CPU%d started up but did not get a callout!\n",
- cpuid);
- }
-
- /*
- * the boot CPU has finished the init stage and is spinning
- * on callin_map until we finish. We are free to set up this
- * CPU, first the APIC. (this is probably redundant on most
- * boards)
- */
-
- Dprintk("CALLIN, before setup_local_APIC().\n");
- setup_local_APIC();
- end_local_APIC_setup();
-
- /*
- * Get our bogomips.
- *
- * Need to enable IRQs because it can take longer and then
- * the NMI watchdog might kill us.
- */
- local_irq_enable();
- calibrate_delay();
- local_irq_disable();
- Dprintk("Stack at about %p\n",&cpuid);
-
- /*
- * Save our processor parameters
- */
- smp_store_cpu_info(cpuid);
-
- /*
- * Allow the master to continue.
- */
- cpu_set(cpuid, cpu_callin_map);
-}
-
-/* maps the cpu to the sched domain representing multi-core */
-cpumask_t cpu_coregroup_map(int cpu)
-{
- struct cpuinfo_x86 *c = &cpu_data(cpu);
- /*
- * For perf, we return last level cache shared map.
- * And for power savings, we return cpu_core_map
- */
- if (sched_mc_power_savings || sched_smt_power_savings)
- return per_cpu(cpu_core_map, cpu);
- else
- return c->llc_shared_map;
-}
-
-/* representing cpus for which sibling maps can be computed */
-static cpumask_t cpu_sibling_setup_map;
-
-static inline void set_cpu_sibling_map(int cpu)
-{
- int i;
- struct cpuinfo_x86 *c = &cpu_data(cpu);
-
- cpu_set(cpu, cpu_sibling_setup_map);
-
- if (smp_num_siblings > 1) {
- for_each_cpu_mask(i, cpu_sibling_setup_map) {
- if (c->phys_proc_id == cpu_data(i).phys_proc_id &&
- c->cpu_core_id == cpu_data(i).cpu_core_id) {
- cpu_set(i, per_cpu(cpu_sibling_map, cpu));
- cpu_set(cpu, per_cpu(cpu_sibling_map, i));
- cpu_set(i, per_cpu(cpu_core_map, cpu));
- cpu_set(cpu, per_cpu(cpu_core_map, i));
- cpu_set(i, c->llc_shared_map);
- cpu_set(cpu, cpu_data(i).llc_shared_map);
- }
- }
- } else {
- cpu_set(cpu, per_cpu(cpu_sibling_map, cpu));
- }
-
- cpu_set(cpu, c->llc_shared_map);
-
- if (current_cpu_data.x86_max_cores == 1) {
- per_cpu(cpu_core_map, cpu) = per_cpu(cpu_sibling_map, cpu);
- c->booted_cores = 1;
- return;
- }
-
- for_each_cpu_mask(i, cpu_sibling_setup_map) {
- if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
- per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
- cpu_set(i, c->llc_shared_map);
- cpu_set(cpu, cpu_data(i).llc_shared_map);
- }
- if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
- cpu_set(i, per_cpu(cpu_core_map, cpu));
- cpu_set(cpu, per_cpu(cpu_core_map, i));
- /*
- * Does this new cpu bringup a new core?
- */
- if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1) {
- /*
- * for each core in package, increment
- * the booted_cores for this new cpu
- */
- if (first_cpu(per_cpu(cpu_sibling_map, i)) == i)
- c->booted_cores++;
- /*
- * increment the core count for all
- * the other cpus in this package
- */
- if (i != cpu)
- cpu_data(i).booted_cores++;
- } else if (i != cpu && !c->booted_cores)
- c->booted_cores = cpu_data(i).booted_cores;
- }
- }
-}
-
-/*
- * Setup code on secondary processor (after comming out of the trampoline)
- */
-void __cpuinit start_secondary(void)
-{
- /*
- * Dont put anything before smp_callin(), SMP
- * booting is too fragile that we want to limit the
- * things done here to the most necessary things.
- */
- cpu_init();
- preempt_disable();
- smp_callin();
-
- /* otherwise gcc will move up the smp_processor_id before the cpu_init */
- barrier();
-
- /*
- * Check TSC sync first:
- */
- check_tsc_sync_target();
-
- if (nmi_watchdog == NMI_IO_APIC) {
- disable_8259A_irq(0);
- enable_NMI_through_LVT0();
- enable_8259A_irq(0);
- }
-
- /*
- * The sibling maps must be set before turing the online map on for
- * this cpu
- */
- set_cpu_sibling_map(smp_processor_id());
-
- /*
- * We need to hold call_lock, so there is no inconsistency
- * between the time smp_call_function() determines number of
- * IPI recipients, and the time when the determination is made
- * for which cpus receive the IPI in genapic_flat.c. Holding this
- * lock helps us to not include this cpu in a currently in progress
- * smp_call_function().
- */
- lock_ipi_call_lock();
- spin_lock(&vector_lock);
-
- /* Setup the per cpu irq handling data structures */
- __setup_vector_irq(smp_processor_id());
- /*
- * Allow the master to continue.
- */
- cpu_set(smp_processor_id(), cpu_online_map);
- per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
- spin_unlock(&vector_lock);
-
- unlock_ipi_call_lock();
-
- setup_secondary_clock();
-
- cpu_idle();
-}
-
-extern volatile unsigned long init_rsp;
-extern void (*initial_code)(void);
-
-#ifdef APIC_DEBUG
-static void inquire_remote_apic(int apicid)
-{
- unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
- char *names[] = { "ID", "VERSION", "SPIV" };
- int timeout;
- u32 status;
-
- printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid);
-
- for (i = 0; i < ARRAY_SIZE(regs); i++) {
- printk(KERN_INFO "... APIC #%d %s: ", apicid, names[i]);
-
- /*
- * Wait for idle.
- */
- status = safe_apic_wait_icr_idle();
- if (status)
- printk(KERN_CONT
- "a previous APIC delivery may have failed\n");
-
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
- apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
-
- timeout = 0;
- do {
- udelay(100);
- status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
- } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);
-
- switch (status) {
- case APIC_ICR_RR_VALID:
- status = apic_read(APIC_RRR);
- printk(KERN_CONT "%08x\n", status);
- break;
- default:
- printk(KERN_CONT "failed\n");
- }
- }
-}
-#endif
-
-/*
- * Kick the secondary to wake up.
- */
-static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
-{
- unsigned long send_status, accept_status = 0;
- int maxlvt, num_starts, j;
-
- Dprintk("Asserting INIT.\n");
-
- /*
- * Turn INIT on target chip
- */
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /*
- * Send IPI
- */
- apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
- | APIC_DM_INIT);
-
- Dprintk("Waiting for send to finish...\n");
- send_status = safe_apic_wait_icr_idle();
-
- mdelay(10);
-
- Dprintk("Deasserting INIT.\n");
-
- /* Target chip */
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /* Send IPI */
- apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
-
- Dprintk("Waiting for send to finish...\n");
- send_status = safe_apic_wait_icr_idle();
-
- mb();
- atomic_set(&init_deasserted, 1);
-
- num_starts = 2;
-
- /*
- * Run STARTUP IPI loop.
- */
- Dprintk("#startup loops: %d.\n", num_starts);
-
- maxlvt = lapic_get_maxlvt();
-
- for (j = 1; j <= num_starts; j++) {
- Dprintk("Sending STARTUP #%d.\n",j);
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
- Dprintk("After apic_write.\n");
-
- /*
- * STARTUP IPI
- */
-
- /* Target chip */
- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
-
- /* Boot on the stack */
- /* Kick the second */
- apic_write(APIC_ICR, APIC_DM_STARTUP | (start_rip >> 12));
-
- /*
- * Give the other CPU some time to accept the IPI.
- */
- udelay(300);
-
- Dprintk("Startup point 1.\n");
-
- Dprintk("Waiting for send to finish...\n");
- send_status = safe_apic_wait_icr_idle();
-
- /*
- * Give the other CPU some time to accept the IPI.
- */
- udelay(200);
- /*
- * Due to the Pentium erratum 3AP.
- */
- if (maxlvt > 3) {
- apic_write(APIC_ESR, 0);
- }
- accept_status = (apic_read(APIC_ESR) & 0xEF);
- if (send_status || accept_status)
- break;
- }
- Dprintk("After Startup.\n");
-
- if (send_status)
- printk(KERN_ERR "APIC never delivered???\n");
- if (accept_status)
- printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);
-
- return (send_status | accept_status);
-}
-
-struct create_idle {
- struct work_struct work;
- struct task_struct *idle;
- struct completion done;
- int cpu;
-};
-
-static void __cpuinit do_fork_idle(struct work_struct *work)
-{
- struct create_idle *c_idle =
- container_of(work, struct create_idle, work);
-
- c_idle->idle = fork_idle(c_idle->cpu);
- complete(&c_idle->done);
-}
-
-/*
- * Boot one CPU.
- */
-static int __cpuinit do_boot_cpu(int cpu, int apicid)
-{
- unsigned long boot_error;
- int timeout;
- unsigned long start_rip;
- struct create_idle c_idle = {
- .cpu = cpu,
- .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
- };
- INIT_WORK(&c_idle.work, do_fork_idle);
-
- /* allocate memory for gdts of secondary cpus. Hotplug is considered */
- if (!cpu_gdt_descr[cpu].address &&
- !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) {
- printk(KERN_ERR "Failed to allocate GDT for CPU %d\n", cpu);
- return -1;
- }
-
- /* Allocate node local memory for AP pdas */
- if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) {
- struct x8664_pda *newpda, *pda;
- int node = cpu_to_node(cpu);
- pda = cpu_pda(cpu);
- newpda = kmalloc_node(sizeof (struct x8664_pda), GFP_ATOMIC,
- node);
- if (newpda) {
- memcpy(newpda, pda, sizeof (struct x8664_pda));
- cpu_pda(cpu) = newpda;
- } else
- printk(KERN_ERR
- "Could not allocate node local PDA for CPU %d on node %d\n",
- cpu, node);
- }
-
- alternatives_smp_switch(1);
-
- c_idle.idle = get_idle_for_cpu(cpu);
-
- if (c_idle.idle) {
- c_idle.idle->thread.sp = (unsigned long) (((struct pt_regs *)
- (THREAD_SIZE + task_stack_page(c_idle.idle))) - 1);
- init_idle(c_idle.idle, cpu);
- goto do_rest;
- }
-
- /*
- * During cold boot process, keventd thread is not spun up yet.
- * When we do cpu hot-add, we create idle threads on the fly, we should
- * not acquire any attributes from the calling context. Hence the clean
- * way to create kernel_threads() is to do that from keventd().
- * We do the current_is_keventd() due to the fact that ACPI notifier
- * was also queuing to keventd() and when the caller is already running
- * in context of keventd(), we would end up with locking up the keventd
- * thread.
- */
- if (!keventd_up() || current_is_keventd())
- c_idle.work.func(&c_idle.work);
- else {
- schedule_work(&c_idle.work);
- wait_for_completion(&c_idle.done);
- }
-
- if (IS_ERR(c_idle.idle)) {
- printk("failed fork for CPU %d\n", cpu);
- return PTR_ERR(c_idle.idle);
- }
-
- set_idle_for_cpu(cpu, c_idle.idle);
-
-do_rest:
-
- cpu_pda(cpu)->pcurrent = c_idle.idle;
-
- start_rip = setup_trampoline();
-
- init_rsp = c_idle.idle->thread.sp;
- load_sp0(&per_cpu(init_tss, cpu), &c_idle.idle->thread);
- initial_code = start_secondary;
- clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
-
- printk(KERN_INFO "Booting processor %d/%d APIC 0x%x\n", cpu,
- cpus_weight(cpu_present_map),
- apicid);
-
- /*
- * This grunge runs the startup process for
- * the targeted processor.
- */
-
- atomic_set(&init_deasserted, 0);
-
- Dprintk("Setting warm reset code and vector.\n");
-
- CMOS_WRITE(0xa, 0xf);
- local_flush_tlb();
- Dprintk("1.\n");
- *((volatile unsigned short *) phys_to_virt(0x469)) = start_rip >> 4;
- Dprintk("2.\n");
- *((volatile unsigned short *) phys_to_virt(0x467)) = start_rip & 0xf;
- Dprintk("3.\n");
-
- /*
- * Be paranoid about clearing APIC errors.
- */
- apic_write(APIC_ESR, 0);
- apic_read(APIC_ESR);
-
- /*
- * Status is now clean
- */
- boot_error = 0;
-
- /*
- * Starting actual IPI sequence...
- */
- boot_error = wakeup_secondary_via_INIT(apicid, start_rip);
-
- if (!boot_error) {
- /*
- * allow APs to start initializing.
- */
- Dprintk("Before Callout %d.\n", cpu);
- cpu_set(cpu, cpu_callout_map);
- Dprintk("After Callout %d.\n", cpu);
-
- /*
- * Wait 5s total for a response
- */
- for (timeout = 0; timeout < 50000; timeout++) {
- if (cpu_isset(cpu, cpu_callin_map))
- break; /* It has booted */
- udelay(100);
- }
-
- if (cpu_isset(cpu, cpu_callin_map)) {
- /* number CPUs logically, starting from 1 (BSP is 0) */
- Dprintk("CPU has booted.\n");
- } else {
- boot_error = 1;
- if (*((volatile unsigned char *)phys_to_virt(SMP_TRAMPOLINE_BASE))
- == 0xA5)
- /* trampoline started but...? */
- printk("Stuck ??\n");
- else
- /* trampoline code not run */
- printk("Not responding.\n");
-#ifdef APIC_DEBUG
- inquire_remote_apic(apicid);
-#endif
- }
- }
- if (boot_error) {
- cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
- clear_bit(cpu, (unsigned long *)&cpu_initialized); /* was set by cpu_init() */
- clear_node_cpumask(cpu); /* was set by numa_add_cpu */
- cpu_clear(cpu, cpu_present_map);
- cpu_clear(cpu, cpu_possible_map);
- per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
- return -EIO;
- }
-
- return 0;
-}
-
-cycles_t cacheflush_time;
-unsigned long cache_decay_ticks;
-
-/*
- * Cleanup possible dangling ends...
- */
-static __cpuinit void smp_cleanup_boot(void)
-{
- /*
- * Paranoid: Set warm reset code and vector here back
- * to default values.
- */
- CMOS_WRITE(0, 0xf);
-
- /*
- * Reset trampoline flag
- */
- *((volatile int *) phys_to_virt(0x467)) = 0;
-}
-
-/*
- * Fall back to non SMP mode after errors.
- *
- * RED-PEN audit/test this more. I bet there is more state messed up here.
- */
-static __init void disable_smp(void)
-{
- cpu_present_map = cpumask_of_cpu(0);
- cpu_possible_map = cpumask_of_cpu(0);
- if (smp_found_config)
- phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
- else
- phys_cpu_present_map = physid_mask_of_physid(0);
- cpu_set(0, per_cpu(cpu_sibling_map, 0));
- cpu_set(0, per_cpu(cpu_core_map, 0));
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-int additional_cpus __initdata = -1;
-
-/*
- * cpu_possible_map should be static, it cannot change as cpu's
- * are onlined, or offlined. The reason is per-cpu data-structures
- * are allocated by some modules at init time, and dont expect to
- * do this dynamically on cpu arrival/departure.
- * cpu_present_map on the other hand can change dynamically.
- * In case when cpu_hotplug is not compiled, then we resort to current
- * behaviour, which is cpu_possible == cpu_present.
- * - Ashok Raj
- *
- * Three ways to find out the number of additional hotplug CPUs:
- * - If the BIOS specified disabled CPUs in ACPI/mptables use that.
- * - The user can overwrite it with additional_cpus=NUM
- * - Otherwise don't reserve additional CPUs.
- * We do this because additional CPUs waste a lot of memory.
- * -AK
- */
-__init void prefill_possible_map(void)
-{
- int i;
- int possible;
-
- if (additional_cpus == -1) {
- if (disabled_cpus > 0)
- additional_cpus = disabled_cpus;
- else
- additional_cpus = 0;
- }
- possible = num_processors + additional_cpus;
- if (possible > NR_CPUS)
- possible = NR_CPUS;
-
- printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n",
- possible,
- max_t(int, possible - num_processors, 0));
-
- for (i = 0; i < possible; i++)
- cpu_set(i, cpu_possible_map);
-}
-#endif
-
-/*
- * Various sanity checks.
- */
-static int __init smp_sanity_check(unsigned max_cpus)
-{
- if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
- printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
- hard_smp_processor_id());
- physid_set(hard_smp_processor_id(), phys_cpu_present_map);
- }
-
- /*
- * If we couldn't find an SMP configuration at boot time,
- * get out of here now!
- */
- if (!smp_found_config) {
- printk(KERN_NOTICE "SMP motherboard not detected.\n");
- disable_smp();
- if (APIC_init_uniprocessor())
- printk(KERN_NOTICE "Local APIC not detected."
- " Using dummy APIC emulation.\n");
- return -1;
- }
-
- /*
- * Should not be necessary because the MP table should list the boot
- * CPU too, but we do it for the sake of robustness anyway.
- */
- if (!physid_isset(boot_cpu_id, phys_cpu_present_map)) {
- printk(KERN_NOTICE "weird, boot CPU (#%d) not listed by the BIOS.\n",
- boot_cpu_id);
- physid_set(hard_smp_processor_id(), phys_cpu_present_map);
- }
-
- /*
- * If we couldn't find a local APIC, then get out of here now!
- */
- if (!cpu_has_apic) {
- printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
- boot_cpu_id);
- printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
- nr_ioapics = 0;
- return -1;
- }
-
- /*
- * If SMP should be disabled, then really disable it!
- */
- if (!max_cpus) {
- printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
- nr_ioapics = 0;
- return -1;
- }
-
- return 0;
-}
-
-static void __init smp_cpu_index_default(void)
-{
- int i;
- struct cpuinfo_x86 *c;
-
- for_each_cpu_mask(i, cpu_possible_map) {
- c = &cpu_data(i);
- /* mark all to hotplug */
- c->cpu_index = NR_CPUS;
- }
-}
-
-/*
- * Prepare for SMP bootup. The MP table or ACPI has been read
- * earlier. Just do some sanity checking here and enable APIC mode.
- */
-void __init smp_prepare_cpus(unsigned int max_cpus)
-{
- nmi_watchdog_default();
- smp_cpu_index_default();
- current_cpu_data = boot_cpu_data;
- current_thread_info()->cpu = 0; /* needed? */
- set_cpu_sibling_map(0);
-
- if (smp_sanity_check(max_cpus) < 0) {
- printk(KERN_INFO "SMP disabled\n");
- disable_smp();
- return;
- }
-
-
- /*
- * Switch from PIC to APIC mode.
- */
- setup_local_APIC();
-
- /*
- * Enable IO APIC before setting up error vector
- */
- if (!skip_ioapic_setup && nr_ioapics)
- enable_IO_APIC();
- end_local_APIC_setup();
-
- if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) {
- panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
- GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
- /* Or can we switch back to PIC here? */
- }
-
- /*
- * Now start the IO-APICs
- */
- if (!skip_ioapic_setup && nr_ioapics)
- setup_IO_APIC();
- else
- nr_ioapics = 0;
-
- /*
- * Set up local APIC timer on boot CPU.
- */
-
- setup_boot_clock();
-}
-
-/*
- * Early setup to make printk work.
- */
-void __init smp_prepare_boot_cpu(void)
-{
- int me = smp_processor_id();
- /* already set me in cpu_online_map in boot_cpu_init() */
- cpu_set(me, cpu_callout_map);
- per_cpu(cpu_state, me) = CPU_ONLINE;
-}
-
-/*
- * Entry point to boot a CPU.
- */
-int __cpuinit __cpu_up(unsigned int cpu)
-{
- int apicid = cpu_present_to_apicid(cpu);
- unsigned long flags;
- int err;
-
- WARN_ON(irqs_disabled());
-
- Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu);
-
- if (apicid == BAD_APICID || apicid == boot_cpu_id ||
- !physid_isset(apicid, phys_cpu_present_map)) {
- printk("__cpu_up: bad cpu %d\n", cpu);
- return -EINVAL;
- }
-
- /*
- * Already booted CPU?
- */
- if (cpu_isset(cpu, cpu_callin_map)) {
- Dprintk("do_boot_cpu %d Already started\n", cpu);
- return -ENOSYS;
- }
-
- /*
- * Save current MTRR state in case it was changed since early boot
- * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
- */
- mtrr_save_state();
-
- per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
- /* Boot it! */
- err = do_boot_cpu(cpu, apicid);
- if (err < 0) {
- Dprintk("do_boot_cpu failed %d\n", err);
- return err;
- }
-
- /* Unleash the CPU! */
- Dprintk("waiting for cpu %d\n", cpu);
-
- /*
- * Make sure and check TSC sync:
- */
- local_irq_save(flags);
- check_tsc_sync_source(cpu);
- local_irq_restore(flags);
-
- while (!cpu_isset(cpu, cpu_online_map))
- cpu_relax();
- err = 0;
-
- return err;
-}
-
-/*
- * Finish the SMP boot.
- */
-void __init smp_cpus_done(unsigned int max_cpus)
-{
- smp_cleanup_boot();
- setup_ioapic_dest();
- check_nmi_watchdog();
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-static void remove_siblinginfo(int cpu)
-{
- int sibling;
- struct cpuinfo_x86 *c = &cpu_data(cpu);
-
- for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) {
- cpu_clear(cpu, per_cpu(cpu_core_map, sibling));
- /*
- * last thread sibling in this cpu core going down
- */
- if (cpus_weight(per_cpu(cpu_sibling_map, cpu)) == 1)
- cpu_data(sibling).booted_cores--;
- }
-
- for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu))
- cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling));
- cpus_clear(per_cpu(cpu_sibling_map, cpu));
- cpus_clear(per_cpu(cpu_core_map, cpu));
- c->phys_proc_id = 0;
- c->cpu_core_id = 0;
- cpu_clear(cpu, cpu_sibling_setup_map);
-}
-
-static void __ref remove_cpu_from_maps(void)
-{
- int cpu = smp_processor_id();
-
- cpu_clear(cpu, cpu_callout_map);
- cpu_clear(cpu, cpu_callin_map);
- clear_bit(cpu, (unsigned long *)&cpu_initialized); /* was set by cpu_init() */
- clear_node_cpumask(cpu);
-}
-
-int __cpu_disable(void)
-{
- int cpu = smp_processor_id();
-
- /*
- * Perhaps use cpufreq to drop frequency, but that could go
- * into generic code.
- *
- * We won't take down the boot processor on i386 due to some
- * interrupts only being able to be serviced by the BSP.
- * Especially so if we're not using an IOAPIC -zwane
- */
- if (cpu == 0)
- return -EBUSY;
-
- if (nmi_watchdog == NMI_LOCAL_APIC)
- stop_apic_nmi_watchdog(NULL);
- clear_local_APIC();
-
- /*
- * HACK:
- * Allow any queued timer interrupts to get serviced
- * This is only a temporary solution until we cleanup
- * fixup_irqs as we do for IA64.
- */
- local_irq_enable();
- mdelay(1);
-
- local_irq_disable();
- remove_siblinginfo(cpu);
-
- spin_lock(&vector_lock);
- /* It's now safe to remove this processor from the online map */
- cpu_clear(cpu, cpu_online_map);
- spin_unlock(&vector_lock);
- remove_cpu_from_maps();
- fixup_irqs(cpu_online_map);
- return 0;
-}
-
-void __cpu_die(unsigned int cpu)
-{
- /* We don't do anything here: idle task is faking death itself. */
- unsigned int i;
-
- for (i = 0; i < 10; i++) {
- /* They ack this in play_dead by setting CPU_DEAD */
- if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
- printk ("CPU %d is now offline\n", cpu);
- if (1 == num_online_cpus())
- alternatives_smp_switch(0);
- return;
- }
- msleep(100);
- }
- printk(KERN_ERR "CPU %u didn't die...\n", cpu);
-}
-
-static __init int setup_additional_cpus(char *s)
-{
- return s && get_option(&s, &additional_cpus) ? 0 : -EINVAL;
-}
-early_param("additional_cpus", setup_additional_cpus);
-
-#else /* ... !CONFIG_HOTPLUG_CPU */
-
-int __cpu_disable(void)
-{
- return -ENOSYS;
-}
-
-void __cpu_die(unsigned int cpu)
-{
- /* We said "no" in __cpu_disable */
- BUG();
-}
-#endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
new file mode 100644
index 000000000000..3449064d141a
--- /dev/null
+++ b/arch/x86/kernel/smpcommon.c
@@ -0,0 +1,83 @@
+/*
+ * SMP stuff which is common to all sub-architectures.
+ */
+#include <linux/module.h>
+#include <asm/smp.h>
+
+#ifdef CONFIG_X86_32
+DEFINE_PER_CPU(unsigned long, this_cpu_off);
+EXPORT_PER_CPU_SYMBOL(this_cpu_off);
+
+/* Initialize the CPU's GDT. This is either the boot CPU doing itself
+ (still using the master per-cpu area), or a CPU doing it for a
+ secondary which will soon come up. */
+__cpuinit void init_gdt(int cpu)
+{
+ struct desc_struct *gdt = get_cpu_gdt_table(cpu);
+
+ pack_descriptor(&gdt[GDT_ENTRY_PERCPU],
+ __per_cpu_offset[cpu], 0xFFFFF,
+ 0x2 | DESCTYPE_S, 0x8);
+
+ gdt[GDT_ENTRY_PERCPU].s = 1;
+
+ per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
+ per_cpu(cpu_number, cpu) = cpu;
+}
+#endif
+
+/**
+ * smp_call_function(): Run a function on all other CPUs.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @nonatomic: Unused.
+ * @wait: If true, wait (atomically) until function has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code.
+ *
+ * If @wait is true, then returns once @func has returned; otherwise
+ * it returns just before the target cpu calls @func.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ */
+int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
+ int wait)
+{
+ return smp_call_function_mask(cpu_online_map, func, info, wait);
+}
+EXPORT_SYMBOL(smp_call_function);
+
+/**
+ * smp_call_function_single - Run a function on a specific CPU
+ * @cpu: The target CPU. Cannot be the calling CPU.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @nonatomic: Unused.
+ * @wait: If true, wait until function has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code.
+ *
+ * If @wait is true, then returns once @func has returned; otherwise
+ * it returns just before the target cpu calls @func.
+ */
+int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
+ int nonatomic, int wait)
+{
+ /* prevent preemption and reschedule on another processor */
+ int ret;
+ int me = get_cpu();
+ if (cpu == me) {
+ local_irq_disable();
+ func(info);
+ local_irq_enable();
+ put_cpu();
+ return 0;
+ }
+
+ ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
+
+ put_cpu();
+ return ret;
+}
+EXPORT_SYMBOL(smp_call_function_single);
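Per the kerneldoc above, smp_call_function_single() must be called with interrupts enabled and the handed-in function must be fast and non-blocking; with wait set, the call only returns once the function has run on the target CPU. A hypothetical caller, sketched as a trivial module against the five-argument signature defined here (the module name, the report_cpu() handler and the assumption that CPU 1 is online are illustrative only):

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/smp.h>

static void report_cpu(void *info)
{
	/* Runs in IPI context on the target CPU: keep it fast, no sleeping. */
	printk(KERN_INFO "crosscall demo: running on CPU %d\n",
	       smp_processor_id());
}

static int __init crosscall_demo_init(void)
{
	int target = 1;	/* illustrative: assumes CPU 1 exists and is online */

	/* nonatomic is unused; wait == 1 blocks until report_cpu() returns. */
	return smp_call_function_single(target, report_cpu, NULL, 0, 1);
}

static void __exit crosscall_demo_exit(void)
{
}

module_init(crosscall_demo_init);
module_exit(crosscall_demo_exit);
MODULE_LICENSE("GPL");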
diff --git a/arch/x86/kernel/smpcommon_32.c b/arch/x86/kernel/smpcommon_32.c
index 8bc38af29aef..8b137891791f 100644
--- a/arch/x86/kernel/smpcommon_32.c
+++ b/arch/x86/kernel/smpcommon_32.c
@@ -1,82 +1 @@
-/*
- * SMP stuff which is common to all sub-architectures.
- */
-#include <linux/module.h>
-#include <asm/smp.h>
-DEFINE_PER_CPU(unsigned long, this_cpu_off);
-EXPORT_PER_CPU_SYMBOL(this_cpu_off);
-
-/* Initialize the CPU's GDT. This is either the boot CPU doing itself
- (still using the master per-cpu area), or a CPU doing it for a
- secondary which will soon come up. */
-__cpuinit void init_gdt(int cpu)
-{
- struct desc_struct *gdt = get_cpu_gdt_table(cpu);
-
- pack_descriptor(&gdt[GDT_ENTRY_PERCPU],
- __per_cpu_offset[cpu], 0xFFFFF,
- 0x2 | DESCTYPE_S, 0x8);
-
- gdt[GDT_ENTRY_PERCPU].s = 1;
-
- per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
- per_cpu(cpu_number, cpu) = cpu;
-}
-
-
-/**
- * smp_call_function(): Run a function on all other CPUs.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: Unused.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
- int wait)
-{
- return smp_call_function_mask(cpu_online_map, func, info, wait);
-}
-EXPORT_SYMBOL(smp_call_function);
-
-/**
- * smp_call_function_single - Run a function on a specific CPU
- * @cpu: The target CPU. Cannot be the calling CPU.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: Unused.
- * @wait: If true, wait until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- */
-int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
- int nonatomic, int wait)
-{
- /* prevent preemption and reschedule on another processor */
- int ret;
- int me = get_cpu();
- if (cpu == me) {
- local_irq_disable();
- func(info);
- local_irq_enable();
- put_cpu();
- return 0;
- }
-
- ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
-
- put_cpu();
- return ret;
-}
-EXPORT_SYMBOL(smp_call_function_single);
diff --git a/arch/x86/kernel/srat_32.c b/arch/x86/kernel/srat_32.c
index b72e61359c36..70e4a374b4e8 100644
--- a/arch/x86/kernel/srat_32.c
+++ b/arch/x86/kernel/srat_32.c
@@ -277,14 +277,14 @@ int __init get_memcfg_from_srat(void)
rsdp_address = acpi_os_get_root_pointer();
if (!rsdp_address) {
printk("%s: System description tables not found\n",
- __FUNCTION__);
+ __func__);
goto out_err;
}
- printk("%s: assigning address to rsdp\n", __FUNCTION__);
+ printk("%s: assigning address to rsdp\n", __func__);
rsdp = (struct acpi_table_rsdp *)(u32)rsdp_address;
if (!rsdp) {
- printk("%s: Didn't find ACPI root!\n", __FUNCTION__);
+ printk("%s: Didn't find ACPI root!\n", __func__);
goto out_err;
}
@@ -292,7 +292,7 @@ int __init get_memcfg_from_srat(void)
rsdp->oem_id);
if (strncmp(rsdp->signature, ACPI_SIG_RSDP,strlen(ACPI_SIG_RSDP))) {
- printk(KERN_WARNING "%s: RSDP table signature incorrect\n", __FUNCTION__);
+ printk(KERN_WARNING "%s: RSDP table signature incorrect\n", __func__);
goto out_err;
}
@@ -302,7 +302,7 @@ int __init get_memcfg_from_srat(void)
if (!rsdt) {
printk(KERN_WARNING
"%s: ACPI: Invalid root system description tables (RSDT)\n",
- __FUNCTION__);
+ __func__);
goto out_err;
}
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 071ff4798236..92c20fee6781 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -148,7 +148,7 @@ static void write_debugctlmsr(struct task_struct *child, unsigned long val)
if (child != current)
return;
- wrmsrl(MSR_IA32_DEBUGCTLMSR, val);
+ update_debugctlmsr(val);
}
/*
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c
index 72f463401592..6878a9c2df5d 100644
--- a/arch/x86/kernel/summit_32.c
+++ b/arch/x86/kernel/summit_32.c
@@ -35,43 +35,47 @@ static struct rio_table_hdr *rio_table_hdr __initdata;
static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata;
+static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata;
+
static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
{
int twister = 0, node = 0;
int i, bus, num_buses;
- for(i = 0; i < rio_table_hdr->num_rio_dev; i++){
- if (rio_devs[i]->node_id == rio_devs[wpeg_num]->owner_id){
+ for (i = 0; i < rio_table_hdr->num_rio_dev; i++) {
+ if (rio_devs[i]->node_id == rio_devs[wpeg_num]->owner_id) {
twister = rio_devs[i]->owner_id;
break;
}
}
- if (i == rio_table_hdr->num_rio_dev){
- printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __FUNCTION__);
+ if (i == rio_table_hdr->num_rio_dev) {
+ printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__);
return last_bus;
}
- for(i = 0; i < rio_table_hdr->num_scal_dev; i++){
- if (scal_devs[i]->node_id == twister){
+ for (i = 0; i < rio_table_hdr->num_scal_dev; i++) {
+ if (scal_devs[i]->node_id == twister) {
node = scal_devs[i]->node_id;
break;
}
}
- if (i == rio_table_hdr->num_scal_dev){
- printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __FUNCTION__);
+ if (i == rio_table_hdr->num_scal_dev) {
+ printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__);
return last_bus;
}
- switch (rio_devs[wpeg_num]->type){
+ switch (rio_devs[wpeg_num]->type) {
case CompatWPEG:
- /* The Compatibility Winnipeg controls the 2 legacy buses,
+ /*
+ * The Compatibility Winnipeg controls the 2 legacy buses,
* the 66MHz PCI bus [2 slots] and the 2 "extra" buses in case
* a PCI-PCI bridge card is used in either slot: total 5 buses.
*/
num_buses = 5;
break;
case AltWPEG:
- /* The Alternate Winnipeg controls the 2 133MHz buses [1 slot
+ /*
+ * The Alternate Winnipeg controls the 2 133MHz buses [1 slot
* each], their 2 "extra" buses, the 100MHz bus [2 slots] and
* the "extra" buses for each of those slots: total 7 buses.
*/
@@ -79,17 +83,18 @@ static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
break;
case LookOutAWPEG:
case LookOutBWPEG:
- /* A Lookout Winnipeg controls 3 100MHz buses [2 slots each]
+ /*
+ * A Lookout Winnipeg controls 3 100MHz buses [2 slots each]
* & the "extra" buses for each of those slots: total 9 buses.
*/
num_buses = 9;
break;
default:
- printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __FUNCTION__);
+ printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__);
return last_bus;
}
- for(bus = last_bus; bus < last_bus + num_buses; bus++)
+ for (bus = last_bus; bus < last_bus + num_buses; bus++)
mp_bus_id_to_node[bus] = node;
return bus;
}
@@ -99,14 +104,14 @@ static int __init build_detail_arrays(void)
unsigned long ptr;
int i, scal_detail_size, rio_detail_size;
- if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){
- printk(KERN_WARNING "%s: MAX_NUMNODES too low! Defined as %d, but system has %d nodes.\n", __FUNCTION__, MAX_NUMNODES, rio_table_hdr->num_scal_dev);
+ if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) {
+ printk(KERN_WARNING "%s: MAX_NUMNODES too low! Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev);
return 0;
}
- switch (rio_table_hdr->version){
+ switch (rio_table_hdr->version) {
default:
- printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __FUNCTION__, rio_table_hdr->version);
+ printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version);
return 0;
case 2:
scal_detail_size = 11;
@@ -119,10 +124,10 @@ static int __init build_detail_arrays(void)
}
ptr = (unsigned long)rio_table_hdr + 3;
- for(i = 0; i < rio_table_hdr->num_scal_dev; i++, ptr += scal_detail_size)
+ for (i = 0; i < rio_table_hdr->num_scal_dev; i++, ptr += scal_detail_size)
scal_devs[i] = (struct scal_detail *)ptr;
- for(i = 0; i < rio_table_hdr->num_rio_dev; i++, ptr += rio_detail_size)
+ for (i = 0; i < rio_table_hdr->num_rio_dev; i++, ptr += rio_detail_size)
rio_devs[i] = (struct rio_detail *)ptr;
return 1;
@@ -140,9 +145,9 @@ void __init setup_summit(void)
rio_table_hdr = NULL;
offset = 0x180;
- while (offset){
+ while (offset) {
/* The block id is stored in the 2nd word */
- if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){
+ if (*((unsigned short *)(ptr + offset + 2)) == 0x4752) {
/* set the pointer past the offset & block id */
rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4);
break;
@@ -150,8 +155,8 @@ void __init setup_summit(void)
/* The next offset is stored in the 1st word. 0 means no more */
offset = *((unsigned short *)(ptr + offset));
}
- if (!rio_table_hdr){
- printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __FUNCTION__);
+ if (!rio_table_hdr) {
+ printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__);
return;
}
@@ -161,8 +166,8 @@ void __init setup_summit(void)
/* The first Winnipeg we're looking for has an index of 0 */
next_wpeg = 0;
do {
- for(i = 0; i < rio_table_hdr->num_rio_dev; i++){
- if (is_WPEG(rio_devs[i]) && rio_devs[i]->WP_index == next_wpeg){
+ for (i = 0; i < rio_table_hdr->num_rio_dev; i++) {
+ if (is_WPEG(rio_devs[i]) && rio_devs[i]->WP_index == next_wpeg) {
/* It's the Winnipeg we're looking for! */
next_bus = setup_pci_node_map_for_wpeg(i, next_bus);
next_wpeg++;
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c
index 9d498c2f8eea..170d43c17487 100644
--- a/arch/x86/kernel/syscall_64.c
+++ b/arch/x86/kernel/syscall_64.c
@@ -1,4 +1,4 @@
-/* System call table for x86-64. */
+/* System call table for x86-64. */
#include <linux/linkage.h>
#include <linux/sys.h>
@@ -7,20 +7,23 @@
#define __NO_STUBS
-#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ;
+#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ;
#undef _ASM_X86_64_UNISTD_H_
#include <asm/unistd_64.h>
#undef __SYSCALL
-#define __SYSCALL(nr, sym) [ nr ] = sym,
+#define __SYSCALL(nr, sym) [nr] = sym,
#undef _ASM_X86_64_UNISTD_H_
-typedef void (*sys_call_ptr_t)(void);
+typedef void (*sys_call_ptr_t)(void);
extern void sys_ni_syscall(void);
const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
- /* Smells like a like a compiler bug -- it doesn't work when the & below is removed. */
+ /*
+ * Smells like a compiler bug -- it doesn't work
+ * when the & below is removed.
+ */
[0 ... __NR_syscall_max] = &sys_ni_syscall,
#include <asm/unistd_64.h>
};
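
The table above is built with designated array initializers: the [0 ... __NR_syscall_max] = &sys_ni_syscall range designator (a GCC extension) first points every slot at the "not implemented" stub, and the re-included unistd_64.h then expands __SYSCALL(nr, sym) into [nr] = sym, entries that override the slots which really exist. Below is a minimal stand-alone sketch of the same pattern; the handler names and table size are made up for illustration and are not the real syscall list.

#include <stdio.h>

typedef long (*handler_t)(void);

static long sys_ni(void)    { return -1; }	/* default: "not implemented" */
static long do_getpid(void) { return 42; }
static long do_uname(void)  { return 0; }

#define NR_MAX 7

/* Default-fill the whole table, then override individual slots by index,
 * the same way sys_call_table is filled by the __SYSCALL() expansion. */
static handler_t table[NR_MAX + 1] = {
	[0 ... NR_MAX] = sys_ni,
	[2] = do_getpid,
	[5] = do_uname,
};

int main(void)
{
	/* Slot 3 was never overridden, so it falls back to sys_ni. */
	printf("%ld %ld %ld\n", table[2](), table[5](), table[3]());
	return 0;
}

Later designated initializers win over the range fill, so only the numbers that unistd_64.h actually lists end up pointing anywhere other than the stub.
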
diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c
index 10b8a6f69f84..787a5e499dd1 100644
--- a/arch/x86/kernel/test_nx.c
+++ b/arch/x86/kernel/test_nx.c
@@ -11,6 +11,8 @@
*/
#include <linux/module.h>
#include <linux/sort.h>
+#include <linux/slab.h>
+
#include <asm/uaccess.h>
#include <asm/asm.h>
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
new file mode 100644
index 000000000000..9bb2363851af
--- /dev/null
+++ b/arch/x86/kernel/tlb_32.c
@@ -0,0 +1,243 @@
+#include <linux/spinlock.h>
+#include <linux/cpu.h>
+#include <linux/interrupt.h>
+
+#include <asm/tlbflush.h>
+
+DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate)
+ ____cacheline_aligned = { &init_mm, 0, };
+
+/* must come after the send_IPI functions above for inlining */
+#include <mach_ipi.h>
+
+/*
+ * Smarter SMP flushing macros.
+ * c/o Linus Torvalds.
+ *
+ * These mean you can really definitely utterly forget about
+ * writing to user space from interrupts. (It's not allowed anyway).
+ *
+ * Optimizations Manfred Spraul <manfred@colorfullife.com>
+ */
+
+static cpumask_t flush_cpumask;
+static struct mm_struct *flush_mm;
+static unsigned long flush_va;
+static DEFINE_SPINLOCK(tlbstate_lock);
+
+/*
+ * We cannot call mmdrop() because we are in interrupt context,
+ * instead update mm->cpu_vm_mask.
+ *
+ * We need to reload %cr3 since the page tables may be going
+ * away from under us..
+ */
+void leave_mm(int cpu)
+{
+ if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
+ BUG();
+ cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask);
+ load_cr3(swapper_pg_dir);
+}
+EXPORT_SYMBOL_GPL(leave_mm);
+
+/*
+ *
+ * The flush IPI assumes that a thread switch happens in this order:
+ * [cpu0: the cpu that switches]
+ * 1) switch_mm() either 1a) or 1b)
+ * 1a) thread switch to a different mm
+ * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
+ * Stop ipi delivery for the old mm. This is not synchronized with
+ * the other cpus, but smp_invalidate_interrupt ignores flush ipis
+ * for the wrong mm, and in the worst case we perform a superfluous
+ * tlb flush.
+ * 1a2) set cpu_tlbstate to TLBSTATE_OK
+ * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
+ * was in lazy tlb mode.
+ * 1a3) update cpu_tlbstate[].active_mm
+ * Now cpu0 accepts tlb flushes for the new mm.
+ * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
+ * Now the other cpus will send tlb flush ipis.
+ * 1a4) change cr3.
+ * 1b) thread switch without mm change
+ * cpu_tlbstate[].active_mm is correct, cpu0 already handles
+ * flush ipis.
+ * 1b1) set cpu_tlbstate to TLBSTATE_OK
+ * 1b2) test_and_set the cpu bit in cpu_vm_mask.
+ * Atomically set the bit [other cpus will start sending flush ipis],
+ * and test the bit.
+ * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
+ * 2) switch %%esp, ie current
+ *
+ * The interrupt must handle 2 special cases:
+ * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
+ * - the cpu performs speculative tlb reads, i.e. even if the cpu only
+ * runs in kernel space, the cpu could load tlb entries for user space
+ * pages.
+ *
+ * The good news is that cpu_tlbstate is local to each cpu, no
+ * write/read ordering problems.
+ */
+
+/*
+ * TLB flush IPI:
+ *
+ * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
+ * 2) Leave the mm if we are in the lazy tlb mode.
+ */
+
+void smp_invalidate_interrupt(struct pt_regs *regs)
+{
+ unsigned long cpu;
+
+ cpu = get_cpu();
+
+ if (!cpu_isset(cpu, flush_cpumask))
+ goto out;
+ /*
+ * This was a BUG() but until someone can quote me the
+ * line from the intel manual that guarantees an IPI to
+ * multiple CPUs is retried _only_ on the erroring CPUs
+ * it's staying as a return
+ *
+ * BUG();
+ */
+
+ if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
+ if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
+ if (flush_va == TLB_FLUSH_ALL)
+ local_flush_tlb();
+ else
+ __flush_tlb_one(flush_va);
+ } else
+ leave_mm(cpu);
+ }
+ ack_APIC_irq();
+ smp_mb__before_clear_bit();
+ cpu_clear(cpu, flush_cpumask);
+ smp_mb__after_clear_bit();
+out:
+ put_cpu_no_resched();
+ __get_cpu_var(irq_stat).irq_tlb_count++;
+}
+
+void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
+ unsigned long va)
+{
+ cpumask_t cpumask = *cpumaskp;
+
+ /*
+ * A couple of (to be removed) sanity checks:
+ *
+ * - current CPU must not be in mask
+ * - mask must exist :)
+ */
+ BUG_ON(cpus_empty(cpumask));
+ BUG_ON(cpu_isset(smp_processor_id(), cpumask));
+ BUG_ON(!mm);
+
+#ifdef CONFIG_HOTPLUG_CPU
+ /* If a CPU which we ran on has gone down, OK. */
+ cpus_and(cpumask, cpumask, cpu_online_map);
+ if (unlikely(cpus_empty(cpumask)))
+ return;
+#endif
+
+ /*
+ * I'm not happy about this global shared spinlock in the
+ * MM hot path, but we'll see how contended it is.
+ * AK: x86-64 has a faster method that could be ported.
+ */
+ spin_lock(&tlbstate_lock);
+
+ flush_mm = mm;
+ flush_va = va;
+ cpus_or(flush_cpumask, cpumask, flush_cpumask);
+ /*
+ * We have to send the IPI only to
+ * CPUs affected.
+ */
+ send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR);
+
+ while (!cpus_empty(flush_cpumask))
+ /* nothing. lockup detection does not belong here */
+ cpu_relax();
+
+ flush_mm = NULL;
+ flush_va = 0;
+ spin_unlock(&tlbstate_lock);
+}
+
+void flush_tlb_current_task(void)
+{
+ struct mm_struct *mm = current->mm;
+ cpumask_t cpu_mask;
+
+ preempt_disable();
+ cpu_mask = mm->cpu_vm_mask;
+ cpu_clear(smp_processor_id(), cpu_mask);
+
+ local_flush_tlb();
+ if (!cpus_empty(cpu_mask))
+ flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+ preempt_enable();
+}
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+ cpumask_t cpu_mask;
+
+ preempt_disable();
+ cpu_mask = mm->cpu_vm_mask;
+ cpu_clear(smp_processor_id(), cpu_mask);
+
+ if (current->active_mm == mm) {
+ if (current->mm)
+ local_flush_tlb();
+ else
+ leave_mm(smp_processor_id());
+ }
+ if (!cpus_empty(cpu_mask))
+ flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
+
+ preempt_enable();
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ cpumask_t cpu_mask;
+
+ preempt_disable();
+ cpu_mask = mm->cpu_vm_mask;
+ cpu_clear(smp_processor_id(), cpu_mask);
+
+ if (current->active_mm == mm) {
+ if (current->mm)
+ __flush_tlb_one(va);
+ else
+ leave_mm(smp_processor_id());
+ }
+
+ if (!cpus_empty(cpu_mask))
+ flush_tlb_others(cpu_mask, mm, va);
+
+ preempt_enable();
+}
+EXPORT_SYMBOL(flush_tlb_page);
+
+static void do_flush_tlb_all(void *info)
+{
+ unsigned long cpu = smp_processor_id();
+
+ __flush_tlb_all();
+ if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY)
+ leave_mm(cpu);
+}
+
+void flush_tlb_all(void)
+{
+ on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
+}
+
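
The new tlb_32.c centers on the per-CPU tlb_state above: a CPU that is only lazily borrowing an mm's page tables (TLBSTATE_LAZY) does not flush on every IPI; the first flush IPI it sees for that mm makes it call leave_mm(), clear its bit in cpu_vm_mask and reload swapper_pg_dir, after which it stops receiving flush IPIs for that mm at all. The following is a simplified, single-CPU-at-a-time model of that decision, with illustrative names and none of the cpumask, locking or IPI plumbing.

#include <stdio.h>

/* Toy stand-in for DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate). */
enum { TLBSTATE_OK = 1, TLBSTATE_LAZY = 2 };

struct tlb_state {
	int state;		/* TLBSTATE_OK or TLBSTATE_LAZY */
	int active_mm_id;	/* which mm's page tables this CPU has loaded */
};

static struct tlb_state cpu_tlbstate[4];

/* What smp_invalidate_interrupt() decides for one CPU when 'flush_mm'
 * is being flushed: flush locally if the CPU actively uses that mm,
 * otherwise leave the mm so no further flush IPIs are needed. */
static const char *handle_flush_ipi(int cpu, int flush_mm)
{
	if (cpu_tlbstate[cpu].active_mm_id != flush_mm)
		return "ignore (IPI was for another mm)";
	if (cpu_tlbstate[cpu].state == TLBSTATE_OK)
		return "flush local TLB";
	return "leave_mm(): drop out of cpu_vm_mask, load swapper_pg_dir";
}

int main(void)
{
	cpu_tlbstate[0] = (struct tlb_state){ TLBSTATE_OK,   42 };
	cpu_tlbstate[1] = (struct tlb_state){ TLBSTATE_LAZY, 42 };
	cpu_tlbstate[2] = (struct tlb_state){ TLBSTATE_OK,    7 };

	for (int cpu = 0; cpu < 3; cpu++)
		printf("cpu%d: %s\n", cpu, handle_flush_ipi(cpu, 42));
	return 0;
}
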
diff --git a/arch/x86/kernel/smp_64.c b/arch/x86/kernel/tlb_64.c
index 2fd74b06db67..1558e513757e 100644
--- a/arch/x86/kernel/smp_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -1,14 +1,3 @@
-/*
- * Intel SMP support routines.
- *
- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
- * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
- * (c) 2002,2003 Andi Kleen, SuSE Labs.
- *
- * This code is released under the GNU General Public License version 2 or
- * later.
- */
-
#include <linux/init.h>
#include <linux/mm.h>
@@ -22,12 +11,12 @@
#include <asm/mtrr.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
-#include <asm/mach_apic.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/apicdef.h>
#include <asm/idle.h>
+#include <mach_ipi.h>
/*
* Smarter SMP flushing macros.
* c/o Linus Torvalds.
@@ -228,7 +217,7 @@ void flush_tlb_current_task(void)
preempt_enable();
}
-void flush_tlb_mm (struct mm_struct * mm)
+void flush_tlb_mm(struct mm_struct *mm)
{
cpumask_t cpu_mask;
@@ -248,7 +237,7 @@ void flush_tlb_mm (struct mm_struct * mm)
preempt_enable();
}
-void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
{
struct mm_struct *mm = vma->vm_mm;
cpumask_t cpu_mask;
@@ -258,7 +247,7 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
cpu_clear(smp_processor_id(), cpu_mask);
if (current->active_mm == mm) {
- if(current->mm)
+ if (current->mm)
__flush_tlb_one(va);
else
leave_mm(smp_processor_id());
@@ -270,7 +259,7 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
preempt_enable();
}
-static void do_flush_tlb_all(void* info)
+static void do_flush_tlb_all(void *info)
{
unsigned long cpu = smp_processor_id();
@@ -283,248 +272,3 @@ void flush_tlb_all(void)
{
on_each_cpu(do_flush_tlb_all, NULL, 1, 1);
}
-
-/*
- * this function sends a 'reschedule' IPI to another CPU.
- * it goes straight through and wastes no time serializing
- * anything. Worst case is that we lose a reschedule ...
- */
-
-void smp_send_reschedule(int cpu)
-{
- send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
-}
-
-/*
- * Structure and data for smp_call_function(). This is designed to minimise
- * static memory requirements. It also looks cleaner.
- */
-static DEFINE_SPINLOCK(call_lock);
-
-struct call_data_struct {
- void (*func) (void *info);
- void *info;
- atomic_t started;
- atomic_t finished;
- int wait;
-};
-
-static struct call_data_struct * call_data;
-
-void lock_ipi_call_lock(void)
-{
- spin_lock_irq(&call_lock);
-}
-
-void unlock_ipi_call_lock(void)
-{
- spin_unlock_irq(&call_lock);
-}
-
-/*
- * this function sends a 'generic call function' IPI to all other CPU
- * of the system defined in the mask.
- */
-static int __smp_call_function_mask(cpumask_t mask,
- void (*func)(void *), void *info,
- int wait)
-{
- struct call_data_struct data;
- cpumask_t allbutself;
- int cpus;
-
- allbutself = cpu_online_map;
- cpu_clear(smp_processor_id(), allbutself);
-
- cpus_and(mask, mask, allbutself);
- cpus = cpus_weight(mask);
-
- if (!cpus)
- return 0;
-
- data.func = func;
- data.info = info;
- atomic_set(&data.started, 0);
- data.wait = wait;
- if (wait)
- atomic_set(&data.finished, 0);
-
- call_data = &data;
- wmb();
-
- /* Send a message to other CPUs */
- if (cpus_equal(mask, allbutself))
- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
- else
- send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
-
- /* Wait for response */
- while (atomic_read(&data.started) != cpus)
- cpu_relax();
-
- if (!wait)
- return 0;
-
- while (atomic_read(&data.finished) != cpus)
- cpu_relax();
-
- return 0;
-}
-/**
- * smp_call_function_mask(): Run a function on a set of other CPUs.
- * @mask: The set of cpus to run on. Must not include the current cpu.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
- *
- * Returns 0 on success, else a negative status code.
- *
- * If @wait is true, then returns once @func has returned; otherwise
- * it returns just before the target cpu calls @func.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- */
-int smp_call_function_mask(cpumask_t mask,
- void (*func)(void *), void *info,
- int wait)
-{
- int ret;
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- spin_lock(&call_lock);
- ret = __smp_call_function_mask(mask, func, info, wait);
- spin_unlock(&call_lock);
- return ret;
-}
-EXPORT_SYMBOL(smp_call_function_mask);
-
-/*
- * smp_call_function_single - Run a function on a specific CPU
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: Currently unused.
- * @wait: If true, wait until function has completed on other CPUs.
- *
- * Retrurns 0 on success, else a negative status code.
- *
- * Does not return until the remote CPU is nearly ready to execute <func>
- * or is or has executed.
- */
-
-int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
- int nonatomic, int wait)
-{
- /* prevent preemption and reschedule on another processor */
- int ret, me = get_cpu();
-
- /* Can deadlock when called with interrupts disabled */
- WARN_ON(irqs_disabled());
-
- if (cpu == me) {
- local_irq_disable();
- func(info);
- local_irq_enable();
- put_cpu();
- return 0;
- }
-
- ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
-
- put_cpu();
- return ret;
-}
-EXPORT_SYMBOL(smp_call_function_single);
-
-/*
- * smp_call_function - run a function on all other CPUs.
- * @func: The function to run. This must be fast and non-blocking.
- * @info: An arbitrary pointer to pass to the function.
- * @nonatomic: currently unused.
- * @wait: If true, wait (atomically) until function has completed on other
- * CPUs.
- *
- * Returns 0 on success, else a negative status code. Does not return until
- * remote CPUs are nearly ready to execute func or are or have executed.
- *
- * You must not call this function with disabled interrupts or from a
- * hardware interrupt handler or from a bottom half handler.
- * Actually there are a few legal cases, like panic.
- */
-int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
- int wait)
-{
- return smp_call_function_mask(cpu_online_map, func, info, wait);
-}
-EXPORT_SYMBOL(smp_call_function);
-
-static void stop_this_cpu(void *dummy)
-{
- local_irq_disable();
- /*
- * Remove this CPU:
- */
- cpu_clear(smp_processor_id(), cpu_online_map);
- disable_local_APIC();
- for (;;)
- halt();
-}
-
-void smp_send_stop(void)
-{
- int nolock;
- unsigned long flags;
-
- if (reboot_force)
- return;
-
- /* Don't deadlock on the call lock in panic */
- nolock = !spin_trylock(&call_lock);
- local_irq_save(flags);
- __smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0);
- if (!nolock)
- spin_unlock(&call_lock);
- disable_local_APIC();
- local_irq_restore(flags);
-}
-
-/*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
- */
-asmlinkage void smp_reschedule_interrupt(void)
-{
- ack_APIC_irq();
- add_pda(irq_resched_count, 1);
-}
-
-asmlinkage void smp_call_function_interrupt(void)
-{
- void (*func) (void *info) = call_data->func;
- void *info = call_data->info;
- int wait = call_data->wait;
-
- ack_APIC_irq();
- /*
- * Notify initiating CPU that I've grabbed the data and am
- * about to execute the function
- */
- mb();
- atomic_inc(&call_data->started);
- /*
- * At this point the info structure may be out of scope unless wait==1
- */
- exit_idle();
- irq_enter();
- (*func)(info);
- add_pda(irq_call_count, 1);
- irq_exit();
- if (wait) {
- mb();
- atomic_inc(&call_data->finished);
- }
-}
-
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
new file mode 100644
index 000000000000..abbf199adebb
--- /dev/null
+++ b/arch/x86/kernel/trampoline.c
@@ -0,0 +1,18 @@
+#include <linux/io.h>
+
+#include <asm/trampoline.h>
+
+/* ready for x86_64, no harm for x86, since it will be overwritten after alloc */
+unsigned char *trampoline_base = __va(TRAMPOLINE_BASE);
+
+/*
+ * Currently trivial. Write the real->protected mode
+ * bootstrap into the page concerned. The caller
+ * has made sure it's suitably aligned.
+ */
+unsigned long setup_trampoline(void)
+{
+ memcpy(trampoline_base, trampoline_data,
+ trampoline_end - trampoline_data);
+ return virt_to_phys(trampoline_base);
+}
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index 4aedd0bcee4c..894293c598db 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -30,12 +30,7 @@
#include <asm/msr.h>
#include <asm/segment.h>
-/* We can free up trampoline after bootup if cpu hotplug is not supported. */
-#ifndef CONFIG_HOTPLUG_CPU
-.section .init.data, "aw", @progbits
-#else
.section .rodata, "a", @progbits
-#endif
.code16
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index b22c01e05a18..65791ca2824a 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -9,26 +9,28 @@
* 'Traps.c' handles hardware traps and faults after we have saved some
* state in 'asm.s'.
*/
-#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/kallsyms.h>
+#include <linux/spinlock.h>
+#include <linux/highmem.h>
+#include <linux/kprobes.h>
+#include <linux/uaccess.h>
+#include <linux/utsname.h>
+#include <linux/kdebug.h>
#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
#include <linux/string.h>
+#include <linux/unwind.h>
+#include <linux/delay.h>
#include <linux/errno.h>
+#include <linux/kexec.h>
+#include <linux/sched.h>
#include <linux/timer.h>
-#include <linux/mm.h>
#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/spinlock.h>
-#include <linux/interrupt.h>
-#include <linux/highmem.h>
-#include <linux/kallsyms.h>
-#include <linux/ptrace.h>
-#include <linux/utsname.h>
-#include <linux/kprobes.h>
-#include <linux/kexec.h>
-#include <linux/unwind.h>
-#include <linux/uaccess.h>
-#include <linux/nmi.h>
#include <linux/bug.h>
+#include <linux/nmi.h>
+#include <linux/mm.h>
#ifdef CONFIG_EISA
#include <linux/ioport.h>
@@ -43,21 +45,18 @@
#include <linux/edac.h>
#endif
+#include <asm/arch_hooks.h>
+#include <asm/stacktrace.h>
#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/atomic.h>
#include <asm/debugreg.h>
+#include <asm/atomic.h>
+#include <asm/system.h>
+#include <asm/unwind.h>
#include <asm/desc.h>
#include <asm/i387.h>
#include <asm/nmi.h>
-#include <asm/unwind.h>
#include <asm/smp.h>
-#include <asm/arch_hooks.h>
-#include <linux/kdebug.h>
-#include <asm/stacktrace.h>
-
-#include <linux/module.h>
+#include <asm/io.h>
#include "mach_traps.h"
@@ -69,7 +68,7 @@ EXPORT_SYMBOL_GPL(used_vectors);
asmlinkage int system_call(void);
/* Do we ignore FPU interrupts ? */
-char ignore_fpu_irq = 0;
+char ignore_fpu_irq;
/*
* The IDT has to be page-aligned to simplify the Pentium
@@ -105,12 +104,13 @@ static unsigned int code_bytes = 64;
void printk_address(unsigned long address, int reliable)
{
#ifdef CONFIG_KALLSYMS
- unsigned long offset = 0, symsize;
+ char namebuf[KSYM_NAME_LEN];
+ unsigned long offset = 0;
+ unsigned long symsize;
const char *symname;
- char *modname;
- char *delim = ":";
- char namebuf[128];
char reliab[4] = "";
+ char *delim = ":";
+ char *modname;
symname = kallsyms_lookup(address, &symsize, &offset,
&modname, namebuf);
@@ -138,13 +138,14 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p, unsigned s
/* The form of the top of the frame on the stack */
struct stack_frame {
- struct stack_frame *next_frame;
- unsigned long return_address;
+ struct stack_frame *next_frame;
+ unsigned long return_address;
};
-static inline unsigned long print_context_stack(struct thread_info *tinfo,
- unsigned long *stack, unsigned long bp,
- const struct stacktrace_ops *ops, void *data)
+static inline unsigned long
+print_context_stack(struct thread_info *tinfo,
+ unsigned long *stack, unsigned long bp,
+ const struct stacktrace_ops *ops, void *data)
{
struct stack_frame *frame = (struct stack_frame *)bp;
@@ -166,7 +167,7 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
return bp;
}
-#define MSG(msg) ops->warning(data, msg)
+#define MSG(msg) ops->warning(data, msg)
void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp,
@@ -177,6 +178,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
if (!stack) {
unsigned long dummy;
+
stack = &dummy;
if (task != current)
stack = (unsigned long *)task->thread.sp;
@@ -186,7 +188,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
if (!bp) {
if (task == current) {
/* Grab bp right from our regs */
- asm ("movl %%ebp, %0" : "=r" (bp) : );
+ asm("movl %%ebp, %0" : "=r" (bp) :);
} else {
/* bp is the last reg pushed by switch_to */
bp = *(unsigned long *) task->thread.sp;
@@ -196,15 +198,18 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
while (1) {
struct thread_info *context;
+
context = (struct thread_info *)
((unsigned long)stack & (~(THREAD_SIZE - 1)));
bp = print_context_stack(context, stack, bp, ops, data);
- /* Should be after the line below, but somewhere
- in early boot context comes out corrupted and we
- can't reference it -AK */
+ /*
+ * Should be after the line below, but somewhere
+ * in early boot context comes out corrupted and we
+ * can't reference it:
+ */
if (ops->stack(data, "IRQ") < 0)
break;
- stack = (unsigned long*)context->previous_esp;
+ stack = (unsigned long *)context->previous_esp;
if (!stack)
break;
touch_nmi_watchdog();
@@ -243,15 +248,15 @@ static void print_trace_address(void *data, unsigned long addr, int reliable)
}
static const struct stacktrace_ops print_trace_ops = {
- .warning = print_trace_warning,
- .warning_symbol = print_trace_warning_symbol,
- .stack = print_trace_stack,
- .address = print_trace_address,
+ .warning = print_trace_warning,
+ .warning_symbol = print_trace_warning_symbol,
+ .stack = print_trace_stack,
+ .address = print_trace_address,
};
static void
show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp, char *log_lvl)
+ unsigned long *stack, unsigned long bp, char *log_lvl)
{
dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
printk("%s =======================\n", log_lvl);
@@ -263,21 +268,22 @@ void show_trace(struct task_struct *task, struct pt_regs *regs,
show_trace_log_lvl(task, regs, stack, bp, "");
}
-static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *sp, unsigned long bp, char *log_lvl)
+static void
+show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *sp, unsigned long bp, char *log_lvl)
{
unsigned long *stack;
int i;
if (sp == NULL) {
if (task)
- sp = (unsigned long*)task->thread.sp;
+ sp = (unsigned long *)task->thread.sp;
else
sp = (unsigned long *)&sp;
}
stack = sp;
- for(i = 0; i < kstack_depth_to_print; i++) {
+ for (i = 0; i < kstack_depth_to_print; i++) {
if (kstack_end(stack))
break;
if (i && ((i % 8) == 0))
@@ -285,6 +291,7 @@ static void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
printk("%08lx ", *stack++);
}
printk("\n%sCall Trace:\n", log_lvl);
+
show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}
@@ -299,8 +306,8 @@ void show_stack(struct task_struct *task, unsigned long *sp)
*/
void dump_stack(void)
{
- unsigned long stack;
unsigned long bp = 0;
+ unsigned long stack;
#ifdef CONFIG_FRAME_POINTER
if (!bp)
@@ -312,6 +319,7 @@ void dump_stack(void)
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
+
show_trace(current, NULL, &stack, bp);
}
@@ -323,6 +331,7 @@ void show_registers(struct pt_regs *regs)
print_modules();
__show_registers(regs, 0);
+
printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
TASK_COMM_LEN, current->comm, task_pid_nr(current),
current_thread_info(), current, task_thread_info(current));
@@ -331,10 +340,10 @@ void show_registers(struct pt_regs *regs)
* time of the fault..
*/
if (!user_mode_vm(regs)) {
- u8 *ip;
unsigned int code_prologue = code_bytes * 43 / 64;
unsigned int code_len = code_bytes;
unsigned char c;
+ u8 *ip;
printk("\n" KERN_EMERG "Stack: ");
show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG);
@@ -361,7 +370,7 @@ void show_registers(struct pt_regs *regs)
}
}
printk("\n");
-}
+}
int is_valid_bugaddr(unsigned long ip)
{
@@ -377,10 +386,10 @@ int is_valid_bugaddr(unsigned long ip)
static int die_counter;
-int __kprobes __die(const char * str, struct pt_regs * regs, long err)
+int __kprobes __die(const char *str, struct pt_regs *regs, long err)
{
- unsigned long sp;
unsigned short ss;
+ unsigned long sp;
printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
#ifdef CONFIG_PREEMPT
@@ -395,8 +404,8 @@ int __kprobes __die(const char * str, struct pt_regs * regs, long err)
printk("\n");
if (notify_die(DIE_OOPS, str, regs, err,
- current->thread.trap_no, SIGSEGV) !=
- NOTIFY_STOP) {
+ current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) {
+
show_registers(regs);
/* Executive summary in case the oops scrolled away */
sp = (unsigned long) (&regs->sp);
@@ -408,17 +417,18 @@ int __kprobes __die(const char * str, struct pt_regs * regs, long err)
printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
print_symbol("%s", regs->ip);
printk(" SS:ESP %04x:%08lx\n", ss, sp);
+
return 0;
- } else {
- return 1;
}
+
+ return 1;
}
/*
- * This is gone through when something in the kernel has done something bad and
- * is about to be terminated.
+ * This is gone through when something in the kernel has done something bad
+ * and is about to be terminated:
*/
-void die(const char * str, struct pt_regs * regs, long err)
+void die(const char *str, struct pt_regs *regs, long err)
{
static struct {
raw_spinlock_t lock;
@@ -440,8 +450,9 @@ void die(const char * str, struct pt_regs * regs, long err)
die.lock_owner = smp_processor_id();
die.lock_owner_depth = 0;
bust_spinlocks(1);
- } else
+ } else {
raw_local_irq_save(flags);
+ }
if (++die.lock_owner_depth < 3) {
report_bug(regs->ip, regs);
@@ -474,19 +485,20 @@ void die(const char * str, struct pt_regs * regs, long err)
do_exit(SIGSEGV);
}
-static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err)
+static inline void
+die_if_kernel(const char *str, struct pt_regs *regs, long err)
{
if (!user_mode_vm(regs))
die(str, regs, err);
}
-static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
- struct pt_regs * regs, long error_code,
- siginfo_t *info)
+static void __kprobes
+do_trap(int trapnr, int signr, char *str, int vm86, struct pt_regs *regs,
+ long error_code, siginfo_t *info)
{
struct task_struct *tsk = current;
- if (regs->flags & VM_MASK) {
+ if (regs->flags & X86_VM_MASK) {
if (vm86)
goto vm86_trap;
goto trap_signal;
@@ -495,111 +507,112 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
if (!user_mode(regs))
goto kernel_trap;
- trap_signal: {
- /*
- * We want error_code and trap_no set for userspace faults and
- * kernelspace faults which result in die(), but not
- * kernelspace faults which are fixed up. die() gives the
- * process no chance to handle the signal and notice the
- * kernel fault information, so that won't result in polluting
- * the information about previously queued, but not yet
- * delivered, faults. See also do_general_protection below.
- */
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = trapnr;
+trap_signal:
+ /*
+ * We want error_code and trap_no set for userspace faults and
+ * kernelspace faults which result in die(), but not
+ * kernelspace faults which are fixed up. die() gives the
+ * process no chance to handle the signal and notice the
+ * kernel fault information, so that won't result in polluting
+ * the information about previously queued, but not yet
+ * delivered, faults. See also do_general_protection below.
+ */
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = trapnr;
- if (info)
- force_sig_info(signr, info, tsk);
- else
- force_sig(signr, tsk);
- return;
- }
+ if (info)
+ force_sig_info(signr, info, tsk);
+ else
+ force_sig(signr, tsk);
+ return;
- kernel_trap: {
- if (!fixup_exception(regs)) {
- tsk->thread.error_code = error_code;
- tsk->thread.trap_no = trapnr;
- die(str, regs, error_code);
- }
- return;
+kernel_trap:
+ if (!fixup_exception(regs)) {
+ tsk->thread.error_code = error_code;
+ tsk->thread.trap_no = trapnr;
+ die(str, regs, error_code);
}
+ return;
- vm86_trap: {
- int ret = handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, trapnr);
- if (ret) goto trap_signal;
- return;
- }
+vm86_trap:
+ if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
+ error_code, trapnr))
+ goto trap_signal;
+ return;
}
-#define DO_ERROR(trapnr, signr, str, name) \
-void do_##name(struct pt_regs * regs, long error_code) \
-{ \
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
- return; \
- do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
+#define DO_ERROR(trapnr, signr, str, name) \
+void do_##name(struct pt_regs *regs, long error_code) \
+{ \
+ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+ == NOTIFY_STOP) \
+ return; \
+ do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
}
-#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \
-void do_##name(struct pt_regs * regs, long error_code) \
-{ \
- siginfo_t info; \
- if (irq) \
- local_irq_enable(); \
- info.si_signo = signr; \
- info.si_errno = 0; \
- info.si_code = sicode; \
- info.si_addr = (void __user *)siaddr; \
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
- return; \
- do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
+#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \
+void do_##name(struct pt_regs *regs, long error_code) \
+{ \
+ siginfo_t info; \
+ if (irq) \
+ local_irq_enable(); \
+ info.si_signo = signr; \
+ info.si_errno = 0; \
+ info.si_code = sicode; \
+ info.si_addr = (void __user *)siaddr; \
+ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+ == NOTIFY_STOP) \
+ return; \
+ do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
}
-#define DO_VM86_ERROR(trapnr, signr, str, name) \
-void do_##name(struct pt_regs * regs, long error_code) \
-{ \
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
- return; \
- do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
+#define DO_VM86_ERROR(trapnr, signr, str, name) \
+void do_##name(struct pt_regs *regs, long error_code) \
+{ \
+ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+ == NOTIFY_STOP) \
+ return; \
+ do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
}
-#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
-void do_##name(struct pt_regs * regs, long error_code) \
-{ \
- siginfo_t info; \
- info.si_signo = signr; \
- info.si_errno = 0; \
- info.si_code = sicode; \
- info.si_addr = (void __user *)siaddr; \
- trace_hardirqs_fixup(); \
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
- == NOTIFY_STOP) \
- return; \
- do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
+#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
+void do_##name(struct pt_regs *regs, long error_code) \
+{ \
+ siginfo_t info; \
+ info.si_signo = signr; \
+ info.si_errno = 0; \
+ info.si_code = sicode; \
+ info.si_addr = (void __user *)siaddr; \
+ trace_hardirqs_fixup(); \
+ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
+ == NOTIFY_STOP) \
+ return; \
+ do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
}
-DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
+DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
#ifndef CONFIG_KPROBES
-DO_VM86_ERROR( 3, SIGTRAP, "int3", int3)
+DO_VM86_ERROR(3, SIGTRAP, "int3", int3)
#endif
-DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow)
-DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds)
-DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
-DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
+DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow)
+DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds)
+DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
+DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
-void __kprobes do_general_protection(struct pt_regs * regs,
- long error_code)
+void __kprobes do_general_protection(struct pt_regs *regs, long error_code)
{
- int cpu = get_cpu();
- struct tss_struct *tss = &per_cpu(init_tss, cpu);
- struct thread_struct *thread = &current->thread;
+ struct thread_struct *thread;
+ struct tss_struct *tss;
+ int cpu;
+
+ cpu = get_cpu();
+ tss = &per_cpu(init_tss, cpu);
+ thread = &current->thread;
/*
* Perform the lazy TSS's I/O bitmap copy. If the TSS has an
@@ -616,19 +629,21 @@ void __kprobes do_general_protection(struct pt_regs * regs,
* If the previously set map was extending to higher ports
* than the current one, pad extra space with 0xff (no access).
*/
- if (thread->io_bitmap_max < tss->io_bitmap_max)
+ if (thread->io_bitmap_max < tss->io_bitmap_max) {
memset((char *) tss->io_bitmap +
thread->io_bitmap_max, 0xff,
tss->io_bitmap_max - thread->io_bitmap_max);
+ }
tss->io_bitmap_max = thread->io_bitmap_max;
tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
tss->io_bitmap_owner = thread;
put_cpu();
+
return;
}
put_cpu();
- if (regs->flags & VM_MASK)
+ if (regs->flags & X86_VM_MASK)
goto gp_in_vm86;
if (!user_mode(regs))
@@ -636,6 +651,7 @@ void __kprobes do_general_protection(struct pt_regs * regs,
current->thread.error_code = error_code;
current->thread.trap_no = 13;
+
if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) &&
printk_ratelimit()) {
printk(KERN_INFO
@@ -666,21 +682,24 @@ gp_in_kernel:
}
static __kprobes void
-mem_parity_error(unsigned char reason, struct pt_regs * regs)
+mem_parity_error(unsigned char reason, struct pt_regs *regs)
{
- printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
- "CPU %d.\n", reason, smp_processor_id());
- printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
+ printk(KERN_EMERG
+ "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
+ reason, smp_processor_id());
+
+ printk(KERN_EMERG
+ "You have some hardware problem, likely on the PCI bus.\n");
#if defined(CONFIG_EDAC)
- if(edac_handler_set()) {
+ if (edac_handler_set()) {
edac_atomic_assert_error();
return;
}
#endif
if (panic_on_unrecovered_nmi)
- panic("NMI: Not continuing");
+ panic("NMI: Not continuing");
printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
@@ -689,7 +708,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs)
}
static __kprobes void
-io_check_error(unsigned char reason, struct pt_regs * regs)
+io_check_error(unsigned char reason, struct pt_regs *regs)
{
unsigned long i;
@@ -699,28 +718,37 @@ io_check_error(unsigned char reason, struct pt_regs * regs)
/* Re-enable the IOCK line, wait for a few seconds */
reason = (reason & 0xf) | 8;
outb(reason, 0x61);
+
i = 2000;
- while (--i) udelay(1000);
+ while (--i)
+ udelay(1000);
+
reason &= ~8;
outb(reason, 0x61);
}
static __kprobes void
-unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
{
+ if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
+ return;
#ifdef CONFIG_MCA
- /* Might actually be able to figure out what the guilty party
- * is. */
- if( MCA_bus ) {
+ /*
+ * Might actually be able to figure out what the guilty party
+ * is:
+ */
+ if (MCA_bus) {
mca_handle_nmi();
return;
}
#endif
- printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
- "CPU %d.\n", reason, smp_processor_id());
+ printk(KERN_EMERG
+ "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
+ reason, smp_processor_id());
+
printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
if (panic_on_unrecovered_nmi)
- panic("NMI: Not continuing");
+ panic("NMI: Not continuing");
printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
}
@@ -729,14 +757,13 @@ static DEFINE_SPINLOCK(nmi_print_lock);
void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
{
- if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
- NOTIFY_STOP)
+ if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP)
return;
spin_lock(&nmi_print_lock);
/*
* We are in trouble anyway, lets at least try
- * to get a message out.
+ * to get a message out:
*/
bust_spinlocks(1);
printk(KERN_EMERG "%s", msg);
@@ -747,9 +774,10 @@ void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
spin_unlock(&nmi_print_lock);
bust_spinlocks(0);
- /* If we are in kernel we are probably nested up pretty bad
- * and might aswell get out now while we still can.
- */
+ /*
+ * If we are in kernel we are probably nested up pretty bad
+ * and might as well get out now while we still can:
+ */
if (!user_mode_vm(regs)) {
current->thread.trap_no = 2;
crash_kexec(regs);
@@ -758,14 +786,14 @@ void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
do_exit(SIGSEGV);
}
-static __kprobes void default_do_nmi(struct pt_regs * regs)
+static __kprobes void default_do_nmi(struct pt_regs *regs)
{
unsigned char reason = 0;
- /* Only the BSP gets external NMIs from the system. */
+ /* Only the BSP gets external NMIs from the system: */
if (!smp_processor_id())
reason = get_nmi_reason();
-
+
if (!(reason & 0xc0)) {
if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
== NOTIFY_STOP)
@@ -778,8 +806,10 @@ static __kprobes void default_do_nmi(struct pt_regs * regs)
if (nmi_watchdog_tick(regs, reason))
return;
if (!do_nmi_callback(regs, smp_processor_id()))
-#endif
unknown_nmi_error(reason, regs);
+#else
+ unknown_nmi_error(reason, regs);
+#endif
return;
}
@@ -791,14 +821,14 @@ static __kprobes void default_do_nmi(struct pt_regs * regs)
io_check_error(reason, regs);
/*
* Reassert NMI in case it became active meanwhile
- * as it's edge-triggered.
+ * as it's edge-triggered:
*/
reassert_nmi();
}
static int ignore_nmis;
-__kprobes void do_nmi(struct pt_regs * regs, long error_code)
+__kprobes void do_nmi(struct pt_regs *regs, long error_code)
{
int cpu;
@@ -834,9 +864,12 @@ void __kprobes do_int3(struct pt_regs *regs, long error_code)
if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
== NOTIFY_STOP)
return;
- /* This is an interrupt gate, because kprobes wants interrupts
- disabled. Normal trap handlers don't. */
+ /*
+ * This is an interrupt gate, because kprobes wants interrupts
+ * disabled. Normal trap handlers don't.
+ */
restore_interrupts(regs);
+
do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL);
}
#endif
@@ -851,7 +884,7 @@ void __kprobes do_int3(struct pt_regs *regs, long error_code)
* from user space. Such code must not hold kernel locks (since it
* can equally take a page fault), therefore it is safe to call
* force_sig_info even though that claims and releases locks.
- *
+ *
* Code in ./signal.c ensures that the debug control register
* is restored before we deliver any signal, and therefore that
* user code runs with the correct debug control register even though
@@ -863,10 +896,10 @@ void __kprobes do_int3(struct pt_regs *regs, long error_code)
* find every occurrence of the TF bit that could be saved away even
* by user code)
*/
-void __kprobes do_debug(struct pt_regs * regs, long error_code)
+void __kprobes do_debug(struct pt_regs *regs, long error_code)
{
- unsigned int condition;
struct task_struct *tsk = current;
+ unsigned int condition;
trace_hardirqs_fixup();
@@ -891,7 +924,7 @@ void __kprobes do_debug(struct pt_regs * regs, long error_code)
goto clear_dr7;
}
- if (regs->flags & VM_MASK)
+ if (regs->flags & X86_VM_MASK)
goto debug_vm86;
/* Save debug status register where ptrace can see it */
@@ -914,7 +947,8 @@ void __kprobes do_debug(struct pt_regs * regs, long error_code)
/* Ok, finally something we can handle */
send_sigtrap(tsk, regs, error_code);
- /* Disable additional traps. They'll be re-enabled when
+ /*
+ * Disable additional traps. They'll be re-enabled when
* the signal is delivered.
*/
clear_dr7:
@@ -927,7 +961,7 @@ debug_vm86:
clear_TF_reenable:
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
- regs->flags &= ~TF_MASK;
+ regs->flags &= ~X86_EFLAGS_TF;
return;
}
@@ -938,9 +972,10 @@ clear_TF_reenable:
*/
void math_error(void __user *ip)
{
- struct task_struct * task;
+ struct task_struct *task;
+ unsigned short cwd;
+ unsigned short swd;
siginfo_t info;
- unsigned short cwd, swd;
/*
* Save the info for the exception handler and clear the error.
@@ -966,36 +1001,36 @@ void math_error(void __user *ip)
cwd = get_fpu_cwd(task);
swd = get_fpu_swd(task);
switch (swd & ~cwd & 0x3f) {
- case 0x000: /* No unmasked exception */
- return;
- default: /* Multiple exceptions */
- break;
- case 0x001: /* Invalid Op */
- /*
- * swd & 0x240 == 0x040: Stack Underflow
- * swd & 0x240 == 0x240: Stack Overflow
- * User must clear the SF bit (0x40) if set
- */
- info.si_code = FPE_FLTINV;
- break;
- case 0x002: /* Denormalize */
- case 0x010: /* Underflow */
- info.si_code = FPE_FLTUND;
- break;
- case 0x004: /* Zero Divide */
- info.si_code = FPE_FLTDIV;
- break;
- case 0x008: /* Overflow */
- info.si_code = FPE_FLTOVF;
- break;
- case 0x020: /* Precision */
- info.si_code = FPE_FLTRES;
- break;
+ case 0x000: /* No unmasked exception */
+ return;
+ default: /* Multiple exceptions */
+ break;
+ case 0x001: /* Invalid Op */
+ /*
+ * swd & 0x240 == 0x040: Stack Underflow
+ * swd & 0x240 == 0x240: Stack Overflow
+ * User must clear the SF bit (0x40) if set
+ */
+ info.si_code = FPE_FLTINV;
+ break;
+ case 0x002: /* Denormalize */
+ case 0x010: /* Underflow */
+ info.si_code = FPE_FLTUND;
+ break;
+ case 0x004: /* Zero Divide */
+ info.si_code = FPE_FLTDIV;
+ break;
+ case 0x008: /* Overflow */
+ info.si_code = FPE_FLTOVF;
+ break;
+ case 0x020: /* Precision */
+ info.si_code = FPE_FLTRES;
+ break;
}
force_sig_info(SIGFPE, &info, task);
}
-void do_coprocessor_error(struct pt_regs * regs, long error_code)
+void do_coprocessor_error(struct pt_regs *regs, long error_code)
{
ignore_fpu_irq = 1;
math_error((void __user *)regs->ip);
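
The switch in math_error() above keys off swd & ~cwd & 0x3f: the low six bits of the x87 control word mask exceptions (1 = masked) and the low six bits of the status word record which exceptions fired, so the expression selects exactly the exceptions that are both raised and unmasked. A stand-alone sketch of that decode follows; the register values used in main() are invented.

#include <stdio.h>

static const char *fpe_code(unsigned short cwd, unsigned short swd)
{
	/* Same mapping as math_error()'s switch; any combination with
	 * more than one bit set lands in "multiple exceptions". */
	switch (swd & ~cwd & 0x3f) {
	case 0x000: return "no unmasked exception";
	case 0x001: return "FPE_FLTINV (invalid op)";
	case 0x002:
	case 0x010: return "FPE_FLTUND (denormal/underflow)";
	case 0x004: return "FPE_FLTDIV (zero divide)";
	case 0x008: return "FPE_FLTOVF (overflow)";
	case 0x020: return "FPE_FLTRES (precision)";
	default:    return "multiple exceptions";
	}
}

int main(void)
{
	/* Zero divide raised, nothing masked in the control word. */
	printf("%s\n", fpe_code(0x0000, 0x0004));
	/* Precision raised but masked: nothing to report. */
	printf("%s\n", fpe_code(0x0020, 0x0020));
	return 0;
}
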
@@ -1003,9 +1038,9 @@ void do_coprocessor_error(struct pt_regs * regs, long error_code)
static void simd_math_error(void __user *ip)
{
- struct task_struct * task;
- siginfo_t info;
+ struct task_struct *task;
unsigned short mxcsr;
+ siginfo_t info;
/*
* Save the info for the exception handler and clear the error.
@@ -1026,82 +1061,80 @@ static void simd_math_error(void __user *ip)
*/
mxcsr = get_fpu_mxcsr(task);
switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
- case 0x000:
- default:
- break;
- case 0x001: /* Invalid Op */
- info.si_code = FPE_FLTINV;
- break;
- case 0x002: /* Denormalize */
- case 0x010: /* Underflow */
- info.si_code = FPE_FLTUND;
- break;
- case 0x004: /* Zero Divide */
- info.si_code = FPE_FLTDIV;
- break;
- case 0x008: /* Overflow */
- info.si_code = FPE_FLTOVF;
- break;
- case 0x020: /* Precision */
- info.si_code = FPE_FLTRES;
- break;
+ case 0x000:
+ default:
+ break;
+ case 0x001: /* Invalid Op */
+ info.si_code = FPE_FLTINV;
+ break;
+ case 0x002: /* Denormalize */
+ case 0x010: /* Underflow */
+ info.si_code = FPE_FLTUND;
+ break;
+ case 0x004: /* Zero Divide */
+ info.si_code = FPE_FLTDIV;
+ break;
+ case 0x008: /* Overflow */
+ info.si_code = FPE_FLTOVF;
+ break;
+ case 0x020: /* Precision */
+ info.si_code = FPE_FLTRES;
+ break;
}
force_sig_info(SIGFPE, &info, task);
}
-void do_simd_coprocessor_error(struct pt_regs * regs,
- long error_code)
+void do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
{
if (cpu_has_xmm) {
/* Handle SIMD FPU exceptions on PIII+ processors. */
ignore_fpu_irq = 1;
simd_math_error((void __user *)regs->ip);
- } else {
- /*
- * Handle strange cache flush from user space exception
- * in all other cases. This is undocumented behaviour.
- */
- if (regs->flags & VM_MASK) {
- handle_vm86_fault((struct kernel_vm86_regs *)regs,
- error_code);
- return;
- }
- current->thread.trap_no = 19;
- current->thread.error_code = error_code;
- die_if_kernel("cache flush denied", regs, error_code);
- force_sig(SIGSEGV, current);
+ return;
+ }
+ /*
+ * Handle strange cache flush from user space exception
+ * in all other cases. This is undocumented behaviour.
+ */
+ if (regs->flags & X86_VM_MASK) {
+ handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code);
+ return;
}
+ current->thread.trap_no = 19;
+ current->thread.error_code = error_code;
+ die_if_kernel("cache flush denied", regs, error_code);
+ force_sig(SIGSEGV, current);
}
-void do_spurious_interrupt_bug(struct pt_regs * regs,
- long error_code)
+void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
{
#if 0
/* No need to warn about this any longer. */
- printk("Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
+ printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
#endif
}
-unsigned long patch_espfix_desc(unsigned long uesp,
- unsigned long kesp)
+unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
{
struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt;
unsigned long base = (kesp - uesp) & -THREAD_SIZE;
unsigned long new_kesp = kesp - base;
unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
__u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
+
/* Set up base for espfix segment */
- desc &= 0x00f0ff0000000000ULL;
- desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
+ desc &= 0x00f0ff0000000000ULL;
+ desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
((((__u64)base) << 32) & 0xff00000000000000ULL) |
((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
(lim_pages & 0xffff);
*(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
+
return new_kesp;
}
/*
- * 'math_state_restore()' saves the current math information in the
+ * 'math_state_restore()' saves the current math information in the
* old math state array, and gets the new ones from the current task
*
* Careful.. There are problems with IBM-designed IRQ13 behaviour.
@@ -1115,7 +1148,7 @@ asmlinkage void math_state_restore(void)
struct thread_info *thread = current_thread_info();
struct task_struct *tsk = thread->task;
- clts(); /* Allow maths ops (or we recurse) */
+ clts(); /* Allow maths ops (or we recurse) */
if (!tsk_used_math(tsk))
init_fpu(tsk);
restore_fpu(tsk);
@@ -1128,53 +1161,52 @@ EXPORT_SYMBOL_GPL(math_state_restore);
asmlinkage void math_emulate(long arg)
{
- printk(KERN_EMERG "math-emulation not enabled and no coprocessor found.\n");
- printk(KERN_EMERG "killing %s.\n",current->comm);
- force_sig(SIGFPE,current);
+ printk(KERN_EMERG
+ "math-emulation not enabled and no coprocessor found.\n");
+ printk(KERN_EMERG "killing %s.\n", current->comm);
+ force_sig(SIGFPE, current);
schedule();
}
#endif /* CONFIG_MATH_EMULATION */
-
void __init trap_init(void)
{
int i;
#ifdef CONFIG_EISA
void __iomem *p = early_ioremap(0x0FFFD9, 4);
- if (readl(p) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) {
+
+ if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
EISA_bus = 1;
- }
early_iounmap(p, 4);
#endif
#ifdef CONFIG_X86_LOCAL_APIC
init_apic_mappings();
#endif
-
- set_trap_gate(0,&divide_error);
- set_intr_gate(1,&debug);
- set_intr_gate(2,&nmi);
+ set_trap_gate(0, &divide_error);
+ set_intr_gate(1, &debug);
+ set_intr_gate(2, &nmi);
set_system_intr_gate(3, &int3); /* int3/4 can be called from all */
- set_system_gate(4,&overflow);
- set_trap_gate(5,&bounds);
- set_trap_gate(6,&invalid_op);
- set_trap_gate(7,&device_not_available);
- set_task_gate(8,GDT_ENTRY_DOUBLEFAULT_TSS);
- set_trap_gate(9,&coprocessor_segment_overrun);
- set_trap_gate(10,&invalid_TSS);
- set_trap_gate(11,&segment_not_present);
- set_trap_gate(12,&stack_segment);
- set_trap_gate(13,&general_protection);
- set_intr_gate(14,&page_fault);
- set_trap_gate(15,&spurious_interrupt_bug);
- set_trap_gate(16,&coprocessor_error);
- set_trap_gate(17,&alignment_check);
+ set_system_gate(4, &overflow);
+ set_trap_gate(5, &bounds);
+ set_trap_gate(6, &invalid_op);
+ set_trap_gate(7, &device_not_available);
+ set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS);
+ set_trap_gate(9, &coprocessor_segment_overrun);
+ set_trap_gate(10, &invalid_TSS);
+ set_trap_gate(11, &segment_not_present);
+ set_trap_gate(12, &stack_segment);
+ set_trap_gate(13, &general_protection);
+ set_intr_gate(14, &page_fault);
+ set_trap_gate(15, &spurious_interrupt_bug);
+ set_trap_gate(16, &coprocessor_error);
+ set_trap_gate(17, &alignment_check);
#ifdef CONFIG_X86_MCE
- set_trap_gate(18,&machine_check);
+ set_trap_gate(18, &machine_check);
#endif
- set_trap_gate(19,&simd_coprocessor_error);
+ set_trap_gate(19, &simd_coprocessor_error);
/*
* Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
@@ -1187,21 +1219,22 @@ void __init trap_init(void)
printk("done.\n");
}
if (cpu_has_xmm) {
- printk(KERN_INFO "Enabling unmasked SIMD FPU exception "
- "support... ");
+ printk(KERN_INFO
+ "Enabling unmasked SIMD FPU exception support... ");
set_in_cr4(X86_CR4_OSXMMEXCPT);
printk("done.\n");
}
- set_system_gate(SYSCALL_VECTOR,&system_call);
+ set_system_gate(SYSCALL_VECTOR, &system_call);
- /* Reserve all the builtin and the syscall vector. */
+ /* Reserve all the builtin and the syscall vector: */
for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
set_bit(i, used_vectors);
+
set_bit(SYSCALL_VECTOR, used_vectors);
/*
- * Should be a barrier for any external CPU state.
+ * Should be a barrier for any external CPU state:
*/
cpu_init();
@@ -1211,6 +1244,7 @@ void __init trap_init(void)
static int __init kstack_setup(char *s)
{
kstack_depth_to_print = simple_strtoul(s, NULL, 0);
+
return 1;
}
__setup("kstack=", kstack_setup);
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 045466681911..79aa6fc0815c 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -33,6 +33,8 @@
#include <linux/kdebug.h>
#include <linux/utsname.h>
+#include <mach_traps.h>
+
#if defined(CONFIG_EDAC)
#include <linux/edac.h>
#endif
@@ -600,8 +602,13 @@ void die(const char * str, struct pt_regs * regs, long err)
void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
{
- unsigned long flags = oops_begin();
+ unsigned long flags;
+ if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) ==
+ NOTIFY_STOP)
+ return;
+
+ flags = oops_begin();
/*
* We are in trouble anyway, lets at least try
* to get a message out.
@@ -806,6 +813,8 @@ io_check_error(unsigned char reason, struct pt_regs * regs)
static __kprobes void
unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
{
+ if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
+ return;
printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
reason);
printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index c2241e04ea5f..3d7e6e9fa6c2 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -84,8 +84,8 @@ DEFINE_PER_CPU(unsigned long, cyc2ns);
static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
{
- unsigned long flags, prev_scale, *scale;
unsigned long long tsc_now, ns_now;
+ unsigned long flags, *scale;
local_irq_save(flags);
sched_clock_idle_sleep_event();
@@ -95,7 +95,6 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
rdtscll(tsc_now);
ns_now = __cycles_2_ns(tsc_now);
- prev_scale = *scale;
if (cpu_khz)
*scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
@@ -392,13 +391,15 @@ void __init tsc_init(void)
int cpu;
if (!cpu_has_tsc)
- goto out_no_tsc;
+ return;
cpu_khz = calculate_cpu_khz();
tsc_khz = cpu_khz;
- if (!cpu_khz)
- goto out_no_tsc;
+ if (!cpu_khz) {
+ mark_tsc_unstable("could not calculate TSC khz");
+ return;
+ }
printk("Detected %lu.%03lu MHz processor.\n",
(unsigned long)cpu_khz / 1000,
@@ -431,9 +432,4 @@ void __init tsc_init(void)
tsc_enabled = 1;
clocksource_register(&clocksource_tsc);
-
- return;
-
-out_no_tsc:
- setup_clear_cpu_cap(X86_FEATURE_TSC);
}
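
set_cyc2ns_scale() above stores a per-CPU fixed-point factor for turning TSC cycles into nanoseconds: cpu_khz is cycles per millisecond, so (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) / cpu_khz is nanoseconds-per-cycle scaled up by 2^CYC2NS_SCALE_FACTOR, and __cycles_2_ns() multiplies by it and shifts back down. Below is a small sketch of the arithmetic; CYC2NS_SCALE_FACTOR is taken to be 10, which is assumed to match this kernel's definition.

#include <stdio.h>

#define NSEC_PER_MSEC		1000000UL
#define CYC2NS_SCALE_FACTOR	10	/* assumed value, see note above */

/* scale = ns-per-cycle in fixed point: (1e6 ns / cpu_khz cycles) << 10 */
static unsigned long cyc2ns_scale(unsigned long cpu_khz)
{
	return (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) / cpu_khz;
}

static unsigned long long cycles_2_ns(unsigned long long cyc, unsigned long scale)
{
	return (cyc * scale) >> CYC2NS_SCALE_FACTOR;
}

int main(void)
{
	unsigned long scale = cyc2ns_scale(2000000);	/* 2 GHz: 2,000,000 kHz */

	/* 2e9 cycles at 2 GHz is one second, so this prints ~1000000000 ns. */
	printf("scale=%lu, 2000000000 cycles -> %llu ns\n",
	       scale, cycles_2_ns(2000000000ULL, scale));
	return 0;
}
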
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index d3bebaaad842..ceeba01e7f47 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -44,8 +44,8 @@ DEFINE_PER_CPU(unsigned long, cyc2ns);
static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
{
- unsigned long flags, prev_scale, *scale;
unsigned long long tsc_now, ns_now;
+ unsigned long flags, *scale;
local_irq_save(flags);
sched_clock_idle_sleep_event();
@@ -55,7 +55,6 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
rdtscll(tsc_now);
ns_now = __cycles_2_ns(tsc_now);
- prev_scale = *scale;
if (cpu_khz)
*scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 738c2104df30..38f566fa27d2 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -64,7 +64,7 @@
#define KVM86 ((struct kernel_vm86_struct *)regs)
-#define VMPI KVM86->vm86plus
+#define VMPI KVM86->vm86plus
/*
@@ -81,7 +81,7 @@
#define VFLAGS (*(unsigned short *)&(current->thread.v86flags))
#define VEFLAGS (current->thread.v86flags)
-#define set_flags(X,new,mask) \
+#define set_flags(X, new, mask) \
((X) = ((X) & ~(mask)) | ((new) & (mask)))
#define SAFE_MASK (0xDD5)
@@ -93,8 +93,10 @@ static int copy_vm86_regs_to_user(struct vm86_regs __user *user,
{
int ret = 0;
- /* kernel_vm86_regs is missing gs, so copy everything up to
- (but not including) orig_eax, and then rest including orig_eax. */
+ /*
+ * kernel_vm86_regs is missing gs, so copy everything up to
+ * (but not including) orig_eax, and then rest including orig_eax.
+ */
ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_ax));
ret += copy_to_user(&user->orig_eax, &regs->pt.orig_ax,
sizeof(struct kernel_vm86_regs) -
@@ -120,7 +122,7 @@ static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs,
return ret;
}
-struct pt_regs * save_v86_state(struct kernel_vm86_regs * regs)
+struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
{
struct tss_struct *tss;
struct pt_regs *ret;
@@ -137,9 +139,9 @@ struct pt_regs * save_v86_state(struct kernel_vm86_regs * regs)
printk("no vm86_info: BAD\n");
do_exit(SIGSEGV);
}
- set_flags(regs->pt.flags, VEFLAGS, VIF_MASK | current->thread.v86mask);
- tmp = copy_vm86_regs_to_user(&current->thread.vm86_info->regs,regs);
- tmp += put_user(current->thread.screen_bitmap,&current->thread.vm86_info->screen_bitmap);
+ set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | current->thread.v86mask);
+ tmp = copy_vm86_regs_to_user(&current->thread.vm86_info->regs, regs);
+ tmp += put_user(current->thread.screen_bitmap, &current->thread.vm86_info->screen_bitmap);
if (tmp) {
printk("vm86: could not access userspace vm86_info\n");
do_exit(SIGSEGV);
@@ -237,20 +239,21 @@ asmlinkage int sys_vm86(struct pt_regs regs)
tsk = current;
switch (regs.bx) {
- case VM86_REQUEST_IRQ:
- case VM86_FREE_IRQ:
- case VM86_GET_IRQ_BITS:
- case VM86_GET_AND_RESET_IRQ:
- ret = do_vm86_irq_handling(regs.bx, (int)regs.cx);
- goto out;
- case VM86_PLUS_INSTALL_CHECK:
- /* NOTE: on old vm86 stuff this will return the error
- from access_ok(), because the subfunction is
- interpreted as (invalid) address to vm86_struct.
- So the installation check works.
- */
- ret = 0;
- goto out;
+ case VM86_REQUEST_IRQ:
+ case VM86_FREE_IRQ:
+ case VM86_GET_IRQ_BITS:
+ case VM86_GET_AND_RESET_IRQ:
+ ret = do_vm86_irq_handling(regs.bx, (int)regs.cx);
+ goto out;
+ case VM86_PLUS_INSTALL_CHECK:
+ /*
+ * NOTE: on old vm86 stuff this will return the error
+ * from access_ok(), because the subfunction is
+ * interpreted as (invalid) address to vm86_struct.
+ * So the installation check works.
+ */
+ ret = 0;
+ goto out;
}
/* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */
@@ -296,21 +299,21 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
VEFLAGS = info->regs.pt.flags;
info->regs.pt.flags &= SAFE_MASK;
info->regs.pt.flags |= info->regs32->flags & ~SAFE_MASK;
- info->regs.pt.flags |= VM_MASK;
+ info->regs.pt.flags |= X86_VM_MASK;
switch (info->cpu_type) {
- case CPU_286:
- tsk->thread.v86mask = 0;
- break;
- case CPU_386:
- tsk->thread.v86mask = NT_MASK | IOPL_MASK;
- break;
- case CPU_486:
- tsk->thread.v86mask = AC_MASK | NT_MASK | IOPL_MASK;
- break;
- default:
- tsk->thread.v86mask = ID_MASK | AC_MASK | NT_MASK | IOPL_MASK;
- break;
+ case CPU_286:
+ tsk->thread.v86mask = 0;
+ break;
+ case CPU_386:
+ tsk->thread.v86mask = X86_EFLAGS_NT | X86_EFLAGS_IOPL;
+ break;
+ case CPU_486:
+ tsk->thread.v86mask = X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
+ break;
+ default:
+ tsk->thread.v86mask = X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
+ break;
}
/*
@@ -346,9 +349,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
/* we never return here */
}
-static inline void return_to_32bit(struct kernel_vm86_regs * regs16, int retval)
+static inline void return_to_32bit(struct kernel_vm86_regs *regs16, int retval)
{
- struct pt_regs * regs32;
+ struct pt_regs *regs32;
regs32 = save_v86_state(regs16);
regs32->ax = retval;
@@ -358,29 +361,30 @@ static inline void return_to_32bit(struct kernel_vm86_regs * regs16, int retval)
: : "r" (regs32), "r" (current_thread_info()));
}
-static inline void set_IF(struct kernel_vm86_regs * regs)
+static inline void set_IF(struct kernel_vm86_regs *regs)
{
- VEFLAGS |= VIF_MASK;
- if (VEFLAGS & VIP_MASK)
+ VEFLAGS |= X86_EFLAGS_VIF;
+ if (VEFLAGS & X86_EFLAGS_VIP)
return_to_32bit(regs, VM86_STI);
}
-static inline void clear_IF(struct kernel_vm86_regs * regs)
+static inline void clear_IF(struct kernel_vm86_regs *regs)
{
- VEFLAGS &= ~VIF_MASK;
+ VEFLAGS &= ~X86_EFLAGS_VIF;
}
-static inline void clear_TF(struct kernel_vm86_regs * regs)
+static inline void clear_TF(struct kernel_vm86_regs *regs)
{
- regs->pt.flags &= ~TF_MASK;
+ regs->pt.flags &= ~X86_EFLAGS_TF;
}
-static inline void clear_AC(struct kernel_vm86_regs * regs)
+static inline void clear_AC(struct kernel_vm86_regs *regs)
{
- regs->pt.flags &= ~AC_MASK;
+ regs->pt.flags &= ~X86_EFLAGS_AC;
}
-/* It is correct to call set_IF(regs) from the set_vflags_*
+/*
+ * It is correct to call set_IF(regs) from the set_vflags_*
* functions. However someone forgot to call clear_IF(regs)
* in the opposite case.
* After the command sequence CLI PUSHF STI POPF you should
@@ -391,41 +395,41 @@ static inline void clear_AC(struct kernel_vm86_regs * regs)
* [KD]
*/
-static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs * regs)
+static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs *regs)
{
set_flags(VEFLAGS, flags, current->thread.v86mask);
set_flags(regs->pt.flags, flags, SAFE_MASK);
- if (flags & IF_MASK)
+ if (flags & X86_EFLAGS_IF)
set_IF(regs);
else
clear_IF(regs);
}
-static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs * regs)
+static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs *regs)
{
set_flags(VFLAGS, flags, current->thread.v86mask);
set_flags(regs->pt.flags, flags, SAFE_MASK);
- if (flags & IF_MASK)
+ if (flags & X86_EFLAGS_IF)
set_IF(regs);
else
clear_IF(regs);
}
-static inline unsigned long get_vflags(struct kernel_vm86_regs * regs)
+static inline unsigned long get_vflags(struct kernel_vm86_regs *regs)
{
unsigned long flags = regs->pt.flags & RETURN_MASK;
- if (VEFLAGS & VIF_MASK)
- flags |= IF_MASK;
- flags |= IOPL_MASK;
+ if (VEFLAGS & X86_EFLAGS_VIF)
+ flags |= X86_EFLAGS_IF;
+ flags |= X86_EFLAGS_IOPL;
return flags | (VEFLAGS & current->thread.v86mask);
}
-static inline int is_revectored(int nr, struct revectored_struct * bitmap)
+static inline int is_revectored(int nr, struct revectored_struct *bitmap)
{
__asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0"
:"=r" (nr)
- :"m" (*bitmap),"r" (nr));
+ :"m" (*bitmap), "r" (nr));
return nr;
}
@@ -437,7 +441,7 @@ static inline int is_revectored(int nr, struct revectored_struct * bitmap)
ptr--; \
if (put_user(__val, base + ptr) < 0) \
goto err_label; \
- } while(0)
+ } while (0)
#define pushw(base, ptr, val, err_label) \
do { \
@@ -448,7 +452,7 @@ static inline int is_revectored(int nr, struct revectored_struct * bitmap)
ptr--; \
if (put_user(val_byte(__val, 0), base + ptr) < 0) \
goto err_label; \
- } while(0)
+ } while (0)
#define pushl(base, ptr, val, err_label) \
do { \
@@ -465,7 +469,7 @@ static inline int is_revectored(int nr, struct revectored_struct * bitmap)
ptr--; \
if (put_user(val_byte(__val, 0), base + ptr) < 0) \
goto err_label; \
- } while(0)
+ } while (0)
#define popb(base, ptr, err_label) \
({ \
@@ -512,7 +516,7 @@ static inline int is_revectored(int nr, struct revectored_struct * bitmap)
* in userspace is always better than an Oops anyway.) [KD]
*/
static void do_int(struct kernel_vm86_regs *regs, int i,
- unsigned char __user * ssp, unsigned short sp)
+ unsigned char __user *ssp, unsigned short sp)
{
unsigned long __user *intr_ptr;
unsigned long segoffs;
@@ -521,7 +525,7 @@ static void do_int(struct kernel_vm86_regs *regs, int i,
goto cannot_handle;
if (is_revectored(i, &KVM86->int_revectored))
goto cannot_handle;
- if (i==0x21 && is_revectored(AH(regs),&KVM86->int21_revectored))
+ if (i == 0x21 && is_revectored(AH(regs), &KVM86->int21_revectored))
goto cannot_handle;
intr_ptr = (unsigned long __user *) (i << 2);
if (get_user(segoffs, intr_ptr))
@@ -543,30 +547,23 @@ cannot_handle:
return_to_32bit(regs, VM86_INTx + (i << 8));
}
-int handle_vm86_trap(struct kernel_vm86_regs * regs, long error_code, int trapno)
+int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
{
if (VMPI.is_vm86pus) {
- if ( (trapno==3) || (trapno==1) )
+ if ((trapno == 3) || (trapno == 1))
return_to_32bit(regs, VM86_TRAP + (trapno << 8));
do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));
return 0;
}
- if (trapno !=1)
+ if (trapno != 1)
return 1; /* we let this handle by the calling routine */
- if (current->ptrace & PT_PTRACED) {
- unsigned long flags;
- spin_lock_irqsave(&current->sighand->siglock, flags);
- sigdelset(&current->blocked, SIGTRAP);
- recalc_sigpending();
- spin_unlock_irqrestore(&current->sighand->siglock, flags);
- }
- send_sig(SIGTRAP, current, 1);
current->thread.trap_no = trapno;
current->thread.error_code = error_code;
+ force_sig(SIGTRAP, current);
return 0;
}
-void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
+void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
{
unsigned char opcode;
unsigned char __user *csp;
@@ -576,11 +573,11 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
#define CHECK_IF_IN_TRAP \
if (VMPI.vm86dbg_active && VMPI.vm86dbg_TFpendig) \
- newflags |= TF_MASK
+ newflags |= X86_EFLAGS_TF
#define VM86_FAULT_RETURN do { \
- if (VMPI.force_return_for_pic && (VEFLAGS & (IF_MASK | VIF_MASK))) \
+ if (VMPI.force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) \
return_to_32bit(regs, VM86_PICRETURN); \
- if (orig_flags & TF_MASK) \
+ if (orig_flags & X86_EFLAGS_TF) \
handle_vm86_trap(regs, 0, 1); \
return; } while (0)
@@ -595,17 +592,17 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
pref_done = 0;
do {
switch (opcode = popb(csp, ip, simulate_sigsegv)) {
- case 0x66: /* 32-bit data */ data32=1; break;
- case 0x67: /* 32-bit address */ break;
- case 0x2e: /* CS */ break;
- case 0x3e: /* DS */ break;
- case 0x26: /* ES */ break;
- case 0x36: /* SS */ break;
- case 0x65: /* GS */ break;
- case 0x64: /* FS */ break;
- case 0xf2: /* repnz */ break;
- case 0xf3: /* rep */ break;
- default: pref_done = 1;
+ case 0x66: /* 32-bit data */ data32 = 1; break;
+ case 0x67: /* 32-bit address */ break;
+ case 0x2e: /* CS */ break;
+ case 0x3e: /* DS */ break;
+ case 0x26: /* ES */ break;
+ case 0x36: /* SS */ break;
+ case 0x65: /* GS */ break;
+ case 0x64: /* FS */ break;
+ case 0xf2: /* repnz */ break;
+ case 0xf3: /* rep */ break;
+ default: pref_done = 1;
}
} while (!pref_done);
@@ -628,7 +625,7 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
{
unsigned long newflags;
if (data32) {
- newflags=popl(ssp, sp, simulate_sigsegv);
+ newflags = popl(ssp, sp, simulate_sigsegv);
SP(regs) += 4;
} else {
newflags = popw(ssp, sp, simulate_sigsegv);
@@ -636,20 +633,20 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
}
IP(regs) = ip;
CHECK_IF_IN_TRAP;
- if (data32) {
+ if (data32)
set_vflags_long(newflags, regs);
- } else {
+ else
set_vflags_short(newflags, regs);
- }
+
VM86_FAULT_RETURN;
}
/* int xx */
case 0xcd: {
- int intno=popb(csp, ip, simulate_sigsegv);
+ int intno = popb(csp, ip, simulate_sigsegv);
IP(regs) = ip;
if (VMPI.vm86dbg_active) {
- if ( (1 << (intno &7)) & VMPI.vm86dbg_intxxtab[intno >> 3] )
+ if ((1 << (intno & 7)) & VMPI.vm86dbg_intxxtab[intno >> 3])
return_to_32bit(regs, VM86_INTx + (intno << 8));
}
do_int(regs, intno, ssp, sp);
@@ -663,9 +660,9 @@ void handle_vm86_fault(struct kernel_vm86_regs * regs, long error_code)
unsigned long newcs;
unsigned long newflags;
if (data32) {
- newip=popl(ssp, sp, simulate_sigsegv);
- newcs=popl(ssp, sp, simulate_sigsegv);
- newflags=popl(ssp, sp, simulate_sigsegv);
+ newip = popl(ssp, sp, simulate_sigsegv);
+ newcs = popl(ssp, sp, simulate_sigsegv);
+ newflags = popl(ssp, sp, simulate_sigsegv);
SP(regs) += 12;
} else {
newip = popw(ssp, sp, simulate_sigsegv);
@@ -734,18 +731,18 @@ static struct vm86_irqs {
static DEFINE_SPINLOCK(irqbits_lock);
static int irqbits;
-#define ALLOWED_SIGS ( 1 /* 0 = don't send a signal */ \
+#define ALLOWED_SIGS (1 /* 0 = don't send a signal */ \
| (1 << SIGUSR1) | (1 << SIGUSR2) | (1 << SIGIO) | (1 << SIGURG) \
- | (1 << SIGUNUSED) )
-
+ | (1 << SIGUNUSED))
+
static irqreturn_t irq_handler(int intno, void *dev_id)
{
int irq_bit;
unsigned long flags;
- spin_lock_irqsave(&irqbits_lock, flags);
+ spin_lock_irqsave(&irqbits_lock, flags);
irq_bit = 1 << intno;
- if ((irqbits & irq_bit) || ! vm86_irqs[intno].tsk)
+ if ((irqbits & irq_bit) || !vm86_irqs[intno].tsk)
goto out;
irqbits |= irq_bit;
if (vm86_irqs[intno].sig)
@@ -759,7 +756,7 @@ static irqreturn_t irq_handler(int intno, void *dev_id)
return IRQ_HANDLED;
out:
- spin_unlock_irqrestore(&irqbits_lock, flags);
+ spin_unlock_irqrestore(&irqbits_lock, flags);
return IRQ_NONE;
}
@@ -770,9 +767,9 @@ static inline void free_vm86_irq(int irqnumber)
free_irq(irqnumber, NULL);
vm86_irqs[irqnumber].tsk = NULL;
- spin_lock_irqsave(&irqbits_lock, flags);
+ spin_lock_irqsave(&irqbits_lock, flags);
irqbits &= ~(1 << irqnumber);
- spin_unlock_irqrestore(&irqbits_lock, flags);
+ spin_unlock_irqrestore(&irqbits_lock, flags);
}
void release_vm86_irqs(struct task_struct *task)
@@ -788,10 +785,10 @@ static inline int get_and_reset_irq(int irqnumber)
int bit;
unsigned long flags;
int ret = 0;
-
+
if (invalid_vm86_irq(irqnumber)) return 0;
if (vm86_irqs[irqnumber].tsk != current) return 0;
- spin_lock_irqsave(&irqbits_lock, flags);
+ spin_lock_irqsave(&irqbits_lock, flags);
bit = irqbits & (1 << irqnumber);
irqbits &= ~bit;
if (bit) {
@@ -799,7 +796,7 @@ static inline int get_and_reset_irq(int irqnumber)
ret = 1;
}
- spin_unlock_irqrestore(&irqbits_lock, flags);
+ spin_unlock_irqrestore(&irqbits_lock, flags);
return ret;
}
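The set_flags() macro reworked above merges only the bits covered by mask into X and leaves everything else alone. A standalone sketch, with assumed EFLAGS bit values, showing how a guest-supplied value updates IF and IOPL without disturbing the rest of EFLAGS:

#include <stdio.h>

/* Same merge helper as in vm86_32.c: update only the bits covered by mask. */
#define set_flags(X, new, mask) \
	((X) = ((X) & ~(mask)) | ((new) & (mask)))

/* Assumed EFLAGS bit values, for illustration only. */
#define X86_EFLAGS_IF   0x00000200
#define X86_EFLAGS_IOPL 0x00003000

int main(void)
{
	unsigned long eflags = 0x00000002;		/* reserved bit only */
	unsigned long guest  = X86_EFLAGS_IF | 0xffff;	/* guest-supplied value */

	/* Only IF and IOPL are taken from the guest value; the rest is kept. */
	set_flags(eflags, guest, X86_EFLAGS_IF | X86_EFLAGS_IOPL);
	printf("eflags = %#lx\n", eflags);		/* prints 0x3202 */
	return 0;
}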
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 2ffa9656fe7a..ce5ed083a1e9 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -149,6 +149,11 @@ SECTIONS
*(.con_initcall.init)
__con_initcall_end = .;
}
+ .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) {
+ __x86cpuvendor_start = .;
+ *(.x86cpuvendor.init)
+ __x86cpuvendor_end = .;
+ }
SECURITY_INIT
. = ALIGN(4);
.altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index fab132299735..b7ab3c335fae 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -177,6 +177,11 @@ SECTIONS
*(.con_initcall.init)
}
__con_initcall_end = .;
+ __x86cpuvendor_start = .;
+ .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) {
+ *(.x86cpuvendor.init)
+ }
+ __x86cpuvendor_end = .;
SECURITY_INIT
. = ALIGN(8);
@@ -247,3 +252,9 @@ SECTIONS
DWARF_DEBUG
}
+
+/*
+ * Build-time check on the image size:
+ */
+ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
+ "kernel image bigger than KERNEL_IMAGE_SIZE")
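The new .x86cpuvendor.init output sections in both linker scripts collect records between __x86cpuvendor_start and __x86cpuvendor_end. A sketch of how such a linker-collected table is typically walked; the struct layout and register_vendor() helper are assumptions, not part of this patch:

#include <linux/init.h>

struct cpu_dev;

struct cpu_vendor_dev {
	int vendor;
	struct cpu_dev *c_dev;
};

extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[];
extern void register_vendor(int vendor, struct cpu_dev *dev);	/* hypothetical */

static void __init scan_cpu_vendors(void)
{
	struct cpu_vendor_dev *cvdev;

	/* every object placed in .x86cpuvendor.init lands in this range */
	for (cvdev = __x86cpuvendor_start; cvdev < __x86cpuvendor_end; cvdev++)
		register_vendor(cvdev->vendor, cvdev->c_dev);
}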
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index d971210a6d36..caf2a26f5cfd 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -8,6 +8,8 @@
*
* Ravikiran Thirumalai <kiran@scalemp.com>,
* Shai Fultheim <shai@scalemp.com>
+ * Paravirt ops integration: Glauber de Oliveira Costa <gcosta@redhat.com>,
+ * Ravikiran Thirumalai <kiran@scalemp.com>
*/
#include <linux/init.h>
@@ -15,38 +17,137 @@
#include <linux/pci_regs.h>
#include <asm/pci-direct.h>
#include <asm/io.h>
+#include <asm/paravirt.h>
-static int __init vsmp_init(void)
+#if defined CONFIG_PCI && defined CONFIG_PARAVIRT
+/*
+ * Interrupt control on vSMPowered systems:
+ * ~AC is a shadow of IF. If IF is 'on' AC should be 'off'
+ * and vice versa.
+ */
+
+static unsigned long vsmp_save_fl(void)
{
- void *address;
- unsigned int cap, ctl;
+ unsigned long flags = native_save_fl();
- if (!early_pci_allowed())
- return 0;
+ if (!(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC))
+ flags &= ~X86_EFLAGS_IF;
+ return flags;
+}
- /* Check if we are running on a ScaleMP vSMP box */
- if ((read_pci_config_16(0, 0x1f, 0, PCI_VENDOR_ID) !=
- PCI_VENDOR_ID_SCALEMP) ||
- (read_pci_config_16(0, 0x1f, 0, PCI_DEVICE_ID) !=
- PCI_DEVICE_ID_SCALEMP_VSMP_CTL))
- return 0;
+static void vsmp_restore_fl(unsigned long flags)
+{
+ if (flags & X86_EFLAGS_IF)
+ flags &= ~X86_EFLAGS_AC;
+ else
+ flags |= X86_EFLAGS_AC;
+ native_restore_fl(flags);
+}
+
+static void vsmp_irq_disable(void)
+{
+ unsigned long flags = native_save_fl();
+
+ native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC);
+}
+
+static void vsmp_irq_enable(void)
+{
+ unsigned long flags = native_save_fl();
+
+ native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC));
+}
+
+static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf,
+ unsigned long addr, unsigned len)
+{
+ switch (type) {
+ case PARAVIRT_PATCH(pv_irq_ops.irq_enable):
+ case PARAVIRT_PATCH(pv_irq_ops.irq_disable):
+ case PARAVIRT_PATCH(pv_irq_ops.save_fl):
+ case PARAVIRT_PATCH(pv_irq_ops.restore_fl):
+ return paravirt_patch_default(type, clobbers, ibuf, addr, len);
+ default:
+ return native_patch(type, clobbers, ibuf, addr, len);
+ }
+
+}
+
+static void __init set_vsmp_pv_ops(void)
+{
+ void *address;
+ unsigned int cap, ctl, cfg;
/* set vSMP magic bits to indicate vSMP capable kernel */
- address = ioremap(read_pci_config(0, 0x1f, 0, PCI_BASE_ADDRESS_0), 8);
+ cfg = read_pci_config(0, 0x1f, 0, PCI_BASE_ADDRESS_0);
+ address = early_ioremap(cfg, 8);
cap = readl(address);
ctl = readl(address + 4);
printk(KERN_INFO "vSMP CTL: capabilities:0x%08x control:0x%08x\n",
cap, ctl);
if (cap & ctl & (1 << 4)) {
- /* Turn on vSMP IRQ fastpath handling (see system.h) */
+ /* Setup irq ops and turn on vSMP IRQ fastpath handling */
+ pv_irq_ops.irq_disable = vsmp_irq_disable;
+ pv_irq_ops.irq_enable = vsmp_irq_enable;
+ pv_irq_ops.save_fl = vsmp_save_fl;
+ pv_irq_ops.restore_fl = vsmp_restore_fl;
+ pv_init_ops.patch = vsmp_patch;
+
ctl &= ~(1 << 4);
writel(ctl, address + 4);
ctl = readl(address + 4);
printk(KERN_INFO "vSMP CTL: control set to:0x%08x\n", ctl);
}
- iounmap(address);
+ early_iounmap(address, 8);
+}
+#else
+static void __init set_vsmp_pv_ops(void)
+{
+}
+#endif
+
+#ifdef CONFIG_PCI
+static int is_vsmp = -1;
+
+static void __init detect_vsmp_box(void)
+{
+ is_vsmp = 0;
+
+ if (!early_pci_allowed())
+ return;
+
+ /* Check if we are running on a ScaleMP vSMPowered box */
+ if (read_pci_config(0, 0x1f, 0, PCI_VENDOR_ID) ==
+ (PCI_VENDOR_ID_SCALEMP | (PCI_DEVICE_ID_SCALEMP_VSMP_CTL << 16)))
+ is_vsmp = 1;
+}
+
+int is_vsmp_box(void)
+{
+ if (is_vsmp != -1)
+ return is_vsmp;
+ else {
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+}
+#else
+static void __init detect_vsmp_box(void)
+{
+}
+int is_vsmp_box(void)
+{
return 0;
}
+#endif
-core_initcall(vsmp_init);
+void __init vsmp_init(void)
+{
+ detect_vsmp_box();
+ if (!is_vsmp_box())
+ return;
+
+ set_vsmp_pv_ops();
+ return;
+}
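On vSMPowered boxes ~AC shadows IF, as the comment above says. A small user-space sketch (assumed EFLAGS bit values) that mirrors vsmp_save_fl()/vsmp_restore_fl() and shows the invariant round-tripping:

#include <stdio.h>

#define X86_EFLAGS_IF 0x00000200	/* assumed bit values, for illustration */
#define X86_EFLAGS_AC 0x00040000

/* Mirror of vsmp_save_fl(): IF only reads as set when AC is clear. */
static unsigned long sketch_save_fl(unsigned long hw_flags)
{
	if (!(hw_flags & X86_EFLAGS_IF) || (hw_flags & X86_EFLAGS_AC))
		hw_flags &= ~X86_EFLAGS_IF;
	return hw_flags;
}

/* Mirror of vsmp_restore_fl(): keep AC as the inverse shadow of IF. */
static unsigned long sketch_restore_fl(unsigned long flags)
{
	if (flags & X86_EFLAGS_IF)
		flags &= ~X86_EFLAGS_AC;
	else
		flags |= X86_EFLAGS_AC;
	return flags;
}

int main(void)
{
	unsigned long hw = X86_EFLAGS_IF | X86_EFLAGS_AC;	/* AC set => IRQs "off" */

	printf("saved:    %#lx\n", sketch_save_fl(hw));		/* IF reads clear */
	printf("restored: %#lx\n", sketch_restore_fl(X86_EFLAGS_IF));	/* AC cleared */
	return 0;
}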
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index a66e9c1a0537..58882f9f2637 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -4,7 +4,6 @@
#include <linux/module.h>
#include <linux/smp.h>
-#include <asm/semaphore.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -12,11 +11,6 @@
EXPORT_SYMBOL(kernel_thread);
-EXPORT_SYMBOL(__down_failed);
-EXPORT_SYMBOL(__down_failed_interruptible);
-EXPORT_SYMBOL(__down_failed_trylock);
-EXPORT_SYMBOL(__up_wakeup);
-
EXPORT_SYMBOL(__get_user_1);
EXPORT_SYMBOL(__get_user_2);
EXPORT_SYMBOL(__get_user_4);
@@ -35,15 +29,17 @@ EXPORT_SYMBOL(__copy_from_user_inatomic);
EXPORT_SYMBOL(copy_page);
EXPORT_SYMBOL(clear_page);
-/* Export string functions. We normally rely on gcc builtin for most of these,
- but gcc sometimes decides not to inline them. */
+/*
+ * Export string functions. We normally rely on gcc builtin for most of these,
+ * but gcc sometimes decides not to inline them.
+ */
#undef memcpy
#undef memset
#undef memmove
-extern void * memset(void *,int,__kernel_size_t);
-extern void * memcpy(void *,const void *,__kernel_size_t);
-extern void * __memcpy(void *,const void *,__kernel_size_t);
+extern void *memset(void *, int, __kernel_size_t);
+extern void *memcpy(void *, const void *, __kernel_size_t);
+extern void *__memcpy(void *, const void *, __kernel_size_t);
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memcpy);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 3335b4595efd..af65b2da3ba0 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -661,7 +661,7 @@ static int lguest_clockevent_set_next_event(unsigned long delta,
if (delta < LG_CLOCK_MIN_DELTA) {
if (printk_ratelimit())
printk(KERN_DEBUG "%s: small delta %lu ns\n",
- __FUNCTION__, delta);
+ __func__, delta);
return -ETIME;
}
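__func__ is the standard C99 spelling of GCC's __FUNCTION__, so the change above is a pure portability cleanup. A trivial standalone sketch:

#include <stdio.h>

static void demo(void)
{
	printf("%s: called\n", __func__);	/* prints "demo: called" */
}

int main(void)
{
	demo();
	return 0;
}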
diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c
index 37756b6fb329..5415a9d06f53 100644
--- a/arch/x86/lib/memcpy_32.c
+++ b/arch/x86/lib/memcpy_32.c
@@ -25,7 +25,7 @@ void *memmove(void *dest, const void *src, size_t n)
int d0, d1, d2;
if (dest < src) {
- memcpy(dest,src,n);
+ memcpy(dest, src, n);
} else {
__asm__ __volatile__(
"std\n\t"
diff --git a/arch/x86/lib/memmove_64.c b/arch/x86/lib/memmove_64.c
index 80175e47b190..0a33909bf122 100644
--- a/arch/x86/lib/memmove_64.c
+++ b/arch/x86/lib/memmove_64.c
@@ -6,10 +6,10 @@
#include <linux/module.h>
#undef memmove
-void *memmove(void * dest,const void *src,size_t count)
+void *memmove(void *dest, const void *src, size_t count)
{
- if (dest < src) {
- return memcpy(dest,src,count);
+ if (dest < src) {
+ return memcpy(dest, src, count);
} else {
char *p = dest + count;
const char *s = src + count;
@@ -17,5 +17,5 @@ void *memmove(void * dest,const void *src,size_t count)
*--p = *--s;
}
return dest;
-}
+}
EXPORT_SYMBOL(memmove);
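memmove() copies backwards when the destination overlaps above the source, which is exactly the branch the open-coded loop above handles. A small sketch of the overlapping case it exists for:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[16] = "abcdef";

	/* Destination overlaps and lies above the source, so the copy must
	 * run backwards to avoid clobbering bytes before they are read. */
	memmove(buf + 2, buf, 4);
	printf("%s\n", buf);		/* prints "ababcd" */
	return 0;
}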
diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c
index cc9b4a4450f3..c9f2d9ba8dd8 100644
--- a/arch/x86/lib/mmx_32.c
+++ b/arch/x86/lib/mmx_32.c
@@ -1,32 +1,30 @@
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/sched.h>
-#include <linux/hardirq.h>
-#include <linux/module.h>
-
-#include <asm/asm.h>
-#include <asm/i387.h>
-
-
/*
* MMX 3DNow! library helper functions
*
* To do:
- * We can use MMX just for prefetch in IRQ's. This may be a win.
+ * We can use MMX just for prefetch in IRQ's. This may be a win.
* (reported so on K6-III)
* We should use a better code neutral filler for the short jump
* leal ebx. [ebx] is apparently best for K6-2, but Cyrix ??
* We also want to clobber the filler register so we don't get any
- * register forwarding stalls on the filler.
+ * register forwarding stalls on the filler.
*
* Add *user handling. Checksums are not a win with MMX on any CPU
* tested so far for any MMX solution figured.
*
- * 22/09/2000 - Arjan van de Ven
- * Improved for non-egineering-sample Athlons
+ * 22/09/2000 - Arjan van de Ven
+ *	Improved for non-engineering-sample Athlons
*
*/
-
+#include <linux/hardirq.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+
+#include <asm/i387.h>
+#include <asm/asm.h>
+
void *_mmx_memcpy(void *to, const void *from, size_t len)
{
void *p;
@@ -51,12 +49,10 @@ void *_mmx_memcpy(void *to, const void *from, size_t len)
"3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
" jmp 2b\n"
".previous\n"
- _ASM_EXTABLE(1b,3b)
- : : "r" (from) );
-
-
- for(; i>5; i--)
- {
+ _ASM_EXTABLE(1b, 3b)
+ : : "r" (from));
+
+ for ( ; i > 5; i--) {
__asm__ __volatile__ (
"1: prefetch 320(%0)\n"
"2: movq (%0), %%mm0\n"
@@ -79,14 +75,14 @@ void *_mmx_memcpy(void *to, const void *from, size_t len)
"3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
" jmp 2b\n"
".previous\n"
- _ASM_EXTABLE(1b,3b)
- : : "r" (from), "r" (to) : "memory");
- from+=64;
- to+=64;
+ _ASM_EXTABLE(1b, 3b)
+ : : "r" (from), "r" (to) : "memory");
+
+ from += 64;
+ to += 64;
}
- for(; i>0; i--)
- {
+ for ( ; i > 0; i--) {
__asm__ __volatile__ (
" movq (%0), %%mm0\n"
" movq 8(%0), %%mm1\n"
@@ -104,17 +100,20 @@ void *_mmx_memcpy(void *to, const void *from, size_t len)
" movq %%mm1, 40(%1)\n"
" movq %%mm2, 48(%1)\n"
" movq %%mm3, 56(%1)\n"
- : : "r" (from), "r" (to) : "memory");
- from+=64;
- to+=64;
+ : : "r" (from), "r" (to) : "memory");
+
+ from += 64;
+ to += 64;
}
/*
- * Now do the tail of the block
+ * Now do the tail of the block:
*/
- __memcpy(to, from, len&63);
+ __memcpy(to, from, len & 63);
kernel_fpu_end();
+
return p;
}
+EXPORT_SYMBOL(_mmx_memcpy);
#ifdef CONFIG_MK7
@@ -128,13 +127,12 @@ static void fast_clear_page(void *page)
int i;
kernel_fpu_begin();
-
+
__asm__ __volatile__ (
" pxor %%mm0, %%mm0\n" : :
);
- for(i=0;i<4096/64;i++)
- {
+ for (i = 0; i < 4096/64; i++) {
__asm__ __volatile__ (
" movntq %%mm0, (%0)\n"
" movntq %%mm0, 8(%0)\n"
@@ -145,14 +143,15 @@ static void fast_clear_page(void *page)
" movntq %%mm0, 48(%0)\n"
" movntq %%mm0, 56(%0)\n"
: : "r" (page) : "memory");
- page+=64;
+ page += 64;
}
- /* since movntq is weakly-ordered, a "sfence" is needed to become
- * ordered again.
+
+ /*
+ * Since movntq is weakly-ordered, a "sfence" is needed to become
+ * ordered again:
*/
- __asm__ __volatile__ (
- " sfence \n" : :
- );
+ __asm__ __volatile__("sfence\n"::);
+
kernel_fpu_end();
}
@@ -162,10 +161,11 @@ static void fast_copy_page(void *to, void *from)
kernel_fpu_begin();
- /* maybe the prefetch stuff can go before the expensive fnsave...
+ /*
+ * maybe the prefetch stuff can go before the expensive fnsave...
* but that is for later. -AV
*/
- __asm__ __volatile__ (
+ __asm__ __volatile__(
"1: prefetch (%0)\n"
" prefetch 64(%0)\n"
" prefetch 128(%0)\n"
@@ -176,11 +176,9 @@ static void fast_copy_page(void *to, void *from)
"3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
" jmp 2b\n"
".previous\n"
- _ASM_EXTABLE(1b,3b)
- : : "r" (from) );
+ _ASM_EXTABLE(1b, 3b) : : "r" (from));
- for(i=0; i<(4096-320)/64; i++)
- {
+ for (i = 0; i < (4096-320)/64; i++) {
__asm__ __volatile__ (
"1: prefetch 320(%0)\n"
"2: movq (%0), %%mm0\n"
@@ -203,13 +201,13 @@ static void fast_copy_page(void *to, void *from)
"3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
" jmp 2b\n"
".previous\n"
- _ASM_EXTABLE(1b,3b)
- : : "r" (from), "r" (to) : "memory");
- from+=64;
- to+=64;
+ _ASM_EXTABLE(1b, 3b) : : "r" (from), "r" (to) : "memory");
+
+ from += 64;
+ to += 64;
}
- for(i=(4096-320)/64; i<4096/64; i++)
- {
+
+ for (i = (4096-320)/64; i < 4096/64; i++) {
__asm__ __volatile__ (
"2: movq (%0), %%mm0\n"
" movntq %%mm0, (%1)\n"
@@ -227,37 +225,34 @@ static void fast_copy_page(void *to, void *from)
" movntq %%mm6, 48(%1)\n"
" movq 56(%0), %%mm7\n"
" movntq %%mm7, 56(%1)\n"
- : : "r" (from), "r" (to) : "memory");
- from+=64;
- to+=64;
+ : : "r" (from), "r" (to) : "memory");
+ from += 64;
+ to += 64;
}
- /* since movntq is weakly-ordered, a "sfence" is needed to become
- * ordered again.
+ /*
+ * Since movntq is weakly-ordered, a "sfence" is needed to become
+ * ordered again:
*/
- __asm__ __volatile__ (
- " sfence \n" : :
- );
+ __asm__ __volatile__("sfence \n"::);
kernel_fpu_end();
}
-#else
+#else /* CONFIG_MK7 */
/*
* Generic MMX implementation without K7 specific streaming
*/
-
static void fast_clear_page(void *page)
{
int i;
-
+
kernel_fpu_begin();
-
+
__asm__ __volatile__ (
" pxor %%mm0, %%mm0\n" : :
);
- for(i=0;i<4096/128;i++)
- {
+ for (i = 0; i < 4096/128; i++) {
__asm__ __volatile__ (
" movq %%mm0, (%0)\n"
" movq %%mm0, 8(%0)\n"
@@ -275,8 +270,8 @@ static void fast_clear_page(void *page)
" movq %%mm0, 104(%0)\n"
" movq %%mm0, 112(%0)\n"
" movq %%mm0, 120(%0)\n"
- : : "r" (page) : "memory");
- page+=128;
+ : : "r" (page) : "memory");
+ page += 128;
}
kernel_fpu_end();
@@ -285,8 +280,7 @@ static void fast_clear_page(void *page)
static void fast_copy_page(void *to, void *from)
{
int i;
-
-
+
kernel_fpu_begin();
__asm__ __volatile__ (
@@ -300,11 +294,9 @@ static void fast_copy_page(void *to, void *from)
"3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
" jmp 2b\n"
".previous\n"
- _ASM_EXTABLE(1b,3b)
- : : "r" (from) );
+ _ASM_EXTABLE(1b, 3b) : : "r" (from));
- for(i=0; i<4096/64; i++)
- {
+ for (i = 0; i < 4096/64; i++) {
__asm__ __volatile__ (
"1: prefetch 320(%0)\n"
"2: movq (%0), %%mm0\n"
@@ -327,60 +319,59 @@ static void fast_copy_page(void *to, void *from)
"3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
" jmp 2b\n"
".previous\n"
- _ASM_EXTABLE(1b,3b)
- : : "r" (from), "r" (to) : "memory");
- from+=64;
- to+=64;
+ _ASM_EXTABLE(1b, 3b)
+ : : "r" (from), "r" (to) : "memory");
+
+ from += 64;
+ to += 64;
}
kernel_fpu_end();
}
-
-#endif
+#endif /* !CONFIG_MK7 */
/*
- * Favour MMX for page clear and copy.
+ * Favour MMX for page clear and copy:
*/
-
-static void slow_zero_page(void * page)
+static void slow_zero_page(void *page)
{
int d0, d1;
- __asm__ __volatile__( \
- "cld\n\t" \
- "rep ; stosl" \
- : "=&c" (d0), "=&D" (d1)
- :"a" (0),"1" (page),"0" (1024)
- :"memory");
+
+ __asm__ __volatile__(
+ "cld\n\t"
+ "rep ; stosl"
+
+ : "=&c" (d0), "=&D" (d1)
+ :"a" (0), "1" (page), "0" (1024)
+ :"memory");
}
-
-void mmx_clear_page(void * page)
+
+void mmx_clear_page(void *page)
{
- if(unlikely(in_interrupt()))
+ if (unlikely(in_interrupt()))
slow_zero_page(page);
else
fast_clear_page(page);
}
+EXPORT_SYMBOL(mmx_clear_page);
static void slow_copy_page(void *to, void *from)
{
int d0, d1, d2;
- __asm__ __volatile__( \
- "cld\n\t" \
- "rep ; movsl" \
- : "=&c" (d0), "=&D" (d1), "=&S" (d2) \
- : "0" (1024),"1" ((long) to),"2" ((long) from) \
+
+ __asm__ __volatile__(
+ "cld\n\t"
+ "rep ; movsl"
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+ : "0" (1024), "1" ((long) to), "2" ((long) from)
: "memory");
}
-
void mmx_copy_page(void *to, void *from)
{
- if(unlikely(in_interrupt()))
+ if (unlikely(in_interrupt()))
slow_copy_page(to, from);
else
fast_copy_page(to, from);
}
-
-EXPORT_SYMBOL(_mmx_memcpy);
-EXPORT_SYMBOL(mmx_clear_page);
EXPORT_SYMBOL(mmx_copy_page);
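mmx_clear_page()/mmx_copy_page() above only touch MMX state outside interrupt context and bracket it with kernel_fpu_begin()/kernel_fpu_end(). A sketch of the same pattern; example_fill_page() is a hypothetical helper, not part of this patch:

#include <linux/hardirq.h>
#include <linux/string.h>
#include <asm/i387.h>
#include <asm/page.h>

static void example_fill_page(void *page)
{
	if (unlikely(in_interrupt())) {
		memset(page, 0, PAGE_SIZE);	/* plain, FPU-free fallback */
	} else {
		kernel_fpu_begin();		/* save FPU state, disable preemption */
		/* ... MMX store loop would go here ... */
		kernel_fpu_end();
	}
}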
diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S
index 3899bd37fdf0..648fe4741782 100644
--- a/arch/x86/lib/semaphore_32.S
+++ b/arch/x86/lib/semaphore_32.S
@@ -30,89 +30,6 @@
* value or just clobbered..
*/
.section .sched.text, "ax"
-ENTRY(__down_failed)
- CFI_STARTPROC
- FRAME
- pushl %edx
- CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET edx,0
- pushl %ecx
- CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET ecx,0
- call __down
- popl %ecx
- CFI_ADJUST_CFA_OFFSET -4
- CFI_RESTORE ecx
- popl %edx
- CFI_ADJUST_CFA_OFFSET -4
- CFI_RESTORE edx
- ENDFRAME
- ret
- CFI_ENDPROC
- ENDPROC(__down_failed)
-
-ENTRY(__down_failed_interruptible)
- CFI_STARTPROC
- FRAME
- pushl %edx
- CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET edx,0
- pushl %ecx
- CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET ecx,0
- call __down_interruptible
- popl %ecx
- CFI_ADJUST_CFA_OFFSET -4
- CFI_RESTORE ecx
- popl %edx
- CFI_ADJUST_CFA_OFFSET -4
- CFI_RESTORE edx
- ENDFRAME
- ret
- CFI_ENDPROC
- ENDPROC(__down_failed_interruptible)
-
-ENTRY(__down_failed_trylock)
- CFI_STARTPROC
- FRAME
- pushl %edx
- CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET edx,0
- pushl %ecx
- CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET ecx,0
- call __down_trylock
- popl %ecx
- CFI_ADJUST_CFA_OFFSET -4
- CFI_RESTORE ecx
- popl %edx
- CFI_ADJUST_CFA_OFFSET -4
- CFI_RESTORE edx
- ENDFRAME
- ret
- CFI_ENDPROC
- ENDPROC(__down_failed_trylock)
-
-ENTRY(__up_wakeup)
- CFI_STARTPROC
- FRAME
- pushl %edx
- CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET edx,0
- pushl %ecx
- CFI_ADJUST_CFA_OFFSET 4
- CFI_REL_OFFSET ecx,0
- call __up
- popl %ecx
- CFI_ADJUST_CFA_OFFSET -4
- CFI_RESTORE ecx
- popl %edx
- CFI_ADJUST_CFA_OFFSET -4
- CFI_RESTORE edx
- ENDFRAME
- ret
- CFI_ENDPROC
- ENDPROC(__up_wakeup)
/*
* rw spinlock fallbacks
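The __down_failed/__up_wakeup stubs removed above are superseded by the generic semaphore code this series introduces. A minimal caller-side sketch, assuming the generic <linux/semaphore.h> API (sema_init/down_interruptible/up):

#include <linux/semaphore.h>	/* generic implementation added by this series */
#include <linux/errno.h>
#include <linux/init.h>

static struct semaphore example_sem;

static int __init example_init(void)
{
	sema_init(&example_sem, 1);	/* count of 1 => mutual exclusion */

	if (down_interruptible(&example_sem))
		return -EINTR;
	/* ... critical section ... */
	up(&example_sem);
	return 0;
}
device_initcall(example_init);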
diff --git a/arch/x86/lib/string_32.c b/arch/x86/lib/string_32.c
index c2c0504a3071..94972e7c094d 100644
--- a/arch/x86/lib/string_32.c
+++ b/arch/x86/lib/string_32.c
@@ -14,25 +14,25 @@
#include <linux/module.h>
#ifdef __HAVE_ARCH_STRCPY
-char *strcpy(char * dest,const char *src)
+char *strcpy(char *dest, const char *src)
{
int d0, d1, d2;
- asm volatile( "1:\tlodsb\n\t"
+ asm volatile("1:\tlodsb\n\t"
"stosb\n\t"
"testb %%al,%%al\n\t"
"jne 1b"
: "=&S" (d0), "=&D" (d1), "=&a" (d2)
- :"0" (src),"1" (dest) : "memory");
+ :"0" (src), "1" (dest) : "memory");
return dest;
}
EXPORT_SYMBOL(strcpy);
#endif
#ifdef __HAVE_ARCH_STRNCPY
-char *strncpy(char * dest,const char *src,size_t count)
+char *strncpy(char *dest, const char *src, size_t count)
{
int d0, d1, d2, d3;
- asm volatile( "1:\tdecl %2\n\t"
+ asm volatile("1:\tdecl %2\n\t"
"js 2f\n\t"
"lodsb\n\t"
"stosb\n\t"
@@ -42,17 +42,17 @@ char *strncpy(char * dest,const char *src,size_t count)
"stosb\n"
"2:"
: "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
- :"0" (src),"1" (dest),"2" (count) : "memory");
+ :"0" (src), "1" (dest), "2" (count) : "memory");
return dest;
}
EXPORT_SYMBOL(strncpy);
#endif
#ifdef __HAVE_ARCH_STRCAT
-char *strcat(char * dest,const char * src)
+char *strcat(char *dest, const char *src)
{
int d0, d1, d2, d3;
- asm volatile( "repne\n\t"
+ asm volatile("repne\n\t"
"scasb\n\t"
"decl %1\n"
"1:\tlodsb\n\t"
@@ -67,10 +67,10 @@ EXPORT_SYMBOL(strcat);
#endif
#ifdef __HAVE_ARCH_STRNCAT
-char *strncat(char * dest,const char * src,size_t count)
+char *strncat(char *dest, const char *src, size_t count)
{
int d0, d1, d2, d3;
- asm volatile( "repne\n\t"
+ asm volatile("repne\n\t"
"scasb\n\t"
"decl %1\n\t"
"movl %8,%3\n"
@@ -83,7 +83,7 @@ char *strncat(char * dest,const char * src,size_t count)
"2:\txorl %2,%2\n\t"
"stosb"
: "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
- : "0" (src),"1" (dest),"2" (0),"3" (0xffffffffu), "g" (count)
+ : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu), "g" (count)
: "memory");
return dest;
}
@@ -91,11 +91,11 @@ EXPORT_SYMBOL(strncat);
#endif
#ifdef __HAVE_ARCH_STRCMP
-int strcmp(const char * cs,const char * ct)
+int strcmp(const char *cs, const char *ct)
{
int d0, d1;
int res;
- asm volatile( "1:\tlodsb\n\t"
+ asm volatile("1:\tlodsb\n\t"
"scasb\n\t"
"jne 2f\n\t"
"testb %%al,%%al\n\t"
@@ -106,7 +106,7 @@ int strcmp(const char * cs,const char * ct)
"orb $1,%%al\n"
"3:"
:"=a" (res), "=&S" (d0), "=&D" (d1)
- :"1" (cs),"2" (ct)
+ :"1" (cs), "2" (ct)
:"memory");
return res;
}
@@ -114,11 +114,11 @@ EXPORT_SYMBOL(strcmp);
#endif
#ifdef __HAVE_ARCH_STRNCMP
-int strncmp(const char * cs,const char * ct,size_t count)
+int strncmp(const char *cs, const char *ct, size_t count)
{
int res;
int d0, d1, d2;
- asm volatile( "1:\tdecl %3\n\t"
+ asm volatile("1:\tdecl %3\n\t"
"js 2f\n\t"
"lodsb\n\t"
"scasb\n\t"
@@ -131,7 +131,7 @@ int strncmp(const char * cs,const char * ct,size_t count)
"orb $1,%%al\n"
"4:"
:"=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
- :"1" (cs),"2" (ct),"3" (count)
+ :"1" (cs), "2" (ct), "3" (count)
:"memory");
return res;
}
@@ -139,11 +139,11 @@ EXPORT_SYMBOL(strncmp);
#endif
#ifdef __HAVE_ARCH_STRCHR
-char *strchr(const char * s, int c)
+char *strchr(const char *s, int c)
{
int d0;
- char * res;
- asm volatile( "movb %%al,%%ah\n"
+ char *res;
+ asm volatile("movb %%al,%%ah\n"
"1:\tlodsb\n\t"
"cmpb %%ah,%%al\n\t"
"je 2f\n\t"
@@ -153,7 +153,7 @@ char *strchr(const char * s, int c)
"2:\tmovl %1,%0\n\t"
"decl %0"
:"=a" (res), "=&S" (d0)
- :"1" (s),"0" (c)
+ :"1" (s), "0" (c)
:"memory");
return res;
}
@@ -161,16 +161,16 @@ EXPORT_SYMBOL(strchr);
#endif
#ifdef __HAVE_ARCH_STRLEN
-size_t strlen(const char * s)
+size_t strlen(const char *s)
{
int d0;
int res;
- asm volatile( "repne\n\t"
+ asm volatile("repne\n\t"
"scasb\n\t"
"notl %0\n\t"
"decl %0"
:"=c" (res), "=&D" (d0)
- :"1" (s),"a" (0), "0" (0xffffffffu)
+ :"1" (s), "a" (0), "0" (0xffffffffu)
:"memory");
return res;
}
@@ -178,19 +178,19 @@ EXPORT_SYMBOL(strlen);
#endif
#ifdef __HAVE_ARCH_MEMCHR
-void *memchr(const void *cs,int c,size_t count)
+void *memchr(const void *cs, int c, size_t count)
{
int d0;
void *res;
if (!count)
return NULL;
- asm volatile( "repne\n\t"
+ asm volatile("repne\n\t"
"scasb\n\t"
"je 1f\n\t"
"movl $1,%0\n"
"1:\tdecl %0"
:"=D" (res), "=&c" (d0)
- :"a" (c),"0" (cs),"1" (count)
+ :"a" (c), "0" (cs), "1" (count)
:"memory");
return res;
}
@@ -198,7 +198,7 @@ EXPORT_SYMBOL(memchr);
#endif
#ifdef __HAVE_ARCH_MEMSCAN
-void *memscan(void * addr, int c, size_t size)
+void *memscan(void *addr, int c, size_t size)
{
if (!size)
return addr;
@@ -219,7 +219,7 @@ size_t strnlen(const char *s, size_t count)
{
int d0;
int res;
- asm volatile( "movl %2,%0\n\t"
+ asm volatile("movl %2,%0\n\t"
"jmp 2f\n"
"1:\tcmpb $0,(%0)\n\t"
"je 3f\n\t"
@@ -229,7 +229,7 @@ size_t strnlen(const char *s, size_t count)
"jne 1b\n"
"3:\tsubl %2,%0"
:"=a" (res), "=&d" (d0)
- :"c" (s),"1" (count)
+ :"c" (s), "1" (count)
:"memory");
return res;
}
diff --git a/arch/x86/lib/strstr_32.c b/arch/x86/lib/strstr_32.c
index a3dafbf59dae..42e8a50303f3 100644
--- a/arch/x86/lib/strstr_32.c
+++ b/arch/x86/lib/strstr_32.c
@@ -1,9 +1,9 @@
#include <linux/string.h>
-char * strstr(const char * cs,const char * ct)
+char *strstr(const char *cs, const char *ct)
{
int d0, d1;
-register char * __res;
+register char *__res;
__asm__ __volatile__(
"movl %6,%%edi\n\t"
"repne\n\t"
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index 8b92d428ab02..e009251d4e9f 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -41,11 +41,6 @@
thunk rwsem_downgrade_thunk,rwsem_downgrade_wake
#endif
- thunk __down_failed,__down
- thunk_retrax __down_failed_interruptible,__down_interruptible
- thunk_retrax __down_failed_trylock,__down_trylock
- thunk __up_wakeup,__up
-
#ifdef CONFIG_TRACE_IRQFLAGS
thunk trace_hardirqs_on_thunk,trace_hardirqs_on
thunk trace_hardirqs_off_thunk,trace_hardirqs_off
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index e849b9998b0e..24e60944971a 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -1,4 +1,4 @@
-/*
+/*
* User address space access functions.
* The non inlined parts of asm-i386/uaccess.h are here.
*
@@ -22,14 +22,14 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon
#endif
return 1;
}
-#define movsl_is_ok(a1,a2,n) \
- __movsl_is_ok((unsigned long)(a1),(unsigned long)(a2),(n))
+#define movsl_is_ok(a1, a2, n) \
+ __movsl_is_ok((unsigned long)(a1), (unsigned long)(a2), (n))
/*
* Copy a null terminated string from userspace.
*/
-#define __do_strncpy_from_user(dst,src,count,res) \
+#define __do_strncpy_from_user(dst, src, count, res) \
do { \
int __d0, __d1, __d2; \
might_sleep(); \
@@ -61,7 +61,7 @@ do { \
* least @count bytes long.
* @src: Source address, in user space.
* @count: Maximum number of bytes to copy, including the trailing NUL.
- *
+ *
* Copies a NUL-terminated string from userspace to kernel space.
* Caller must check the specified block with access_ok() before calling
* this function.
@@ -90,7 +90,7 @@ EXPORT_SYMBOL(__strncpy_from_user);
* least @count bytes long.
* @src: Source address, in user space.
* @count: Maximum number of bytes to copy, including the trailing NUL.
- *
+ *
* Copies a NUL-terminated string from userspace to kernel space.
*
* On success, returns the length of the string (not including the trailing
@@ -120,7 +120,7 @@ EXPORT_SYMBOL(strncpy_from_user);
do { \
int __d0; \
might_sleep(); \
- __asm__ __volatile__( \
+ __asm__ __volatile__( \
"0: rep; stosl\n" \
" movl %2,%0\n" \
"1: rep; stosb\n" \
@@ -333,17 +333,17 @@ __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size)
__asm__ __volatile__(
" .align 2,0x90\n"
"0: movl 32(%4), %%eax\n"
- " cmpl $67, %0\n"
- " jbe 2f\n"
+ " cmpl $67, %0\n"
+ " jbe 2f\n"
"1: movl 64(%4), %%eax\n"
- " .align 2,0x90\n"
- "2: movl 0(%4), %%eax\n"
- "21: movl 4(%4), %%edx\n"
- " movl %%eax, 0(%3)\n"
- " movl %%edx, 4(%3)\n"
- "3: movl 8(%4), %%eax\n"
- "31: movl 12(%4),%%edx\n"
- " movl %%eax, 8(%3)\n"
+ " .align 2,0x90\n"
+ "2: movl 0(%4), %%eax\n"
+ "21: movl 4(%4), %%edx\n"
+ " movl %%eax, 0(%3)\n"
+ " movl %%edx, 4(%3)\n"
+ "3: movl 8(%4), %%eax\n"
+ "31: movl 12(%4),%%edx\n"
+ " movl %%eax, 8(%3)\n"
" movl %%edx, 12(%3)\n"
"4: movl 16(%4), %%eax\n"
"41: movl 20(%4), %%edx\n"
@@ -369,38 +369,38 @@ __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size)
"91: movl 60(%4), %%edx\n"
" movl %%eax, 56(%3)\n"
" movl %%edx, 60(%3)\n"
- " addl $-64, %0\n"
- " addl $64, %4\n"
- " addl $64, %3\n"
- " cmpl $63, %0\n"
- " ja 0b\n"
- "5: movl %0, %%eax\n"
- " shrl $2, %0\n"
- " andl $3, %%eax\n"
- " cld\n"
- "6: rep; movsl\n"
+ " addl $-64, %0\n"
+ " addl $64, %4\n"
+ " addl $64, %3\n"
+ " cmpl $63, %0\n"
+ " ja 0b\n"
+ "5: movl %0, %%eax\n"
+ " shrl $2, %0\n"
+ " andl $3, %%eax\n"
+ " cld\n"
+ "6: rep; movsl\n"
" movl %%eax,%0\n"
- "7: rep; movsb\n"
- "8:\n"
+ "7: rep; movsb\n"
+ "8:\n"
".section .fixup,\"ax\"\n"
- "9: lea 0(%%eax,%0,4),%0\n"
- "16: pushl %0\n"
- " pushl %%eax\n"
+ "9: lea 0(%%eax,%0,4),%0\n"
+ "16: pushl %0\n"
+ " pushl %%eax\n"
" xorl %%eax,%%eax\n"
- " rep; stosb\n"
- " popl %%eax\n"
- " popl %0\n"
- " jmp 8b\n"
- ".previous\n"
+ " rep; stosb\n"
+ " popl %%eax\n"
+ " popl %0\n"
+ " jmp 8b\n"
+ ".previous\n"
".section __ex_table,\"a\"\n"
- " .align 4\n"
- " .long 0b,16b\n"
+ " .align 4\n"
+ " .long 0b,16b\n"
" .long 1b,16b\n"
" .long 2b,16b\n"
" .long 21b,16b\n"
- " .long 3b,16b\n"
+ " .long 3b,16b\n"
" .long 31b,16b\n"
- " .long 4b,16b\n"
+ " .long 4b,16b\n"
" .long 41b,16b\n"
" .long 10b,16b\n"
" .long 51b,16b\n"
@@ -412,9 +412,9 @@ __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size)
" .long 81b,16b\n"
" .long 14b,16b\n"
" .long 91b,16b\n"
- " .long 6b,9b\n"
- " .long 7b,16b\n"
- ".previous"
+ " .long 6b,9b\n"
+ " .long 7b,16b\n"
+ ".previous"
: "=&c"(size), "=&D" (d0), "=&S" (d1)
: "1"(to), "2"(from), "0"(size)
: "eax", "edx", "memory");
@@ -429,7 +429,7 @@ __copy_user_zeroing_intel(void *to, const void __user *from, unsigned long size)
static unsigned long __copy_user_zeroing_intel_nocache(void *to,
const void __user *from, unsigned long size)
{
- int d0, d1;
+ int d0, d1;
__asm__ __volatile__(
" .align 2,0x90\n"
@@ -526,7 +526,7 @@ static unsigned long __copy_user_zeroing_intel_nocache(void *to,
static unsigned long __copy_user_intel_nocache(void *to,
const void __user *from, unsigned long size)
{
- int d0, d1;
+ int d0, d1;
__asm__ __volatile__(
" .align 2,0x90\n"
@@ -629,7 +629,7 @@ unsigned long __copy_user_zeroing_intel_nocache(void *to,
#endif /* CONFIG_X86_INTEL_USERCOPY */
/* Generic arbitrary sized copy. */
-#define __copy_user(to,from,size) \
+#define __copy_user(to, from, size) \
do { \
int __d0, __d1, __d2; \
__asm__ __volatile__( \
@@ -665,7 +665,7 @@ do { \
: "memory"); \
} while (0)
-#define __copy_user_zeroing(to,from,size) \
+#define __copy_user_zeroing(to, from, size) \
do { \
int __d0, __d1, __d2; \
__asm__ __volatile__( \
@@ -712,7 +712,7 @@ unsigned long __copy_to_user_ll(void __user *to, const void *from,
{
#ifndef CONFIG_X86_WP_WORKS_OK
if (unlikely(boot_cpu_data.wp_works_ok == 0) &&
- ((unsigned long )to) < TASK_SIZE) {
+ ((unsigned long)to) < TASK_SIZE) {
/*
* When we are in an atomic section (see
* mm/filemap.c:file_read_actor), return the full
@@ -721,26 +721,26 @@ unsigned long __copy_to_user_ll(void __user *to, const void *from,
if (in_atomic())
return n;
- /*
+ /*
* CPU does not honor the WP bit when writing
* from supervisory mode, and due to preemption or SMP,
* the page tables can change at any time.
* Do it manually. Manfred <manfred@colorfullife.com>
*/
while (n) {
- unsigned long offset = ((unsigned long)to)%PAGE_SIZE;
+ unsigned long offset = ((unsigned long)to)%PAGE_SIZE;
unsigned long len = PAGE_SIZE - offset;
int retval;
struct page *pg;
void *maddr;
-
+
if (len > n)
len = n;
survive:
down_read(&current->mm->mmap_sem);
retval = get_user_pages(current, current->mm,
- (unsigned long )to, 1, 1, 0, &pg, NULL);
+ (unsigned long)to, 1, 1, 0, &pg, NULL);
if (retval == -ENOMEM && is_global_init(current)) {
up_read(&current->mm->mmap_sem);
@@ -750,8 +750,8 @@ survive:
if (retval != 1) {
up_read(&current->mm->mmap_sem);
- break;
- }
+ break;
+ }
maddr = kmap_atomic(pg, KM_USER0);
memcpy(maddr + offset, from, len);
@@ -802,12 +802,12 @@ unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from,
unsigned long n)
{
#ifdef CONFIG_X86_INTEL_USERCOPY
- if ( n > 64 && cpu_has_xmm2)
- n = __copy_user_zeroing_intel_nocache(to, from, n);
+ if (n > 64 && cpu_has_xmm2)
+ n = __copy_user_zeroing_intel_nocache(to, from, n);
else
__copy_user_zeroing(to, from, n);
#else
- __copy_user_zeroing(to, from, n);
+ __copy_user_zeroing(to, from, n);
#endif
return n;
}
@@ -817,12 +817,12 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr
unsigned long n)
{
#ifdef CONFIG_X86_INTEL_USERCOPY
- if ( n > 64 && cpu_has_xmm2)
- n = __copy_user_intel_nocache(to, from, n);
+ if (n > 64 && cpu_has_xmm2)
+ n = __copy_user_intel_nocache(to, from, n);
else
__copy_user(to, from, n);
#else
- __copy_user(to, from, n);
+ __copy_user(to, from, n);
#endif
return n;
}
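The usercopy routines cleaned up above keep the usual convention: they return the number of bytes that could not be copied, zero on success. A hypothetical caller-side sketch (example_get_value() is made up):

#include <linux/uaccess.h>
#include <linux/errno.h>

static int example_get_value(void __user *argp, unsigned int *val)
{
	/* non-zero means some bytes were not copied => report -EFAULT */
	if (copy_from_user(val, argp, sizeof(*val)))
		return -EFAULT;
	return 0;
}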
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 292a225edabe..95fc463056d0 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -1,4 +1,4 @@
-/*
+/*
* APIC driver for "bigsmp" XAPIC machines with more than 8 virtual CPUs.
* Drives the local APIC in "clustered mode".
*/
@@ -32,26 +32,26 @@ static int hp_ht_bigsmp(const struct dmi_system_id *d)
static const struct dmi_system_id bigsmp_dmi_table[] = {
- { hp_ht_bigsmp, "HP ProLiant DL760 G2", {
- DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
- DMI_MATCH(DMI_BIOS_VERSION, "P44-"),
- }},
-
- { hp_ht_bigsmp, "HP ProLiant DL740", {
- DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
- DMI_MATCH(DMI_BIOS_VERSION, "P47-"),
- }},
+ { hp_ht_bigsmp, "HP ProLiant DL760 G2",
+ { DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
+ DMI_MATCH(DMI_BIOS_VERSION, "P44-"),}
+ },
+
+ { hp_ht_bigsmp, "HP ProLiant DL740",
+ { DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
+ DMI_MATCH(DMI_BIOS_VERSION, "P47-"),}
+ },
{ }
};
static int probe_bigsmp(void)
-{
+{
if (def_to_bigsmp)
- dmi_bigsmp = 1;
+ dmi_bigsmp = 1;
else
dmi_check_system(bigsmp_dmi_table);
- return dmi_bigsmp;
-}
+ return dmi_bigsmp;
+}
-struct genapic apic_bigsmp = APIC_INIT("bigsmp", probe_bigsmp);
+struct genapic apic_bigsmp = APIC_INIT("bigsmp", probe_bigsmp);
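probe_bigsmp() above relies on dmi_check_system() walking a dmi_system_id table until a callback returns non-zero. A sketch with made-up table contents; only the dmi_* interfaces are real:

#include <linux/dmi.h>
#include <linux/kernel.h>

static int found_example_box;

static int example_callback(const struct dmi_system_id *d)
{
	printk(KERN_INFO "%s detected\n", d->ident);
	found_example_box = 1;
	return 1;			/* non-zero stops the table walk */
}

static const struct dmi_system_id example_dmi_table[] = {
	{ example_callback, "Example Server",
	  { DMI_MATCH(DMI_BIOS_VENDOR, "HP"), }
	},
	{ }
};

/* dmi_check_system(example_dmi_table) walks the table, as probe_bigsmp() does. */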
diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c
index 1af0cc7648f0..9e835a11a13a 100644
--- a/arch/x86/mach-generic/default.c
+++ b/arch/x86/mach-generic/default.c
@@ -1,4 +1,4 @@
-/*
+/*
* Default generic APIC driver. This handles up to 8 CPUs.
*/
#define APIC_DEFINITION 1
@@ -19,8 +19,8 @@
/* should be called last. */
static int probe_default(void)
-{
+{
return 1;
-}
+}
-struct genapic apic_default = APIC_INIT("default", probe_default);
+struct genapic apic_default = APIC_INIT("default", probe_default);
diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c
index f410d3cb5659..c5ae751b994a 100644
--- a/arch/x86/mach-generic/probe.c
+++ b/arch/x86/mach-generic/probe.c
@@ -1,8 +1,9 @@
-/* Copyright 2003 Andi Kleen, SuSE Labs.
- * Subject to the GNU Public License, v.2
- *
+/*
+ * Copyright 2003 Andi Kleen, SuSE Labs.
+ * Subject to the GNU Public License, v.2
+ *
* Generic x86 APIC driver probe layer.
- */
+ */
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/string.h>
@@ -24,7 +25,7 @@ struct genapic *genapic = &apic_default;
static struct genapic *apic_probe[] __initdata = {
&apic_summit,
- &apic_bigsmp,
+ &apic_bigsmp,
&apic_es7000,
&apic_default, /* must be last */
NULL,
@@ -69,7 +70,7 @@ void __init generic_bigsmp_probe(void)
}
void __init generic_apic_probe(void)
-{
+{
if (!cmdline_apic) {
int i;
for (i = 0; apic_probe[i]; i++) {
@@ -83,40 +84,40 @@ void __init generic_apic_probe(void)
panic("Didn't find an APIC driver");
}
printk(KERN_INFO "Using APIC driver %s\n", genapic->name);
-}
+}
/* These functions can switch the APIC even after the initial ->probe() */
int __init mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid)
-{
+{
int i;
- for (i = 0; apic_probe[i]; ++i) {
- if (apic_probe[i]->mps_oem_check(mpc,oem,productid)) {
+ for (i = 0; apic_probe[i]; ++i) {
+ if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) {
if (!cmdline_apic) {
genapic = apic_probe[i];
printk(KERN_INFO "Switched to APIC driver `%s'.\n",
genapic->name);
}
return 1;
- }
- }
+ }
+ }
return 0;
-}
+}
int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
int i;
- for (i = 0; apic_probe[i]; ++i) {
- if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
+ for (i = 0; apic_probe[i]; ++i) {
+ if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
if (!cmdline_apic) {
genapic = apic_probe[i];
printk(KERN_INFO "Switched to APIC driver `%s'.\n",
genapic->name);
}
return 1;
- }
- }
- return 0;
+ }
+ }
+ return 0;
}
int hard_smp_processor_id(void)
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index 74883ccb8f73..a97ea0f35b1e 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -1,4 +1,4 @@
-/*
+/*
* APIC driver for the IBM "Summit" chipset.
*/
#define APIC_DEFINITION 1
@@ -19,9 +19,9 @@
#include <asm/mach-summit/mach_mpparse.h>
static int probe_summit(void)
-{
+{
/* probed later in mptable/ACPI hooks */
return 0;
-}
+}
-struct genapic apic_summit = APIC_INIT("summit", probe_summit);
+struct genapic apic_summit = APIC_INIT("summit", probe_summit);
diff --git a/arch/x86/mach-rdc321x/Makefile b/arch/x86/mach-rdc321x/Makefile
index 1faac8125e3d..8325b4ca431c 100644
--- a/arch/x86/mach-rdc321x/Makefile
+++ b/arch/x86/mach-rdc321x/Makefile
@@ -1,5 +1,5 @@
#
# Makefile for the RDC321x specific parts of the kernel
#
-obj-$(CONFIG_X86_RDC321X) := gpio.o platform.o wdt.o
+obj-$(CONFIG_X86_RDC321X) := gpio.o platform.o
diff --git a/arch/x86/mach-rdc321x/wdt.c b/arch/x86/mach-rdc321x/wdt.c
deleted file mode 100644
index ec5625ae7061..000000000000
--- a/arch/x86/mach-rdc321x/wdt.c
+++ /dev/null
@@ -1,275 +0,0 @@
-/*
- * RDC321x watchdog driver
- *
- * Copyright (C) 2007 Florian Fainelli <florian@openwrt.org>
- *
- * This driver is highly inspired from the cpu5_wdt driver
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/miscdevice.h>
-#include <linux/fs.h>
-#include <linux/init.h>
-#include <linux/ioport.h>
-#include <linux/timer.h>
-#include <linux/completion.h>
-#include <linux/jiffies.h>
-#include <linux/platform_device.h>
-#include <linux/watchdog.h>
-#include <linux/io.h>
-#include <linux/uaccess.h>
-
-#include <asm/mach-rdc321x/rdc321x_defs.h>
-
-#define RDC_WDT_MASK 0x80000000 /* Mask */
-#define RDC_WDT_EN 0x00800000 /* Enable bit */
-#define RDC_WDT_WTI 0x00200000 /* Generate CPU reset/NMI/WDT on timeout */
-#define RDC_WDT_RST 0x00100000 /* Reset bit */
-#define RDC_WDT_WIF 0x00040000 /* WDT IRQ Flag */
-#define RDC_WDT_IRT 0x00000100 /* IRQ Routing table */
-#define RDC_WDT_CNT 0x00000001 /* WDT count */
-
-#define RDC_CLS_TMR 0x80003844 /* Clear timer */
-
-#define RDC_WDT_INTERVAL (HZ/10+1)
-
-int nowayout = WATCHDOG_NOWAYOUT;
-module_param(nowayout, int, 0);
-MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
-
-static int ticks = 1000;
-
-/* some device data */
-
-static struct {
- struct completion stop;
- volatile int running;
- struct timer_list timer;
- volatile int queue;
- int default_ticks;
- unsigned long inuse;
-} rdc321x_wdt_device;
-
-/* generic helper functions */
-
-static void rdc321x_wdt_trigger(unsigned long unused)
-{
- if (rdc321x_wdt_device.running)
- ticks--;
-
- /* keep watchdog alive */
- outl(RDC_WDT_EN|inl(RDC3210_CFGREG_DATA), RDC3210_CFGREG_DATA);
-
- /* requeue?? */
- if (rdc321x_wdt_device.queue && ticks)
- mod_timer(&rdc321x_wdt_device.timer,
- jiffies + RDC_WDT_INTERVAL);
- else {
- /* ticks doesn't matter anyway */
- complete(&rdc321x_wdt_device.stop);
- }
-
-}
-
-static void rdc321x_wdt_reset(void)
-{
- ticks = rdc321x_wdt_device.default_ticks;
-}
-
-static void rdc321x_wdt_start(void)
-{
- if (!rdc321x_wdt_device.queue) {
- rdc321x_wdt_device.queue = 1;
-
- /* Clear the timer */
- outl(RDC_CLS_TMR, RDC3210_CFGREG_ADDR);
-
- /* Enable watchdog and set the timeout to 81.92 us */
- outl(RDC_WDT_EN|RDC_WDT_CNT, RDC3210_CFGREG_DATA);
-
- mod_timer(&rdc321x_wdt_device.timer,
- jiffies + RDC_WDT_INTERVAL);
- }
-
- /* if process dies, counter is not decremented */
- rdc321x_wdt_device.running++;
-}
-
-static int rdc321x_wdt_stop(void)
-{
- if (rdc321x_wdt_device.running)
- rdc321x_wdt_device.running = 0;
-
- ticks = rdc321x_wdt_device.default_ticks;
-
- return -EIO;
-}
-
-/* filesystem operations */
-
-static int rdc321x_wdt_open(struct inode *inode, struct file *file)
-{
- if (test_and_set_bit(0, &rdc321x_wdt_device.inuse))
- return -EBUSY;
-
- return nonseekable_open(inode, file);
-}
-
-static int rdc321x_wdt_release(struct inode *inode, struct file *file)
-{
- clear_bit(0, &rdc321x_wdt_device.inuse);
- return 0;
-}
-
-static int rdc321x_wdt_ioctl(struct inode *inode, struct file *file,
- unsigned int cmd, unsigned long arg)
-{
- void __user *argp = (void __user *)arg;
- unsigned int value;
- static struct watchdog_info ident = {
- .options = WDIOF_CARDRESET,
- .identity = "RDC321x WDT",
- };
-
- switch (cmd) {
- case WDIOC_KEEPALIVE:
- rdc321x_wdt_reset();
- break;
- case WDIOC_GETSTATUS:
- /* Read the value from the DATA register */
- value = inl(RDC3210_CFGREG_DATA);
- if (copy_to_user(argp, &value, sizeof(int)))
- return -EFAULT;
- break;
- case WDIOC_GETSUPPORT:
- if (copy_to_user(argp, &ident, sizeof(ident)))
- return -EFAULT;
- break;
- case WDIOC_SETOPTIONS:
- if (copy_from_user(&value, argp, sizeof(int)))
- return -EFAULT;
- switch (value) {
- case WDIOS_ENABLECARD:
- rdc321x_wdt_start();
- break;
- case WDIOS_DISABLECARD:
- return rdc321x_wdt_stop();
- default:
- return -EINVAL;
- }
- break;
- default:
- return -ENOTTY;
- }
- return 0;
-}
-
-static ssize_t rdc321x_wdt_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- if (!count)
- return -EIO;
-
- rdc321x_wdt_reset();
-
- return count;
-}
-
-static const struct file_operations rdc321x_wdt_fops = {
- .owner = THIS_MODULE,
- .llseek = no_llseek,
- .ioctl = rdc321x_wdt_ioctl,
- .open = rdc321x_wdt_open,
- .write = rdc321x_wdt_write,
- .release = rdc321x_wdt_release,
-};
-
-static struct miscdevice rdc321x_wdt_misc = {
- .minor = WATCHDOG_MINOR,
- .name = "watchdog",
- .fops = &rdc321x_wdt_fops,
-};
-
-static int __devinit rdc321x_wdt_probe(struct platform_device *pdev)
-{
- int err;
-
- err = misc_register(&rdc321x_wdt_misc);
- if (err < 0) {
- printk(KERN_ERR PFX "watchdog misc_register failed\n");
- return err;
- }
-
- /* Reset the watchdog */
- outl(RDC_WDT_RST, RDC3210_CFGREG_DATA);
-
- init_completion(&rdc321x_wdt_device.stop);
- rdc321x_wdt_device.queue = 0;
-
- clear_bit(0, &rdc321x_wdt_device.inuse);
-
- setup_timer(&rdc321x_wdt_device.timer, rdc321x_wdt_trigger, 0);
-
- rdc321x_wdt_device.default_ticks = ticks;
-
- printk(KERN_INFO PFX "watchdog init success\n");
-
- return 0;
-}
-
-static int rdc321x_wdt_remove(struct platform_device *pdev)
-{
- if (rdc321x_wdt_device.queue) {
- rdc321x_wdt_device.queue = 0;
- wait_for_completion(&rdc321x_wdt_device.stop);
- }
-
- misc_deregister(&rdc321x_wdt_misc);
-
- return 0;
-}
-
-static struct platform_driver rdc321x_wdt_driver = {
- .probe = rdc321x_wdt_probe,
- .remove = rdc321x_wdt_remove,
- .driver = {
- .owner = THIS_MODULE,
- .name = "rdc321x-wdt",
- },
-};
-
-static int __init rdc321x_wdt_init(void)
-{
- return platform_driver_register(&rdc321x_wdt_driver);
-}
-
-static void __exit rdc321x_wdt_exit(void)
-{
- platform_driver_unregister(&rdc321x_wdt_driver);
-}
-
-module_init(rdc321x_wdt_init);
-module_exit(rdc321x_wdt_exit);
-
-MODULE_AUTHOR("Florian Fainelli <florian@openwrt.org>");
-MODULE_DESCRIPTION("RDC321x watchdog driver");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 3cc8eb2f36a9..be7235bf105d 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -27,6 +27,7 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/arch_hooks.h>
+#include <asm/trampoline.h>
/* TLB state -- visible externally, indexed physically */
DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { &init_mm, 0 };
@@ -210,7 +211,7 @@ static int cpucount = 0;
/* steal a page from the bottom of memory for the trampoline and
* squirrel its address away here. This will be in kernel virtual
* space */
-static __u32 trampoline_base;
+unsigned char *trampoline_base;
/* The per cpu profile stuff - used in smp_local_timer_interrupt */
static DEFINE_PER_CPU(int, prof_multiplier) = 1;
@@ -429,15 +430,15 @@ void __init smp_store_cpu_info(int id)
}
/* set up the trampoline and return the physical address of the code */
-static __u32 __init setup_trampoline(void)
+unsigned long __init setup_trampoline(void)
{
/* these two are global symbols in trampoline.S */
extern const __u8 trampoline_end[];
extern const __u8 trampoline_data[];
- memcpy((__u8 *) trampoline_base, trampoline_data,
+ memcpy(trampoline_base, trampoline_data,
trampoline_end - trampoline_data);
- return virt_to_phys((__u8 *) trampoline_base);
+ return virt_to_phys(trampoline_base);
}
/* Routine initially called when a non-boot CPU is brought online */
@@ -520,13 +521,6 @@ static void __init do_boot_cpu(__u8 cpu)
& ~(voyager_extended_vic_processors
& voyager_allowed_boot_processors);
- /* This is an area in head.S which was used to set up the
- * initial kernel stack. We need to alter this to give the
- * booting CPU a new stack (taken from its idle process) */
- extern struct {
- __u8 *sp;
- unsigned short ss;
- } stack_start;
/* This is the format of the CPI IDT gate (in real mode) which
* we're hijacking to boot the CPU */
union IDTFormat {
@@ -1166,7 +1160,7 @@ void flush_tlb_all(void)
* is sorted out */
void __init smp_alloc_memory(void)
{
- trampoline_base = (__u32) alloc_bootmem_low_pages(PAGE_SIZE);
+ trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE);
if (__pa(trampoline_base) >= 0x93000)
BUG();
}
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index 760baeea5f07..4bab3b145392 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -276,6 +276,7 @@ asmlinkage void math_emulate(long arg)
entry_sel_off.offset = FPU_ORIG_EIP;
entry_sel_off.selector = FPU_CS;
entry_sel_off.opcode = (byte1 << 8) | FPU_modrm;
+ entry_sel_off.empty = 0;
FPU_rm = FPU_modrm & 7;
diff --git a/arch/x86/math-emu/reg_ld_str.c b/arch/x86/math-emu/reg_ld_str.c
index 799d4af5be66..02af772a24db 100644
--- a/arch/x86/math-emu/reg_ld_str.c
+++ b/arch/x86/math-emu/reg_ld_str.c
@@ -383,15 +383,15 @@ int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat)
int exp;
FPU_REG tmp;
+ l[0] = 0;
+ l[1] = 0;
if (st0_tag == TAG_Valid) {
reg_copy(st0_ptr, &tmp);
exp = exponent(&tmp);
if (exp < DOUBLE_Emin) { /* It may be a denormal */
addexponent(&tmp, -DOUBLE_Emin + 52); /* largest exp to be 51 */
-
- denormal_arg:
-
+denormal_arg:
if ((precision_loss = FPU_round_to_int(&tmp, st0_tag))) {
#ifdef PECULIAR_486
/* Did it round to a non-denormal ? */
@@ -477,8 +477,7 @@ int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat)
/* This is a special case: see sec 16.2.5.1 of the 80486 book */
/* Overflow to infinity */
- l[0] = 0x00000000; /* Set to */
- l[1] = 0x7ff00000; /* + INF */
+ l[1] = 0x7ff00000; /* Set to + INF */
} else {
if (precision_loss) {
if (increment)
@@ -492,8 +491,6 @@ int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat)
}
} else if (st0_tag == TAG_Zero) {
/* Number is zero */
- l[0] = 0;
- l[1] = 0;
} else if (st0_tag == TAG_Special) {
st0_tag = FPU_Special(st0_ptr);
if (st0_tag == TW_Denormal) {
@@ -508,7 +505,6 @@ int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat)
reg_copy(st0_ptr, &tmp);
goto denormal_arg;
} else if (st0_tag == TW_Infinity) {
- l[0] = 0;
l[1] = 0x7ff00000;
} else if (st0_tag == TW_NaN) {
/* Is it really a NaN ? */
@@ -532,7 +528,6 @@ int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, double __user *dfloat)
EXCEPTION(EX_Invalid);
if (!(control_word & CW_Invalid))
return 0;
- l[0] = 0;
l[1] = 0xfff80000;
}
}
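
The refactoring above pre-zeroes l[0] and l[1] at function entry, so the special-value branches only need to set the high word (0x7ff00000 for +INF, 0xfff80000 for the default NaN). A stand-alone check that a zero low word plus 0x7ff00000 really encodes +INF on a little-endian host (hypothetical harness, not part of the patch):

/* Hypothetical check: with l[0] pre-zeroed, l[1] = 0x7ff00000 encodes +INF. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned int l[2] = { 0x00000000, 0x7ff00000 };	/* low word, high word */
	double d;

	memcpy(&d, l, sizeof(d));	/* reinterpret the two 32-bit words */
	printf("%f\n", d);		/* prints "inf" on little-endian IEEE-754 hosts */
	return 0;
}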
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 983291096848..20941d2954e2 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,5 +1,17 @@
+obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
+ pat.o
+
+obj-$(CONFIG_X86_32) += pgtable_32.o
+
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o
+
+obj-$(CONFIG_HIGHMEM) += highmem_32.o
+
ifeq ($(CONFIG_X86_32),y)
-include ${srctree}/arch/x86/mm/Makefile_32
+obj-$(CONFIG_NUMA) += discontig_32.o
else
-include ${srctree}/arch/x86/mm/Makefile_64
+obj-$(CONFIG_NUMA) += numa_64.o
+obj-$(CONFIG_K8_NUMA) += k8topology_64.o
+obj-$(CONFIG_ACPI_NUMA) += srat_64.o
endif
diff --git a/arch/x86/mm/Makefile_32 b/arch/x86/mm/Makefile_32
deleted file mode 100644
index c36ae88bb543..000000000000
--- a/arch/x86/mm/Makefile_32
+++ /dev/null
@@ -1,9 +0,0 @@
-#
-# Makefile for the linux i386-specific parts of the memory manager.
-#
-
-obj-y := init_32.o pgtable_32.o fault.o ioremap.o extable.o pageattr.o mmap.o
-
-obj-$(CONFIG_NUMA) += discontig_32.o
-obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-obj-$(CONFIG_HIGHMEM) += highmem_32.o
diff --git a/arch/x86/mm/Makefile_64 b/arch/x86/mm/Makefile_64
deleted file mode 100644
index 688c8c28ac8f..000000000000
--- a/arch/x86/mm/Makefile_64
+++ /dev/null
@@ -1,9 +0,0 @@
-#
-# Makefile for the linux x86_64-specific parts of the memory manager.
-#
-
-obj-y := init_64.o fault.o ioremap.o extable.o pageattr.o mmap.o
-obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
-obj-$(CONFIG_NUMA) += numa_64.o
-obj-$(CONFIG_K8_NUMA) += k8topology_64.o
-obj-$(CONFIG_ACPI_NUMA) += srat_64.o
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 8e25e06ff730..eba0bbede7a6 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -37,7 +37,7 @@
#include <asm/e820.h>
#include <asm/setup.h>
#include <asm/mmzone.h>
-#include <bios_ebda.h>
+#include <asm/bios_ebda.h>
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
new file mode 100644
index 000000000000..6791b8334bc6
--- /dev/null
+++ b/arch/x86/mm/dump_pagetables.c
@@ -0,0 +1,354 @@
+/*
+ * Debug helper to dump the current kernel pagetables of the system
+ * so that we can see what the various memory ranges are set to.
+ *
+ * (C) Copyright 2008 Intel Corporation
+ *
+ * Author: Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+
+#include <asm/pgtable.h>
+
+/*
+ * The dumper groups pagetable entries of the same type into one, and for
+ * that it needs to keep some state when walking, and flush this state
+ * when a "break" in the continuity is found.
+ */
+struct pg_state {
+ int level;
+ pgprot_t current_prot;
+ unsigned long start_address;
+ unsigned long current_address;
+ const struct addr_marker *marker;
+};
+
+struct addr_marker {
+ unsigned long start_address;
+ const char *name;
+};
+
+/* Address space markers hints */
+static struct addr_marker address_markers[] = {
+ { 0, "User Space" },
+#ifdef CONFIG_X86_64
+ { 0x8000000000000000UL, "Kernel Space" },
+ { 0xffff810000000000UL, "Low Kernel Mapping" },
+ { VMALLOC_START, "vmalloc() Area" },
+ { VMEMMAP_START, "Vmemmap" },
+ { __START_KERNEL_map, "High Kernel Mapping" },
+ { MODULES_VADDR, "Modules" },
+ { MODULES_END, "End Modules" },
+#else
+ { PAGE_OFFSET, "Kernel Mapping" },
+ { 0/* VMALLOC_START */, "vmalloc() Area" },
+ { 0/*VMALLOC_END*/, "vmalloc() End" },
+# ifdef CONFIG_HIGHMEM
+ { 0/*PKMAP_BASE*/, "Persistent kmap() Area" },
+# endif
+ { 0/*FIXADDR_START*/, "Fixmap Area" },
+#endif
+ { -1, NULL } /* End of list */
+};
+
+/* Multipliers for offsets within the PTEs */
+#define PTE_LEVEL_MULT (PAGE_SIZE)
+#define PMD_LEVEL_MULT (PTRS_PER_PTE * PTE_LEVEL_MULT)
+#define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT)
+#define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT)
+
+/*
+ * Print a readable form of a pgprot_t to the seq_file
+ */
+static void printk_prot(struct seq_file *m, pgprot_t prot, int level)
+{
+ pgprotval_t pr = pgprot_val(prot);
+ static const char * const level_name[] =
+ { "cr3", "pgd", "pud", "pmd", "pte" };
+
+ if (!pgprot_val(prot)) {
+ /* Not present */
+ seq_printf(m, " ");
+ } else {
+ if (pr & _PAGE_USER)
+ seq_printf(m, "USR ");
+ else
+ seq_printf(m, " ");
+ if (pr & _PAGE_RW)
+ seq_printf(m, "RW ");
+ else
+ seq_printf(m, "ro ");
+ if (pr & _PAGE_PWT)
+ seq_printf(m, "PWT ");
+ else
+ seq_printf(m, " ");
+ if (pr & _PAGE_PCD)
+ seq_printf(m, "PCD ");
+ else
+ seq_printf(m, " ");
+
+ /* Bit 9 has a different meaning on level 3 vs 4 */
+ if (level <= 3) {
+ if (pr & _PAGE_PSE)
+ seq_printf(m, "PSE ");
+ else
+ seq_printf(m, " ");
+ } else {
+ if (pr & _PAGE_PAT)
+ seq_printf(m, "pat ");
+ else
+ seq_printf(m, " ");
+ }
+ if (pr & _PAGE_GLOBAL)
+ seq_printf(m, "GLB ");
+ else
+ seq_printf(m, " ");
+ if (pr & _PAGE_NX)
+ seq_printf(m, "NX ");
+ else
+ seq_printf(m, "x ");
+ }
+ seq_printf(m, "%s\n", level_name[level]);
+}
+
+/*
+ * On 64 bits, sign-extend the 48 bit address to 64 bit
+ */
+static unsigned long normalize_addr(unsigned long u)
+{
+#ifdef CONFIG_X86_64
+ return (signed long)(u << 16) >> 16;
+#else
+ return u;
+#endif
+}
+
+/*
+ * This function gets called on a break in a continuous series
+ * of PTE entries; the next one is different so we need to
+ * print what we collected so far.
+ */
+static void note_page(struct seq_file *m, struct pg_state *st,
+ pgprot_t new_prot, int level)
+{
+ pgprotval_t prot, cur;
+ static const char units[] = "KMGTPE";
+
+ /*
+ * If we have a "break" in the series, we need to flush the state that
+ * we have now. "break" is either changing perms, levels or
+ * address space marker.
+ */
+ prot = pgprot_val(new_prot) & ~(PTE_MASK);
+ cur = pgprot_val(st->current_prot) & ~(PTE_MASK);
+
+ if (!st->level) {
+ /* First entry */
+ st->current_prot = new_prot;
+ st->level = level;
+ st->marker = address_markers;
+ seq_printf(m, "---[ %s ]---\n", st->marker->name);
+ } else if (prot != cur || level != st->level ||
+ st->current_address >= st->marker[1].start_address) {
+ const char *unit = units;
+ unsigned long delta;
+
+ /*
+ * Now print the actual finished series
+ */
+ seq_printf(m, "0x%p-0x%p ",
+ (void *)st->start_address,
+ (void *)st->current_address);
+
+ delta = (st->current_address - st->start_address) >> 10;
+ while (!(delta & 1023) && unit[1]) {
+ delta >>= 10;
+ unit++;
+ }
+ seq_printf(m, "%9lu%c ", delta, *unit);
+ printk_prot(m, st->current_prot, st->level);
+
+ /*
+ * We print markers for special areas of address space,
+ * such as the start of vmalloc space etc.
+ * This helps in the interpretation.
+ */
+ if (st->current_address >= st->marker[1].start_address) {
+ st->marker++;
+ seq_printf(m, "---[ %s ]---\n", st->marker->name);
+ }
+
+ st->start_address = st->current_address;
+ st->current_prot = new_prot;
+ st->level = level;
+ }
+}
+
+static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
+ unsigned long P)
+{
+ int i;
+ pte_t *start;
+
+ start = (pte_t *) pmd_page_vaddr(addr);
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ pgprot_t prot = pte_pgprot(*start);
+
+ st->current_address = normalize_addr(P + i * PTE_LEVEL_MULT);
+ note_page(m, st, prot, 4);
+ start++;
+ }
+}
+
+#if PTRS_PER_PMD > 1
+
+static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,
+ unsigned long P)
+{
+ int i;
+ pmd_t *start;
+
+ start = (pmd_t *) pud_page_vaddr(addr);
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
+ if (!pmd_none(*start)) {
+ pgprotval_t prot = pmd_val(*start) & ~PTE_MASK;
+
+ if (pmd_large(*start) || !pmd_present(*start))
+ note_page(m, st, __pgprot(prot), 3);
+ else
+ walk_pte_level(m, st, *start,
+ P + i * PMD_LEVEL_MULT);
+ } else
+ note_page(m, st, __pgprot(0), 3);
+ start++;
+ }
+}
+
+#else
+#define walk_pmd_level(m,s,a,p) walk_pte_level(m,s,__pmd(pud_val(a)),p)
+#define pud_large(a) pmd_large(__pmd(pud_val(a)))
+#define pud_none(a) pmd_none(__pmd(pud_val(a)))
+#endif
+
+#if PTRS_PER_PUD > 1
+
+static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
+ unsigned long P)
+{
+ int i;
+ pud_t *start;
+
+ start = (pud_t *) pgd_page_vaddr(addr);
+
+ for (i = 0; i < PTRS_PER_PUD; i++) {
+ st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
+ if (!pud_none(*start)) {
+ pgprotval_t prot = pud_val(*start) & ~PTE_MASK;
+
+ if (pud_large(*start) || !pud_present(*start))
+ note_page(m, st, __pgprot(prot), 2);
+ else
+ walk_pmd_level(m, st, *start,
+ P + i * PUD_LEVEL_MULT);
+ } else
+ note_page(m, st, __pgprot(0), 2);
+
+ start++;
+ }
+}
+
+#else
+#define walk_pud_level(m,s,a,p) walk_pmd_level(m,s,__pud(pgd_val(a)),p)
+#define pgd_large(a) pud_large(__pud(pgd_val(a)))
+#define pgd_none(a) pud_none(__pud(pgd_val(a)))
+#endif
+
+static void walk_pgd_level(struct seq_file *m)
+{
+#ifdef CONFIG_X86_64
+ pgd_t *start = (pgd_t *) &init_level4_pgt;
+#else
+ pgd_t *start = swapper_pg_dir;
+#endif
+ int i;
+ struct pg_state st;
+
+ memset(&st, 0, sizeof(st));
+
+ for (i = 0; i < PTRS_PER_PGD; i++) {
+ st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
+ if (!pgd_none(*start)) {
+ pgprotval_t prot = pgd_val(*start) & ~PTE_MASK;
+
+ if (pgd_large(*start) || !pgd_present(*start))
+ note_page(m, &st, __pgprot(prot), 1);
+ else
+ walk_pud_level(m, &st, *start,
+ i * PGD_LEVEL_MULT);
+ } else
+ note_page(m, &st, __pgprot(0), 1);
+
+ start++;
+ }
+
+ /* Flush out the last page */
+ st.current_address = normalize_addr(PTRS_PER_PGD*PGD_LEVEL_MULT);
+ note_page(m, &st, __pgprot(0), 0);
+}
+
+static int ptdump_show(struct seq_file *m, void *v)
+{
+ walk_pgd_level(m);
+ return 0;
+}
+
+static int ptdump_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, ptdump_show, NULL);
+}
+
+static const struct file_operations ptdump_fops = {
+ .open = ptdump_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+int pt_dump_init(void)
+{
+ struct dentry *pe;
+
+#ifdef CONFIG_X86_32
+ /* Not a compile-time constant on x86-32 */
+ address_markers[2].start_address = VMALLOC_START;
+ address_markers[3].start_address = VMALLOC_END;
+# ifdef CONFIG_HIGHMEM
+ address_markers[4].start_address = PKMAP_BASE;
+ address_markers[5].start_address = FIXADDR_START;
+# else
+ address_markers[4].start_address = FIXADDR_START;
+# endif
+#endif
+
+ pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
+ &ptdump_fops);
+ if (!pe)
+ return -ENOMEM;
+
+ return 0;
+}
+
+__initcall(pt_dump_init);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
+MODULE_DESCRIPTION("Kernel debugging helper that dumps pagetables");
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index ec08d8389850..fd7e1798c75a 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -639,7 +639,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
#ifdef CONFIG_X86_32
/* It's safe to allow irq's after cr2 has been saved and the vmalloc
fault has been handled. */
- if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
+ if (regs->flags & (X86_EFLAGS_IF | X86_VM_MASK))
local_irq_enable();
/*
@@ -976,9 +976,5 @@ void vmalloc_sync_all(void)
if (address == start)
start = address + PGDIR_SIZE;
}
- /* Check that there is no need to do the same for the modules area. */
- BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
- BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
- (__START_KERNEL & PGDIR_MASK)));
#endif
}
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index ee1091a46964..1500dc8d63e4 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -51,6 +51,8 @@
unsigned int __VMALLOC_RESERVE = 128 << 20;
+unsigned long max_pfn_mapped;
+
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
unsigned long highstart_pfn, highend_pfn;
@@ -179,8 +181,13 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
/*
* Map with big pages if possible, otherwise
* create normal page tables:
+ *
+ * Don't use a large page for the first 2/4MB of memory
+ * because there are often fixed size MTRRs in there
+ * and overlapping MTRRs into large pages can cause
+ * slowdowns.
*/
- if (cpu_has_pse) {
+ if (cpu_has_pse && !(pgd_idx == 0 && pmd_idx == 0)) {
unsigned int addr2;
pgprot_t prot = PAGE_KERNEL_LARGE;
@@ -194,6 +201,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
set_pmd(pmd, pfn_pmd(pfn, prot));
pfn += PTRS_PER_PTE;
+ max_pfn_mapped = pfn;
continue;
}
pte = one_page_table_init(pmd);
@@ -208,6 +216,7 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
set_pte(pte, pfn_pte(pfn, prot));
}
+ max_pfn_mapped = pfn;
}
}
}
@@ -723,25 +732,17 @@ void mark_rodata_ro(void)
unsigned long start = PFN_ALIGN(_text);
unsigned long size = PFN_ALIGN(_etext) - start;
-#ifndef CONFIG_KPROBES
-#ifdef CONFIG_HOTPLUG_CPU
- /* It must still be possible to apply SMP alternatives. */
- if (num_possible_cpus() <= 1)
-#endif
- {
- set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
- printk(KERN_INFO "Write protecting the kernel text: %luk\n",
- size >> 10);
+ set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
+ printk(KERN_INFO "Write protecting the kernel text: %luk\n",
+ size >> 10);
#ifdef CONFIG_CPA_DEBUG
- printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
- start, start+size);
- set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT);
+ printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
+ start, start+size);
+ set_pages_rw(virt_to_page(start), size>>PAGE_SHIFT);
- printk(KERN_INFO "Testing CPA: write protecting again\n");
- set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
-#endif
- }
+ printk(KERN_INFO "Testing CPA: write protecting again\n");
+ set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
#endif
start += size;
size = (unsigned long)__end_rodata - start;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a02a14f0f324..1076097dcab2 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -54,6 +54,26 @@ static unsigned long dma_reserve __initdata;
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+int direct_gbpages __meminitdata
+#ifdef CONFIG_DIRECT_GBPAGES
+ = 1
+#endif
+;
+
+static int __init parse_direct_gbpages_off(char *arg)
+{
+ direct_gbpages = 0;
+ return 0;
+}
+early_param("nogbpages", parse_direct_gbpages_off);
+
+static int __init parse_direct_gbpages_on(char *arg)
+{
+ direct_gbpages = 1;
+ return 0;
+}
+early_param("gbpages", parse_direct_gbpages_on);
+
/*
* NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the
* physical space so we can cache the place of the first one and move
@@ -69,9 +89,6 @@ void show_mem(void)
printk(KERN_INFO "Mem-info:\n");
show_free_areas();
- printk(KERN_INFO "Free swap: %6ldkB\n",
- nr_swap_pages << (PAGE_SHIFT-10));
-
for_each_online_pgdat(pgdat) {
for (i = 0; i < pgdat->node_spanned_pages; ++i) {
/*
@@ -296,7 +313,7 @@ __meminit void early_iounmap(void *addr, unsigned long size)
__flush_tlb_all();
}
-static void __meminit
+static unsigned long __meminit
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
{
int i = pmd_index(address);
@@ -318,21 +335,26 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
set_pte((pte_t *)pmd,
pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
}
+ return address;
}
-static void __meminit
+static unsigned long __meminit
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
{
pmd_t *pmd = pmd_offset(pud, 0);
+ unsigned long last_map_addr;
+
spin_lock(&init_mm.page_table_lock);
- phys_pmd_init(pmd, address, end);
+ last_map_addr = phys_pmd_init(pmd, address, end);
spin_unlock(&init_mm.page_table_lock);
__flush_tlb_all();
+ return last_map_addr;
}
-static void __meminit
+static unsigned long __meminit
phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
{
+ unsigned long last_map_addr = end;
int i = pud_index(addr);
for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE) {
@@ -350,7 +372,15 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
}
if (pud_val(*pud)) {
- phys_pmd_update(pud, addr, end);
+ if (!pud_large(*pud))
+ last_map_addr = phys_pmd_update(pud, addr, end);
+ continue;
+ }
+
+ if (direct_gbpages) {
+ set_pte((pte_t *)pud,
+ pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
+ last_map_addr = (addr & PUD_MASK) + PUD_SIZE;
continue;
}
@@ -358,12 +388,14 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
spin_lock(&init_mm.page_table_lock);
set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
- phys_pmd_init(pmd, addr, end);
+ last_map_addr = phys_pmd_init(pmd, addr, end);
spin_unlock(&init_mm.page_table_lock);
unmap_low_page(pmd);
}
__flush_tlb_all();
+
+ return last_map_addr >> PAGE_SHIFT;
}
static void __init find_early_table_space(unsigned long end)
@@ -371,9 +403,11 @@ static void __init find_early_table_space(unsigned long end)
unsigned long puds, pmds, tables, start;
puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
- pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
- tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
- round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
+ tables = round_up(puds * sizeof(pud_t), PAGE_SIZE);
+ if (!direct_gbpages) {
+ pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
+ tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
+ }
/*
* RED-PEN putting page tables only on node 0 could
@@ -393,16 +427,135 @@ static void __init find_early_table_space(unsigned long end)
(table_start << PAGE_SHIFT) + tables);
}
+static void __init init_gbpages(void)
+{
+ if (direct_gbpages && cpu_has_gbpages)
+ printk(KERN_INFO "Using GB pages for direct mapping\n");
+ else
+ direct_gbpages = 0;
+}
+
+#ifdef CONFIG_MEMTEST_BOOTPARAM
+
+static void __init memtest(unsigned long start_phys, unsigned long size,
+ unsigned pattern)
+{
+ unsigned long i;
+ unsigned long *start;
+ unsigned long start_bad;
+ unsigned long last_bad;
+ unsigned long val;
+ unsigned long start_phys_aligned;
+ unsigned long count;
+ unsigned long incr;
+
+ switch (pattern) {
+ case 0:
+ val = 0UL;
+ break;
+ case 1:
+ val = -1UL;
+ break;
+ case 2:
+ val = 0x5555555555555555UL;
+ break;
+ case 3:
+ val = 0xaaaaaaaaaaaaaaaaUL;
+ break;
+ default:
+ return;
+ }
+
+ incr = sizeof(unsigned long);
+ start_phys_aligned = ALIGN(start_phys, incr);
+ count = (size - (start_phys_aligned - start_phys))/incr;
+ start = __va(start_phys_aligned);
+ start_bad = 0;
+ last_bad = 0;
+
+ for (i = 0; i < count; i++)
+ start[i] = val;
+ for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
+ if (*start != val) {
+ if (start_phys_aligned == last_bad + incr) {
+ last_bad += incr;
+ } else {
+ if (start_bad) {
+ printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved",
+ val, start_bad, last_bad + incr);
+ reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
+ }
+ start_bad = last_bad = start_phys_aligned;
+ }
+ }
+ }
+ if (start_bad) {
+ printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved",
+ val, start_bad, last_bad + incr);
+ reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
+ }
+
+}
+
+static int memtest_pattern __initdata = CONFIG_MEMTEST_BOOTPARAM_VALUE;
+
+static int __init parse_memtest(char *arg)
+{
+ if (arg)
+ memtest_pattern = simple_strtoul(arg, NULL, 0);
+ return 0;
+}
+
+early_param("memtest", parse_memtest);
+
+static void __init early_memtest(unsigned long start, unsigned long end)
+{
+ unsigned long t_start, t_size;
+ unsigned pattern;
+
+ if (!memtest_pattern)
+ return;
+
+ printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern);
+ for (pattern = 0; pattern < memtest_pattern; pattern++) {
+ t_start = start;
+ t_size = 0;
+ while (t_start < end) {
+ t_start = find_e820_area_size(t_start, &t_size, 1);
+
+ /* done ? */
+ if (t_start >= end)
+ break;
+ if (t_start + t_size > end)
+ t_size = end - t_start;
+
+ printk(KERN_CONT "\n %016lx - %016lx pattern %d",
+ t_start, t_start + t_size, pattern);
+
+ memtest(t_start, t_size, pattern);
+
+ t_start += t_size;
+ }
+ }
+ printk(KERN_CONT "\n");
+}
+#else
+static void __init early_memtest(unsigned long start, unsigned long end)
+{
+}
+#endif
+
/*
* Setup the direct mapping of the physical memory at PAGE_OFFSET.
* This runs before bootmem is initialized and gets pages directly from
* the physical memory. To access them they are temporarily mapped.
*/
-void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
+unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned long end)
{
- unsigned long next;
+ unsigned long next, last_map_addr = end;
+ unsigned long start_phys = start, end_phys = end;
- pr_debug("init_memory_mapping\n");
+ printk(KERN_INFO "init_memory_mapping\n");
/*
* Find space for the kernel direct mapping tables.
@@ -411,8 +564,10 @@ void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
* memory mapped. Unfortunately this is done currently before the
* nodes are discovered.
*/
- if (!after_bootmem)
+ if (!after_bootmem) {
+ init_gbpages();
find_early_table_space(end);
+ }
start = (unsigned long)__va(start);
end = (unsigned long)__va(end);
@@ -430,7 +585,7 @@ void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
next = start + PGDIR_SIZE;
if (next > end)
next = end;
- phys_pud_init(pud, __pa(start), __pa(next));
+ last_map_addr = phys_pud_init(pud, __pa(start), __pa(next));
if (!after_bootmem)
set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
unmap_low_page(pud);
@@ -443,6 +598,11 @@ void __init_refok init_memory_mapping(unsigned long start, unsigned long end)
if (!after_bootmem)
reserve_early(table_start << PAGE_SHIFT,
table_end << PAGE_SHIFT, "PGTABLE");
+
+ if (!after_bootmem)
+ early_memtest(start_phys, end_phys);
+
+ return last_map_addr;
}
#ifndef CONFIG_NUMA
@@ -482,11 +642,13 @@ int arch_add_memory(int nid, u64 start, u64 size)
{
struct pglist_data *pgdat = NODE_DATA(nid);
struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
- unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long last_mapped_pfn, start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
int ret;
- init_memory_mapping(start, start + size-1);
+ last_mapped_pfn = init_memory_mapping(start, start + size-1);
+ if (last_mapped_pfn > max_pfn_mapped)
+ max_pfn_mapped = last_mapped_pfn;
ret = __add_pages(zone, start_pfn, nr_pages);
WARN_ON(1);
@@ -596,24 +758,7 @@ EXPORT_SYMBOL_GPL(rodata_test_data);
void mark_rodata_ro(void)
{
- unsigned long start = (unsigned long)_stext, end;
-
-#ifdef CONFIG_HOTPLUG_CPU
- /* It must still be possible to apply SMP alternatives. */
- if (num_possible_cpus() > 1)
- start = (unsigned long)_etext;
-#endif
-
-#ifdef CONFIG_KPROBES
- start = (unsigned long)__start_rodata;
-#endif
-
- end = (unsigned long)__end_rodata;
- start = (start + PAGE_SIZE - 1) & PAGE_MASK;
- end &= PAGE_MASK;
- if (end <= start)
- return;
-
+ unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
(end - start) >> 10);
@@ -636,6 +781,7 @@ void mark_rodata_ro(void)
set_memory_ro(start, (end-start) >> PAGE_SHIFT);
#endif
}
+
#endif
#ifdef CONFIG_BLK_DEV_INITRD
@@ -657,7 +803,7 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
* This can happen with kdump kernels when accessing
* firmware tables:
*/
- if (pfn < end_pfn_map)
+ if (pfn < max_pfn_mapped)
return;
printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 794895c6dcc9..c590fd200e29 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -19,11 +19,7 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
-
-enum ioremap_mode {
- IOR_MODE_UNCACHED,
- IOR_MODE_CACHED,
-};
+#include <asm/pat.h>
#ifdef CONFIG_X86_64
@@ -35,11 +31,23 @@ unsigned long __phys_addr(unsigned long x)
}
EXPORT_SYMBOL(__phys_addr);
+static inline int phys_addr_valid(unsigned long addr)
+{
+ return addr < (1UL << boot_cpu_data.x86_phys_bits);
+}
+
+#else
+
+static inline int phys_addr_valid(unsigned long addr)
+{
+ return 1;
+}
+
#endif
int page_is_ram(unsigned long pagenr)
{
- unsigned long addr, end;
+ resource_size_t addr, end;
int i;
/*
@@ -78,19 +86,22 @@ int page_is_ram(unsigned long pagenr)
* Fix up the linear direct mapping of the kernel to avoid cache attribute
* conflicts.
*/
-static int ioremap_change_attr(unsigned long vaddr, unsigned long size,
- enum ioremap_mode mode)
+int ioremap_change_attr(unsigned long vaddr, unsigned long size,
+ unsigned long prot_val)
{
unsigned long nrpages = size >> PAGE_SHIFT;
int err;
- switch (mode) {
- case IOR_MODE_UNCACHED:
+ switch (prot_val) {
+ case _PAGE_CACHE_UC:
default:
- err = set_memory_uc(vaddr, nrpages);
+ err = _set_memory_uc(vaddr, nrpages);
+ break;
+ case _PAGE_CACHE_WC:
+ err = _set_memory_wc(vaddr, nrpages);
break;
- case IOR_MODE_CACHED:
- err = set_memory_wb(vaddr, nrpages);
+ case _PAGE_CACHE_WB:
+ err = _set_memory_wb(vaddr, nrpages);
break;
}
@@ -107,17 +118,27 @@ static int ioremap_change_attr(unsigned long vaddr, unsigned long size,
* caller shouldn't need to know that small detail.
*/
static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
- enum ioremap_mode mode)
+ unsigned long prot_val)
{
- unsigned long pfn, offset, last_addr, vaddr;
+ unsigned long pfn, offset, vaddr;
+ resource_size_t last_addr;
struct vm_struct *area;
+ unsigned long new_prot_val;
pgprot_t prot;
+ int retval;
/* Don't allow wraparound or zero size */
last_addr = phys_addr + size - 1;
if (!size || last_addr < phys_addr)
return NULL;
+ if (!phys_addr_valid(phys_addr)) {
+ printk(KERN_WARNING "ioremap: invalid physical address %llx\n",
+ phys_addr);
+ WARN_ON_ONCE(1);
+ return NULL;
+ }
+
/*
* Don't remap the low PCI/ISA area, it's always mapped..
*/
@@ -127,25 +148,14 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
/*
* Don't allow anybody to remap normal RAM that we're using..
*/
- for (pfn = phys_addr >> PAGE_SHIFT; pfn < max_pfn_mapped &&
- (pfn << PAGE_SHIFT) < last_addr; pfn++) {
- if (page_is_ram(pfn) && pfn_valid(pfn) &&
- !PageReserved(pfn_to_page(pfn)))
- return NULL;
- }
+ for (pfn = phys_addr >> PAGE_SHIFT;
+ (pfn << PAGE_SHIFT) < last_addr; pfn++) {
- switch (mode) {
- case IOR_MODE_UNCACHED:
- default:
- /*
- * FIXME: we will use UC MINUS for now, as video fb drivers
- * depend on it. Upcoming ioremap_wc() will fix this behavior.
- */
- prot = PAGE_KERNEL_UC_MINUS;
- break;
- case IOR_MODE_CACHED:
- prot = PAGE_KERNEL;
- break;
+ int is_ram = page_is_ram(pfn);
+
+ if (is_ram && pfn_valid(pfn) && !PageReserved(pfn_to_page(pfn)))
+ return NULL;
+ WARN_ON_ONCE(is_ram);
}
/*
@@ -155,6 +165,49 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
phys_addr &= PAGE_MASK;
size = PAGE_ALIGN(last_addr+1) - phys_addr;
+ retval = reserve_memtype(phys_addr, phys_addr + size,
+ prot_val, &new_prot_val);
+ if (retval) {
+ pr_debug("Warning: reserve_memtype returned %d\n", retval);
+ return NULL;
+ }
+
+ if (prot_val != new_prot_val) {
+ /*
+ * Do not fall back to certain memory types with certain
+ * requested type:
+ * - request is uncached, return cannot be write-back
+ * - request is uncached, return cannot be write-combine
+ * - request is write-combine, return cannot be write-back
+ */
+ if ((prot_val == _PAGE_CACHE_UC &&
+ (new_prot_val == _PAGE_CACHE_WB ||
+ new_prot_val == _PAGE_CACHE_WC)) ||
+ (prot_val == _PAGE_CACHE_WC &&
+ new_prot_val == _PAGE_CACHE_WB)) {
+ pr_debug(
+ "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
+ phys_addr, phys_addr + size,
+ prot_val, new_prot_val);
+ free_memtype(phys_addr, phys_addr + size);
+ return NULL;
+ }
+ prot_val = new_prot_val;
+ }
+
+ switch (prot_val) {
+ case _PAGE_CACHE_UC:
+ default:
+ prot = PAGE_KERNEL_NOCACHE;
+ break;
+ case _PAGE_CACHE_WC:
+ prot = PAGE_KERNEL_WC;
+ break;
+ case _PAGE_CACHE_WB:
+ prot = PAGE_KERNEL;
+ break;
+ }
+
/*
* Ok, go for it..
*/
@@ -164,11 +217,13 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
area->phys_addr = phys_addr;
vaddr = (unsigned long) area->addr;
if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) {
+ free_memtype(phys_addr, phys_addr + size);
free_vm_area(area);
return NULL;
}
- if (ioremap_change_attr(vaddr, size, mode) < 0) {
+ if (ioremap_change_attr(vaddr, size, prot_val) < 0) {
+ free_memtype(phys_addr, phys_addr + size);
vunmap(area->addr);
return NULL;
}
@@ -199,13 +254,32 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
*/
void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
{
- return __ioremap(phys_addr, size, IOR_MODE_UNCACHED);
+ return __ioremap(phys_addr, size, _PAGE_CACHE_UC);
}
EXPORT_SYMBOL(ioremap_nocache);
+/**
+ * ioremap_wc - map memory into CPU space write combined
+ * @offset: bus address of the memory
+ * @size: size of the resource to map
+ *
+ * This version of ioremap ensures that the memory is marked write combining.
+ * Write combining allows faster writes to some hardware devices.
+ *
+ * Must be freed with iounmap.
+ */
+void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
+{
+ if (pat_wc_enabled)
+ return __ioremap(phys_addr, size, _PAGE_CACHE_WC);
+ else
+ return ioremap_nocache(phys_addr, size);
+}
+EXPORT_SYMBOL(ioremap_wc);
+
void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
{
- return __ioremap(phys_addr, size, IOR_MODE_CACHED);
+ return __ioremap(phys_addr, size, _PAGE_CACHE_WB);
}
EXPORT_SYMBOL(ioremap_cache);
@@ -252,6 +326,8 @@ void iounmap(volatile void __iomem *addr)
return;
}
+ free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));
+
/* Finally remove it */
o = remove_vm_area((void *)addr);
BUG_ON(p != o || o == NULL);
@@ -272,8 +348,8 @@ static int __init early_ioremap_debug_setup(char *str)
early_param("early_ioremap_debug", early_ioremap_debug_setup);
static __initdata int after_paging_init;
-static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
- __attribute__((aligned(PAGE_SIZE)));
+static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
+ __section(.bss.page_aligned);
static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
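
With the PAT plumbing in place, drivers can ask for a write-combined mapping directly; when PAT is disabled the call quietly degrades to an uncached mapping. A minimal sketch of how a driver might use the new ioremap_wc() for a frame-buffer aperture (hypothetical module; the base address and size are made up):

/* Hypothetical module mapping a frame-buffer aperture write-combined. */
#include <linux/module.h>
#include <linux/io.h>

#define DEMO_FB_BASE	0xd0000000UL		/* assumed aperture address */
#define DEMO_FB_SIZE	(8 * 1024 * 1024)

static void __iomem *demo_fb;

static int __init demo_init(void)
{
	/* Falls back to ioremap_nocache() when pat_wc_enabled is clear. */
	demo_fb = ioremap_wc(DEMO_FB_BASE, DEMO_FB_SIZE);
	if (!demo_fb)
		return -ENOMEM;

	memset_io(demo_fb, 0, DEMO_FB_SIZE);	/* streamed writes benefit from WC */
	return 0;
}

static void __exit demo_exit(void)
{
	iounmap(demo_fb);	/* also drops the reserved memtype */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");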
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 16b82ad34b96..2ea56f48f29b 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -31,13 +31,15 @@ bootmem_data_t plat_node_bdata[MAX_NUMNODES];
struct memnode memnode;
+#ifdef CONFIG_SMP
int x86_cpu_to_node_map_init[NR_CPUS] = {
[0 ... NR_CPUS-1] = NUMA_NO_NODE
};
void *x86_cpu_to_node_map_early_ptr;
+EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
+#endif
DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
EXPORT_PER_CPU_SYMBOL(x86_cpu_to_node_map);
-EXPORT_SYMBOL(x86_cpu_to_node_map_early_ptr);
s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
@@ -548,8 +550,6 @@ void __cpuinit numa_set_node(int cpu, int node)
{
int *cpu_to_node_map = x86_cpu_to_node_map_early_ptr;
- cpu_pda(cpu)->nodenumber = node;
-
if(cpu_to_node_map)
cpu_to_node_map[cpu] = node;
else if(per_cpu_offset(cpu))
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 7b79f6be4e7d..f7823a172868 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -9,6 +9,8 @@
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
#include <asm/e820.h>
#include <asm/processor.h>
@@ -17,6 +19,7 @@
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
#include <asm/proto.h>
+#include <asm/pat.h>
/*
* The current flushing context - we pass it instead of 5 arguments:
@@ -28,6 +31,7 @@ struct cpa_data {
int numpages;
int flushtlb;
unsigned long pfn;
+ unsigned force_split : 1;
};
#ifdef CONFIG_X86_64
@@ -259,6 +263,9 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
int i, do_split = 1;
unsigned int level;
+ if (cpa->force_split)
+ return 1;
+
spin_lock_irqsave(&pgd_lock, flags);
/*
* Check for races, another CPU might have split this page
@@ -535,7 +542,7 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
repeat:
kpte = lookup_address(address, &level);
if (!kpte)
- return primary ? -EINVAL : 0;
+ return 0;
old_pte = *kpte;
if (!pte_val(old_pte)) {
@@ -693,7 +700,8 @@ static inline int cache_attr(pgprot_t attr)
}
static int change_page_attr_set_clr(unsigned long addr, int numpages,
- pgprot_t mask_set, pgprot_t mask_clr)
+ pgprot_t mask_set, pgprot_t mask_clr,
+ int force_split)
{
struct cpa_data cpa;
int ret, cache, checkalias;
@@ -704,7 +712,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
*/
mask_set = canon_pgprot(mask_set);
mask_clr = canon_pgprot(mask_clr);
- if (!pgprot_val(mask_set) && !pgprot_val(mask_clr))
+ if (!pgprot_val(mask_set) && !pgprot_val(mask_clr) && !force_split)
return 0;
/* Ensure we are PAGE_SIZE aligned */
@@ -721,6 +729,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
cpa.mask_set = mask_set;
cpa.mask_clr = mask_clr;
cpa.flushtlb = 0;
+ cpa.force_split = force_split;
/* No alias checking for _NX bit modifications */
checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
@@ -759,26 +768,61 @@ out:
static inline int change_page_attr_set(unsigned long addr, int numpages,
pgprot_t mask)
{
- return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
+ return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0);
}
static inline int change_page_attr_clear(unsigned long addr, int numpages,
pgprot_t mask)
{
- return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
+ return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0);
}
-int set_memory_uc(unsigned long addr, int numpages)
+int _set_memory_uc(unsigned long addr, int numpages)
{
return change_page_attr_set(addr, numpages,
- __pgprot(_PAGE_PCD));
+ __pgprot(_PAGE_CACHE_UC));
+}
+
+int set_memory_uc(unsigned long addr, int numpages)
+{
+ if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
+ _PAGE_CACHE_UC, NULL))
+ return -EINVAL;
+
+ return _set_memory_uc(addr, numpages);
}
EXPORT_SYMBOL(set_memory_uc);
-int set_memory_wb(unsigned long addr, int numpages)
+int _set_memory_wc(unsigned long addr, int numpages)
+{
+ return change_page_attr_set(addr, numpages,
+ __pgprot(_PAGE_CACHE_WC));
+}
+
+int set_memory_wc(unsigned long addr, int numpages)
+{
+ if (!pat_wc_enabled)
+ return set_memory_uc(addr, numpages);
+
+ if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
+ _PAGE_CACHE_WC, NULL))
+ return -EINVAL;
+
+ return _set_memory_wc(addr, numpages);
+}
+EXPORT_SYMBOL(set_memory_wc);
+
+int _set_memory_wb(unsigned long addr, int numpages)
{
return change_page_attr_clear(addr, numpages,
- __pgprot(_PAGE_PCD | _PAGE_PWT));
+ __pgprot(_PAGE_CACHE_MASK));
+}
+
+int set_memory_wb(unsigned long addr, int numpages)
+{
+ free_memtype(addr, addr + numpages * PAGE_SIZE);
+
+ return _set_memory_wb(addr, numpages);
}
EXPORT_SYMBOL(set_memory_wb);
@@ -809,6 +853,12 @@ int set_memory_np(unsigned long addr, int numpages)
return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
}
+int set_memory_4k(unsigned long addr, int numpages)
+{
+ return change_page_attr_set_clr(addr, numpages, __pgprot(0),
+ __pgprot(0), 1);
+}
+
int set_pages_uc(struct page *page, int numpages)
{
unsigned long addr = (unsigned long)page_address(page);
@@ -918,6 +968,45 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
cpa_fill_pool(NULL);
}
+#ifdef CONFIG_DEBUG_FS
+static int dpa_show(struct seq_file *m, void *v)
+{
+ seq_puts(m, "DEBUG_PAGEALLOC\n");
+ seq_printf(m, "pool_size : %lu\n", pool_size);
+ seq_printf(m, "pool_pages : %lu\n", pool_pages);
+ seq_printf(m, "pool_low : %lu\n", pool_low);
+ seq_printf(m, "pool_used : %lu\n", pool_used);
+ seq_printf(m, "pool_failed : %lu\n", pool_failed);
+
+ return 0;
+}
+
+static int dpa_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, dpa_show, NULL);
+}
+
+static const struct file_operations dpa_fops = {
+ .open = dpa_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+int __init debug_pagealloc_proc_init(void)
+{
+ struct dentry *de;
+
+ de = debugfs_create_file("debug_pagealloc", 0600, NULL, NULL,
+ &dpa_fops);
+ if (!de)
+ return -ENOMEM;
+
+ return 0;
+}
+__initcall(debug_pagealloc_proc_init);
+#endif
+
#ifdef CONFIG_HIBERNATION
bool kernel_page_present(struct page *page)
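
set_memory_uc() and set_memory_wc() now reserve a memtype for the range before rewriting the attributes, and set_memory_wb() releases it again, so callers are expected to pair the calls. A sketch of that pairing on a single page (hypothetical fragment, error handling trimmed):

/* Hypothetical fragment: make one page uncached for device-visible data. */
#include <linux/gfp.h>
#include <asm/cacheflush.h>

static unsigned long demo_page;

static int demo_map_uncached(void)
{
	demo_page = __get_free_page(GFP_KERNEL);
	if (!demo_page)
		return -ENOMEM;

	/* Reserves the memtype, then flips the linear mapping to UC. */
	return set_memory_uc(demo_page, 1);
}

static void demo_unmap(void)
{
	set_memory_wb(demo_page, 1);	/* releases the memtype again */
	free_page(demo_page);
}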
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
new file mode 100644
index 000000000000..72c0f6097402
--- /dev/null
+++ b/arch/x86/mm/pat.c
@@ -0,0 +1,421 @@
+/*
+ * Handle caching attributes in page tables (PAT)
+ *
+ * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ * Suresh B Siddha <suresh.b.siddha@intel.com>
+ *
+ * Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen.
+ */
+
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/fs.h>
+
+#include <asm/msr.h>
+#include <asm/tlbflush.h>
+#include <asm/processor.h>
+#include <asm/pgtable.h>
+#include <asm/pat.h>
+#include <asm/e820.h>
+#include <asm/cacheflush.h>
+#include <asm/fcntl.h>
+#include <asm/mtrr.h>
+
+int pat_wc_enabled = 1;
+
+static u64 __read_mostly boot_pat_state;
+
+static int nopat(char *str)
+{
+ pat_wc_enabled = 0;
+ printk(KERN_INFO "x86: PAT support disabled.\n");
+
+ return 0;
+}
+early_param("nopat", nopat);
+
+static int pat_known_cpu(void)
+{
+ if (!pat_wc_enabled)
+ return 0;
+
+ if (cpu_has_pat)
+ return 1;
+
+ pat_wc_enabled = 0;
+ printk(KERN_INFO "CPU and/or kernel does not support PAT.\n");
+ return 0;
+}
+
+enum {
+ PAT_UC = 0, /* uncached */
+ PAT_WC = 1, /* Write combining */
+ PAT_WT = 4, /* Write Through */
+ PAT_WP = 5, /* Write Protected */
+ PAT_WB = 6, /* Write Back (default) */
+ PAT_UC_MINUS = 7, /* UC, but can be overridden by MTRR */
+};
+
+#define PAT(x,y) ((u64)PAT_ ## y << ((x)*8))
+
+void pat_init(void)
+{
+ u64 pat;
+
+#ifndef CONFIG_X86_PAT
+ nopat(NULL);
+#endif
+
+ /* Boot CPU enables PAT based on CPU feature */
+ if (!smp_processor_id() && !pat_known_cpu())
+ return;
+
+ /* APs enable PAT iff boot CPU has enabled it before */
+ if (smp_processor_id() && !pat_wc_enabled)
+ return;
+
+ /* Set PWT to Write-Combining. All other bits stay the same */
+ /*
+ * PTE encoding used in Linux:
+ * PAT
+ * |PCD
+ * ||PWT
+ * |||
+ * 000 WB _PAGE_CACHE_WB
+ * 001 WC _PAGE_CACHE_WC
+ * 010 UC- _PAGE_CACHE_UC_MINUS
+ * 011 UC _PAGE_CACHE_UC
+ * PAT bit unused
+ */
+ pat = PAT(0,WB) | PAT(1,WC) | PAT(2,UC_MINUS) | PAT(3,UC) |
+ PAT(4,WB) | PAT(5,WC) | PAT(6,UC_MINUS) | PAT(7,UC);
+
+ /* Boot CPU check */
+ if (!smp_processor_id()) {
+ rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
+ }
+
+ wrmsrl(MSR_IA32_CR_PAT, pat);
+ printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
+ smp_processor_id(), boot_pat_state, pat);
+}
+
+#undef PAT
+
+static char *cattr_name(unsigned long flags)
+{
+ switch (flags & _PAGE_CACHE_MASK) {
+ case _PAGE_CACHE_UC: return "uncached";
+ case _PAGE_CACHE_UC_MINUS: return "uncached-minus";
+ case _PAGE_CACHE_WB: return "write-back";
+ case _PAGE_CACHE_WC: return "write-combining";
+ default: return "broken";
+ }
+}
+
+/*
+ * The global memtype list keeps track of memory type for specific
+ * physical memory areas. Conflicting memory types in different
+ * mappings can cause CPU cache corruption. To avoid this we keep track.
+ *
+ * The list is sorted based on starting address and can contain multiple
+ * entries for each address (this allows reference counting for overlapping
+ * areas). All the aliases have the same cache attributes of course.
+ * Zero attributes are represented as holes.
+ *
+ * Currently the data structure is a list because the number of mappings
+ * is expected to be relatively small. If this should be a problem
+ * it could be changed to an rbtree or similar.
+ *
+ * memtype_lock protects the whole list.
+ */
+
+struct memtype {
+ u64 start;
+ u64 end;
+ unsigned long type;
+ struct list_head nd;
+};
+
+static LIST_HEAD(memtype_list);
+static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */
+
+/*
+ * Does intersection of PAT memory type and MTRR memory type and returns
+ * the resulting memory type as PAT understands it.
+ * (Type in pat and mtrr will not have same value)
+ * The intersection is based on "Effective Memory Type" tables in IA-32
+ * SDM vol 3a
+ */
+static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot,
+ unsigned long *ret_prot)
+{
+ unsigned long pat_type;
+ u8 mtrr_type;
+
+ mtrr_type = mtrr_type_lookup(start, end);
+ if (mtrr_type == 0xFF) { /* MTRR not enabled */
+ *ret_prot = prot;
+ return 0;
+ }
+ if (mtrr_type == 0xFE) { /* MTRR match error */
+ *ret_prot = _PAGE_CACHE_UC;
+ return -1;
+ }
+ if (mtrr_type != MTRR_TYPE_UNCACHABLE &&
+ mtrr_type != MTRR_TYPE_WRBACK &&
+ mtrr_type != MTRR_TYPE_WRCOMB) { /* MTRR type unhandled */
+ *ret_prot = _PAGE_CACHE_UC;
+ return -1;
+ }
+
+ pat_type = prot & _PAGE_CACHE_MASK;
+ prot &= (~_PAGE_CACHE_MASK);
+
+ /* Currently doing intersection by hand. Optimize it later. */
+ if (pat_type == _PAGE_CACHE_WC) {
+ *ret_prot = prot | _PAGE_CACHE_WC;
+ } else if (pat_type == _PAGE_CACHE_UC_MINUS) {
+ *ret_prot = prot | _PAGE_CACHE_UC_MINUS;
+ } else if (pat_type == _PAGE_CACHE_UC ||
+ mtrr_type == MTRR_TYPE_UNCACHABLE) {
+ *ret_prot = prot | _PAGE_CACHE_UC;
+ } else if (mtrr_type == MTRR_TYPE_WRCOMB) {
+ *ret_prot = prot | _PAGE_CACHE_WC;
+ } else {
+ *ret_prot = prot | _PAGE_CACHE_WB;
+ }
+
+ return 0;
+}
+
+int reserve_memtype(u64 start, u64 end, unsigned long req_type,
+ unsigned long *ret_type)
+{
+ struct memtype *new_entry = NULL;
+ struct memtype *parse;
+ unsigned long actual_type;
+ int err = 0;
+
+ /* Only track when pat_wc_enabled */
+ if (!pat_wc_enabled) {
+ if (ret_type)
+ *ret_type = req_type;
+
+ return 0;
+ }
+
+ /* Low ISA region is always mapped WB in page table. No need to track */
+ if (start >= ISA_START_ADDRESS && (end - 1) <= ISA_END_ADDRESS) {
+ if (ret_type)
+ *ret_type = _PAGE_CACHE_WB;
+
+ return 0;
+ }
+
+ req_type &= _PAGE_CACHE_MASK;
+ err = pat_x_mtrr_type(start, end, req_type, &actual_type);
+ if (err) {
+ if (ret_type)
+ *ret_type = actual_type;
+
+ return -EINVAL;
+ }
+
+ new_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL);
+ if (!new_entry)
+ return -ENOMEM;
+
+ new_entry->start = start;
+ new_entry->end = end;
+ new_entry->type = actual_type;
+
+ if (ret_type)
+ *ret_type = actual_type;
+
+ spin_lock(&memtype_lock);
+
+ /* Search for existing mapping that overlaps the current range */
+ list_for_each_entry(parse, &memtype_list, nd) {
+ struct memtype *saved_ptr;
+
+ if (parse->start >= end) {
+ printk("New Entry\n");
+ list_add(&new_entry->nd, parse->nd.prev);
+ new_entry = NULL;
+ break;
+ }
+
+ if (start <= parse->start && end >= parse->start) {
+ if (actual_type != parse->type && ret_type) {
+ actual_type = parse->type;
+ *ret_type = actual_type;
+ new_entry->type = actual_type;
+ }
+
+ if (actual_type != parse->type) {
+ printk(
+ KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
+ current->comm, current->pid,
+ start, end,
+ cattr_name(actual_type),
+ cattr_name(parse->type));
+ err = -EBUSY;
+ break;
+ }
+
+ saved_ptr = parse;
+ /*
+ * Check to see whether the request overlaps more
+ * than one entry in the list
+ */
+ list_for_each_entry_continue(parse, &memtype_list, nd) {
+ if (end <= parse->start) {
+ break;
+ }
+
+ if (actual_type != parse->type) {
+ printk(
+ KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
+ current->comm, current->pid,
+ start, end,
+ cattr_name(actual_type),
+ cattr_name(parse->type));
+ err = -EBUSY;
+ break;
+ }
+ }
+
+ if (err) {
+ break;
+ }
+
+ printk("Overlap at 0x%Lx-0x%Lx\n",
+ saved_ptr->start, saved_ptr->end);
+ /* No conflict. Go ahead and add this new entry */
+ list_add(&new_entry->nd, saved_ptr->nd.prev);
+ new_entry = NULL;
+ break;
+ }
+
+ if (start < parse->end) {
+ if (actual_type != parse->type && ret_type) {
+ actual_type = parse->type;
+ *ret_type = actual_type;
+ new_entry->type = actual_type;
+ }
+
+ if (actual_type != parse->type) {
+ printk(
+ KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
+ current->comm, current->pid,
+ start, end,
+ cattr_name(actual_type),
+ cattr_name(parse->type));
+ err = -EBUSY;
+ break;
+ }
+
+ saved_ptr = parse;
+ /*
+ * Check to see whether the request overlaps more
+ * than one entry in the list
+ */
+ list_for_each_entry_continue(parse, &memtype_list, nd) {
+ if (end <= parse->start) {
+ break;
+ }
+
+ if (actual_type != parse->type) {
+ printk(
+ KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
+ current->comm, current->pid,
+ start, end,
+ cattr_name(actual_type),
+ cattr_name(parse->type));
+ err = -EBUSY;
+ break;
+ }
+ }
+
+ if (err) {
+ break;
+ }
+
+ printk("Overlap at 0x%Lx-0x%Lx\n",
+ saved_ptr->start, saved_ptr->end);
+ /* No conflict. Go ahead and add this new entry */
+ list_add(&new_entry->nd, &saved_ptr->nd);
+ new_entry = NULL;
+ break;
+ }
+ }
+
+ if (err) {
+ printk(
+ "reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n",
+ start, end, cattr_name(new_entry->type),
+ cattr_name(req_type));
+ kfree(new_entry);
+ spin_unlock(&memtype_lock);
+ return err;
+ }
+
+ if (new_entry) {
+ /* No conflict. Not yet added to the list. Add to the tail */
+ list_add_tail(&new_entry->nd, &memtype_list);
+ printk("New Entry\n");
+ }
+
+ if (ret_type) {
+ printk(
+ "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
+ start, end, cattr_name(actual_type),
+ cattr_name(req_type), cattr_name(*ret_type));
+ } else {
+ printk(
+ "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n",
+ start, end, cattr_name(actual_type),
+ cattr_name(req_type));
+ }
+
+ spin_unlock(&memtype_lock);
+ return err;
+}
+
+int free_memtype(u64 start, u64 end)
+{
+ struct memtype *ml;
+ int err = -EINVAL;
+
+ /* Only track when pat_wc_enabled */
+ if (!pat_wc_enabled) {
+ return 0;
+ }
+
+ /* Low ISA region is always mapped WB. No need to track */
+ if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS) {
+ return 0;
+ }
+
+ spin_lock(&memtype_lock);
+ list_for_each_entry(ml, &memtype_list, nd) {
+ if (ml->start == start && ml->end == end) {
+ list_del(&ml->nd);
+ kfree(ml);
+ err = 0;
+ break;
+ }
+ }
+ spin_unlock(&memtype_lock);
+
+ if (err) {
+ printk(KERN_DEBUG "%s:%d freeing invalid memtype %Lx-%Lx\n",
+ current->comm, current->pid, start, end);
+ }
+
+ printk( "free_memtype request 0x%Lx-0x%Lx\n", start, end);
+ return err;
+}
+
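
The PAT() macro places one memory-type value per byte of the IA32_CR_PAT MSR, so the table programmed by pat_init() (WB, WC, UC-, UC in both halves) works out to 0x0007010600070106. A stand-alone check of that layout (hypothetical harness, not part of the patch):

/* Hypothetical check of the PAT MSR value assembled by pat_init(). */
#include <stdio.h>

enum {
	PAT_UC = 0, PAT_WC = 1, PAT_WT = 4, PAT_WP = 5, PAT_WB = 6, PAT_UC_MINUS = 7,
};

#define PAT(x, y)	((unsigned long long)PAT_ ## y << ((x) * 8))

int main(void)
{
	unsigned long long pat =
		PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
		PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);

	/* Entry 0 = WB, 1 = WC, 2 = UC-, 3 = UC, repeated for entries 4-7. */
	printf("IA32_CR_PAT = 0x%016llx\n", pat);	/* 0x0007010600070106 */
	return 0;
}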
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 2f9e9afcb9f4..3165ec0672bd 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -36,7 +36,6 @@ void show_mem(void)
printk(KERN_INFO "Mem-info:\n");
show_free_areas();
- printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
for_each_online_pgdat(pgdat) {
pgdat_resize_lock(pgdat, &flags);
for (i = 0; i < pgdat->node_spanned_pages; ++i) {
@@ -381,3 +380,10 @@ void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
}
#endif
+
+int pmd_bad(pmd_t pmd)
+{
+ WARN_ON_ONCE(pmd_bad_v1(pmd) != pmd_bad_v2(pmd));
+
+ return pmd_bad_v1(pmd);
+}
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 845001c617cc..1bae9c855ceb 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -20,6 +20,7 @@
#include <asm/proto.h>
#include <asm/numa.h>
#include <asm/e820.h>
+#include <asm/genapic.h>
int acpi_numa __initdata;
@@ -132,7 +133,6 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
int pxm, node;
int apic_id;
- apic_id = pa->apic_id;
if (srat_disabled())
return;
if (pa->header.length != sizeof(struct acpi_srat_cpu_affinity)) {
@@ -148,6 +148,11 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
bad_srat();
return;
}
+
+ if (is_uv_system())
+ apic_id = (pa->apic_id << 8) | pa->local_sapic_eid;
+ else
+ apic_id = pa->apic_id;
apicid_to_node[apic_id] = node;
acpi_numa = 1;
printk(KERN_INFO "SRAT: PXM %u -> APIC %u -> Node %u\n",
diff --git a/arch/x86/oprofile/init.c b/arch/x86/oprofile/init.c
index 5341d481d92f..cdfe4c54deca 100644
--- a/arch/x86/oprofile/init.c
+++ b/arch/x86/oprofile/init.c
@@ -10,18 +10,19 @@
#include <linux/oprofile.h>
#include <linux/init.h>
#include <linux/errno.h>
-
-/* We support CPUs that have performance counters like the Pentium Pro
+
+/*
+ * We support CPUs that have performance counters like the Pentium Pro
* with the NMI mode driver.
*/
-
-extern int op_nmi_init(struct oprofile_operations * ops);
-extern int op_nmi_timer_init(struct oprofile_operations * ops);
+
+extern int op_nmi_init(struct oprofile_operations *ops);
+extern int op_nmi_timer_init(struct oprofile_operations *ops);
extern void op_nmi_exit(void);
extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth);
-int __init oprofile_arch_init(struct oprofile_operations * ops)
+int __init oprofile_arch_init(struct oprofile_operations *ops)
{
int ret;
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c
index 1418e36ae7ab..e3ecb71b5790 100644
--- a/arch/x86/oprofile/nmi_timer_int.c
+++ b/arch/x86/oprofile/nmi_timer_int.c
@@ -17,14 +17,14 @@
#include <asm/nmi.h>
#include <asm/apic.h>
#include <asm/ptrace.h>
-
+
static int profile_timer_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data)
{
struct die_args *args = (struct die_args *)data;
int ret = NOTIFY_DONE;
- switch(val) {
+ switch (val) {
case DIE_NMI:
oprofile_add_sample(args->regs, 0);
ret = NOTIFY_STOP;
@@ -56,7 +56,7 @@ static void timer_stop(void)
}
-int __init op_nmi_timer_init(struct oprofile_operations * ops)
+int __init op_nmi_timer_init(struct oprofile_operations *ops)
{
if ((nmi_watchdog != NMI_IO_APIC) || (atomic_read(&nmi_active) <= 0))
return -ENODEV;
diff --git a/arch/x86/oprofile/op_model_athlon.c b/arch/x86/oprofile/op_model_athlon.c
index c3ee43333f26..3d534879a9dc 100644
--- a/arch/x86/oprofile/op_model_athlon.c
+++ b/arch/x86/oprofile/op_model_athlon.c
@@ -1,4 +1,4 @@
-/**
+/*
* @file op_model_athlon.h
* athlon / K7 / K8 / Family 10h model-specific MSR operations
*
@@ -14,28 +14,28 @@
#include <asm/ptrace.h>
#include <asm/msr.h>
#include <asm/nmi.h>
-
+
#include "op_x86_model.h"
#include "op_counter.h"
#define NUM_COUNTERS 4
#define NUM_CONTROLS 4
-#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
-#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
-#define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1);} while (0)
+#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
+#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0)
+#define CTR_WRITE(l, msrs, c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1); } while (0)
#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
-#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
-#define CTRL_READ(l,h,msrs,c) do {rdmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
-#define CTRL_WRITE(l,h,msrs,c) do {wrmsr(msrs->controls[(c)].addr, (l), (h));} while (0)
+#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
+#define CTRL_READ(l, h, msrs, c) do {rdmsr(msrs->controls[(c)].addr, (l), (h)); } while (0)
+#define CTRL_WRITE(l, h, msrs, c) do {wrmsr(msrs->controls[(c)].addr, (l), (h)); } while (0)
#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
#define CTRL_CLEAR_LO(x) (x &= (1<<21))
#define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0)
#define CTRL_SET_ENABLE(val) (val |= 1<<20)
-#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
-#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
+#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16))
+#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17))
#define CTRL_SET_UM(val, m) (val |= (m << 8))
#define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff))
#define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf))
@@ -43,19 +43,19 @@
#define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8))
static unsigned long reset_value[NUM_COUNTERS];
-
+
static void athlon_fill_in_addresses(struct op_msrs * const msrs)
{
int i;
- for (i=0; i < NUM_COUNTERS; i++) {
+ for (i = 0; i < NUM_COUNTERS; i++) {
if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
else
msrs->counters[i].addr = 0;
}
- for (i=0; i < NUM_CONTROLS; i++) {
+ for (i = 0; i < NUM_CONTROLS; i++) {
if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i))
msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
else
@@ -63,15 +63,15 @@ static void athlon_fill_in_addresses(struct op_msrs * const msrs)
}
}
-
+
static void athlon_setup_ctrs(struct op_msrs const * const msrs)
{
unsigned int low, high;
int i;
-
+
/* clear all counters */
for (i = 0 ; i < NUM_CONTROLS; ++i) {
- if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+ if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
continue;
CTRL_READ(low, high, msrs, i);
CTRL_CLEAR_LO(low);
@@ -81,14 +81,14 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs)
/* avoid a false detection of ctr overflows in NMI handler */
for (i = 0; i < NUM_COUNTERS; ++i) {
- if (unlikely(!CTR_IS_RESERVED(msrs,i)))
+ if (unlikely(!CTR_IS_RESERVED(msrs, i)))
continue;
CTR_WRITE(1, msrs, i);
}
/* enable active counters */
for (i = 0; i < NUM_COUNTERS; ++i) {
- if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) {
+ if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
reset_value[i] = counter_config[i].count;
CTR_WRITE(counter_config[i].count, msrs, i);
@@ -112,7 +112,7 @@ static void athlon_setup_ctrs(struct op_msrs const * const msrs)
}
}
-
+
static int athlon_check_ctrs(struct pt_regs * const regs,
struct op_msrs const * const msrs)
{
@@ -133,7 +133,7 @@ static int athlon_check_ctrs(struct pt_regs * const regs,
return 1;
}
-
+
static void athlon_start(struct op_msrs const * const msrs)
{
unsigned int low, high;
@@ -150,7 +150,7 @@ static void athlon_start(struct op_msrs const * const msrs)
static void athlon_stop(struct op_msrs const * const msrs)
{
- unsigned int low,high;
+ unsigned int low, high;
int i;
/* Subtle: stop on all counters to avoid race with
@@ -169,11 +169,11 @@ static void athlon_shutdown(struct op_msrs const * const msrs)
int i;
for (i = 0 ; i < NUM_COUNTERS ; ++i) {
- if (CTR_IS_RESERVED(msrs,i))
+ if (CTR_IS_RESERVED(msrs, i))
release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
}
for (i = 0 ; i < NUM_CONTROLS ; ++i) {
- if (CTRL_IS_RESERVED(msrs,i))
+ if (CTRL_IS_RESERVED(msrs, i))
release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
}
}
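A note on the arithmetic behind the CTR_WRITE()/CTR_OVERFLOWED() macros above: each counter is programmed with the negated event count, counts upward, and the NMI fires once it wraps, at which point bit 31 of the low word reads as zero. The stand-alone sketch below models that wrap in plain 32-bit arithmetic; counter_after() and overflowed() are illustration-only names, and the real K7 counters are wider than 32 bits, so this is a simplified model rather than kernel code.

#include <stdio.h>

/*
 * Illustration only: a perf counter loaded with -count, as CTR_WRITE()
 * above does, incrementing once per event.  32-bit arithmetic is used
 * purely to keep the example small.
 */
static unsigned int counter_after(unsigned int count, unsigned int events)
{
	unsigned int ctr = -count;	/* what CTR_WRITE() loads */

	return ctr + events;		/* one increment per event */
}

/* Mirrors CTR_OVERFLOWED(): bit 31 drops to zero once the counter wraps. */
static int overflowed(unsigned int ctr)
{
	return !(ctr & (1U << 31));
}

int main(void)
{
	unsigned int reset_value = 100000;	/* cf. counter_config[i].count */

	printf("%u events: overflowed=%d\n", reset_value - 1,
	       overflowed(counter_after(reset_value, reset_value - 1)));
	printf("%u events: overflowed=%d\n", reset_value,
	       overflowed(counter_after(reset_value, reset_value)));
	return 0;
}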
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index c554f52cb808..eff431f6c57b 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -1,4 +1,4 @@
-/**
+/*
* @file op_model_ppro.h
* pentium pro / P6 model-specific MSR operations
*
@@ -15,45 +15,45 @@
#include <asm/msr.h>
#include <asm/apic.h>
#include <asm/nmi.h>
-
+
#include "op_x86_model.h"
#include "op_counter.h"
#define NUM_COUNTERS 2
#define NUM_CONTROLS 2
-#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0)
-#define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0)
-#define CTR_32BIT_WRITE(l,msrs,c) \
- do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), 0);} while (0)
+#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0)
+#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0)
+#define CTR_32BIT_WRITE(l, msrs, c) \
+ do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), 0); } while (0)
#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
-#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0)
-#define CTRL_READ(l,h,msrs,c) do {rdmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
-#define CTRL_WRITE(l,h,msrs,c) do {wrmsr((msrs->controls[(c)].addr), (l), (h));} while (0)
+#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0)
+#define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
+#define CTRL_WRITE(l, h, msrs, c) do {wrmsr((msrs->controls[(c)].addr), (l), (h)); } while (0)
#define CTRL_SET_ACTIVE(n) (n |= (1<<22))
#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22))
#define CTRL_CLEAR(x) (x &= (1<<21))
#define CTRL_SET_ENABLE(val) (val |= 1<<20)
-#define CTRL_SET_USR(val,u) (val |= ((u & 1) << 16))
-#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
+#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16))
+#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17))
#define CTRL_SET_UM(val, m) (val |= (m << 8))
#define CTRL_SET_EVENT(val, e) (val |= e)
static unsigned long reset_value[NUM_COUNTERS];
-
+
static void ppro_fill_in_addresses(struct op_msrs * const msrs)
{
int i;
- for (i=0; i < NUM_COUNTERS; i++) {
+ for (i = 0; i < NUM_COUNTERS; i++) {
if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i))
msrs->counters[i].addr = MSR_P6_PERFCTR0 + i;
else
msrs->counters[i].addr = 0;
}
-
- for (i=0; i < NUM_CONTROLS; i++) {
+
+ for (i = 0; i < NUM_CONTROLS; i++) {
if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i))
msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i;
else
@@ -69,23 +69,23 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
/* clear all counters */
for (i = 0 ; i < NUM_CONTROLS; ++i) {
- if (unlikely(!CTRL_IS_RESERVED(msrs,i)))
+ if (unlikely(!CTRL_IS_RESERVED(msrs, i)))
continue;
CTRL_READ(low, high, msrs, i);
CTRL_CLEAR(low);
CTRL_WRITE(low, high, msrs, i);
}
-
+
/* avoid a false detection of ctr overflows in NMI handler */
for (i = 0; i < NUM_COUNTERS; ++i) {
- if (unlikely(!CTR_IS_RESERVED(msrs,i)))
+ if (unlikely(!CTR_IS_RESERVED(msrs, i)))
continue;
CTR_32BIT_WRITE(1, msrs, i);
}
/* enable active counters */
for (i = 0; i < NUM_COUNTERS; ++i) {
- if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) {
+ if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) {
reset_value[i] = counter_config[i].count;
CTR_32BIT_WRITE(counter_config[i].count, msrs, i);
@@ -104,13 +104,13 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
}
}
-
+
static int ppro_check_ctrs(struct pt_regs * const regs,
struct op_msrs const * const msrs)
{
unsigned int low, high;
int i;
-
+
for (i = 0 ; i < NUM_COUNTERS; ++i) {
if (!reset_value[i])
continue;
@@ -135,10 +135,10 @@ static int ppro_check_ctrs(struct pt_regs * const regs,
return 1;
}
-
+
static void ppro_start(struct op_msrs const * const msrs)
{
- unsigned int low,high;
+ unsigned int low, high;
int i;
for (i = 0; i < NUM_COUNTERS; ++i) {
@@ -153,7 +153,7 @@ static void ppro_start(struct op_msrs const * const msrs)
static void ppro_stop(struct op_msrs const * const msrs)
{
- unsigned int low,high;
+ unsigned int low, high;
int i;
for (i = 0; i < NUM_COUNTERS; ++i) {
@@ -170,11 +170,11 @@ static void ppro_shutdown(struct op_msrs const * const msrs)
int i;
for (i = 0 ; i < NUM_COUNTERS ; ++i) {
- if (CTR_IS_RESERVED(msrs,i))
+ if (CTR_IS_RESERVED(msrs, i))
release_perfctr_nmi(MSR_P6_PERFCTR0 + i);
}
for (i = 0 ; i < NUM_CONTROLS ; ++i) {
- if (CTRL_IS_RESERVED(msrs,i))
+ if (CTRL_IS_RESERVED(msrs, i))
release_evntsel_nmi(MSR_P6_EVNTSEL0 + i);
}
}
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 103b9dff1213..2ead72363077 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -30,6 +30,9 @@
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/errno.h>
+#include <linux/bootmem.h>
+
+#include <asm/pat.h>
#include "pci.h"
@@ -297,10 +300,35 @@ void pcibios_set_master(struct pci_dev *dev)
pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
}
+static void pci_unmap_page_range(struct vm_area_struct *vma)
+{
+ u64 addr = (u64)vma->vm_pgoff << PAGE_SHIFT;
+ free_memtype(addr, addr + vma->vm_end - vma->vm_start);
+}
+
+static void pci_track_mmap_page_range(struct vm_area_struct *vma)
+{
+ u64 addr = (u64)vma->vm_pgoff << PAGE_SHIFT;
+ unsigned long flags = pgprot_val(vma->vm_page_prot)
+ & _PAGE_CACHE_MASK;
+
+ reserve_memtype(addr, addr + vma->vm_end - vma->vm_start, flags, NULL);
+}
+
+static struct vm_operations_struct pci_mmap_ops = {
+ .open = pci_track_mmap_page_range,
+ .close = pci_unmap_page_range,
+};
+
int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
enum pci_mmap_state mmap_state, int write_combine)
{
unsigned long prot;
+ u64 addr = vma->vm_pgoff << PAGE_SHIFT;
+ unsigned long len = vma->vm_end - vma->vm_start;
+ unsigned long flags;
+ unsigned long new_flags;
+ int retval;
/* I/O space cannot be accessed via normal processor loads and
* stores on this platform.
@@ -308,21 +336,50 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
if (mmap_state == pci_mmap_io)
return -EINVAL;
- /* Leave vm_pgoff as-is, the PCI space address is the physical
- * address on this platform.
- */
prot = pgprot_val(vma->vm_page_prot);
- if (boot_cpu_data.x86 > 3)
- prot |= _PAGE_PCD | _PAGE_PWT;
+ if (pat_wc_enabled && write_combine)
+ prot |= _PAGE_CACHE_WC;
+ else if (boot_cpu_data.x86 > 3)
+ prot |= _PAGE_CACHE_UC;
+
vma->vm_page_prot = __pgprot(prot);
- /* Write-combine setting is ignored, it is changed via the mtrr
- * interfaces on this platform.
- */
+ flags = pgprot_val(vma->vm_page_prot) & _PAGE_CACHE_MASK;
+ retval = reserve_memtype(addr, addr + len, flags, &new_flags);
+ if (retval)
+ return retval;
+
+ if (flags != new_flags) {
+ /*
+ * Do not fallback to certain memory types with certain
+ * requested type:
+ * - request is uncached, return cannot be write-back
+ * - request is uncached, return cannot be write-combine
+ * - request is write-combine, return cannot be write-back
+ */
+ if ((flags == _PAGE_CACHE_UC &&
+ (new_flags == _PAGE_CACHE_WB ||
+ new_flags == _PAGE_CACHE_WC)) ||
+ (flags == _PAGE_CACHE_WC &&
+ new_flags == _PAGE_CACHE_WB)) {
+ free_memtype(addr, addr+len);
+ return -EINVAL;
+ }
+ flags = new_flags;
+ }
+
+ if (vma->vm_pgoff <= max_pfn_mapped &&
+ ioremap_change_attr((unsigned long)__va(addr), len, flags)) {
+ free_memtype(addr, addr + len);
+ return -EINVAL;
+ }
+
if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
vma->vm_end - vma->vm_start,
vma->vm_page_prot))
return -EAGAIN;
+ vma->vm_ops = &pci_mmap_ops;
+
return 0;
}
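The reserve_memtype()/free_memtype() pairing added above accepts a fallback type only when it is no more cacheable than what was requested, as the in-code comment spells out. Below is a minimal stand-alone restatement of that rule; memtype_fallback_ok() is a hypothetical helper and the PCACHE_* values are placeholders for this sketch, not the kernel's _PAGE_CACHE_* definitions.

#include <stdio.h>

/* Placeholder values standing in for the kernel's _PAGE_CACHE_* flags. */
enum { PCACHE_WB, PCACHE_WC, PCACHE_UC };

/*
 * Hypothetical helper mirroring the check in pci_mmap_page_range() above:
 * reject any fallback that is more cacheable than the requested type.
 */
static int memtype_fallback_ok(int want, int got)
{
	if (want == got)
		return 1;
	if (want == PCACHE_UC && (got == PCACHE_WB || got == PCACHE_WC))
		return 0;	/* uncached request must stay uncached */
	if (want == PCACHE_WC && got == PCACHE_WB)
		return 0;	/* write-combine must not weaken to write-back */
	return 1;		/* any other substitution is accepted */
}

int main(void)
{
	printf("UC -> WB allowed? %d\n", memtype_fallback_ok(PCACHE_UC, PCACHE_WB));
	printf("WC -> UC allowed? %d\n", memtype_fallback_ok(PCACHE_WC, PCACHE_UC));
	return 0;
}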
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index a8715861877e..579745ca6b66 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -200,7 +200,7 @@ static int pirq_ali_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
{
static const unsigned char irqmap[16] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 };
- WARN_ON_ONCE(pirq >= 16);
+ WARN_ON_ONCE(pirq > 16);
return irqmap[read_config_nybble(router, 0x48, pirq-1)];
}
@@ -209,7 +209,7 @@ static int pirq_ali_set(struct pci_dev *router, struct pci_dev *dev, int pirq, i
static const unsigned char irqmap[16] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 };
unsigned int val = irqmap[irq];
- WARN_ON_ONCE(pirq >= 16);
+ WARN_ON_ONCE(pirq > 16);
if (val) {
write_config_nybble(router, 0x48, pirq-1, val);
return 1;
@@ -260,7 +260,7 @@ static int pirq_via586_get(struct pci_dev *router, struct pci_dev *dev, int pirq
{
static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 };
- WARN_ON_ONCE(pirq >= 5);
+ WARN_ON_ONCE(pirq > 5);
return read_config_nybble(router, 0x55, pirqmap[pirq-1]);
}
@@ -268,7 +268,7 @@ static int pirq_via586_set(struct pci_dev *router, struct pci_dev *dev, int pirq
{
static const unsigned int pirqmap[5] = { 3, 2, 5, 1, 1 };
- WARN_ON_ONCE(pirq >= 5);
+ WARN_ON_ONCE(pirq > 5);
write_config_nybble(router, 0x55, pirqmap[pirq-1], irq);
return 1;
}
@@ -282,7 +282,7 @@ static int pirq_ite_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
{
static const unsigned char pirqmap[4] = { 1, 0, 2, 3 };
- WARN_ON_ONCE(pirq >= 4);
+ WARN_ON_ONCE(pirq > 4);
return read_config_nybble(router,0x43, pirqmap[pirq-1]);
}
@@ -290,7 +290,7 @@ static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, i
{
static const unsigned char pirqmap[4] = { 1, 0, 2, 3 };
- WARN_ON_ONCE(pirq >= 4);
+ WARN_ON_ONCE(pirq > 4);
write_config_nybble(router, 0x43, pirqmap[pirq-1], irq);
return 1;
}
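The WARN_ON_ONCE() changes above follow from the routers' 1-based pirq numbering: an N-entry map is indexed with pirq - 1, so pirq == N is still legal and only pirq > N merits a warning. The sketch below restates that check for the 4-entry ITE map shown above; pirq_to_slot() is a hypothetical name used only for this illustration.

#include <stdio.h>

/* Same 4-entry table as pirq_ite_get()/pirq_ite_set() above, indexed
 * with pirq - 1, so the legal pirq range is 1..4. */
static const unsigned char pirqmap[4] = { 1, 0, 2, 3 };

/* Hypothetical helper for this illustration only. */
static int pirq_to_slot(int pirq)
{
	if (pirq < 1 || pirq > 4)	/* cf. WARN_ON_ONCE(pirq > 4) */
		return -1;
	return pirqmap[pirq - 1];
}

int main(void)
{
	printf("pirq 4 -> slot %d\n", pirq_to_slot(4));	/* last valid entry */
	printf("pirq 5 -> slot %d\n", pirq_to_slot(5));	/* out of range */
	return 0;
}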
diff --git a/arch/x86/pci/numa.c b/arch/x86/pci/numa.c
index 55270c26237c..d9afbae5092b 100644
--- a/arch/x86/pci/numa.c
+++ b/arch/x86/pci/numa.c
@@ -11,11 +11,41 @@
#define XQUAD_PORTIO_BASE 0xfe400000
#define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */
+int mp_bus_id_to_node[MAX_MP_BUSSES];
#define BUS2QUAD(global) (mp_bus_id_to_node[global])
+
+int mp_bus_id_to_local[MAX_MP_BUSSES];
#define BUS2LOCAL(global) (mp_bus_id_to_local[global])
+
+void mpc_oem_bus_info(struct mpc_config_bus *m, char *name,
+ struct mpc_config_translation *translation)
+{
+ int quad = translation->trans_quad;
+ int local = translation->trans_local;
+
+ mp_bus_id_to_node[m->mpc_busid] = quad;
+ mp_bus_id_to_local[m->mpc_busid] = local;
+ printk(KERN_INFO "Bus #%d is %s (node %d)\n",
+ m->mpc_busid, name, quad);
+}
+
+int quad_local_to_mp_bus_id [NR_CPUS/4][4];
#define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local])
+void mpc_oem_pci_bus(struct mpc_config_bus *m,
+ struct mpc_config_translation *translation)
+{
+ int quad = translation->trans_quad;
+ int local = translation->trans_local;
+
+ quad_local_to_mp_bus_id[quad][local] = m->mpc_busid;
+}
+
+/* Where the IO area was mapped on multiquad, always 0 otherwise */
+void *xquad_portio;
+#ifdef CONFIG_X86_NUMAQ
+EXPORT_SYMBOL(xquad_portio);
+#endif
-extern void *xquad_portio; /* Where the IO area was mapped */
#define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port)
#define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \
diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c
index 7f9c6da04a4c..7dc5d5cf50a2 100644
--- a/arch/x86/power/cpu_32.c
+++ b/arch/x86/power/cpu_32.c
@@ -27,17 +27,17 @@ static void __save_processor_state(struct saved_context *ctxt)
/*
* descriptor tables
*/
- store_gdt(&ctxt->gdt);
- store_idt(&ctxt->idt);
- store_tr(ctxt->tr);
+ store_gdt(&ctxt->gdt);
+ store_idt(&ctxt->idt);
+ store_tr(ctxt->tr);
/*
* segment registers
*/
- savesegment(es, ctxt->es);
- savesegment(fs, ctxt->fs);
- savesegment(gs, ctxt->gs);
- savesegment(ss, ctxt->ss);
+ savesegment(es, ctxt->es);
+ savesegment(fs, ctxt->fs);
+ savesegment(gs, ctxt->gs);
+ savesegment(ss, ctxt->ss);
/*
* control registers
@@ -48,10 +48,12 @@ static void __save_processor_state(struct saved_context *ctxt)
ctxt->cr4 = read_cr4();
}
+/* Needed by apm.c */
void save_processor_state(void)
{
__save_processor_state(&saved_context);
}
+EXPORT_SYMBOL(save_processor_state);
static void do_fpu_end(void)
{
@@ -64,9 +66,14 @@ static void do_fpu_end(void)
static void fix_processor_context(void)
{
int cpu = smp_processor_id();
- struct tss_struct * t = &per_cpu(init_tss, cpu);
+ struct tss_struct *t = &per_cpu(init_tss, cpu);
- set_tss_desc(cpu,t); /* This just modifies memory; should not be necessary. But... This is necessary, because 386 hardware has concept of busy TSS or some similar stupidity. */
+ set_tss_desc(cpu, t); /*
+ * This just modifies memory; should not be
+ * necessary. But... This is necessary, because
+ * 386 hardware has concept of busy TSS or some
+ * similar stupidity.
+ */
load_TR_desc(); /* This does ltr */
load_LDT(&current->active_mm->context); /* This does lldt */
@@ -100,16 +107,16 @@ static void __restore_processor_state(struct saved_context *ctxt)
* now restore the descriptor tables to their proper values
 * ltr is done in fix_processor_context().
*/
- load_gdt(&ctxt->gdt);
- load_idt(&ctxt->idt);
+ load_gdt(&ctxt->gdt);
+ load_idt(&ctxt->idt);
/*
* segment registers
*/
- loadsegment(es, ctxt->es);
- loadsegment(fs, ctxt->fs);
- loadsegment(gs, ctxt->gs);
- loadsegment(ss, ctxt->ss);
+ loadsegment(es, ctxt->es);
+ loadsegment(fs, ctxt->fs);
+ loadsegment(gs, ctxt->gs);
+ loadsegment(ss, ctxt->ss);
/*
* sysenter MSRs
@@ -123,11 +130,9 @@ static void __restore_processor_state(struct saved_context *ctxt)
mcheck_init(&boot_cpu_data);
}
+/* Needed by apm.c */
void restore_processor_state(void)
{
__restore_processor_state(&saved_context);
}
-
-/* Needed by apm.c */
-EXPORT_SYMBOL(save_processor_state);
EXPORT_SYMBOL(restore_processor_state);
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 0a8f4742ef51..17a6b057856b 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -39,7 +39,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
CFL := $(PROFILING) -mcmodel=small -fPIC -g0 -O2 -fasynchronous-unwind-tables -m64
-$(vobjs): KBUILD_CFLAGS = $(CFL)
+$(vobjs): KBUILD_CFLAGS += $(CFL)
targets += vdso-syms.lds
obj-$(VDSO64-y) += vdso-syms.lds
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index 348f1341e1c8..e2af8eee80e3 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -210,8 +210,12 @@ static int use_sysenter __read_mostly = -1;
/* May not be __init: called during resume */
void syscall32_cpu_init(void)
{
- if (use_sysenter < 0)
- use_sysenter = (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL);
+ if (use_sysenter < 0) {
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ use_sysenter = 1;
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR)
+ use_sysenter = 1;
+ }
/* Load these always in case some future AMD CPU supports
SYSENTER from compat mode too. */
@@ -325,6 +329,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
int ret = 0;
bool compat;
+ if (vdso_enabled == VDSO_DISABLED)
+ return 0;
+
down_write(&mm->mmap_sem);
/* Test compat mode once here, in case someone
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 27ee26aedf94..c0388220cf97 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -25,6 +25,7 @@
#include <linux/mm.h>
#include <linux/page-flags.h>
#include <linux/highmem.h>
+#include <linux/console.h>
#include <xen/interface/xen.h>
#include <xen/interface/physdev.h>
@@ -889,7 +890,6 @@ void __init xen_setup_vcpu_info_placement(void)
pv_irq_ops.irq_disable = xen_irq_disable_direct;
pv_irq_ops.irq_enable = xen_irq_enable_direct;
pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
- pv_cpu_ops.iret = xen_iret_direct;
}
}
@@ -993,7 +993,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,
- .iret = (void *)&hypercall_page[__HYPERVISOR_iret],
+ .iret = xen_iret,
.irq_enable_syscall_ret = NULL, /* never called */
.load_tr_desc = paravirt_nop,
@@ -1228,6 +1228,9 @@ asmlinkage void __init xen_start_kernel(void)
? __pa(xen_start_info->mod_start) : 0;
boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
+ if (!is_initial_xendomain())
+ add_preferred_console("hvc", 0, NULL);
+
/* Start the world */
start_kernel();
}
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 5e6f36f6d876..5791eb2e3750 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -76,7 +76,7 @@ void xen_mc_flush(void)
if (ret) {
printk(KERN_ERR "%d multicall(s) failed: cpu %d\n",
ret, smp_processor_id());
- for(i = 0; i < b->mcidx; i++) {
+ for (i = 0; i < b->mcidx; i++) {
printk(" call %2d/%d: op=%lu arg=[%lx] result=%ld\n",
i+1, b->mcidx,
b->debug[i].op,
@@ -93,7 +93,7 @@ void xen_mc_flush(void)
local_irq_restore(flags);
- for(i = 0; i < b->cbidx; i++) {
+ for (i = 0; i < b->cbidx; i++) {
struct callback *cb = &b->callbacks[i];
(*cb->fn)(cb->data);
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index aafc54437403..e340ff92f6b6 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -35,7 +35,7 @@
#include "xen-ops.h"
#include "mmu.h"
-static cpumask_t cpu_initialized_map;
+static cpumask_t xen_cpu_initialized_map;
static DEFINE_PER_CPU(int, resched_irq);
static DEFINE_PER_CPU(int, callfunc_irq);
@@ -179,7 +179,7 @@ void __init xen_smp_prepare_cpus(unsigned int max_cpus)
if (xen_smp_intr_init(0))
BUG();
- cpu_initialized_map = cpumask_of_cpu(0);
+ xen_cpu_initialized_map = cpumask_of_cpu(0);
/* Restrict the possible_map according to max_cpus. */
while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
@@ -210,7 +210,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
struct vcpu_guest_context *ctxt;
struct gdt_page *gdt = &per_cpu(gdt_page, cpu);
- if (cpu_test_and_set(cpu, cpu_initialized_map))
+ if (cpu_test_and_set(cpu, xen_cpu_initialized_map))
return 0;
ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 6b7190449d07..fe161ed4b01e 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -135,13 +135,8 @@ ENDPATCH(xen_restore_fl_direct)
current stack state in whatever form its in, we keep things
simple by only using a single register which is pushed/popped
on the stack.
-
- Non-direct iret could be done in the same way, but it would
- require an annoying amount of code duplication. We'll assume
- that direct mode will be the common case once the hypervisor
- support becomes commonplace.
*/
-ENTRY(xen_iret_direct)
+ENTRY(xen_iret)
/* test eflags for special cases */
testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp)
jnz hyper_iret
@@ -155,9 +150,9 @@ ENTRY(xen_iret_direct)
GET_THREAD_INFO(%eax)
movl TI_cpu(%eax),%eax
movl __per_cpu_offset(,%eax,4),%eax
- lea per_cpu__xen_vcpu_info(%eax),%eax
+ mov per_cpu__xen_vcpu(%eax),%eax
#else
- movl $per_cpu__xen_vcpu_info, %eax
+ movl per_cpu__xen_vcpu, %eax
#endif
/* check IF state we're restoring */
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index b02a909bfd4c..956a491ea998 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -63,5 +63,5 @@ DECL_ASM(void, xen_irq_disable_direct, void);
DECL_ASM(unsigned long, xen_save_fl_direct, void);
DECL_ASM(void, xen_restore_fl_direct, unsigned long);
-void xen_iret_direct(void);
+void xen_iret(void);
#endif /* XEN_OPS_H */
diff --git a/arch/xtensa/kernel/Makefile b/arch/xtensa/kernel/Makefile
index f582d6a24ec2..7419dbccf027 100644
--- a/arch/xtensa/kernel/Makefile
+++ b/arch/xtensa/kernel/Makefile
@@ -5,7 +5,7 @@
extra-y := head.o vmlinux.lds
-obj-y := align.o entry.o irq.o coprocessor.o process.o ptrace.o semaphore.o \
+obj-y := align.o entry.o irq.o coprocessor.o process.o ptrace.o \
setup.o signal.o syscall.o time.o traps.o vectors.o platform.o \
pci-dma.o init_task.o io.o
diff --git a/arch/xtensa/kernel/semaphore.c b/arch/xtensa/kernel/semaphore.c
deleted file mode 100644
index 995c6410ae10..000000000000
--- a/arch/xtensa/kernel/semaphore.c
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
- * arch/xtensa/kernel/semaphore.c
- *
- * Generic semaphore code. Buyer beware. Do your own specific changes
- * in <asm/semaphore-helper.h>
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2001 - 2005 Tensilica Inc.
- *
- * Joe Taylor <joe@tensilica.com, joetylr@yahoo.com>
- * Chris Zankel <chris@zankel.net>
- * Marc Gauthier<marc@tensilica.com, marc@alumni.uwaterloo.ca>
- * Kevin Chea
- */
-
-#include <linux/sched.h>
-#include <linux/wait.h>
-#include <linux/init.h>
-#include <asm/semaphore.h>
-#include <asm/errno.h>
-
-/*
- * These two _must_ execute atomically wrt each other.
- */
-
-static __inline__ void wake_one_more(struct semaphore * sem)
-{
- atomic_inc((atomic_t *)&sem->sleepers);
-}
-
-static __inline__ int waking_non_zero(struct semaphore *sem)
-{
- unsigned long flags;
- int ret = 0;
-
- spin_lock_irqsave(&semaphore_wake_lock, flags);
- if (sem->sleepers > 0) {
- sem->sleepers--;
- ret = 1;
- }
- spin_unlock_irqrestore(&semaphore_wake_lock, flags);
- return ret;
-}
-
-/*
- * waking_non_zero_interruptible:
- * 1 got the lock
- * 0 go to sleep
- * -EINTR interrupted
- *
- * We must undo the sem->count down_interruptible() increment while we are
- * protected by the spinlock in order to make atomic this atomic_inc() with the
- * atomic_read() in wake_one_more(), otherwise we can race. -arca
- */
-
-static __inline__ int waking_non_zero_interruptible(struct semaphore *sem,
- struct task_struct *tsk)
-{
- unsigned long flags;
- int ret = 0;
-
- spin_lock_irqsave(&semaphore_wake_lock, flags);
- if (sem->sleepers > 0) {
- sem->sleepers--;
- ret = 1;
- } else if (signal_pending(tsk)) {
- atomic_inc(&sem->count);
- ret = -EINTR;
- }
- spin_unlock_irqrestore(&semaphore_wake_lock, flags);
- return ret;
-}
-
-/*
- * waking_non_zero_trylock:
- * 1 failed to lock
- * 0 got the lock
- *
- * We must undo the sem->count down_trylock() increment while we are
- * protected by the spinlock in order to make atomic this atomic_inc() with the
- * atomic_read() in wake_one_more(), otherwise we can race. -arca
- */
-
-static __inline__ int waking_non_zero_trylock(struct semaphore *sem)
-{
- unsigned long flags;
- int ret = 1;
-
- spin_lock_irqsave(&semaphore_wake_lock, flags);
- if (sem->sleepers <= 0)
- atomic_inc(&sem->count);
- else {
- sem->sleepers--;
- ret = 0;
- }
- spin_unlock_irqrestore(&semaphore_wake_lock, flags);
- return ret;
-}
-
-DEFINE_SPINLOCK(semaphore_wake_lock);
-
-/*
- * Semaphores are implemented using a two-way counter:
- * The "count" variable is decremented for each process
- * that tries to sleep, while the "waking" variable is
- * incremented when the "up()" code goes to wake up waiting
- * processes.
- *
- * Notably, the inline "up()" and "down()" functions can
- * efficiently test if they need to do any extra work (up
- * needs to do something only if count was negative before
- * the increment operation.
- *
- * waking_non_zero() (from asm/semaphore.h) must execute
- * atomically.
- *
- * When __up() is called, the count was negative before
- * incrementing it, and we need to wake up somebody.
- *
- * This routine adds one to the count of processes that need to
- * wake up and exit. ALL waiting processes actually wake up but
- * only the one that gets to the "waking" field first will gate
- * through and acquire the semaphore. The others will go back
- * to sleep.
- *
- * Note that these functions are only called when there is
- * contention on the lock, and as such all this is the
- * "non-critical" part of the whole semaphore business. The
- * critical part is the inline stuff in <asm/semaphore.h>
- * where we want to avoid any extra jumps and calls.
- */
-
-void __up(struct semaphore *sem)
-{
- wake_one_more(sem);
- wake_up(&sem->wait);
-}
-
-/*
- * Perform the "down" function. Return zero for semaphore acquired,
- * return negative for signalled out of the function.
- *
- * If called from __down, the return is ignored and the wait loop is
- * not interruptible. This means that a task waiting on a semaphore
- * using "down()" cannot be killed until someone does an "up()" on
- * the semaphore.
- *
- * If called from __down_interruptible, the return value gets checked
- * upon return. If the return value is negative then the task continues
- * with the negative value in the return register (it can be tested by
- * the caller).
- *
- * Either form may be used in conjunction with "up()".
- *
- */
-
-#define DOWN_VAR \
- struct task_struct *tsk = current; \
- wait_queue_t wait; \
- init_waitqueue_entry(&wait, tsk);
-
-#define DOWN_HEAD(task_state) \
- \
- \
- tsk->state = (task_state); \
- add_wait_queue(&sem->wait, &wait); \
- \
- /* \
- * Ok, we're set up. sem->count is known to be less than zero \
- * so we must wait. \
- * \
- * We can let go the lock for purposes of waiting. \
- * We re-acquire it after awaking so as to protect \
- * all semaphore operations. \
- * \
- * If "up()" is called before we call waking_non_zero() then \
- * we will catch it right away. If it is called later then \
- * we will have to go through a wakeup cycle to catch it. \
- * \
- * Multiple waiters contend for the semaphore lock to see \
- * who gets to gate through and who has to wait some more. \
- */ \
- for (;;) {
-
-#define DOWN_TAIL(task_state) \
- tsk->state = (task_state); \
- } \
- tsk->state = TASK_RUNNING; \
- remove_wait_queue(&sem->wait, &wait);
-
-void __sched __down(struct semaphore * sem)
-{
- DOWN_VAR
- DOWN_HEAD(TASK_UNINTERRUPTIBLE)
- if (waking_non_zero(sem))
- break;
- schedule();
- DOWN_TAIL(TASK_UNINTERRUPTIBLE)
-}
-
-int __sched __down_interruptible(struct semaphore * sem)
-{
- int ret = 0;
- DOWN_VAR
- DOWN_HEAD(TASK_INTERRUPTIBLE)
-
- ret = waking_non_zero_interruptible(sem, tsk);
- if (ret)
- {
- if (ret == 1)
- /* ret != 0 only if we get interrupted -arca */
- ret = 0;
- break;
- }
- schedule();
- DOWN_TAIL(TASK_INTERRUPTIBLE)
- return ret;
-}
-
-int __down_trylock(struct semaphore * sem)
-{
- return waking_non_zero_trylock(sem);
-}
diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c
index 60dbdb43fb4c..6e52cdd6166f 100644
--- a/arch/xtensa/kernel/xtensa_ksyms.c
+++ b/arch/xtensa/kernel/xtensa_ksyms.c
@@ -26,7 +26,6 @@
#include <asm/io.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
-#include <asm/semaphore.h>
#ifdef CONFIG_BLK_DEV_FD
#include <asm/floppy.h>
#endif
@@ -71,14 +70,6 @@ EXPORT_SYMBOL(__umodsi3);
EXPORT_SYMBOL(__udivdi3);
EXPORT_SYMBOL(__umoddi3);
-/*
- * Semaphore operations
- */
-EXPORT_SYMBOL(__down);
-EXPORT_SYMBOL(__down_interruptible);
-EXPORT_SYMBOL(__down_trylock);
-EXPORT_SYMBOL(__up);
-
#ifdef CONFIG_NET
/*
* Networking support