35 files changed, 1599 insertions, 1044 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 1d035082e78e..eb530b4128ba 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -146,6 +146,7 @@ config MATHEMU
 config COMPAT
 	bool "Kernel support for 31 bit emulation"
 	depends on 64BIT
+	select COMPAT_BINFMT_ELF
 	help
 	  Select this option if you want to enable your system kernel to
 	  handle system-calls from ELF binaries for 31 bit ESA.  This option
@@ -288,7 +289,7 @@ config WARN_STACK_SIZE
 	int "Maximum frame size considered safe (128-2048)"
 	range 128 2048
 	depends on WARN_STACK
-	default "256"
+	default "2048"
 	help
 	  This allows you to specify the maximum frame size a function may
 	  have without the compiler complaining about it.
@@ -304,10 +305,18 @@ config ARCH_SPARSEMEM_ENABLE
 	def_bool y
 	select SPARSEMEM_VMEMMAP_ENABLE
 	select SPARSEMEM_VMEMMAP
+	select SPARSEMEM_STATIC if !64BIT
 
 config ARCH_SPARSEMEM_DEFAULT
 	def_bool y
 
+config ARCH_SELECT_MEMORY_MODEL
+	def_bool y
+
+config ARCH_ENABLE_MEMORY_HOTPLUG
+	def_bool y
+	depends on SPARSEMEM
+
 source "mm/Kconfig"
 
 comment "I/O subsystem configuration"
@@ -340,6 +349,22 @@ config QDIO_DEBUG
 
 	  If unsure, say N.
 
+config CHSC_SCH
+	tristate "Support for CHSC subchannels"
+	help
+	  This driver allows usage of CHSC subchannels. A CHSC subchannel
+	  is usually present on LPAR only.
+	  The driver creates a device /dev/chsc, which may be used to
+	  obtain I/O configuration information about the machine and
+	  to issue asynchronous chsc commands (DANGEROUS).
+	  You will usually only want to use this interface on a special
+	  LPAR designated for system management.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called chsc_sch.
+
+	  If unsure, say N.
+
 comment "Misc"
 
 config IPL
diff --git a/arch/s390/appldata/appldata.h b/arch/s390/appldata/appldata.h
index db3ae8505103..17a2636fec0a 100644
--- a/arch/s390/appldata/appldata.h
+++ b/arch/s390/appldata/appldata.h
@@ -3,13 +3,11 @@
  *
  * Definitions and interface for Linux - z/VM Monitor Stream.
  *
- * Copyright (C) 2003,2006 IBM Corporation, IBM Deutschland Entwicklung GmbH.
+ * Copyright IBM Corp. 2003, 2008
  *
  * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
  */
 
-//#define APPLDATA_DEBUG			/* Debug messages on/off */
-
 #define APPLDATA_MAX_REC_SIZE	  4024	/* Maximum size of the */
 					/* data buffer */
 #define APPLDATA_MAX_PROCS 100
@@ -32,12 +30,6 @@
 #define P_ERROR(x...)	printk(KERN_ERR MY_PRINT_NAME " error: " x)
 #define P_WARNING(x...)	printk(KERN_WARNING MY_PRINT_NAME " status: " x)
 
-#ifdef APPLDATA_DEBUG
-#define P_DEBUG(x...)   printk(KERN_DEBUG MY_PRINT_NAME " debug: " x)
-#else
-#define P_DEBUG(x...)   do {} while (0)
-#endif
-
 struct appldata_ops {
 	struct list_head list;
 	struct ctl_table_header *sysctl_header;
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index 655d52543e2d..a7f8979fb925 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -5,7 +5,7 @@
  * Exports appldata_register_ops() and appldata_unregister_ops() for the
  * data gathering modules.
  *
- * Copyright (C) 2003,2006 IBM Corporation, IBM Deutschland Entwicklung GmbH.
+ * Copyright IBM Corp. 2003, 2008
  *
  * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
  */
@@ -108,9 +108,6 @@ static LIST_HEAD(appldata_ops_list);
  */
 static void appldata_timer_function(unsigned long data)
 {
-	P_DEBUG("   -= Timer =-\n");
-	P_DEBUG("CPU: %i, expire_count: %i\n", smp_processor_id(),
-		atomic_read(&appldata_expire_count));
 	if (atomic_dec_and_test(&appldata_expire_count)) {
 		atomic_set(&appldata_expire_count, num_online_cpus());
 		queue_work(appldata_wq, (struct work_struct *) data);
@@ -128,18 +125,17 @@ static void appldata_work_fn(struct work_struct *work)
 	struct appldata_ops *ops;
 	int i;
 
-	P_DEBUG("  -= Work Queue =-\n");
 	i = 0;
+	get_online_cpus();
 	spin_lock(&appldata_ops_lock);
 	list_for_each(lh, &appldata_ops_list) {
 		ops = list_entry(lh, struct appldata_ops, list);
-		P_DEBUG("list_for_each loop: %i) active = %u, name = %s\n",
-			++i, ops->active, ops->name);
 		if (ops->active == 1) {
 			ops->callback(ops->data);
 		}
 	}
 	spin_unlock(&appldata_ops_lock);
+	put_online_cpus();
 }
 
 /*
@@ -207,10 +203,9 @@ __appldata_vtimer_setup(int cmd)
 			per_cpu(appldata_timer, i).expires = per_cpu_interval;
 			smp_call_function_single(i, add_virt_timer_periodic,
 						 &per_cpu(appldata_timer, i),
-						 0, 1);
+						 1);
 		}
 		appldata_timer_active = 1;
-		P_INFO("Monitoring timer started.\n");
 		break;
 	case APPLDATA_DEL_TIMER:
 		for_each_online_cpu(i)
@@ -219,7 +214,6 @@ __appldata_vtimer_setup(int cmd)
 			break;
 		appldata_timer_active = 0;
 		atomic_set(&appldata_expire_count, num_online_cpus());
-		P_INFO("Monitoring timer stopped.\n");
 		break;
 	case APPLDATA_MOD_TIMER:
 		per_cpu_interval = (u64) (appldata_interval*1000 /
@@ -234,7 +228,7 @@ __appldata_vtimer_setup(int cmd)
 			args.timer = &per_cpu(appldata_timer, i);
 			args.expires = per_cpu_interval;
 			smp_call_function_single(i, __appldata_mod_vtimer_wrap,
-						 &args, 0, 1);
+						 &args, 1);
 		}
 	}
 }
@@ -266,12 +260,14 @@ appldata_timer_handler(ctl_table *ctl, int write, struct file *filp,
 	len = *lenp;
 	if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len))
 		return -EFAULT;
+	get_online_cpus();
 	spin_lock(&appldata_timer_lock);
 	if (buf[0] == '1')
 		__appldata_vtimer_setup(APPLDATA_ADD_TIMER);
 	else if (buf[0] == '0')
 		__appldata_vtimer_setup(APPLDATA_DEL_TIMER);
 	spin_unlock(&appldata_timer_lock);
+	put_online_cpus();
 out:
 	*lenp = len;
 	*ppos += len;
@@ -309,18 +305,15 @@ appldata_interval_handler(ctl_table *ctl, int write, struct file *filp,
 	}
 	interval = 0;
 	sscanf(buf, "%i", &interval);
-	if (interval <= 0) {
-		P_ERROR("Timer CPU interval has to be > 0!\n");
+	if (interval <= 0)
 		return -EINVAL;
-	}
 
+	get_online_cpus();
 	spin_lock(&appldata_timer_lock);
 	appldata_interval = interval;
 	__appldata_vtimer_setup(APPLDATA_MOD_TIMER);
 	spin_unlock(&appldata_timer_lock);
-
-	P_INFO("Monitoring CPU interval set to %u milliseconds.\n",
-		 interval);
+	put_online_cpus();
 out:
 	*lenp = len;
 	*ppos += len;
@@ -400,23 +393,16 @@ appldata_generic_handler(ctl_table *ctl, int write, struct file *filp,
 			P_ERROR("START DIAG 0xDC for %s failed, "
 				"return code: %d\n", ops->name, rc);
 			module_put(ops->owner);
-		} else {
-			P_INFO("Monitoring %s data enabled, "
-				"DIAG 0xDC started.\n", ops->name);
+		} else
 			ops->active = 1;
-		}
 	} else if ((buf[0] == '0') && (ops->active == 1)) {
 		ops->active = 0;
 		rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC,
 				(unsigned long) ops->data, ops->size,
 				ops->mod_lvl);
-		if (rc != 0) {
+		if (rc != 0)
 			P_ERROR("STOP DIAG 0xDC for %s failed, "
 				"return code: %d\n", ops->name, rc);
-		} else {
-			P_INFO("Monitoring %s data disabled, "
-				"DIAG 0xDC stopped.\n", ops->name);
-		}
 		module_put(ops->owner);
 	}
 	spin_unlock(&appldata_ops_lock);
@@ -462,7 +448,6 @@ int appldata_register_ops(struct appldata_ops *ops)
 	ops->sysctl_header = register_sysctl_table(ops->ctl_table);
 	if (!ops->sysctl_header)
 		goto out;
-	P_INFO("%s-ops registered!\n", ops->name);
 	return 0;
 out:
 	spin_lock(&appldata_ops_lock);
@@ -484,7 +469,6 @@ void appldata_unregister_ops(struct appldata_ops *ops)
 	spin_unlock(&appldata_ops_lock);
 	unregister_sysctl_table(ops->sysctl_header);
 	kfree(ops->ctl_table);
-	P_INFO("%s-ops unregistered!\n", ops->name);
 }
 /********************** module-ops management <END> **************************/
 
@@ -547,24 +531,19 @@ static int __init appldata_init(void)
 {
 	int i;
 
-	P_DEBUG("sizeof(parameter_list) = %lu\n",
-		sizeof(struct appldata_parameter_list));
-
 	appldata_wq = create_singlethread_workqueue("appldata");
-	if (!appldata_wq) {
-		P_ERROR("Could not create work queue\n");
+	if (!appldata_wq)
 		return -ENOMEM;
-	}
 
+	get_online_cpus();
 	for_each_online_cpu(i)
 		appldata_online_cpu(i);
+	put_online_cpus();
 
 	/* Register cpu hotplug notifier */
 	register_hotcpu_notifier(&appldata_nb);
 
 	appldata_sysctl_header = register_sysctl_table(appldata_dir_table);
-
-	P_DEBUG("Base interface initialized.\n");
 	return 0;
 }
 
@@ -576,7 +555,9 @@ EXPORT_SYMBOL_GPL(appldata_register_ops);
 EXPORT_SYMBOL_GPL(appldata_unregister_ops);
 EXPORT_SYMBOL_GPL(appldata_diag);
 
+#ifdef CONFIG_SWAP
 EXPORT_SYMBOL_GPL(si_swapinfo);
+#endif
 EXPORT_SYMBOL_GPL(nr_threads);
 EXPORT_SYMBOL_GPL(nr_running);
 EXPORT_SYMBOL_GPL(nr_iowait);
diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c
index 51181ccdb87b..3ed56b7d1b2f 100644
--- a/arch/s390/appldata/appldata_mem.c
+++ b/arch/s390/appldata/appldata_mem.c
@@ -14,14 +14,13 @@
 #include <linux/slab.h>
 #include <linux/errno.h>
 #include <linux/kernel_stat.h>
-#include <asm/io.h>
 #include <linux/pagemap.h>
 #include <linux/swap.h>
+#include <asm/io.h>
 
 #include "appldata.h"
 
 
-#define MY_PRINT_NAME "appldata_mem"		/* for debug messages, etc. */
 #define P2K(x) ((x) << (PAGE_SHIFT - 10))	/* Converts #Pages to KB */
 
 /*
@@ -70,30 +69,6 @@ static struct appldata_mem_data {
 } __attribute__((packed)) appldata_mem_data;
 
 
-static inline void appldata_debug_print(struct appldata_mem_data *mem_data)
-{
-	P_DEBUG("--- MEM - RECORD ---\n");
-	P_DEBUG("pgpgin     = %8lu KB\n", mem_data->pgpgin);
-	P_DEBUG("pgpgout    = %8lu KB\n", mem_data->pgpgout);
-	P_DEBUG("pswpin     = %8lu Pages\n", mem_data->pswpin);
-	P_DEBUG("pswpout    = %8lu Pages\n", mem_data->pswpout);
-	P_DEBUG("pgalloc    = %8lu \n", mem_data->pgalloc);
-	P_DEBUG("pgfault    = %8lu \n", mem_data->pgfault);
-	P_DEBUG("pgmajfault = %8lu \n", mem_data->pgmajfault);
-	P_DEBUG("sharedram  = %8lu KB\n", mem_data->sharedram);
-	P_DEBUG("totalram   = %8lu KB\n", mem_data->totalram);
-	P_DEBUG("freeram    = %8lu KB\n", mem_data->freeram);
-	P_DEBUG("totalhigh  = %8lu KB\n", mem_data->totalhigh);
-	P_DEBUG("freehigh   = %8lu KB\n", mem_data->freehigh);
-	P_DEBUG("bufferram  = %8lu KB\n", mem_data->bufferram);
-	P_DEBUG("cached     = %8lu KB\n", mem_data->cached);
-	P_DEBUG("totalswap  = %8lu KB\n", mem_data->totalswap);
-	P_DEBUG("freeswap   = %8lu KB\n", mem_data->freeswap);
-	P_DEBUG("sync_count_1 = %u\n", mem_data->sync_count_1);
-	P_DEBUG("sync_count_2 = %u\n", mem_data->sync_count_2);
-	P_DEBUG("timestamp    = %lX\n", mem_data->timestamp);
-}
-
 /*
  * appldata_get_mem_data()
  *
@@ -140,9 +115,6 @@ static void appldata_get_mem_data(void *data)
 
 	mem_data->timestamp = get_clock();
 	mem_data->sync_count_2++;
-#ifdef APPLDATA_DEBUG
-	appldata_debug_print(mem_data);
-#endif
 }
 
 
@@ -164,17 +136,7 @@ static struct appldata_ops ops = {
  */
 static int __init appldata_mem_init(void)
 {
-	int rc;
-
-	P_DEBUG("sizeof(mem) = %lu\n", sizeof(struct appldata_mem_data));
-
-	rc = appldata_register_ops(&ops);
-	if (rc != 0) {
-		P_ERROR("Error registering ops, rc = %i\n", rc);
-	} else {
-		P_DEBUG("%s-ops registered!\n", ops.name);
-	}
-	return rc;
+	return appldata_register_ops(&ops);
 }
 
 /*
@@ -185,7 +147,6 @@ static int __init appldata_mem_init(void)
 static void __exit appldata_mem_exit(void)
 {
 	appldata_unregister_ops(&ops);
-	P_DEBUG("%s-ops unregistered!\n", ops.name);
 }
 
 
diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c
index 4d8344336001..3b746556e1a3 100644
--- a/arch/s390/appldata/appldata_net_sum.c
+++ b/arch/s390/appldata/appldata_net_sum.c
@@ -21,9 +21,6 @@
 #include "appldata.h"
 
 
-#define MY_PRINT_NAME	"appldata_net_sum"	/* for debug messages, etc. */
-
-
 /*
  * Network data
  *
@@ -60,26 +57,6 @@ static struct appldata_net_sum_data {
 } __attribute__((packed)) appldata_net_sum_data;
 
 
-static inline void appldata_print_debug(struct appldata_net_sum_data *net_data)
-{
-	P_DEBUG("--- NET - RECORD ---\n");
-
-	P_DEBUG("nr_interfaces = %u\n", net_data->nr_interfaces);
-	P_DEBUG("rx_packets    = %8lu\n", net_data->rx_packets);
-	P_DEBUG("tx_packets    = %8lu\n", net_data->tx_packets);
-	P_DEBUG("rx_bytes      = %8lu\n", net_data->rx_bytes);
-	P_DEBUG("tx_bytes      = %8lu\n", net_data->tx_bytes);
-	P_DEBUG("rx_errors     = %8lu\n", net_data->rx_errors);
-	P_DEBUG("tx_errors     = %8lu\n", net_data->tx_errors);
-	P_DEBUG("rx_dropped    = %8lu\n", net_data->rx_dropped);
-	P_DEBUG("tx_dropped    = %8lu\n", net_data->tx_dropped);
-	P_DEBUG("collisions    = %8lu\n", net_data->collisions);
-
-	P_DEBUG("sync_count_1 = %u\n", net_data->sync_count_1);
-	P_DEBUG("sync_count_2 = %u\n", net_data->sync_count_2);
-	P_DEBUG("timestamp    = %lX\n", net_data->timestamp);
-}
-
 /*
  * appldata_get_net_sum_data()
  *
@@ -135,9 +112,6 @@ static void appldata_get_net_sum_data(void *data)
 
 	net_data->timestamp = get_clock();
 	net_data->sync_count_2++;
-#ifdef APPLDATA_DEBUG
-	appldata_print_debug(net_data);
-#endif
 }
 
 
@@ -159,17 +133,7 @@ static struct appldata_ops ops = {
  */
 static int __init appldata_net_init(void)
 {
-	int rc;
-
-	P_DEBUG("sizeof(net) = %lu\n", sizeof(struct appldata_net_sum_data));
-
-	rc = appldata_register_ops(&ops);
-	if (rc != 0) {
-		P_ERROR("Error registering ops, rc = %i\n", rc);
-	} else {
-		P_DEBUG("%s-ops registered!\n", ops.name);
-	}
-	return rc;
+	return appldata_register_ops(&ops);
 }
 
 /*
@@ -180,7 +144,6 @@ static int __init appldata_net_init(void)
 static void __exit appldata_net_exit(void)
 {
 	appldata_unregister_ops(&ops);
-	P_DEBUG("%s-ops unregistered!\n", ops.name);
 }
 
 
diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c
index 6b3eafe10453..eb44f9f8ab91 100644
--- a/arch/s390/appldata/appldata_os.c
+++ b/arch/s390/appldata/appldata_os.c
@@ -89,44 +89,6 @@ static struct appldata_ops ops = {
 };
 
 
-static inline void appldata_print_debug(struct appldata_os_data *os_data)
-{
-	int a0, a1, a2, i;
-
-	P_DEBUG("--- OS - RECORD ---\n");
-	P_DEBUG("nr_threads   = %u\n", os_data->nr_threads);
-	P_DEBUG("nr_running   = %u\n", os_data->nr_running);
-	P_DEBUG("nr_iowait    = %u\n", os_data->nr_iowait);
-	P_DEBUG("avenrun(int) = %8x / %8x / %8x\n", os_data->avenrun[0],
-		os_data->avenrun[1], os_data->avenrun[2]);
-	a0 = os_data->avenrun[0];
-	a1 = os_data->avenrun[1];
-	a2 = os_data->avenrun[2];
-	P_DEBUG("avenrun(float) = %d.%02d / %d.%02d / %d.%02d\n",
-		LOAD_INT(a0), LOAD_FRAC(a0), LOAD_INT(a1), LOAD_FRAC(a1),
-		LOAD_INT(a2), LOAD_FRAC(a2));
-
-	P_DEBUG("nr_cpus = %u\n", os_data->nr_cpus);
-	for (i = 0; i < os_data->nr_cpus; i++) {
-		P_DEBUG("cpu%u : user = %u, nice = %u, system = %u, "
-			"idle = %u, irq = %u, softirq = %u, iowait = %u, "
-			"steal = %u\n",
-				os_data->os_cpu[i].cpu_id,
-				os_data->os_cpu[i].per_cpu_user,
-				os_data->os_cpu[i].per_cpu_nice,
-				os_data->os_cpu[i].per_cpu_system,
-				os_data->os_cpu[i].per_cpu_idle,
-				os_data->os_cpu[i].per_cpu_irq,
-				os_data->os_cpu[i].per_cpu_softirq,
-				os_data->os_cpu[i].per_cpu_iowait,
-				os_data->os_cpu[i].per_cpu_steal);
-	}
-
-	P_DEBUG("sync_count_1 = %u\n", os_data->sync_count_1);
-	P_DEBUG("sync_count_2 = %u\n", os_data->sync_count_2);
-	P_DEBUG("timestamp    = %lX\n", os_data->timestamp);
-}
-
 /*
  * appldata_get_os_data()
  *
@@ -180,13 +142,10 @@ static void appldata_get_os_data(void *data)
 					   APPLDATA_START_INTERVAL_REC,
 					   (unsigned long) ops.data, new_size,
 					   ops.mod_lvl);
-			if (rc != 0) {
+			if (rc != 0)
 				P_ERROR("os: START NEW DIAG 0xDC failed, "
 					"return code: %d, new size = %i\n", rc,
 					new_size);
-				P_INFO("os: stopping old record now\n");
-			} else
-				P_INFO("os: new record size = %i\n", new_size);
 
 			rc = appldata_diag(APPLDATA_RECORD_OS_ID,
 					   APPLDATA_STOP_REC,
@@ -204,9 +163,6 @@ static void appldata_get_os_data(void *data)
 	}
 	os_data->timestamp = get_clock();
 	os_data->sync_count_2++;
-#ifdef APPLDATA_DEBUG
-	appldata_print_debug(os_data);
-#endif
 }
 
 
@@ -227,12 +183,9 @@ static int __init appldata_os_init(void)
 		rc = -ENOMEM;
 		goto out;
 	}
-	P_DEBUG("max. sizeof(os) = %i, sizeof(os_cpu) = %lu\n", max_size,
-		sizeof(struct appldata_os_per_cpu));
 
 	appldata_os_data = kzalloc(max_size, GFP_DMA);
 	if (appldata_os_data == NULL) {
-		P_ERROR("No memory for %s!\n", ops.name);
 		rc = -ENOMEM;
 		goto out;
 	}
@@ -240,17 +193,12 @@ static int __init appldata_os_init(void)
 	appldata_os_data->per_cpu_size = sizeof(struct appldata_os_per_cpu);
 	appldata_os_data->cpu_offset   = offsetof(struct appldata_os_data,
 							os_cpu);
-	P_DEBUG("cpu offset = %u\n", appldata_os_data->cpu_offset);
 
 	ops.data = appldata_os_data;
 	ops.callback  = &appldata_get_os_data;
 	rc = appldata_register_ops(&ops);
-	if (rc != 0) {
-		P_ERROR("Error registering ops, rc = %i\n", rc);
+	if (rc != 0)
 		kfree(appldata_os_data);
-	} else {
-		P_DEBUG("%s-ops registered!\n", ops.name);
-	}
 out:
 	return rc;
 }
@@ -264,7 +212,6 @@ static void __exit appldata_os_exit(void)
 {
 	appldata_unregister_ops(&ops);
 	kfree(appldata_os_data);
-	P_DEBUG("%s-ops unregistered!\n", ops.name);
 }
 
 
diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h
index 9992f95ef992..0ef9829f2ad6 100644
--- a/arch/s390/crypto/crypt_s390.h
+++ b/arch/s390/crypto/crypt_s390.h
@@ -296,6 +296,10 @@ static inline int crypt_s390_func_available(int func)
 	unsigned char status[16];
 	int ret;
 
+	/* check if CPACF facility (bit 17) is available */
+	if (!(stfl() & 1ULL << (31 - 17)))
+		return 0;
+
 	switch (func & CRYPT_S390_OP_MASK) {
 	case CRYPT_S390_KM:
 		ret = crypt_s390_km(KM_QUERY, &status, NULL, NULL, 0);
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 0cfefddd8375..eca724d229ec 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -6,6 +6,7 @@
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/smp_lock.h>
 #include <linux/miscdevice.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -48,6 +49,7 @@ static unsigned char parm_block[32] = {
 
 static int prng_open(struct inode *inode, struct file *file)
 {
+	cycle_kernel_lock();
 	return nonseekable_open(inode, file);
 }
 
@@ -185,11 +187,8 @@ static int __init prng_init(void)
 	prng_seed(16);
 
 	ret = misc_register(&prng_dev);
-	if (ret) {
-		printk(KERN_WARNING
-		       "Could not register misc device for PRNG.\n");
+	if (ret)
 		goto out_buf;
-	}
 	return 0;
 
 out_buf:
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index aa341d0ea1e6..c5cdb975d590 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.25
-# Wed Apr 30 11:07:45 2008
+# Linux kernel version: 2.6.26-rc4
+# Fri May 30 09:49:33 2008
 #
 CONFIG_SCHED_MC=y
 CONFIG_MMU=y
@@ -103,6 +103,7 @@ CONFIG_RT_MUTEXES=y
 # CONFIG_TINY_SHMEM is not set
 CONFIG_BASE_SMALL=0
 CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
 CONFIG_MODULE_UNLOAD=y
 # CONFIG_MODULE_FORCE_UNLOAD is not set
 CONFIG_MODVERSIONS=y
@@ -173,6 +174,7 @@ CONFIG_PREEMPT=y
 # CONFIG_PREEMPT_RCU is not set
 CONFIG_ARCH_SPARSEMEM_ENABLE=y
 CONFIG_ARCH_SPARSEMEM_DEFAULT=y
+CONFIG_ARCH_SELECT_MEMORY_MODEL=y
 CONFIG_SELECT_MEMORY_MODEL=y
 # CONFIG_FLATMEM_MANUAL is not set
 # CONFIG_DISCONTIGMEM_MANUAL is not set
@@ -210,6 +212,7 @@ CONFIG_FORCE_MAX_ZONEORDER=9
 CONFIG_PFAULT=y
 # CONFIG_SHARED_KERNEL is not set
 # CONFIG_CMM is not set
+# CONFIG_PAGE_STATES is not set
 CONFIG_VIRT_TIMER=y
 CONFIG_VIRT_CPU_ACCOUNTING=y
 # CONFIG_APPLDATA_BASE is not set
@@ -620,6 +623,7 @@ CONFIG_S390_VMUR=m
 #
 # CONFIG_MEMSTICK is not set
 # CONFIG_NEW_LEDS is not set
+CONFIG_ACCESSIBILITY=y
 
 #
 # File systems
@@ -754,11 +758,12 @@ CONFIG_FRAME_WARN=2048
 CONFIG_MAGIC_SYSRQ=y
 # CONFIG_UNUSED_SYMBOLS is not set
 CONFIG_DEBUG_FS=y
-CONFIG_HEADERS_CHECK=y
+# CONFIG_HEADERS_CHECK is not set
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_SCHED_DEBUG is not set
 # CONFIG_SCHEDSTATS is not set
 # CONFIG_TIMER_STATS is not set
+# CONFIG_DEBUG_OBJECTS is not set
 # CONFIG_DEBUG_SLAB is not set
 CONFIG_DEBUG_PREEMPT=y
 # CONFIG_DEBUG_RT_MUTEXES is not set
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 4b010ff814c9..7383781f3e6a 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -150,33 +150,24 @@ static ssize_t hypfs_aio_read(struct kiocb *iocb, const struct iovec *iov,
 			      unsigned long nr_segs, loff_t offset)
 {
 	char *data;
-	size_t len;
+	ssize_t ret;
 	struct file *filp = iocb->ki_filp;
 	/* XXX: temporary */
 	char __user *buf = iov[0].iov_base;
 	size_t count = iov[0].iov_len;
 
-	if (nr_segs != 1) {
-		count = -EINVAL;
-		goto out;
-	}
+	if (nr_segs != 1)
+		return -EINVAL;
 
 	data = filp->private_data;
-	len = strlen(data);
-	if (offset > len) {
-		count = 0;
-		goto out;
-	}
-	if (count > len - offset)
-		count = len - offset;
-	if (copy_to_user(buf, data + offset, count)) {
-		count = -EFAULT;
-		goto out;
-	}
-	iocb->ki_pos += count;
+	ret = simple_read_from_buffer(buf, count, &offset, data, strlen(data));
+	if (ret <= 0)
+		return ret;
+
+	iocb->ki_pos += ret;
 	file_accessed(filp);
-out:
-	return count;
+
+	return ret;
 }
 static ssize_t hypfs_aio_write(struct kiocb *iocb, const struct iovec *iov,
 			      unsigned long nr_segs, loff_t offset)
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 6302f5082588..50f657e77344 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -7,9 +7,14 @@
 #
 CFLAGS_smp.o	:= -Wno-nonnull
 
+#
+# Pass UTS_MACHINE for user_regset definition
+#
+CFLAGS_ptrace.o		+= -DUTS_MACHINE='"$(UTS_MACHINE)"'
+
 obj-y	:=  bitmap.o traps.o time.o process.o base.o early.o \
             setup.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \
-	    s390_ext.o debug.o irq.o ipl.o dis.o diag.o
+	    s390_ext.o debug.o irq.o ipl.o dis.o diag.o mem_detect.o
 
 obj-y	+= $(if $(CONFIG_64BIT),entry64.o,entry.o)
 obj-y	+= $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
@@ -23,7 +28,7 @@ obj-$(CONFIG_AUDIT)		+= audit.o
 compat-obj-$(CONFIG_AUDIT)	+= compat_audit.o
 obj-$(CONFIG_COMPAT)		+= compat_linux.o compat_signal.o \
 					compat_wrapper.o compat_exec_domain.o \
-					binfmt_elf32.o $(compat-obj-y)
+					$(compat-obj-y)
 
 obj-$(CONFIG_VIRT_TIMER)	+= vtime.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
diff --git a/arch/s390/kernel/binfmt_elf32.c b/arch/s390/kernel/binfmt_elf32.c
deleted file mode 100644
index 3e1c315b736d..000000000000
--- a/arch/s390/kernel/binfmt_elf32.c
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Support for 32-bit Linux for S390 ELF binaries.
- *
- * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
- * Author(s): Gerhard Tonn (ton@de.ibm.com)
- *
- * Heavily inspired by the 32-bit Sparc compat code which is
- * Copyright (C) 1995, 1996, 1997, 1998 David S. Miller (davem@redhat.com)
- * Copyright (C) 1995, 1996, 1997, 1998 Jakub Jelinek   (jj@ultra.linux.cz)
- */
-
-#define __ASMS390_ELF_H
-
-#include <linux/time.h>
-
-/*
- * These are used to set parameters in the core dumps.
- */
-#define ELF_CLASS	ELFCLASS32
-#define ELF_DATA	ELFDATA2MSB
-#define ELF_ARCH	EM_S390
-
-/*
- * This is used to ensure we don't load something for the wrong architecture.
- */
-#define elf_check_arch(x) \
-	(((x)->e_machine == EM_S390 || (x)->e_machine == EM_S390_OLD) \
-         && (x)->e_ident[EI_CLASS] == ELF_CLASS)
-
-/* ELF register definitions */
-#define NUM_GPRS      16
-#define NUM_FPRS      16
-#define NUM_ACRS      16    
-
-/* For SVR4/S390 the function pointer to be registered with `atexit` is
-   passed in R14. */
-#define ELF_PLAT_INIT(_r, load_addr) \
-	do { \
-		_r->gprs[14] = 0; \
-	} while(0)
-
-#define USE_ELF_CORE_DUMP
-#define ELF_EXEC_PAGESIZE       4096
-
-/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
-   use of this is to invoke "./ld.so someprog" to test out a new version of
-   the loader.  We need to make sure that it is out of the way of the program
-   that it will "exec", and that there is sufficient room for the brk.  */
-
-#define ELF_ET_DYN_BASE         (TASK_SIZE / 3 * 2)
-
-/* Wow, the "main" arch needs arch dependent functions too.. :) */
-
-/* regs is struct pt_regs, pr_reg is elf_gregset_t (which is
-   now struct_user_regs, they are different) */
-
-#define ELF_CORE_COPY_REGS(pr_reg, regs) dump_regs32(regs, &pr_reg);
-
-#define ELF_CORE_COPY_TASK_REGS(tsk, regs) dump_task_regs32(tsk, regs)
-
-#define ELF_CORE_COPY_FPREGS(tsk, fpregs) dump_task_fpu(tsk, fpregs)
-
-/* This yields a mask that user programs can use to figure out what
-   instruction set this CPU supports. */
-
-#define ELF_HWCAP (0)
-
-/* This yields a string that ld.so will use to load implementation
-   specific libraries for optimization.  This is more specific in
-   intent than poking at uname or /proc/cpuinfo.
-
-   For the moment, we have only optimizations for the Intel generations,
-   but that could change... */
-
-#define ELF_PLATFORM (NULL)
-
-#define SET_PERSONALITY(ex, ibcs2)			\
-do {							\
-	if (ibcs2)                                      \
-		set_personality(PER_SVR4);              \
-	else if (current->personality != PER_LINUX32)   \
-		set_personality(PER_LINUX);             \
-	set_thread_flag(TIF_31BIT);			\
-} while (0)
-
-#include "compat_linux.h"
-
-typedef _s390_fp_regs32 elf_fpregset_t;
-
-typedef struct
-{
-	
-	_psw_t32	psw;
-	__u32		gprs[__NUM_GPRS]; 
-	__u32		acrs[__NUM_ACRS]; 
-	__u32		orig_gpr2;
-} s390_regs32;
-typedef s390_regs32 elf_gregset_t;
-
-static inline int dump_regs32(struct pt_regs *ptregs, elf_gregset_t *regs)
-{
-	int i;
-
-	memcpy(&regs->psw.mask, &ptregs->psw.mask, 4);
-	memcpy(&regs->psw.addr, (char *)&ptregs->psw.addr + 4, 4);
-	for (i = 0; i < NUM_GPRS; i++)
-		regs->gprs[i] = ptregs->gprs[i];
-	save_access_regs(regs->acrs);
-	regs->orig_gpr2 = ptregs->orig_gpr2;
-	return 1;
-}
-
-static inline int dump_task_regs32(struct task_struct *tsk, elf_gregset_t *regs)
-{
-	struct pt_regs *ptregs = task_pt_regs(tsk);
-	int i;
-
-	memcpy(&regs->psw.mask, &ptregs->psw.mask, 4);
-	memcpy(&regs->psw.addr, (char *)&ptregs->psw.addr + 4, 4);
-	for (i = 0; i < NUM_GPRS; i++)
-		regs->gprs[i] = ptregs->gprs[i];
-	memcpy(regs->acrs, tsk->thread.acrs, sizeof(regs->acrs));
-	regs->orig_gpr2 = ptregs->orig_gpr2;
-	return 1;
-}
-
-static inline int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpregs)
-{
-	if (tsk == current)
-		save_fp_regs((s390_fp_regs *) fpregs);
-	else
-		memcpy(fpregs, &tsk->thread.fp_regs, sizeof(elf_fpregset_t));
-	return 1;
-}
-
-#include <asm/processor.h>
-#include <asm/pgalloc.h>
-#include <linux/module.h>
-#include <linux/elfcore.h>
-#include <linux/binfmts.h>
-#include <linux/compat.h>
-
-#define elf_prstatus elf_prstatus32
-struct elf_prstatus32
-{
-	struct elf_siginfo pr_info;	/* Info associated with signal */
-	short	pr_cursig;		/* Current signal */
-	u32	pr_sigpend;	/* Set of pending signals */
-	u32	pr_sighold;	/* Set of held signals */
-	pid_t	pr_pid;
-	pid_t	pr_ppid;
-	pid_t	pr_pgrp;
-	pid_t	pr_sid;
-	struct compat_timeval pr_utime;	/* User time */
-	struct compat_timeval pr_stime;	/* System time */
-	struct compat_timeval pr_cutime;	/* Cumulative user time */
-	struct compat_timeval pr_cstime;	/* Cumulative system time */
-	elf_gregset_t pr_reg;	/* GP registers */
-	int pr_fpvalid;		/* True if math co-processor being used.  */
-};
-
-#define elf_prpsinfo elf_prpsinfo32
-struct elf_prpsinfo32
-{
-	char	pr_state;	/* numeric process state */
-	char	pr_sname;	/* char for pr_state */
-	char	pr_zomb;	/* zombie */
-	char	pr_nice;	/* nice val */
-	u32	pr_flag;	/* flags */
-	u16	pr_uid;
-	u16	pr_gid;
-	pid_t	pr_pid, pr_ppid, pr_pgrp, pr_sid;
-	/* Lots missing */
-	char	pr_fname[16];	/* filename of executable */
-	char	pr_psargs[ELF_PRARGSZ];	/* initial part of arg list */
-};
-
-#include <linux/highuid.h>
-
-/*
-#define init_elf_binfmt init_elf32_binfmt
-*/
-
-#undef start_thread
-#define start_thread                    start_thread31 
-
-static inline void start_thread31(struct pt_regs *regs, unsigned long new_psw,
-				  unsigned long new_stackp)
-{
-	set_fs(USER_DS);
-	regs->psw.mask	= psw_user32_bits;
-	regs->psw.addr	= new_psw;
-	regs->gprs[15]	= new_stackp;
-	crst_table_downgrade(current->mm, 1UL << 31);
-}
-
-MODULE_DESCRIPTION("Binary format loader for compatibility with 32bit Linux for S390 binaries,"
-                   " Copyright 2000 IBM Corporation"); 
-MODULE_AUTHOR("Gerhard Tonn <ton@de.ibm.com>");
-
-#undef MODULE_DESCRIPTION
-#undef MODULE_AUTHOR
-
-#undef cputime_to_timeval
-#define cputime_to_timeval cputime_to_compat_timeval
-static inline void
-cputime_to_compat_timeval(const cputime_t cputime, struct compat_timeval *value)
-{
-	value->tv_usec = cputime % 1000000;
-	value->tv_sec = cputime / 1000000;
-}
-
-#include "../../../fs/binfmt_elf.c"
-
diff --git a/arch/s390/kernel/compat_ptrace.h b/arch/s390/kernel/compat_ptrace.h
index 419aef913ee1..cde81fa64f89 100644
--- a/arch/s390/kernel/compat_ptrace.h
+++ b/arch/s390/kernel/compat_ptrace.h
@@ -1,7 +1,7 @@
 #ifndef _PTRACE32_H
 #define _PTRACE32_H
 
-#include "compat_linux.h"  /* needed for _psw_t32 */
+#include "compat_linux.h"  /* needed for psw_compat_t */
 
 typedef struct {
 	__u32 cr[3];
@@ -38,7 +38,7 @@ typedef struct {
 
 struct user_regs_struct32
 {
-	_psw_t32 psw;
+	psw_compat_t psw;
 	u32 gprs[NUM_GPRS];
 	u32 acrs[NUM_ACRS];
 	u32 orig_gpr2;
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index c93d1296cc0a..d80fcd4a7fe1 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -1079,7 +1079,6 @@ __init debug_init(void)
 	s390dbf_sysctl_header = register_sysctl_table(s390dbf_dir_table);
 	mutex_lock(&debug_mutex);
 	debug_debugfs_root_entry = debugfs_create_dir(DEBUG_DIR_ROOT,NULL);
-	printk(KERN_INFO "debug: Initialization complete\n");
 	initialized = 1;
 	mutex_unlock(&debug_mutex);
 
@@ -1193,7 +1192,6 @@ debug_get_uint(char *buf)
 	for(; isspace(*buf); buf++);
 	rc = simple_strtoul(buf, &buf, 10);
 	if(*buf){
-		printk("debug: no integer specified!\n");
 		rc = -EINVAL;
 	}
 	return rc;
@@ -1340,19 +1338,12 @@ static void debug_flush(debug_info_t* id, int area)
                         	memset(id->areas[i][j], 0, PAGE_SIZE);
 			}
 		}
-                printk(KERN_INFO "debug: %s: all areas flushed\n",id->name);
         } else if(area >= 0 && area < id->nr_areas) {
                 id->active_entries[area] = 0;
 		id->active_pages[area] = 0;
 		for(i = 0; i < id->pages_per_area; i++) {
                 	memset(id->areas[area][i],0,PAGE_SIZE);
 		}
-                printk(KERN_INFO "debug: %s: area %i has been flushed\n",
-                        id->name, area);
-        } else {
-                printk(KERN_INFO
-                      "debug: %s: area %i cannot be flushed (range: %i - %i)\n",
-                        id->name, area, 0, id->nr_areas-1);
         }
         spin_unlock_irqrestore(&id->lock,flags);
 }
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index c14a336f6300..d2f270c995d9 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -208,7 +208,7 @@ static const unsigned char formats[][7] = {
 	[INSTR_RRF_F0FF]  = { 0xff, F_16,F_24,F_28,0,0,0 },    /* e.g. madbr */
 	[INSTR_RRF_FUFF]  = { 0xff, F_24,F_16,F_28,U4_20,0,0 },/* e.g. didbr */
 	[INSTR_RRF_RURR]  = { 0xff, R_24,R_28,R_16,U4_20,0,0 },/* e.g. .insn */
-	[INSTR_RRF_R0RR]  = { 0xff, R_24,R_28,R_16,0,0,0 },    /* e.g. idte  */
+	[INSTR_RRF_R0RR]  = { 0xff, R_24,R_16,R_28,0,0,0 },    /* e.g. idte  */
 	[INSTR_RRF_U0FF]  = { 0xff, F_24,U4_16,F_28,0,0,0 },   /* e.g. fixr  */
 	[INSTR_RRF_U0RF]  = { 0xff, R_24,U4_16,F_28,0,0,0 },   /* e.g. cfebr */
 	[INSTR_RRF_M0RR]  = { 0xff, R_24,R_28,M_16,0,0,0 },    /* e.g. sske  */
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index d0e09684b9ce..2a2ca268b1dd 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/pfn.h>
 #include <linux/uaccess.h>
+#include <asm/ebcdic.h>
 #include <asm/ipl.h>
 #include <asm/lowcore.h>
 #include <asm/processor.h>
@@ -26,12 +27,40 @@
 /*
  * Create a Kernel NSS if the SAVESYS= parameter is defined
  */
-#define DEFSYS_CMD_SIZE		96
+#define DEFSYS_CMD_SIZE		128
 #define SAVESYS_CMD_SIZE	32
 
 char kernel_nss_name[NSS_NAME_SIZE + 1];
 
+static void __init setup_boot_command_line(void);
+
+
 #ifdef CONFIG_SHARED_KERNEL
+int __init savesys_ipl_nss(char *cmd, const int cmdlen);
+/* savesys_ipl_nss() is implemented by the assembler block right below. */
+asm(
+	"	.section .init.text,\"ax\",@progbits\n"
+	"	.align	4\n"
+	"	.type	savesys_ipl_nss, @function\n"
+	"savesys_ipl_nss:\n"
+#ifdef CONFIG_64BIT
+	"	stmg	6,15,48(15)\n"	/* save r6-r15 at 48(r15) */
+	"	lgr	14,3\n"	/* r14 = r3, presumably cmdlen per the C calling convention */
+	"	sam31\n"	/* the diag is issued in 31 bit addressing mode */
+	"	diag	2,14,0x8\n"	/* diagnose 0x8 with r2 and r14 as operands */
+	"	sam64\n"	/* back to 64 bit addressing mode */
+	"	lgr	2,14\n"	/* r14 as left by the diag is handed back in r2 */
+	"	lmg	6,15,48(15)\n"	/* restore r6-r15, including the return address in r14 */
+#else
+	"	stm	6,15,24(15)\n"	/* same sequence with 32 bit instructions, no mode switch */
+	"	lr	14,3\n"
+	"	diag	2,14,0x8\n"
+	"	lr	2,14\n"
+	"	lm	6,15,24(15)\n"
+#endif
+	"	br	14\n"	/* return to the caller */
+	"	.size	savesys_ipl_nss, .-savesys_ipl_nss\n");
+
 static noinline __init void create_kernel_nss(void)
 {
 	unsigned int i, stext_pfn, eshared_pfn, end_pfn, min_size;
@@ -39,6 +68,7 @@ static noinline __init void create_kernel_nss(void)
 	unsigned int sinitrd_pfn, einitrd_pfn;
 #endif
 	int response;
+	size_t len;
 	char *savesys_ptr;
 	char upper_command_line[COMMAND_LINE_SIZE];
 	char defsys_cmd[DEFSYS_CMD_SIZE];
@@ -49,8 +79,8 @@ static noinline __init void create_kernel_nss(void)
 		return;
 
 	/* Convert COMMAND_LINE to upper case */
-	for (i = 0; i < strlen(COMMAND_LINE); i++)
-		upper_command_line[i] = toupper(COMMAND_LINE[i]);
+	for (i = 0; i < strlen(boot_command_line); i++)
+		upper_command_line[i] = toupper(boot_command_line[i]);
 
 	savesys_ptr = strstr(upper_command_line, "SAVESYS=");
 
@@ -83,7 +113,8 @@ static noinline __init void create_kernel_nss(void)
 	}
 #endif
 
-	sprintf(defsys_cmd, "%s EW MINSIZE=%.7iK", defsys_cmd, min_size);
+	sprintf(defsys_cmd, "%s EW MINSIZE=%.7iK PARMREGS=0-13",
+		defsys_cmd, min_size);
 	sprintf(savesys_cmd, "SAVESYS %s \n IPL %s",
 		kernel_nss_name, kernel_nss_name);
 
@@ -94,13 +125,24 @@ static noinline __init void create_kernel_nss(void)
 		return;
 	}
 
-	__cpcmd(savesys_cmd, NULL, 0, &response);
+	len = strlen(savesys_cmd);
+	ASCEBC(savesys_cmd, len);
+	response = savesys_ipl_nss(savesys_cmd, len);
 
-	if (response != strlen(savesys_cmd)) {
+	/* On success: response is equal to the command size,
+	 *	       max SAVESYS_CMD_SIZE
+	 * On error: response contains the numeric portion of cp error message.
+	 *	     for SAVESYS it will be >= 263
+	 */
+	if (response > SAVESYS_CMD_SIZE) {
 		kernel_nss_name[0] = '\0';
 		return;
 	}
 
+	/* re-setup boot command line with new ipl vm parms */
+	ipl_update_parameters();
+	setup_boot_command_line();
+
 	ipl_flags = IPL_NSS_VALID;
 }
 
@@ -141,109 +183,11 @@ static noinline __init void detect_machine_type(void)
 	if (cpuinfo->cpu_id.version == 0xff)
 		machine_flags |= MACHINE_FLAG_VM;
 
-	/* Running on a P/390 ? */
-	if (cpuinfo->cpu_id.machine == 0x7490)
-		machine_flags |= MACHINE_FLAG_P390;
-
 	/* Running under KVM ? */
 	if (cpuinfo->cpu_id.version == 0xfe)
 		machine_flags |= MACHINE_FLAG_KVM;
 }
 
-#ifdef CONFIG_64BIT
-static noinline __init int memory_fast_detect(void)
-{
-	unsigned long val0 = 0;
-	unsigned long val1 = 0xc;
-	int ret = -ENOSYS;
-
-	if (ipl_flags & IPL_NSS_VALID)
-		return -ENOSYS;
-
-	asm volatile(
-		"	diag	%1,%2,0x260\n"
-		"0:	lhi	%0,0\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "+d" (ret), "+d" (val0), "+d" (val1) : : "cc");
-
-	if (ret || val0 != val1)
-		return -ENOSYS;
-
-	memory_chunk[0].size = val0 + 1;
-	return 0;
-}
-#else
-static inline int memory_fast_detect(void)
-{
-	return -ENOSYS;
-}
-#endif
-
-static inline __init unsigned long __tprot(unsigned long addr)
-{
-	int cc = -1;
-
-	asm volatile(
-		"	tprot	0(%1),0\n"
-		"0:	ipm	%0\n"
-		"	srl	%0,28\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "+d" (cc) : "a" (addr) : "cc");
-	return (unsigned long)cc;
-}
-
-/* Checking memory in 128KB increments. */
-#define CHUNK_INCR	(1UL << 17)
-#define ADDR2G		(1UL << 31)
-
-static noinline __init void find_memory_chunks(unsigned long memsize)
-{
-	unsigned long addr = 0, old_addr = 0;
-	unsigned long old_cc = CHUNK_READ_WRITE;
-	unsigned long cc;
-	int chunk = 0;
-
-	while (chunk < MEMORY_CHUNKS) {
-		cc = __tprot(addr);
-		while (cc == old_cc) {
-			addr += CHUNK_INCR;
-			if (memsize && addr >= memsize)
-				break;
-#ifndef CONFIG_64BIT
-			if (addr == ADDR2G)
-				break;
-#endif
-			cc = __tprot(addr);
-		}
-
-		if (old_addr != addr &&
-		    (old_cc == CHUNK_READ_WRITE || old_cc == CHUNK_READ_ONLY)) {
-			memory_chunk[chunk].addr = old_addr;
-			memory_chunk[chunk].size = addr - old_addr;
-			memory_chunk[chunk].type = old_cc;
-			chunk++;
-		}
-
-		old_addr = addr;
-		old_cc = cc;
-
-#ifndef CONFIG_64BIT
-		if (addr == ADDR2G)
-			break;
-#endif
-		/*
-		 * Finish memory detection at the first hole
-		 * if storage size is unknown.
-		 */
-		if (cc == -1UL && !memsize)
-			break;
-		if (memsize && addr >= memsize)
-			break;
-	}
-}
-
 static __init void early_pgm_check_handler(void)
 {
 	unsigned long addr;
@@ -380,23 +324,61 @@ static __init void detect_machine_facilities(void)
 #endif
 }
 
+static __init void rescue_initrd(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+	/*
+	 * Clearing the bss section would wipe an initrd that starts below
+	 * __bss_stop, so relocate such an initrd to begin at __bss_stop.
+	 * Nothing to do without an initrd or if it already sits behind bss.
+	 */
+	unsigned long bss_end = (unsigned long) __bss_stop;
+
+	if (INITRD_START && INITRD_SIZE && INITRD_START < bss_end) {
+		memmove(__bss_stop, (void *) INITRD_START, INITRD_SIZE);
+		INITRD_START = bss_end;
+	}
+#endif
+}
+
+/* Build boot_command_line from the arch command line plus the VM IPL PARM */
+static void __init setup_boot_command_line(void)
+{
+	char *vmparm;
+
+	/* start from the arch command line, always NUL terminated */
+	strlcpy(boot_command_line, COMMAND_LINE, ARCH_COMMAND_LINE_SIZE);
+	boot_command_line[ARCH_COMMAND_LINE_SIZE - 1] = 0;
+
+	if (!MACHINE_IS_VM)
+		return;
+	/* on VM: append " <parm>"; a parm starting with '=' replaces it all */
+	vmparm = boot_command_line + strlen(boot_command_line);
+	*vmparm++ = ' ';
+	get_ipl_vmparm(vmparm);
+	if (vmparm[0] == '=')
+		memmove(boot_command_line, vmparm + 1, strlen(vmparm));
+}
+
+
 /*
  * Save ipl parameters, clear bss memory, initialize storage keys
  * and create a kernel NSS at startup if the SAVESYS= parm is defined
  */
 void __init startup_init(void)
 {
-	unsigned long long memsize;
-
 	ipl_save_parameters();
+	rescue_initrd();
 	clear_bss_section();
 	init_kernel_storage_key();
 	lockdep_init();
 	lockdep_off();
-	detect_machine_type();
-	create_kernel_nss();
 	sort_main_extable();
 	setup_lowcore_early();
+	detect_machine_type();
+	ipl_update_parameters();
+	setup_boot_command_line();
+	create_kernel_nss();
 	detect_mvpg();
 	detect_ieee();
 	detect_csp();
@@ -404,18 +386,7 @@ void __init startup_init(void)
 	detect_diag44();
 	detect_machine_facilities();
 	setup_hpage();
-	sclp_read_info_early();
 	sclp_facilities_detect();
-	memsize = sclp_memory_detect();
-#ifndef CONFIG_64BIT
-	/*
-	 * Can't deal with more than 2G in 31 bit addressing mode, so
-	 * limit the value in order to avoid strange side effects.
-	 */
-	if (memsize > ADDR2G)
-		memsize = ADDR2G;
-#endif
-	if (memory_fast_detect() < 0)
-		find_memory_chunks((unsigned long) memsize);
+	detect_memory_layout(memory_chunk);
 	lockdep_on();
 }
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 532542447d66..54b2779b5e2f 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -14,6 +14,7 @@
 #include <linux/delay.h>
 #include <linux/reboot.h>
 #include <linux/ctype.h>
+#include <linux/fs.h>
 #include <asm/ipl.h>
 #include <asm/smp.h>
 #include <asm/setup.h>
@@ -22,6 +23,7 @@
 #include <asm/ebcdic.h>
 #include <asm/reset.h>
 #include <asm/sclp.h>
+#include <asm/setup.h>
 
 #define IPL_PARM_BLOCK_VERSION 0
 
@@ -121,6 +123,7 @@ enum ipl_method {
 	REIPL_METHOD_FCP_RO_VM,
 	REIPL_METHOD_FCP_DUMP,
 	REIPL_METHOD_NSS,
+	REIPL_METHOD_NSS_DIAG,
 	REIPL_METHOD_DEFAULT,
 };
 
@@ -134,14 +137,15 @@ enum dump_method {
 
 static int diag308_set_works = 0;
 
+static struct ipl_parameter_block ipl_block;
+
 static int reipl_capabilities = IPL_TYPE_UNKNOWN;
 
 static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN;
 static enum ipl_method reipl_method = REIPL_METHOD_DEFAULT;
 static struct ipl_parameter_block *reipl_block_fcp;
 static struct ipl_parameter_block *reipl_block_ccw;
-
-static char reipl_nss_name[NSS_NAME_SIZE + 1];
+static struct ipl_parameter_block *reipl_block_nss;
 
 static int dump_capabilities = DUMP_TYPE_NONE;
 static enum dump_type dump_type = DUMP_TYPE_NONE;
@@ -263,6 +267,56 @@ static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr,
 
 static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type);
 
+/* VM IPL PARM routines: write the VM PARM of @ipb as ASCII string to @dest */
+static void reipl_get_ascii_vmparm(char *dest,
+				   const struct ipl_parameter_block *ipb)
+{
+	int i, len = 0;
+	char has_lowercase = 0;
+
+	if ((ipb->ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) &&
+	    (ipb->ipl_info.ccw.vm_parm_len > 0)) {
+		/* never copy more than the DIAG308_VMPARM_SIZE bytes of vm_parm */
+		len = DIAG308_VMPARM_SIZE;
+		if (ipb->ipl_info.ccw.vm_parm_len < DIAG308_VMPARM_SIZE)
+			len = ipb->ipl_info.ccw.vm_parm_len;
+		memcpy(dest, ipb->ipl_info.ccw.vm_parm, len);
+		/* If at least one character is lowercase, we assume mixed
+		 * case; otherwise we convert everything to lowercase. */
+		for (i = 0; i < len; i++)
+			if ((dest[i] > 0x80 && dest[i] < 0x8a) || /* a-i */
+			    (dest[i] > 0x90 && dest[i] < 0x9a) || /* j-r */
+			    (dest[i] > 0xa1 && dest[i] < 0xaa)) { /* s-z */
+				has_lowercase = 1;
+				break;
+			}
+		if (!has_lowercase)
+			EBC_TOLOWER(dest, len);
+		EBCASC(dest, len);	/* EBCDIC -> ASCII in place */
+	}
+	dest[len] = 0;	/* empty string if no valid parm is present */
+}
+
+void get_ipl_vmparm(char *dest)
+{
+	/* without a diag308-stored CCW block there is no VM PARM: return "" */
+	dest[0] = 0;
+	if (diag308_set_works && ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW)
+		reipl_get_ascii_vmparm(dest, &ipl_block);
+}
+
+static ssize_t ipl_vm_parm_show(struct kobject *kobj,
+				struct kobj_attribute *attr, char *page)
+{
+	char parm[DIAG308_VMPARM_SIZE + 1] = {};
+
+	get_ipl_vmparm(parm);	/* ASCII VM PARM of ipl_block, "" if unavailable */
+	return sprintf(page, "%s\n", parm);
+}
+/* read-only "parm" attribute backed by ipl_vm_parm_show() */
+static struct kobj_attribute sys_ipl_vm_parm_attr =
+	__ATTR(parm, S_IRUGO, ipl_vm_parm_show, NULL);
+
 static ssize_t sys_ipl_device_show(struct kobject *kobj,
 				   struct kobj_attribute *attr, char *page)
 {
@@ -285,14 +339,8 @@ static struct kobj_attribute sys_ipl_device_attr =
 static ssize_t ipl_parameter_read(struct kobject *kobj, struct bin_attribute *attr,
 				  char *buf, loff_t off, size_t count)
 {
-	unsigned int size = IPL_PARMBLOCK_SIZE;
-
-	if (off > size)
-		return 0;
-	if (off + count > size)
-		count = size - off;
-	memcpy(buf, (void *)IPL_PARMBLOCK_START + off, count);
-	return count;
+	return memory_read_from_buffer(buf, count, &off, IPL_PARMBLOCK_START,
+					IPL_PARMBLOCK_SIZE);
 }
 
 static struct bin_attribute ipl_parameter_attr = {
@@ -310,12 +358,7 @@ static ssize_t ipl_scp_data_read(struct kobject *kobj, struct bin_attribute *att
 	unsigned int size = IPL_PARMBLOCK_START->ipl_info.fcp.scp_data_len;
 	void *scp_data = &IPL_PARMBLOCK_START->ipl_info.fcp.scp_data;
 
-	if (off > size)
-		return 0;
-	if (off + count > size)
-		count = size - off;
-	memcpy(buf, scp_data + off, count);
-	return count;
+	return memory_read_from_buffer(buf, count, &off, scp_data, size);
 }
 
 static struct bin_attribute ipl_scp_data_attr = {
@@ -370,15 +413,27 @@ static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj,
 static struct kobj_attribute sys_ipl_ccw_loadparm_attr =
 	__ATTR(loadparm, 0444, ipl_ccw_loadparm_show, NULL);
 
-static struct attribute *ipl_ccw_attrs[] = {
+static struct attribute *ipl_ccw_attrs_vm[] = {
 	&sys_ipl_type_attr.attr,
 	&sys_ipl_device_attr.attr,
 	&sys_ipl_ccw_loadparm_attr.attr,
+	&sys_ipl_vm_parm_attr.attr,
 	NULL,
 };
 
-static struct attribute_group ipl_ccw_attr_group = {
-	.attrs = ipl_ccw_attrs,
+static struct attribute *ipl_ccw_attrs_lpar[] = {
+	&sys_ipl_type_attr.attr,
+	&sys_ipl_device_attr.attr,
+	&sys_ipl_ccw_loadparm_attr.attr,
+	NULL,
+};
+
+static struct attribute_group ipl_ccw_attr_group_vm = {
+	.attrs = ipl_ccw_attrs_vm,
+};
+
+static struct attribute_group ipl_ccw_attr_group_lpar = {
+	.attrs = ipl_ccw_attrs_lpar
 };
 
 /* NSS ipl device attributes */
@@ -388,6 +443,8 @@ DEFINE_IPL_ATTR_RO(ipl_nss, name, "%s\n", kernel_nss_name);
 static struct attribute *ipl_nss_attrs[] = {
 	&sys_ipl_type_attr.attr,
 	&sys_ipl_nss_name_attr.attr,
+	&sys_ipl_ccw_loadparm_attr.attr,
+	&sys_ipl_vm_parm_attr.attr,
 	NULL,
 };
 
@@ -450,7 +507,12 @@ static int __init ipl_init(void)
 	}
 	switch (ipl_info.type) {
 	case IPL_TYPE_CCW:
-		rc = sysfs_create_group(&ipl_kset->kobj, &ipl_ccw_attr_group);
+		if (MACHINE_IS_VM)
+			rc = sysfs_create_group(&ipl_kset->kobj,
+						&ipl_ccw_attr_group_vm);
+		else
+			rc = sysfs_create_group(&ipl_kset->kobj,
+						&ipl_ccw_attr_group_lpar);
 		break;
 	case IPL_TYPE_FCP:
 	case IPL_TYPE_FCP_DUMP:
@@ -481,6 +543,83 @@ static struct shutdown_action __refdata ipl_action = {
  * reipl shutdown action: Reboot Linux on shutdown.
  */
 
+/* VM IPL PARM attributes: shared show helper, prints "<vm parm of @ipb>\n" */
+static ssize_t reipl_generic_vmparm_show(struct ipl_parameter_block *ipb,
+					  char *page)
+{
+	char vmparm[DIAG308_VMPARM_SIZE + 1] = {};
+
+	reipl_get_ascii_vmparm(vmparm, ipb);	/* "" when @ipb has no valid parm */
+	return sprintf(page, "%s\n", vmparm);
+}
+
+static ssize_t reipl_generic_vmparm_store(struct ipl_parameter_block *ipb,
+					  size_t vmparm_max,
+					  const char *buf, size_t len)
+{
+	/* Store @buf as EBCDIC VM PARM in @ipb. Returns @len, or -EINVAL when
+	 * the parm exceeds @vmparm_max or is not printable 7-bit ASCII. */
+	int i, ip_len = len;
+
+	/* ignore trailing newline */
+	if ((len > 0) && (buf[len - 1] == '\n'))
+		ip_len--;
+	if (ip_len > vmparm_max)
+		return -EINVAL;
+
+	/* parm holds kernel options: reject control and non-ASCII characters */
+	for (i = 0; i < ip_len; i++)
+		if (!(isascii(buf[i]) && isprint(buf[i])))
+			return -EINVAL;
+
+	memset(ipb->ipl_info.ccw.vm_parm, 0, DIAG308_VMPARM_SIZE);
+	ipb->ipl_info.ccw.vm_parm_len = ip_len;
+	if (ip_len > 0) {
+		ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID;
+		memcpy(ipb->ipl_info.ccw.vm_parm, buf, ip_len);
+		ASCEBC(ipb->ipl_info.ccw.vm_parm, ip_len);
+	} else {
+		ipb->ipl_info.ccw.vm_flags &= ~DIAG308_VM_FLAGS_VP_VALID;
+	}
+
+	return len;
+}
+
+/* NSS wrappers: sysfs show/store of the VM PARM held in reipl_block_nss */
+static ssize_t reipl_nss_vmparm_show(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *page)
+{
+	return reipl_generic_vmparm_show(reipl_block_nss, page);
+}
+
+static ssize_t reipl_nss_vmparm_store(struct kobject *kobj,
+				      struct kobj_attribute *attr,
+				      const char *buf, size_t len)
+{
+	return reipl_generic_vmparm_store(reipl_block_nss, 56, buf, len); /* at most 56 characters; NOTE(review): reason for 56 not visible here */
+}
+
+/* CCW wrappers: sysfs show/store of the VM PARM held in reipl_block_ccw */
+static ssize_t reipl_ccw_vmparm_show(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *page)
+{
+	return reipl_generic_vmparm_show(reipl_block_ccw, page);
+}
+
+static ssize_t reipl_ccw_vmparm_store(struct kobject *kobj,
+				      struct kobj_attribute *attr,
+				      const char *buf, size_t len)
+{
+	return reipl_generic_vmparm_store(reipl_block_ccw, 64, buf, len); /* at most 64 characters */
+}
+
+static struct kobj_attribute sys_reipl_nss_vmparm_attr =
+	__ATTR(parm, S_IRUGO | S_IWUSR, reipl_nss_vmparm_show,
+					reipl_nss_vmparm_store); /* "parm" attribute of the NSS reipl block */
+static struct kobj_attribute sys_reipl_ccw_vmparm_attr =
+	__ATTR(parm, S_IRUGO | S_IWUSR, reipl_ccw_vmparm_show,
+					reipl_ccw_vmparm_store); /* "parm" attribute of the CCW reipl block */
+
 /* FCP reipl device attributes */
 
 DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%016llx\n",
@@ -513,27 +652,26 @@ static struct attribute_group reipl_fcp_attr_group = {
 DEFINE_IPL_ATTR_RW(reipl_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
 	reipl_block_ccw->ipl_info.ccw.devno);
 
-static void reipl_get_ascii_loadparm(char *loadparm)
+static void reipl_get_ascii_loadparm(char *loadparm,
+				     struct ipl_parameter_block *ibp)
 {
-	memcpy(loadparm, &reipl_block_ccw->ipl_info.ccw.load_param,
-	       LOADPARM_LEN);
+	memcpy(loadparm, ibp->ipl_info.ccw.load_parm, LOADPARM_LEN);
 	EBCASC(loadparm, LOADPARM_LEN);
 	loadparm[LOADPARM_LEN] = 0;
 	strstrip(loadparm);
 }
 
-static ssize_t reipl_ccw_loadparm_show(struct kobject *kobj,
-				       struct kobj_attribute *attr, char *page)
+static ssize_t reipl_generic_loadparm_show(struct ipl_parameter_block *ipb,
+					   char *page)
 {
 	char buf[LOADPARM_LEN + 1];
 
-	reipl_get_ascii_loadparm(buf);
+	reipl_get_ascii_loadparm(buf, ipb);
 	return sprintf(page, "%s\n", buf);
 }
 
-static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj,
-					struct kobj_attribute *attr,
-					const char *buf, size_t len)
+static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb,
+					    const char *buf, size_t len)
 {
 	int i, lp_len;
 
@@ -552,35 +690,128 @@ static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj,
 		return -EINVAL;
 	}
 	/* initialize loadparm with blanks */
-	memset(&reipl_block_ccw->ipl_info.ccw.load_param, ' ', LOADPARM_LEN);
+	memset(ipb->ipl_info.ccw.load_parm, ' ', LOADPARM_LEN);
 	/* copy and convert to ebcdic */
-	memcpy(&reipl_block_ccw->ipl_info.ccw.load_param, buf, lp_len);
-	ASCEBC(reipl_block_ccw->ipl_info.ccw.load_param, LOADPARM_LEN);
+	memcpy(ipb->ipl_info.ccw.load_parm, buf, lp_len);
+	ASCEBC(ipb->ipl_info.ccw.load_parm, LOADPARM_LEN);
 	return len;
 }
 
+/* NSS wrapper */
+static ssize_t reipl_nss_loadparm_show(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *page)
+{
+	return reipl_generic_loadparm_show(reipl_block_nss, page);
+}
+
+static ssize_t reipl_nss_loadparm_store(struct kobject *kobj,
+					struct kobj_attribute *attr,
+					const char *buf, size_t len)
+{
+	return reipl_generic_loadparm_store(reipl_block_nss, buf, len);
+}
+
+/* CCW wrapper */
+static ssize_t reipl_ccw_loadparm_show(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *page)
+{
+	return reipl_generic_loadparm_show(reipl_block_ccw, page);
+}
+
+static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj,
+					struct kobj_attribute *attr,
+					const char *buf, size_t len)
+{
+	return reipl_generic_loadparm_store(reipl_block_ccw, buf, len);
+}
+
 static struct kobj_attribute sys_reipl_ccw_loadparm_attr =
-	__ATTR(loadparm, 0644, reipl_ccw_loadparm_show,
-	       reipl_ccw_loadparm_store);
+	__ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_ccw_loadparm_show,
+					    reipl_ccw_loadparm_store);
 
-static struct attribute *reipl_ccw_attrs[] = {
+static struct attribute *reipl_ccw_attrs_vm[] = {
 	&sys_reipl_ccw_device_attr.attr,
 	&sys_reipl_ccw_loadparm_attr.attr,
+	&sys_reipl_ccw_vmparm_attr.attr,
 	NULL,
 };
 
-static struct attribute_group reipl_ccw_attr_group = {
+static struct attribute *reipl_ccw_attrs_lpar[] = {
+	&sys_reipl_ccw_device_attr.attr,
+	&sys_reipl_ccw_loadparm_attr.attr,
+	NULL,
+};
+
+static struct attribute_group reipl_ccw_attr_group_vm = {
+	.name  = IPL_CCW_STR,
+	.attrs = reipl_ccw_attrs_vm,
+};
+
+static struct attribute_group reipl_ccw_attr_group_lpar = {
 	.name  = IPL_CCW_STR,
-	.attrs = reipl_ccw_attrs,
+	.attrs = reipl_ccw_attrs_lpar,
 };
 
 
 /* NSS reipl device attributes */
+static void reipl_get_ascii_nss_name(char *dst,
+				     struct ipl_parameter_block *ipb)
+{
+	dst[NSS_NAME_SIZE] = 0;		/* dst must hold NSS_NAME_SIZE + 1 bytes */
+	memcpy(dst, ipb->ipl_info.ccw.nss_name, NSS_NAME_SIZE);
+	EBCASC(dst, NSS_NAME_SIZE);	/* convert the copied name to ASCII */
+}
+
+static ssize_t reipl_nss_name_show(struct kobject *kobj,
+				   struct kobj_attribute *attr, char *page)
+{
+	char nss_name[NSS_NAME_SIZE + 1] = {};
 
-DEFINE_IPL_ATTR_STR_RW(reipl_nss, name, "%s\n", "%s\n", reipl_nss_name);
+	reipl_get_ascii_nss_name(nss_name, reipl_block_nss);
+	return sprintf(page, "%s\n", nss_name);
+}
+
+static ssize_t reipl_nss_name_store(struct kobject *kobj,
+				    struct kobj_attribute *attr,
+				    const char *buf, size_t len)
+{
+	struct ipl_parameter_block *ipb = reipl_block_nss;
+	int name_len = len;
+
+	/* a trailing newline is not part of the name */
+	if ((len > 0) && (buf[len - 1] == '\n'))
+		name_len--;
+	if (name_len > NSS_NAME_SIZE)
+		return -EINVAL;
+
+	/* pad with 0x40 first; an empty name only clears the valid flag */
+	memset(ipb->ipl_info.ccw.nss_name, 0x40, NSS_NAME_SIZE);
+	if (name_len <= 0) {
+		ipb->ipl_info.ccw.vm_flags &= ~DIAG308_VM_FLAGS_NSS_VALID;
+		return len;
+	}
+
+	/* store the name as upper case EBCDIC and mark it valid */
+	memcpy(ipb->ipl_info.ccw.nss_name, buf, name_len);
+	ASCEBC(ipb->ipl_info.ccw.nss_name, name_len);
+	EBC_TOUPPER(ipb->ipl_info.ccw.nss_name, name_len);
+	ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_NSS_VALID;
+
+	return len;
+}
+
+static struct kobj_attribute sys_reipl_nss_name_attr =
+	__ATTR(name, S_IRUGO | S_IWUSR, reipl_nss_name_show,
+					reipl_nss_name_store);
+
+static struct kobj_attribute sys_reipl_nss_loadparm_attr =
+	__ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_nss_loadparm_show,
+					    reipl_nss_loadparm_store);
 
 static struct attribute *reipl_nss_attrs[] = {
 	&sys_reipl_nss_name_attr.attr,
+	&sys_reipl_nss_loadparm_attr.attr,
+	&sys_reipl_nss_vmparm_attr.attr,
 	NULL,
 };
 
@@ -617,7 +848,10 @@ static int reipl_set_type(enum ipl_type type)
 		reipl_method = REIPL_METHOD_FCP_DUMP;
 		break;
 	case IPL_TYPE_NSS:
-		reipl_method = REIPL_METHOD_NSS;
+		if (diag308_set_works)
+			reipl_method = REIPL_METHOD_NSS_DIAG;
+		else
+			reipl_method = REIPL_METHOD_NSS;
 		break;
 	case IPL_TYPE_UNKNOWN:
 		reipl_method = REIPL_METHOD_DEFAULT;
@@ -655,11 +889,38 @@ static struct kobj_attribute reipl_type_attr =
 
 static struct kset *reipl_kset;
 
+static void get_ipl_string(char *dst, struct ipl_parameter_block *ipb,
+			   const enum ipl_method m)
+{
+	char loadparm[LOADPARM_LEN + 1] = {};
+	char vmparm[DIAG308_VMPARM_SIZE + 1] = {};
+	char nss_name[NSS_NAME_SIZE + 1] = {};
+	size_t pos = 0;
+
+	/*
+	 * Compose the IPL command string for @m in @dst: "IPL <devno> CLEAR"
+	 * or "IPL <nss name>", followed by the optional LOADPARM and PARM
+	 * operands taken from @ipb. Other methods get the operands only.
+	 */
+	reipl_get_ascii_loadparm(loadparm, ipb);
+	reipl_get_ascii_nss_name(nss_name, ipb);
+	reipl_get_ascii_vmparm(vmparm, ipb);
+
+	if (m == REIPL_METHOD_CCW_VM)
+		pos = sprintf(dst, "IPL %X CLEAR", ipb->ipl_info.ccw.devno);
+	else if (m == REIPL_METHOD_NSS)
+		pos = sprintf(dst, "IPL %s", nss_name);
+
+	if (loadparm[0] != '\0')
+		pos += sprintf(dst + pos, " LOADPARM '%s'", loadparm);
+	if (vmparm[0] != '\0')
+		sprintf(dst + pos, " PARM %s", vmparm);
+}
+
 static void reipl_run(struct shutdown_trigger *trigger)
 {
 	struct ccw_dev_id devid;
-	static char buf[100];
-	char loadparm[LOADPARM_LEN + 1];
+	static char buf[128];
 
 	switch (reipl_method) {
 	case REIPL_METHOD_CCW_CIO:
@@ -668,13 +929,7 @@ static void reipl_run(struct shutdown_trigger *trigger)
 		reipl_ccw_dev(&devid);
 		break;
 	case REIPL_METHOD_CCW_VM:
-		reipl_get_ascii_loadparm(loadparm);
-		if (strlen(loadparm) == 0)
-			sprintf(buf, "IPL %X CLEAR",
-				reipl_block_ccw->ipl_info.ccw.devno);
-		else
-			sprintf(buf, "IPL %X CLEAR LOADPARM '%s'",
-				reipl_block_ccw->ipl_info.ccw.devno, loadparm);
+		get_ipl_string(buf, reipl_block_ccw, REIPL_METHOD_CCW_VM);
 		__cpcmd(buf, NULL, 0, NULL);
 		break;
 	case REIPL_METHOD_CCW_DIAG:
@@ -691,8 +946,12 @@ static void reipl_run(struct shutdown_trigger *trigger)
 	case REIPL_METHOD_FCP_RO_VM:
 		__cpcmd("IPL", NULL, 0, NULL);
 		break;
+	case REIPL_METHOD_NSS_DIAG:
+		diag308(DIAG308_SET, reipl_block_nss);
+		diag308(DIAG308_IPL, NULL);
+		break;
 	case REIPL_METHOD_NSS:
-		sprintf(buf, "IPL %s", reipl_nss_name);
+		get_ipl_string(buf, reipl_block_nss, REIPL_METHOD_NSS);
 		__cpcmd(buf, NULL, 0, NULL);
 		break;
 	case REIPL_METHOD_DEFAULT:
@@ -707,16 +966,36 @@ static void reipl_run(struct shutdown_trigger *trigger)
 	disabled_wait((unsigned long) __builtin_return_address(0));
 }
 
-static void __init reipl_probe(void)
+static void reipl_block_ccw_init(struct ipl_parameter_block *ipb)
 {
-	void *buffer;
+	ipb->hdr.len = IPL_PARM_BLK_CCW_LEN;
+	ipb->hdr.version = IPL_PARM_BLOCK_VERSION;
+	ipb->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN;
+	ipb->hdr.pbt = DIAG308_IPL_TYPE_CCW;
+}
 
-	buffer = (void *) get_zeroed_page(GFP_KERNEL);
-	if (!buffer)
-		return;
-	if (diag308(DIAG308_STORE, buffer) == DIAG308_RC_OK)
-		diag308_set_works = 1;
-	free_page((unsigned long)buffer);
+static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb)
+{
+	/* LOADPARM: preset @ipb with the loadparm reported by sclp_ipl_info */
+	/* check if read scp info worked and set loadparm */
+	if (sclp_ipl_info.is_valid)
+		memcpy(ipb->ipl_info.ccw.load_parm,
+				&sclp_ipl_info.loadparm, LOADPARM_LEN);
+	else
+		/* read scp info failed: set empty loadparm (EBCDIC blanks) */
+		memset(ipb->ipl_info.ccw.load_parm, 0x40, LOADPARM_LEN);
+	ipb->hdr.flags = DIAG308_FLAGS_LP_VALID;
+
+	/* VM PARM: carry over the parm stored in ipl_block, if it is valid */
+	if (MACHINE_IS_VM && diag308_set_works &&
+	    (ipl_block.ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID)) {
+
+		ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID;
+		ipb->ipl_info.ccw.vm_parm_len =
+					ipl_block.ipl_info.ccw.vm_parm_len;
+		memcpy(ipb->ipl_info.ccw.vm_parm,
+		       ipl_block.ipl_info.ccw.vm_parm, DIAG308_VMPARM_SIZE);
+	}
 }
 
 static int __init reipl_nss_init(void)
@@ -725,10 +1004,31 @@ static int __init reipl_nss_init(void)
 
 	if (!MACHINE_IS_VM)
 		return 0;
+
+	reipl_block_nss = (void *) get_zeroed_page(GFP_KERNEL);
+	if (!reipl_block_nss)
+		return -ENOMEM;
+
+	if (!diag308_set_works)
+		sys_reipl_nss_vmparm_attr.attr.mode = S_IRUGO;
+
 	rc = sysfs_create_group(&reipl_kset->kobj, &reipl_nss_attr_group);
 	if (rc)
 		return rc;
-	strncpy(reipl_nss_name, kernel_nss_name, NSS_NAME_SIZE + 1);
+
+	reipl_block_ccw_init(reipl_block_nss);
+	if (ipl_info.type == IPL_TYPE_NSS) {
+		memset(reipl_block_nss->ipl_info.ccw.nss_name,
+			' ', NSS_NAME_SIZE);
+		memcpy(reipl_block_nss->ipl_info.ccw.nss_name,
+			kernel_nss_name, strlen(kernel_nss_name));
+		ASCEBC(reipl_block_nss->ipl_info.ccw.nss_name, NSS_NAME_SIZE);
+		reipl_block_nss->ipl_info.ccw.vm_flags |=
+			DIAG308_VM_FLAGS_NSS_VALID;
+
+		reipl_block_ccw_fill_parms(reipl_block_nss);
+	}
+
 	reipl_capabilities |= IPL_TYPE_NSS;
 	return 0;
 }
@@ -740,28 +1040,27 @@ static int __init reipl_ccw_init(void)
 	reipl_block_ccw = (void *) get_zeroed_page(GFP_KERNEL);
 	if (!reipl_block_ccw)
 		return -ENOMEM;
-	rc = sysfs_create_group(&reipl_kset->kobj, &reipl_ccw_attr_group);
-	if (rc) {
-		free_page((unsigned long)reipl_block_ccw);
-		return rc;
+
+	if (MACHINE_IS_VM) {
+		if (!diag308_set_works)
+			sys_reipl_ccw_vmparm_attr.attr.mode = S_IRUGO;
+		rc = sysfs_create_group(&reipl_kset->kobj,
+					&reipl_ccw_attr_group_vm);
+	} else {
+		if(!diag308_set_works)
+			sys_reipl_ccw_loadparm_attr.attr.mode = S_IRUGO;
+		rc = sysfs_create_group(&reipl_kset->kobj,
+					&reipl_ccw_attr_group_lpar);
 	}
-	reipl_block_ccw->hdr.len = IPL_PARM_BLK_CCW_LEN;
-	reipl_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION;
-	reipl_block_ccw->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN;
-	reipl_block_ccw->hdr.pbt = DIAG308_IPL_TYPE_CCW;
-	reipl_block_ccw->hdr.flags = DIAG308_FLAGS_LP_VALID;
-	/* check if read scp info worked and set loadparm */
-	if (sclp_ipl_info.is_valid)
-		memcpy(reipl_block_ccw->ipl_info.ccw.load_param,
-		       &sclp_ipl_info.loadparm, LOADPARM_LEN);
-	else
-		/* read scp info failed: set empty loadparm (EBCDIC blanks) */
-		memset(reipl_block_ccw->ipl_info.ccw.load_param, 0x40,
-		       LOADPARM_LEN);
-	if (!MACHINE_IS_VM && !diag308_set_works)
-		sys_reipl_ccw_loadparm_attr.attr.mode = S_IRUGO;
-	if (ipl_info.type == IPL_TYPE_CCW)
+	if (rc)
+		return rc;
+
+	reipl_block_ccw_init(reipl_block_ccw);
+	if (ipl_info.type == IPL_TYPE_CCW) {
 		reipl_block_ccw->ipl_info.ccw.devno = ipl_devno;
+		reipl_block_ccw_fill_parms(reipl_block_ccw);
+	}
+
 	reipl_capabilities |= IPL_TYPE_CCW;
 	return 0;
 }
@@ -1298,7 +1597,6 @@ static void __init shutdown_actions_init(void)
 
 static int __init s390_ipl_init(void)
 {
-	reipl_probe();
 	sclp_get_ipl_info(&sclp_ipl_info);
 	shutdown_actions_init();
 	shutdown_triggers_init();
@@ -1405,6 +1703,12 @@ void __init setup_ipl(void)
 	atomic_notifier_chain_register(&panic_notifier_list, &on_panic_nb);
 }
 
+void __init ipl_update_parameters(void)
+{
+	if (diag308(DIAG308_STORE, &ipl_block) == DIAG308_RC_OK) /* fetch the current IPL block into ipl_block */
+		diag308_set_works = 1;	/* remembered for the vmparm and reipl code */
+}
+
 void __init ipl_save_parameters(void)
 {
 	struct cio_iplinfo iplinfo;
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index ed04d1372d5d..288ad490a6dd 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -41,10 +41,8 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
 	if (is_prohibited_opcode((kprobe_opcode_t *) p->addr))
 		return -EINVAL;
 
-	if ((unsigned long)p->addr & 0x01) {
-		printk("Attempt to register kprobe at an unaligned address\n");
+	if ((unsigned long)p->addr & 0x01)
 		return -EINVAL;
-		}
 
 	/* Use the get_insn_slot() facility for correctness */
 	if (!(p->ainsn.insn = get_insn_slot()))
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 3c77dd36994c..131d7ee8b416 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -52,7 +52,6 @@ void machine_kexec_cleanup(struct kimage *image)
 
 void machine_shutdown(void)
 {
-	printk(KERN_INFO "kexec: machine_shutdown called\n");
 }
 
 void machine_kexec(struct kimage *image)
diff --git a/arch/s390/kernel/mem_detect.c b/arch/s390/kernel/mem_detect.c
new file mode 100644
index 000000000000..18ed7abe16c5
--- /dev/null
+++ b/arch/s390/kernel/mem_detect.c
@@ -0,0 +1,100 @@
+/*
+ *    Copyright IBM Corp. 2008
+ *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <asm/ipl.h>
+#include <asm/sclp.h>
+#include <asm/setup.h>
+
+static int memory_fast_detect(struct mem_chunk *chunk)
+{
+	unsigned long val0 = 0;
+	unsigned long val1 = 0xc;
+	int rc = -EOPNOTSUPP;	/* only cleared by the lhi after the diag */
+
+	if (ipl_flags & IPL_NSS_VALID)	/* fast detection is not used with a valid NSS */
+		return -EOPNOTSUPP;
+	asm volatile(
+		"	diag	%1,%2,0x260\n"
+		"0:	lhi	%0,0\n"
+		"1:\n"
+		EX_TABLE(0b,1b)	/* presumably: a fault at 0 resumes at 1, skipping the lhi */
+		: "+d" (rc), "+d" (val0), "+d" (val1) : : "cc");
+
+	if (rc || val0 != val1)	/* diag failed or returned two different values */
+		return -EOPNOTSUPP;
+	chunk->size = val0 + 1;	/* val0 is used as the last byte address of the chunk */
+	return 0;
+}
+
+static inline int tprot(unsigned long addr)
+{
+	int rc = -EFAULT;	/* kept when the ipm/srl below are skipped */
+
+	asm volatile(
+		"	tprot	0(%1),0\n"
+		"0:	ipm	%0\n"
+		"	srl	%0,28\n"	/* presumably leaves the tprot condition code in rc */
+		"1:\n"
+		EX_TABLE(0b,1b)	/* presumably: a fault at 0 resumes at 1 */
+		: "+d" (rc) : "a" (addr) : "cc");
+	return rc;	/* callers compare this against the CHUNK_* types */
+}
+
+#define ADDR2G (1ULL << 31)
+
+static void find_memory_chunks(struct mem_chunk chunk[])
+{
+	unsigned long long memsize, rnmax, rzm;
+	unsigned long addr = 0, size;
+	int i = 0, type;
+
+	rzm = sclp_get_rzm();	/* probing step size */
+	rnmax = sclp_get_rnmax();
+	memsize = rzm * rnmax;	/* 0 if either sclp value is 0 */
+	if (!rzm)
+		rzm = 1ULL << 17;	/* fall back to 128KB steps */
+	if (sizeof(long) == 4) {	/* 4 byte longs: never look beyond 2GB */
+		rzm = min(ADDR2G, rzm);
+		memsize = memsize ? min(ADDR2G, memsize) : ADDR2G;
+	}
+	do {
+		size = 0;
+		type = tprot(addr);
+		do {	/* grow the range while tprot reports the same type */
+			size += rzm;
+			if (memsize && addr + size >= memsize)
+				break;
+		} while (type == tprot(addr + size));
+		if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) {	/* other ranges are not recorded */
+			chunk[i].addr = addr;
+			chunk[i].size = size;
+			chunk[i].type = type;
+			i++;
+		}
+		addr += size;
+	} while (addr < memsize && i < MEMORY_CHUNKS);	/* memsize == 0: stop after the first range */
+}
+
+void detect_memory_layout(struct mem_chunk chunk[])
+{
+	unsigned long flags, cr0;
+
+	memset(chunk, 0, MEMORY_CHUNKS * sizeof(struct mem_chunk));	/* start from an empty chunk table */
+	if (memory_fast_detect(&chunk[0]) == 0)	/* one chunk was obtained via diag 0x260 */
+		return;
+	/* Disable IRQs, DAT and low address protection so tprot does the
+	 * right thing and we don't get scheduled away with low address
+	 * protection disabled.
+	 */
+	flags = __raw_local_irq_stnsm(0xf8);
+	__ctl_store(cr0, 0, 0);	/* save control register 0 to restore it below */
+	__ctl_clear_bit(0, 28);
+	find_memory_chunks(chunk);
+	__ctl_load(cr0, 0, 0);
+	__raw_local_irq_ssm(flags);	/* restore the system mask saved above */
+}
+EXPORT_SYMBOL(detect_memory_layout);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 7920861109d2..85defd01d293 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -75,46 +75,19 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
 	return sf->gprs[8];
 }
 
-/*
- * Need to know about CPUs going idle?
- */
-static ATOMIC_NOTIFIER_HEAD(idle_chain);
 DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
 
-int register_idle_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_register(&idle_chain, nb);
-}
-EXPORT_SYMBOL(register_idle_notifier);
-
-int unregister_idle_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_unregister(&idle_chain, nb);
-}
-EXPORT_SYMBOL(unregister_idle_notifier);
-
 static int s390_idle_enter(void)
 {
 	struct s390_idle_data *idle;
-	int nr_calls = 0;
-	void *hcpu;
-	int rc;
 
-	hcpu = (void *)(long)smp_processor_id();
-	rc = __atomic_notifier_call_chain(&idle_chain, S390_CPU_IDLE, hcpu, -1,
-					  &nr_calls);
-	if (rc == NOTIFY_BAD) {
-		nr_calls--;
-		__atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE,
-					     hcpu, nr_calls, NULL);
-		return rc;
-	}
 	idle = &__get_cpu_var(s390_idle);
 	spin_lock(&idle->lock);
 	idle->idle_count++;
 	idle->in_idle = 1;
 	idle->idle_enter = get_clock();
 	spin_unlock(&idle->lock);
+	vtime_stop_cpu_timer();
 	return NOTIFY_OK;
 }
 
@@ -122,13 +95,12 @@ void s390_idle_leave(void)
 {
 	struct s390_idle_data *idle;
 
+	vtime_start_cpu_timer();
 	idle = &__get_cpu_var(s390_idle);
 	spin_lock(&idle->lock);
 	idle->idle_time += get_clock() - idle->idle_enter;
 	idle->in_idle = 0;
 	spin_unlock(&idle->lock);
-	atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE,
-				   (void *)(long) smp_processor_id());
 }
 
 extern void s390_handle_mcck(void);
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 35827b9bd4d1..2815bfe348a6 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -33,6 +33,8 @@
 #include <linux/security.h>
 #include <linux/audit.h>
 #include <linux/signal.h>
+#include <linux/elf.h>
+#include <linux/regset.h>
 
 #include <asm/segment.h>
 #include <asm/page.h>
@@ -47,6 +49,11 @@
 #include "compat_ptrace.h"
 #endif
 
+enum s390_regset {
+	REGSET_GENERAL,
+	REGSET_FP,
+};
+
 static void
 FixPerRegisters(struct task_struct *task)
 {
@@ -126,24 +133,10 @@ ptrace_disable(struct task_struct *child)
  * struct user contain pad bytes that should be read as zeroes.
  * Lovely...
  */
-static int
-peek_user(struct task_struct *child, addr_t addr, addr_t data)
+static unsigned long __peek_user(struct task_struct *child, addr_t addr)
 {
 	struct user *dummy = NULL;
-	addr_t offset, tmp, mask;
-
-	/*
-	 * Stupid gdb peeks/pokes the access registers in 64 bit with
-	 * an alignment of 4. Programmers from hell...
-	 */
-	mask = __ADDR_MASK;
-#ifdef CONFIG_64BIT
-	if (addr >= (addr_t) &dummy->regs.acrs &&
-	    addr < (addr_t) &dummy->regs.orig_gpr2)
-		mask = 3;
-#endif
-	if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK)
-		return -EIO;
+	addr_t offset, tmp;
 
 	if (addr < (addr_t) &dummy->regs.acrs) {
 		/*
@@ -197,24 +190,18 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data)
 	} else
 		tmp = 0;
 
-	return put_user(tmp, (addr_t __user *) data);
+	return tmp;
 }
 
-/*
- * Write a word to the user area of a process at location addr. This
- * operation does have an additional problem compared to peek_user.
- * Stores to the program status word and on the floating point
- * control register needs to get checked for validity.
- */
 static int
-poke_user(struct task_struct *child, addr_t addr, addr_t data)
+peek_user(struct task_struct *child, addr_t addr, addr_t data)
 {
 	struct user *dummy = NULL;
-	addr_t offset, mask;
+	addr_t tmp, mask;
 
 	/*
 	 * Stupid gdb peeks/pokes the access registers in 64 bit with
-	 * an alignment of 4. Programmers from hell indeed...
+	 * an alignment of 4. Programmers from hell...
 	 */
 	mask = __ADDR_MASK;
 #ifdef CONFIG_64BIT
@@ -225,6 +212,21 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data)
 	if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK)
 		return -EIO;
 
+	tmp = __peek_user(child, addr);
+	return put_user(tmp, (addr_t __user *) data);
+}
+
+/*
+ * Write a word to the user area of a process at location addr. This
+ * operation does have an additional problem compared to peek_user.
+ * Stores to the program status word and on the floating point
+ * control register needs to get checked for validity.
+ */
+static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
+{
+	struct user *dummy = NULL;
+	addr_t offset;
+
 	if (addr < (addr_t) &dummy->regs.acrs) {
 		/*
 		 * psw and gprs are stored on the stack
@@ -292,6 +294,28 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data)
 	return 0;
 }
 
+static int
+poke_user(struct task_struct *child, addr_t addr, addr_t data)
+{
+	struct user *dummy = NULL;
+	addr_t mask;
+
+	/*
+	 * Stupid gdb peeks/pokes the access registers in 64 bit with
+	 * an alignment of 4. Programmers from hell indeed...
+	 */
+	mask = __ADDR_MASK;
+#ifdef CONFIG_64BIT
+	if (addr >= (addr_t) &dummy->regs.acrs &&
+	    addr < (addr_t) &dummy->regs.orig_gpr2)
+		mask = 3;
+#endif
+	if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK)
+		return -EIO;
+
+	return __poke_user(child, addr, data);
+}
+
 long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
 	ptrace_area parea; 
@@ -367,18 +391,13 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 /*
  * Same as peek_user but for a 31 bit program.
  */
-static int
-peek_user_emu31(struct task_struct *child, addr_t addr, addr_t data)
+static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
 {
 	struct user32 *dummy32 = NULL;
 	per_struct32 *dummy_per32 = NULL;
 	addr_t offset;
 	__u32 tmp;
 
-	if (!test_thread_flag(TIF_31BIT) ||
-	    (addr & 3) || addr > sizeof(struct user) - 3)
-		return -EIO;
-
 	if (addr < (addr_t) &dummy32->regs.acrs) {
 		/*
 		 * psw and gprs are stored on the stack
@@ -435,25 +454,32 @@ peek_user_emu31(struct task_struct *child, addr_t addr, addr_t data)
 	} else
 		tmp = 0;
 
+	return tmp;
+}
+
+static int peek_user_compat(struct task_struct *child,
+			    addr_t addr, addr_t data)
+{
+	__u32 tmp;
+
+	/* Bound by the 31 bit user area, the same as poke_user_compat. */
+	if (!test_thread_flag(TIF_31BIT) || (addr & 3) ||
+	    addr > sizeof(struct user32) - 3)
+		return -EIO;
+	tmp = __peek_user_compat(child, addr);
 	return put_user(tmp, (__u32 __user *) data);
 }
 
 /*
  * Same as poke_user but for a 31 bit program.
  */
-static int
-poke_user_emu31(struct task_struct *child, addr_t addr, addr_t data)
+static int __poke_user_compat(struct task_struct *child,
+			      addr_t addr, addr_t data)
 {
 	struct user32 *dummy32 = NULL;
 	per_struct32 *dummy_per32 = NULL;
+	__u32 tmp = (__u32) data;
 	addr_t offset;
-	__u32 tmp;
-
-	if (!test_thread_flag(TIF_31BIT) ||
-	    (addr & 3) || addr > sizeof(struct user32) - 3)
-		return -EIO;
-
-	tmp = (__u32) data;
 
 	if (addr < (addr_t) &dummy32->regs.acrs) {
 		/*
@@ -528,6 +554,16 @@ poke_user_emu31(struct task_struct *child, addr_t addr, addr_t data)
 	return 0;
 }
 
+static int poke_user_compat(struct task_struct *child,
+			    addr_t addr, addr_t data)
+{
+	if (!test_thread_flag(TIF_31BIT) ||
+	    (addr & 3) || addr > sizeof(struct user32) - 3)
+		return -EIO;
+
+	return __poke_user_compat(child, addr, data);
+}
+
 long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 			compat_ulong_t caddr, compat_ulong_t cdata)
 {
@@ -539,11 +575,11 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 	switch (request) {
 	case PTRACE_PEEKUSR:
 		/* read the word at location addr in the USER area. */
-		return peek_user_emu31(child, addr, data);
+		return peek_user_compat(child, addr, data);
 
 	case PTRACE_POKEUSR:
 		/* write the word at location addr in the USER area */
-		return poke_user_emu31(child, addr, data);
+		return poke_user_compat(child, addr, data);
 
 	case PTRACE_PEEKUSR_AREA:
 	case PTRACE_POKEUSR_AREA:
@@ -555,13 +591,13 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 		copied = 0;
 		while (copied < parea.len) {
 			if (request == PTRACE_PEEKUSR_AREA)
-				ret = peek_user_emu31(child, addr, data);
+				ret = peek_user_compat(child, addr, data);
 			else {
 				__u32 utmp;
 				if (get_user(utmp,
 					     (__u32 __force __user *) data))
 					return -EFAULT;
-				ret = poke_user_emu31(child, addr, utmp);
+				ret = poke_user_compat(child, addr, utmp);
 			}
 			if (ret)
 				return ret;
@@ -610,3 +646,240 @@ syscall_trace(struct pt_regs *regs, int entryexit)
 				    regs->gprs[2], regs->orig_gpr2, regs->gprs[3],
 				    regs->gprs[4], regs->gprs[5]);
 }
+
+/*
+ * user_regset definitions.
+ */
+
+static int s390_regs_get(struct task_struct *target,
+			 const struct user_regset *regset,
+			 unsigned int pos, unsigned int count,
+			 void *kbuf, void __user *ubuf)
+{
+	if (target == current)
+		save_access_regs(target->thread.acrs);
+
+	if (kbuf) {
+		unsigned long *k = kbuf;
+		while (count > 0) {
+			*k++ = __peek_user(target, pos);
+			count -= sizeof(*k);
+			pos += sizeof(*k);
+		}
+	} else {
+		unsigned long __user *u = ubuf;
+		while (count > 0) {
+			if (__put_user(__peek_user(target, pos), u++))
+				return -EFAULT;
+			count -= sizeof(*u);
+			pos += sizeof(*u);
+		}
+	}
+	return 0;
+}
+
+static int s390_regs_set(struct task_struct *target,
+			 const struct user_regset *regset,
+			 unsigned int pos, unsigned int count,
+			 const void *kbuf, const void __user *ubuf)
+{
+	int rc = 0;
+
+	if (target == current)
+		save_access_regs(target->thread.acrs);
+
+	if (kbuf) {
+		const unsigned long *k = kbuf;
+		while (count > 0 && !rc) {
+			rc = __poke_user(target, pos, *k++);
+			count -= sizeof(*k);
+			pos += sizeof(*k);
+		}
+	} else {
+		const unsigned long  __user *u = ubuf;
+		while (count > 0 && !rc) {
+			unsigned long word;
+			rc = __get_user(word, u++);
+			if (rc)
+				break;
+			rc = __poke_user(target, pos, word);
+			count -= sizeof(*u);
+			pos += sizeof(*u);
+		}
+	}
+
+	if (rc == 0 && target == current)
+		restore_access_regs(target->thread.acrs);
+
+	return rc;
+}
+
+static int s390_fpregs_get(struct task_struct *target,
+			   const struct user_regset *regset, unsigned int pos,
+			   unsigned int count, void *kbuf, void __user *ubuf)
+{
+	if (target == current)
+		save_fp_regs(&target->thread.fp_regs);
+
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   &target->thread.fp_regs, 0, -1);
+}
+
+static int s390_fpregs_set(struct task_struct *target,
+			   const struct user_regset *regset, unsigned int pos,
+			   unsigned int count, const void *kbuf,
+			   const void __user *ubuf)
+{
+	int rc = 0;
+
+	if (target == current)
+		save_fp_regs(&target->thread.fp_regs);
+
+	/* If setting FPC, must validate it first. */
+	if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) {
+		u32 fpc[2] = { target->thread.fp_regs.fpc, 0 };
+		rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpc,
+					0, offsetof(s390_fp_regs, fprs));
+		if (rc)
+			return rc;
+		if ((fpc[0] & ~FPC_VALID_MASK) != 0 || fpc[1] != 0)
+			return -EINVAL;
+		target->thread.fp_regs.fpc = fpc[0];
+	}
+
+	if (rc == 0 && count > 0)
+		rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					target->thread.fp_regs.fprs,
+					offsetof(s390_fp_regs, fprs), -1);
+
+	if (rc == 0 && target == current)
+		restore_fp_regs(&target->thread.fp_regs);
+
+	return rc;
+}
+
+static const struct user_regset s390_regsets[] = {
+	[REGSET_GENERAL] = {
+		.core_note_type = NT_PRSTATUS,
+		.n = sizeof(s390_regs) / sizeof(long),
+		.size = sizeof(long),
+		.align = sizeof(long),
+		.get = s390_regs_get,
+		.set = s390_regs_set,
+	},
+	[REGSET_FP] = {
+		.core_note_type = NT_PRFPREG,
+		.n = sizeof(s390_fp_regs) / sizeof(long),
+		.size = sizeof(long),
+		.align = sizeof(long),
+		.get = s390_fpregs_get,
+		.set = s390_fpregs_set,
+	},
+};
+
+static const struct user_regset_view user_s390_view = {
+	.name = UTS_MACHINE,
+	.e_machine = EM_S390,
+	.regsets = s390_regsets,
+	.n = ARRAY_SIZE(s390_regsets)
+};
+
+#ifdef CONFIG_COMPAT
+static int s390_compat_regs_get(struct task_struct *target,
+				const struct user_regset *regset,
+				unsigned int pos, unsigned int count,
+				void *kbuf, void __user *ubuf)
+{
+	if (target == current)
+		save_access_regs(target->thread.acrs);
+
+	if (kbuf) {
+		compat_ulong_t *k = kbuf;
+		while (count > 0) {
+			*k++ = __peek_user_compat(target, pos);
+			count -= sizeof(*k);
+			pos += sizeof(*k);
+		}
+	} else {
+		compat_ulong_t __user *u = ubuf;
+		while (count > 0) {
+			if (__put_user(__peek_user_compat(target, pos), u++))
+				return -EFAULT;
+			count -= sizeof(*u);
+			pos += sizeof(*u);
+		}
+	}
+	return 0;
+}
+
+static int s390_compat_regs_set(struct task_struct *target,
+				const struct user_regset *regset,
+				unsigned int pos, unsigned int count,
+				const void *kbuf, const void __user *ubuf)
+{
+	int rc = 0;
+
+	if (target == current)
+		save_access_regs(target->thread.acrs);
+
+	if (kbuf) {
+		const compat_ulong_t *k = kbuf;
+		while (count > 0 && !rc) {
+			rc = __poke_user_compat(target, pos, *k++);
+			count -= sizeof(*k);
+			pos += sizeof(*k);
+		}
+	} else {
+		const compat_ulong_t  __user *u = ubuf;
+		while (count > 0 && !rc) {
+			compat_ulong_t word;
+			rc = __get_user(word, u++);
+			if (rc)
+				break;
+			rc = __poke_user_compat(target, pos, word);
+			count -= sizeof(*u);
+			pos += sizeof(*u);
+		}
+	}
+
+	if (rc == 0 && target == current)
+		restore_access_regs(target->thread.acrs);
+
+	return rc;
+}
+
+static const struct user_regset s390_compat_regsets[] = {
+	[REGSET_GENERAL] = {
+		.core_note_type = NT_PRSTATUS,
+		.n = sizeof(s390_compat_regs) / sizeof(compat_long_t),
+		.size = sizeof(compat_long_t),
+		.align = sizeof(compat_long_t),
+		.get = s390_compat_regs_get,
+		.set = s390_compat_regs_set,
+	},
+	[REGSET_FP] = {
+		.core_note_type = NT_PRFPREG,
+		.n = sizeof(s390_fp_regs) / sizeof(compat_long_t),
+		.size = sizeof(compat_long_t),
+		.align = sizeof(compat_long_t),
+		.get = s390_fpregs_get,
+		.set = s390_fpregs_set,
+	},
+};
+
+static const struct user_regset_view user_s390_compat_view = {
+	.name = "s390",
+	.e_machine = EM_S390,
+	.regsets = s390_compat_regsets,
+	.n = ARRAY_SIZE(s390_compat_regsets)
+};
+#endif
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+#ifdef CONFIG_COMPAT
+	if (test_tsk_thread_flag(task, TIF_31BIT))
+		return &user_s390_compat_view;
+#endif
+	return &user_s390_view;
+}
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 2bc70b6e876a..b358e18273b0 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -77,7 +77,7 @@ unsigned long machine_flags;
 unsigned long elf_hwcap = 0;
 char elf_platform[ELF_PLATFORM_SIZE];
 
-struct mem_chunk __meminitdata memory_chunk[MEMORY_CHUNKS];
+struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS];
 volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */
 static unsigned long __initdata memory_end;
 
@@ -205,12 +205,6 @@ static void __init conmode_default(void)
 			SET_CONSOLE_SCLP;
 #endif
 		}
-        } else if (MACHINE_IS_P390) {
-#if defined(CONFIG_TN3215_CONSOLE)
-		SET_CONSOLE_3215;
-#elif defined(CONFIG_TN3270_CONSOLE)
-		SET_CONSOLE_3270;
-#endif
 	} else {
 #if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
 		SET_CONSOLE_SCLP;
@@ -221,18 +215,17 @@ static void __init conmode_default(void)
 #if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_ZFCPDUMP_MODULE)
 static void __init setup_zfcpdump(unsigned int console_devno)
 {
-	static char str[64];
+	static char str[41];
 
 	if (ipl_info.type != IPL_TYPE_FCP_DUMP)
 		return;
 	if (console_devno != -1)
-		sprintf(str, "cio_ignore=all,!0.0.%04x,!0.0.%04x",
+		sprintf(str, " cio_ignore=all,!0.0.%04x,!0.0.%04x",
 			ipl_info.data.fcp.dev_id.devno, console_devno);
 	else
-		sprintf(str, "cio_ignore=all,!0.0.%04x",
+		sprintf(str, " cio_ignore=all,!0.0.%04x",
 			ipl_info.data.fcp.dev_id.devno);
-	strcat(COMMAND_LINE, " ");
-	strcat(COMMAND_LINE, str);
+	strcat(boot_command_line, str);
 	console_loglevel = 2;
 }
 #else
@@ -289,32 +282,6 @@ static int __init early_parse_mem(char *p)
 }
 early_param("mem", early_parse_mem);
 
-/*
- * "ipldelay=XXX[sm]" sets ipl delay in seconds or minutes
- */
-static int __init early_parse_ipldelay(char *p)
-{
-	unsigned long delay = 0;
-
-	delay = simple_strtoul(p, &p, 0);
-
-	switch (*p) {
-	case 's':
-	case 'S':
-		delay *= 1000000;
-		break;
-	case 'm':
-	case 'M':
-		delay *= 60 * 1000000;
-	}
-
-	/* now wait for the requested amount of time */
-	udelay(delay);
-
-	return 0;
-}
-early_param("ipldelay", early_parse_ipldelay);
-
 #ifdef CONFIG_S390_SWITCH_AMODE
 #ifdef CONFIG_PGSTE
 unsigned int switch_amode = 1;
@@ -804,11 +771,9 @@ setup_arch(char **cmdline_p)
 		printk("We are running native (64 bit mode)\n");
 #endif /* CONFIG_64BIT */
 
-	/* Save unparsed command line copy for /proc/cmdline */
-	strlcpy(boot_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
-
-	*cmdline_p = COMMAND_LINE;
-	*(*cmdline_p + COMMAND_LINE_SIZE - 1) = '\0';
+	/* Have one command line that is parsed and saved in /proc/cmdline */
+	/* boot_command_line has been already set up in early.c */
+	*cmdline_p = boot_command_line;
 
         ROOT_DEV = Root_RAM0;
 
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 1f4228948dc4..b6781030cfbd 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -109,7 +109,7 @@ static void do_call_function(void)
 }
 
 static void __smp_call_function_map(void (*func) (void *info), void *info,
-				    int nonatomic, int wait, cpumask_t map)
+				    int wait, cpumask_t map)
 {
 	struct call_data_struct data;
 	int cpu, local = 0;
@@ -162,7 +162,6 @@ out:
  * smp_call_function:
  * @func: the function to run; this must be fast and non-blocking
  * @info: an arbitrary pointer to pass to the function
- * @nonatomic: unused
  * @wait: if true, wait (atomically) until function has completed on other CPUs
  *
  * Run a function on all other CPUs.
@@ -170,15 +169,14 @@ out:
  * You must not call this function with disabled interrupts, from a
  * hardware interrupt handler or from a bottom half.
  */
-int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
-		      int wait)
+int smp_call_function(void (*func) (void *info), void *info, int wait)
 {
 	cpumask_t map;
 
 	spin_lock(&call_lock);
 	map = cpu_online_map;
 	cpu_clear(smp_processor_id(), map);
-	__smp_call_function_map(func, info, nonatomic, wait, map);
+	__smp_call_function_map(func, info, wait, map);
 	spin_unlock(&call_lock);
 	return 0;
 }
@@ -189,7 +187,6 @@ EXPORT_SYMBOL(smp_call_function);
  * @cpu: the CPU where func should run
  * @func: the function to run; this must be fast and non-blocking
  * @info: an arbitrary pointer to pass to the function
- * @nonatomic: unused
  * @wait: if true, wait (atomically) until function has completed on other CPUs
  *
  * Run a function on one processor.
@@ -198,11 +195,10 @@ EXPORT_SYMBOL(smp_call_function);
  * hardware interrupt handler or from a bottom half.
  */
 int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
-			     int nonatomic, int wait)
+			     int wait)
 {
 	spin_lock(&call_lock);
-	__smp_call_function_map(func, info, nonatomic, wait,
-				cpumask_of_cpu(cpu));
+	__smp_call_function_map(func, info, wait, cpumask_of_cpu(cpu));
 	spin_unlock(&call_lock);
 	return 0;
 }
@@ -228,7 +224,7 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
 {
 	spin_lock(&call_lock);
 	cpu_clear(smp_processor_id(), mask);
-	__smp_call_function_map(func, info, 0, wait, mask);
+	__smp_call_function_map(func, info, wait, mask);
 	spin_unlock(&call_lock);
 	return 0;
 }
@@ -303,7 +299,7 @@ static void smp_ptlb_callback(void *info)
 
 void smp_ptlb_all(void)
 {
-	on_each_cpu(smp_ptlb_callback, NULL, 0, 1);
+	on_each_cpu(smp_ptlb_callback, NULL, 1);
 }
 EXPORT_SYMBOL(smp_ptlb_all);
 #endif /* ! CONFIG_64BIT */
@@ -351,7 +347,7 @@ void smp_ctl_set_bit(int cr, int bit)
 	memset(&parms.orvals, 0, sizeof(parms.orvals));
 	memset(&parms.andvals, 0xff, sizeof(parms.andvals));
 	parms.orvals[cr] = 1 << bit;
-	on_each_cpu(smp_ctl_bit_callback, &parms, 0, 1);
+	on_each_cpu(smp_ctl_bit_callback, &parms, 1);
 }
 EXPORT_SYMBOL(smp_ctl_set_bit);
 
@@ -365,7 +361,7 @@ void smp_ctl_clear_bit(int cr, int bit)
 	memset(&parms.orvals, 0, sizeof(parms.orvals));
 	memset(&parms.andvals, 0xff, sizeof(parms.andvals));
 	parms.andvals[cr] = ~(1L << bit);
-	on_each_cpu(smp_ctl_bit_callback, &parms, 0, 1);
+	on_each_cpu(smp_ctl_bit_callback, &parms, 1);
 }
 EXPORT_SYMBOL(smp_ctl_clear_bit);
 
@@ -711,7 +707,7 @@ int __cpuinit __cpu_up(unsigned int cpu)
 	memset(sf, 0, sizeof(struct stack_frame));
 	sf->gprs[9] = (unsigned long) sf;
 	cpu_lowcore->save_area[15] = (unsigned long) sf;
-	__ctl_store(cpu_lowcore->cregs_save_area[0], 0, 15);
+	__ctl_store(cpu_lowcore->cregs_save_area, 0, 15);
 	asm volatile(
 		"	stam	0,15,0(%0)"
 		: : "a" (&cpu_lowcore->access_regs_save_area) : "memory");
@@ -1089,7 +1085,7 @@ out:
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-int smp_rescan_cpus(void)
+int __ref smp_rescan_cpus(void)
 {
 	cpumask_t newcpus;
 	int cpu;
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 85e46a5d0e08..8841919ef7e6 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -10,6 +10,7 @@
 #include <linux/sched.h>
 #include <linux/stacktrace.h>
 #include <linux/kallsyms.h>
+#include <linux/module.h>
 
 static unsigned long save_context_stack(struct stack_trace *trace,
 					unsigned long sp,
@@ -81,6 +82,7 @@ void save_stack_trace(struct stack_trace *trace)
 			   S390_lowcore.thread_info,
 			   S390_lowcore.thread_info + THREAD_SIZE, 1);
 }
+EXPORT_SYMBOL_GPL(save_stack_trace);
 
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
@@ -93,3 +95,4 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 	if (trace->nr_entries < trace->max_entries)
 		trace->entries[trace->nr_entries++] = ULONG_MAX;
 }
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 7aec676fefd5..f2cede3947b2 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -3,7 +3,7 @@
  *    Time of day based timer functions.
  *
  *  S390 version
- *    Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ *    Copyright IBM Corp. 1999, 2008
  *    Author(s): Hartmut Penner (hp@de.ibm.com),
  *               Martin Schwidefsky (schwidefsky@de.ibm.com),
  *               Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
@@ -31,6 +31,7 @@
 #include <linux/notifier.h>
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
+#include <linux/bootmem.h>
 #include <asm/uaccess.h>
 #include <asm/delay.h>
 #include <asm/s390_ext.h>
@@ -162,7 +163,7 @@ void init_cpu_timer(void)
 	/* Enable clock comparator timer interrupt. */
 	__ctl_set_bit(0,11);
 
-	/* Always allow ETR external interrupts, even without an ETR. */
+	/* Always allow the timing alert external interrupt. */
 	__ctl_set_bit(0, 4);
 }
 
@@ -170,8 +171,21 @@ static void clock_comparator_interrupt(__u16 code)
 {
 }
 
+static void etr_timing_alert(struct etr_irq_parm *);
+static void stp_timing_alert(struct stp_irq_parm *);
+
+static void timing_alert_interrupt(__u16 code)
+{
+	if (S390_lowcore.ext_params & 0x00c40000)
+		etr_timing_alert((struct etr_irq_parm *)
+				 &S390_lowcore.ext_params);
+	if (S390_lowcore.ext_params & 0x00038000)
+		stp_timing_alert((struct stp_irq_parm *)
+				 &S390_lowcore.ext_params);
+}
+
 static void etr_reset(void);
-static void etr_ext_handler(__u16);
+static void stp_reset(void);
 
 /*
  * Get the TOD clock running.
@@ -181,6 +195,7 @@ static u64 __init reset_tod_clock(void)
 	u64 time;
 
 	etr_reset();
+	stp_reset();
 	if (store_clock(&time) == 0)
 		return time;
 	/* TOD clock not running. Set the clock to Unix Epoch. */
@@ -231,8 +246,9 @@ void __init time_init(void)
 	if (clocksource_register(&clocksource_tod) != 0)
 		panic("Could not register TOD clock source");
 
-	/* request the etr external interrupt */
-	if (register_early_external_interrupt(0x1406, etr_ext_handler,
+	/* request the timing alert external interrupt */
+	if (register_early_external_interrupt(0x1406,
+					      timing_alert_interrupt,
 					      &ext_int_etr_cc) != 0)
 		panic("Couldn't request external interrupt 0x1406");
 
@@ -245,10 +261,112 @@ void __init time_init(void)
 }
 
 /*
+ * The time is "clock". old is what we think the time is.
+ * Adjust the value by a multiple of jiffies and add the delta to ntp.
+ * "delay" is an approximation of how long the synchronization took. If
+ * the time correction is positive, then "delay" is subtracted from
+ * the time difference and only the remaining part is passed to ntp.
+ */
+static unsigned long long adjust_time(unsigned long long old,
+				      unsigned long long clock,
+				      unsigned long long delay)
+{
+	unsigned long long delta, ticks;
+	struct timex adjust;
+
+	if (clock > old) {
+		/* It is later than we thought. */
+		delta = ticks = clock - old;
+		delta = ticks = (delta < delay) ? 0 : delta - delay;
+		delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
+		adjust.offset = ticks * (1000000 / HZ);
+	} else {
+		/* It is earlier than we thought. */
+		delta = ticks = old - clock;
+		delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
+		delta = -delta;
+		adjust.offset = -ticks * (1000000 / HZ);
+	}
+	jiffies_timer_cc += delta;
+	if (adjust.offset != 0) {
+		printk(KERN_NOTICE "etr: time adjusted by %li micro-seconds\n",
+		       adjust.offset);
+		adjust.modes = ADJ_OFFSET_SINGLESHOT;
+		do_adjtimex(&adjust);
+	}
+	return delta;
+}
+
+static DEFINE_PER_CPU(atomic_t, clock_sync_word);
+static unsigned long clock_sync_flags;
+
+#define CLOCK_SYNC_HAS_ETR	0
+#define CLOCK_SYNC_HAS_STP	1
+#define CLOCK_SYNC_ETR		2
+#define CLOCK_SYNC_STP		3
+
+/*
+ * The synchronous get_clock function. It writes the current clock value
+ * to *clock and returns 0 if the clock is in sync with the external time
+ * source. Otherwise it returns -ENOSYS if neither CLOCK_SYNC_HAS_ETR nor
+ * CLOCK_SYNC_HAS_STP is set, -EACCES if neither CLOCK_SYNC_ETR nor
+ * CLOCK_SYNC_STP is set, and -EAGAIN in all remaining cases.
+ */
+int get_sync_clock(unsigned long long *clock)
+{
+	atomic_t *sw_ptr;
+	unsigned int sw0, sw1;
+
+	sw_ptr = &get_cpu_var(clock_sync_word);
+	sw0 = atomic_read(sw_ptr);
+	*clock = get_clock();
+	sw1 = atomic_read(sw_ptr);
+	put_cpu_var(clock_sync_word);
+	if (sw0 == sw1 && (sw0 & 0x80000000U))
+		/* Success: time is in sync. */
+		return 0;
+	if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags) &&
+	    !test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+		return -ENOSYS;
+	if (!test_bit(CLOCK_SYNC_ETR, &clock_sync_flags) &&
+	    !test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
+		return -EACCES;
+	return -EAGAIN;
+}
+EXPORT_SYMBOL(get_sync_clock);
+
+/*
+ * Make get_sync_clock return -EAGAIN.
+ */
+static void disable_sync_clock(void *dummy)
+{
+	atomic_t *sw_ptr = &__get_cpu_var(clock_sync_word);
+	/*
+	 * Clear the in-sync bit 2^31. All get_sync_clock calls will
+	 * fail until the sync bit is turned back on. In addition
+	 * increase the "sequence" counter to avoid the race of an
+	 * etr event and the complete recovery against get_sync_clock.
+	 */
+	atomic_clear_mask(0x80000000, sw_ptr);
+	atomic_inc(sw_ptr);
+}
+
+/*
+ * Make get_sync_clock return 0 again.
+ * Needs to be called from a context disabled for preemption.
+ */
+static void enable_sync_clock(void)
+{
+	atomic_t *sw_ptr = &__get_cpu_var(clock_sync_word);
+	atomic_set_mask(0x80000000, sw_ptr);
+}
+
+/*
  * External Time Reference (ETR) code.
  */
 static int etr_port0_online;
 static int etr_port1_online;
+static int etr_steai_available;
 
 static int __init early_parse_etr(char *p)
 {
@@ -273,12 +391,6 @@ enum etr_event {
 	ETR_EVENT_UPDATE,
 };
 
-enum etr_flags {
-	ETR_FLAG_ENOSYS,
-	ETR_FLAG_EACCES,
-	ETR_FLAG_STEAI,
-};
-
 /*
  * Valid bit combinations of the eacr register are (x = don't care):
  * e0 e1 dp p0 p1 ea es sl
@@ -305,74 +417,18 @@ enum etr_flags {
  */
 static struct etr_eacr etr_eacr;
 static u64 etr_tolec;			/* time of last eacr update */
-static unsigned long etr_flags;
 static struct etr_aib etr_port0;
 static int etr_port0_uptodate;
 static struct etr_aib etr_port1;
 static int etr_port1_uptodate;
 static unsigned long etr_events;
 static struct timer_list etr_timer;
-static DEFINE_PER_CPU(atomic_t, etr_sync_word);
 
 static void etr_timeout(unsigned long dummy);
 static void etr_work_fn(struct work_struct *work);
 static DECLARE_WORK(etr_work, etr_work_fn);
 
 /*
- * The etr get_clock function. It will write the current clock value
- * to the clock pointer and return 0 if the clock is in sync with the
- * external time source. If the clock mode is local it will return
- * -ENOSYS and -EAGAIN if the clock is not in sync with the external
- * reference. This function is what ETR is all about..
- */
-int get_sync_clock(unsigned long long *clock)
-{
-	atomic_t *sw_ptr;
-	unsigned int sw0, sw1;
-
-	sw_ptr = &get_cpu_var(etr_sync_word);
-	sw0 = atomic_read(sw_ptr);
-	*clock = get_clock();
-	sw1 = atomic_read(sw_ptr);
-	put_cpu_var(etr_sync_sync);
-	if (sw0 == sw1 && (sw0 & 0x80000000U))
-		/* Success: time is in sync. */
-		return 0;
-	if (test_bit(ETR_FLAG_ENOSYS, &etr_flags))
-		return -ENOSYS;
-	if (test_bit(ETR_FLAG_EACCES, &etr_flags))
-		return -EACCES;
-	return -EAGAIN;
-}
-EXPORT_SYMBOL(get_sync_clock);
-
-/*
- * Make get_sync_clock return -EAGAIN.
- */
-static void etr_disable_sync_clock(void *dummy)
-{
-	atomic_t *sw_ptr = &__get_cpu_var(etr_sync_word);
-	/*
-	 * Clear the in-sync bit 2^31. All get_sync_clock calls will
-	 * fail until the sync bit is turned back on. In addition
-	 * increase the "sequence" counter to avoid the race of an
-	 * etr event and the complete recovery against get_sync_clock.
-	 */
-	atomic_clear_mask(0x80000000, sw_ptr);
-	atomic_inc(sw_ptr);
-}
-
-/*
- * Make get_sync_clock return 0 again.
- * Needs to be called from a context disabled for preemption.
- */
-static void etr_enable_sync_clock(void)
-{
-	atomic_t *sw_ptr = &__get_cpu_var(etr_sync_word);
-	atomic_set_mask(0x80000000, sw_ptr);
-}
-
-/*
  * Reset ETR attachment.
  */
 static void etr_reset(void)
@@ -381,15 +437,13 @@ static void etr_reset(void)
 		.e0 = 0, .e1 = 0, ._pad0 = 4, .dp = 0,
 		.p0 = 0, .p1 = 0, ._pad1 = 0, .ea = 0,
 		.es = 0, .sl = 0 };
-	if (etr_setr(&etr_eacr) == 0)
+	if (etr_setr(&etr_eacr) == 0) {
 		etr_tolec = get_clock();
-	else {
-		set_bit(ETR_FLAG_ENOSYS, &etr_flags);
-		if (etr_port0_online || etr_port1_online) {
-			printk(KERN_WARNING "Running on non ETR capable "
-			       "machine, only local mode available.\n");
-			etr_port0_online = etr_port1_online = 0;
-		}
+		set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags);
+	} else if (etr_port0_online || etr_port1_online) {
+		printk(KERN_WARNING "Running on non ETR capable "
+		       "machine, only local mode available.\n");
+		etr_port0_online = etr_port1_online = 0;
 	}
 }
 
@@ -397,14 +451,12 @@ static int __init etr_init(void)
 {
 	struct etr_aib aib;
 
-	if (test_bit(ETR_FLAG_ENOSYS, &etr_flags))
+	if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags))
 		return 0;
 	/* Check if this machine has the steai instruction. */
 	if (etr_steai(&aib, ETR_STEAI_STEPPING_PORT) == 0)
-		set_bit(ETR_FLAG_STEAI, &etr_flags);
+		etr_steai_available = 1;
 	setup_timer(&etr_timer, etr_timeout, 0UL);
-	if (!etr_port0_online && !etr_port1_online)
-		set_bit(ETR_FLAG_EACCES, &etr_flags);
 	if (etr_port0_online) {
 		set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
 		schedule_work(&etr_work);
@@ -435,7 +487,8 @@ void etr_switch_to_local(void)
 {
 	if (!etr_eacr.sl)
 		return;
-	etr_disable_sync_clock(NULL);
+	if (test_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
+		disable_sync_clock(NULL);
 	set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events);
 	schedule_work(&etr_work);
 }
@@ -450,23 +503,21 @@ void etr_sync_check(void)
 {
 	if (!etr_eacr.es)
 		return;
-	etr_disable_sync_clock(NULL);
+	if (test_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
+		disable_sync_clock(NULL);
 	set_bit(ETR_EVENT_SYNC_CHECK, &etr_events);
 	schedule_work(&etr_work);
 }
 
 /*
- * ETR external interrupt. There are two causes:
+ * ETR timing alert. There are two causes:
  * 1) port state change, check the usability of the port
  * 2) port alert, one of the ETR-data-validity bits (v1-v2 bits of the
  *    sldr-status word) or ETR-data word 1 (edf1) or ETR-data word 3 (edf3)
  *    or ETR-data word 4 (edf4) has changed.
  */
-static void etr_ext_handler(__u16 code)
+static void etr_timing_alert(struct etr_irq_parm *intparm)
 {
-	struct etr_interruption_parameter *intparm =
-		(struct etr_interruption_parameter *) &S390_lowcore.ext_params;
-
 	if (intparm->pc0)
 		/* ETR port 0 state change. */
 		set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
@@ -591,58 +642,23 @@ static int etr_aib_follows(struct etr_aib *a1, struct etr_aib *a2, int p)
 	return 1;
 }
 
-/*
- * The time is "clock". old is what we think the time is.
- * Adjust the value by a multiple of jiffies and add the delta to ntp.
- * "delay" is an approximation how long the synchronization took. If
- * the time correction is positive, then "delay" is subtracted from
- * the time difference and only the remaining part is passed to ntp.
- */
-static unsigned long long etr_adjust_time(unsigned long long old,
-					  unsigned long long clock,
-					  unsigned long long delay)
-{
-	unsigned long long delta, ticks;
-	struct timex adjust;
-
-	if (clock > old) {
-		/* It is later than we thought. */
-		delta = ticks = clock - old;
-		delta = ticks = (delta < delay) ? 0 : delta - delay;
-		delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
-		adjust.offset = ticks * (1000000 / HZ);
-	} else {
-		/* It is earlier than we thought. */
-		delta = ticks = old - clock;
-		delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
-		delta = -delta;
-		adjust.offset = -ticks * (1000000 / HZ);
-	}
-	jiffies_timer_cc += delta;
-	if (adjust.offset != 0) {
-		printk(KERN_NOTICE "etr: time adjusted by %li micro-seconds\n",
-		       adjust.offset);
-		adjust.modes = ADJ_OFFSET_SINGLESHOT;
-		do_adjtimex(&adjust);
-	}
-	return delta;
-}
-
-static struct {
+struct clock_sync_data {
 	int in_sync;
 	unsigned long long fixup_cc;
-} etr_sync;
+};
 
-static void etr_sync_cpu_start(void *dummy)
+static void clock_sync_cpu_start(void *dummy)
 {
-	etr_enable_sync_clock();
+	struct clock_sync_data *sync = dummy;
+
+	enable_sync_clock();
 	/*
 	 * This looks like a busy wait loop but it isn't. etr_sync_cpus
 	 * is called on all other cpus while the TOD clocks is stopped.
 	 * __udelay will stop the cpu on an enabled wait psw until the
 	 * TOD is running again.
 	 */
-	while (etr_sync.in_sync == 0) {
+	while (sync->in_sync == 0) {
 		__udelay(1);
 		/*
 		 * A different cpu changes *in_sync. Therefore use
@@ -650,17 +666,17 @@ static void etr_sync_cpu_start(void *dummy)
 		 */
 		barrier();
 	}
-	if (etr_sync.in_sync != 1)
+	if (sync->in_sync != 1)
 		/* Didn't work. Clear per-cpu in sync bit again. */
-		etr_disable_sync_clock(NULL);
+		disable_sync_clock(NULL);
 	/*
 	 * This round of TOD syncing is done. Set the clock comparator
 	 * to the next tick and let the processor continue.
 	 */
-	fixup_clock_comparator(etr_sync.fixup_cc);
+	fixup_clock_comparator(sync->fixup_cc);
 }
 
-static void etr_sync_cpu_end(void *dummy)
+static void clock_sync_cpu_end(void *dummy)
 {
 }
 
@@ -672,6 +688,7 @@ static void etr_sync_cpu_end(void *dummy)
 static int etr_sync_clock(struct etr_aib *aib, int port)
 {
 	struct etr_aib *sync_port;
+	struct clock_sync_data etr_sync;
 	unsigned long long clock, old_clock, delay, delta;
 	int follows;
 	int rc;
@@ -690,9 +707,9 @@ static int etr_sync_clock(struct etr_aib *aib, int port)
 	 */
 	memset(&etr_sync, 0, sizeof(etr_sync));
 	preempt_disable();
-	smp_call_function(etr_sync_cpu_start, NULL, 0, 0);
+	smp_call_function(clock_sync_cpu_start, &etr_sync, 0);
 	local_irq_disable();
-	etr_enable_sync_clock();
+	enable_sync_clock();
 
 	/* Set clock to next OTE. */
 	__ctl_set_bit(14, 21);
@@ -707,13 +724,13 @@ static int etr_sync_clock(struct etr_aib *aib, int port)
 		/* Adjust Linux timing variables. */
 		delay = (unsigned long long)
 			(aib->edf2.etv - sync_port->edf2.etv) << 32;
-		delta = etr_adjust_time(old_clock, clock, delay);
+		delta = adjust_time(old_clock, clock, delay);
 		etr_sync.fixup_cc = delta;
 		fixup_clock_comparator(delta);
 		/* Verify that the clock is properly set. */
 		if (!etr_aib_follows(sync_port, aib, port)) {
 			/* Didn't work. */
-			etr_disable_sync_clock(NULL);
+			disable_sync_clock(NULL);
 			etr_sync.in_sync = -EAGAIN;
 			rc = -EAGAIN;
 		} else {
@@ -724,12 +741,12 @@ static int etr_sync_clock(struct etr_aib *aib, int port)
 		/* Could not set the clock ?!? */
 		__ctl_clear_bit(0, 29);
 		__ctl_clear_bit(14, 21);
-		etr_disable_sync_clock(NULL);
+		disable_sync_clock(NULL);
 		etr_sync.in_sync = -EAGAIN;
 		rc = -EAGAIN;
 	}
 	local_irq_enable();
-	smp_call_function(etr_sync_cpu_end,NULL,0,0);
+	smp_call_function(clock_sync_cpu_end, NULL, 0);
 	preempt_enable();
 	return rc;
 }
@@ -832,7 +849,7 @@ static struct etr_eacr etr_handle_update(struct etr_aib *aib,
 	 * Do not try to get the alternate port aib if the clock
 	 * is not in sync yet.
 	 */
-	if (!eacr.es)
+	if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags) && !eacr.es)
 		return eacr;
 
 	/*
@@ -840,7 +857,7 @@ static struct etr_eacr etr_handle_update(struct etr_aib *aib,
 	 * the other port immediately. If only stetr is available the
 	 * data-port bit toggle has to be used.
 	 */
-	if (test_bit(ETR_FLAG_STEAI, &etr_flags)) {
+	if (etr_steai_available) {
 		if (eacr.p0 && !etr_port0_uptodate) {
 			etr_steai_cv(&etr_port0, ETR_STEAI_PORT_0);
 			etr_port0_uptodate = 1;
@@ -909,10 +926,10 @@ static void etr_work_fn(struct work_struct *work)
 	if (!eacr.ea) {
 		/* Both ports offline. Reset everything. */
 		eacr.dp = eacr.es = eacr.sl = 0;
-		on_each_cpu(etr_disable_sync_clock, NULL, 0, 1);
+		on_each_cpu(disable_sync_clock, NULL, 1);
 		del_timer_sync(&etr_timer);
 		etr_update_eacr(eacr);
-		set_bit(ETR_FLAG_EACCES, &etr_flags);
+		clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
 		return;
 	}
 
@@ -953,7 +970,6 @@ static void etr_work_fn(struct work_struct *work)
 			eacr.e1 = 1;
 		sync_port = (etr_port0_uptodate &&
 			     etr_port_valid(&etr_port0, 0)) ? 0 : -1;
-		clear_bit(ETR_FLAG_EACCES, &etr_flags);
 	} else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_pps_mode) {
 		eacr.sl = 0;
 		eacr.e0 = 0;
@@ -962,7 +978,6 @@ static void etr_work_fn(struct work_struct *work)
 			eacr.es = 0;
 		sync_port = (etr_port1_uptodate &&
 			     etr_port_valid(&etr_port1, 1)) ? 1 : -1;
-		clear_bit(ETR_FLAG_EACCES, &etr_flags);
 	} else if (eacr.p0 && aib.esw.psc0 == etr_lpsc_operational_step) {
 		eacr.sl = 1;
 		eacr.e0 = 1;
@@ -976,7 +991,6 @@ static void etr_work_fn(struct work_struct *work)
 			eacr.e1 = 1;
 		sync_port = (etr_port0_uptodate &&
 			     etr_port_valid(&etr_port0, 0)) ? 0 : -1;
-		clear_bit(ETR_FLAG_EACCES, &etr_flags);
 	} else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_operational_step) {
 		eacr.sl = 1;
 		eacr.e0 = 0;
@@ -985,19 +999,22 @@ static void etr_work_fn(struct work_struct *work)
 			eacr.es = 0;
 		sync_port = (etr_port1_uptodate &&
 			     etr_port_valid(&etr_port1, 1)) ? 1 : -1;
-		clear_bit(ETR_FLAG_EACCES, &etr_flags);
 	} else {
 		/* Both ports not usable. */
 		eacr.es = eacr.sl = 0;
 		sync_port = -1;
-		set_bit(ETR_FLAG_EACCES, &etr_flags);
+		clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
 	}
 
+	if (!test_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
+		eacr.es = 0;
+
 	/*
 	 * If the clock is in sync just update the eacr and return.
 	 * If there is no valid sync port wait for a port update.
 	 */
-	if (eacr.es || sync_port < 0) {
+	if (test_bit(CLOCK_SYNC_STP, &clock_sync_flags) ||
+	    eacr.es || sync_port < 0) {
 		etr_update_eacr(eacr);
 		etr_set_tolec_timeout(now);
 		return;
@@ -1018,11 +1035,13 @@ static void etr_work_fn(struct work_struct *work)
 	 * and set up a timer to try again after 0.5 seconds
 	 */
 	etr_update_eacr(eacr);
+	set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
 	if (now < etr_tolec + (1600000 << 12) ||
 	    etr_sync_clock(&aib, sync_port) != 0) {
 		/* Sync failed. Try again in 1/2 second. */
 		eacr.es = 0;
 		etr_update_eacr(eacr);
+		clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
 		etr_set_sync_timeout();
 	} else
 		etr_set_tolec_timeout(now);
@@ -1097,8 +1116,8 @@ static ssize_t etr_online_store(struct sys_device *dev,
 	value = simple_strtoul(buf, NULL, 0);
 	if (value != 0 && value != 1)
 		return -EINVAL;
-	if (test_bit(ETR_FLAG_ENOSYS, &etr_flags))
-		return -ENOSYS;
+	if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags))
+		return -EOPNOTSUPP;
 	if (dev == &etr_port0_dev) {
 		if (etr_port0_online == value)
 			return count;	/* Nothing to do. */
@@ -1292,3 +1311,318 @@ out:
 }
 
 device_initcall(etr_init_sysfs);
+
+/*
+ * Server Time Protocol (STP) code.
+ */
+static int stp_online;
+static struct stp_sstpi stp_info;
+static void *stp_page;
+
+static void stp_work_fn(struct work_struct *work);
+static DECLARE_WORK(stp_work, stp_work_fn);
+
+static int __init early_parse_stp(char *p)	/* "stp=on" / "stp=off" */
+{
+	if (p && strncmp(p, "off", 3) == 0)	/* p is NULL for a bare "stp" */
+		stp_online = 0;
+	else if (p && strncmp(p, "on", 2) == 0)
+		stp_online = 1;
+	return 0;	/* unknown or missing values leave stp_online unchanged */
+}
+early_param("stp", early_parse_stp);
+
+/*
+ * Reset STP attachment and record whether the machine supports STP.
+ */
+static void stp_reset(void)
+{
+	int rc;
+
+	stp_page = alloc_bootmem_pages(PAGE_SIZE);
+	rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
+	if (rc == 0)	/* 0 means success, as in the other chsc_sstpc callers */
+		set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags);
+	else if (stp_online) {
+		printk(KERN_WARNING "Running on non STP capable machine.\n");
+		free_bootmem((unsigned long) stp_page, PAGE_SIZE);
+		stp_page = NULL;
+		stp_online = 0;	/* requested on the command line but unavailable */
+	}
+}
+
+static int __init stp_init(void)
+{
+	if (test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags) && stp_online)
+		schedule_work(&stp_work);	/* first STP state check / sync attempt */
+	return 0;	/* never fails; without STP there is simply nothing to do */
+}
+
+arch_initcall(stp_init);
+
+/*
+ * STP timing alert. There are three causes:
+ * 1) timing status change
+ * 2) link availability change
+ * 3) time control parameter change
+ * In all three cases we are only interested in the clock source state.
+ * If an STP clock source is now available, stp_work_fn will use it.
+ */
+static void stp_timing_alert(struct stp_irq_parm *intparm)
+{
+	if (intparm->tsc || intparm->lac || intparm->tcpc)
+		schedule_work(&stp_work);	/* actual handling is deferred to stp_work_fn */
+}
+
+/*
+ * STP sync check machine check. This is called when the timing state
+ * changes from the synchronized state to the unsynchronized state.
+ * After an STP sync check the clock is not in sync. The machine check
+ * is broadcast to all cpus at the same time.
+ */
+void stp_sync_check(void)
+{
+	if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
+		return;	/* the clock is not currently synced via STP */
+	disable_sync_clock(NULL);
+	schedule_work(&stp_work);	/* let stp_work_fn try to resync */
+}
+
+/*
+ * STP island condition machine check. This is called when an attached
+ * server attempts to communicate over an STP link and the servers
+ * have matching CTN ids and have a valid stratum-1 configuration
+ * but the configurations do not match.
+ */
+void stp_island_check(void)
+{
+	if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
+		return;	/* the clock is not currently synced via STP */
+	disable_sync_clock(NULL);
+	schedule_work(&stp_work);	/* same recovery path as stp_sync_check */
+}
+
+/*
+ * STP work function. Check for the STP state and take over the clock
+ * synchronization if the STP clock source is usable.
+ */
+static void stp_work_fn(struct work_struct *work)
+{
+	struct clock_sync_data stp_sync;
+	unsigned long long old_clock, delta;
+	int rc;
+
+	if (!stp_online) {
+		chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);	/* result ignored */
+		return;
+	}
+
+	rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0);
+	if (rc)
+		return;
+
+	rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi));
+	if (rc || stp_info.c == 0)
+		return;
+
+	/*
+	 * Catch all other cpus and make them wait in clock_sync_cpu_start
+	 * until in_sync is set. NOTE(review): fixup_cc is only zeroed here
+	 * and never set to delta, unlike in etr_sync_clock - confirm intent.
+	 */
+	memset(&stp_sync, 0, sizeof(stp_sync));
+	preempt_disable();
+	smp_call_function(clock_sync_cpu_start, &stp_sync, 0);
+	local_irq_disable();
+	enable_sync_clock();
+
+	set_bit(CLOCK_SYNC_STP, &clock_sync_flags);
+	if (test_and_clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
+		schedule_work(&etr_work);	/* ETR was the sync source; let it react */
+
+	rc = 0;
+	if (stp_info.todoff[0] || stp_info.todoff[1] ||
+	    stp_info.todoff[2] || stp_info.todoff[3] ||
+	    stp_info.tmd != 2) {
+		old_clock = get_clock();
+		rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0);
+		if (rc == 0) {
+			delta = adjust_time(old_clock, get_clock(), 0);
+			fixup_clock_comparator(delta);	/* this cpu only */
+			rc = chsc_sstpi(stp_page, &stp_info,
+					sizeof(struct stp_sstpi));
+			if (rc == 0 && stp_info.tmd != 2)
+				rc = -EAGAIN;	/* still not in the expected mode */
+		}
+	}
+	if (rc) {
+		disable_sync_clock(NULL);
+		stp_sync.in_sync = -EAGAIN;	/* waiting cpus drop their sync bit */
+		clear_bit(CLOCK_SYNC_STP, &clock_sync_flags);
+		if (etr_port0_online || etr_port1_online)
+			schedule_work(&etr_work);	/* hand sync back to ETR */
+	} else
+		stp_sync.in_sync = 1;	/* releases cpus in clock_sync_cpu_start */
+
+	local_irq_enable();
+	smp_call_function(clock_sync_cpu_end, NULL, 0);
+	preempt_enable();
+}
+
+/*
+ * STP class sysfs interface functions
+ */
+static struct sysdev_class stp_sysclass = {
+	.name	= "stp",
+};
+
+static ssize_t stp_ctn_id_show(struct sysdev_class *class, char *buf)
+{
+	if (!stp_online)	/* no data is reported while STP is switched off */
+		return -ENODATA;
+	return sprintf(buf, "%016llx\n",	/* ctnid read as one unsigned long long */
+		       *(unsigned long long *) stp_info.ctnid);
+}
+
+static SYSDEV_CLASS_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL);
+
+static ssize_t stp_ctn_type_show(struct sysdev_class *class, char *buf)
+{
+	if (!stp_online)	/* no data is reported while STP is switched off */
+		return -ENODATA;
+	return sprintf(buf, "%i\n", stp_info.ctn);	/* value as last read by chsc_sstpi */
+}
+
+static SYSDEV_CLASS_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL);
+
+static ssize_t stp_dst_offset_show(struct sysdev_class *class, char *buf)
+{
+	if (!stp_online || !(stp_info.vbits & 0x2000))	/* presumably 0x2000 = dsto valid; confirm */
+		return -ENODATA;
+	return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto);	/* printed as signed 16 bit */
+}
+
+static SYSDEV_CLASS_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL);
+
+static ssize_t stp_leap_seconds_show(struct sysdev_class *class, char *buf)
+{
+	if (!stp_online || !(stp_info.vbits & 0x8000))	/* presumably 0x8000 = leaps valid; confirm */
+		return -ENODATA;
+	return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps);	/* printed as signed 16 bit */
+}
+
+static SYSDEV_CLASS_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL);
+
+static ssize_t stp_stratum_show(struct sysdev_class *class, char *buf)
+{
+	if (!stp_online)	/* no data is reported while STP is switched off */
+		return -ENODATA;
+	return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum);	/* printed as signed 16 bit */
+}
+
+static SYSDEV_CLASS_ATTR(stratum, 0400, stp_stratum_show, NULL);
+
+static ssize_t stp_time_offset_show(struct sysdev_class *class, char *buf)
+{
+	if (!stp_online || !(stp_info.vbits & 0x0800))	/* presumably 0x0800 = tto valid; confirm */
+		return -ENODATA;
+	return sprintf(buf, "%i\n", (int) stp_info.tto);	/* printed as int */
+}
+
+static SYSDEV_CLASS_ATTR(time_offset, 0400, stp_time_offset_show, NULL);
+
+static ssize_t stp_time_zone_offset_show(struct sysdev_class *class, char *buf)
+{
+	if (!stp_online || !(stp_info.vbits & 0x4000))	/* presumably 0x4000 = tzo valid; confirm */
+		return -ENODATA;
+	return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo);	/* printed as signed 16 bit */
+}
+
+static SYSDEV_CLASS_ATTR(time_zone_offset, 0400,
+			 stp_time_zone_offset_show, NULL);
+
+static ssize_t stp_timing_mode_show(struct sysdev_class *class, char *buf)
+{
+	if (!stp_online)	/* no data is reported while STP is switched off */
+		return -ENODATA;
+	return sprintf(buf, "%i\n", stp_info.tmd);	/* stp_work_fn expects tmd == 2 after a sync */
+}
+
+static SYSDEV_CLASS_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL);
+
+static ssize_t stp_timing_state_show(struct sysdev_class *class, char *buf)
+{
+	if (!stp_online)	/* no data is reported while STP is switched off */
+		return -ENODATA;
+	return sprintf(buf, "%i\n", stp_info.tst);	/* value as last read by chsc_sstpi */
+}
+
+static SYSDEV_CLASS_ATTR(timing_state, 0400, stp_timing_state_show, NULL);
+
+static ssize_t stp_online_show(struct sysdev_class *class, char *buf)
+{
+	return sprintf(buf, "%i\n", stp_online);	/* the requested setting, not the sync state */
+}
+
+static ssize_t stp_online_store(struct sysdev_class *class,
+				const char *buf, size_t count)
+{
+	unsigned int value;
+
+	value = simple_strtoul(buf, NULL, 0);	/* base 0; trailing text is not checked */
+	if (value != 0 && value != 1)
+		return -EINVAL;
+	if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+		return -EOPNOTSUPP;	/* machine has no STP facility */
+	stp_online = value;
+	schedule_work(&stp_work);	/* stp_work_fn acts on the new setting */
+	return count;
+}
+
+/*
+ * Can't use SYSDEV_CLASS_ATTR because the attribute should be named
+ * stp/online but attr_online already exists in this file ..
+ */
+static struct sysdev_class_attribute attr_stp_online = {
+	.attr = { .name = "online", .mode = 0600 },
+	.show	= stp_online_show,
+	.store	= stp_online_store,
+};
+
+static struct sysdev_class_attribute *stp_attributes[] = {
+	&attr_ctn_id,
+	&attr_ctn_type,
+	&attr_dst_offset,
+	&attr_leap_seconds,
+	&attr_stp_online,
+	&attr_stratum,
+	&attr_time_offset,
+	&attr_time_zone_offset,
+	&attr_timing_mode,
+	&attr_timing_state,
+	NULL
+};
+
+static int __init stp_init_sysfs(void)	/* register the "stp" class and its files */
+{
+	struct sysdev_class_attribute **attr;
+	int rc;
+
+	rc = sysdev_class_register(&stp_sysclass);
+	if (rc)
+		goto out;
+	for (attr = stp_attributes; *attr; attr++) {
+		rc = sysdev_class_create_file(&stp_sysclass, *attr);
+		if (rc)
+			goto out_unreg;	/* *attr itself was not created */
+	}
+	return 0;
+out_unreg:
+	while (attr != stp_attributes)	/* undo only the files created so far */
+		sysdev_class_remove_file(&stp_sysclass, *--attr);
+	sysdev_class_unregister(&stp_sysclass);
+out:
+	return rc;
+}
+
+device_initcall(stp_init_sysfs);
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 661a07217057..212d618b0095 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -313,8 +313,6 @@ void __init s390_init_cpu_topology(void)
 		machine_has_topology_irq = 1;
 
 	tl_info = alloc_bootmem_pages(PAGE_SIZE);
-	if (!tl_info)
-		goto error;
 	info = tl_info;
 	stsi(info, 15, 1, 2);
 
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index b4607155e8d0..76c1e60c92f3 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -40,7 +40,6 @@ SECTIONS
 	_etext = .;		/* End of text section */
 
 	NOTES :text :note
-	BUG_TABLE :text
 
 	RODATA
 
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index ca90ee3f930e..0fa5dc5d68e1 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -136,7 +136,7 @@ static inline void set_vtimer(__u64 expires)
 }
 #endif
 
-static void start_cpu_timer(void)
+void vtime_start_cpu_timer(void)
 {
 	struct vtimer_queue *vt_list;
 
@@ -150,7 +150,7 @@ static void start_cpu_timer(void)
 		set_vtimer(vt_list->idle);
 }
 
-static void stop_cpu_timer(void)
+void vtime_stop_cpu_timer(void)
 {
 	struct vtimer_queue *vt_list;
 
@@ -318,8 +318,7 @@ static void internal_add_vtimer(struct vtimer_list *timer)
 	vt_list = &per_cpu(virt_cpu_timer, timer->cpu);
 	spin_lock_irqsave(&vt_list->lock, flags);
 
-	if (timer->cpu != smp_processor_id())
-		printk("internal_add_vtimer: BUG, running on wrong CPU");
+	BUG_ON(timer->cpu != smp_processor_id());
 
 	/* if list is empty we only have to set the timer */
 	if (list_empty(&vt_list->list)) {
@@ -353,25 +352,12 @@ static void internal_add_vtimer(struct vtimer_list *timer)
 	put_cpu();
 }
 
-static inline int prepare_vtimer(struct vtimer_list *timer)
+static inline void prepare_vtimer(struct vtimer_list *timer)
 {
-	if (!timer->function) {
-		printk("add_virt_timer: uninitialized timer\n");
-		return -EINVAL;
-	}
-
-	if (!timer->expires || timer->expires > VTIMER_MAX_SLICE) {
-		printk("add_virt_timer: invalid timer expire value!\n");
-		return -EINVAL;
-	}
-
-	if (vtimer_pending(timer)) {
-		printk("add_virt_timer: timer pending\n");
-		return -EBUSY;
-	}
-
+	BUG_ON(!timer->function);
+	BUG_ON(!timer->expires || timer->expires > VTIMER_MAX_SLICE);
+	BUG_ON(vtimer_pending(timer));
 	timer->cpu = get_cpu();
-	return 0;
 }
 
 /*
@@ -382,10 +368,7 @@ void add_virt_timer(void *new)
 	struct vtimer_list *timer;
 
 	timer = (struct vtimer_list *)new;
-
-	if (prepare_vtimer(timer) < 0)
-		return;
-
+	prepare_vtimer(timer);
 	timer->interval = 0;
 	internal_add_vtimer(timer);
 }
@@ -399,10 +382,7 @@ void add_virt_timer_periodic(void *new)
 	struct vtimer_list *timer;
 
 	timer = (struct vtimer_list *)new;
-
-	if (prepare_vtimer(timer) < 0)
-		return;
-
+	prepare_vtimer(timer);
 	timer->interval = timer->expires;
 	internal_add_vtimer(timer);
 }
@@ -423,15 +403,8 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
 	unsigned long flags;
 	int cpu;
 
-	if (!timer->function) {
-		printk("mod_virt_timer: uninitialized timer\n");
-		return	-EINVAL;
-	}
-
-	if (!expires || expires > VTIMER_MAX_SLICE) {
-		printk("mod_virt_timer: invalid expire range\n");
-		return -EINVAL;
-	}
+	BUG_ON(!timer->function);
+	BUG_ON(!expires || expires > VTIMER_MAX_SLICE);
 
 	/*
 	 * This is a common optimization triggered by the
@@ -444,6 +417,9 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
 	cpu = get_cpu();
 	vt_list = &per_cpu(virt_cpu_timer, cpu);
 
+	/* check if we run on the right CPU */
+	BUG_ON(timer->cpu != cpu);
+
 	/* disable interrupts before test if timer is pending */
 	spin_lock_irqsave(&vt_list->lock, flags);
 
@@ -458,14 +434,6 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
 		return 0;
 	}
 
-	/* check if we run on the right CPU */
-	if (timer->cpu != cpu) {
-		printk("mod_virt_timer: running on wrong CPU, check your code\n");
-		spin_unlock_irqrestore(&vt_list->lock, flags);
-		put_cpu();
-		return -EINVAL;
-	}
-
 	list_del_init(&timer->entry);
 	timer->expires = expires;
 
@@ -536,24 +504,6 @@ void init_cpu_vtimer(void)
 
 }
 
-static int vtimer_idle_notify(struct notifier_block *self,
-			      unsigned long action, void *hcpu)
-{
-	switch (action) {
-	case S390_CPU_IDLE:
-		stop_cpu_timer();
-		break;
-	case S390_CPU_NOT_IDLE:
-		start_cpu_timer();
-		break;
-	}
-	return NOTIFY_OK;
-}
-
-static struct notifier_block vtimer_idle_nb = {
-	.notifier_call = vtimer_idle_notify,
-};
-
 void __init vtime_init(void)
 {
 	/* request the cpu timer external interrupt */
@@ -561,9 +511,6 @@ void __init vtime_init(void)
 					      &ext_int_info_timer) != 0)
 		panic("Couldn't request external interrupt 0x1005");
 
-	if (register_idle_notifier(&vtimer_idle_nb))
-		panic("Couldn't register idle notifier");
-
 	/* Enable cpu timer interrupts on the boot cpu. */
 	init_cpu_vtimer();
 }
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index f639a152869f..a0775e1f08df 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -20,7 +20,7 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
 	VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
 	vcpu->stat.diagnose_44++;
 	vcpu_put(vcpu);
-	schedule();
+	yield();
 	vcpu_load(vcpu);
 	return 0;
 }
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index fcd1ed8015c1..84a7fed4cd4e 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -339,6 +339,11 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 	if (kvm_cpu_has_interrupt(vcpu))
 		return 0;
 
+	__set_cpu_idle(vcpu);
+	spin_lock_bh(&vcpu->arch.local_int.lock);
+	vcpu->arch.local_int.timer_due = 0;
+	spin_unlock_bh(&vcpu->arch.local_int.lock);
+
 	if (psw_interrupts_disabled(vcpu)) {
 		VCPU_EVENT(vcpu, 3, "%s", "disabled wait");
 		__unset_cpu_idle(vcpu);
@@ -366,8 +371,6 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
 no_timer:
 	spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
 	spin_lock_bh(&vcpu->arch.local_int.lock);
-	__set_cpu_idle(vcpu);
-	vcpu->arch.local_int.timer_due = 0;
 	add_wait_queue(&vcpu->arch.local_int.wq, &wait);
 	while (list_empty(&vcpu->arch.local_int.list) &&
 		list_empty(&vcpu->arch.local_int.float_int->list) &&
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 0ac36a649eba..6558b09ff579 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -423,6 +423,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
 	return -EINVAL; /* not implemented yet */
 }
 
+extern void s390_handle_mcck(void);
+
 static void __vcpu_run(struct kvm_vcpu *vcpu)
 {
 	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
@@ -430,13 +432,21 @@ static void __vcpu_run(struct kvm_vcpu *vcpu)
 	if (need_resched())
 		schedule();
 
+	if (test_thread_flag(TIF_MCCK_PENDING))
+		s390_handle_mcck();
+
+	kvm_s390_deliver_pending_interrupts(vcpu);
+
 	vcpu->arch.sie_block->icptcode = 0;
 	local_irq_disable();
 	kvm_guest_enter();
 	local_irq_enable();
 	VCPU_EVENT(vcpu, 6, "entering sie flags %x",
 		   atomic_read(&vcpu->arch.sie_block->cpuflags));
-	sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs);
+	if (sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs)) {
+		VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	}
 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
 		   vcpu->arch.sie_block->icptcode);
 	local_irq_disable();
@@ -475,7 +485,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	might_sleep();
 
 	do {
-		kvm_s390_deliver_pending_interrupts(vcpu);
 		__vcpu_run(vcpu);
 		rc = kvm_handle_sie_intercept(vcpu);
 	} while (!signal_pending(current) && !rc);
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 29f3a63806b9..388cc7420055 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -44,37 +44,34 @@ char  empty_zero_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
 
 void show_mem(void)
 {
-	int i, total = 0, reserved = 0;
-	int shared = 0, cached = 0;
+	unsigned long i, total = 0, reserved = 0;
+	unsigned long shared = 0, cached = 0;
+	unsigned long flags;
 	struct page *page;
+	pg_data_t *pgdat;
 
 	printk("Mem-info:\n");
 	show_free_areas();
-	i = max_mapnr;
-	while (i-- > 0) {
-		if (!pfn_valid(i))
-			continue;
-		page = pfn_to_page(i);
-		total++;
-		if (PageReserved(page))
-			reserved++;
-		else if (PageSwapCache(page))
-			cached++;
-		else if (page_count(page))
-			shared += page_count(page) - 1;
+	for_each_online_pgdat(pgdat) {	/* walk every online node's pfn span */
+		pgdat_resize_lock(pgdat, &flags);
+		for (i = 0; i < pgdat->node_spanned_pages; i++) {
+			if (!pfn_valid(pgdat->node_start_pfn + i))
+				continue;	/* hole inside the node span */
+			page = pfn_to_page(pgdat->node_start_pfn + i);
+			total++;
+			if (PageReserved(page))
+				reserved++;
+			else if (PageSwapCache(page))
+				cached++;
+			else if (page_count(page))
+				shared += page_count(page) - 1;
+		}
+		pgdat_resize_unlock(pgdat, &flags);
 	}
-	printk("%d pages of RAM\n", total);
-	printk("%d reserved pages\n", reserved);
-	printk("%d pages shared\n", shared);
-	printk("%d pages swap cached\n", cached);
-
-	printk("%lu pages dirty\n", global_page_state(NR_FILE_DIRTY));
-	printk("%lu pages writeback\n", global_page_state(NR_WRITEBACK));
-	printk("%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
-	printk("%lu pages slab\n",
-	       global_page_state(NR_SLAB_RECLAIMABLE) +
-	       global_page_state(NR_SLAB_UNRECLAIMABLE));
-	printk("%lu pages pagetables\n", global_page_state(NR_PAGETABLE));
+	printk("%lu pages of RAM\n", total);	/* counters are unsigned long */
+	printk("%lu reserved pages\n", reserved);
+	printk("%lu pages shared\n", shared);
+	printk("%lu pages swap cached\n", cached);
 }
 
 /*
@@ -205,3 +202,22 @@ void free_initrd_mem(unsigned long start, unsigned long end)
         }
 }
 #endif
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+int arch_add_memory(int nid, u64 start, u64 size)	/* returns 0 or an error code */
+{
+	struct pglist_data *pgdat;
+	struct zone *zone;
+	int rc;
+
+	pgdat = NODE_DATA(nid);
+	zone = pgdat->node_zones + ZONE_NORMAL;	/* new pages always go to ZONE_NORMAL */
+	rc = vmem_add_mapping(start, size);
+	if (rc)
+		return rc;
+	rc = __add_pages(zone, PFN_DOWN(start), PFN_DOWN(size));	/* start pfn, page count */
+	if (rc)
+		vmem_remove_mapping(start, size);	/* roll back the mapping on failure */
+	return rc;
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 5c1aea97cd12..3d98ba82ea67 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -254,36 +254,46 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
 int s390_enable_sie(void)
 {
 	struct task_struct *tsk = current;
-	struct mm_struct *mm;
-	int rc;
+	struct mm_struct *mm, *old_mm;
 
-	task_lock(tsk);
-
-	rc = 0;
+	/* Do we have pgstes? if yes, we are done */
 	if (tsk->mm->context.pgstes)
-		goto unlock;
+		return 0;
 
-	rc = -EINVAL;
+	/* lets check if we are allowed to replace the mm */
+	task_lock(tsk);
 	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
-	    tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
-		goto unlock;
+	    tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {
+		task_unlock(tsk);
+		return -EINVAL;
+	}
+	task_unlock(tsk);
 
-	tsk->mm->context.pgstes = 1;	/* dirty little tricks .. */
+	/* we copy the mm with pgstes enabled */
+	tsk->mm->context.pgstes = 1;
 	mm = dup_mm(tsk);
 	tsk->mm->context.pgstes = 0;
-
-	rc = -ENOMEM;
 	if (!mm)
-		goto unlock;
-	mmput(tsk->mm);
+		return -ENOMEM;
+
+	/* Now lets check again if somebody attached ptrace etc */
+	task_lock(tsk);
+	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
+	    tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {
+		mmput(mm);
+		task_unlock(tsk);
+		return -EINVAL;
+	}
+
+	/* ok, we are alone. No ptrace, no threads, etc. */
+	old_mm = tsk->mm;
 	tsk->mm = tsk->active_mm = mm;
 	preempt_disable();
 	update_mm(mm, tsk);
 	cpu_set(smp_processor_id(), mm->cpu_vm_mask);
 	preempt_enable();
-	rc = 0;
-unlock:
 	task_unlock(tsk);
-	return rc;
+	mmput(old_mm);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(s390_enable_sie);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index ea2804808f39..e4868bfc672f 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -27,12 +27,19 @@ struct memory_segment {
 
 static LIST_HEAD(mem_segs);
 
-static pud_t *vmem_pud_alloc(void)
+static void __ref *vmem_alloc_pages(unsigned int order)	/* allocates 2^order pages */
+{
+	if (slab_is_available())	/* presumably past early boot; use the page allocator */
+		return (void *)__get_free_pages(GFP_KERNEL, order);
+	return alloc_bootmem_pages((1 << order) * PAGE_SIZE);	/* otherwise bootmem */
+}
+
+static inline pud_t *vmem_pud_alloc(void)
 {
 	pud_t *pud = NULL;
 
 #ifdef CONFIG_64BIT
-	pud = vmemmap_alloc_block(PAGE_SIZE * 4, 0);
+	pud = vmem_alloc_pages(2);
 	if (!pud)
 		return NULL;
 	clear_table((unsigned long *) pud, _REGION3_ENTRY_EMPTY, PAGE_SIZE * 4);
@@ -40,12 +47,12 @@ static pud_t *vmem_pud_alloc(void)
 	return pud;
 }
 
-static pmd_t *vmem_pmd_alloc(void)
+static inline pmd_t *vmem_pmd_alloc(void)
 {
 	pmd_t *pmd = NULL;
 
 #ifdef CONFIG_64BIT
-	pmd = vmemmap_alloc_block(PAGE_SIZE * 4, 0);
+	pmd = vmem_alloc_pages(2);
 	if (!pmd)
 		return NULL;
 	clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE * 4);
@@ -207,13 +214,14 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
 		if (pte_none(*pt_dir)) {
 			unsigned long new_page;
 
-			new_page =__pa(vmemmap_alloc_block(PAGE_SIZE, 0));
+			new_page =__pa(vmem_alloc_pages(0));
 			if (!new_page)
 				goto out;
 			pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL);
 			*pt_dir = pte;
 		}
 	}
+	memset(start, 0, nr * sizeof(struct page));
 	ret = 0;
 out:
 	flush_tlb_kernel_range(start_addr, end_addr);
@@ -228,7 +236,7 @@ static int insert_memory_segment(struct memory_segment *seg)
 {
 	struct memory_segment *tmp;
 
-	if (seg->start + seg->size >= VMEM_MAX_PHYS ||
+	if (seg->start + seg->size > VMEM_MAX_PHYS ||
 	    seg->start + seg->size < seg->start)
 		return -ERANGE;