42 files changed, 468 insertions, 501 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 7bc4a583f4e1..e85097bceff4 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -310,7 +310,7 @@ menu "Kernel Features"
 
 config SMP
 	bool "Symmetric Multi-Processing (EXPERIMENTAL)"
-	depends on EXPERIMENTAL #&& n
+	depends on EXPERIMENTAL && BROKEN #&& n
 	help
 	  This enables support for systems with more than one CPU. If you have
 	  a system with only one CPU, like most personal computers, say N. If
@@ -752,6 +752,8 @@ source "drivers/hwmon/Kconfig"
 
 source "drivers/misc/Kconfig"
 
+source "drivers/mfd/Kconfig"
+
 source "drivers/media/Kconfig"
 
 source "drivers/video/Kconfig"
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index ad26e98f1e62..c4923fac8dff 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -447,9 +447,26 @@ pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region,
 	region->end   = res->end - offset;
 }
 
+void __devinit
+pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res,
+			struct pci_bus_region *region)
+{
+	struct pci_sys_data *sys = dev->sysdata;
+	unsigned long delta = 0;
+
+	/* MEM takes precedence over IO when both flag bits are set */
+	if (res->flags & IORESOURCE_MEM)
+		delta = sys->mem_offset;
+	else if (res->flags & IORESOURCE_IO)
+		delta = sys->io_offset;
+	res->end   = region->end + delta;
+	res->start = region->start + delta;
+}
+
 #ifdef CONFIG_HOTPLUG
 EXPORT_SYMBOL(pcibios_fixup_bus);
 EXPORT_SYMBOL(pcibios_resource_to_bus);
+EXPORT_SYMBOL(pcibios_bus_to_resource);
 #endif
 
 /*
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index e5d370c235d7..2b6b4c786e65 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -327,6 +327,12 @@ __syscall_start:
 /* 310 */	.long	sys_request_key
 		.long	sys_keyctl
 		.long	sys_semtimedop
+/* vserver */	.long	sys_ni_syscall
+		.long	sys_ioprio_set
+/* 315 */	.long	sys_ioprio_get
+		.long	sys_inotify_init
+		.long	sys_inotify_add_watch
+		.long	sys_inotify_rm_watch
 __syscall_end:
 
 		.rept	NR_syscalls - (__syscall_end - __syscall_start) / 4
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 39a6c1b0b9a3..7152bfbee581 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -533,6 +533,13 @@ ENTRY(__switch_to)
 	ldr	r3, [r2, #TI_TP_VALUE]
 	stmia	ip!, {r4 - sl, fp, sp, lr}	@ Store most regs on stack
 	ldr	r6, [r2, #TI_CPU_DOMAIN]!
+#if __LINUX_ARM_ARCH__ >= 6
+#ifdef CONFIG_CPU_MPCORE
+	clrex
+#else
+	strex	r3, r4, [ip]			@ Clear exclusive monitor
+#endif
+#endif
 #if defined(CONFIG_CPU_XSCALE) && !defined(CONFIG_IWMMXT)
 	mra	r4, r5, acc0
 	stmia   ip, {r4, r5}
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 5e435e42dacd..a94d75fef598 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -658,11 +658,12 @@ handle_signal(unsigned long sig, struct k_sigaction *ka,
 	/*
 	 * Block the signal if we were unsuccessful.
 	 */
-	if (ret != 0 || !(ka->sa.sa_flags & SA_NODEFER)) {
+	if (ret != 0) {
 		spin_lock_irq(&tsk->sighand->siglock);
 		sigorsets(&tsk->blocked, &tsk->blocked,
 			  &ka->sa.sa_mask);
-		sigaddset(&tsk->blocked, sig);
+		if (!(ka->sa.sa_flags & SA_NODEFER))
+			sigaddset(&tsk->blocked, sig);
 		recalc_sigpending();
 		spin_unlock_irq(&tsk->sighand->siglock);
 	}
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 295e0a8379cf..b2085735a2ba 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -176,6 +176,7 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
 	cpu_set(cpu, mm->cpu_vm_mask);
 	cpu_switch_mm(mm->pgd, mm);
 	enter_lazy_tlb(mm, current);
+	local_flush_tlb_all();
 
 	cpu_init();
 
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index d571c37ac30c..4554c961251c 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -617,7 +617,7 @@ baddataabort(int code, unsigned long instr, struct pt_regs *regs)
 	notify_die("unknown data abort code", regs, &info, instr, 0);
 }
 
-volatile void __bug(const char *file, int line, void *data)
+void __attribute__((noreturn)) __bug(const char *file, int line, void *data)
 {
 	printk(KERN_CRIT"kernel BUG at %s:%d!", file, line);
 	if (data)
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index 5382a3023602..64a988c1ad44 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -1,4 +1,6 @@
-#if __LINUX_ARM_ARCH__ >= 6
+#include <linux/config.h>
+
+#if __LINUX_ARM_ARCH__ >= 6 && defined(CONFIG_CPU_MPCORE)
 	.macro	bitop, instr
 	mov	r2, #1
 	and	r3, r0, #7		@ Get bit offset
@@ -7,7 +9,7 @@
 1:	ldrexb	r2, [r1]
 	\instr	r2, r2, r3
 	strexb	r0, r2, [r1]
-	cmpne	r0, #0
+	cmp	r0, #0
 	bne	1b
 	mov	pc, lr
 	.endm
diff --git a/arch/arm/mach-integrator/platsmp.c b/arch/arm/mach-integrator/platsmp.c
index aecf47ba033a..ea10bd8c972c 100644
--- a/arch/arm/mach-integrator/platsmp.c
+++ b/arch/arm/mach-integrator/platsmp.c
@@ -15,6 +15,7 @@
 #include <linux/mm.h>
 
 #include <asm/atomic.h>
+#include <asm/cacheflush.h>
 #include <asm/delay.h>
 #include <asm/mmu_context.h>
 #include <asm/procinfo.h>
@@ -80,6 +81,7 @@ int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
 	 * "cpu" is Linux's internal ID.
 	 */
 	pen_release = cpu;
+	flush_cache_all();
 
 	/*
 	 * XXX
diff --git a/arch/arm/mach-ixp4xx/coyote-setup.c b/arch/arm/mach-ixp4xx/coyote-setup.c
index 4ff4393ef0ea..411ea9996190 100644
--- a/arch/arm/mach-ixp4xx/coyote-setup.c
+++ b/arch/arm/mach-ixp4xx/coyote-setup.c
@@ -36,7 +36,7 @@ static struct flash_platform_data coyote_flash_data = {
 
 static struct resource coyote_flash_resource = {
 	.start		= COYOTE_FLASH_BASE,
-	.end		= COYOTE_FLASH_BASE + COYOTE_FLASH_SIZE,
+	.end		= COYOTE_FLASH_BASE + COYOTE_FLASH_SIZE - 1,
 	.flags		= IORESOURCE_MEM,
 };
 
@@ -61,7 +61,7 @@ static struct plat_serial8250_port coyote_uart_data[] = {
 		.mapbase	= IXP4XX_UART2_BASE_PHYS,
 		.membase	= (char *)IXP4XX_UART2_BASE_VIRT + REG_OFFSET,
 		.irq		= IRQ_IXP4XX_UART2,
-		.flags		= UPF_BOOT_AUTOCONF,
+		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST,
 		.iotype		= UPIO_MEM,
 		.regshift	= 2,
 		.uartclk	= IXP4XX_UART_XTAL,
diff --git a/arch/arm/mach-ixp4xx/gtwx5715-setup.c b/arch/arm/mach-ixp4xx/gtwx5715-setup.c
index 8ba1cd9406e7..333459d6aa46 100644
--- a/arch/arm/mach-ixp4xx/gtwx5715-setup.c
+++ b/arch/arm/mach-ixp4xx/gtwx5715-setup.c
@@ -83,7 +83,7 @@ static struct plat_serial8250_port gtwx5715_uart_platform_data[] = {
 	.mapbase	= IXP4XX_UART2_BASE_PHYS,
 	.membase	= (char *)IXP4XX_UART2_BASE_VIRT + REG_OFFSET,
 	.irq		= IRQ_IXP4XX_UART2,
-	.flags		= UPF_BOOT_AUTOCONF,
+	.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST,
 	.iotype		= UPIO_MEM,
 	.regshift	= 2,
 	.uartclk	= IXP4XX_UART_XTAL,
@@ -114,7 +114,7 @@ static struct flash_platform_data gtwx5715_flash_data = {
 
 static struct resource gtwx5715_flash_resource = {
 	.start		= GTWX5715_FLASH_BASE,
-	.end		= GTWX5715_FLASH_BASE + GTWX5715_FLASH_SIZE,
+	.end		= GTWX5715_FLASH_BASE + GTWX5715_FLASH_SIZE - 1,
 	.flags		= IORESOURCE_MEM,
 };
 
diff --git a/arch/arm/mach-ixp4xx/ixdp425-setup.c b/arch/arm/mach-ixp4xx/ixdp425-setup.c
index c2ba759e9946..fa0646c8693b 100644
--- a/arch/arm/mach-ixp4xx/ixdp425-setup.c
+++ b/arch/arm/mach-ixp4xx/ixdp425-setup.c
@@ -36,7 +36,7 @@ static struct flash_platform_data ixdp425_flash_data = {
 
 static struct resource ixdp425_flash_resource = {
 	.start		= IXDP425_FLASH_BASE,
-	.end		= IXDP425_FLASH_BASE + IXDP425_FLASH_SIZE,
+	.end		= IXDP425_FLASH_BASE + IXDP425_FLASH_SIZE - 1,
 	.flags		= IORESOURCE_MEM,
 };
 
@@ -82,7 +82,7 @@ static struct plat_serial8250_port ixdp425_uart_data[] = {
 		.mapbase	= IXP4XX_UART1_BASE_PHYS,
 		.membase	= (char *)IXP4XX_UART1_BASE_VIRT + REG_OFFSET,
 		.irq		= IRQ_IXP4XX_UART1,
-		.flags		= UPF_BOOT_AUTOCONF,
+		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST,
 		.iotype		= UPIO_MEM,
 		.regshift	= 2,
 		.uartclk	= IXP4XX_UART_XTAL,
@@ -91,7 +91,7 @@ static struct plat_serial8250_port ixdp425_uart_data[] = {
 		.mapbase	= IXP4XX_UART2_BASE_PHYS,
 		.membase	= (char *)IXP4XX_UART2_BASE_VIRT + REG_OFFSET,
 		.irq		= IRQ_IXP4XX_UART1,
-		.flags		= UPF_BOOT_AUTOCONF,
+		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST,
 		.iotype		= UPIO_MEM,
 		.regshift	= 2,
 		.uartclk	= IXP4XX_UART_XTAL,
diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c
index 1e7f343822d0..e9182242da95 100644
--- a/arch/arm/mach-s3c2410/mach-bast.c
+++ b/arch/arm/mach-s3c2410/mach-bast.c
@@ -30,6 +30,7 @@
  *     28-Jun-2005 BJD  Moved pm functionality out to common code
  *     17-Jul-2005 BJD  Changed to platform device for SuperIO 16550s
  *     25-Jul-2005 BJD  Removed ASIX static mappings
+ *     27-Jul-2005 BJD  Ensure maximum frequency of i2c bus
 */
 
 #include <linux/kernel.h>
@@ -60,6 +61,7 @@
 #include <asm/arch/regs-mem.h>
 #include <asm/arch/regs-lcd.h>
 #include <asm/arch/nand.h>
+#include <asm/arch/iic.h>
 
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/nand.h>
@@ -304,7 +306,7 @@ static void bast_nand_select(struct s3c2410_nand_set *set, int slot)
 }
 
 static struct s3c2410_platform_nand bast_nand_info = {
-	.tacls		= 80,
+	.tacls		= 40,
 	.twrph0		= 80,
 	.twrph1		= 80,
 	.nr_sets	= ARRAY_SIZE(bast_nand_sets),
@@ -385,6 +387,17 @@ static struct platform_device bast_sio = {
 	},
 };
 
+/* We have devices on the bus which cannot work much above the
+ * standard 100kHz I2C bus frequency.
+*/
+
+static struct s3c2410_platform_i2c bast_i2c_info = {
+	.flags		= 0,
+	.slave_addr	= 0x10,
+	.bus_freq	= 100*1000,
+	.max_freq	= 130*1000,
+};
+
 /* Standard BAST devices */
 
 static struct platform_device *bast_devices[] __initdata = {
@@ -431,6 +444,7 @@ void __init bast_map_io(void)
 	s3c24xx_uclk.parent  = &s3c24xx_clkout1;
 
 	s3c_device_nand.dev.platform_data = &bast_nand_info;
+	s3c_device_i2c.dev.platform_data = &bast_i2c_info;
 
 	s3c24xx_init_io(bast_iodesc, ARRAY_SIZE(bast_iodesc));
 	s3c24xx_init_clocks(0);
diff --git a/arch/arm/mach-s3c2410/s3c2410.c b/arch/arm/mach-s3c2410/s3c2410.c
index ff2f25409e44..0b88993dfd27 100644
--- a/arch/arm/mach-s3c2410/s3c2410.c
+++ b/arch/arm/mach-s3c2410/s3c2410.c
@@ -18,6 +18,7 @@
  *     28-Sep-2004 BJD  Updates for new serial port bits
  *     04-Nov-2004 BJD  Updated UART configuration process
  *     10-Jan-2005 BJD  Removed s3c2410_clock_tick_rate
+ *     13-Aug-2005 DA   Removed UART from initial I/O mappings
 */
 
 #include <linux/kernel.h>
@@ -49,10 +50,9 @@ static struct map_desc s3c2410_iodesc[] __initdata = {
 	IODESC_ENT(USBHOST),
 	IODESC_ENT(CLKPWR),
 	IODESC_ENT(LCD),
-	IODESC_ENT(UART),
 	IODESC_ENT(TIMER),
 	IODESC_ENT(ADC),
-	IODESC_ENT(WATCHDOG)
+	IODESC_ENT(WATCHDOG),
 };
 
 static struct resource s3c_uart0_resource[] = {
diff --git a/arch/arm/mach-s3c2410/usb-simtec.c b/arch/arm/mach-s3c2410/usb-simtec.c
index 7f2b61362976..f021fd82be52 100644
--- a/arch/arm/mach-s3c2410/usb-simtec.c
+++ b/arch/arm/mach-s3c2410/usb-simtec.c
@@ -1,6 +1,6 @@
 /* linux/arch/arm/mach-s3c2410/usb-simtec.c
  *
- * Copyright (c) 2004 Simtec Electronics
+ * Copyright (c) 2004,2005 Simtec Electronics
  *   Ben Dooks <ben@simtec.co.uk>
  *
  * http://www.simtec.co.uk/products/EB2410ITX/
@@ -14,6 +14,8 @@
  * Modifications:
  *	14-Sep-2004 BJD  Created
  *	18-Oct-2004 BJD  Cleanups, and added code to report OC cleared
+ *	09-Aug-2005 BJD  Renamed s3c2410_report_oc to s3c2410_usb_report_oc
+ *	09-Aug-2005 BJD  Ports powered only if both are enabled
 */
 
 #define DEBUG
@@ -47,13 +49,19 @@
  * designed boards.
 */
 
+static unsigned int power_state[2];
+
 static void
 usb_simtec_powercontrol(int port, int to)
 {
 	pr_debug("usb_simtec_powercontrol(%d,%d)\n", port, to);
 
-	if (port == 1)
-		s3c2410_gpio_setpin(S3C2410_GPB4, to ? 0:1);
+	power_state[port] = to;
+
+	/* GPB4 is driven to 0 only while both ports are requested on */
+	to = (power_state[0] && power_state[1]) ? 0 : 1;
+
+	s3c2410_gpio_setpin(S3C2410_GPB4, to);
 }
 
 static irqreturn_t
@@ -63,10 +71,10 @@ usb_simtec_ocirq(int irq, void *pw, struct pt_regs *regs)
 
 	if (s3c2410_gpio_getpin(S3C2410_GPG10) == 0) {
 		pr_debug("usb_simtec: over-current irq (oc detected)\n");
-		s3c2410_report_oc(info, 3);
+		s3c2410_usb_report_oc(info, 3);
 	} else {
 		pr_debug("usb_simtec: over-current irq (oc cleared)\n");
-		s3c2410_report_oc(info, 0);
+		s3c2410_usb_report_oc(info, 0);
 	}
 
 	return IRQ_HANDLED;
diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index 4d4d303ee3a8..24687f511bf5 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -35,6 +35,7 @@
 #include <asm/mach/map.h>
 #include <asm/mach/serial_sa1100.h>
 #include <asm/arch/assabet.h>
+#include <asm/arch/mcp.h>
 
 #include "generic.h"
 
@@ -198,6 +199,11 @@ static struct irda_platform_data assabet_irda_data = {
 	.set_speed	= assabet_irda_set_speed,
 };
 
+static struct mcp_plat_data assabet_mcp_data = {
+	.mccr0		= MCCR0_ADM,
+	.sclk_rate	= 11981000,
+};
+
 static void __init assabet_init(void)
 {
 	/*
@@ -246,6 +252,7 @@ static void __init assabet_init(void)
 	sa11x0_set_flash_data(&assabet_flash_data, assabet_flash_resources,
 			      ARRAY_SIZE(assabet_flash_resources));
 	sa11x0_set_irda_data(&assabet_irda_data);
+	sa11x0_set_mcp_data(&assabet_mcp_data);
 }
 
 /*
diff --git a/arch/arm/mach-sa1100/cerf.c b/arch/arm/mach-sa1100/cerf.c
index 0aa918e24c31..9484be7dc671 100644
--- a/arch/arm/mach-sa1100/cerf.c
+++ b/arch/arm/mach-sa1100/cerf.c
@@ -29,6 +29,7 @@
 #include <asm/mach/serial_sa1100.h>
 
 #include <asm/arch/cerf.h>
+#include <asm/arch/mcp.h>
 #include "generic.h"
 
 static struct resource cerfuart2_resources[] = {
@@ -116,10 +117,16 @@ static void __init cerf_map_io(void)
 	GPDR |= CERF_GPIO_CF_RESET;
 }
 
+static struct mcp_plat_data cerf_mcp_data = {
+	.mccr0		= MCCR0_ADM,
+	.sclk_rate	= 11981000,
+};
+
 static void __init cerf_init(void)
 {
 	platform_add_devices(cerf_devices, ARRAY_SIZE(cerf_devices));
 	sa11x0_set_flash_data(&cerf_flash_data, &cerf_flash_resource, 1);
+	sa11x0_set_mcp_data(&cerf_mcp_data);
 }
 
 MACHINE_START(CERF, "Intrinsyc CerfBoard/CerfCube")
diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c
index 95ae217be1bc..3f1e358455e5 100644
--- a/arch/arm/mach-sa1100/generic.c
+++ b/arch/arm/mach-sa1100/generic.c
@@ -221,6 +221,11 @@ static struct platform_device sa11x0mcp_device = {
 	.resource	= sa11x0mcp_resources,
 };
 
+void sa11x0_set_mcp_data(struct mcp_plat_data *data)
+{
+	sa11x0mcp_device.dev.platform_data = data;
+}
+
 static struct resource sa11x0ssp_resources[] = {
 	[0] = {
 		.start	= 0x80070000,
diff --git a/arch/arm/mach-sa1100/generic.h b/arch/arm/mach-sa1100/generic.h
index bfe41da9923e..279e3afa3c39 100644
--- a/arch/arm/mach-sa1100/generic.h
+++ b/arch/arm/mach-sa1100/generic.h
@@ -34,5 +34,8 @@ struct resource;
 extern void sa11x0_set_flash_data(struct flash_platform_data *flash,
 				  struct resource *res, int nr);
 
+struct sa11x0_ssp_plat_ops;
+extern void sa11x0_set_ssp_data(struct sa11x0_ssp_plat_ops *ops);
+
 struct irda_platform_data;
 void sa11x0_set_irda_data(struct irda_platform_data *irda);
diff --git a/arch/arm/mach-sa1100/jornada720.c b/arch/arm/mach-sa1100/jornada720.c
index eee3cbc5ec4f..2f497112c96a 100644
--- a/arch/arm/mach-sa1100/jornada720.c
+++ b/arch/arm/mach-sa1100/jornada720.c
@@ -97,6 +97,7 @@ static void __init jornada720_map_io(void)
 }
 
 MACHINE_START(JORNADA720, "HP Jornada 720")
+	/* Maintainer: Michael Gernoth <michael@gernoth.net> */
 	.phys_ram	= 0xc0000000,
 	.phys_io	= 0x80000000,
 	.io_pg_offst	= ((0xf8000000) >> 18) & 0xfffc,
diff --git a/arch/arm/mach-sa1100/lart.c b/arch/arm/mach-sa1100/lart.c
index 870b488aeda4..ed6744d480af 100644
--- a/arch/arm/mach-sa1100/lart.c
+++ b/arch/arm/mach-sa1100/lart.c
@@ -13,12 +13,23 @@
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
 #include <asm/mach/serial_sa1100.h>
+#include <asm/arch/mcp.h>
 
 #include "generic.h"
 
 
 #warning "include/asm/arch-sa1100/ide.h needs fixing for lart"
 
+static struct mcp_plat_data lart_mcp_data = {
+	.mccr0		= MCCR0_ADM,
+	.sclk_rate	= 11981000,
+};
+
+static void __init lart_init(void)
+{
+	sa11x0_set_mcp_data(&lart_mcp_data);
+}
+
 static struct map_desc lart_io_desc[] __initdata = {
  /* virtual     physical    length      type */
   { 0xe8000000, 0x00000000, 0x00400000, MT_DEVICE }, /* main flash memory */
@@ -47,5 +58,6 @@ MACHINE_START(LART, "LART")
 	.boot_params	= 0xc0000100,
 	.map_io		= lart_map_io,
 	.init_irq	= sa1100_init_irq,
+	.init_machine	= lart_init,
 	.timer		= &sa1100_timer,
 MACHINE_END
diff --git a/arch/arm/mach-sa1100/shannon.c b/arch/arm/mach-sa1100/shannon.c
index 43a00359fcdd..7482288278d9 100644
--- a/arch/arm/mach-sa1100/shannon.c
+++ b/arch/arm/mach-sa1100/shannon.c
@@ -18,6 +18,7 @@
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
 #include <asm/mach/serial_sa1100.h>
+#include <asm/arch/mcp.h>
 #include <asm/arch/shannon.h>
 
 #include "generic.h"
@@ -52,9 +53,15 @@ static struct resource shannon_flash_resource = {
 	.flags		= IORESOURCE_MEM,
 };
 
+static struct mcp_plat_data shannon_mcp_data = {
+	.mccr0		= MCCR0_ADM,
+	.sclk_rate	= 11981000,
+};
+
 static void __init shannon_init(void)
 {
 	sa11x0_set_flash_data(&shannon_flash_data, &shannon_flash_resource, 1);
+	sa11x0_set_mcp_data(&shannon_mcp_data);
 }
 
 static void __init shannon_map_io(void)
diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c
index 77978586b126..07f6d5fd7bb0 100644
--- a/arch/arm/mach-sa1100/simpad.c
+++ b/arch/arm/mach-sa1100/simpad.c
@@ -23,6 +23,7 @@
 #include <asm/mach/flash.h>
 #include <asm/mach/map.h>
 #include <asm/mach/serial_sa1100.h>
+#include <asm/arch/mcp.h>
 #include <asm/arch/simpad.h>
 
 #include <linux/serial_core.h>
@@ -123,6 +124,11 @@ static struct resource simpad_flash_resources [] = {
 	}
 };
 
+static struct mcp_plat_data simpad_mcp_data = {
+	.mccr0		= MCCR0_ADM,
+	.sclk_rate	= 11981000,
+};
+
 
 
 static void __init simpad_map_io(void)
@@ -157,6 +163,7 @@ static void __init simpad_map_io(void)
 
 	sa11x0_set_flash_data(&simpad_flash_data, simpad_flash_resources,
 			      ARRAY_SIZE(simpad_flash_resources));
+	sa11x0_set_mcp_data(&simpad_mcp_data);
 }
 
 static void simpad_power_off(void)
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index afbbeb6f4658..db5e47dfc303 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -384,7 +384,7 @@ config CPU_DCACHE_DISABLE
 
 config CPU_DCACHE_WRITETHROUGH
 	bool "Force write through D-cache"
-	depends on (CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM1020) && !CPU_DISABLE_DCACHE
+	depends on (CPU_ARM920T || CPU_ARM922T || CPU_ARM925T || CPU_ARM926T || CPU_ARM1020) && !CPU_DCACHE_DISABLE
 	default y if CPU_ARM925T
 	help
 	  Say Y here to use the data cache in writethrough mode. Unless you
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 65bfe84b6d67..0b6c4db44e08 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -238,9 +238,9 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 	up_read(&mm->mmap_sem);
 
 	/*
-	 * Handle the "normal" case first
+	 * Handle the "normal" case first - VM_FAULT_MAJOR / VM_FAULT_MINOR
 	 */
-	if (fault > 0)
+	if (fault >= VM_FAULT_MINOR)
 		return 0;
 
 	/*
@@ -261,7 +261,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 		do_exit(SIGKILL);
 		return 0;
 
-	case 0:
+	case VM_FAULT_SIGBUS:
 		/*
 		 * We had some memory, but were unable to
 		 * successfully fix up this page fault.
diff --git a/arch/arm/mm/mm-armv.c b/arch/arm/mm/mm-armv.c
index e33fe4229d05..3c655c54e231 100644
--- a/arch/arm/mm/mm-armv.c
+++ b/arch/arm/mm/mm-armv.c
@@ -383,6 +383,7 @@ static void __init build_mem_type_table(void)
 {
 	struct cachepolicy *cp;
 	unsigned int cr = get_cr();
+	unsigned int user_pgprot;
 	int cpu_arch = cpu_architecture();
 	int i;
 
@@ -408,6 +409,9 @@ static void __init build_mem_type_table(void)
 		}
 	}
 
+	cp = &cache_policies[cachepolicy];
+	user_pgprot = cp->pte;
+
 	/*
 	 * ARMv6 and above have extended page tables.
 	 */
@@ -426,11 +430,18 @@ static void __init build_mem_type_table(void)
 		mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
 		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
 
+		/*
+		 * Mark the device area as "shared device"
+		 */
 		mem_types[MT_DEVICE].prot_pte |= L_PTE_BUFFERABLE;
 		mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;
-	}
 
-	cp = &cache_policies[cachepolicy];
+		/*
+		 * User pages need to be mapped with the ASID
+		 * (iow, non-global)
+		 */
+		user_pgprot |= L_PTE_ASID;
+	}
 
 	if (cpu_arch >= CPU_ARCH_ARMv5) {
 		mem_types[MT_LOW_VECTORS].prot_pte |= cp->pte & PTE_CACHEABLE;
@@ -448,7 +459,7 @@ static void __init build_mem_type_table(void)
 
 	for (i = 0; i < 16; i++) {
 		unsigned long v = pgprot_val(protection_map[i]);
-		v &= (~(PTE_BUFFERABLE|PTE_CACHEABLE)) | cp->pte;
+		v = (v & ~(PTE_BUFFERABLE|PTE_CACHEABLE)) | user_pgprot;
 		protection_map[i] = __pgprot(v);
 	}
 
diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S
index 352db98ee269..139a38670c5d 100644
--- a/arch/arm/mm/proc-v6.S
+++ b/arch/arm/mm/proc-v6.S
@@ -105,18 +105,12 @@ ENTRY(cpu_v6_dcache_clean_area)
 ENTRY(cpu_v6_switch_mm)
 	mov	r2, #0
 	ldr	r1, [r1, #MM_CONTEXT_ID]	@ get mm->context.id
-	mcr     p15, 0, r2, c7, c5, 6           @ flush BTAC/BTB
+	mcr	p15, 0, r2, c7, c5, 6		@ flush BTAC/BTB
 	mcr	p15, 0, r2, c7, c10, 4		@ drain write buffer
 	mcr	p15, 0, r0, c2, c0, 0		@ set TTB 0
 	mcr	p15, 0, r1, c13, c0, 1		@ set context ID
 	mov	pc, lr
 
-#define nG	(1 << 11)
-#define APX	(1 << 9)
-#define AP1	(1 << 5)
-#define AP0	(1 << 4)
-#define XN	(1 << 0)
-
 /*
  *	cpu_v6_set_pte(ptep, pte)
  *
@@ -139,24 +133,24 @@ ENTRY(cpu_v6_switch_mm)
 ENTRY(cpu_v6_set_pte)
 	str	r1, [r0], #-2048		@ linux version
 
-	bic	r2, r1, #0x00000ff0
+	bic	r2, r1, #0x000007f0
 	bic	r2, r2, #0x00000003
-	orr	r2, r2, #AP0 | 2
+	orr	r2, r2, #PTE_EXT_AP0 | 2
 
 	tst	r1, #L_PTE_WRITE
 	tstne	r1, #L_PTE_DIRTY
-	orreq	r2, r2, #APX
+	orreq	r2, r2, #PTE_EXT_APX
 
 	tst	r1, #L_PTE_USER
-	orrne	r2, r2, #AP1 | nG
-	tstne	r2, #APX
-	bicne	r2, r2, #APX | AP0
+	orrne	r2, r2, #PTE_EXT_AP1
+	tstne	r2, #PTE_EXT_APX
+	bicne	r2, r2, #PTE_EXT_APX | PTE_EXT_AP0
 
 	tst	r1, #L_PTE_YOUNG
-	biceq	r2, r2, #APX | AP1 | AP0
+	biceq	r2, r2, #PTE_EXT_APX | PTE_EXT_AP_MASK
 
 @	tst	r1, #L_PTE_EXEC
-@	orreq	r2, r2, #XN
+@	orreq	r2, r2, #PTE_EXT_XN
 
 	tst	r1, #L_PTE_PRESENT
 	moveq	r2, #0
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index 2d977b4eeeab..b88de2700146 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -370,142 +370,6 @@ ENTRY(cpu_xscale_dcache_clean_area)
 	bhi	1b
 	mov	pc, lr
 
-/* ================================ CACHE LOCKING============================
- *
- * The XScale MicroArchitecture implements support for locking entries into
- * the data and instruction cache.  The following functions implement the core
- * low level instructions needed to accomplish the locking.  The developer's
- * manual states that the code that performs the locking must be in non-cached
- * memory.  To accomplish this, the code in xscale-cache-lock.c copies the
- * following functions from the cache into a non-cached memory region that
- * is allocated through consistent_alloc().
- *
- */
-	.align	5
-/*
- * xscale_icache_lock
- *
- * r0: starting address to lock
- * r1: end address to lock
- */
-ENTRY(xscale_icache_lock)
-
-iLockLoop:
-	bic	r0, r0, #CACHELINESIZE - 1
-	mcr	p15, 0, r0, c9, c1, 0	@ lock into cache
-	cmp	r0, r1			@ are we done?
-	add	r0, r0, #CACHELINESIZE	@ advance to next cache line
-	bls	iLockLoop
-	mov	pc, lr
-
-/*
- * xscale_icache_unlock
- */
-ENTRY(xscale_icache_unlock)
-	mcr	p15, 0, r0, c9, c1, 1	@ Unlock icache
-	mov	pc, lr
-
-/*
- * xscale_dcache_lock
- *
- * r0: starting address to lock
- * r1: end address to lock
- */
-ENTRY(xscale_dcache_lock)
-	mcr	p15, 0, ip, c7, c10, 4		@ Drain Write (& Fill) Buffer
-	mov	r2, #1
-	mcr	p15, 0, r2, c9, c2, 0	@ Put dcache in lock mode
-	cpwait	ip			@ Wait for completion
-
-	mrs	r2, cpsr
-	orr	r3, r2, #PSR_F_BIT | PSR_I_BIT
-dLockLoop:
-	msr	cpsr_c, r3
-	mcr	p15, 0, r0, c7, c10, 1	@ Write back line if it is dirty
-	mcr	p15, 0, r0, c7, c6, 1	@ Flush/invalidate line
-	msr	cpsr_c, r2
-	ldr	ip, [r0], #CACHELINESIZE @ Preload 32 bytes into cache from
-					@ location [r0]. Post-increment
-					@ r3 to next cache line
-	cmp	r0, r1			@ Are we done?
-	bls	dLockLoop
-
-	mcr	p15, 0, ip, c7, c10, 4		@ Drain Write (& Fill) Buffer
-	mov	r2, #0
-	mcr	p15, 0, r2, c9, c2, 0	@ Get out of lock mode
-	cpwait_ret lr, ip
-
-/*
- * xscale_dcache_unlock
- */
-ENTRY(xscale_dcache_unlock)
-	mcr	p15, 0, ip, c7, c10, 4		@ Drain Write (& Fill) Buffer
-	mcr	p15, 0, ip, c9, c2, 1	@ Unlock cache
-	mov	pc, lr
-
-/*
- * Needed to determine the length of the code that needs to be copied.
- */
-	.align	5
-ENTRY(xscale_cache_dummy)
-	mov	pc, lr
-
-/* ================================ TLB LOCKING==============================
- *
- * The XScale MicroArchitecture implements support for locking entries into
- * the Instruction and Data TLBs.  The following functions provide the
- * low level support for supporting these under Linux.  xscale-lock.c
- * implements some higher level management code.  Most of the following
- * is taken straight out of the Developer's Manual.
- */
-
-/*
- * Lock I-TLB entry
- *
- * r0: Virtual address to translate and lock
- */
-	.align	5
-ENTRY(xscale_itlb_lock)
-	mrs	r2, cpsr
-	orr	r3, r2, #PSR_F_BIT | PSR_I_BIT
-	msr	cpsr_c, r3			@ Disable interrupts
-	mcr	p15, 0, r0, c8, c5, 1		@ Invalidate I-TLB entry
-	mcr	p15, 0, r0, c10, c4, 0		@ Translate and lock
-	msr	cpsr_c, r2			@ Restore interrupts
-	cpwait_ret lr, ip
-
-/*
- * Lock D-TLB entry
- *
- * r0: Virtual address to translate and lock
- */
-	.align	5
-ENTRY(xscale_dtlb_lock)
-	mrs	r2, cpsr
-	orr	r3, r2, #PSR_F_BIT | PSR_I_BIT
-	msr	cpsr_c, r3			@ Disable interrupts
-	mcr	p15, 0, r0, c8, c6, 1		@ Invalidate D-TLB entry
-	mcr	p15, 0, r0, c10, c8, 0		@ Translate and lock
-	msr	cpsr_c, r2			@ Restore interrupts
-	cpwait_ret lr, ip
-
-/*
- * Unlock all I-TLB entries
- */
-	.align	5
-ENTRY(xscale_itlb_unlock)
-	mcr	p15, 0, ip, c10, c4, 1		@ Unlock I-TLB
-	mcr	p15, 0, ip, c8, c5, 0		@ Invalidate I-TLB
-	cpwait_ret lr, ip
-
-/*
- * Unlock all D-TLB entries
- */
-ENTRY(xscale_dtlb_unlock)
-	mcr	p15, 0, ip, c10, c8, 1		@ Unlock D-TBL
-	mcr	p15, 0, ip, c8, c6, 0		@ Invalidate D-TLB
-	cpwait_ret lr, ip
-
 /* =============================== PageTable ============================== */
 
 #define PTE_CACHE_WRITE_ALLOCATE 0
diff --git a/arch/arm/nwfpe/double_cpdo.c b/arch/arm/nwfpe/double_cpdo.c
index 7ffd8cb9bc96..c51d1386a97c 100644
--- a/arch/arm/nwfpe/double_cpdo.c
+++ b/arch/arm/nwfpe/double_cpdo.c
@@ -40,17 +40,17 @@ float64 float64_arccos(float64 rFm);
 float64 float64_pow(float64 rFn, float64 rFm);
 float64 float64_pol(float64 rFn, float64 rFm);
 
-static float64 float64_rsf(float64 rFn, float64 rFm)
+static float64 float64_rsf(struct roundingData *roundData, float64 rFn, float64 rFm)
 {
-	return float64_sub(rFm, rFn);
+	return float64_sub(roundData, rFm, rFn);
 }
 
-static float64 float64_rdv(float64 rFn, float64 rFm)
+static float64 float64_rdv(struct roundingData *roundData, float64 rFn, float64 rFm)
 {
-	return float64_div(rFm, rFn);
+	return float64_div(roundData, rFm, rFn);
 }
 
-static float64 (*const dyadic_double[16])(float64 rFn, float64 rFm) = {
+static float64 (*const dyadic_double[16])(struct roundingData*, float64 rFn, float64 rFm) = {
 	[ADF_CODE >> 20] = float64_add,
 	[MUF_CODE >> 20] = float64_mul,
 	[SUF_CODE >> 20] = float64_sub,
@@ -65,12 +65,12 @@ static float64 (*const dyadic_double[16])(float64 rFn, float64 rFm) = {
 	[FRD_CODE >> 20] = float64_rdv,
 };
 
-static float64 float64_mvf(float64 rFm)
+static float64 float64_mvf(struct roundingData *roundData,float64 rFm)
 {
 	return rFm;
 }
 
-static float64 float64_mnf(float64 rFm)
+static float64 float64_mnf(struct roundingData *roundData,float64 rFm)
 {
 	union float64_components u;
 
@@ -84,7 +84,7 @@ static float64 float64_mnf(float64 rFm)
 	return u.f64;
 }
 
-static float64 float64_abs(float64 rFm)
+static float64 float64_abs(struct roundingData *roundData,float64 rFm)
 {
 	union float64_components u;
 
@@ -98,7 +98,7 @@ static float64 float64_abs(float64 rFm)
 	return u.f64;
 }
 
-static float64 (*const monadic_double[16])(float64 rFm) = {
+static float64 (*const monadic_double[16])(struct roundingData *, float64 rFm) = {
 	[MVF_CODE >> 20] = float64_mvf,
 	[MNF_CODE >> 20] = float64_mnf,
 	[ABS_CODE >> 20] = float64_abs,
@@ -108,7 +108,7 @@ static float64 (*const monadic_double[16])(float64 rFm) = {
 	[NRM_CODE >> 20] = float64_mvf,
 };
 
-unsigned int DoubleCPDO(const unsigned int opcode, FPREG * rFd)
+unsigned int DoubleCPDO(struct roundingData *roundData, const unsigned int opcode, FPREG * rFd)
 {
 	FPA11 *fpa11 = GET_FPA11();
 	float64 rFm;
@@ -151,13 +151,13 @@ unsigned int DoubleCPDO(const unsigned int opcode, FPREG * rFd)
 		}
 
 		if (dyadic_double[opc_mask_shift]) {
-			rFd->fDouble = dyadic_double[opc_mask_shift](rFn, rFm);
+			rFd->fDouble = dyadic_double[opc_mask_shift](roundData, rFn, rFm);
 		} else {
 			return 0;
 		}
 	} else {
 		if (monadic_double[opc_mask_shift]) {
-			rFd->fDouble = monadic_double[opc_mask_shift](rFm);
+			rFd->fDouble = monadic_double[opc_mask_shift](roundData, rFm);
 		} else {
 			return 0;
 		}
diff --git a/arch/arm/nwfpe/extended_cpdo.c b/arch/arm/nwfpe/extended_cpdo.c
index c39f68a3449e..65a279ba927f 100644
--- a/arch/arm/nwfpe/extended_cpdo.c
+++ b/arch/arm/nwfpe/extended_cpdo.c
@@ -35,17 +35,17 @@ floatx80 floatx80_arccos(floatx80 rFm);
 floatx80 floatx80_pow(floatx80 rFn, floatx80 rFm);
 floatx80 floatx80_pol(floatx80 rFn, floatx80 rFm);
 
-static floatx80 floatx80_rsf(floatx80 rFn, floatx80 rFm)
+static floatx80 floatx80_rsf(struct roundingData *roundData, floatx80 rFn, floatx80 rFm)
 {
-	return floatx80_sub(rFm, rFn);
+	return floatx80_sub(roundData, rFm, rFn);
 }
 
-static floatx80 floatx80_rdv(floatx80 rFn, floatx80 rFm)
+static floatx80 floatx80_rdv(struct roundingData *roundData, floatx80 rFn, floatx80 rFm)
 {
-	return floatx80_div(rFm, rFn);
+	return floatx80_div(roundData, rFm, rFn);
 }
 
-static floatx80 (*const dyadic_extended[16])(floatx80 rFn, floatx80 rFm) = {
+static floatx80 (*const dyadic_extended[16])(struct roundingData*, floatx80 rFn, floatx80 rFm) = {
 	[ADF_CODE >> 20] = floatx80_add,
 	[MUF_CODE >> 20] = floatx80_mul,
 	[SUF_CODE >> 20] = floatx80_sub,
@@ -60,24 +60,24 @@ static floatx80 (*const dyadic_extended[16])(floatx80 rFn, floatx80 rFm) = {
 	[FRD_CODE >> 20] = floatx80_rdv,
 };
 
-static floatx80 floatx80_mvf(floatx80 rFm)
+static floatx80 floatx80_mvf(struct roundingData *roundData, floatx80 rFm)
 {
 	return rFm;
 }
 
-static floatx80 floatx80_mnf(floatx80 rFm)
+static floatx80 floatx80_mnf(struct roundingData *roundData, floatx80 rFm)
 {
 	rFm.high ^= 0x8000;
 	return rFm;
 }
 
-static floatx80 floatx80_abs(floatx80 rFm)
+static floatx80 floatx80_abs(struct roundingData *roundData, floatx80 rFm)
 {
 	rFm.high &= 0x7fff;
 	return rFm;
 }
 
-static floatx80 (*const monadic_extended[16])(floatx80 rFm) = {
+static floatx80 (*const monadic_extended[16])(struct roundingData*, floatx80 rFm) = {
 	[MVF_CODE >> 20] = floatx80_mvf,
 	[MNF_CODE >> 20] = floatx80_mnf,
 	[ABS_CODE >> 20] = floatx80_abs,
@@ -87,7 +87,7 @@ static floatx80 (*const monadic_extended[16])(floatx80 rFm) = {
 	[NRM_CODE >> 20] = floatx80_mvf,
 };
 
-unsigned int ExtendedCPDO(const unsigned int opcode, FPREG * rFd)
+unsigned int ExtendedCPDO(struct roundingData *roundData, const unsigned int opcode, FPREG * rFd)
 {
 	FPA11 *fpa11 = GET_FPA11();
 	floatx80 rFm;
@@ -138,13 +138,13 @@ unsigned int ExtendedCPDO(const unsigned int opcode, FPREG * rFd)
 		}
 
 		if (dyadic_extended[opc_mask_shift]) {
-			rFd->fExtended = dyadic_extended[opc_mask_shift](rFn, rFm);
+			rFd->fExtended = dyadic_extended[opc_mask_shift](roundData, rFn, rFm);
 		} else {
 			return 0;
 		}
 	} else {
 		if (monadic_extended[opc_mask_shift]) {
-			rFd->fExtended = monadic_extended[opc_mask_shift](rFm);
+			rFd->fExtended = monadic_extended[opc_mask_shift](roundData, rFm);
 		} else {
 			return 0;
 		}
diff --git a/arch/arm/nwfpe/fpa11.c b/arch/arm/nwfpe/fpa11.c
index bf61696865ec..7690f731ee87 100644
--- a/arch/arm/nwfpe/fpa11.c
+++ b/arch/arm/nwfpe/fpa11.c
@@ -51,48 +51,42 @@ static void resetFPA11(void)
 	fpa11->fpsr = FP_EMULATOR | BIT_AC;
 }
 
-void SetRoundingMode(const unsigned int opcode)
+int8 SetRoundingMode(const unsigned int opcode)
 {
 	switch (opcode & MASK_ROUNDING_MODE) {
 	default:
 	case ROUND_TO_NEAREST:
-		float_rounding_mode = float_round_nearest_even;
-		break;
+		return float_round_nearest_even;
 
 	case ROUND_TO_PLUS_INFINITY:
-		float_rounding_mode = float_round_up;
-		break;
+		return float_round_up;
 
 	case ROUND_TO_MINUS_INFINITY:
-		float_rounding_mode = float_round_down;
-		break;
+		return float_round_down;
 
 	case ROUND_TO_ZERO:
-		float_rounding_mode = float_round_to_zero;
-		break;
+		return float_round_to_zero;
 	}
 }
 
-void SetRoundingPrecision(const unsigned int opcode)
+int8 SetRoundingPrecision(const unsigned int opcode)
 {
 #ifdef CONFIG_FPE_NWFPE_XP
 	switch (opcode & MASK_ROUNDING_PRECISION) {
 	case ROUND_SINGLE:
-		floatx80_rounding_precision = 32;
-		break;
+		return 32;
 
 	case ROUND_DOUBLE:
-		floatx80_rounding_precision = 64;
-		break;
+		return 64;
 
 	case ROUND_EXTENDED:
-		floatx80_rounding_precision = 80;
-		break;
+		return 80;
 
 	default:
-		floatx80_rounding_precision = 80;
+		return 80;
 	}
 #endif
+	return 80;	/* only reached without CONFIG_FPE_NWFPE_XP: every switch arm above returns */
 }
 
 void nwfpe_init_fpa(union fp_state *fp)
@@ -103,8 +97,6 @@ void nwfpe_init_fpa(union fp_state *fp)
 #endif
  	memset(fpa11, 0, sizeof(FPA11));
 	resetFPA11();
-	SetRoundingMode(ROUND_TO_NEAREST);
-	SetRoundingPrecision(ROUND_EXTENDED);
 	fpa11->initflag = 1;
 }
 
diff --git a/arch/arm/nwfpe/fpa11.h b/arch/arm/nwfpe/fpa11.h
index e4a61aea534b..93523ae4b7a1 100644
--- a/arch/arm/nwfpe/fpa11.h
+++ b/arch/arm/nwfpe/fpa11.h
@@ -37,6 +37,13 @@
 /* includes */
 #include "fpsr.h"		/* FP control and status register definitions */
 #include "milieu.h"
+
+struct roundingData {		/* per-instruction rounding state handed to the SoftFloat routines */
+    int8 mode;			/* float_round_* value from SetRoundingMode() */
+    int8 precision;		/* 32, 64 or 80, from SetRoundingPrecision() */
+    signed char exception;	/* accumulated float_flag_* bits; callers float_raise() them afterwards */
+};
+
 #include "softfloat.h"
 
 #define		typeNone		0x00
@@ -84,8 +91,8 @@ typedef struct tagFPA11 {
 				   initialised. */
 } FPA11;
 
-extern void SetRoundingMode(const unsigned int);
-extern void SetRoundingPrecision(const unsigned int);
+extern int8 SetRoundingMode(const unsigned int);
+extern int8 SetRoundingPrecision(const unsigned int);
 extern void nwfpe_init_fpa(union fp_state *fp);
 
 #endif
diff --git a/arch/arm/nwfpe/fpa11_cpdo.c b/arch/arm/nwfpe/fpa11_cpdo.c
index 1bea67437b6f..4a31dfd94068 100644
--- a/arch/arm/nwfpe/fpa11_cpdo.c
+++ b/arch/arm/nwfpe/fpa11_cpdo.c
@@ -24,15 +24,16 @@
 #include "fpa11.h"
 #include "fpopcode.h"
 
-unsigned int SingleCPDO(const unsigned int opcode, FPREG * rFd);
-unsigned int DoubleCPDO(const unsigned int opcode, FPREG * rFd);
-unsigned int ExtendedCPDO(const unsigned int opcode, FPREG * rFd);
+unsigned int SingleCPDO(struct roundingData *roundData, const unsigned int opcode, FPREG * rFd);
+unsigned int DoubleCPDO(struct roundingData *roundData, const unsigned int opcode, FPREG * rFd);
+unsigned int ExtendedCPDO(struct roundingData *roundData, const unsigned int opcode, FPREG * rFd);
 
 unsigned int EmulateCPDO(const unsigned int opcode)
 {
 	FPA11 *fpa11 = GET_FPA11();
 	FPREG *rFd;
 	unsigned int nType, nDest, nRc;
+	struct roundingData roundData;
 
 	/* Get the destination size.  If not valid let Linux perform
 	   an invalid instruction trap. */
@@ -40,7 +41,9 @@ unsigned int EmulateCPDO(const unsigned int opcode)
 	if (typeNone == nDest)
 		return 0;
 
-	SetRoundingMode(opcode);
+	roundData.mode = SetRoundingMode(opcode);
+	roundData.precision = SetRoundingPrecision(opcode);
+	roundData.exception = 0;
 
 	/* Compare the size of the operands in Fn and Fm.
 	   Choose the largest size and perform operations in that size,
@@ -63,14 +66,14 @@ unsigned int EmulateCPDO(const unsigned int opcode)
 
 	switch (nType) {
 	case typeSingle:
-		nRc = SingleCPDO(opcode, rFd);
+		nRc = SingleCPDO(&roundData, opcode, rFd);
 		break;
 	case typeDouble:
-		nRc = DoubleCPDO(opcode, rFd);
+		nRc = DoubleCPDO(&roundData, opcode, rFd);
 		break;
 #ifdef CONFIG_FPE_NWFPE_XP
 	case typeExtended:
-		nRc = ExtendedCPDO(opcode, rFd);
+		nRc = ExtendedCPDO(&roundData, opcode, rFd);
 		break;
 #endif
 	default:
@@ -93,9 +96,9 @@ unsigned int EmulateCPDO(const unsigned int opcode)
 			case typeSingle:
 				{
 					if (typeDouble == nType)
-						rFd->fSingle = float64_to_float32(rFd->fDouble);
+						rFd->fSingle = float64_to_float32(&roundData, rFd->fDouble);
 					else
-						rFd->fSingle = floatx80_to_float32(rFd->fExtended);
+						rFd->fSingle = floatx80_to_float32(&roundData, rFd->fExtended);
 				}
 				break;
 
@@ -104,7 +107,7 @@ unsigned int EmulateCPDO(const unsigned int opcode)
 					if (typeSingle == nType)
 						rFd->fDouble = float32_to_float64(rFd->fSingle);
 					else
-						rFd->fDouble = floatx80_to_float64(rFd->fExtended);
+						rFd->fDouble = floatx80_to_float64(&roundData, rFd->fExtended);
 				}
 				break;
 
@@ -121,12 +124,15 @@ unsigned int EmulateCPDO(const unsigned int opcode)
 #else
 		if (nDest != nType) {
 			if (nDest == typeSingle)
-				rFd->fSingle = float64_to_float32(rFd->fDouble);
+				rFd->fSingle = float64_to_float32(&roundData, rFd->fDouble);
 			else
 				rFd->fDouble = float32_to_float64(rFd->fSingle);
 		}
 #endif
 	}
 
+	if (roundData.exception)
+		float_raise(roundData.exception);
+
 	return nRc;
 }
diff --git a/arch/arm/nwfpe/fpa11_cpdt.c b/arch/arm/nwfpe/fpa11_cpdt.c
index 95fb63fa9d18..b0db5cbcc3b1 100644
--- a/arch/arm/nwfpe/fpa11_cpdt.c
+++ b/arch/arm/nwfpe/fpa11_cpdt.c
@@ -96,7 +96,7 @@ static inline void loadMultiple(const unsigned int Fn, const unsigned int __user
 	}
 }
 
-static inline void storeSingle(const unsigned int Fn, unsigned int __user *pMem)
+static inline void storeSingle(struct roundingData *roundData, const unsigned int Fn, unsigned int __user *pMem)
 {
 	FPA11 *fpa11 = GET_FPA11();
 	union {
@@ -106,12 +106,12 @@ static inline void storeSingle(const unsigned int Fn, unsigned int __user *pMem)
 
 	switch (fpa11->fType[Fn]) {
 	case typeDouble:
-		val.f = float64_to_float32(fpa11->fpreg[Fn].fDouble);
+		val.f = float64_to_float32(roundData, fpa11->fpreg[Fn].fDouble);
 		break;
 
 #ifdef CONFIG_FPE_NWFPE_XP
 	case typeExtended:
-		val.f = floatx80_to_float32(fpa11->fpreg[Fn].fExtended);
+		val.f = floatx80_to_float32(roundData, fpa11->fpreg[Fn].fExtended);
 		break;
 #endif
 
@@ -122,7 +122,7 @@ static inline void storeSingle(const unsigned int Fn, unsigned int __user *pMem)
 	put_user(val.i[0], pMem);
 }
 
-static inline void storeDouble(const unsigned int Fn, unsigned int __user *pMem)
+static inline void storeDouble(struct roundingData *roundData, const unsigned int Fn, unsigned int __user *pMem)
 {
 	FPA11 *fpa11 = GET_FPA11();
 	union {
@@ -137,7 +137,7 @@ static inline void storeDouble(const unsigned int Fn, unsigned int __user *pMem)
 
 #ifdef CONFIG_FPE_NWFPE_XP
 	case typeExtended:
-		val.f = floatx80_to_float64(fpa11->fpreg[Fn].fExtended);
+		val.f = floatx80_to_float64(roundData, fpa11->fpreg[Fn].fExtended);
 		break;
 #endif
 
@@ -259,8 +259,11 @@ unsigned int PerformSTF(const unsigned int opcode)
 {
 	unsigned int __user *pBase, *pAddress, *pFinal;
 	unsigned int nRc = 1, write_back = WRITE_BACK(opcode);
+	struct roundingData roundData;
 
-	SetRoundingMode(ROUND_TO_NEAREST);
+	roundData.mode = SetRoundingMode(opcode);
+	roundData.precision = SetRoundingPrecision(opcode);
+	roundData.exception = 0;
 
 	pBase = (unsigned int __user *) readRegister(getRn(opcode));
 	if (REG_PC == getRn(opcode)) {
@@ -281,10 +284,10 @@ unsigned int PerformSTF(const unsigned int opcode)
 
 	switch (opcode & MASK_TRANSFER_LENGTH) {
 	case TRANSFER_SINGLE:
-		storeSingle(getFd(opcode), pAddress);
+		storeSingle(&roundData, getFd(opcode), pAddress);
 		break;
 	case TRANSFER_DOUBLE:
-		storeDouble(getFd(opcode), pAddress);
+		storeDouble(&roundData, getFd(opcode), pAddress);
 		break;
 #ifdef CONFIG_FPE_NWFPE_XP
 	case TRANSFER_EXTENDED:
@@ -295,6 +298,9 @@ unsigned int PerformSTF(const unsigned int opcode)
 		nRc = 0;
 	}
 
+	if (roundData.exception)
+		float_raise(roundData.exception);
+
 	if (write_back)
 		writeRegister(getRn(opcode), (unsigned long) pFinal);
 	return nRc;
diff --git a/arch/arm/nwfpe/fpa11_cprt.c b/arch/arm/nwfpe/fpa11_cprt.c
index db01fbc97216..adf8d3000540 100644
--- a/arch/arm/nwfpe/fpa11_cprt.c
+++ b/arch/arm/nwfpe/fpa11_cprt.c
@@ -33,8 +33,6 @@ extern flag floatx80_is_nan(floatx80);
 extern flag float64_is_nan(float64);
 extern flag float32_is_nan(float32);
 
-void SetRoundingMode(const unsigned int opcode);
-
 unsigned int PerformFLT(const unsigned int opcode);
 unsigned int PerformFIX(const unsigned int opcode);
 
@@ -77,14 +75,17 @@ unsigned int EmulateCPRT(const unsigned int opcode)
 unsigned int PerformFLT(const unsigned int opcode)
 {
 	FPA11 *fpa11 = GET_FPA11();
-	SetRoundingMode(opcode);
-	SetRoundingPrecision(opcode);
+	struct roundingData roundData;
+
+	roundData.mode = SetRoundingMode(opcode);
+	roundData.precision = SetRoundingPrecision(opcode);
+	roundData.exception = 0;
 
 	switch (opcode & MASK_ROUNDING_PRECISION) {
 	case ROUND_SINGLE:
 		{
 			fpa11->fType[getFn(opcode)] = typeSingle;
-			fpa11->fpreg[getFn(opcode)].fSingle = int32_to_float32(readRegister(getRd(opcode)));
+			fpa11->fpreg[getFn(opcode)].fSingle = int32_to_float32(&roundData, readRegister(getRd(opcode)));
 		}
 		break;
 
@@ -108,6 +109,9 @@ unsigned int PerformFLT(const unsigned int opcode)
 		return 0;
 	}
 
+	if (roundData.exception)
+		float_raise(roundData.exception);
+
 	return 1;
 }
 
@@ -115,26 +119,29 @@ unsigned int PerformFIX(const unsigned int opcode)
 {
 	FPA11 *fpa11 = GET_FPA11();
 	unsigned int Fn = getFm(opcode);
+	struct roundingData roundData;
 
-	SetRoundingMode(opcode);
+	roundData.mode = SetRoundingMode(opcode);
+	roundData.precision = SetRoundingPrecision(opcode);
+	roundData.exception = 0;
 
 	switch (fpa11->fType[Fn]) {
 	case typeSingle:
 		{
-			writeRegister(getRd(opcode), float32_to_int32(fpa11->fpreg[Fn].fSingle));
+			writeRegister(getRd(opcode), float32_to_int32(&roundData, fpa11->fpreg[Fn].fSingle));
 		}
 		break;
 
 	case typeDouble:
 		{
-			writeRegister(getRd(opcode), float64_to_int32(fpa11->fpreg[Fn].fDouble));
+			writeRegister(getRd(opcode), float64_to_int32(&roundData, fpa11->fpreg[Fn].fDouble));
 		}
 		break;
 
 #ifdef CONFIG_FPE_NWFPE_XP
 	case typeExtended:
 		{
-			writeRegister(getRd(opcode), floatx80_to_int32(fpa11->fpreg[Fn].fExtended));
+			writeRegister(getRd(opcode), floatx80_to_int32(&roundData, fpa11->fpreg[Fn].fExtended));
 		}
 		break;
 #endif
@@ -143,6 +150,9 @@ unsigned int PerformFIX(const unsigned int opcode)
 		return 0;
 	}
 
+	if (roundData.exception)
+		float_raise(roundData.exception);
+
 	return 1;
 }
 
diff --git a/arch/arm/nwfpe/fpmodule.c b/arch/arm/nwfpe/fpmodule.c
index 12885f31d347..2dfe1ac42ee8 100644
--- a/arch/arm/nwfpe/fpmodule.c
+++ b/arch/arm/nwfpe/fpmodule.c
@@ -116,8 +116,6 @@ fpmodule.c to integrate with the NetBSD kernel (I hope!).
 code to access data in user space in some other source files at the 
 moment (grep for get_user / put_user calls).  --philb]
 
-float_exception_flags is a global variable in SoftFloat.
-
 This function is called by the SoftFloat routines to raise a floating
 point exception.  We check the trap enable byte in the FPSR, and raise
 a SIGFPE exception if necessary.  If not the relevant bits in the 
@@ -129,15 +127,14 @@ void float_raise(signed char flags)
 	register unsigned int fpsr, cumulativeTraps;
 
 #ifdef CONFIG_DEBUG_USER
-	printk(KERN_DEBUG
-	       "NWFPE: %s[%d] takes exception %08x at %p from %08lx\n",
-	       current->comm, current->pid, flags,
-	       __builtin_return_address(0), GET_USERREG()->ARM_pc);
+	/* Ignore inexact errors as there are far too many of them to log */
+	if (flags & ~BIT_IXC)
+		printk(KERN_DEBUG
+		       "NWFPE: %s[%d] takes exception %08x at %p from %08lx\n",
+		       current->comm, current->pid, flags,
+		       __builtin_return_address(0), GET_USERREG()->ARM_pc);
 #endif
 
-	/* Keep SoftFloat exception flags up to date.  */
-	float_exception_flags |= flags;
-
 	/* Read fpsr and initialize the cumulativeTraps.  */
 	fpsr = readFPSR();
 	cumulativeTraps = 0;
diff --git a/arch/arm/nwfpe/fpopcode.h b/arch/arm/nwfpe/fpopcode.h
index 8035f4faafbf..1777e92a88e6 100644
--- a/arch/arm/nwfpe/fpopcode.h
+++ b/arch/arm/nwfpe/fpopcode.h
@@ -370,20 +370,20 @@ TABLE 5
 #define getRoundingMode(opcode)		((opcode & MASK_ROUNDING_MODE) >> 5)
 
 #ifdef CONFIG_FPE_NWFPE_XP
-static inline const floatx80 getExtendedConstant(const unsigned int nIndex)
+static inline __attribute_pure__ floatx80 getExtendedConstant(const unsigned int nIndex)
 {
 	extern const floatx80 floatx80Constant[];
 	return floatx80Constant[nIndex];
 }
 #endif
 
-static inline const float64 getDoubleConstant(const unsigned int nIndex)
+static inline __attribute_pure__ float64 getDoubleConstant(const unsigned int nIndex)
 {
 	extern const float64 float64Constant[];
 	return float64Constant[nIndex];
 }
 
-static inline const float32 getSingleConstant(const unsigned int nIndex)
+static inline __attribute_pure__ float32 getSingleConstant(const unsigned int nIndex)
 {
 	extern const float32 float32Constant[];
 	return float32Constant[nIndex];
diff --git a/arch/arm/nwfpe/single_cpdo.c b/arch/arm/nwfpe/single_cpdo.c
index 705808e88d9d..c66981d682cf 100644
--- a/arch/arm/nwfpe/single_cpdo.c
+++ b/arch/arm/nwfpe/single_cpdo.c
@@ -36,17 +36,17 @@ float32 float32_arccos(float32 rFm);
 float32 float32_pow(float32 rFn, float32 rFm);
 float32 float32_pol(float32 rFn, float32 rFm);
 
-static float32 float32_rsf(float32 rFn, float32 rFm)
+static float32 float32_rsf(struct roundingData *roundData, float32 rFn, float32 rFm)
 {
-	return float32_sub(rFm, rFn);
+	return float32_sub(roundData, rFm, rFn);
 }
 
-static float32 float32_rdv(float32 rFn, float32 rFm)
+static float32 float32_rdv(struct roundingData *roundData, float32 rFn, float32 rFm)
 {
-	return float32_div(rFm, rFn);
+	return float32_div(roundData, rFm, rFn);
 }
 
-static float32 (*const dyadic_single[16])(float32 rFn, float32 rFm) = {
+static float32 (*const dyadic_single[16])(struct roundingData *, float32 rFn, float32 rFm) = {
 	[ADF_CODE >> 20] = float32_add,
 	[MUF_CODE >> 20] = float32_mul,
 	[SUF_CODE >> 20] = float32_sub,
@@ -60,22 +60,22 @@ static float32 (*const dyadic_single[16])(float32 rFn, float32 rFm) = {
 	[FRD_CODE >> 20] = float32_rdv,
 };
 
-static float32 float32_mvf(float32 rFm)
+static float32 float32_mvf(struct roundingData *roundData, float32 rFm)
 {
 	return rFm;
 }
 
-static float32 float32_mnf(float32 rFm)
+static float32 float32_mnf(struct roundingData *roundData, float32 rFm)
 {
 	return rFm ^ 0x80000000;
 }
 
-static float32 float32_abs(float32 rFm)
+static float32 float32_abs(struct roundingData *roundData, float32 rFm)
 {
 	return rFm & 0x7fffffff;
 }
 
-static float32 (*const monadic_single[16])(float32 rFm) = {
+static float32 (*const monadic_single[16])(struct roundingData*, float32 rFm) = {
 	[MVF_CODE >> 20] = float32_mvf,
 	[MNF_CODE >> 20] = float32_mnf,
 	[ABS_CODE >> 20] = float32_abs,
@@ -85,7 +85,7 @@ static float32 (*const monadic_single[16])(float32 rFm) = {
 	[NRM_CODE >> 20] = float32_mvf,
 };
 
-unsigned int SingleCPDO(const unsigned int opcode, FPREG * rFd)
+unsigned int SingleCPDO(struct roundingData *roundData, const unsigned int opcode, FPREG * rFd)
 {
 	FPA11 *fpa11 = GET_FPA11();
 	float32 rFm;
@@ -108,13 +108,13 @@ unsigned int SingleCPDO(const unsigned int opcode, FPREG * rFd)
 		if (fpa11->fType[Fn] == typeSingle &&
 		    dyadic_single[opc_mask_shift]) {
 			rFn = fpa11->fpreg[Fn].fSingle;
-			rFd->fSingle = dyadic_single[opc_mask_shift](rFn, rFm);
+			rFd->fSingle = dyadic_single[opc_mask_shift](roundData, rFn, rFm);
 		} else {
 			return 0;
 		}
 	} else {
 		if (monadic_single[opc_mask_shift]) {
-			rFd->fSingle = monadic_single[opc_mask_shift](rFm);
+			rFd->fSingle = monadic_single[opc_mask_shift](roundData, rFm);
 		} else {
 			return 0;
 		}
diff --git a/arch/arm/nwfpe/softfloat.c b/arch/arm/nwfpe/softfloat.c
index e038dd3be9b3..f9f049132a17 100644
--- a/arch/arm/nwfpe/softfloat.c
+++ b/arch/arm/nwfpe/softfloat.c
@@ -36,16 +36,6 @@ this code that are retained.
 
 /*
 -------------------------------------------------------------------------------
-Floating-point rounding mode, extended double-precision rounding precision,
-and exception flags.
--------------------------------------------------------------------------------
-*/
-int8 float_rounding_mode = float_round_nearest_even;
-int8 floatx80_rounding_precision = 80;
-int8 float_exception_flags;
-
-/*
--------------------------------------------------------------------------------
 Primitive arithmetic functions, including multi-word arithmetic, and
 division and square root approximations.  (Can be specialized to target if
 desired.)
@@ -77,14 +67,14 @@ input is too large, however, the invalid exception is raised and the largest
 positive or negative integer is returned.
 -------------------------------------------------------------------------------
 */
-static int32 roundAndPackInt32( flag zSign, bits64 absZ )
+static int32 roundAndPackInt32( struct roundingData *roundData, flag zSign, bits64 absZ )
 {
     int8 roundingMode;
     flag roundNearestEven;
     int8 roundIncrement, roundBits;
     int32 z;
 
-    roundingMode = float_rounding_mode;
+    roundingMode = roundData->mode;
     roundNearestEven = ( roundingMode == float_round_nearest_even );
     roundIncrement = 0x40;
     if ( ! roundNearestEven ) {
@@ -107,10 +97,10 @@ static int32 roundAndPackInt32( flag zSign, bits64 absZ )
     z = absZ;
     if ( zSign ) z = - z;
     if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
-        float_exception_flags |= float_flag_invalid;
+        roundData->exception |= float_flag_invalid;
         return zSign ? 0x80000000 : 0x7FFFFFFF;
     }
-    if ( roundBits ) float_exception_flags |= float_flag_inexact;
+    if ( roundBits ) roundData->exception |= float_flag_inexact;
     return z;
 
 }
@@ -224,14 +214,14 @@ The handling of underflow and overflow follows the IEC/IEEE Standard for
 Binary Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
+static float32 roundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig )
 {
     int8 roundingMode;
     flag roundNearestEven;
     int8 roundIncrement, roundBits;
     flag isTiny;
 
-    roundingMode = float_rounding_mode;
+    roundingMode = roundData->mode;
     roundNearestEven = ( roundingMode == float_round_nearest_even );
     roundIncrement = 0x40;
     if ( ! roundNearestEven ) {
@@ -254,7 +244,7 @@ static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
              || (    ( zExp == 0xFD )
                   && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
            ) {
-            float_raise( float_flag_overflow | float_flag_inexact );
+            roundData->exception |= float_flag_overflow | float_flag_inexact;
             return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
         }
         if ( zExp < 0 ) {
@@ -265,10 +255,10 @@ static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
             shift32RightJamming( zSig, - zExp, &zSig );
             zExp = 0;
             roundBits = zSig & 0x7F;
-            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+            if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
         }
     }
-    if ( roundBits ) float_exception_flags |= float_flag_inexact;
+    if ( roundBits ) roundData->exception |= float_flag_inexact;
     zSig = ( zSig + roundIncrement )>>7;
     zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
     if ( zSig == 0 ) zExp = 0;
@@ -287,12 +277,12 @@ point exponent.
 -------------------------------------------------------------------------------
 */
 static float32
- normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
+ normalizeRoundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig )
 {
     int8 shiftCount;
 
     shiftCount = countLeadingZeros32( zSig ) - 1;
-    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
+    return roundAndPackFloat32( roundData, zSign, zExp - shiftCount, zSig<<shiftCount );
 
 }
 
@@ -395,14 +385,14 @@ The handling of underflow and overflow follows the IEC/IEEE Standard for
 Binary Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
+static float64 roundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig )
 {
     int8 roundingMode;
     flag roundNearestEven;
     int16 roundIncrement, roundBits;
     flag isTiny;
 
-    roundingMode = float_rounding_mode;
+    roundingMode = roundData->mode;
     roundNearestEven = ( roundingMode == float_round_nearest_even );
     roundIncrement = 0x200;
     if ( ! roundNearestEven ) {
@@ -427,7 +417,7 @@ static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
            ) {
             //register int lr = __builtin_return_address(0);
             //printk("roundAndPackFloat64 called from 0x%08x\n",lr);
-            float_raise( float_flag_overflow | float_flag_inexact );
+            roundData->exception |= float_flag_overflow | float_flag_inexact;
             return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
         }
         if ( zExp < 0 ) {
@@ -438,10 +428,10 @@ static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
             shift64RightJamming( zSig, - zExp, &zSig );
             zExp = 0;
             roundBits = zSig & 0x3FF;
-            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
+            if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
         }
     }
-    if ( roundBits ) float_exception_flags |= float_flag_inexact;
+    if ( roundBits ) roundData->exception |= float_flag_inexact;
     zSig = ( zSig + roundIncrement )>>10;
     zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
     if ( zSig == 0 ) zExp = 0;
@@ -460,12 +450,12 @@ point exponent.
 -------------------------------------------------------------------------------
 */
 static float64
- normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
+ normalizeRoundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig )
 {
     int8 shiftCount;
 
     shiftCount = countLeadingZeros64( zSig ) - 1;
-    return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
+    return roundAndPackFloat64( roundData, zSign, zExp - shiftCount, zSig<<shiftCount );
 
 }
 
@@ -572,14 +562,15 @@ Floating-point Arithmetic.
 */
 static floatx80
  roundAndPackFloatx80(
-     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+     struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
  )
 {
-    int8 roundingMode;
+    int8 roundingMode, roundingPrecision;
     flag roundNearestEven, increment, isTiny;
     int64 roundIncrement, roundMask, roundBits;
 
-    roundingMode = float_rounding_mode;
+    roundingMode = roundData->mode;
+    roundingPrecision = roundData->precision;
     roundNearestEven = ( roundingMode == float_round_nearest_even );
     if ( roundingPrecision == 80 ) goto precision80;
     if ( roundingPrecision == 64 ) {
@@ -623,8 +614,8 @@ static floatx80
             shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
             zExp = 0;
             roundBits = zSig0 & roundMask;
-            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
-            if ( roundBits ) float_exception_flags |= float_flag_inexact;
+            if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
+            if ( roundBits ) roundData->exception |= float_flag_inexact;
             zSig0 += roundIncrement;
             if ( (sbits64) zSig0 < 0 ) zExp = 1;
             roundIncrement = roundMask + 1;
@@ -635,7 +626,7 @@ static floatx80
             return packFloatx80( zSign, zExp, zSig0 );
         }
     }
-    if ( roundBits ) float_exception_flags |= float_flag_inexact;
+    if ( roundBits ) roundData->exception |= float_flag_inexact;
     zSig0 += roundIncrement;
     if ( zSig0 < roundIncrement ) {
         ++zExp;
@@ -672,7 +663,7 @@ static floatx80
            ) {
             roundMask = 0;
  overflow:
-            float_raise( float_flag_overflow | float_flag_inexact );
+            roundData->exception |= float_flag_overflow | float_flag_inexact;
             if (    ( roundingMode == float_round_to_zero )
                  || ( zSign && ( roundingMode == float_round_up ) )
                  || ( ! zSign && ( roundingMode == float_round_down ) )
@@ -689,8 +680,8 @@ static floatx80
                 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
             shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
             zExp = 0;
-            if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
-            if ( zSig1 ) float_exception_flags |= float_flag_inexact;
+            if ( isTiny && zSig1 ) roundData->exception |= float_flag_underflow;
+            if ( zSig1 ) roundData->exception |= float_flag_inexact;
             if ( roundNearestEven ) {
                 increment = ( (sbits64) zSig1 < 0 );
             }
@@ -710,7 +701,7 @@ static floatx80
             return packFloatx80( zSign, zExp, zSig0 );
         }
     }
-    if ( zSig1 ) float_exception_flags |= float_flag_inexact;
+    if ( zSig1 ) roundData->exception |= float_flag_inexact;
     if ( increment ) {
         ++zSig0;
         if ( zSig0 == 0 ) {
@@ -740,7 +731,7 @@ normalized.
 */
 static floatx80
  normalizeRoundAndPackFloatx80(
-     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
+     struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
  )
 {
     int8 shiftCount;
@@ -754,7 +745,7 @@ static floatx80
     shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
     zExp -= shiftCount;
     return
-        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
+        roundAndPackFloatx80( roundData, zSign, zExp, zSig0, zSig1 );
 
 }
 
@@ -767,14 +758,14 @@ the single-precision floating-point format.  The conversion is performed
 according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-float32 int32_to_float32( int32 a )
+float32 int32_to_float32(struct roundingData *roundData, int32 a)
 {
     flag zSign;
 
     if ( a == 0 ) return 0;
     if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
     zSign = ( a < 0 );
-    return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
+    return normalizeRoundAndPackFloat32( roundData, zSign, 0x9C, zSign ? - a : a );
 
 }
 
@@ -840,7 +831,7 @@ positive integer is returned.  Otherwise, if the conversion overflows, the
 largest integer with the same sign as `a' is returned.
 -------------------------------------------------------------------------------
 */
-int32 float32_to_int32( float32 a )
+int32 float32_to_int32( struct roundingData *roundData, float32 a )
 {
     flag aSign;
     int16 aExp, shiftCount;
@@ -856,7 +847,7 @@ int32 float32_to_int32( float32 a )
     zSig = aSig;
     zSig <<= 32;
     if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig );
-    return roundAndPackInt32( aSign, zSig );
+    return roundAndPackInt32( roundData, aSign, zSig );
 
 }
 
@@ -889,13 +880,13 @@ int32 float32_to_int32_round_to_zero( float32 a )
         return 0x80000000;
     }
     else if ( aExp <= 0x7E ) {
-        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
+        if ( aExp | aSig ) float_raise( float_flag_inexact );
         return 0;
     }
     aSig = ( aSig | 0x00800000 )<<8;
     z = aSig>>( - shiftCount );
     if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
-        float_exception_flags |= float_flag_inexact;
+        float_raise( float_flag_inexact );
     }
     return aSign ? - z : z;
 
@@ -973,7 +964,7 @@ operation is performed according to the IEC/IEEE Standard for Binary
 Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-float32 float32_round_to_int( float32 a )
+float32 float32_round_to_int( struct roundingData *roundData, float32 a )
 {
     flag aSign;
     int16 aExp;
@@ -988,11 +979,12 @@ float32 float32_round_to_int( float32 a )
         }
         return a;
     }
+    roundingMode = roundData->mode;
     if ( aExp <= 0x7E ) {
         if ( (bits32) ( a<<1 ) == 0 ) return a;
-        float_exception_flags |= float_flag_inexact;
+        roundData->exception |= float_flag_inexact;
         aSign = extractFloat32Sign( a );
-        switch ( float_rounding_mode ) {
+        switch ( roundingMode ) {
          case float_round_nearest_even:
             if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
                 return packFloat32( aSign, 0x7F, 0 );
@@ -1009,7 +1001,6 @@ float32 float32_round_to_int( float32 a )
     lastBitMask <<= 0x96 - aExp;
     roundBitsMask = lastBitMask - 1;
     z = a;
-    roundingMode = float_rounding_mode;
     if ( roundingMode == float_round_nearest_even ) {
         z += lastBitMask>>1;
         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
@@ -1020,7 +1011,7 @@ float32 float32_round_to_int( float32 a )
         }
     }
     z &= ~ roundBitsMask;
-    if ( z != a ) float_exception_flags |= float_flag_inexact;
+    if ( z != a ) roundData->exception |= float_flag_inexact;
     return z;
 
 }
@@ -1034,7 +1025,7 @@ addition is performed according to the IEC/IEEE Standard for Binary
 Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
+static float32 addFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign )
 {
     int16 aExp, bExp, zExp;
     bits32 aSig, bSig, zSig;
@@ -1093,7 +1084,7 @@ static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
         ++zExp;
     }
  roundAndPack:
-    return roundAndPackFloat32( zSign, zExp, zSig );
+    return roundAndPackFloat32( roundData, zSign, zExp, zSig );
 
 }
 
@@ -1106,7 +1097,7 @@ result is a NaN.  The subtraction is performed according to the IEC/IEEE
 Standard for Binary Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
+static float32 subFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign )
 {
     int16 aExp, bExp, zExp;
     bits32 aSig, bSig, zSig;
@@ -1123,7 +1114,7 @@ static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
     if ( expDiff < 0 ) goto bExpBigger;
     if ( aExp == 0xFF ) {
         if ( aSig | bSig ) return propagateFloat32NaN( a, b );
-        float_raise( float_flag_invalid );
+        roundData->exception |= float_flag_invalid;
         return float32_default_nan;
     }
     if ( aExp == 0 ) {
@@ -1132,7 +1123,7 @@ static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
     }
     if ( bSig < aSig ) goto aBigger;
     if ( aSig < bSig ) goto bBigger;
-    return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
+    return packFloat32( roundData->mode == float_round_down, 0, 0 );
  bExpBigger:
     if ( bExp == 0xFF ) {
         if ( bSig ) return propagateFloat32NaN( a, b );
@@ -1169,7 +1160,7 @@ static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
     zExp = aExp;
  normalizeRoundAndPack:
     --zExp;
-    return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
+    return normalizeRoundAndPackFloat32( roundData, zSign, zExp, zSig );
 
 }
 
@@ -1180,17 +1171,17 @@ and `b'.  The operation is performed according to the IEC/IEEE Standard for
 Binary Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-float32 float32_add( float32 a, float32 b )
+float32 float32_add( struct roundingData *roundData, float32 a, float32 b )
 {
     flag aSign, bSign;
 
     aSign = extractFloat32Sign( a );
     bSign = extractFloat32Sign( b );
     if ( aSign == bSign ) {
-        return addFloat32Sigs( a, b, aSign );
+        return addFloat32Sigs( roundData, a, b, aSign );
     }
     else {
-        return subFloat32Sigs( a, b, aSign );
+        return subFloat32Sigs( roundData, a, b, aSign );
     }
 
 }
@@ -1202,17 +1193,17 @@ Returns the result of subtracting the single-precision floating-point values
 for Binary Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-float32 float32_sub( float32 a, float32 b )
+float32 float32_sub( struct roundingData *roundData, float32 a, float32 b )
 {
     flag aSign, bSign;
 
     aSign = extractFloat32Sign( a );
     bSign = extractFloat32Sign( b );
     if ( aSign == bSign ) {
-        return subFloat32Sigs( a, b, aSign );
+        return subFloat32Sigs( roundData, a, b, aSign );
     }
     else {
-        return addFloat32Sigs( a, b, aSign );
+        return addFloat32Sigs( roundData, a, b, aSign );
     }
 
 }
@@ -1224,7 +1215,7 @@ Returns the result of multiplying the single-precision floating-point values
 for Binary Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-float32 float32_mul( float32 a, float32 b )
+float32 float32_mul( struct roundingData *roundData, float32 a, float32 b )
 {
     flag aSign, bSign, zSign;
     int16 aExp, bExp, zExp;
@@ -1244,7 +1235,7 @@ float32 float32_mul( float32 a, float32 b )
             return propagateFloat32NaN( a, b );
         }
         if ( ( bExp | bSig ) == 0 ) {
-            float_raise( float_flag_invalid );
+            roundData->exception |= float_flag_invalid;
             return float32_default_nan;
         }
         return packFloat32( zSign, 0xFF, 0 );
@@ -1252,7 +1243,7 @@ float32 float32_mul( float32 a, float32 b )
     if ( bExp == 0xFF ) {
         if ( bSig ) return propagateFloat32NaN( a, b );
         if ( ( aExp | aSig ) == 0 ) {
-            float_raise( float_flag_invalid );
+            roundData->exception |= float_flag_invalid;
             return float32_default_nan;
         }
         return packFloat32( zSign, 0xFF, 0 );
@@ -1274,7 +1265,7 @@ float32 float32_mul( float32 a, float32 b )
         zSig <<= 1;
         --zExp;
     }
-    return roundAndPackFloat32( zSign, zExp, zSig );
+    return roundAndPackFloat32( roundData, zSign, zExp, zSig );
 
 }
 
@@ -1285,7 +1276,7 @@ by the corresponding value `b'.  The operation is performed according to the
 IEC/IEEE Standard for Binary Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-float32 float32_div( float32 a, float32 b )
+float32 float32_div( struct roundingData *roundData, float32 a, float32 b )
 {
     flag aSign, bSign, zSign;
     int16 aExp, bExp, zExp;
@@ -1302,7 +1293,7 @@ float32 float32_div( float32 a, float32 b )
         if ( aSig ) return propagateFloat32NaN( a, b );
         if ( bExp == 0xFF ) {
             if ( bSig ) return propagateFloat32NaN( a, b );
-            float_raise( float_flag_invalid );
+            roundData->exception |= float_flag_invalid;
             return float32_default_nan;
         }
         return packFloat32( zSign, 0xFF, 0 );
@@ -1314,10 +1305,10 @@ float32 float32_div( float32 a, float32 b )
     if ( bExp == 0 ) {
         if ( bSig == 0 ) {
             if ( ( aExp | aSig ) == 0 ) {
-                float_raise( float_flag_invalid );
+                roundData->exception |= float_flag_invalid;
                 return float32_default_nan;
             }
-            float_raise( float_flag_divbyzero );
+            roundData->exception |= float_flag_divbyzero;
             return packFloat32( zSign, 0xFF, 0 );
         }
         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
@@ -1341,7 +1332,7 @@ float32 float32_div( float32 a, float32 b )
     if ( ( zSig & 0x3F ) == 0 ) {
         zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 );
     }
-    return roundAndPackFloat32( zSign, zExp, zSig );
+    return roundAndPackFloat32( roundData, zSign, zExp, zSig );
 
 }
 
@@ -1352,7 +1343,7 @@ with respect to the corresponding value `b'.  The operation is performed
 according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-float32 float32_rem( float32 a, float32 b )
+float32 float32_rem( struct roundingData *roundData, float32 a, float32 b )
 {
     flag aSign, bSign, zSign;
     int16 aExp, bExp, expDiff;
@@ -1372,7 +1363,7 @@ float32 float32_rem( float32 a, float32 b )
         if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
             return propagateFloat32NaN( a, b );
         }
-        float_raise( float_flag_invalid );
+        roundData->exception |= float_flag_invalid;
         return float32_default_nan;
     }
     if ( bExp == 0xFF ) {
@@ -1381,7 +1372,7 @@ float32 float32_rem( float32 a, float32 b )
     }
     if ( bExp == 0 ) {
         if ( bSig == 0 ) {
-            float_raise( float_flag_invalid );
+            roundData->exception |= float_flag_invalid;
             return float32_default_nan;
         }
         normalizeFloat32Subnormal( bSig, &bExp, &bSig );
@@ -1444,7 +1435,7 @@ float32 float32_rem( float32 a, float32 b )
     }
     zSign = ( (sbits32) aSig < 0 );
     if ( zSign ) aSig = - aSig;
-    return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
+    return normalizeRoundAndPackFloat32( roundData, aSign ^ zSign, bExp, aSig );
 
 }
 
@@ -1455,7 +1446,7 @@ The operation is performed according to the IEC/IEEE Standard for Binary
 Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-float32 float32_sqrt( float32 a )
+float32 float32_sqrt( struct roundingData *roundData, float32 a )
 {
     flag aSign;
     int16 aExp, zExp;
@@ -1468,12 +1459,12 @@ float32 float32_sqrt( float32 a )
     if ( aExp == 0xFF ) {
         if ( aSig ) return propagateFloat32NaN( a, 0 );
         if ( ! aSign ) return a;
-        float_raise( float_flag_invalid );
+        roundData->exception |= float_flag_invalid;
         return float32_default_nan;
     }
     if ( aSign ) {
         if ( ( aExp | aSig ) == 0 ) return a;
-        float_raise( float_flag_invalid );
+        roundData->exception |= float_flag_invalid;
         return float32_default_nan;
     }
     if ( aExp == 0 ) {
@@ -1499,7 +1490,7 @@ float32 float32_sqrt( float32 a )
         }
     }
     shift32RightJamming( zSig, 1, &zSig );
-    return roundAndPackFloat32( 0, zExp, zSig );
+    return roundAndPackFloat32( roundData, 0, zExp, zSig );
 
 }
 
@@ -1611,9 +1602,7 @@ flag float32_le_quiet( float32 a, float32 b )
     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
        ) {
-        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
-            float_raise( float_flag_invalid );
-        }
+        /* Quiet comparison: a NaN operand yields 0 and raises no exception. */
         return 0;
     }
     aSign = extractFloat32Sign( a );
@@ -1638,9 +1627,7 @@ flag float32_lt_quiet( float32 a, float32 b )
     if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
          || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
        ) {
-        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
-            float_raise( float_flag_invalid );
-        }
+        /* Quiet comparison: a NaN operand yields 0 and raises no exception. */
         return 0;
     }
     aSign = extractFloat32Sign( a );
@@ -1661,7 +1648,7 @@ positive integer is returned.  Otherwise, if the conversion overflows, the
 largest integer with the same sign as `a' is returned.
 -------------------------------------------------------------------------------
 */
-int32 float64_to_int32( float64 a )
+int32 float64_to_int32( struct roundingData *roundData, float64 a )
 {
     flag aSign;
     int16 aExp, shiftCount;
@@ -1674,7 +1661,7 @@ int32 float64_to_int32( float64 a )
     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
     shiftCount = 0x42C - aExp;
     if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
-    return roundAndPackInt32( aSign, aSig );
+    return roundAndPackInt32( roundData, aSign, aSig );
 
 }
 
@@ -1705,7 +1692,7 @@ int32 float64_to_int32_round_to_zero( float64 a )
         goto invalid;
     }
     else if ( 52 < shiftCount ) {
-        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
+        if ( aExp || aSig ) float_raise( float_flag_inexact );
         return 0;
     }
     aSig |= LIT64( 0x0010000000000000 );
@@ -1715,11 +1702,11 @@ int32 float64_to_int32_round_to_zero( float64 a )
     if ( aSign ) z = - z;
     if ( ( z < 0 ) ^ aSign ) {
  invalid:
-        float_exception_flags |= float_flag_invalid;
+        float_raise( float_flag_invalid );
         return aSign ? 0x80000000 : 0x7FFFFFFF;
     }
     if ( ( aSig<<shiftCount ) != savedASig ) {
-        float_exception_flags |= float_flag_inexact;
+        float_raise( float_flag_inexact );
     }
     return z;
 
@@ -1736,7 +1723,7 @@ positive integer is returned.  Otherwise, if the conversion overflows, the
 largest positive integer is returned.
 -------------------------------------------------------------------------------
 */
-int32 float64_to_uint32( float64 a )
+int32 float64_to_uint32( struct roundingData *roundData, float64 a )
 {
     flag aSign;
     int16 aExp, shiftCount;
@@ -1749,7 +1736,7 @@ int32 float64_to_uint32( float64 a )
     if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
     shiftCount = 0x42C - aExp;
     if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
-    return roundAndPackInt32( aSign, aSig );
+    return roundAndPackInt32( roundData, aSign, aSig );
 }
 
 /*
@@ -1778,7 +1765,7 @@ int32 float64_to_uint32_round_to_zero( float64 a )
         goto invalid;
     }
     else if ( 52 < shiftCount ) {
-        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
+        if ( aExp || aSig ) float_raise( float_flag_inexact );
         return 0;
     }
     aSig |= LIT64( 0x0010000000000000 );
@@ -1788,11 +1775,11 @@ int32 float64_to_uint32_round_to_zero( float64 a )
     if ( aSign ) z = - z;
     if ( ( z < 0 ) ^ aSign ) {
  invalid:
-        float_exception_flags |= float_flag_invalid;
+        float_raise( float_flag_invalid );
         return aSign ? 0x80000000 : 0x7FFFFFFF;
     }
     if ( ( aSig<<shiftCount ) != savedASig ) {
-        float_exception_flags |= float_flag_inexact;
+        float_raise( float_flag_inexact );
     }
     return z;
 }
@@ -1805,7 +1792,7 @@ performed according to the IEC/IEEE Standard for Binary Floating-point
 Arithmetic.
 -------------------------------------------------------------------------------
 */
-float32 float64_to_float32( float64 a )
+float32 float64_to_float32( struct roundingData *roundData, float64 a )
 {
     flag aSign;
     int16 aExp;
@@ -1825,7 +1812,7 @@ float32 float64_to_float32( float64 a )
         zSig |= 0x40000000;
         aExp -= 0x381;
     }
-    return roundAndPackFloat32( aSign, aExp, zSig );
+    return roundAndPackFloat32( roundData, aSign, aExp, zSig );
 
 }
 
@@ -1872,7 +1859,7 @@ operation is performed according to the IEC/IEEE Standard for Binary
 Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-float64 float64_round_to_int( float64 a )
+float64 float64_round_to_int( struct roundingData *roundData, float64 a )
 {
     flag aSign;
     int16 aExp;
@@ -1889,9 +1876,9 @@ float64 float64_round_to_int( float64 a )
     }
     if ( aExp <= 0x3FE ) {
         if ( (bits64) ( a<<1 ) == 0 ) return a;
-        float_exception_flags |= float_flag_inexact;
+        roundData->exception |= float_flag_inexact;
         aSign = extractFloat64Sign( a );
-        switch ( float_rounding_mode ) {
+        switch ( roundData->mode ) {
          case float_round_nearest_even:
             if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
                 return packFloat64( aSign, 0x3FF, 0 );
@@ -1909,7 +1896,7 @@ float64 float64_round_to_int( float64 a )
     lastBitMask <<= 0x433 - aExp;
     roundBitsMask = lastBitMask - 1;
     z = a;
-    roundingMode = float_rounding_mode;
+    roundingMode = roundData->mode;
     if ( roundingMode == float_round_nearest_even ) {
         z += lastBitMask>>1;
         if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
@@ -1920,7 +1907,7 @@ float64 float64_round_to_int( float64 a )
         }
     }
     z &= ~ roundBitsMask;
-    if ( z != a ) float_exception_flags |= float_flag_inexact;
+    if ( z != a ) roundData->exception |= float_flag_inexact;
     return z;
 
 }
@@ -1934,7 +1921,7 @@ addition is performed according to the IEC/IEEE Standard for Binary
 Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
+static float64 addFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign )
 {
     int16 aExp, bExp, zExp;
     bits64 aSig, bSig, zSig;
@@ -1993,7 +1980,7 @@ static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
         ++zExp;
     }
  roundAndPack:
-    return roundAndPackFloat64( zSign, zExp, zSig );
+    return roundAndPackFloat64( roundData, zSign, zExp, zSig );
 
 }
 
@@ -2006,7 +1993,7 @@ result is a NaN.  The subtraction is performed according to the IEC/IEEE
 Standard for Binary Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
+static float64 subFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign )
 {
     int16 aExp, bExp, zExp;
     bits64 aSig, bSig, zSig;
@@ -2023,7 +2010,7 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
     if ( expDiff < 0 ) goto bExpBigger;
     if ( aExp == 0x7FF ) {
         if ( aSig | bSig ) return propagateFloat64NaN( a, b );
-        float_raise( float_flag_invalid );
+        roundData->exception |= float_flag_invalid;
         return float64_default_nan;
     }
     if ( aExp == 0 ) {
@@ -2032,7 +2019,7 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
     }
     if ( bSig < aSig ) goto aBigger;
     if ( aSig < bSig ) goto bBigger;
-    return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
+    return packFloat64( roundData->mode == float_round_down, 0, 0 );
  bExpBigger:
     if ( bExp == 0x7FF ) {
         if ( bSig ) return propagateFloat64NaN( a, b );
@@ -2069,7 +2056,7 @@ static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
     zExp = aExp;
  normalizeRoundAndPack:
     --zExp;
-    return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
+    return normalizeRoundAndPackFloat64( roundData, zSign, zExp, zSig );
 
 }
 
@@ -2080,17 +2067,17 @@ and `b'.  The operation is performed according to the IEC/IEEE Standard for
 Binary Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-float64 float64_add( float64 a, float64 b )
+float64 float64_add( struct roundingData *roundData, float64 a, float64 b )
 {
     flag aSign, bSign;
 
     aSign = extractFloat64Sign( a );
     bSign = extractFloat64Sign( b );
     if ( aSign == bSign ) {
-        return addFloat64Sigs( a, b, aSign );
+        return addFloat64Sigs( roundData, a, b, aSign );
     }
     else {
-        return subFloat64Sigs( a, b, aSign );
+        return subFloat64Sigs( roundData, a, b, aSign );
     }
 
 }
@@ -2102,17 +2089,17 @@ Returns the result of subtracting the double-precision floating-point values
 for Binary Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-float64 float64_sub( float64 a, float64 b )
+float64 float64_sub( struct roundingData *roundData, float64 a, float64 b )
 {
     flag aSign, bSign;
 
     aSign = extractFloat64Sign( a );
     bSign = extractFloat64Sign( b );
     if ( aSign == bSign ) {
-        return subFloat64Sigs( a, b, aSign );
+        return subFloat64Sigs( roundData, a, b, aSign );
     }
     else {
-        return addFloat64Sigs( a, b, aSign );
+        return addFloat64Sigs( roundData, a, b, aSign );
     }
 
 }
@@ -2124,7 +2111,7 @@ Returns the result of multiplying the double-precision floating-point values
 for Binary Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-float64 float64_mul( float64 a, float64 b )
+float64 float64_mul( struct roundingData *roundData, float64 a, float64 b )
 {
     flag aSign, bSign, zSign;
     int16 aExp, bExp, zExp;
@@ -2142,7 +2129,7 @@ float64 float64_mul( float64 a, float64 b )
             return propagateFloat64NaN( a, b );
         }
         if ( ( bExp | bSig ) == 0 ) {
-            float_raise( float_flag_invalid );
+            roundData->exception |= float_flag_invalid;
             return float64_default_nan;
         }
         return packFloat64( zSign, 0x7FF, 0 );
@@ -2150,7 +2137,7 @@ float64 float64_mul( float64 a, float64 b )
     if ( bExp == 0x7FF ) {
         if ( bSig ) return propagateFloat64NaN( a, b );
         if ( ( aExp | aSig ) == 0 ) {
-            float_raise( float_flag_invalid );
+            roundData->exception |= float_flag_invalid;
             return float64_default_nan;
         }
         return packFloat64( zSign, 0x7FF, 0 );
@@ -2172,7 +2159,7 @@ float64 float64_mul( float64 a, float64 b )
         zSig0 <<= 1;
         --zExp;
     }
-    return roundAndPackFloat64( zSign, zExp, zSig0 );
+    return roundAndPackFloat64( roundData, zSign, zExp, zSig0 );
 
 }
 
@@ -2183,7 +2170,7 @@ by the corresponding value `b'.  The operation is performed according to
 the IEC/IEEE Standard for Binary Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-float64 float64_div( float64 a, float64 b )
+float64 float64_div( struct roundingData *roundData, float64 a, float64 b )
 {
     flag aSign, bSign, zSign;
     int16 aExp, bExp, zExp;
@@ -2202,7 +2189,7 @@ float64 float64_div( float64 a, float64 b )
         if ( aSig ) return propagateFloat64NaN( a, b );
         if ( bExp == 0x7FF ) {
             if ( bSig ) return propagateFloat64NaN( a, b );
-            float_raise( float_flag_invalid );
+            roundData->exception |= float_flag_invalid;
             return float64_default_nan;
         }
         return packFloat64( zSign, 0x7FF, 0 );
@@ -2214,10 +2201,10 @@ float64 float64_div( float64 a, float64 b )
     if ( bExp == 0 ) {
         if ( bSig == 0 ) {
             if ( ( aExp | aSig ) == 0 ) {
-                float_raise( float_flag_invalid );
+                roundData->exception |= float_flag_invalid;
                 return float64_default_nan;
             }
-            float_raise( float_flag_divbyzero );
+            roundData->exception |= float_flag_divbyzero;
             return packFloat64( zSign, 0x7FF, 0 );
         }
         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
@@ -2243,7 +2230,7 @@ float64 float64_div( float64 a, float64 b )
         }
         zSig |= ( rem1 != 0 );
     }
-    return roundAndPackFloat64( zSign, zExp, zSig );
+    return roundAndPackFloat64( roundData, zSign, zExp, zSig );
 
 }
 
@@ -2254,7 +2241,7 @@ with respect to the corresponding value `b'.  The operation is performed
 according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-float64 float64_rem( float64 a, float64 b )
+float64 float64_rem( struct roundingData *roundData, float64 a, float64 b )
 {
     flag aSign, bSign, zSign;
     int16 aExp, bExp, expDiff;
@@ -2272,7 +2259,7 @@ float64 float64_rem( float64 a, float64 b )
         if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
             return propagateFloat64NaN( a, b );
         }
-        float_raise( float_flag_invalid );
+        roundData->exception |= float_flag_invalid;
         return float64_default_nan;
     }
     if ( bExp == 0x7FF ) {
@@ -2281,7 +2268,7 @@ float64 float64_rem( float64 a, float64 b )
     }
     if ( bExp == 0 ) {
         if ( bSig == 0 ) {
-            float_raise( float_flag_invalid );
+            roundData->exception |= float_flag_invalid;
             return float64_default_nan;
         }
         normalizeFloat64Subnormal( bSig, &bExp, &bSig );
@@ -2329,7 +2316,7 @@ float64 float64_rem( float64 a, float64 b )
     }
     zSign = ( (sbits64) aSig < 0 );
     if ( zSign ) aSig = - aSig;
-    return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
+    return normalizeRoundAndPackFloat64( roundData, aSign ^ zSign, bExp, aSig );
 
 }
 
@@ -2340,7 +2327,7 @@ The operation is performed according to the IEC/IEEE Standard for Binary
 Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-float64 float64_sqrt( float64 a )
+float64 float64_sqrt( struct roundingData *roundData, float64 a )
 {
     flag aSign;
     int16 aExp, zExp;
@@ -2354,12 +2341,12 @@ float64 float64_sqrt( float64 a )
     if ( aExp == 0x7FF ) {
         if ( aSig ) return propagateFloat64NaN( a, a );
         if ( ! aSign ) return a;
-        float_raise( float_flag_invalid );
+        roundData->exception |= float_flag_invalid;
         return float64_default_nan;
     }
     if ( aSign ) {
         if ( ( aExp | aSig ) == 0 ) return a;
-        float_raise( float_flag_invalid );
+        roundData->exception |= float_flag_invalid;
         return float64_default_nan;
     }
     if ( aExp == 0 ) {
@@ -2390,7 +2377,7 @@ float64 float64_sqrt( float64 a )
         }
     }
     shift64RightJamming( zSig, 1, &zSig );
-    return roundAndPackFloat64( 0, zExp, zSig );
+    return roundAndPackFloat64( roundData, 0, zExp, zSig );
 
 }
 
@@ -2502,9 +2489,7 @@ flag float64_le_quiet( float64 a, float64 b )
     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
        ) {
-        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
-            float_raise( float_flag_invalid );
-        }
+        /* Quiet comparison: a NaN operand yields 0 and raises no exception. */
         return 0;
     }
     aSign = extractFloat64Sign( a );
@@ -2529,9 +2514,7 @@ flag float64_lt_quiet( float64 a, float64 b )
     if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
          || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
        ) {
-        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
-            float_raise( float_flag_invalid );
-        }
+        /* Quiet comparison: a NaN operand yields 0 and raises no exception. */
         return 0;
     }
     aSign = extractFloat64Sign( a );
@@ -2554,7 +2537,7 @@ largest positive integer is returned.  Otherwise, if the conversion
 overflows, the largest integer with the same sign as `a' is returned.
 -------------------------------------------------------------------------------
 */
-int32 floatx80_to_int32( floatx80 a )
+int32 floatx80_to_int32( struct roundingData *roundData, floatx80 a )
 {
     flag aSign;
     int32 aExp, shiftCount;
@@ -2567,7 +2550,7 @@ int32 floatx80_to_int32( floatx80 a )
     shiftCount = 0x4037 - aExp;
     if ( shiftCount <= 0 ) shiftCount = 1;
     shift64RightJamming( aSig, shiftCount, &aSig );
-    return roundAndPackInt32( aSign, aSig );
+    return roundAndPackInt32( roundData, aSign, aSig );
 
 }
 
@@ -2598,7 +2581,7 @@ int32 floatx80_to_int32_round_to_zero( floatx80 a )
         goto invalid;
     }
     else if ( 63 < shiftCount ) {
-        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
+        if ( aExp || aSig ) float_raise( float_flag_inexact );
         return 0;
     }
     savedASig = aSig;
@@ -2607,11 +2590,11 @@ int32 floatx80_to_int32_round_to_zero( floatx80 a )
     if ( aSign ) z = - z;
     if ( ( z < 0 ) ^ aSign ) {
  invalid:
-        float_exception_flags |= float_flag_invalid;
+        float_raise( float_flag_invalid );
         return aSign ? 0x80000000 : 0x7FFFFFFF;
     }
     if ( ( aSig<<shiftCount ) != savedASig ) {
-        float_exception_flags |= float_flag_inexact;
+        float_raise( float_flag_inexact );
     }
     return z;
 
@@ -2625,7 +2608,7 @@ conversion is performed according to the IEC/IEEE Standard for Binary
 Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-float32 floatx80_to_float32( floatx80 a )
+float32 floatx80_to_float32( struct roundingData *roundData, floatx80 a )
 {
     flag aSign;
     int32 aExp;
@@ -2642,7 +2625,7 @@ float32 floatx80_to_float32( floatx80 a )
     }
     shift64RightJamming( aSig, 33, &aSig );
     if ( aExp || aSig ) aExp -= 0x3F81;
-    return roundAndPackFloat32( aSign, aExp, aSig );
+    return roundAndPackFloat32( roundData, aSign, aExp, aSig );
 
 }
 
@@ -2654,7 +2637,7 @@ conversion is performed according to the IEC/IEEE Standard for Binary
 Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-float64 floatx80_to_float64( floatx80 a )
+float64 floatx80_to_float64( struct roundingData *roundData, floatx80 a )
 {
     flag aSign;
     int32 aExp;
@@ -2671,7 +2654,7 @@ float64 floatx80_to_float64( floatx80 a )
     }
     shift64RightJamming( aSig, 1, &zSig );
     if ( aExp || aSig ) aExp -= 0x3C01;
-    return roundAndPackFloat64( aSign, aExp, zSig );
+    return roundAndPackFloat64( roundData, aSign, aExp, zSig );
 
 }
 
@@ -2683,7 +2666,7 @@ value.  The operation is performed according to the IEC/IEEE Standard for
 Binary Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-floatx80 floatx80_round_to_int( floatx80 a )
+floatx80 floatx80_round_to_int( struct roundingData *roundData, floatx80 a )
 {
     flag aSign;
     int32 aExp;
@@ -2703,9 +2686,9 @@ floatx80 floatx80_round_to_int( floatx80 a )
              && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
             return a;
         }
-        float_exception_flags |= float_flag_inexact;
+        roundData->exception |= float_flag_inexact;
         aSign = extractFloatx80Sign( a );
-        switch ( float_rounding_mode ) {
+        switch ( roundData->mode ) {
          case float_round_nearest_even:
             if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
                ) {
@@ -2729,7 +2712,7 @@ floatx80 floatx80_round_to_int( floatx80 a )
     lastBitMask <<= 0x403E - aExp;
     roundBitsMask = lastBitMask - 1;
     z = a;
-    roundingMode = float_rounding_mode;
+    roundingMode = roundData->mode;
     if ( roundingMode == float_round_nearest_even ) {
         z.low += lastBitMask>>1;
         if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
@@ -2744,7 +2727,7 @@ floatx80 floatx80_round_to_int( floatx80 a )
         ++z.high;
         z.low = LIT64( 0x8000000000000000 );
     }
-    if ( z.low != a.low ) float_exception_flags |= float_flag_inexact;
+    if ( z.low != a.low ) roundData->exception |= float_flag_inexact;
     return z;
 
 }
@@ -2758,7 +2741,7 @@ The addition is performed according to the IEC/IEEE Standard for Binary
 Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
+static floatx80 addFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign )
 {
     int32 aExp, bExp, zExp;
     bits64 aSig, bSig, zSig0, zSig1;
@@ -2814,7 +2797,7 @@ static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
  roundAndPack:
     return
         roundAndPackFloatx80(
-            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+            roundData, zSign, zExp, zSig0, zSig1 );
 
 }
 
@@ -2827,7 +2810,7 @@ result is a NaN.  The subtraction is performed according to the IEC/IEEE
 Standard for Binary Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
+static floatx80 subFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign )
 {
     int32 aExp, bExp, zExp;
     bits64 aSig, bSig, zSig0, zSig1;
@@ -2845,7 +2828,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
         if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
             return propagateFloatx80NaN( a, b );
         }
-        float_raise( float_flag_invalid );
+        roundData->exception |= float_flag_invalid;
         z.low = floatx80_default_nan_low;
         z.high = floatx80_default_nan_high;
         return z;
@@ -2857,7 +2840,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
     zSig1 = 0;
     if ( bSig < aSig ) goto aBigger;
     if ( aSig < bSig ) goto bBigger;
-    return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
+    return packFloatx80( roundData->mode == float_round_down, 0, 0 );
  bExpBigger:
     if ( bExp == 0x7FFF ) {
         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
@@ -2883,7 +2866,7 @@ static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
  normalizeRoundAndPack:
     return
         normalizeRoundAndPackFloatx80(
-            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+            roundData, zSign, zExp, zSig0, zSig1 );
 
 }
 
@@ -2894,17 +2877,17 @@ values `a' and `b'.  The operation is performed according to the IEC/IEEE
 Standard for Binary Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-floatx80 floatx80_add( floatx80 a, floatx80 b )
+floatx80 floatx80_add( struct roundingData *roundData, floatx80 a, floatx80 b )
 {
     flag aSign, bSign;
     
     aSign = extractFloatx80Sign( a );
     bSign = extractFloatx80Sign( b );
     if ( aSign == bSign ) {
-        return addFloatx80Sigs( a, b, aSign );
+        return addFloatx80Sigs( roundData, a, b, aSign );
     }
     else {
-        return subFloatx80Sigs( a, b, aSign );
+        return subFloatx80Sigs( roundData, a, b, aSign );
     }
     
 }
@@ -2916,17 +2899,17 @@ point values `a' and `b'.  The operation is performed according to the
 IEC/IEEE Standard for Binary Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-floatx80 floatx80_sub( floatx80 a, floatx80 b )
+floatx80 floatx80_sub( struct roundingData *roundData, floatx80 a, floatx80 b )
 {
     flag aSign, bSign;
 
     aSign = extractFloatx80Sign( a );
     bSign = extractFloatx80Sign( b );
     if ( aSign == bSign ) {
-        return subFloatx80Sigs( a, b, aSign );
+        return subFloatx80Sigs( roundData, a, b, aSign );
     }
     else {
-        return addFloatx80Sigs( a, b, aSign );
+        return addFloatx80Sigs( roundData, a, b, aSign );
     }
 
 }
@@ -2938,7 +2921,7 @@ point values `a' and `b'.  The operation is performed according to the
 IEC/IEEE Standard for Binary Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-floatx80 floatx80_mul( floatx80 a, floatx80 b )
+floatx80 floatx80_mul( struct roundingData *roundData, floatx80 a, floatx80 b )
 {
     flag aSign, bSign, zSign;
     int32 aExp, bExp, zExp;
@@ -2964,7 +2947,7 @@ floatx80 floatx80_mul( floatx80 a, floatx80 b )
         if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
         if ( ( aExp | aSig ) == 0 ) {
  invalid:
-            float_raise( float_flag_invalid );
+            roundData->exception |= float_flag_invalid;
             z.low = floatx80_default_nan_low;
             z.high = floatx80_default_nan_high;
             return z;
@@ -2987,7 +2970,7 @@ floatx80 floatx80_mul( floatx80 a, floatx80 b )
     }
     return
         roundAndPackFloatx80(
-            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+            roundData, zSign, zExp, zSig0, zSig1 );
 
 }
 
@@ -2998,7 +2981,7 @@ value `a' by the corresponding value `b'.  The operation is performed
 according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-floatx80 floatx80_div( floatx80 a, floatx80 b )
+floatx80 floatx80_div( struct roundingData *roundData, floatx80 a, floatx80 b )
 {
     flag aSign, bSign, zSign;
     int32 aExp, bExp, zExp;
@@ -3029,12 +3012,12 @@ floatx80 floatx80_div( floatx80 a, floatx80 b )
         if ( bSig == 0 ) {
             if ( ( aExp | aSig ) == 0 ) {
  invalid:
-                float_raise( float_flag_invalid );
+                roundData->exception |= float_flag_invalid;
                 z.low = floatx80_default_nan_low;
                 z.high = floatx80_default_nan_high;
                 return z;
             }
-            float_raise( float_flag_divbyzero );
+            roundData->exception |= float_flag_divbyzero;
             return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
         }
         normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
@@ -3068,7 +3051,7 @@ floatx80 floatx80_div( floatx80 a, floatx80 b )
     }
     return
         roundAndPackFloatx80(
-            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
+            roundData, zSign, zExp, zSig0, zSig1 );
 
 }
 
@@ -3079,7 +3062,7 @@ Returns the remainder of the extended double-precision floating-point value
 according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-floatx80 floatx80_rem( floatx80 a, floatx80 b )
+floatx80 floatx80_rem( struct roundingData *roundData, floatx80 a, floatx80 b )
 {
     flag aSign, bSign, zSign;
     int32 aExp, bExp, expDiff;
@@ -3107,7 +3090,7 @@ floatx80 floatx80_rem( floatx80 a, floatx80 b )
     if ( bExp == 0 ) {
         if ( bSig == 0 ) {
  invalid:
-            float_raise( float_flag_invalid );
+            roundData->exception |= float_flag_invalid;
             z.low = floatx80_default_nan_low;
             z.high = floatx80_default_nan_high;
             return z;
@@ -3164,9 +3147,10 @@ floatx80 floatx80_rem( floatx80 a, floatx80 b )
         aSig1 = alternateASig1;
         zSign = ! zSign;
     }
+
     return
         normalizeRoundAndPackFloatx80(
-            80, zSign, bExp + expDiff, aSig0, aSig1 );
+            roundData, zSign, bExp + expDiff, aSig0, aSig1 );
 
 }
 
@@ -3177,7 +3161,7 @@ value `a'.  The operation is performed according to the IEC/IEEE Standard
 for Binary Floating-point Arithmetic.
 -------------------------------------------------------------------------------
 */
-floatx80 floatx80_sqrt( floatx80 a )
+floatx80 floatx80_sqrt( struct roundingData *roundData, floatx80 a )
 {
     flag aSign;
     int32 aExp, zExp;
@@ -3197,7 +3181,7 @@ floatx80 floatx80_sqrt( floatx80 a )
     if ( aSign ) {
         if ( ( aExp | aSig0 ) == 0 ) return a;
  invalid:
-        float_raise( float_flag_invalid );
+        roundData->exception |= float_flag_invalid;
         z.low = floatx80_default_nan_low;
         z.high = floatx80_default_nan_high;
         return z;
@@ -3242,7 +3226,7 @@ floatx80 floatx80_sqrt( floatx80 a )
     }
     return
         roundAndPackFloatx80(
-            floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
+            roundData, 0, zExp, zSig0, zSig1 );
 
 }
 
@@ -3390,10 +3374,7 @@ flag floatx80_le_quiet( floatx80 a, floatx80 b )
          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
        ) {
-        if (    floatx80_is_signaling_nan( a )
-             || floatx80_is_signaling_nan( b ) ) {
-            float_raise( float_flag_invalid );
-        }
+        /* Quiet compare: raise no exception, even for a signaling NaN */
         return 0;
     }
     aSign = extractFloatx80Sign( a );
@@ -3427,10 +3408,7 @@ flag floatx80_lt_quiet( floatx80 a, floatx80 b )
          || (    ( extractFloatx80Exp( b ) == 0x7FFF )
               && (bits64) ( extractFloatx80Frac( b )<<1 ) )
        ) {
-        if (    floatx80_is_signaling_nan( a )
-             || floatx80_is_signaling_nan( b ) ) {
-            float_raise( float_flag_invalid );
-        }
+        /* Quiet compare: raise no exception, even for a signaling NaN */
         return 0;
     }
     aSign = extractFloatx80Sign( a );
diff --git a/arch/arm/nwfpe/softfloat.h b/arch/arm/nwfpe/softfloat.h
index 1e1743173899..1c8799b9ee4d 100644
--- a/arch/arm/nwfpe/softfloat.h
+++ b/arch/arm/nwfpe/softfloat.h
@@ -74,7 +74,7 @@ enum {
 Software IEC/IEEE floating-point rounding mode.
 -------------------------------------------------------------------------------
 */
-extern signed char float_rounding_mode;
+/* No global float_rounding_mode: the mode is read from struct roundingData. */
 enum {
     float_round_nearest_even = 0,
     float_round_to_zero      = 1,
@@ -86,7 +86,6 @@ enum {
 -------------------------------------------------------------------------------
 Software IEC/IEEE floating-point exception flags.
 -------------------------------------------------------------------------------
-extern signed char float_exception_flags;
 enum {
     float_flag_inexact   =  1,
     float_flag_underflow =  2,
@@ -99,7 +98,6 @@ ScottB: November 4, 1998
 Changed the enumeration to match the bit order in the FPA11.
 */
 
-extern signed char float_exception_flags;
 enum {
     float_flag_invalid   =  1,
     float_flag_divbyzero =  2,
@@ -121,7 +119,7 @@ void float_raise( signed char );
 Software IEC/IEEE integer-to-floating-point conversion routines.
 -------------------------------------------------------------------------------
 */
-float32 int32_to_float32( signed int );
+float32 int32_to_float32( struct roundingData *, signed int );
 float64 int32_to_float64( signed int );
 #ifdef FLOATX80
 floatx80 int32_to_floatx80( signed int );
@@ -132,7 +130,7 @@ floatx80 int32_to_floatx80( signed int );
 Software IEC/IEEE single-precision conversion routines.
 -------------------------------------------------------------------------------
 */
-signed int float32_to_int32( float32 );
+signed int float32_to_int32( struct roundingData *, float32 );
 signed int float32_to_int32_round_to_zero( float32 );
 float64 float32_to_float64( float32 );
 #ifdef FLOATX80
@@ -144,13 +142,13 @@ floatx80 float32_to_floatx80( float32 );
 Software IEC/IEEE single-precision operations.
 -------------------------------------------------------------------------------
 */
-float32 float32_round_to_int( float32 );
-float32 float32_add( float32, float32 );
-float32 float32_sub( float32, float32 );
-float32 float32_mul( float32, float32 );
-float32 float32_div( float32, float32 );
-float32 float32_rem( float32, float32 );
-float32 float32_sqrt( float32 );
+float32 float32_round_to_int( struct roundingData*, float32 );
+float32 float32_add( struct roundingData *, float32, float32 );
+float32 float32_sub( struct roundingData *, float32, float32 );
+float32 float32_mul( struct roundingData *, float32, float32 );
+float32 float32_div( struct roundingData *, float32, float32 );
+float32 float32_rem( struct roundingData *, float32, float32 );
+float32 float32_sqrt( struct roundingData*, float32 );
 char float32_eq( float32, float32 );
 char float32_le( float32, float32 );
 char float32_lt( float32, float32 );
@@ -164,9 +162,9 @@ char float32_is_signaling_nan( float32 );
 Software IEC/IEEE double-precision conversion routines.
 -------------------------------------------------------------------------------
 */
-signed int float64_to_int32( float64 );
+signed int float64_to_int32( struct roundingData *, float64 );
 signed int float64_to_int32_round_to_zero( float64 );
-float32 float64_to_float32( float64 );
+float32 float64_to_float32( struct roundingData *, float64 );
 #ifdef FLOATX80
 floatx80 float64_to_floatx80( float64 );
 #endif
@@ -176,13 +174,13 @@ floatx80 float64_to_floatx80( float64 );
 Software IEC/IEEE double-precision operations.
 -------------------------------------------------------------------------------
 */
-float64 float64_round_to_int( float64 );
-float64 float64_add( float64, float64 );
-float64 float64_sub( float64, float64 );
-float64 float64_mul( float64, float64 );
-float64 float64_div( float64, float64 );
-float64 float64_rem( float64, float64 );
-float64 float64_sqrt( float64 );
+float64 float64_round_to_int( struct roundingData *, float64 );
+float64 float64_add( struct roundingData *, float64, float64 );
+float64 float64_sub( struct roundingData *, float64, float64 );
+float64 float64_mul( struct roundingData *, float64, float64 );
+float64 float64_div( struct roundingData *, float64, float64 );
+float64 float64_rem( struct roundingData *, float64, float64 );
+float64 float64_sqrt( struct roundingData *, float64 );
 char float64_eq( float64, float64 );
 char float64_le( float64, float64 );
 char float64_lt( float64, float64 );
@@ -198,31 +196,23 @@ char float64_is_signaling_nan( float64 );
 Software IEC/IEEE extended double-precision conversion routines.
 -------------------------------------------------------------------------------
 */
-signed int floatx80_to_int32( floatx80 );
+signed int floatx80_to_int32( struct roundingData *, floatx80 );
 signed int floatx80_to_int32_round_to_zero( floatx80 );
-float32 floatx80_to_float32( floatx80 );
-float64 floatx80_to_float64( floatx80 );
-
-/*
--------------------------------------------------------------------------------
-Software IEC/IEEE extended double-precision rounding precision.  Valid
-values are 32, 64, and 80.
--------------------------------------------------------------------------------
-*/
-extern signed char floatx80_rounding_precision;
+float32 floatx80_to_float32( struct roundingData *, floatx80 );
+float64 floatx80_to_float64( struct roundingData *, floatx80 );
 
 /*
 -------------------------------------------------------------------------------
 Software IEC/IEEE extended double-precision operations.
 -------------------------------------------------------------------------------
 */
-floatx80 floatx80_round_to_int( floatx80 );
-floatx80 floatx80_add( floatx80, floatx80 );
-floatx80 floatx80_sub( floatx80, floatx80 );
-floatx80 floatx80_mul( floatx80, floatx80 );
-floatx80 floatx80_div( floatx80, floatx80 );
-floatx80 floatx80_rem( floatx80, floatx80 );
-floatx80 floatx80_sqrt( floatx80 );
+floatx80 floatx80_round_to_int( struct roundingData *, floatx80 );
+floatx80 floatx80_add( struct roundingData *, floatx80, floatx80 );
+floatx80 floatx80_sub( struct roundingData *, floatx80, floatx80 );
+floatx80 floatx80_mul( struct roundingData *, floatx80, floatx80 );
+floatx80 floatx80_div( struct roundingData *, floatx80, floatx80 );
+floatx80 floatx80_rem( struct roundingData *, floatx80, floatx80 );
+floatx80 floatx80_sqrt( struct roundingData *, floatx80 );
 char floatx80_eq( floatx80, floatx80 );
 char floatx80_le( floatx80, floatx80 );
 char floatx80_lt( floatx80, floatx80 );
diff --git a/arch/arm/oprofile/backtrace.c b/arch/arm/oprofile/backtrace.c
index ec58d3e2eb8b..df35c452a8bf 100644
--- a/arch/arm/oprofile/backtrace.c
+++ b/arch/arm/oprofile/backtrace.c
@@ -115,7 +115,7 @@ static int valid_kernel_stack(struct frame_tail *tail, struct pt_regs *regs)
 	return (tailaddr > stack) && (tailaddr < stack_base);
 }
 
-void arm_backtrace(struct pt_regs const *regs, unsigned int depth)
+void arm_backtrace(struct pt_regs * const regs, unsigned int depth)
 {
 	struct frame_tail *tail;
 	unsigned long last_address = 0;
diff --git a/arch/arm/vfp/vfpdouble.c b/arch/arm/vfp/vfpdouble.c
index b801cd66b6ea..9b367a65cb4d 100644
--- a/arch/arm/vfp/vfpdouble.c
+++ b/arch/arm/vfp/vfpdouble.c
@@ -770,6 +770,9 @@ vfp_double_add(struct vfp_double *vdd, struct vfp_double *vdn,
 		if ((s64)m_sig < 0) {
 			vdd->sign = vfp_sign_negate(vdd->sign);
 			m_sig = -m_sig;
+		} else if (m_sig == 0) {
+			vdd->sign = (fpscr & FPSCR_RMODE_MASK) ==
+				      FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
 		}
 	} else {
 		m_sig += vdn->significand;