From 6391af174ad75f72e92043c1dd8302660a2fec58 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Wed, 11 Oct 2006 01:20:39 -0700
Subject: [PATCH] mm: use symbolic names instead of indices for zone
 initialisation

Arch-independent zone-sizing is using indices instead of symbolic names to
offset within an array related to zones (max_zone_pfns).  The unintended
impact is that ZONE_DMA and ZONE_NORMAL is initialised on powerpc instead
of ZONE_DMA and ZONE_HIGHMEM when CONFIG_HIGHMEM is set.  As a result, the
the machine fails to boot but will boot with CONFIG_HIGHMEM turned off.

The following patch properly initialises the max_zone_pfns[] array and uses
symbolic names instead of indices in each architecture using
arch-independent zone-sizing.  Two users have successfully booted their
powerpcs with it (one an ibook G4).  It has also been boot tested on x86,
x86_64, ppc64 and ia64.  Please merge for 2.6.19-rc2.

Credit to Benjamin Herrenschmidt for identifying the bug and rolling the
first fix.  Additional credit to Johannes Berg and Andreas Schwab for
reporting the problem and testing on powerpc.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/mm/init.c | 9 ++++++---
 arch/x86_64/mm/numa.c | 8 +++++---
 2 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 19c72520a868..971dc1181e69 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -406,9 +406,12 @@ void __cpuinit zap_low_mappings(int cpu)
 #ifndef CONFIG_NUMA
 void __init paging_init(void)
 {
-	unsigned long max_zone_pfns[MAX_NR_ZONES] = {MAX_DMA_PFN,
-							MAX_DMA32_PFN,
-							end_pfn};
+	unsigned long max_zone_pfns[MAX_NR_ZONES];
+	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
+	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
+	max_zone_pfns[ZONE_NORMAL] = end_pfn;
+
 	memory_present(0, 0, end_pfn);
 	sparse_init();
 	free_area_init_nodes(max_zone_pfns);
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 829a008bd39b..2ee2e003606c 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -338,9 +338,11 @@ static void __init arch_sparse_init(void)
 void __init paging_init(void)
 { 
 	int i;
-	unsigned long max_zone_pfns[MAX_NR_ZONES] = { MAX_DMA_PFN,
-		MAX_DMA32_PFN,
-		end_pfn};
+	unsigned long max_zone_pfns[MAX_NR_ZONES];
+	memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+	max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
+	max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
+	max_zone_pfns[ZONE_NORMAL] = end_pfn;
 
 	arch_sparse_init();
 
-- 
cgit v1.2.1


From c37e108d156101dcde7ec7033eabe7abe83366bc Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@gmail.com>
Date: Wed, 11 Oct 2006 01:20:43 -0700
Subject: [PATCH] use struct irq_chip instead of struct hw_interrupt_type

hw_interrupt_type is deprecated in favour of struct irq_chip.

[mingo@elte.hu: do x86_64 too]
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/io_apic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 771bcf77daf2..c3cdcab29688 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -1897,7 +1897,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 }
 #endif
 
-static struct hw_interrupt_type ht_irq_chip = {
+static struct irq_chip ht_irq_chip = {
 	.name		= "PCI-HT",
 	.mask		= mask_ht_irq,
 	.unmask		= unmask_ht_irq,
-- 
cgit v1.2.1


From 994bd4f9f5a065ead4a92435fdd928ac7fd33809 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 11 Oct 2006 22:44:46 -0600
Subject: [PATCH] x86_64 irq: Properly update vector_irq

This patch fixes my one line thinko where I was clearing
the vector_irq entries on the wrong cpus.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/io_apic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index c3cdcab29688..44b55f833875 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -660,7 +660,7 @@ next:
 		}
 		if (old_vector >= 0) {
 			int old_cpu;
-			for_each_cpu_mask(old_cpu, domain)
+			for_each_cpu_mask(old_cpu, irq_domain[irq])
 				per_cpu(vector_irq, old_cpu)[old_vector] = -1;
 		}
 		for_each_cpu_mask(new_cpu, domain)
-- 
cgit v1.2.1


From 734c4c67393893f61b39bcdfe1e94f72111c56d6 Mon Sep 17 00:00:00 2001
From: Ravikiran Thirumalai <kiran@scalex86.org>
Date: Thu, 12 Oct 2006 12:17:52 -0700
Subject: [PATCH] Fix build breakage with CONFIG_X86_VSMP

Kernel build breaks with CONFIG_X86_VSMP.  Probably due to some header
file cleanups in 2.6.19-rc1.

Signed-off-by: Ravikiran Thirumalai <kiran@scalex86.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/vsmp.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/vsmp.c b/arch/x86_64/kernel/vsmp.c
index 044e852bd25e..414caf0c5f9a 100644
--- a/arch/x86_64/kernel/vsmp.c
+++ b/arch/x86_64/kernel/vsmp.c
@@ -14,6 +14,7 @@
 #include <linux/pci_ids.h>
 #include <linux/pci_regs.h>
 #include <asm/pci-direct.h>
+#include <asm/io.h>
 
 static int __init vsmp_init(void)
 {
-- 
cgit v1.2.1


From 991528d7348667924176f3e29addea0675298944 Mon Sep 17 00:00:00 2001
From: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Mon, 25 Sep 2006 16:28:13 -0700
Subject: ACPI: Processor native C-states using MWAIT

Intel processors starting with the Core Duo support
support processor native C-state using the MWAIT instruction.
Refer: Intel Architecture Software Developer's Manual
http://www.intel.com/design/Pentium4/manuals/253668.htm

Platform firmware exports the support for Native C-state to OS using
ACPI _PDC and _CST methods.
Refer: Intel Processor Vendor-Specific ACPI: Interface Specification
http://www.intel.com/technology/iapc/acpi/downloads/302223.htm

With Processor Native C-state, we use 'MWAIT' instruction on the processor
to enter different C-states (C1, C2, C3).  We won't use the special IO
ports to enter C-state and no SMM mode etc required to enter C-state.
Overall this will mean better C-state support.

One major advantage of using MWAIT for all C-states is, with this and
"treat interrupt as break event" feature of MWAIT, we can now get accurate
timing for the time spent in C1, C2, ..  states.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86_64/kernel/process.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 5e95b257ee26..49f7fac6229e 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -238,20 +238,28 @@ void cpu_idle (void)
  * We execute MONITOR against need_resched and enter optimized wait state
  * through MWAIT. Whenever someone changes need_resched, we would be woken
  * up from MWAIT (without an IPI).
+ *
+ * New with Core Duo processors, MWAIT can take some hints based on CPU
+ * capability.
  */
-static void mwait_idle(void)
+void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
 {
-	local_irq_enable();
-
-	while (!need_resched()) {
+	if (!need_resched()) {
 		__monitor((void *)&current_thread_info()->flags, 0, 0);
 		smp_mb();
-		if (need_resched())
-			break;
-		__mwait(0, 0);
+		if (!need_resched())
+			__mwait(eax, ecx);
 	}
 }
 
+/* Default MONITOR/MWAIT with no hints, used for default C1 state */
+static void mwait_idle(void)
+{
+	local_irq_enable();
+	while (!need_resched())
+		mwait_idle_with_hints(0,0);
+}
+
 void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
 	static int printed;
-- 
cgit v1.2.1


From a460e745e8f9c75a0525ff94154a0629f9d3e05d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 17 Oct 2006 00:10:03 -0700
Subject: [PATCH] genirq: clean up irq-flow-type naming

Introduce desc->name and eliminate the handle_irq_name() hack.  Add
set_irq_chip_and_handler_name() to set the flow type and name at once.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Matthew Wilcox <willy@debian.org>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/i8259.c   |  7 ++++---
 arch/x86_64/kernel/io_apic.c | 15 ++++++++-------
 arch/x86_64/kernel/irq.c     |  2 +-
 3 files changed, 13 insertions(+), 11 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index 0612a33bb896..c4ef801b765b 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -178,7 +178,8 @@ void make_8259A_irq(unsigned int irq)
 {
 	disable_irq_nosync(irq);
 	io_apic_irqs &= ~(1<<irq);
-	set_irq_chip_and_handler(irq, &i8259A_chip, handle_level_irq);
+	set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq,
+				      "XT");
 	enable_irq(irq);
 }
 
@@ -431,8 +432,8 @@ void __init init_ISA_irqs (void)
 			/*
 			 * 16 old-style INTA-cycle interrupts:
 			 */
-			set_irq_chip_and_handler(i, &i8259A_chip,
-						 handle_level_irq);
+			set_irq_chip_and_handler_name(i, &i8259A_chip,
+						      handle_level_irq, "XT");
 		} else {
 			/*
 			 * 'high' PCI IRQs filled in on demand
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 44b55f833875..49e94f7994c5 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -696,11 +696,11 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
 {
 	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
 			trigger == IOAPIC_LEVEL)
-		set_irq_chip_and_handler(irq, &ioapic_chip,
-					 handle_fasteoi_irq);
+		set_irq_chip_and_handler_name(irq, &ioapic_chip,
+					      handle_fasteoi_irq, "fasteoi");
 	else
-		set_irq_chip_and_handler(irq, &ioapic_chip,
-					 handle_edge_irq);
+		set_irq_chip_and_handler_name(irq, &ioapic_chip,
+					      handle_edge_irq, "edge");
 }
 
 static void __init setup_IO_APIC_irqs(void)
@@ -806,7 +806,7 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
 	 * The timer IRQ doesn't have to know that behind the
 	 * scene we have a 8259A-master in AEOI mode ...
 	 */
-	set_irq_chip_and_handler(0, &ioapic_chip, handle_edge_irq);
+	set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
 
 	/*
 	 * Add it to the IO-APIC irq-routing table:
@@ -1839,7 +1839,7 @@ int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
 
 	write_msi_msg(irq, &msg);
 
-	set_irq_chip_and_handler(irq, &msi_chip, handle_edge_irq);
+	set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
 
 	return 0;
 }
@@ -1936,7 +1936,8 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 		write_ht_irq_low(irq, low);
 		write_ht_irq_high(irq, high);
 
-		set_irq_chip_and_handler(irq, &ht_irq_chip, handle_edge_irq);
+		set_irq_chip_and_handler_name(irq, &ht_irq_chip,
+					      handle_edge_irq, "edge");
 	}
 	return vector;
 }
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index dff68eb2b787..e46c55856d40 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -75,7 +75,7 @@ int show_interrupts(struct seq_file *p, void *v)
 			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
 		seq_printf(p, " %8s", irq_desc[i].chip->name);
-		seq_printf(p, "-%s", handle_irq_name(irq_desc[i].handle_irq));
+		seq_printf(p, "-%-8s", irq_desc[i].name);
 
 		seq_printf(p, "  %s", action->name);
 		for (action=action->next; action; action = action->next)
-- 
cgit v1.2.1


From f248b6a34fdd726dd12b549fceba907b6df2771c Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Sat, 21 Oct 2006 18:37:00 +0200
Subject: [PATCH] x86-64: Update defconfig

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/defconfig | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index 47bfba6e9dc4..0f5d44e86be5 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.19-rc1
-# Thu Oct  5 13:04:43 2006
+# Linux kernel version: 2.6.19-rc2-git4
+# Sat Oct 21 03:38:52 2006
 #
 CONFIG_X86_64=y
 CONFIG_64BIT=y
@@ -335,8 +335,8 @@ CONFIG_IPV6=y
 # CONFIG_INET6_XFRM_MODE_TUNNEL is not set
 # CONFIG_INET6_XFRM_MODE_BEET is not set
 # CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
+CONFIG_IPV6_SIT=y
 # CONFIG_IPV6_TUNNEL is not set
-# CONFIG_IPV6_SUBTREES is not set
 # CONFIG_IPV6_MULTIPLE_TABLES is not set
 # CONFIG_NETWORK_SECMARK is not set
 # CONFIG_NETFILTER is not set
@@ -437,6 +437,13 @@ CONFIG_BLK_DEV_INITRD=y
 # CONFIG_CDROM_PKTCDVD is not set
 # CONFIG_ATA_OVER_ETH is not set
 
+#
+# Misc devices
+#
+# CONFIG_IBM_ASM is not set
+# CONFIG_SGI_IOC4 is not set
+# CONFIG_TIFM_CORE is not set
+
 #
 # ATA/ATAPI/MFM/RLL support
 #
@@ -1008,6 +1015,7 @@ CONFIG_I2C_ISA=m
 #
 # Dallas's 1-wire bus
 #
+# CONFIG_W1 is not set
 
 #
 # Hardware Monitoring support
@@ -1058,12 +1066,6 @@ CONFIG_SENSORS_SMSC47B397=m
 # CONFIG_SENSORS_HDAPS is not set
 # CONFIG_HWMON_DEBUG_CHIP is not set
 
-#
-# Misc devices
-#
-# CONFIG_IBM_ASM is not set
-# CONFIG_TIFM_CORE is not set
-
 #
 # Multimedia devices
 #
@@ -1196,7 +1198,6 @@ CONFIG_USB_HIDINPUT=y
 # CONFIG_USB_ATI_REMOTE2 is not set
 # CONFIG_USB_KEYSPAN_REMOTE is not set
 # CONFIG_USB_APPLETOUCH is not set
-# CONFIG_USB_TRANCEVIBRATOR is not set
 
 #
 # USB Imaging devices
@@ -1242,6 +1243,7 @@ CONFIG_USB_MON=y
 # CONFIG_USB_APPLEDISPLAY is not set
 # CONFIG_USB_SISUSBVGA is not set
 # CONFIG_USB_LD is not set
+# CONFIG_USB_TRANCEVIBRATOR is not set
 # CONFIG_USB_TEST is not set
 
 #
@@ -1318,6 +1320,7 @@ CONFIG_EXT3_FS=y
 CONFIG_EXT3_FS_XATTR=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 # CONFIG_EXT3_FS_SECURITY is not set
+# CONFIG_EXT4DEV_FS is not set
 CONFIG_JBD=y
 # CONFIG_JBD_DEBUG is not set
 CONFIG_FS_MBCACHE=y
@@ -1341,6 +1344,7 @@ CONFIG_DNOTIFY=y
 # CONFIG_AUTOFS_FS is not set
 CONFIG_AUTOFS4_FS=y
 # CONFIG_FUSE_FS is not set
+CONFIG_GENERIC_ACL=y
 
 #
 # CD-ROM/DVD Filesystems
@@ -1418,7 +1422,6 @@ CONFIG_SUNRPC=y
 # CONFIG_CODA_FS is not set
 # CONFIG_AFS_FS is not set
 # CONFIG_9P_FS is not set
-CONFIG_GENERIC_ACL=y
 
 #
 # Partition Types
@@ -1470,10 +1473,6 @@ CONFIG_NLS_ISO8859_15=y
 # CONFIG_NLS_KOI8_U is not set
 CONFIG_NLS_UTF8=y
 
-#
-# Distributed Lock Manager
-#
-
 #
 # Instrumentation Support
 #
@@ -1512,6 +1511,7 @@ CONFIG_DEBUG_FS=y
 CONFIG_UNWIND_INFO=y
 CONFIG_STACK_UNWIND=y
 # CONFIG_FORCED_INLINING is not set
+# CONFIG_HEADERS_CHECK is not set
 # CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_LKDTM is not set
 # CONFIG_DEBUG_RODATA is not set
-- 
cgit v1.2.1


From 926fafebc48a3218fac675f12974f9a46473bd40 Mon Sep 17 00:00:00 2001
From: keith mannthey <kmannth@us.ibm.com>
Date: Sat, 21 Oct 2006 18:37:01 +0200
Subject: [PATCH] x86-64: x86_64 hot-add memory srat.c fix

  This patch corrects the logic used in srat.c to figure out what
parsing what action to take when registering hot-add areas.  Hot-add
areas should only be added to the node information for the
MEMORY_HOTPLUG_RESERVE case.  When booting MEMORY_HOTPLUG_SPARSE hot-add
areas on everything but the last node are getting include in the node
data and during kernel boot the pages are setup then the kernel dies
when the pages are used. This patch fixes this issue.

Signed-off-by: Keith Mannthey <kmannth@us.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/mm/srat.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c
index 3cc0544e25f5..1087e150a218 100644
--- a/arch/x86_64/mm/srat.c
+++ b/arch/x86_64/mm/srat.c
@@ -207,7 +207,7 @@ static inline int save_add_info(void)
 	return hotadd_percent > 0;
 }
 #else
-int update_end_of_memory(unsigned long end) {return 0;}
+int update_end_of_memory(unsigned long end) {return -1;}
 static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 static inline int save_add_info(void) {return 1;}
@@ -337,7 +337,7 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
 	push_node_boundaries(node, nd->start >> PAGE_SHIFT,
 						nd->end >> PAGE_SHIFT);
 
- 	if (ma->flags.hot_pluggable && !reserve_hotadd(node, start, end) < 0) {
+ 	if (ma->flags.hot_pluggable && (reserve_hotadd(node, start, end) < 0)) {
 		/* Ignore hotadd region. Undo damage */
 		printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
 		*nd = oldnode;
-- 
cgit v1.2.1


From 45edfd1db02f818b3dc7e4743ee8585af6b78f78 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai.lu@amd.com>
Date: Sat, 21 Oct 2006 18:37:01 +0200
Subject: [PATCH] x86-64: typo in __assign_irq_vector when updating pos for
 vector and offset

typo with cpu instead of new_cpu

Signed-off-by: Yinghai Lu <yinghai.lu@amd.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/io_apic.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 49e94f7994c5..b848f4808510 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -651,12 +651,12 @@ next:
 		if (vector == IA32_SYSCALL_VECTOR)
 			goto next;
 		for_each_cpu_mask(new_cpu, domain)
-			if (per_cpu(vector_irq, cpu)[vector] != -1)
+			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
 				goto next;
 		/* Found one! */
 		for_each_cpu_mask(new_cpu, domain) {
-			pos[cpu].vector = vector;
-			pos[cpu].offset = offset;
+			pos[new_cpu].vector = vector;
+			pos[new_cpu].offset = offset;
 		}
 		if (old_vector >= 0) {
 			int old_cpu;
-- 
cgit v1.2.1


From 469b1d8741a5970ad49f2b5a837811579ba0b6f2 Mon Sep 17 00:00:00 2001
From: Corey Minyard <minyard@acm.org>
Date: Sat, 21 Oct 2006 18:37:01 +0200
Subject: [PATCH] x86-64: Fix for arch/x86_64/pci/Makefile CFLAGS

The arch/x86_64/pci directory was giving problems in a wierd cross-compile
environment.  The exact cause is unknown, but the Makefile used CFLAGS
instead of EXTRA_CFLAGS.  From what I can tell from
Documentation/kbuild/makefiles.txt, CFLAGS should not be used for this, it
should be EXTRA_CFLAGS.  And it solves the cross-compile problem.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Andi Kleen <ak@muc.de>
Cc: Vojtech Pavlik <vojtech@suse.cz>
Cc: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
 arch/x86_64/pci/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/pci/Makefile b/arch/x86_64/pci/Makefile
index 1eb18f421edf..149aba05a5b8 100644
--- a/arch/x86_64/pci/Makefile
+++ b/arch/x86_64/pci/Makefile
@@ -3,7 +3,7 @@
 #
 # Reuse the i386 PCI subsystem
 #
-CFLAGS += -Iarch/i386/pci
+EXTRA_CFLAGS += -Iarch/i386/pci
 
 obj-y		:= i386.o
 obj-$(CONFIG_PCI_DIRECT)+= direct.o
-- 
cgit v1.2.1


From 73bb8919b33d42cf75a0ed89bc9ca6a7128665be Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@in.ibm.com>
Date: Sat, 21 Oct 2006 18:37:01 +0200
Subject: [PATCH] x86-64: fix page align in e820 allocator

Currently some code pieces assume that address returned by find_e820_area()
are page aligned.  But looks like find_e820_area() had no such intention
and hence one might end up stomping over some of the data.  One such case
is bootmem allocator initialization code stomped over bss.

This patch modified find_e820_area() to return page aligned address.  This
might be little wasteful of memory but at the same time probably it is
easier to handle page aligned memory.

Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
 arch/x86_64/kernel/e820.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index b3f0908668ec..a75c829c2b02 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -54,13 +54,13 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size)
 
 	/* various gunk below that needed for SMP startup */
 	if (addr < 0x8000) { 
-		*addrp = 0x8000;
+		*addrp = PAGE_ALIGN(0x8000);
 		return 1; 
 	}
 
 	/* direct mapping tables of the kernel */
 	if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) { 
-		*addrp = table_end << PAGE_SHIFT; 
+		*addrp = PAGE_ALIGN(table_end << PAGE_SHIFT);
 		return 1;
 	} 
 
@@ -68,18 +68,18 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size)
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (LOADER_TYPE && INITRD_START && last >= INITRD_START && 
 	    addr < INITRD_START+INITRD_SIZE) { 
-		*addrp = INITRD_START + INITRD_SIZE; 
+		*addrp = PAGE_ALIGN(INITRD_START + INITRD_SIZE);
 		return 1;
 	} 
 #endif
 	/* kernel code */
-	if (last >= __pa_symbol(&_text) && last < __pa_symbol(&_end)) {
-		*addrp = __pa_symbol(&_end);
+	if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) {
+		*addrp = PAGE_ALIGN(__pa_symbol(&_end));
 		return 1;
 	}
 
 	if (last >= ebda_addr && addr < ebda_addr + ebda_size) {
-		*addrp = ebda_addr + ebda_size;
+		*addrp = PAGE_ALIGN(ebda_addr + ebda_size);
 		return 1;
 	}
 
@@ -152,7 +152,7 @@ unsigned long __init find_e820_area(unsigned long start, unsigned long end, unsi
 			continue; 
 		while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
 			;
-		last = addr + size;
+		last = PAGE_ALIGN(addr) + size;
 		if (last > ei->addr + ei->size)
 			continue;
 		if (last > end) 
-- 
cgit v1.2.1


From cdfce1f5714fec7b24715f569b2fee1607350a6d Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Sat, 21 Oct 2006 18:37:01 +0200
Subject: [PATCH] x86: Use -maccumulate-outgoing-args

This avoids some problems with gcc 4.x and earlier generating
invalid unwind information. In 4.1 the option is default
when unwind information is enabled.

And it seems to generate smaller code too, so it's probably
a good thing on its own. With gcc 4.0:

i386:
4683198  902112  480868 6066178  5c9002 vmlinux (before)
4449895  902112  480868 5832875  5900ab vmlinux (after)

x86-64:
4939761 1449584  648216 7037561  6b6279 vmlinux (before)
4854193 1449584  648216 6951993  6a1439 vmlinux (after)

On 4.1 it shouldn't make much difference because it is
default when unwind is enabled anyways.

Suggested by Michael Matz and Jan Beulich

Cc: jbeulich@novell.com

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/Makefile | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index 1c0f18d4f887..13972148058d 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -54,6 +54,10 @@ endif
 cflags-y += $(call cc-option,-funit-at-a-time)
 # prevent gcc from generating any FP code by mistake
 cflags-y += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
+# this works around some issues with generating unwind tables in older gccs
+# newer gccs do it by default
+cflags-y += -maccumulate-outgoing-args
+
 # do binutils support CFI?
 cflags-y += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
 AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
-- 
cgit v1.2.1


From cc7d479fe56133e79840beffe9cb4fd193af93aa Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Sat, 21 Oct 2006 18:37:02 +0200
Subject: [PATCH] x86-64: Fix ENOSYS in system call tracing

This patch:

- out of range system calls failing to return -ENOSYS under
  system call tracing

[AK: split out from another patch by Jan as separate bugfix]

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/entry.S | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 38a7b2d528e2..038dcf733a1b 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -315,6 +315,8 @@ tracesys:
 	LOAD_ARGS ARGOFFSET  /* reload args from stack in case ptrace changed it */
 	RESTORE_REST
 	cmpq $__NR_syscall_max,%rax
+	movq $-ENOSYS,%rcx
+	cmova %rcx,%rax
 	ja  1f
 	movq %r10,%rcx	/* fixup for C */
 	call *sys_call_table(,%rax,8)
-- 
cgit v1.2.1


From 581910e2eb952e541b8ca9b5f551d6c124903b61 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Sat, 21 Oct 2006 18:37:02 +0200
Subject: [PATCH] x86-64: Revert interrupt backlink changes

They break more than they fix
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/entry.S | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 038dcf733a1b..e3eddde3d3bb 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -537,8 +537,6 @@ END(stub_rt_sigreturn)
 1:	incl	%gs:pda_irqcount
 	cmoveq %gs:pda_irqstackptr,%rsp
 	push    %rbp			# backlink for old unwinder
-	CFI_ADJUST_CFA_OFFSET 8
-	CFI_REL_OFFSET rbp,0
 	/*
 	 * We entered an interrupt context - irqs are off:
 	 */
@@ -1178,7 +1176,6 @@ ENTRY(call_softirq)
 	incl %gs:pda_irqcount
 	cmove %gs:pda_irqstackptr,%rsp
 	push  %rbp			# backlink for old unwinder
-	CFI_ADJUST_CFA_OFFSET    8
 	call __do_softirq
 	leaveq
 	CFI_DEF_CFA_REGISTER	rsp
-- 
cgit v1.2.1


From 6bf2dafad18c119beb534cbb3d882fe7a6c3f529 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 21 Oct 2006 18:37:02 +0200
Subject: [PATCH] x86-64: Use irq_domain in ioapic_retrigger_irq

Thanks to YH Lu for spotting this.  It appears I missed this function when I
refactored allocate_irq_vector and introduced irq_domain, with the result that
all retriggered irqs would go to cpu 0 even if we were not prepared to receive
them there.

While reviewing YH's patch I also noticed that this function was missing
locking, and since I am now reading two values from two diffrent arrays that
looks like a race we might be able to hit in the real world.

Cc: Yinghai Lu <yinghai.lu@amd.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/io_apic.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index b848f4808510..8a9a357875b7 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -1255,12 +1255,15 @@ static int ioapic_retrigger_irq(unsigned int irq)
 {
 	cpumask_t mask;
 	unsigned vector;
+	unsigned long flags;
 
+	spin_lock_irqsave(&vector_lock, flags);
 	vector = irq_vector[irq];
 	cpus_clear(mask);
-	cpu_set(vector >> 8, mask);
+	cpu_set(first_cpu(irq_domain[irq]), mask);
 
-	send_IPI_mask(mask, vector & 0xff);
+	send_IPI_mask(mask, vector);
+	spin_unlock_irqrestore(&vector_lock, flags);
 
 	return 1;
 }
-- 
cgit v1.2.1


From 8cf2c51927bbeefafc25193d01b91f9ed3806e96 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Sat, 21 Oct 2006 18:37:02 +0200
Subject: [PATCH] x86: Revert new unwind kernel stack termination

Jan convinced me that it was unnecessary because the assembly stubs do
this already on the stack.

Cc: jbeulich@novell.com

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/entry.S | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index e3eddde3d3bb..7d401b00d822 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -980,11 +980,6 @@ ENTRY(kernel_thread)
 	call do_fork
 	movq %rax,RAX(%rsp)
 	xorl %edi,%edi
-	test %rax,%rax
-	jnz  1f
-	/* terminate stack in child */
-	movq %rdi,RIP(%rsp)
-1:
 
 	/*
 	 * It isn't worth to check for reschedule here,
-- 
cgit v1.2.1


From 84f404f695b16bd142c8dd9910d5a398f54fb044 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 21 Oct 2006 18:37:02 +0200
Subject: [PATCH] x86-64: Put more than one cpu in TARGET_CPUS

TARGET_CPUS is the default irq routing poicy.  It specifies which cpus the
kernel should aim an irq at.  In physflat delivery mode we can route an irq to
a single cpu.  But that doesn't mean our default policy should only be a
single cpu is allowed.

By allowing the irq routing code to select from multiple cpus this enables
systems with more irqs then we can service on a single processor to actually
work.

I just audited and tested the code and irqbalance doesn't care, and the
io_apic.c doesn't care if we have extra cpus in the mask.  Everything will use
or assume we are using the lowest numbered cpu in the mask if we can't use
them all.

So this should result in no behavior changes except on systems that need it.

Thanks for YH Lu for spotting this problem in his testing.

Cc: Yinghai Lu <yinghai.lu@amd.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/genapic_flat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c
index 0dfc223c1839..7c01db8fa9d1 100644
--- a/arch/x86_64/kernel/genapic_flat.c
+++ b/arch/x86_64/kernel/genapic_flat.c
@@ -153,7 +153,7 @@ struct genapic apic_flat =  {
 
 static cpumask_t physflat_target_cpus(void)
 {
-	return cpumask_of_cpu(0);
+	return cpu_online_map;
 }
 
 static cpumask_t physflat_vector_allocation_domain(int cpu)
-- 
cgit v1.2.1


From dbaab49f92ff6ae6255762a948375e4036cbdbd2 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@in.ibm.com>
Date: Sat, 21 Oct 2006 18:37:03 +0200
Subject: [PATCH] x86-64: Overlapping program headers in physical addr space
 fix

o A recent change to vmlinux.ld.S file broke kexec as now resulting vmlinux
  program headers are overlapping in physical address space.

o Now all the vsyscall related sections are placed after data and after
  that mostly init data sections are placed. To avoid physical overlap
  among phdrs, there are three possible solutions.
	- Place vsyscall sections also in data phdrs instead of user
	- move vsyscal sections after init data in bss.
	- create another phdrs say data.init and move all the sections
	  after vsyscall into this new phdr.

o This patch implements the third solution.

Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Cc: Magnus Damm <magnus@valinux.co.jp>
Cc: Andi Kleen <ak@suse.de>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
 arch/x86_64/kernel/vmlinux.lds.S | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index b9df2ab6529f..1283614c9b24 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -17,6 +17,7 @@ PHDRS {
 	text PT_LOAD FLAGS(5);	/* R_E */
 	data PT_LOAD FLAGS(7);	/* RWE */
 	user PT_LOAD FLAGS(7);	/* RWE */
+	data.init PT_LOAD FLAGS(7);	/* RWE */
 	note PT_NOTE FLAGS(4);	/* R__ */
 }
 SECTIONS
@@ -131,7 +132,7 @@ SECTIONS
   . = ALIGN(8192);		/* init_task */
   .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
 	*(.data.init_task)
-  } :data
+  }:data.init
 
   . = ALIGN(4096);
   .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
-- 
cgit v1.2.1


From e70ea8c09db0e25ab58f84ba7f393e5c9125a8ee Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Sat, 21 Oct 2006 18:37:03 +0200
Subject: [PATCH] x86-64: Revert timer routing behaviour back to 2.6.16 state

By default route the 8254 over the 8259 and only disable
it on ATI boards where this causes double timer interrupts.

This should unbreak some Nvidia boards where the timer doesn't
seem to tick of it isn't enabled in the 8259. At least one
VIA board also seemed to have a little trouble with the disabled
8259.

For 2.6.20 we'll try both dynamically without black listing, but I think
for .19 this is the safer approach because it has been already well tested
in earlier kernels. This also makes the x86-64 behaviour the same
as i386.

Command line options can change all this of course.

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/early-quirks.c | 9 +++++----
 arch/x86_64/kernel/io_apic.c      | 2 +-
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c
index 208e38a372c1..2b1245d86258 100644
--- a/arch/x86_64/kernel/early-quirks.c
+++ b/arch/x86_64/kernel/early-quirks.c
@@ -61,10 +61,11 @@ static void nvidia_bugs(void)
 
 static void ati_bugs(void)
 {
-#if 1 /* for testing */
-	printk("ATI board detected\n");
-#endif
-	/* No bugs right now */
+	if (timer_over_8254 == 1) {
+		timer_over_8254 = 0;
+		printk(KERN_INFO
+	 	"ATI board detected. Disabling timer routing over 8254.\n");
+	}
 }
 
 struct chipset {
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 8a9a357875b7..b000017e4b5d 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -57,7 +57,7 @@ static int no_timer_check;
 
 static int disable_timer_pin_1 __initdata;
 
-int timer_over_8254 __initdata = 0;
+int timer_over_8254 __initdata = 1;
 
 /* Where if anywhere is the i8259 connect in external int mode */
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
-- 
cgit v1.2.1


From aa026ede513b7d672fa7d9106b2f2a475455dcf2 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Sun, 22 Oct 2006 00:41:15 +0200
Subject: [PATCH] x86-64: Fix C3 timer test

There was a typo in the C3 latency test to decide of the TSC
should be used or not. It used the C2 latency threshold, not the
C3 one. Fix that.

This should fix the time on various dual core laptops.

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/time.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 1ba5a442ac32..88722f11ca13 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -948,7 +948,7 @@ __cpuinit int unsynchronized_tsc(void)
  	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
 #ifdef CONFIG_ACPI
 		/* But TSC doesn't tick in C3 so don't use it there */
-		if (acpi_fadt.length > 0 && acpi_fadt.plvl3_lat < 100)
+		if (acpi_fadt.length > 0 && acpi_fadt.plvl3_lat < 1000)
 			return 1;
 #endif
  		return 0;
-- 
cgit v1.2.1


From cb01fc720c629261b9c616b2d5fcc3d93cd8bb09 Mon Sep 17 00:00:00 2001
From: Muli Ben-Yehuda <muli@il.ibm.com>
Date: Sun, 22 Oct 2006 00:41:15 +0200
Subject: [PATCH] x86-64: increase PHB1 split transaction timeout

This patch increases the timeout for PCI split transactions on PHB1 on
the first Calgary to work around an issue with the aic94xx
adapter. Fixes kernel.org bugzilla #7180
(http://bugzilla.kernel.org/show_bug.cgi?id=7180)

Based on excellent debugging and a patch by Darrick J. Wong
<djwong@us.ibm.com>

Signed-off-by: Muli Ben-Yehuda <muli@il.ibm.com>
Signed-off-by: Jon Mason <jdmason@kudzu.us>
Signed-off-by: Andi Kleen <ak@suse.de>
Acked-by: Darrick J. Wong <djwong@us.ibm.com>
---
 arch/x86_64/kernel/pci-calgary.c | 44 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
index b3296cc2f2f2..37a770859e71 100644
--- a/arch/x86_64/kernel/pci-calgary.c
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -52,7 +52,8 @@
 #define ONE_BASED_CHASSIS_NUM   1
 
 /* register offsets inside the host bridge space */
-#define PHB_CSR_OFFSET		0x0110
+#define CALGARY_CONFIG_REG	0x0108
+#define PHB_CSR_OFFSET		0x0110 /* Channel Status */
 #define PHB_PLSSR_OFFSET	0x0120
 #define PHB_CONFIG_RW_OFFSET	0x0160
 #define PHB_IOBASE_BAR_LOW	0x0170
@@ -83,6 +84,8 @@
 #define TAR_VALID		0x0000000000000008UL
 /* CSR (Channel/DMA Status Register) */
 #define CSR_AGENT_MASK		0xffe0ffff
+/* CCR (Calgary Configuration Register) */
+#define CCR_2SEC_TIMEOUT        0x000000000000000EUL
 
 #define MAX_NUM_OF_PHBS		8 /* how many PHBs in total? */
 #define MAX_NUM_CHASSIS		8 /* max number of chassis */
@@ -732,6 +735,38 @@ static void calgary_watchdog(unsigned long data)
 	}
 }
 
+static void __init calgary_increase_split_completion_timeout(void __iomem *bbar,
+	unsigned char busnum)
+{
+	u64 val64;
+	void __iomem *target;
+	unsigned long phb_shift = -1;
+	u64 mask;
+
+	switch (busno_to_phbid(busnum)) {
+	case 0: phb_shift = (63 - 19);
+		break;
+	case 1: phb_shift = (63 - 23);
+		break;
+	case 2: phb_shift = (63 - 27);
+		break;
+	case 3: phb_shift = (63 - 35);
+		break;
+	default:
+		BUG_ON(busno_to_phbid(busnum));
+	}
+
+	target = calgary_reg(bbar, CALGARY_CONFIG_REG);
+	val64 = be64_to_cpu(readq(target));
+
+	/* zero out this PHB's timer bits */
+	mask = ~(0xFUL << phb_shift);
+	val64 &= mask;
+	val64 |= (CCR_2SEC_TIMEOUT << phb_shift);
+	writeq(cpu_to_be64(val64), target);
+	readq(target); /* flush */
+}
+
 static void __init calgary_enable_translation(struct pci_dev *dev)
 {
 	u32 val32;
@@ -756,6 +791,13 @@ static void __init calgary_enable_translation(struct pci_dev *dev)
 	writel(cpu_to_be32(val32), target);
 	readl(target); /* flush */
 
+	/*
+	 * Give split completion a longer timeout on bus 1 for aic94xx
+	 * http://bugzilla.kernel.org/show_bug.cgi?id=7180
+	 */
+	if (busnum == 1)
+		calgary_increase_split_completion_timeout(bbar, busnum);
+
 	init_timer(&tbl->watchdog_timer);
 	tbl->watchdog_timer.function = &calgary_watchdog;
 	tbl->watchdog_timer.data = (unsigned long)dev;
-- 
cgit v1.2.1


From d1752aa884ec0ac3027c1a3d456bf69bf765c8b8 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 25 Oct 2006 01:00:22 +0200
Subject: [PATCH] x86-64: Simplify the vector allocator.

There is no reason to remember a per cpu position of which vector
to try.  Keeping a global position is simpler and more likely to
result in a global vector allocation even if I don't need or require
it.  For level triggered interrupts this means we are less likely to
acknowledge another cpus irq, and cause the level triggered irq to
harmlessly refire.

This simplification makes it easier to only access data structures
of  online cpus, by having fewer special cases to deal with.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/io_apic.c | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index b000017e4b5d..0e89ae7e7b22 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -612,10 +612,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
 	 * Also, we've got to be careful not to trash gate
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
-	static struct {
-		int vector;
-		int offset;
-	} pos[NR_CPUS] = { [ 0 ... NR_CPUS - 1] = {FIRST_DEVICE_VECTOR, 0} };
+	static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
 	int old_vector = -1;
 	int cpu;
 
@@ -631,14 +628,13 @@ static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
 
 	for_each_cpu_mask(cpu, mask) {
 		cpumask_t domain;
-		int first, new_cpu;
+		int new_cpu;
 		int vector, offset;
 
 		domain = vector_allocation_domain(cpu);
-		first = first_cpu(domain);
 
-		vector = pos[first].vector;
-		offset = pos[first].offset;
+		vector = current_vector;
+		offset = current_offset;
 next:
 		vector += 8;
 		if (vector >= FIRST_SYSTEM_VECTOR) {
@@ -646,7 +642,7 @@ next:
 			offset = (offset + 1) % 8;
 			vector = FIRST_DEVICE_VECTOR + offset;
 		}
-		if (unlikely(pos[first].vector == vector))
+		if (unlikely(current_vector == vector))
 			continue;
 		if (vector == IA32_SYSCALL_VECTOR)
 			goto next;
@@ -654,10 +650,8 @@ next:
 			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
 				goto next;
 		/* Found one! */
-		for_each_cpu_mask(new_cpu, domain) {
-			pos[new_cpu].vector = vector;
-			pos[new_cpu].offset = offset;
-		}
+		current_vector = vector;
+		current_offset = offset;
 		if (old_vector >= 0) {
 			int old_cpu;
 			for_each_cpu_mask(old_cpu, irq_domain[irq])
-- 
cgit v1.2.1


From 70a0a5357db20c291d46c04011d646d5d84c868c Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 25 Oct 2006 01:00:23 +0200
Subject: [PATCH] x86-64: Only look at per_cpu data for online cpus.

When I generalized __assign_irq_vector I failed to pay attention
to what happens when you access a per cpu data structure for
a cpu that is not online.   It is an undefined case making any
code that does it have undefined behavior as well.

The code still needs to be able to allocate a vector across cpus
that are not online to properly handle combinations like lowest
priority interrupt delivery and cpu_hotplug.  Not that we can do
that today but the infrastructure shouldn't prevent it.

So this patch updates the places where we touch per cpu data
to only touch online cpus, it makes cpu vector allocation
an atomic operation with respect to cpu hotplug, and it updates
the cpu start code to properly initialize vector_irq so we
don't have inconsistencies.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/io_apic.c | 42 +++++++++++++++++++++++++++++++++++++-----
 arch/x86_64/kernel/smpboot.c |  7 ++++++-
 2 files changed, 43 insertions(+), 6 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 0e89ae7e7b22..fe429e5d6b29 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -63,7 +63,7 @@ int timer_over_8254 __initdata = 1;
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
 static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
+DEFINE_SPINLOCK(vector_lock);
 
 /*
  * # of IRQ routing registers
@@ -618,6 +618,9 @@ static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
 
 	BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
 
+	/* Only try and allocate irqs on cpus that are present */
+	cpus_and(mask, mask, cpu_online_map);
+
 	if (irq_vector[irq] > 0)
 		old_vector = irq_vector[irq];
 	if (old_vector > 0) {
@@ -627,11 +630,12 @@ static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
 	}
 
 	for_each_cpu_mask(cpu, mask) {
-		cpumask_t domain;
+		cpumask_t domain, new_mask;
 		int new_cpu;
 		int vector, offset;
 
 		domain = vector_allocation_domain(cpu);
+		cpus_and(new_mask, domain, cpu_online_map);
 
 		vector = current_vector;
 		offset = current_offset;
@@ -646,18 +650,20 @@ next:
 			continue;
 		if (vector == IA32_SYSCALL_VECTOR)
 			goto next;
-		for_each_cpu_mask(new_cpu, domain)
+		for_each_cpu_mask(new_cpu, new_mask)
 			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
 				goto next;
 		/* Found one! */
 		current_vector = vector;
 		current_offset = offset;
 		if (old_vector >= 0) {
+			cpumask_t old_mask;
 			int old_cpu;
-			for_each_cpu_mask(old_cpu, irq_domain[irq])
+			cpus_and(old_mask, irq_domain[irq], cpu_online_map);
+			for_each_cpu_mask(old_cpu, old_mask)
 				per_cpu(vector_irq, old_cpu)[old_vector] = -1;
 		}
-		for_each_cpu_mask(new_cpu, domain)
+		for_each_cpu_mask(new_cpu, new_mask)
 			per_cpu(vector_irq, new_cpu)[vector] = irq;
 		irq_vector[irq] = vector;
 		irq_domain[irq] = domain;
@@ -678,6 +684,32 @@ static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
 	return vector;
 }
 
+void __setup_vector_irq(int cpu)
+{
+	/* Initialize vector_irq on a new cpu */
+	/* This function must be called with vector_lock held */
+	unsigned long flags;
+	int irq, vector;
+
+
+	/* Mark the inuse vectors */
+	for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) {
+		if (!cpu_isset(cpu, irq_domain[irq]))
+			continue;
+		vector = irq_vector[irq];
+		per_cpu(vector_irq, cpu)[vector] = irq;
+	}
+	/* Mark the free vectors */
+	for (vector = 0; vector < NR_VECTORS; ++vector) {
+		irq = per_cpu(vector_irq, cpu)[vector];
+		if (irq < 0)
+			continue;
+		if (!cpu_isset(cpu, irq_domain[irq]))
+			per_cpu(vector_irq, cpu)[vector] = -1;
+	}
+}
+
+
 extern void (*interrupt[NR_IRQS])(void);
 
 static struct irq_chip ioapic_chip;
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 7b7a6870288a..62c2e747af58 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -581,12 +581,16 @@ void __cpuinit start_secondary(void)
 	 * smp_call_function().
 	 */
 	lock_ipi_call_lock();
+	spin_lock(&vector_lock);
 
+	/* Setup the per cpu irq handling data structures */
+	__setup_vector_irq(smp_processor_id());
 	/*
 	 * Allow the master to continue.
 	 */
 	cpu_set(smp_processor_id(), cpu_online_map);
 	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
+	spin_unlock(&vector_lock);
 	unlock_ipi_call_lock();
 
 	cpu_idle();
@@ -799,7 +803,6 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
 				cpu, node);
 	}
 
-
 	alternatives_smp_switch(1);
 
 	c_idle.idle = get_idle_for_cpu(cpu);
@@ -1246,8 +1249,10 @@ int __cpu_disable(void)
 	local_irq_disable();
 	remove_siblinginfo(cpu);
 
+	spin_lock(&vector_lock);
 	/* It's now safe to remove this processor from the online map */
 	cpu_clear(cpu, cpu_online_map);
+	spin_unlock(&vector_lock);
 	remove_cpu_from_maps();
 	fixup_irqs(cpu_online_map);
 	return 0;
-- 
cgit v1.2.1


From 3095fc0c9772b4afb3c81f76664f341ef716d380 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Fri, 20 Oct 2006 14:45:33 -0700
Subject: PCI: x86-64: mmconfig missing printk levels

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/x86_64/pci/mmconfig.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c
index 7732f4254d21..e61093b34c26 100644
--- a/arch/x86_64/pci/mmconfig.c
+++ b/arch/x86_64/pci/mmconfig.c
@@ -220,7 +220,7 @@ void __init pci_mmcfg_init(int type)
 
 	pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL);
 	if (pci_mmcfg_virt == NULL) {
-		printk("PCI: Can not allocate memory for mmconfig structures\n");
+		printk(KERN_ERR "PCI: Can not allocate memory for mmconfig structures\n");
 		return;
 	}
 	for (i = 0; i < pci_mmcfg_config_num; ++i) {
@@ -228,7 +228,8 @@ void __init pci_mmcfg_init(int type)
 		pci_mmcfg_virt[i].virt = ioremap_nocache(pci_mmcfg_config[i].base_address,
 							 MMCONFIG_APER_MAX);
 		if (!pci_mmcfg_virt[i].virt) {
-			printk("PCI: Cannot map mmconfig aperture for segment %d\n",
+			printk(KERN_ERR "PCI: Cannot map mmconfig aperture for "
+					"segment %d\n",
 			       pci_mmcfg_config[i].pci_segment_group_number);
 			return;
 		}
-- 
cgit v1.2.1


From 61ce1efe6e40233663d27ab8ac9ba9710eebcaad Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Fri, 27 Oct 2006 11:41:44 -0700
Subject: [PATCH] vmlinux.lds: consolidate initcall sections

Add a vmlinux.lds.h helper macro for defining the eight-level initcall table,
teach all the architectures to use it.

This is a prerequisite for a patch which performs initcall synchronisation for
multithreaded-probing.

Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
[ Added AVR32 as well ]
Signed-off-by: Haavard Skinnemoen <hskinnemoen@atmel.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/vmlinux.lds.S | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index 1283614c9b24..edb24aa714b4 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -175,13 +175,7 @@ SECTIONS
   __setup_end = .;
   __initcall_start = .;
   .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
-	*(.initcall1.init) 
-	*(.initcall2.init) 
-	*(.initcall3.init) 
-	*(.initcall4.init) 
-	*(.initcall5.init) 
-	*(.initcall6.init) 
-	*(.initcall7.init)
+	INITCALLS
   }
   __initcall_end = .;
   __con_initcall_start = .;
-- 
cgit v1.2.1


From a7aacdf9ea45bf6139cfd750e558a3dcbc6f16c3 Mon Sep 17 00:00:00 2001
From: Albert Cahalan <acahalan@gmail.com>
Date: Sun, 29 Oct 2006 22:26:17 -0500
Subject: [PATCH] fix i386 regparm=3 RT signal handlers on x86_64

The recent change to make x86_64 support i386 binaries compiled
with -mregparm=3 only covered signal handlers without SA_SIGINFO.
(the 3-arg "real-time" ones) This is useful for klibc at least.

Signed-off-by: Albert Cahalan <acahalan@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/ia32/ia32_signal.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c
index a6ba9951e86c..0e0a266d976f 100644
--- a/arch/x86_64/ia32/ia32_signal.c
+++ b/arch/x86_64/ia32/ia32_signal.c
@@ -579,6 +579,11 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	regs->rsp = (unsigned long) frame;
 	regs->rip = (unsigned long) ka->sa.sa_handler;
 
+	/* Make -mregparm=3 work */
+	regs->rax = sig;
+	regs->rdx = (unsigned long) &frame->info;
+	regs->rcx = (unsigned long) &frame->uc;
+
 	asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); 
 	asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); 
 	
-- 
cgit v1.2.1


From 6c0ffb9d2fd987c79c6cbb81c3f3011c63749b1a Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@merom.osdl.org>
Date: Wed, 8 Nov 2006 10:23:03 -0800
Subject: x86-64: clean up io-apic accesses

This is just commit 130fe05dbc0114609cfef9815c0c5580b42decfa ported to
x86-64, for all the same reasons.  It cleans up the IO-APIC accesses in
order to then fix the ordering issues.

We move the accessor functions (that were only used by io_apic.c) out of
a header file, and use proper memory-mapped accesses rather than making
up our own "volatile" pointers.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/io_apic.c | 46 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index fe429e5d6b29..96e02d845716 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -88,6 +88,52 @@ static struct irq_pin_list {
 	short apic, pin, next;
 } irq_2_pin[PIN_MAP_SIZE];
 
+struct io_apic {
+	unsigned int index;
+	unsigned int unused[3];
+	unsigned int data;
+};
+
+static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
+{
+	return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
+		+ (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
+}
+
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
+{
+	struct io_apic __iomem *io_apic = io_apic_base(apic);
+	writel(reg, &io_apic->index);
+	return readl(&io_apic->data);
+}
+
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+	struct io_apic __iomem *io_apic = io_apic_base(apic);
+	writel(reg, &io_apic->index);
+	writel(value, &io_apic->data);
+}
+
+/*
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ */
+static inline void io_apic_modify(unsigned int apic, unsigned int value)
+{
+	struct io_apic __iomem *io_apic = io_apic_base(apic);
+	writel(value, &io_apic->data);
+}
+
+/*
+ * Synchronize the IO-APIC and the CPU by doing
+ * a dummy read from the IO-APIC
+ */
+static inline void io_apic_sync(unsigned int apic)
+{
+	struct io_apic __iomem *io_apic = io_apic_base(apic);
+	readl(&io_apic->data);
+}
+
 #define __DO_ACTION(R, ACTION, FINAL)					\
 									\
 {									\
-- 
cgit v1.2.1


From 48797ebd9e8b16fddcd4ef062f792314a6b9219a Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@merom.osdl.org>
Date: Wed, 8 Nov 2006 10:27:54 -0800
Subject: x86-64: write IO APIC irq routing entries in correct order

This is the x86-64 version of f9dadfa71bc594df09044da61d1c72701121d802
that did the same thing on i386.

Since the "mask" bit is in the low word, when we write a new entry, we
need to write the high word first, before we potentially unmask it.

The exception is when we actually want to mask the interrupt, in which
case we want to write the low word first to make sure that the high word
doesn't change while the interrupt routing is still active.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/io_apic.c | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 96e02d845716..3b8f9c68ad3c 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -172,11 +172,33 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
 	return eu.entry;
 }
 
+/*
+ * When we write a new IO APIC routing entry, we need to write the high
+ * word first! If the mask bit in the low word is clear, we will enable
+ * the interrupt, and we need to make sure the entry is fully populated
+ * before that happens.
+ */
 static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
 {
 	unsigned long flags;
 	union entry_union eu;
 	eu.entry = e;
+	spin_lock_irqsave(&ioapic_lock, flags);
+	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+/*
+ * When we mask an IO APIC routing entry, we need to write the low
+ * word first, in order to set the mask bit before we change the
+ * high bits!
+ */
+static void ioapic_mask_entry(int apic, int pin)
+{
+	unsigned long flags;
+	union entry_union eu = { .entry.mask = 1 };
+
 	spin_lock_irqsave(&ioapic_lock, flags);
 	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
 	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
@@ -302,9 +324,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
 	/*
 	 * Disable it in the IO-APIC irq-routing table:
 	 */
-	memset(&entry, 0, sizeof(entry));
-	entry.mask = 1;
-	ioapic_write_entry(apic, pin, entry);
+	ioapic_mask_entry(apic, pin);
 }
 
 static void clear_IO_APIC (void)
-- 
cgit v1.2.1


From ec68307cc5a8dc499e48693843bb42f6b6028458 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 8 Nov 2006 17:44:57 -0800
Subject: [PATCH] htirq: refactor so we only have one function that writes to
 the chip

This refactoring actually optimizes the code a little by caching the value
that we think the device is programmed with instead of reading it back from
the hardware.  Which simplifies the code a little and should speed things up a
bit.

This patch introduces the concept of a ht_irq_msg and modifies the
architecture read/write routines to update this code.

There is a minor consistency fix here as well as x86_64 forgot to initialize
the htirq as masked.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Andi Kleen <ak@suse.de>
Acked-by: Bryan O'Sullivan <bos@pathscale.com>
Cc: <olson@pathscale.com>
Cc: Roland Dreier <rolandd@cisco.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/io_apic.c | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 3b8f9c68ad3c..41bfc49301ad 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -1955,18 +1955,16 @@ void arch_teardown_msi_irq(unsigned int irq)
 
 static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
 {
-	u32 low, high;
-	low  = read_ht_irq_low(irq);
-	high = read_ht_irq_high(irq);
+	struct ht_irq_msg msg;
+	fetch_ht_irq_msg(irq, &msg);
 
-	low  &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
-	high &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
+	msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
+	msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
 
-	low  |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
-	high |= HT_IRQ_HIGH_DEST_ID(dest);
+	msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
+	msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
 
-	write_ht_irq_low(irq, low);
-	write_ht_irq_high(irq, high);
+	write_ht_irq_msg(irq, &msg);
 }
 
 static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
@@ -1987,7 +1985,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
 
 	dest = cpu_mask_to_apicid(tmp);
 
-	target_ht_irq(irq, dest, vector & 0xff);
+	target_ht_irq(irq, dest, vector);
 	set_native_irq_info(irq, mask);
 }
 #endif
@@ -2010,14 +2008,15 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 
 	vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
 	if (vector >= 0) {
-		u32 low, high;
+		struct ht_irq_msg msg;
 		unsigned dest;
 
 		dest = cpu_mask_to_apicid(tmp);
 
-		high = 	HT_IRQ_HIGH_DEST_ID(dest);
+		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
 
-		low =	HT_IRQ_LOW_BASE |
+		msg.address_lo =
+			HT_IRQ_LOW_BASE |
 			HT_IRQ_LOW_DEST_ID(dest) |
 			HT_IRQ_LOW_VECTOR(vector) |
 			((INT_DEST_MODE == 0) ?
@@ -2026,10 +2025,10 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 			HT_IRQ_LOW_RQEOI_EDGE |
 			((INT_DELIVERY_MODE != dest_LowestPrio) ?
 				HT_IRQ_LOW_MT_FIXED :
-				HT_IRQ_LOW_MT_ARBITRATED);
+				HT_IRQ_LOW_MT_ARBITRATED) |
+			HT_IRQ_LOW_IRQ_MASKED;
 
-		write_ht_irq_low(irq, low);
-		write_ht_irq_high(irq, high);
+		write_ht_irq_msg(irq, &msg);
 
 		set_irq_chip_and_handler_name(irq, &ht_irq_chip,
 					      handle_edge_irq, "edge");
-- 
cgit v1.2.1


From 64e72e41acae0dab733fb0d5d789b76d115210c0 Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@basil.nowhere.org>
Date: Tue, 14 Nov 2006 16:56:33 +0100
Subject: Revert "[PATCH] MMCONFIG and new Intel motherboards"

This reverts 4c6e052adfe285ede5884e4e8c4d33af33932c13 commit.

Following Linus' i386 change: revert resource reservation
for mmcfg config now. Will be revisited in .20 hopefully.
---
 arch/x86_64/pci/mmconfig.c | 32 --------------------------------
 1 file changed, 32 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c
index e61093b34c26..f8b6b2800a62 100644
--- a/arch/x86_64/pci/mmconfig.c
+++ b/arch/x86_64/pci/mmconfig.c
@@ -163,37 +163,6 @@ static __init void unreachable_devices(void)
 	}
 }
 
-static __init void pci_mmcfg_insert_resources(void)
-{
-#define PCI_MMCFG_RESOURCE_NAME_LEN 19
-	int i;
-	struct resource *res;
-	char *names;
-	unsigned num_buses;
-
-	res = kcalloc(PCI_MMCFG_RESOURCE_NAME_LEN + sizeof(*res),
-			pci_mmcfg_config_num, GFP_KERNEL);
-
-	if (!res) {
-		printk(KERN_ERR "PCI: Unable to allocate MMCONFIG resources\n");
-		return;
-	}
-
-	names = (void *)&res[pci_mmcfg_config_num];
-	for (i = 0; i < pci_mmcfg_config_num; i++, res++) {
-		num_buses = pci_mmcfg_config[i].end_bus_number -
-		    pci_mmcfg_config[i].start_bus_number + 1;
-		res->name = names;
-		snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %u",
-			pci_mmcfg_config[i].pci_segment_group_number);
-		res->start = pci_mmcfg_config[i].base_address;
-		res->end = res->start + (num_buses << 20) - 1;
-		res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
-		insert_resource(&iomem_resource, res);
-		names += PCI_MMCFG_RESOURCE_NAME_LEN;
-	}
-}
-
 void __init pci_mmcfg_init(int type)
 {
 	int i;
@@ -237,7 +206,6 @@ void __init pci_mmcfg_init(int type)
 	}
 
 	unreachable_devices();
-	pci_mmcfg_insert_resources();
 
 	raw_pci_ops = &pci_mmcfg;
 	pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
-- 
cgit v1.2.1


From 14f448e36192d6d2cd7dfd81cb044977b2f9dd9b Mon Sep 17 00:00:00 2001
From: Aaron Durbin <adurbin@google.com>
Date: Tue, 14 Nov 2006 16:57:45 +0100
Subject: [PATCH] x86-64: Fix partial page check to ensure unusable memory is
 not being marked usable.

Fix partial page check in e820_register_active_regions to ensure
partial pages are
not being marked as active in the memory pool.

Signed-off-by: Aaron Durbin <adurbin@google.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/e820.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index a75c829c2b02..855b5611318f 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -278,7 +278,7 @@ e820_register_active_regions(int nid, unsigned long start_pfn,
 								>> PAGE_SHIFT;
 
 		/* Skip map entries smaller than a page */
-		if (ei_startpfn > ei_endpfn)
+		if (ei_startpfn >= ei_endpfn)
 			continue;
 
 		/* Check if end_pfn_map should be updated */
-- 
cgit v1.2.1


From 14679eb3c50897889ba62f9a37e3bcd8a205b5e7 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Tue, 14 Nov 2006 16:57:46 +0100
Subject: [PATCH] x86-64: Fix PTRACE_[SG]ET_THREAD_AREA regression with ia32
 emulation.

ptrace(PTRACE_[SG]ET_THREAD_AREA) calls from ia32 code
should be passed onto the x86_64 implementation.

The default case in sys32_ptrace used to call to sys_ptrace(), but is
now EINVAL.  This patch fixes a regression caused by that changed.

Signed-off-by: Mike McCormack <mike@codeweavers.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/ia32/ptrace32.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/ia32/ptrace32.c b/arch/x86_64/ia32/ptrace32.c
index 3a7561d4703e..04566fe5de49 100644
--- a/arch/x86_64/ia32/ptrace32.c
+++ b/arch/x86_64/ia32/ptrace32.c
@@ -244,6 +244,8 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 	case PTRACE_DETACH:
 	case PTRACE_SYSCALL:
 	case PTRACE_SETOPTIONS:
+	case PTRACE_SET_THREAD_AREA:
+	case PTRACE_GET_THREAD_AREA:
 		return sys_ptrace(request, pid, addr, data); 
 
 	default:
-- 
cgit v1.2.1


From 51d67a488b53a5cc8401460480c124eaec71e2d4 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Tue, 14 Nov 2006 16:57:46 +0100
Subject: [PATCH] x86-64: shorten the x86_64 boot setup GDT to what the comment
 says

Stephen Tweedie, Herbert Xu, and myself have been struggling with a very
nasty bug in Xen.  But it also pointed out a small bug in the x86_64
kernel boot setup.

The GDT limit being setup by the initial bzImage code when entering into
protected mode is way too big.  The comment by the code states that the
size of the GDT is 2048, but the actual size being set up is much bigger
(32768). This happens simply because of one extra '0'.

Instead of setting up a 0x800 size, 0x8000 is set up.  On bare metal this
is fine because the CPU wont load any segments unless  they are
explicitly used.  But unfortunately, this breaks Xen on vmx FV, since it
(for now) blindly loads all the segments into the VMCS if they are less
than the gdt limit. Since the real mode segments are around 0x3000, we are
getting junk into the VMCS and that later causes an exception.

Stephen Tweedie has written up a patch to fix the Xen side and will be
submitting that to those folks. But that doesn't excuse the GDT limit
being a magnitude too big.

AK: changed to compute true gdt size in assembler, fixed comment

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/boot/setup.S | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/boot/setup.S b/arch/x86_64/boot/setup.S
index c3bfd223ab49..770940cc0108 100644
--- a/arch/x86_64/boot/setup.S
+++ b/arch/x86_64/boot/setup.S
@@ -836,13 +836,12 @@ gdt:
 	.word	0x9200				# data read/write
 	.word	0x00CF				# granularity = 4096, 386
 						#  (+5th nibble of limit)
+gdt_end:
 idt_48:
 	.word	0				# idt limit = 0
 	.word	0, 0				# idt base = 0L
 gdt_48:
-	.word	0x8000				# gdt limit=2048,
-						#  256 GDT entries
-
+	.word	gdt_end-gdt-1			# gdt limit
 	.word	0, 0				# gdt base (filled in later)
 
 # Include video setup & detection code
-- 
cgit v1.2.1


From 5e58a02a8f6a7a1c9ae41f39286bcd3aea0d6f24 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Tue, 14 Nov 2006 16:57:46 +0100
Subject: [PATCH] x86-64: Handle reserve_bootmem_generic beyond end_pfn

This can happen on kexec kernels with some configurations, in particularly
on Unisys ES7000 systems.

Analysis by Amul Shah

Cc: Amul Shah <amul.shah@unisys.com>

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/mm/init.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 971dc1181e69..f1f977aafae1 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -655,9 +655,22 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 
 void __init reserve_bootmem_generic(unsigned long phys, unsigned len) 
 { 
-	/* Should check here against the e820 map to avoid double free */ 
 #ifdef CONFIG_NUMA
 	int nid = phys_to_nid(phys);
+#endif
+	unsigned long pfn = phys >> PAGE_SHIFT;
+	if (pfn >= end_pfn) {
+		/* This can happen with kdump kernels when accessing firmware
+		   tables. */
+		if (pfn < end_pfn_map)
+			return;
+		printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
+				phys, len);
+		return;
+	}
+
+	/* Should check here against the e820 map to avoid double free */
+#ifdef CONFIG_NUMA
   	reserve_bootmem_node(NODE_DATA(nid), phys, len);
 #else       		
 	reserve_bootmem(phys, len);    
-- 
cgit v1.2.1


From 15803a43288da434d34d41c4ed650c3c1728d42c Mon Sep 17 00:00:00 2001
From: Magnus Damm <magnus@valinux.co.jp>
Date: Tue, 14 Nov 2006 16:57:46 +0100
Subject: [PATCH] x86-64: setup saved_max_pfn correctly (kdump)

x86_64: setup saved_max_pfn correctly

2.6.19-rc4 has broken CONFIG_CRASH_DUMP support on x86_64. It is impossible
to read out the kernel contents from /proc/vmcore because saved_max_pfn is set
to zero instead of the max_pfn value before the user map is setup.

This happens because saved_max_pfn is initialized at parse_early_param() time,
and at this time no active regions have been registered. save_max_pfn is setup
from e820_end_of_ram(), more exact find_max_pfn_with_active_regions() which
returns 0 because no regions exist.

This patch fixes this by registering before and removing after the call
to e820_end_of_ram().

Signed-off-by: Magnus Damm <magnus@valinux.co.jp>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/e820.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index 855b5611318f..6fe191c58084 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -594,7 +594,9 @@ static int __init parse_memmap_opt(char *p)
 		 * size before original memory map is
 		 * reset.
 		 */
+		e820_register_active_regions(0, 0, -1UL);
 		saved_max_pfn = e820_end_of_ram();
+		remove_all_active_ranges();
 #endif
 		end_pfn_map = 0;
 		e820.nr_map = 0;
-- 
cgit v1.2.1


From fa18f477d0987c011cce047a7c3cd1284f547a14 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Tue, 14 Nov 2006 16:57:46 +0100
Subject: [PATCH] x86: Add acpi_user_timer_override option for Asus boards

Timer overrides are normally disabled on Nvidia board because
they are commonly wrong, except on new ones with HPET support.
Unfortunately there are quite some Asus boards around that
don't have HPET, but need a timer override.

We don't know yet how to handle this transparently,
but at least add a command line option to force the timer override
and let them boot.

Cc: len.brown@intel.com

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/early-quirks.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c
index 2b1245d86258..68273bff58cc 100644
--- a/arch/x86_64/kernel/early-quirks.c
+++ b/arch/x86_64/kernel/early-quirks.c
@@ -45,7 +45,13 @@ static void nvidia_bugs(void)
 	/*
 	 * All timer overrides on Nvidia are
 	 * wrong unless HPET is enabled.
+	 * Unfortunately that's not true on many Asus boards.
+	 * We don't know yet how to detect this automatically, but
+	 * at least allow a command line override.
 	 */
+	if (acpi_use_timer_override)
+		return;
+
 	nvidia_hpet_detected = 0;
 	acpi_table_parse(ACPI_HPET, nvidia_hpet_check);
 	if (nvidia_hpet_detected == 0) {
@@ -53,6 +59,8 @@ static void nvidia_bugs(void)
 		printk(KERN_INFO "Nvidia board "
 		       "detected. Ignoring ACPI "
 		       "timer override.\n");
+		printk(KERN_INFO "If you got timer trouble "
+			"try acpi_use_timer_override\n");
 	}
 #endif
 	/* RED-PEN skip them on mptables too? */
-- 
cgit v1.2.1


From 8c131af1db510793f87dc43edbc8950a35370df3 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Tue, 14 Nov 2006 16:57:46 +0100
Subject: [PATCH] x86-64: Fix vgetcpu when CONFIG_HOTPLUG_CPU is disabled

The vgetcpu per CPU initialization previously relied on CPU hotplug
events for all CPUs to initialize the per CPU state. That only
worked only on kernels with CONFIG_HOTPLUG_CPU enabled.  On the
others some CPUs didn't get their state initialized properly
and vgetcpu wouldn't work.

Change the initialization sequence to instead run in a normal
initcall (which runs after the normal CPU bootup) and initialize
all running CPUs there. Later hotplug CPUs are still handled
with an hotplug notifier.

This actually simplifies the code somewhat.

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/smp.c      |  3 +--
 arch/x86_64/kernel/time.c     | 11 -----------
 arch/x86_64/kernel/vsyscall.c | 45 ++++++++++++++++++++++++-------------------
 3 files changed, 26 insertions(+), 33 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index 4f67697f5036..9f74c883568c 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -376,9 +376,8 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
 	/* prevent preemption and reschedule on another processor */
 	int me = get_cpu();
 	if (cpu == me) {
-		WARN_ON(1);
 		put_cpu();
-		return -EBUSY;
+		return 0;
 	}
 	spin_lock_bh(&call_lock);
 	__smp_call_function_single(cpu, func, info, nonatomic, wait);
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 88722f11ca13..e3ef544d2cfb 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -876,15 +876,6 @@ static struct irqaction irq0 = {
 	timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL
 };
 
-static int __cpuinit
-time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu)
-{
-	unsigned cpu = (unsigned long) hcpu;
-	if (action == CPU_ONLINE)
-		vsyscall_set_cpu(cpu);
-	return NOTIFY_DONE;
-}
-
 void __init time_init(void)
 {
 	if (nohpet)
@@ -925,8 +916,6 @@ void __init time_init(void)
 	vxtime.last_tsc = get_cycles_sync();
 	set_cyc2ns_scale(cpu_khz);
 	setup_irq(0, &irq0);
-	hotcpu_notifier(time_cpu_notifier, 0);
-	time_cpu_notifier(NULL, CPU_ONLINE, (void *)(long)smp_processor_id());
 
 #ifndef CONFIG_SMP
 	time_init_gtod();
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index a98b460af6a1..a730bacecb0b 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -27,6 +27,9 @@
 #include <linux/jiffies.h>
 #include <linux/sysctl.h>
 #include <linux/getcpu.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/notifier.h>
 
 #include <asm/vsyscall.h>
 #include <asm/pgtable.h>
@@ -243,32 +246,17 @@ static ctl_table kernel_root_table2[] = {
 
 #endif
 
-static void __cpuinit write_rdtscp_cb(void *info)
-{
-	write_rdtscp_aux((unsigned long)info);
-}
-
-void __cpuinit vsyscall_set_cpu(int cpu)
+/* Assume __initcall executes before all user space. Hopefully kmod
+   doesn't violate that. We'll find out if it does. */
+static void __cpuinit vsyscall_set_cpu(int cpu)
 {
 	unsigned long *d;
 	unsigned long node = 0;
 #ifdef CONFIG_NUMA
 	node = cpu_to_node[cpu];
 #endif
-	if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) {
-		void *info = (void *)((node << 12) | cpu);
-		/* Can happen on preemptive kernel */
-		if (get_cpu() == cpu)
-			write_rdtscp_cb(info);
-#ifdef CONFIG_SMP
-		else {
-			/* the notifier is unfortunately not executed on the
-			   target CPU */
-			smp_call_function_single(cpu,write_rdtscp_cb,info,0,1);
-		}
-#endif
-		put_cpu();
-	}
+	if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP))
+		write_rdtscp_aux((node << 12) | cpu);
 
 	/* Store cpu number in limit so that it can be loaded quickly
 	   in user space in vgetcpu.
@@ -280,6 +268,21 @@ void __cpuinit vsyscall_set_cpu(int cpu)
 	*d |= (node >> 4) << 48;
 }
 
+static void __cpuinit cpu_vsyscall_init(void *arg)
+{
+	/* preemption should be already off */
+	vsyscall_set_cpu(raw_smp_processor_id());
+}
+
+static int __cpuinit
+cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
+{
+	long cpu = (long)arg;
+	if (action == CPU_ONLINE)
+		smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
+	return NOTIFY_DONE;
+}
+
 static void __init map_vsyscall(void)
 {
 	extern char __vsyscall_0;
@@ -299,6 +302,8 @@ static int __init vsyscall_init(void)
 #ifdef CONFIG_SYSCTL
 	register_sysctl_table(kernel_root_table2, 0);
 #endif
+	on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
+	hotcpu_notifier(cpu_vsyscall_notifier, 0);
 	return 0;
 }
 
-- 
cgit v1.2.1


From 9446868b5383eb87f76b2d4389dea4bb968a6657 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Tue, 14 Nov 2006 16:57:46 +0100
Subject: [PATCH] x86-64: Fix race in exit_idle

When another interrupt happens in exit_idle the exit idle notifier
could be called an incorrect number of times.

Add a test_and_clear_bit_pda and use it handle the bit
atomically against interrupts to avoid this.

Pointed out by Stephane Eranian

Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/process.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 49f7fac6229e..f6226055d53d 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -88,9 +88,8 @@ void enter_idle(void)
 
 static void __exit_idle(void)
 {
-	if (read_pda(isidle) == 0)
+	if (test_and_clear_bit_pda(0, isidle) == 0)
 		return;
-	write_pda(isidle, 0);
 	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
 }
 
-- 
cgit v1.2.1


From 45c99533252ef2297f37c5fdd672a3e0eb566870 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 14 Nov 2006 10:52:12 -0700
Subject: [PATCH] Use delayed disable mode of ioapic edge triggered interrupts

Komuro reports that ISA interrupts do not work after a disable_irq(),
causing some PCMCIA drivers to not work, with messages like

	eth0: Asix AX88190: io 0x300, irq 3, hw_addr xx:xx:xx:xx:xx:xx
	eth0: found link beat
	eth0: autonegotiation complete: 100baseT-FD selected
	eth0: interrupt(s) dropped!
	eth0: interrupt(s) dropped!
	eth0: interrupt(s) dropped!
	...

Linus Torvalds <torvalds@osdl.org> said:

  "Now, edge-triggered interrupts are a _lot_ harder to mask, because the
   Intel APIC is an unbelievable piece of sh*t, and has the edge-detect logic
   _before_ the mask logic, so if a edge happens _while_ the device is
   masked, you'll never ever see the edge ever again (unmasking will not
   cause a new edge, so you simply lost the interrupt).

   So when you "mask" an edge-triggered IRQ, you can't really mask it at all,
   because if you did that, you'd lose it forever if the IRQ comes in while
   you masked it. Instead, we're supposed to leave it active, and set a flag,
   and IF the IRQ comes in, we just remember it, and mask it at that point
   instead, and then on unmasking, we have to replay it by sending a
   self-IPI."

This trivial patch solves the problem.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Cc: Ingo Molnar <mingo@redhat.com>
Acked-by: Komuro <komurojun-mbn@nifty.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/io_apic.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 41bfc49301ad..14654e682411 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -790,9 +790,11 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
 			trigger == IOAPIC_LEVEL)
 		set_irq_chip_and_handler_name(irq, &ioapic_chip,
 					      handle_fasteoi_irq, "fasteoi");
-	else
+	else {
+		irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
 		set_irq_chip_and_handler_name(irq, &ioapic_chip,
 					      handle_edge_irq, "edge");
+	}
 }
 
 static void __init setup_IO_APIC_irqs(void)
-- 
cgit v1.2.1


From 6b3d1a95ba714bfb1cc81362f7f3e01b7654b4f3 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 16 Nov 2006 10:22:03 +0100
Subject: [PATCH] x86-64: Fix vsyscall.c compilation on UP

Broken by earlier patch by me.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/vsyscall.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index a730bacecb0b..92546c1526f1 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -274,6 +274,7 @@ static void __cpuinit cpu_vsyscall_init(void *arg)
 	vsyscall_set_cpu(raw_smp_processor_id());
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 static int __cpuinit
 cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
 {
@@ -282,6 +283,7 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
 		smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
 	return NOTIFY_DONE;
 }
+#endif
 
 static void __init map_vsyscall(void)
 {
-- 
cgit v1.2.1


From ccf9ff524ccb195d648ecb0b168340560b42532c Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 16 Nov 2006 11:49:16 +0100
Subject: [PATCH] x86_64: fix CONFIG_CC_STACKPROTECTOR build bug

on x86_64, the CONFIG_CC_STACKPROTECTOR build fails if used in a
distcc setup that has "CC" defined to "distcc gcc":

 gcc: gcc: linker input file unused because linking not done
 gcc: gcc: linker input file unused because linking not done
 gcc: gcc: linker input file unused because linking not done

this is because the gcc-x86_64-has-stack-protector.sh script
has a 2-parameters assumption. Fix this by passing $(CC) as
a single parameter.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Please-Use-Me-More: make randconfig
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index 13972148058d..6e38d4daeed7 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -66,8 +66,8 @@ AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_endproc,-DCONFIG_AS_CFI=1,)
 cflags-y += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)
 AFLAGS += $(call as-instr,.cfi_startproc\n.cfi_signal_frame\n.cfi_endproc,-DCONFIG_AS_CFI_SIGNAL_FRAME=1,)
 
-cflags-$(CONFIG_CC_STACKPROTECTOR) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh $(CC) -fstack-protector )
-cflags-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh $(CC) -fstack-protector-all )
+cflags-$(CONFIG_CC_STACKPROTECTOR) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh "$(CC)" -fstack-protector )
+cflags-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh "$(CC)" -fstack-protector-all )
 
 CFLAGS += $(cflags-y)
 CFLAGS_KERNEL += $(cflags-kernel-y)
-- 
cgit v1.2.1


From 0796bdb7e9e4a48b401f4fba1ee5dc79a45528ef Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 17 Nov 2006 05:57:49 +0100
Subject: [PATCH] x86_64: stack unwinder crash fix

the new dwarf2 unwinder crashes while trying to dump the stack:

  Leftover inexact backtrace:

  Unable to handle kernel paging request at ffffffff82800000 RIP:
   [<ffffffff8026cf26>] dump_trace+0x35b/0x3d2
  PGD 203027 PUD 205027 PMD 0
  Oops: 0000 [2] PREEMPT SMP
  CPU 0
  Modules linked in:
  Pid: 30, comm: khelper Not tainted 2.6.19-rc6-rt1 #11
  RIP: 0010:[<ffffffff8026cf26>]  [<ffffffff8026cf26>] dump_trace+0x35b/0x3d2
  RSP: 0000:ffff81003fb9d848  EFLAGS: 00010006
  RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
  RDX: 0000000000000000 RSI: ffffffff805b3520 RDI: 0000000000000000
  RBP: ffffffff827ffff9 R08: ffffffff80aad000 R09: 0000000000000005
  R10: ffffffff80aae000 R11: ffffffff8037961b R12: ffff81003fb9d858
  R13: 0000000000000000 R14: ffffffff80598460 R15: ffffffff80ab1fc0
  FS:  0000000000000000(0000) GS:ffffffff806c4200(0000) knlGS:0000000000000000
  CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
  CR2: ffffffff82800000 CR3: 0000000000201000 CR4: 00000000000006e0

this crash happened because it did not sanitize the dwarf2 data it
got, and got an unaligned stack pointer - which happily walked past
the process stack (and eventually reached the end of kernel memory
and pagefaulted there) due to this naive iteration condition:

        HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0);

note that i386 is alot more conservative when it comes to trusting
stack pointers:

  static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
  {
         return  p > (void *)tinfo &&
                 p < (void *)tinfo + THREAD_SIZE - 3;
  }

but the x86_64 code did not take this bit of i386 code.

The fix is to align the stack pointer.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Andi Kleen <ak@suse.de>
Cc: Jan Beulich <jbeulich@novell.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/traps.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 7819022a8db5..a153d0a01b72 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -290,6 +290,12 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * s
 		if (tsk && tsk != current)
 			stack = (unsigned long *)tsk->thread.rsp;
 	}
+	/*
+	 * Align the stack pointer on word boundary, later loops
+	 * rely on that (and corruption / debug info bugs can cause
+	 * unaligned values here):
+	 */
+	stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1));
 
 	/*
 	 * Print function call entries within a stack. 'cond' is the
-- 
cgit v1.2.1


From dc1829a4c378d793fb3b95d56135d89a0d7ff72a Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 17 Nov 2006 14:26:18 +0100
Subject: [PATCH] i386/x86_64: ACPI cpu_idle_wait() fix

The scheduler on Andreas Friedrich's hyperthreading system stopped
working properly: the scheduler would never move tasks to another CPU!
The lask known working kernel was 2.6.8.

After a couple of attempts to corner the bug, the following smoking gun
was found:

  BIOS reported wrong ACPI idfor the processor
  CPU#1: set_cpus_allowed(), swapper:1, 3 -> 2
   [<c0103bbe>] show_trace_log_lvl+0x34/0x4a
   [<c0103ceb>] show_trace+0x2c/0x2e
   [<c01045f8>] dump_stack+0x2b/0x2d
   [<c0116a77>] set_cpus_allowed+0x52/0xec
   [<c0101d86>] cpu_idle_wait+0x2e/0x100
   [<c0259c57>] acpi_processor_power_exit+0x45/0x58
   [<c0259752>] acpi_processor_remove+0x46/0xea
   [<c025c6fb>] acpi_start_single_object+0x47/0x54
   [<c025cee5>] acpi_bus_register_driver+0xa4/0xd3
   [<c04ab2d7>] acpi_processor_init+0x57/0x77
   [<c01004d7>] init+0x146/0x2fd
   [<c0103a87>] kernel_thread_helper+0x7/0x10

a quick look at cpu_idle_wait() shows how broken that code is
on i386: it changes the init task's affinity map but never
restores it ...

and because all userspace tasks get forked by init, they all
inherited that single-CPU affinity mask. x86_64 cloned this
bug too.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Andreas Friedrich <andreas.friedrich@fujitsu-siemens.com>
Cc: Wolfgang Erig <Wolfgang.Erig@fujitsu-siemens.com>
Cc: Andrew Morton <akpm@osdl.org>
Cc: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/kernel/process.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index f6226055d53d..7451a4c43c16 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -144,7 +144,7 @@ static void poll_idle (void)
 void cpu_idle_wait(void)
 {
 	unsigned int cpu, this_cpu = get_cpu();
-	cpumask_t map;
+	cpumask_t map, tmp = current->cpus_allowed;
 
 	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
 	put_cpu();
@@ -167,6 +167,8 @@ void cpu_idle_wait(void)
 		}
 		cpus_and(map, map, cpu_online_map);
 	} while (!cpus_empty(map));
+
+	set_cpus_allowed(current, tmp);
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
 
-- 
cgit v1.2.1


From 8243229f0940ab4e9f501879d3ffb7476b02ee6a Mon Sep 17 00:00:00 2001
From: Yasunori Goto <y-goto@jp.fujitsu.com>
Date: Sat, 18 Nov 2006 22:19:40 -0800
Subject: [PATCH] x86_64: fix memory hotplug build with NUMA=n

This is to fix compile error of x86-64 memory hotplug without any NUMA
option.

  CC      arch/x86_64/mm/init.o
arch/x86_64/mm/init.c:501: error: redefinition of 'memory_add_physaddr_to_nid'
include/linux/memory_hotplug.h:71: error: previous definition of 'memory_add_phys
addr_to_nid' was here
arch/x86_64/mm/init.c:509: error: redefinition of 'memory_add_physaddr_to_nid'
arch/x86_64/mm/init.c:501: error: previous definition of 'memory_add_physaddr_to_
nid' was here

I confirmed compile completion with !NUMA, (NUMA & !ACPI_NUMA),
or (NUMA & ACPI_NUMA).

Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Acked-by: Andi Kleen <ak@suse.de>
Cc: "Randy.Dunlap" <rdunlap@xenotime.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/x86_64/mm/init.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index f1f977aafae1..4c0c00ef3ca7 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -496,7 +496,7 @@ int remove_memory(u64 start, u64 size)
 }
 EXPORT_SYMBOL_GPL(remove_memory);
 
-#ifndef CONFIG_ACPI_NUMA
+#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
 int memory_add_physaddr_to_nid(u64 start)
 {
 	return 0;
@@ -504,13 +504,6 @@ int memory_add_physaddr_to_nid(u64 start)
 EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
 
-#ifndef CONFIG_ACPI_NUMA
-int memory_add_physaddr_to_nid(u64 start)
-{
-	return 0;
-}
-#endif
-
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 #ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-- 
cgit v1.2.1


From 3af9815328bba76e8d11d71d6dbbd6f38beafe58 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@in.ibm.com>
Date: Mon, 20 Nov 2006 11:29:09 -0500
Subject: [PATCH] x86_64: Align data segment to PAGE_SIZE boundary

o Explicitly align data segment to PAGE_SIZE boundary otherwise depending on
  config options and tool chain it might be placed on a non PAGE_SIZE aligned
  boundary and vmlinux loaders like kexec fail when they encounter a
  PT_LOAD type segment which is not aligned to PAGE_SIZE boundary.

Signed-off-by: Vivek Goyal <vgoyal@in.ibm.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/vmlinux.lds.S | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86_64')

diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index edb24aa714b4..d9534e750d4f 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -60,6 +60,7 @@ SECTIONS
   }
 #endif
 
+  . = ALIGN(PAGE_SIZE);        /* Align data segment to page size boundary */
 				/* Data */
   .data : AT(ADDR(.data) - LOAD_OFFSET) {
 	*(.data)
-- 
cgit v1.2.1