diff options
Diffstat (limited to 'arch/x86_64')
-rw-r--r-- | arch/x86_64/kernel/e820.c | 6 | ||||
-rw-r--r-- | arch/x86_64/kernel/io_apic.c | 5 | ||||
-rw-r--r-- | arch/x86_64/kernel/mpparse.c | 12 | ||||
-rw-r--r-- | arch/x86_64/kernel/pci-gart.c | 8 | ||||
-rw-r--r-- | arch/x86_64/kernel/pci-nommu.c | 7 | ||||
-rw-r--r-- | arch/x86_64/kernel/setup.c | 30 | ||||
-rw-r--r-- | arch/x86_64/kernel/traps.c | 33 | ||||
-rw-r--r-- | arch/x86_64/mm/srat.c | 15 |
8 files changed, 95 insertions, 21 deletions
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index 62776c07cff1..222b5b46d2b2 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c @@ -76,6 +76,12 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size) *addrp = __pa_symbol(&_end); return 1; } + + if (last >= ebda_addr && addr < ebda_addr + ebda_size) { + *addrp = ebda_addr + ebda_size; + return 1; + } + /* XXX ramdisk image here? */ return 0; } diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 77b4c608cca0..0de3ea938830 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -1777,6 +1777,8 @@ static inline void unlock_ExtINT_logic(void) spin_unlock_irqrestore(&ioapic_lock, flags); } +int timer_uses_ioapic_pin_0; + /* * This code may look a bit paranoid, but it's supposed to cooperate with * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ @@ -1814,6 +1816,9 @@ static inline void check_timer(void) pin2 = ioapic_i8259.pin; apic2 = ioapic_i8259.apic; + if (pin1 == 0) + timer_uses_ioapic_pin_0 = 1; + apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", vector, apic1, pin1, apic2, pin2); diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index b17cf3eba359..083da7e606b1 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -968,7 +968,17 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) */ int irq = gsi; if (gsi < MAX_GSI_NUM) { - if (gsi > 15) + /* + * Retain the VIA chipset work-around (gsi > 15), but + * avoid a problem where the 8254 timer (IRQ0) is setup + * via an override (so it's not on pin 0 of the ioapic), + * and at the same time, the pin 0 interrupt is a PCI + * type. The gsi > 15 test could cause these two pins + * to be shared as IRQ0, and they are not shareable. + * So test for this condition, and if necessary, avoid + * the pin collision. + */ + if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0)) gsi = pci_irq++; /* * Don't assign IRQ used by ACPI SCI diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index 9d3d76c85ae7..2480d3f08a47 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c @@ -639,6 +639,14 @@ static int __init pci_iommu_init(void) return -1; } + i = 0; + for_all_nb(dev) + i++; + if (i > MAX_NB) { + printk(KERN_ERR "PCI-GART: Too many northbridges (%ld). Disabled\n", i); + return -1; + } + printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); aper_size = info.aper_size * 1024 * 1024; iommu_size = check_iommu_size(info.aper_base, aper_size); diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c index 44adcc2d5e5b..1f6ecc62061d 100644 --- a/arch/x86_64/kernel/pci-nommu.c +++ b/arch/x86_64/kernel/pci-nommu.c @@ -12,9 +12,10 @@ static int check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) { if (hwdev && bus + size > *hwdev->dma_mask) { - printk(KERN_ERR - "nommu_%s: overflow %Lx+%lu of device mask %Lx\n", - name, (long long)bus, size, (long long)*hwdev->dma_mask); + if (*hwdev->dma_mask >= 0xffffffffULL) + printk(KERN_ERR + "nommu_%s: overflow %Lx+%lu of device mask %Lx\n", + name, (long long)bus, size, (long long)*hwdev->dma_mask); return 0; } return 1; diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index ebc3c33b1c6c..f0870bef24d1 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -571,17 +571,28 @@ static inline void copy_edd(void) #endif #define EBDA_ADDR_POINTER 0x40E -static void __init reserve_ebda_region(void) + +unsigned __initdata ebda_addr; +unsigned __initdata ebda_size; + +static void discover_ebda(void) { - unsigned int addr; - /** + /* * there is a real-mode segmented pointer pointing to the * 4K EBDA area at 0x40E */ - addr = *(unsigned short *)phys_to_virt(EBDA_ADDR_POINTER); - addr <<= 4; - if (addr) - reserve_bootmem_generic(addr, PAGE_SIZE); + ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER; + ebda_addr <<= 4; + + ebda_size = *(unsigned short *)(unsigned long)ebda_addr; + + /* Round EBDA up to pages */ + if (ebda_size == 0) + ebda_size = 1; + ebda_size <<= 10; + ebda_size = round_up(ebda_size + (ebda_addr & ~PAGE_MASK), PAGE_SIZE); + if (ebda_size > 64*1024) + ebda_size = 64*1024; } void __init setup_arch(char **cmdline_p) @@ -627,6 +638,8 @@ void __init setup_arch(char **cmdline_p) check_efer(); + discover_ebda(); + init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); dmi_scan_machine(); @@ -669,7 +682,8 @@ void __init setup_arch(char **cmdline_p) reserve_bootmem_generic(0, PAGE_SIZE); /* reserve ebda region */ - reserve_ebda_region(); + if (ebda_addr) + reserve_bootmem_generic(ebda_addr, ebda_size); #ifdef CONFIG_SMP /* diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 2700b1375c1f..cea335e8746c 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -102,6 +102,8 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) { if (regs->eflags & X86_EFLAGS_IF) local_irq_disable(); + /* Make sure to not schedule here because we could be running + on an exception stack. */ preempt_enable_no_resched(); } @@ -385,6 +387,7 @@ void out_of_line_bug(void) static DEFINE_SPINLOCK(die_lock); static int die_owner = -1; +static unsigned int die_nest_count; unsigned __kprobes long oops_begin(void) { @@ -399,6 +402,7 @@ unsigned __kprobes long oops_begin(void) else spin_lock(&die_lock); } + die_nest_count++; die_owner = cpu; console_verbose(); bust_spinlocks(1); @@ -409,7 +413,13 @@ void __kprobes oops_end(unsigned long flags) { die_owner = -1; bust_spinlocks(0); - spin_unlock_irqrestore(&die_lock, flags); + die_nest_count--; + if (die_nest_count) + /* We still own the lock */ + local_irq_restore(flags); + else + /* Nest count reaches zero, release the lock. */ + spin_unlock_irqrestore(&die_lock, flags); if (panic_on_oops) panic("Oops"); } @@ -464,6 +474,8 @@ void __kprobes die_nmi(char *str, struct pt_regs *regs) panic("nmi watchdog"); printk("console shuts up ...\n"); oops_end(flags); + nmi_exit(); + local_irq_enable(); do_exit(SIGSEGV); } @@ -473,8 +485,6 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, { struct task_struct *tsk = current; - conditional_sti(regs); - tsk->thread.error_code = error_code; tsk->thread.trap_no = trapnr; @@ -511,6 +521,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ return; \ + conditional_sti(regs); \ do_trap(trapnr, signr, str, regs, error_code, NULL); \ } @@ -525,6 +536,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ return; \ + conditional_sti(regs); \ do_trap(trapnr, signr, str, regs, error_code, &info); \ } @@ -538,7 +550,17 @@ DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) DO_ERROR(18, SIGSEGV, "reserved", reserved) -DO_ERROR(12, SIGBUS, "stack segment", stack_segment) + +/* Runs on IST stack */ +asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code) +{ + if (notify_die(DIE_TRAP, "stack segment", regs, error_code, + 12, SIGBUS) == NOTIFY_STOP) + return; + preempt_conditional_sti(regs); + do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL); + preempt_conditional_cli(regs); +} asmlinkage void do_double_fault(struct pt_regs * regs, long error_code) { @@ -672,8 +694,9 @@ asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { return; } + preempt_conditional_sti(regs); do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); - return; + preempt_conditional_cli(regs); } /* Help handler running on IST stack to switch back to user stack diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c index 15ae9fcd65a7..e1513532df29 100644 --- a/arch/x86_64/mm/srat.c +++ b/arch/x86_64/mm/srat.c @@ -34,7 +34,10 @@ static nodemask_t nodes_found __initdata; static struct bootnode nodes[MAX_NUMNODES] __initdata; static struct bootnode nodes_add[MAX_NUMNODES] __initdata; static int found_add_area __initdata; -int hotadd_percent __initdata = 10; +int hotadd_percent __initdata = 0; +#ifndef RESERVE_HOTADD +#define hotadd_percent 0 /* Ignore all settings */ +#endif static u8 pxm2node[256] = { [0 ... 255] = 0xff }; /* Too small nodes confuse the VM badly. Usually they result @@ -103,6 +106,7 @@ static __init void bad_srat(void) int i; printk(KERN_ERR "SRAT: SRAT not used.\n"); acpi_numa = -1; + found_add_area = 0; for (i = 0; i < MAX_LOCAL_APIC; i++) apicid_to_node[i] = NUMA_NO_NODE; for (i = 0; i < MAX_NUMNODES; i++) @@ -154,7 +158,8 @@ acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa) int pxm, node; if (srat_disabled()) return; - if (pa->header.length != sizeof(struct acpi_table_processor_affinity)) { bad_srat(); + if (pa->header.length != sizeof(struct acpi_table_processor_affinity)) { + bad_srat(); return; } if (pa->flags.enabled == 0) @@ -191,15 +196,17 @@ static int hotadd_enough_memory(struct bootnode *nd) allowed = (end_pfn - e820_hole_size(0, end_pfn)) * PAGE_SIZE; allowed = (allowed / 100) * hotadd_percent; if (allocated + mem > allowed) { + unsigned long range; /* Give them at least part of their hotadd memory upto hotadd_percent It would be better to spread the limit out over multiple hotplug areas, but that is too complicated right now */ if (allocated >= allowed) return 0; - pages = (allowed - allocated + mem) / sizeof(struct page); + range = allowed - allocated; + pages = (range / PAGE_SIZE); mem = pages * sizeof(struct page); - nd->end = nd->start + pages*PAGE_SIZE; + nd->end = nd->start + range; } /* Not completely fool proof, but a good sanity check */ addr = find_e820_area(last_area_end, end_pfn<<PAGE_SHIFT, mem); |