From 0e83815be719d3391bf5ea24b7fe696c07dbd417 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 27 Jul 2009 19:43:52 +0200 Subject: x86: fix section mismatch for i386 init code Startup code for i386 in arch/x86/kernel/head_32.S is using the reference variable initial_code that is located in the .cpuinit.data section. If CONFIG_HOTPLUG_CPU is enabled, startup code is not in an init section and can be called later too. In this case the reference initial_code must be kept too. This patch fixes this. See below for the section mismatch warning. WARNING: vmlinux.o(.cpuinit.data+0x0): Section mismatch in reference from the variable initial_code to the function .init.text:i386_start_kernel() The variable __cpuinitdata initial_code references a function __init i386_start_kernel(). If i386_start_kernel is only used by initial_code then annotate i386_start_kernel with a matching annotation. Signed-off-by: Robert Richter LKML-Reference: <1248716632-26844-1-git-send-email-robert.richter@amd.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/head_32.S | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 8663afb56535..0d98a01cbdb2 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -602,7 +602,11 @@ ignore_int: #endif iret -.section .cpuinit.data,"wa" +#ifndef CONFIG_HOTPLUG_CPU + __CPUINITDATA +#else + __REFDATA +#endif .align 4 ENTRY(initial_code) .long i386_start_kernel -- cgit v1.2.3 From 6c6c51e4cc11a5456fb1172008f7c69d955af9f6 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 3 Aug 2009 22:47:32 +1000 Subject: x86: Add quirk to make Apple MacBook5,2 use reboot=pci The latest Apple MacBook (MacBook5,2) doesn't reboot successfully under Linux; neither the EFI reboot method nor the default method using the keyboard controller works (the system just hangs and doesn't reset). However, the method using the "PCI reset register" at 0xcf9 does work. This adds a quirk to detect this machine via DMI and force the reboot_type to BOOT_CF9. With this it reboots successfully without requiring a command-line option. Note that the EFI code forces reboot_type to BOOT_EFI when the machine is booted via EFI, but this overrides that since the core_initcall runs after the EFI initialization code. Signed-off-by: Paul Mackerras LKML-Reference: <19062.56420.501516.316181@cargo.ozlabs.ibm.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/reboot.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 508e982dd072..834c9da8bf9d 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -17,7 +18,6 @@ #include #ifdef CONFIG_X86_32 -# include # include # include #else @@ -404,6 +404,38 @@ EXPORT_SYMBOL(machine_real_restart); #endif /* CONFIG_X86_32 */ +/* + * Apple MacBook5,2 (2009 MacBook) needs reboot=p + */ +static int __init set_pci_reboot(const struct dmi_system_id *d) +{ + if (reboot_type != BOOT_CF9) { + reboot_type = BOOT_CF9; + printk(KERN_INFO "%s series board detected. " + "Selecting PCI-method for reboots.\n", d->ident); + } + return 0; +} + +static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { + { /* Handle problems with rebooting on Apple MacBook5,2 */ + .callback = set_pci_reboot, + .ident = "Apple MacBook", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5,2"), + }, + }, + { } +}; + +static int __init pci_reboot_init(void) +{ + dmi_check_system(pci_reboot_dmi_table); + return 0; +} +core_initcall(pci_reboot_init); + static inline void kb_wait(void) { int i; -- cgit v1.2.3 From 6a7bbd57ed50bb62c9a81ae5f2e202ca689e5964 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 3 Aug 2009 22:38:10 +1000 Subject: x86: Make 64-bit efi_ioremap use ioremap on MMIO regions Booting current 64-bit x86 kernels on the latest Apple MacBook (MacBook5,2) via EFI gives the following warning: [ 0.182209] ------------[ cut here ]------------ [ 0.182222] WARNING: at arch/x86/mm/pageattr.c:581 __cpa_process_fault+0x44/0xa0() [ 0.182227] Hardware name: MacBook5,2 [ 0.182231] CPA: called for zero pte. vaddr = ffff8800ffe00000 cpa->vaddr = ffff8800ffe00000 [ 0.182236] Modules linked in: [ 0.182242] Pid: 0, comm: swapper Not tainted 2.6.31-rc4 #6 [ 0.182246] Call Trace: [ 0.182254] [] ? __cpa_process_fault+0x44/0xa0 [ 0.182261] [] warn_slowpath_common+0x78/0xd0 [ 0.182266] [] warn_slowpath_fmt+0x64/0x70 [ 0.182272] [] ? update_page_count+0x3c/0x50 [ 0.182280] [] ? phys_pmd_init+0x140/0x22e [ 0.182286] [] __cpa_process_fault+0x44/0xa0 [ 0.182292] [] __change_page_attr_set_clr+0x5f0/0xb40 [ 0.182301] [] ? vm_unmap_aliases+0x175/0x190 [ 0.182307] [] change_page_attr_set_clr+0xfe/0x3d0 [ 0.182314] [] _set_memory_uc+0x2a/0x30 [ 0.182319] [] set_memory_uc+0x7b/0xb0 [ 0.182327] [] efi_enter_virtual_mode+0x2ad/0x2c9 [ 0.182334] [] start_kernel+0x2db/0x3f4 [ 0.182340] [] x86_64_start_reservations+0x99/0xb9 [ 0.182345] [] x86_64_start_kernel+0xe0/0xf2 [ 0.182357] ---[ end trace 4eaa2a86a8e2da22 ]--- [ 0.182982] init_memory_mapping: 00000000ffffc000-0000000100000000 [ 0.182993] 00ffffc000 - 0100000000 page 4k This happens because the 64-bit version of efi_ioremap calls init_memory_mapping for all addresses, regardless of whether they are RAM or MMIO. The EFI tables on this machine ask for runtime access to some MMIO regions: [ 0.000000] EFI: mem195: type=11, attr=0x8000000000000000, range=[0x0000000093400000-0x0000000093401000) (0MB) [ 0.000000] EFI: mem196: type=11, attr=0x8000000000000000, range=[0x00000000ffc00000-0x00000000ffc40000) (0MB) [ 0.000000] EFI: mem197: type=11, attr=0x8000000000000000, range=[0x00000000ffc40000-0x00000000ffc80000) (0MB) [ 0.000000] EFI: mem198: type=11, attr=0x8000000000000000, range=[0x00000000ffc80000-0x00000000ffca4000) (0MB) [ 0.000000] EFI: mem199: type=11, attr=0x8000000000000000, range=[0x00000000ffca4000-0x00000000ffcb4000) (0MB) [ 0.000000] EFI: mem200: type=11, attr=0x8000000000000000, range=[0x00000000ffcb4000-0x00000000ffffc000) (3MB) [ 0.000000] EFI: mem201: type=11, attr=0x8000000000000000, range=[0x00000000ffffc000-0x0000000100000000) (0MB) This arranges to pass the EFI memory type through to efi_ioremap, and makes efi_ioremap use ioremap rather than init_memory_mapping if the type is EFI_MEMORY_MAPPED_IO. With this, the above warning goes away. Signed-off-by: Paul Mackerras LKML-Reference: <19062.55858.533494.471153@cargo.ozlabs.ibm.com> Cc: Huang Ying Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/efi.h | 5 +++-- arch/x86/kernel/efi.c | 2 +- arch/x86/kernel/efi_64.c | 6 +++++- 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index edc90f23e708..8406ed7f9926 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -33,7 +33,7 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...); #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ efi_call_virt(f, a1, a2, a3, a4, a5, a6) -#define efi_ioremap(addr, size) ioremap_cache(addr, size) +#define efi_ioremap(addr, size, type) ioremap_cache(addr, size) #else /* !CONFIG_X86_32 */ @@ -84,7 +84,8 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3, efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) -extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size); +extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, + u32 type); #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 96f7ac0bbf01..19ccf6d0dccf 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -512,7 +512,7 @@ void __init efi_enter_virtual_mode(void) && end_pfn <= max_pfn_mapped)) va = __va(md->phys_addr); else - va = efi_ioremap(md->phys_addr, size); + va = efi_ioremap(md->phys_addr, size, md->type); md->virt_addr = (u64) (unsigned long) va; diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index 22c3b7828c50..ac0621a7ac3d 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c @@ -98,10 +98,14 @@ void __init efi_call_phys_epilog(void) early_runtime_code_mapping_set_exec(0); } -void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size) +void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, + u32 type) { unsigned long last_map_pfn; + if (type == EFI_MEMORY_MAPPED_IO) + return ioremap(phys_addr, size); + last_map_pfn = init_memory_mapping(phys_addr, phys_addr + size); if ((last_map_pfn << PAGE_SHIFT) < phys_addr + size) return NULL; -- cgit v1.2.3 From d2ba8b211bb8abc29aa627dbd4dce08cfbc8082b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 3 Aug 2009 14:44:54 -0700 Subject: x86: Fix assert syntax in vmlinux.lds.S Older versions of binutils did not accept the naked "ASSERT" syntax; it is considered an expression whose value needs to be assigned to something. Reported-tested-and-fixed-by: Jean Delvare Signed-off-by: H. Peter Anvin --- arch/x86/kernel/vmlinux.lds.S | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 59f31d2dd435..78d185d797de 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -393,8 +393,8 @@ SECTIONS #ifdef CONFIG_X86_32 -ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE") +. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE"); #else /* * Per-cpu symbols which need to be offset from __per_cpu_load @@ -407,12 +407,12 @@ INIT_PER_CPU(irq_stack_union); /* * Build-time check on the image size: */ -ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), - "kernel image bigger than KERNEL_IMAGE_SIZE") +. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE"); #ifdef CONFIG_SMP -ASSERT((per_cpu__irq_stack_union == 0), - "irq_stack_union is not at start of per-cpu area"); +. = ASSERT((per_cpu__irq_stack_union == 0), + "irq_stack_union is not at start of per-cpu area"); #endif #endif /* CONFIG_X86_32 */ @@ -420,7 +420,7 @@ ASSERT((per_cpu__irq_stack_union == 0), #ifdef CONFIG_KEXEC #include -ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, - "kexec control code size is too big") +. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, + "kexec control code size is too big"); #endif -- cgit v1.2.3 From 6c7184b77464261b7d55583a48accbd1350923a3 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Mon, 27 Jul 2009 09:35:07 -0500 Subject: x86, UV: Handle missing blade-local memory correctly UV blades may not have any blade-local memory. Add a field (nid) to the UV blade structure to indicates whether the node has local memory. This is needed by the GRU driver (pushed separately). Signed-off-by: Jack Steiner Cc: linux-mm@kvack.org LKML-Reference: <20090727143507.GA7006@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_hub.h | 7 +++++++ arch/x86/kernel/apic/x2apic_uv_x.c | 5 +++++ 2 files changed, 12 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 341070f7ad5c..a6dde059e02e 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -327,6 +327,7 @@ struct uv_blade_info { unsigned short nr_possible_cpus; unsigned short nr_online_cpus; unsigned short pnode; + short memory_nid; }; extern struct uv_blade_info *uv_blade_info; extern short *uv_node_to_blade; @@ -363,6 +364,12 @@ static inline int uv_blade_to_pnode(int bid) return uv_blade_info[bid].pnode; } +/* Nid of memory node on blade. -1 if no blade-local memory */ +static inline int uv_blade_to_memory_nid(int bid) +{ + return uv_blade_info[bid].memory_nid; +} + /* Determine the number of possible cpus on a blade */ static inline int uv_blade_nr_possible_cpus(int bid) { diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 096d19aea2f7..ad3f0a984cab 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -591,6 +591,8 @@ void __init uv_system_init(void) bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); uv_blade_info = kmalloc(bytes, GFP_KERNEL); BUG_ON(!uv_blade_info); + for (blade = 0; blade < uv_num_possible_blades(); blade++) + uv_blade_info[blade].memory_nid = -1; get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); @@ -629,6 +631,9 @@ void __init uv_system_init(void) lcpu = uv_blade_info[blade].nr_possible_cpus; uv_blade_info[blade].nr_possible_cpus++; + /* Any node on the blade, else will contain -1. */ + uv_blade_info[blade].memory_nid = nid; + uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; uv_cpu_hub_info(cpu)->m_val = m_val; -- cgit v1.2.3 From cc5e4fa1bd4d2f56da07f9092281afdcd2374ab9 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Mon, 27 Jul 2009 09:36:56 -0500 Subject: x86, UV: Delete mapping of MMR rangs mapped by BIOS The UV BIOS has added additional MMR ranges that are mapped via EFI virtual mode mappings. These ranges should be deleted from ranges mapped by uv_system_init(). Signed-off-by: Jack Steiner Cc: linux-mm@kvack.org LKML-Reference: <20090727143656.GA7698@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index ad3f0a984cab..445c210daf82 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -362,12 +362,6 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) BUG(); } -static __init void map_low_mmrs(void) -{ - init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE); - init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE); -} - enum map_type {map_wb, map_uc}; static __init void map_high(char *id, unsigned long base, int shift, @@ -395,26 +389,6 @@ static __init void map_gru_high(int max_pnode) map_high("GRU", gru.s.base, shift, max_pnode, map_wb); } -static __init void map_config_high(int max_pnode) -{ - union uvh_rh_gam_cfg_overlay_config_mmr_u cfg; - int shift = UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT; - - cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR); - if (cfg.s.enable) - map_high("CONFIG", cfg.s.base, shift, max_pnode, map_uc); -} - -static __init void map_mmr_high(int max_pnode) -{ - union uvh_rh_gam_mmr_overlay_config_mmr_u mmr; - int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT; - - mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); - if (mmr.s.enable) - map_high("MMR", mmr.s.base, shift, max_pnode, map_uc); -} - static __init void map_mmioh_high(int max_pnode) { union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; @@ -566,8 +540,6 @@ void __init uv_system_init(void) unsigned long mmr_base, present, paddr; unsigned short pnode_mask; - map_low_mmrs(); - m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); m_val = m_n_config.s.m_skt; n_val = m_n_config.s.n_skt; @@ -667,11 +639,10 @@ void __init uv_system_init(void) pnode = (paddr >> m_val) & pnode_mask; blade = boot_pnode_to_blade(pnode); uv_node_to_blade[nid] = blade; + max_pnode = max(pnode, max_pnode); } map_gru_high(max_pnode); - map_mmr_high(max_pnode); - map_config_high(max_pnode); map_mmioh_high(max_pnode); uv_cpu_init(); -- cgit v1.2.3 From c5997fa8d7aca3c9876a6ff71bacf27c41095ce9 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Mon, 27 Jul 2009 09:38:56 -0500 Subject: x86, UV: Fix UV apic mode Change SGI UV default apicid mode to "physical". This is required to match settings in the UV hub chip. Signed-off-by: Jack Steiner LKML-Reference: <20090727143856.GA8905@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 445c210daf82..832e908adcb5 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -261,7 +261,7 @@ struct apic apic_x2apic_uv_x = { .apic_id_registered = uv_apic_id_registered, .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = 1, /* logical */ + .irq_dest_mode = 0, /* physical */ .target_cpus = uv_target_cpus, .disable_esr = 0, -- cgit v1.2.3 From d8c7eb34c2db6268909ae8c3958be63bde254292 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 25 Jul 2009 03:23:09 -0700 Subject: x86: Don't use current_cpu_data in x2apic phys_pkg_id One system has socket 1 come up as BSP. kexeced kernel reports BSP as: [ 1.524550] Initializing cgroup subsys cpuacct [ 1.536064] initial_apicid:20 [ 1.537135] ht_mask_width:1 [ 1.538128] core_select_mask:f [ 1.539126] core_plus_mask_width:5 [ 1.558479] CPU: Physical Processor ID: 0 [ 1.559501] CPU: Processor Core ID: 0 [ 1.560539] CPU: L1 I cache: 32K, L1 D cache: 32K [ 1.579098] CPU: L2 cache: 256K [ 1.580085] CPU: L3 cache: 24576K [ 1.581108] CPU 0/0x20 -> Node 0 [ 1.596193] CPU 0 microcode level: 0xffff0008 It doesn't have correct physical processor id and will get an error: [ 38.840859] CPU0 attaching sched-domain: [ 38.848287] domain 0: span 0,8,72 level SIBLING [ 38.851151] groups: 0 8 72 [ 38.858137] domain 1: span 0,8-15,72-79 level MC [ 38.868944] groups: 0,8,72 9,73 10,74 11,75 12,76 13,77 14,78 15,79 [ 38.881383] ERROR: parent span is not a superset of domain->span [ 38.890724] domain 2: span 0-7,64-71 level CPU [ 38.899237] ERROR: domain->groups does not contain CPU0 [ 38.909229] groups: 8-15,72-79 [ 38.912547] ERROR: groups don't span domain->span [ 38.919665] domain 3: span 0-127 level NODE [ 38.930739] groups: 0-7,64-71 8-15,72-79 16-23,80-87 24-31,88-95 32-39,96-103 40-47,104-111 48-55,112-119 56-63,120-127 it turns out: we can not use current_cpu_data in phys_pgd_id for x2apic. identify_boot_cpu() is called by check_bugs() before smp_prepare_cpus() and till smp_prepare_cpus() current_cpu_data for bsp is assigned with boot_cpu_data. Just make phys_pkg_id for x2apic is aligned to xapic. Signed-off-by: Yinghai Lu Acked-by: Suresh Siddha Cc: Andrew Morton LKML-Reference: <4A6ADD0D.10002@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_cluster.c | 2 +- arch/x86/kernel/apic/x2apic_phys.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 8e4cbb255c38..2ed4e2bb3b32 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -170,7 +170,7 @@ static unsigned long set_apic_id(unsigned int id) static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb) { - return current_cpu_data.initial_apicid >> index_msb; + return initial_apicid >> index_msb; } static void x2apic_send_IPI_self(int vector) diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index a284359627e7..0b631c6a2e00 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -162,7 +162,7 @@ static unsigned long set_apic_id(unsigned int id) static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) { - return current_cpu_data.initial_apicid >> index_msb; + return initial_apicid >> index_msb; } static void x2apic_send_IPI_self(int vector) -- cgit v1.2.3 From 2a5ef41661b56cf4eee042a6967c4e14b63e8eac Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Mon, 20 Jul 2009 09:28:41 -0500 Subject: x86, UV: Complete IRQ interrupt migration in arch_enable_uv_irq() In uv_setup_irq(), the call to create_irq() initially assigns IRQ vectors to cpu 0. The subsequent call to assign_irq_vector() in arch_enable_uv_irq() migrates the IRQ to another cpu and frees the cpu 0 vector - at least it will be freed as soon as the "IRQ move" completes. arch_enable_uv_irq() needs to send a cleanup IPI to complete the IRQ move. Otherwise, assignment of GRU interrupts on large systems (>200 cpus) will exhaust the cpu 0 interrupt vectors and initialization of the GRU driver will fail. Signed-off-by: Jack Steiner LKML-Reference: <20090720142840.GA8885@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 2284a4812b68..d2ed6c5ddc80 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3793,6 +3793,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, mmr_pnode = uv_blade_to_pnode(mmr_blade); uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + return irq; } -- cgit v1.2.3 From dc731fbbadf5d65c98fcd6c86472aa286c16458a Mon Sep 17 00:00:00 2001 From: Subrata Modak Date: Tue, 21 Jul 2009 08:02:27 +0530 Subject: x86: Work around compilation warning in arch/x86/kernel/apm_32.c The following fix was initially inspired by David Howells fix few days back: http://lkml.org/lkml/2009/7/9/109 However, Ingo disapproves such fixes as it's dangerous (it can hide future, relevant warnings) - in something as performance-uncritical. So, initialize 'err' to '0' to work around a GCC false positive warning: http://lkml.org/lkml/2009/7/18/89 Signed-off-by: Subrata Modak Cc: Sachin P Sant Cc: David Howells Cc: Balbir Singh Cc: Stephen Rothwell LKML-Reference: <20090721023226.31855.67236.sendpatchset@subratamodak.linux.ibm.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apm_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 79302e9a33a4..442b5508893f 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -811,7 +811,7 @@ static int apm_do_idle(void) u8 ret = 0; int idled = 0; int polling; - int err; + int err = 0; polling = !!(current_thread_info()->status & TS_POLLING); if (polling) { -- cgit v1.2.3 From 7d5b005652bc5ae3e1e0efc53fd0e25a643ec506 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Tue, 4 Aug 2009 15:34:22 -0700 Subject: x86: Fix VMI && stack protector With CONFIG_STACK_PROTECTOR turned on, VMI doesn't boot with more than one processor. The problem is with the gs value not being initialized correctly when registering the secondary processor for VMI's case. The patch below initializes the gs value for the AP to __KERNEL_STACK_CANARY. Without this the secondary processor keeps on taking a GP on every gs access. Signed-off-by: Alok N Kataria Cc: # for v2.6.30.x LKML-Reference: <1249425262.18955.40.camel@ank32.eng.vmware.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmi_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index b263423fbe2a..95a7289e4b0c 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -441,7 +441,7 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, ap.ds = __USER_DS; ap.es = __USER_DS; ap.fs = __KERNEL_PERCPU; - ap.gs = 0; + ap.gs = __KERNEL_STACK_CANARY; ap.eflags = 0; -- cgit v1.2.3 From 087d7e56deffb611a098e7e257388a41edbeef1f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 4 Aug 2009 08:59:59 -0700 Subject: x86: Fix MSI-X initialization by using online_mask for x2apic target_cpus found a system where x2apic reports an MSI-X irq initialization failure: [ 302.859446] igbvf 0000:81:10.4: enabling device (0000 -> 0002) [ 302.874369] igbvf 0000:81:10.4: using 64bit DMA mask [ 302.879023] igbvf 0000:81:10.4: using 64bit consistent DMA mask [ 302.894386] igbvf 0000:81:10.4: enabling bus mastering [ 302.898171] igbvf 0000:81:10.4: setting latency timer to 64 [ 302.914050] reserve_memtype added 0xefb08000-0xefb0c000, track uncached-minus, req uncached-minus, ret uncached-minus [ 302.933839] reserve_memtype added 0xefb28000-0xefb29000, track uncached-minus, req uncached-minus, ret uncached-minus [ 302.940367] alloc irq_desc for 265 on node 4 [ 302.956874] alloc kstat_irqs on node 4 [ 302.959452] alloc irq_2_iommu on node 0 [ 302.974328] igbvf 0000:81:10.4: irq 265 for MSI/MSI-X [ 302.977778] alloc irq_desc for 266 on node 4 [ 302.980347] alloc kstat_irqs on node 4 [ 302.995312] free_memtype request 0xefb28000-0xefb29000 [ 302.998816] igbvf 0000:81:10.4: Failed to initialize MSI-X interrupts. ... it turns out that when trying to enable MSI-X, __assign_irq_vector(new, cfg_new, apic->target_cpus()) can not get vector because for x2apic target-cpus returns cpumask_of(0) Update that to online_mask like xapic. Signed-off-by: Yinghai Lu Acked-by: Suresh Siddha LKML-Reference: <4A785AFF.3050902@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_cluster.c | 8 +++++--- arch/x86/kernel/apic/x2apic_phys.c | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 2ed4e2bb3b32..a5371ec36776 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -17,11 +17,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return x2apic_enabled(); } -/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ - +/* + * need to use more than cpu 0, because we need more vectors when + * MSI-X are used. + */ static const struct cpumask *x2apic_target_cpus(void) { - return cpumask_of(0); + return cpu_online_mask; } /* diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 0b631c6a2e00..a8989aadc99a 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -27,11 +27,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } -/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ - +/* + * need to use more than cpu 0, because we need more vectors when + * MSI-X are used. + */ static const struct cpumask *x2apic_target_cpus(void) { - return cpumask_of(0); + return cpu_online_mask; } static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) -- cgit v1.2.3 From 498cdbfbcf98e0d2c90a26e6a02a82f043876e48 Mon Sep 17 00:00:00 2001 From: Ozan Çağlayan Date: Tue, 4 Aug 2009 19:39:31 +0300 Subject: x86: Add quirk to make Apple MacBookPro5,1 use reboot=pci MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MacBookPro5,1 is not able to reboot unless reboot=pci is set. This patch forces it through a DMI quirk specific to this device. Signed-off-by: Ozan Çağlayan LKML-Reference: <1249403971-6543-1-git-send-email-ozan@pardus.org.tr> Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 834c9da8bf9d..9eb897603705 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -405,7 +405,7 @@ EXPORT_SYMBOL(machine_real_restart); #endif /* CONFIG_X86_32 */ /* - * Apple MacBook5,2 (2009 MacBook) needs reboot=p + * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot */ static int __init set_pci_reboot(const struct dmi_system_id *d) { @@ -426,6 +426,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5,2"), }, }, + { /* Handle problems with rebooting on Apple MacBookPro5,1 */ + .callback = set_pci_reboot, + .ident = "Apple MacBookPro5,1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5,1"), + }, + }, { } }; -- cgit v1.2.3 From fdb8a42742ac95606668f73481dfb2f760658fdd Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Thu, 6 Aug 2009 15:58:13 -0700 Subject: x86: fix buffer overflow in efi_init() If the vendor name (from c16) can be longer than 100 bytes (or missing a terminating null), then the null is written past the end of vendor[]. Found with Parfait, http://research.sun.com/projects/parfait/ Signed-off-by: Roel Kluin Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: H. Peter Anvin Cc: Huang Ying --- arch/x86/kernel/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 19ccf6d0dccf..fe26ba3e3451 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -354,7 +354,7 @@ void __init efi_init(void) */ c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2); if (c16) { - for (i = 0; i < sizeof(vendor) && *c16; ++i) + for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) vendor[i] = *c16++; vendor[i] = '\0'; } else -- cgit v1.2.3 From b6e61eef4f9f94714ac3ee4a5c96862d9bcd1836 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 31 Jul 2009 12:45:41 -0700 Subject: x86: Fix serialization in pit_expect_msb() Wei Chong Tan reported a fast-PIT-calibration corner-case: | pit_expect_msb() is vulnerable to SMI disturbance corner case | in some platforms which causes /proc/cpuinfo to show wrong | CPU MHz value when quick_pit_calibrate() jumps to success | section. I think that the real issue isn't even an SMI - but the fact that in the very last iteration of the loop, there's no serializing instruction _after_ the last 'rdtsc'. So even in the absense of SMI's, we do have a situation where the cycle counter was read without proper serialization. The last check should be done outside the outer loop, since _inside_ the outer loop, we'll be testing that the PIT has the right MSB value has the right value in the next iteration. So only the _last_ iteration is special, because that's the one that will not check the PIT MSB value any more, and because the final 'get_cycles()' isn't serialized. In other words: - I'd like to move the PIT MSB check to after the last iteration, rather than in every iteration - I think we should comment on the fact that it's also a serializing instruction and so 'fences in' the TSC read. Here's a suggested replacement. Signed-off-by: Linus Torvalds Reported-by: "Tan, Wei Chong" Tested-by: "Tan, Wei Chong" LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 6e1a368d21d4..71f4368b357e 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -275,15 +275,20 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) * use the TSC value at the transitions to calculate a pretty * good value for the TSC frequencty. */ +static inline int pit_verify_msb(unsigned char val) +{ + /* Ignore LSB */ + inb(0x42); + return inb(0x42) == val; +} + static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap) { int count; u64 tsc = 0; for (count = 0; count < 50000; count++) { - /* Ignore LSB */ - inb(0x42); - if (inb(0x42) != val) + if (!pit_verify_msb(val)) break; tsc = get_cycles(); } @@ -336,8 +341,7 @@ static unsigned long quick_pit_calibrate(void) * to do that is to just read back the 16-bit counter * once from the PIT. */ - inb(0x42); - inb(0x42); + pit_verify_msb(0); if (pit_expect_msb(0xff, &tsc, &d1)) { for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) { @@ -348,8 +352,19 @@ static unsigned long quick_pit_calibrate(void) * Iterate until the error is less than 500 ppm */ delta -= tsc; - if (d1+d2 < delta >> 11) - goto success; + if (d1+d2 >= delta >> 11) + continue; + + /* + * Check the PIT one more time to verify that + * all TSC reads were stable wrt the PIT. + * + * This also guarantees serialization of the + * last cycle read ('d2') in pit_expect_msb. + */ + if (!pit_verify_msb(0xfe - i)) + break; + goto success; } } printk("Fast TSC calibration failed\n"); -- cgit v1.2.3 From 3e03bbeac541856aaaf1ce1ab0250b6a490e4099 Mon Sep 17 00:00:00 2001 From: Shunichi Fuji Date: Tue, 11 Aug 2009 03:34:40 +0900 Subject: x86: Add reboot quirk for every 5 series MacBook/Pro MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reboot does not work on my MacBook Pro 13 inch (MacBookPro5,5) too. It seems all unibody MacBook and MacBookPro require PCI reboot handling, i guess. Following model/machine ID list shows unibody MacBook/Pro have the 5 series of model number: http://www.everymac.com/systems/by_capability/macs-by-machine-model-machine-id.html Signed-off-by: Shunichi Fuji Cc: Ozan Çağlayan LKML-Reference: <30046e3b0908101134p6487ddbftd8776e4ddef204be@mail.gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 9eb897603705..a06e8d101844 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -418,20 +418,20 @@ static int __init set_pci_reboot(const struct dmi_system_id *d) } static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { - { /* Handle problems with rebooting on Apple MacBook5,2 */ + { /* Handle problems with rebooting on Apple MacBook5 */ .callback = set_pci_reboot, - .ident = "Apple MacBook", + .ident = "Apple MacBook5", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5,2"), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"), }, }, - { /* Handle problems with rebooting on Apple MacBookPro5,1 */ + { /* Handle problems with rebooting on Apple MacBookPro5 */ .callback = set_pci_reboot, - .ident = "Apple MacBookPro5,1", + .ident = "Apple MacBookPro5", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5,1"), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"), }, }, { } -- cgit v1.2.3 From 0d01f31439c1e4d602bf9fdc924ab66f407f5e38 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 9 Aug 2009 21:44:49 -0700 Subject: x86, mce: therm_throt - change when we print messages My Latitude d630 seems to be handling thermal events in SMI by lowering the max frequency of the CPU till it cools down but still leaks the "everything is normal" events. This spams the console and with high priority printks. Adjust therm_throt driver to only print messages about the fact that temperatire returned back to normal when leaving the throttling state. Also lower the severity of "back to normal" message from KERN_CRIT to KERN_INFO. Signed-off-by: Dmitry Torokhov Acked-by: H. Peter Anvin LKML-Reference: <20090810051513.0558F526EC9@mailhub.coreip.homeip.net> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/therm_throt.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index bff8dd191dd5..8bc64cfbe936 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -36,6 +36,7 @@ static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); +static DEFINE_PER_CPU(bool, thermal_throttle_active); static atomic_t therm_throt_en = ATOMIC_INIT(0); @@ -96,24 +97,27 @@ static int therm_throt_process(int curr) { unsigned int cpu = smp_processor_id(); __u64 tmp_jiffs = get_jiffies_64(); + bool was_throttled = __get_cpu_var(thermal_throttle_active); + bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr; - if (curr) + if (is_throttled) __get_cpu_var(thermal_throttle_count)++; - if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) + if (!(was_throttled ^ is_throttled) && + time_before64(tmp_jiffs, __get_cpu_var(next_check))) return 0; __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; /* if we just entered the thermal event */ - if (curr) { + if (is_throttled) { printk(KERN_CRIT "CPU%d: Temperature above threshold, " - "cpu clock throttled (total events = %lu)\n", cpu, - __get_cpu_var(thermal_throttle_count)); + "cpu clock throttled (total events = %lu)\n", + cpu, __get_cpu_var(thermal_throttle_count)); add_taint(TAINT_MACHINE_CHECK); - } else { - printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); + } else if (was_throttled) { + printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); } return 1; -- cgit v1.2.3 From 3c581a7f94542341bf0da496a226b44ac63521a8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 11 Aug 2009 10:47:36 +0200 Subject: perf_counter, x86: Fix lapic printk message Instead of this garbled bootup on UP Pentium-M systems: [ 0.015048] Performance Counters: [ 0.016004] no Local APIC, try rebooting with lapicno PMU driver, software counters only. Print: [ 0.015050] Performance Counters: [ 0.016004] no APIC, boot with the "lapic" boot parameter to force-enable it. [ 0.017003] no PMU driver, software counters only. Cf: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index a7aa8f900954..40e233a24d9f 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -1590,7 +1590,7 @@ static int p6_pmu_init(void) } if (!cpu_has_apic) { - pr_info("no Local APIC, try rebooting with lapic"); + pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); return -ENODEV; } -- cgit v1.2.3 From f64ccccb8afa43abdd63fcbd230f818d6ea0883f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 11 Aug 2009 10:26:33 +0200 Subject: perf_counter, x86: Fix generic cache events on P6-mobile CPUs Johannes Stezenbach reported that 'perf stat' does not count cache-miss and cache-references events on his Pentium-M based laptop. This is because we left them blank in p6_perfmon_event_map[], fill them in. Reported-by: Johannes Stezenbach Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_counter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 40e233a24d9f..fffc126dbdf0 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -72,8 +72,8 @@ static const u64 p6_perfmon_event_map[] = { [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000, - [PERF_COUNT_HW_CACHE_MISSES] = 0x0000, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, + [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, -- cgit v1.2.3 From fbd8b1819e80ac5a176d085fdddc3a34d1499318 Mon Sep 17 00:00:00 2001 From: Kevin Winchester Date: Mon, 10 Aug 2009 19:56:45 -0300 Subject: x86: Clear incorrectly forced X86_FEATURE_LAHF_LM flag Due to an erratum with certain AMD Athlon 64 processors, the BIOS may need to force enable the LAHF_LM capability. Unfortunately, in at least one case, the BIOS does this even for processors that do not support the functionality. Add a specific check that will clear the feature bit for processors known not to support the LAHF/SAHF instructions. Signed-off-by: Kevin Winchester Acked-by: Borislav Petkov LKML-Reference: <4A80A5AD.2000209@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index e2485b03f1cf..63fddcd082cd 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -400,6 +400,13 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) level = cpuid_eax(1); if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) set_cpu_cap(c, X86_FEATURE_REP_GOOD); + + /* + * Some BIOSes incorrectly force this feature, but only K8 + * revision D (model = 0x14) and later actually support it. + */ + if (c->x86_model < 0x14) + clear_cpu_cap(c, X86_FEATURE_LAHF_LM); } if (c->x86 == 0x10 || c->x86 == 0x11) set_cpu_cap(c, X86_FEATURE_REP_GOOD); -- cgit v1.2.3 From e8055139d996e85722984968472868d6dccb1490 Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Tue, 11 Aug 2009 20:00:11 +0200 Subject: x86: Fix oops in identify_cpu() on CPUs without CPUID Kernel is broken for x86 CPUs without CPUID since 2.6.28. It crashes with NULL pointer dereference in identify_cpu(): 766 generic_identify(c); 767 768--> if (this_cpu->c_identify) 769 this_cpu->c_identify(c); this_cpu is NULL. This is because it's only initialized in get_cpu_vendor() function, which is not called if the CPU has no CPUID instruction. Signed-off-by: Ondrej Zary LKML-Reference: <200908112000.15993.linux@rainbow-software.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 48 ++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f1961c07af9a..5ce60a88027b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -59,7 +59,30 @@ void __init setup_cpu_local_masks(void) alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); } -static const struct cpu_dev *this_cpu __cpuinitdata; +static void __cpuinit default_init(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_X86_64 + display_cacheinfo(c); +#else + /* Not much we can do here... */ + /* Check if at least it has cpuid */ + if (c->cpuid_level == -1) { + /* No cpuid. It must be an ancient CPU */ + if (c->x86 == 4) + strcpy(c->x86_model_id, "486"); + else if (c->x86 == 3) + strcpy(c->x86_model_id, "386"); + } +#endif +} + +static const struct cpu_dev __cpuinitconst default_cpu = { + .c_init = default_init, + .c_vendor = "Unknown", + .c_x86_vendor = X86_VENDOR_UNKNOWN, +}; + +static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu; DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { #ifdef CONFIG_X86_64 @@ -332,29 +355,6 @@ void switch_to_new_gdt(int cpu) static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; -static void __cpuinit default_init(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_X86_64 - display_cacheinfo(c); -#else - /* Not much we can do here... */ - /* Check if at least it has cpuid */ - if (c->cpuid_level == -1) { - /* No cpuid. It must be an ancient CPU */ - if (c->x86 == 4) - strcpy(c->x86_model_id, "486"); - else if (c->x86 == 3) - strcpy(c->x86_model_id, "386"); - } -#endif -} - -static const struct cpu_dev __cpuinitconst default_cpu = { - .c_init = default_init, - .c_vendor = "Unknown", - .c_x86_vendor = X86_VENDOR_UNKNOWN, -}; - static void __cpuinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; -- cgit v1.2.3 From 04da8a43da804723a550f00dd158fd5b5e25ae35 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 11 Aug 2009 10:40:08 +0200 Subject: perf_counter, x86: Fix/improve apic fallback Johannes Stezenbach reported that his Pentium-M based laptop does not have the local APIC enabled by default, and hence perfcounters do not get initialized. Add a fallback for this case: allow non-sampled counters and return with an error on sampled counters. This allows 'perf stat' to work out of box - and allows 'perf top' and 'perf record' to fall back on a hrtimer based sampling method. ( Passing 'lapic' on the boot line will allow hardware sampling to occur - but if the APIC is disabled permanently by the hardware then this fallback still allows more systems to use perfcounters. ) Also decouple perfcounter support from X86_LOCAL_APIC. -v2: fix typo breaking counters on all other systems ... Reported-by: Johannes Stezenbach Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- arch/x86/kernel/cpu/perf_counter.c | 34 ++++++++++++++++++++++++++++++---- 2 files changed, 31 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 738bdc6b0f8b..13ffa5df37d7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -24,6 +24,7 @@ config X86 select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_IDE select HAVE_OPROFILE + select HAVE_PERF_COUNTERS if (!M386 && !M486) select HAVE_IOREMAP_PROT select HAVE_KPROBES select ARCH_WANT_OPTIONAL_GPIOLIB @@ -742,7 +743,6 @@ config X86_UP_IOAPIC config X86_LOCAL_APIC def_bool y depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC - select HAVE_PERF_COUNTERS if (!M386 && !M486) config X86_IO_APIC def_bool y diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index fffc126dbdf0..900332b800f8 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -55,6 +55,7 @@ struct x86_pmu { int num_counters_fixed; int counter_bits; u64 counter_mask; + int apic; u64 max_period; u64 intel_ctrl; }; @@ -613,6 +614,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex); static bool reserve_pmc_hardware(void) { +#ifdef CONFIG_X86_LOCAL_APIC int i; if (nmi_watchdog == NMI_LOCAL_APIC) @@ -627,9 +629,11 @@ static bool reserve_pmc_hardware(void) if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) goto eventsel_fail; } +#endif return true; +#ifdef CONFIG_X86_LOCAL_APIC eventsel_fail: for (i--; i >= 0; i--) release_evntsel_nmi(x86_pmu.eventsel + i); @@ -644,10 +648,12 @@ perfctr_fail: enable_lapic_nmi_watchdog(); return false; +#endif } static void release_pmc_hardware(void) { +#ifdef CONFIG_X86_LOCAL_APIC int i; for (i = 0; i < x86_pmu.num_counters; i++) { @@ -657,6 +663,7 @@ static void release_pmc_hardware(void) if (nmi_watchdog == NMI_LOCAL_APIC) enable_lapic_nmi_watchdog(); +#endif } static void hw_perf_counter_destroy(struct perf_counter *counter) @@ -748,6 +755,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter) hwc->sample_period = x86_pmu.max_period; hwc->last_period = hwc->sample_period; atomic64_set(&hwc->period_left, hwc->sample_period); + } else { + /* + * If we have a PMU initialized but no APIC + * interrupts, we cannot sample hardware + * counters (user-space has to fall back and + * sample via a hrtimer based software counter): + */ + if (!x86_pmu.apic) + return -EOPNOTSUPP; } counter->destroy = hw_perf_counter_destroy; @@ -1449,18 +1465,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs) void set_perf_counter_pending(void) { +#ifdef CONFIG_X86_LOCAL_APIC apic->send_IPI_self(LOCAL_PENDING_VECTOR); +#endif } void perf_counters_lapic_init(void) { - if (!x86_pmu_initialized()) +#ifdef CONFIG_X86_LOCAL_APIC + if (!x86_pmu.apic || !x86_pmu_initialized()) return; /* * Always use NMI for PMU */ apic_write(APIC_LVTPC, APIC_DM_NMI); +#endif } static int __kprobes @@ -1484,7 +1504,9 @@ perf_counter_nmi_handler(struct notifier_block *self, regs = args->regs; +#ifdef CONFIG_X86_LOCAL_APIC apic_write(APIC_LVTPC, APIC_DM_NMI); +#endif /* * Can't rely on the handled return value to say it was our NMI, two * counters could trigger 'simultaneously' raising two back-to-back NMIs. @@ -1515,6 +1537,7 @@ static struct x86_pmu p6_pmu = { .event_map = p6_pmu_event_map, .raw_event = p6_pmu_raw_event, .max_events = ARRAY_SIZE(p6_perfmon_event_map), + .apic = 1, .max_period = (1ULL << 31) - 1, .version = 0, .num_counters = 2, @@ -1541,6 +1564,7 @@ static struct x86_pmu intel_pmu = { .event_map = intel_pmu_event_map, .raw_event = intel_pmu_raw_event, .max_events = ARRAY_SIZE(intel_perfmon_event_map), + .apic = 1, /* * Intel PMCs cannot be accessed sanely above 32 bit width, * so we install an artificial 1<<31 period regardless of @@ -1564,6 +1588,7 @@ static struct x86_pmu amd_pmu = { .num_counters = 4, .counter_bits = 48, .counter_mask = (1ULL << 48) - 1, + .apic = 1, /* use highest bit to detect overflow */ .max_period = (1ULL << 47) - 1, }; @@ -1589,13 +1614,14 @@ static int p6_pmu_init(void) return -ENODEV; } + x86_pmu = p6_pmu; + if (!cpu_has_apic) { pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); - return -ENODEV; + pr_info("no hardware sampling interrupt available.\n"); + x86_pmu.apic = 0; } - x86_pmu = p6_pmu; - return 0; } -- cgit v1.2.3 From 74d46d6b2d23d44d72c37df4c6a5d2e782f7b088 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 21 Jul 2009 17:11:50 +0900 Subject: percpu, sparc64: fix sparse possible cpu map handling percpu code has been assuming num_possible_cpus() == nr_cpu_ids which is incorrect if cpu_possible_map contains holes. This causes percpu code to access beyond allocated memories and vmalloc areas. On a sparc64 machine with cpus 0 and 2 (u60), this triggers the following warning or fails boot. WARNING: at /devel/tj/os/work/mm/vmalloc.c:106 vmap_page_range_noflush+0x1f0/0x240() Modules linked in: Call Trace: [00000000004b17d0] vmap_page_range_noflush+0x1f0/0x240 [00000000004b1840] map_vm_area+0x20/0x60 [00000000004b1950] __vmalloc_area_node+0xd0/0x160 [0000000000593434] deflate_init+0x14/0xe0 [0000000000583b94] __crypto_alloc_tfm+0xd4/0x1e0 [00000000005844f0] crypto_alloc_base+0x50/0xa0 [000000000058b898] alg_test_comp+0x18/0x80 [000000000058dad4] alg_test+0x54/0x180 [000000000058af00] cryptomgr_test+0x40/0x60 [0000000000473098] kthread+0x58/0x80 [000000000042b590] kernel_thread+0x30/0x60 [0000000000472fd0] kthreadd+0xf0/0x160 ---[ end trace 429b268a213317ba ]--- This patch fixes generic percpu functions and sparc64 setup_per_cpu_areas() so that they handle sparse cpu_possible_map properly. Please note that on x86, cpu_possible_map() doesn't contain holes and thus num_possible_cpus() == nr_cpu_ids and this patch doesn't cause any behavior difference. Signed-off-by: Tejun Heo Acked-by: David S. Miller Cc: Ingo Molnar --- arch/sparc/kernel/smp_64.c | 4 ++-- arch/x86/kernel/setup_percpu.c | 14 +++++++------- mm/percpu.c | 33 ++++++++++++++++++--------------- 3 files changed, 27 insertions(+), 24 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index fa44eaf8d897..3691907a43b4 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1499,7 +1499,7 @@ void __init setup_per_cpu_areas(void) dyn_size = pcpur_size - static_size - PERCPU_MODULE_RESERVE; - ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0])); + ptrs_size = PFN_ALIGN(nr_cpu_ids * sizeof(pcpur_ptrs[0])); pcpur_ptrs = alloc_bootmem(ptrs_size); for_each_possible_cpu(cpu) { @@ -1514,7 +1514,7 @@ void __init setup_per_cpu_areas(void) /* allocate address and map */ vm.flags = VM_ALLOC; - vm.size = num_possible_cpus() * PCPU_CHUNK_SIZE; + vm.size = nr_cpu_ids * PCPU_CHUNK_SIZE; vm_area_register_early(&vm, PCPU_CHUNK_SIZE); for_each_possible_cpu(cpu) { diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 29a3eef7cf4a..07d81916f212 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -165,7 +165,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) if (!chosen) { size_t vm_size = VMALLOC_END - VMALLOC_START; - size_t tot_size = num_possible_cpus() * PMD_SIZE; + size_t tot_size = nr_cpu_ids * PMD_SIZE; /* on non-NUMA, embedding is better */ if (!pcpu_need_numa()) @@ -199,7 +199,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; /* allocate pointer array and alloc large pages */ - map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0])); + map_size = PFN_ALIGN(nr_cpu_ids * sizeof(pcpul_map[0])); pcpul_map = alloc_bootmem(map_size); for_each_possible_cpu(cpu) { @@ -228,7 +228,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) /* allocate address and map */ pcpul_vm.flags = VM_ALLOC; - pcpul_vm.size = num_possible_cpus() * PMD_SIZE; + pcpul_vm.size = nr_cpu_ids * PMD_SIZE; vm_area_register_early(&pcpul_vm, PMD_SIZE); for_each_possible_cpu(cpu) { @@ -250,8 +250,8 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) PMD_SIZE, pcpul_vm.addr, NULL); /* sort pcpul_map array for pcpu_lpage_remapped() */ - for (i = 0; i < num_possible_cpus() - 1; i++) - for (j = i + 1; j < num_possible_cpus(); j++) + for (i = 0; i < nr_cpu_ids - 1; i++) + for (j = i + 1; j < nr_cpu_ids; j++) if (pcpul_map[i].ptr > pcpul_map[j].ptr) { struct pcpul_ent tmp = pcpul_map[i]; pcpul_map[i] = pcpul_map[j]; @@ -288,7 +288,7 @@ void *pcpu_lpage_remapped(void *kaddr) { void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK); unsigned long offset = (unsigned long)kaddr & ~PMD_MASK; - int left = 0, right = num_possible_cpus() - 1; + int left = 0, right = nr_cpu_ids - 1; int pos; /* pcpul in use at all? */ @@ -377,7 +377,7 @@ static ssize_t __init setup_pcpu_4k(size_t static_size) pcpu4k_nr_static_pages = PFN_UP(static_size); /* unaligned allocations can't be freed, round up to page size */ - pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus() + pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * nr_cpu_ids * sizeof(pcpu4k_pages[0])); pcpu4k_pages = alloc_bootmem(pages_size); diff --git a/mm/percpu.c b/mm/percpu.c index b70f2acd8853..e0be1146f617 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -8,12 +8,12 @@ * * This is percpu allocator which can handle both static and dynamic * areas. Percpu areas are allocated in chunks in vmalloc area. Each - * chunk is consisted of num_possible_cpus() units and the first chunk - * is used for static percpu variables in the kernel image (special - * boot time alloc/init handling necessary as these areas need to be - * brought up before allocation services are running). Unit grows as - * necessary and all units grow or shrink in unison. When a chunk is - * filled up, another chunk is allocated. ie. in vmalloc area + * chunk is consisted of nr_cpu_ids units and the first chunk is used + * for static percpu variables in the kernel image (special boot time + * alloc/init handling necessary as these areas need to be brought up + * before allocation services are running). Unit grows as necessary + * and all units grow or shrink in unison. When a chunk is filled up, + * another chunk is allocated. ie. in vmalloc area * * c0 c1 c2 * ------------------- ------------------- ------------ @@ -558,7 +558,7 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end, bool flush_tlb) { - unsigned int last = num_possible_cpus() - 1; + unsigned int last = nr_cpu_ids - 1; unsigned int cpu; /* unmap must not be done on immutable chunk */ @@ -643,7 +643,7 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size, */ static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end) { - unsigned int last = num_possible_cpus() - 1; + unsigned int last = nr_cpu_ids - 1; unsigned int cpu; int err; @@ -1067,9 +1067,9 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, PFN_UP(size_sum)); pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; - pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; + pcpu_chunk_size = nr_cpu_ids * pcpu_unit_size; pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) - + num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *); + + nr_cpu_ids * pcpu_unit_pages * sizeof(struct page *); if (dyn_size < 0) dyn_size = pcpu_unit_size - static_size - reserved_size; @@ -1248,7 +1248,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, } else pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); - chunk_size = pcpue_unit_size * num_possible_cpus(); + chunk_size = pcpue_unit_size * nr_cpu_ids; pcpue_ptr = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); @@ -1259,12 +1259,15 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, } /* return the leftover and copy */ - for_each_possible_cpu(cpu) { + for (cpu = 0; cpu < nr_cpu_ids; cpu++) { void *ptr = pcpue_ptr + cpu * pcpue_unit_size; - free_bootmem(__pa(ptr + pcpue_size), - pcpue_unit_size - pcpue_size); - memcpy(ptr, __per_cpu_load, static_size); + if (cpu_possible(cpu)) { + free_bootmem(__pa(ptr + pcpue_size), + pcpue_unit_size - pcpue_size); + memcpy(ptr, __per_cpu_load, static_size); + } else + free_bootmem(__pa(ptr), pcpue_unit_size); } /* we're ready, commit */ -- cgit v1.2.3 From 3ef12c3c97603bad405d30c989718cc9405e2759 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Fri, 14 Aug 2009 13:56:37 -0500 Subject: x86: Fix UV BAU destination subnode id The SGI UV Broadcast Assist Unit is used to send TLB shootdown messages to remote nodes of the system. The header of the message must contain the subnode id of the block in the receiving hub that handles such messages. It should always be 0x10, the id of the "LB" block. It had previously been documented as a "must be zero" field. Signed-off-by: Cliff Wickman Acked-by: Jack Steiner LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_bau.h | 2 +- arch/x86/kernel/tlb_uv.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index bddd44f2f0ab..80e2984f521c 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -133,7 +133,7 @@ struct bau_msg_payload { * see table 4.2.3.0.1 in broacast_assist spec. */ struct bau_msg_header { - unsigned int dest_subnodeid:6; /* must be zero */ + unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ /* bits 5:0 */ unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */ /* bits 20:6 */ /* first bit in node_map */ diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 8ccabb8a2f6a..77b9689f8edb 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -744,6 +744,7 @@ uv_activation_descriptor_init(int node, int pnode) * note that base_dest_nodeid is actually a nasid. */ ad2->header.base_dest_nodeid = uv_partition_base_pnode << 1; + ad2->header.dest_subnodeid = 0x10; /* the LB */ ad2->header.command = UV_NET_ENDPOINT_INTD; ad2->header.int_both = 1; /* -- cgit v1.2.3 From 4e5c25d405e18a2f279ca2bfc855508ec3a0186b Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 16 Aug 2009 15:54:37 +0100 Subject: x86, mce: therm_throt: Don't log redundant normality 0d01f31439c1e4d602bf9fdc924ab66f407f5e38 "x86, mce: therm_throt - change when we print messages" removed redundant announcements of "Temperature/speed normal". They're not worth logging and remove their accompanying "Machine check events logged" messages as well from the console. Signed-off-by: Hugh Dickins Cc: Hidetoshi Seto Cc: Andi Kleen Cc: Dmitry Torokhov LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/therm_throt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 8bc64cfbe936..5957a93e5173 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -116,11 +116,14 @@ static int therm_throt_process(int curr) cpu, __get_cpu_var(thermal_throttle_count)); add_taint(TAINT_MACHINE_CHECK); - } else if (was_throttled) { + return 1; + } + if (was_throttled) { printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); + return 1; } - return 1; + return 0; } #ifdef CONFIG_SYSFS -- cgit v1.2.3 From 52459ab91363343af8ae252766e9da762344a2e7 Mon Sep 17 00:00:00 2001 From: Leonardo Potenza Date: Sun, 16 Aug 2009 18:55:48 +0200 Subject: x86: Annotate section mismatch warnings in kernel/apic/x2apic_uv_x.c The function uv_acpi_madt_oem_check() has been marked __init, the struct apic_x2apic_uv_x has been marked __refdata. The aim is to address the following section mismatch messages: WARNING: arch/x86/kernel/apic/built-in.o(.data+0x1368): Section mismatch in reference from the variable apic_x2apic_uv_x to the function .cpuinit.text:uv_wakeup_secondary() The variable apic_x2apic_uv_x references the function __cpuinit uv_wakeup_secondary() If the reference is valid then annotate the variable with __init* or __refdata (see linux/init.h) or name the variable: *driver, *_template, *_timer, *_sht, *_ops, *_probe, *_probe_one, *_console, WARNING: arch/x86/kernel/built-in.o(.data+0x68e8): Section mismatch in reference from the variable apic_x2apic_uv_x to the function .cpuinit.text:uv_wakeup_secondary() The variable apic_x2apic_uv_x references the function __cpuinit uv_wakeup_secondary() If the reference is valid then annotate the variable with __init* or __refdata (see linux/init.h) or name the variable: *driver, *_template, *_timer, *_sht, *_ops, *_probe, *_probe_one, *_console, WARNING: arch/x86/built-in.o(.text+0x7b36f): Section mismatch in reference from the function uv_acpi_madt_oem_check() to the function .init.text:early_ioremap() The function uv_acpi_madt_oem_check() references the function __init early_ioremap(). This is often because uv_acpi_madt_oem_check lacks a __init annotation or the annotation of early_ioremap is wrong. WARNING: arch/x86/built-in.o(.text+0x7b38d): Section mismatch in reference from the function uv_acpi_madt_oem_check() to the function .init.text:early_iounmap() The function uv_acpi_madt_oem_check() references the function __init early_iounmap(). This is often because uv_acpi_madt_oem_check lacks a __init annotation or the annotation of early_iounmap is wrong. WARNING: arch/x86/built-in.o(.data+0x8668): Section mismatch in reference from the variable apic_x2apic_uv_x to the function .cpuinit.text:uv_wakeup_secondary() The variable apic_x2apic_uv_x references the function __cpuinit uv_wakeup_secondary() If the reference is valid then annotate the variable with __init* or __refdata (see linux/init.h) or name the variable: *driver, *_template, *_timer, *_sht, *_ops, *_probe, *_probe_one, *_console, Signed-off-by: Leonardo Potenza LKML-Reference: <200908161855.48302.lpotenza@inwind.it> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 832e908adcb5..601159374e87 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -46,7 +46,7 @@ static int early_get_nodeid(void) return node_id.s.node_id; } -static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { if (!strcmp(oem_id, "SGI")) { if (!strcmp(oem_table_id, "UVL")) @@ -253,7 +253,7 @@ static void uv_send_IPI_self(int vector) apic_write(APIC_SELF_IPI, vector); } -struct apic apic_x2apic_uv_x = { +struct apic __refdata apic_x2apic_uv_x = { .name = "UV large system", .probe = NULL, -- cgit v1.2.3 From c7f6fa44115d401e89db730f357629d39f8e4ba6 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 28 Jul 2009 23:52:54 +0200 Subject: x86, mce: don't log boot MCEs on Pentium M (model == 13) CPUs On my legacy Pentium M laptop (Acer Extensa 2900) I get bogus MCE on a cold boot with CONFIG_X86_NEW_MCE enabled, i.e. (after decoding it with mcelog): MCE 0 HARDWARE ERROR. This is *NOT* a software problem! Please contact your hardware vendor CPU 0 BANK 1 MCG status: MCi status: Error overflow Uncorrected error Error enabled Processor context corrupt MCA: Data CACHE Level-1 UNKNOWN Error STATUS f200000000000195 MCGSTATUS 0 [ The other STATUS values observed: f2000000000001b5 (... UNKNOWN error) and f200000000000115 (... READ Error). To verify that this is not a CONFIG_X86_NEW_MCE bug I also modified the CONFIG_X86_OLD_MCE code (which doesn't log any MCEs) to dump content of STATUS MSR before it is cleared during initialization. ] Since the bogus MCE results in a kernel taint (which in turn disables lockdep support) don't log boot MCEs on Pentium M (model == 13) CPUs by default ("mce=bootlog" boot parameter can be be used to get the old behavior). Signed-off-by: Bartlomiej Zolnierkiewicz Reviewed-by: Andi Kleen Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 1cfb623ce11c..a0c2910d96a0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1273,6 +1273,10 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c) if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && monarch_timeout < 0) monarch_timeout = USEC_PER_SEC; + + /* There are also broken BIOSes on some Pentium M systems. */ + if (c->x86 == 6 && c->x86_model == 13 && mce_bootlog < 0) + mce_bootlog = 0; } if (monarch_timeout < 0) monarch_timeout = 0; -- cgit v1.2.3 From e412cd257e0d51e0ecbb89f50953835b5a0681b2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 17 Aug 2009 10:19:00 +0200 Subject: x86, mce: Don't initialize MCEs on unknown CPUs An older test-box started hanging at the following point during bootup: [ 0.022996] Mount-cache hash table entries: 512 [ 0.024996] Initializing cgroup subsys debug [ 0.025996] Initializing cgroup subsys cpuacct [ 0.026995] Initializing cgroup subsys devices [ 0.027995] Initializing cgroup subsys freezer [ 0.028995] mce: CPU supports 5 MCE banks I've bisected it down to commit 4efc0670 ("x86, mce: use 64bit machine check code on 32bit"), which utilizes the MCE code on 32-bit systems too. The problem is caused by this detail in my config: # CONFIG_CPU_SUP_INTEL is not set This disables the quirks in mce_cpu_quirks() but still enables MCE support - which then hangs due to the missing quirk workaround needed on this CPU: if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0) mce_banks[0].init = 0; The safe solution is to not initialize MCEs if we dont know on what CPU we are running (or if that CPU's support code got disabled in the config). Also be a bit more defensive on 32-bit systems: dont do a boot-time dump of pending MCEs not just on the specific system that we found a problem with (Pentium-M), but earlier ones as well. Now this problem is probably not common and disabling CPU support is rare - but still being more defensive in something we turned on for a wide range of CPUs is prudent. Cc: Hidetoshi Seto LKML-Reference: Message-ID: <4A88E3E4.40506@jp.fujitsu.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a0c2910d96a0..01213048f62f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1226,8 +1226,13 @@ static void mce_init(void) } /* Add per CPU specific workarounds here */ -static void mce_cpu_quirks(struct cpuinfo_x86 *c) +static int mce_cpu_quirks(struct cpuinfo_x86 *c) { + if (c->x86_vendor == X86_VENDOR_UNKNOWN) { + pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); + return -EOPNOTSUPP; + } + /* This should be disabled by the BIOS, but isn't always */ if (c->x86_vendor == X86_VENDOR_AMD) { if (c->x86 == 15 && banks > 4) { @@ -1274,14 +1279,19 @@ static void mce_cpu_quirks(struct cpuinfo_x86 *c) monarch_timeout < 0) monarch_timeout = USEC_PER_SEC; - /* There are also broken BIOSes on some Pentium M systems. */ - if (c->x86 == 6 && c->x86_model == 13 && mce_bootlog < 0) + /* + * There are also broken BIOSes on some Pentium M and + * earlier systems: + */ + if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0) mce_bootlog = 0; } if (monarch_timeout < 0) monarch_timeout = 0; if (mce_bootlog != 0) mce_panic_timeout = 30; + + return 0; } static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) @@ -1342,11 +1352,10 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c) if (!mce_available(c)) return; - if (mce_cap_init() < 0) { + if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) { mce_disabled = 1; return; } - mce_cpu_quirks(c); machine_check_vector = do_machine_check; -- cgit v1.2.3 From 78b89ecd731798f2fec8cc26ca90739253cec33c Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 18 Aug 2009 16:41:33 +0100 Subject: i386: Fix section mismatches for init code with !HOTPLUG_CPU Commit 0e83815be719d3391bf5ea24b7fe696c07dbd417 changed the section the initial_code variable gets allocated in, in an attempt to address a section conflict warning. This, however created a new section conflict when building without HOTPLUG_CPU. The apparently only (reasonable) way to address this is to always use __REFDATA. Once at it, also fix a second section mismatch when not using HOTPLUG_CPU. Signed-off-by: Jan Beulich Cc: Robert Richter LKML-Reference: <4A8AE7CD020000780001054B@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_32.S | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 0d98a01cbdb2..cc827ac9e8d3 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -261,9 +261,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20); * which will be freed later */ -#ifndef CONFIG_HOTPLUG_CPU -.section .init.text,"ax",@progbits -#endif +__CPUINIT #ifdef CONFIG_SMP ENTRY(startup_32_smp) @@ -602,11 +600,7 @@ ignore_int: #endif iret -#ifndef CONFIG_HOTPLUG_CPU - __CPUINITDATA -#else __REFDATA -#endif .align 4 ENTRY(initial_code) .long i386_start_kernel -- cgit v1.2.3 From f833bab87fca5c3ce13778421b1365845843b976 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 17 Aug 2009 14:34:59 -0700 Subject: clockevent: Prevent dead lock on clockevents_lock Currently clockevents_notify() is called with interrupts enabled at some places and interrupts disabled at some other places. This results in a deadlock in this scenario. cpu A holds clockevents_lock in clockevents_notify() with irqs enabled cpu B waits for clockevents_lock in clockevents_notify() with irqs disabled cpu C doing set_mtrr() which will try to rendezvous of all the cpus. This will result in C and A come to the rendezvous point and waiting for B. B is stuck forever waiting for the spinlock and thus not reaching the rendezvous point. Fix the clockevents code so that clockevents_lock is taken with interrupts disabled and thus avoid the above deadlock. Also call lapic_timer_propagate_broadcast() on the destination cpu so that we avoid calling smp_call_function() in the clockevents notifier chain. This issue left us wondering if we need to change the MTRR rendezvous logic to use stop machine logic (instead of smp_call_function) or add a check in spinlock debug code to see if there are other spinlocks which gets taken under both interrupts enabled/disabled conditions. Signed-off-by: Suresh Siddha Signed-off-by: Venkatesh Pallipadi Cc: "Pallipadi Venkatesh" Cc: "Brown Len" LKML-Reference: <1250544899.2709.210.camel@sbs-t61.sc.intel.com> Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process.c | 6 +----- drivers/acpi/processor_idle.c | 6 ++++-- kernel/time/clockevents.c | 16 ++++++++++------ kernel/time/tick-broadcast.c | 7 +++---- 4 files changed, 18 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 994dd6a4a2a0..071166a4ba83 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -519,16 +519,12 @@ static void c1e_idle(void) if (!cpumask_test_cpu(cpu, c1e_mask)) { cpumask_set_cpu(cpu, c1e_mask); /* - * Force broadcast so ACPI can not interfere. Needs - * to run with interrupts enabled as it uses - * smp_function_call. + * Force broadcast so ACPI can not interfere. */ - local_irq_enable(); clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE, &cpu); printk(KERN_INFO "Switch to broadcast mode on CPU%d\n", cpu); - local_irq_disable(); } clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 0efa59e7e3af..66393d5c4c7c 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -162,8 +162,9 @@ static void lapic_timer_check_state(int state, struct acpi_processor *pr, pr->power.timer_broadcast_on_state = state; } -static void lapic_timer_propagate_broadcast(struct acpi_processor *pr) +static void lapic_timer_propagate_broadcast(void *arg) { + struct acpi_processor *pr = (struct acpi_processor *) arg; unsigned long reason; reason = pr->power.timer_broadcast_on_state < INT_MAX ? @@ -635,7 +636,8 @@ static int acpi_processor_power_verify(struct acpi_processor *pr) working++; } - lapic_timer_propagate_broadcast(pr); + smp_call_function_single(pr->id, lapic_timer_propagate_broadcast, + pr, 1); return (working); } diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index a6dcd67b041d..620b58abdc32 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -137,11 +137,12 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires, */ int clockevents_register_notifier(struct notifier_block *nb) { + unsigned long flags; int ret; - spin_lock(&clockevents_lock); + spin_lock_irqsave(&clockevents_lock, flags); ret = raw_notifier_chain_register(&clockevents_chain, nb); - spin_unlock(&clockevents_lock); + spin_unlock_irqrestore(&clockevents_lock, flags); return ret; } @@ -178,16 +179,18 @@ static void clockevents_notify_released(void) */ void clockevents_register_device(struct clock_event_device *dev) { + unsigned long flags; + BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); BUG_ON(!dev->cpumask); - spin_lock(&clockevents_lock); + spin_lock_irqsave(&clockevents_lock, flags); list_add(&dev->list, &clockevent_devices); clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev); clockevents_notify_released(); - spin_unlock(&clockevents_lock); + spin_unlock_irqrestore(&clockevents_lock, flags); } EXPORT_SYMBOL_GPL(clockevents_register_device); @@ -235,8 +238,9 @@ void clockevents_exchange_device(struct clock_event_device *old, void clockevents_notify(unsigned long reason, void *arg) { struct list_head *node, *tmp; + unsigned long flags; - spin_lock(&clockevents_lock); + spin_lock_irqsave(&clockevents_lock, flags); clockevents_do_notify(reason, arg); switch (reason) { @@ -251,7 +255,7 @@ void clockevents_notify(unsigned long reason, void *arg) default: break; } - spin_unlock(&clockevents_lock); + spin_unlock_irqrestore(&clockevents_lock, flags); } EXPORT_SYMBOL_GPL(clockevents_notify); #endif diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 877dbedc3118..c2ec25087a35 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -205,11 +205,11 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) * Powerstate information: The system enters/leaves a state, where * affected devices might stop */ -static void tick_do_broadcast_on_off(void *why) +static void tick_do_broadcast_on_off(unsigned long *reason) { struct clock_event_device *bc, *dev; struct tick_device *td; - unsigned long flags, *reason = why; + unsigned long flags; int cpu, bc_stopped; spin_lock_irqsave(&tick_broadcast_lock, flags); @@ -276,8 +276,7 @@ void tick_broadcast_on_off(unsigned long reason, int *oncpu) printk(KERN_ERR "tick-broadcast: ignoring broadcast for " "offline CPU #%d\n", *oncpu); else - smp_call_function_single(*oncpu, tick_do_broadcast_on_off, - &reason, 1); + tick_do_broadcast_on_off(&reason); } /* -- cgit v1.2.3 From 5416c2663517ebd0be0664c4d4ce3df0b116c059 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 17 Aug 2009 12:25:41 -0700 Subject: x86: make sure load_percpu_segment has no stackprotector load_percpu_segment() is used to set up the per-cpu segment registers, which are also used for -fstack-protector. Make sure that the load_percpu_segment() function doesn't have stackprotector enabled. [ Impact: allow percpu setup before calling stack-protected functions ] Signed-off-by: Jeremy Fitzhardinge --- arch/x86/kernel/cpu/Makefile | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 4e242f9a06e4..8b5b9b625ede 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -7,6 +7,10 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_common.o = -pg endif +# Make sure load_percpu_segment has no stackprotector +nostackp := $(call cc-option, -fno-stack-protector) +CFLAGS_common.o := $(nostackp) + obj-y := intel_cacheinfo.o addon_cpuid_features.o obj-y += proc.o capflags.o powerflags.o common.o obj-y += vmware.o hypervisor.o -- cgit v1.2.3 From 83d349f35e1ae72268c5104dbf9ab2ae635425d4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 21 Aug 2009 09:23:57 -0700 Subject: x86: don't send an IPI to the empty set of CPU's MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The default_send_IPI_mask_logical() function uses the "flat" APIC mode to send an IPI to a set of CPU's at once, but if that set happens to be empty, some older local APIC's will apparently be rather unhappy. So just warn if a caller gives us an empty mask, and ignore it. This fixes a regression in 2.6.30.x, due to commit 4595f9620 ("x86: change flush_tlb_others to take a const struct cpumask"), documented here: http://bugzilla.kernel.org/show_bug.cgi?id=13933 which causes a silent lock-up. It only seems to happen on PPro, P2, P3 and Athlon XP cores. Most developers sadly (or not so sadly, if you're a developer..) have more modern CPU's. Also, on x86-64 we don't use the flat APIC mode, so it would never trigger there even if the APIC didn't like sending an empty IPI mask. Reported-by: Pavel Vilim Reported-and-tested-by: Thomas Björnell Reported-and-tested-by: Martin Rogge Cc: Mike Travis Cc: Ingo Molnar Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/x86/kernel/apic/ipi.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index dbf5445727a9..6ef00ba4c886 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -106,6 +106,9 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) unsigned long mask = cpumask_bits(cpumask)[0]; unsigned long flags; + if (WARN_ONCE(!mask, "empty IPI mask")) + return; + local_irq_save(flags); WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); __default_send_IPI_dest_field(mask, vector, apic->dest_logical); -- cgit v1.2.3 From c62e43202e7cf50ca24bce58b255df7bf5de69d0 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 25 Aug 2009 14:50:53 +0100 Subject: x86: Fix build with older binutils and consolidate linker script binutils prior to 2.17 can't deal with the currently possible situation of a new segment following the per-CPU segment, but that new segment being empty - objcopy misplaces the .bss (and perhaps also the .brk) sections outside of any segment. However, the current ordering of sections really just appears to be the effect of cumulative unrelated changes; re-ordering things allows to easily guarantee that the segment following the per-CPU one is non-empty, and at once eliminates the need for the bogus data.init2 segment. Once touching this code, also use the various data section helper macros from include/asm-generic/vmlinux.lds.h. -v2: fix !SMP builds. Signed-off-by: Jan Beulich Cc: LKML-Reference: <4A94085D02000078000119A5@vpn.id2.novell.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmlinux.lds.S | 126 ++++++++++++++++-------------------------- 1 file changed, 47 insertions(+), 79 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 78d185d797de..9fc178255c04 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -46,11 +46,10 @@ PHDRS { data PT_LOAD FLAGS(7); /* RWE */ #ifdef CONFIG_X86_64 user PT_LOAD FLAGS(7); /* RWE */ - data.init PT_LOAD FLAGS(7); /* RWE */ #ifdef CONFIG_SMP percpu PT_LOAD FLAGS(7); /* RWE */ #endif - data.init2 PT_LOAD FLAGS(7); /* RWE */ + init PT_LOAD FLAGS(7); /* RWE */ #endif note PT_NOTE FLAGS(0); /* ___ */ } @@ -103,65 +102,43 @@ SECTIONS __stop___ex_table = .; } :text = 0x9090 - RODATA + RO_DATA(PAGE_SIZE) /* Data */ - . = ALIGN(PAGE_SIZE); .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Start of data section */ _sdata = .; - DATA_DATA - CONSTRUCTORS - } :data + + /* init_task */ + INIT_TASK_DATA(THREAD_SIZE) #ifdef CONFIG_X86_32 - /* 32 bit has nosave before _edata */ - . = ALIGN(PAGE_SIZE); - .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { - __nosave_begin = .; - *(.data.nosave) - . = ALIGN(PAGE_SIZE); - __nosave_end = .; - } + /* 32 bit has nosave before _edata */ + NOSAVE_DATA #endif - . = ALIGN(PAGE_SIZE); - .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { - *(.data.page_aligned) + PAGE_ALIGNED_DATA(PAGE_SIZE) *(.data.idt) - } -#ifdef CONFIG_X86_32 - . = ALIGN(32); -#else - . = ALIGN(PAGE_SIZE); - . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); -#endif - .data.cacheline_aligned : - AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { - *(.data.cacheline_aligned) - } + CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES) - /* rarely changed data like cpu maps */ -#ifdef CONFIG_X86_32 - . = ALIGN(32); -#else - . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES); -#endif - .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { - *(.data.read_mostly) + DATA_DATA + CONSTRUCTORS + + /* rarely changed data like cpu maps */ + READ_MOSTLY_DATA(CONFIG_X86_INTERNODE_CACHE_BYTES) /* End of data section */ _edata = .; - } + } :data #ifdef CONFIG_X86_64 #define VSYSCALL_ADDR (-10*1024*1024) -#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + \ - SIZEOF(.data.read_mostly) + 4095) & ~(4095)) -#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + \ - SIZEOF(.data.read_mostly) + 4095) & ~(4095)) +#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data) + SIZEOF(.data) + \ + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1)) +#define VSYSCALL_VIRT_ADDR ((ADDR(.data) + SIZEOF(.data) + \ + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1)) #define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR) #define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) @@ -227,35 +204,29 @@ SECTIONS #endif /* CONFIG_X86_64 */ - /* init_task */ - . = ALIGN(THREAD_SIZE); - .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { - *(.data.init_task) + /* Init code and data - will be freed after init */ + . = ALIGN(PAGE_SIZE); + .init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) { + __init_begin = .; /* paired with __init_end */ } -#ifdef CONFIG_X86_64 - :data.init -#endif +#if defined(CONFIG_X86_64) && defined(CONFIG_SMP) /* - * smp_locks might be freed after init - * start/end must be page aligned + * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the + * output PHDR, so the next output section - .init.text - should + * start another segment - init. */ - . = ALIGN(PAGE_SIZE); - .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { - __smp_locks = .; - *(.smp_locks) - __smp_locks_end = .; - . = ALIGN(PAGE_SIZE); - } + PERCPU_VADDR(0, :percpu) +#endif - /* Init code and data - will be freed after init */ - . = ALIGN(PAGE_SIZE); .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { - __init_begin = .; /* paired with __init_end */ _sinittext = .; INIT_TEXT _einittext = .; } +#ifdef CONFIG_X86_64 + :init +#endif .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { INIT_DATA @@ -326,17 +297,7 @@ SECTIONS } #endif -#if defined(CONFIG_X86_64) && defined(CONFIG_SMP) - /* - * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the - * output PHDR, so the next output section - __data_nosave - should - * start another section data.init2. Also, pda should be at the head of - * percpu area. Preallocate it and define the percpu offset symbol - * so that it can be accessed as a percpu variable. - */ - . = ALIGN(PAGE_SIZE); - PERCPU_VADDR(0, :percpu) -#else +#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) PERCPU(PAGE_SIZE) #endif @@ -347,15 +308,22 @@ SECTIONS __init_end = .; } + /* + * smp_locks might be freed after init + * start/end must be page aligned + */ + . = ALIGN(PAGE_SIZE); + .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { + __smp_locks = .; + *(.smp_locks) + __smp_locks_end = .; + . = ALIGN(PAGE_SIZE); + } + #ifdef CONFIG_X86_64 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { - . = ALIGN(PAGE_SIZE); - __nosave_begin = .; - *(.data.nosave) - . = ALIGN(PAGE_SIZE); - __nosave_end = .; - } :data.init2 - /* use another section data.init2, see PERCPU_VADDR() above */ + NOSAVE_DATA + } #endif /* BSS */ -- cgit v1.2.3 From 295594e9cf6ae2efd73371777aa8feba0f87f42f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 25 Aug 2009 13:44:44 -0700 Subject: x86: Fix vSMP boot crash 2.6.31-rc7 does not boot on vSMP systems: [ 8.501108] CPU31: Thermal monitoring enabled (TM1) [ 8.501127] CPU 31 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8 [ 8.650254] CPU31: Intel(R) Xeon(R) CPU E5540 @ 2.53GHz stepping 04 [ 8.710324] Brought up 32 CPUs [ 8.713916] Total of 32 processors activated (162314.96 BogoMIPS). [ 8.721489] ERROR: parent span is not a superset of domain->span [ 8.727686] ERROR: domain->groups does not contain CPU0 [ 8.733091] ERROR: groups don't span domain->span [ 8.737975] ERROR: domain->cpu_power not set [ 8.742416] Ravikiran Thirumalai bisected it to: | commit 2759c3287de27266e06f1f4e82cbd2d65f6a044c | x86: don't call read_apic_id if !cpu_has_apic The problem is that on vSMP systems the CPUID derived initial-APICIDs are overlapping - so we need to fall back on hard_smp_processor_id() which reads the local APIC. Both come from the hardware (influenced by firmware though) so it's a tough call which one to trust. Doing the quirk expresses the vSMP property properly and also does not affect other systems, so we go for this solution instead of a revert. Reported-and-Tested-by: Ravikiran Thirumalai Signed-off-by: Yinghai Lu Cc: Linus Torvalds Cc: Cyrill Gorcunov Cc: Shai Fultheim Cc: Suresh Siddha LKML-Reference: <4A944D3C.5030100@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/probe_64.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index bc3e880f9b82..fcec2f1d34a1 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -44,6 +44,11 @@ static struct apic *apic_probe[] __initdata = { NULL, }; +static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) +{ + return hard_smp_processor_id() >> index_msb; +} + /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. */ @@ -69,6 +74,11 @@ void __init default_setup_apic_routing(void) printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); } + if (is_vsmp_box()) { + /* need to update phys_pkg_id */ + apic->phys_pkg_id = apicid_phys_pkg_id; + } + /* * Now that apic routing model is selected, configure the * fault handling for intr remapping. -- cgit v1.2.3