From baa676fcf8d555269bd0a5a2496782beee55824d Mon Sep 17 00:00:00 2001 From: Andrzej Pietrasiewicz Date: Tue, 27 Mar 2012 14:28:18 +0200 Subject: X86 & IA64: adapt for dma_map_ops changes Adapt core x86 and IA64 architecture code for dma_map_ops changes: replace alloc/free_coherent with generic alloc/free methods. Signed-off-by: Andrzej Pietrasiewicz Acked-by: Kyungmin Park [removed swiotlb related changes and replaced it with wrappers, merged with IA64 patch to avoid inter-patch dependences in intel-iommu code] Signed-off-by: Marek Szyprowski Reviewed-by: Arnd Bergmann Acked-by: Tony Luck --- arch/x86/include/asm/dma-mapping.h | 26 ++++++++++++++++---------- arch/x86/kernel/amd_gart_64.c | 11 ++++++----- arch/x86/kernel/pci-calgary_64.c | 9 +++++---- arch/x86/kernel/pci-dma.c | 3 ++- arch/x86/kernel/pci-nommu.c | 6 +++--- arch/x86/kernel/pci-swiotlb.c | 17 +++++++++++++---- arch/x86/xen/pci-swiotlb-xen.c | 4 ++-- 7 files changed, 47 insertions(+), 29 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index ed3065fd6314..4b4331d71935 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -59,7 +59,8 @@ extern int dma_supported(struct device *hwdev, u64 mask); extern int dma_set_mask(struct device *dev, u64 mask); extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag); + dma_addr_t *dma_addr, gfp_t flag, + struct dma_attrs *attrs); static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) { @@ -111,9 +112,11 @@ static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp) return gfp; } +#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL) + static inline void * -dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp) +dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, struct dma_attrs *attrs) { struct dma_map_ops *ops = get_dma_ops(dev); void *memory; @@ -129,18 +132,21 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, if (!is_device_dma_capable(dev)) return NULL; - if (!ops->alloc_coherent) + if (!ops->alloc) return NULL; - memory = ops->alloc_coherent(dev, size, dma_handle, - dma_alloc_coherent_gfp_flags(dev, gfp)); + memory = ops->alloc(dev, size, dma_handle, + dma_alloc_coherent_gfp_flags(dev, gfp), attrs); debug_dma_alloc_coherent(dev, size, *dma_handle, memory); return memory; } -static inline void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t bus) +#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL) + +static inline void dma_free_attrs(struct device *dev, size_t size, + void *vaddr, dma_addr_t bus, + struct dma_attrs *attrs) { struct dma_map_ops *ops = get_dma_ops(dev); @@ -150,8 +156,8 @@ static inline void dma_free_coherent(struct device *dev, size_t size, return; debug_dma_free_coherent(dev, size, vaddr, bus); - if (ops->free_coherent) - ops->free_coherent(dev, size, vaddr, bus); + if (ops->free) + ops->free(dev, size, vaddr, bus, attrs); } #endif diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index b1e7c7f7a0af..e66311200cbd 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -477,7 +477,7 @@ error: /* allocate and map a coherent mapping */ static void * gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, - gfp_t flag) + gfp_t flag, struct dma_attrs 
*attrs) { dma_addr_t paddr; unsigned long align_mask; @@ -500,7 +500,8 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, } __free_pages(page, get_order(size)); } else - return dma_generic_alloc_coherent(dev, size, dma_addr, flag); + return dma_generic_alloc_coherent(dev, size, dma_addr, flag, + attrs); return NULL; } @@ -508,7 +509,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, /* free a coherent mapping */ static void gart_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr) + dma_addr_t dma_addr, struct dma_attrs *attrs) { gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL); free_pages((unsigned long)vaddr, get_order(size)); @@ -700,8 +701,8 @@ static struct dma_map_ops gart_dma_ops = { .unmap_sg = gart_unmap_sg, .map_page = gart_map_page, .unmap_page = gart_unmap_page, - .alloc_coherent = gart_alloc_coherent, - .free_coherent = gart_free_coherent, + .alloc = gart_alloc_coherent, + .free = gart_free_coherent, .mapping_error = gart_mapping_error, }; diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 726494b58345..07b587c5a2d2 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -431,7 +431,7 @@ static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr, } static void* calgary_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag) + dma_addr_t *dma_handle, gfp_t flag, struct dma_attrs *attrs) { void *ret = NULL; dma_addr_t mapping; @@ -464,7 +464,8 @@ error: } static void calgary_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle) + void *vaddr, dma_addr_t dma_handle, + struct dma_attrs *attrs) { unsigned int npages; struct iommu_table *tbl = find_iommu_table(dev); @@ -477,8 +478,8 @@ static void calgary_free_coherent(struct device *dev, size_t size, } static struct dma_map_ops calgary_dma_ops = { - .alloc_coherent = calgary_alloc_coherent, - .free_coherent = calgary_free_coherent, + .alloc = calgary_alloc_coherent, + .free = calgary_free_coherent, .map_sg = calgary_map_sg, .unmap_sg = calgary_unmap_sg, .map_page = calgary_map_page, diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 1c4d769e21ea..75e1cc19e630 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -96,7 +96,8 @@ void __init pci_iommu_alloc(void) } } void *dma_generic_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag) + dma_addr_t *dma_addr, gfp_t flag, + struct dma_attrs *attrs) { unsigned long dma_mask; struct page *page; diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 3af4af810c07..f96050685b46 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -75,7 +75,7 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, } static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr) + dma_addr_t dma_addr, struct dma_attrs *attrs) { free_pages((unsigned long)vaddr, get_order(size)); } @@ -96,8 +96,8 @@ static void nommu_sync_sg_for_device(struct device *dev, } struct dma_map_ops nommu_dma_ops = { - .alloc_coherent = dma_generic_alloc_coherent, - .free_coherent = nommu_free_coherent, + .alloc = dma_generic_alloc_coherent, + .free = nommu_free_coherent, .map_sg = nommu_map_sg, .map_page = nommu_map_page, .sync_single_for_device = nommu_sync_single_for_device, diff --git a/arch/x86/kernel/pci-swiotlb.c 
b/arch/x86/kernel/pci-swiotlb.c index 8f972cbddef0..6c483ba98b9c 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -15,21 +15,30 @@ int swiotlb __read_mostly; static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags) + dma_addr_t *dma_handle, gfp_t flags, + struct dma_attrs *attrs) { void *vaddr; - vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags); + vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags, + attrs); if (vaddr) return vaddr; return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags); } +static void x86_swiotlb_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_addr, + struct dma_attrs *attrs) +{ + swiotlb_free_coherent(dev, size, vaddr, dma_addr); +} + static struct dma_map_ops swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, - .alloc_coherent = x86_swiotlb_alloc_coherent, - .free_coherent = swiotlb_free_coherent, + .alloc = x86_swiotlb_alloc_coherent, + .free = x86_swiotlb_free_coherent, .sync_single_for_cpu = swiotlb_sync_single_for_cpu, .sync_single_for_device = swiotlb_sync_single_for_device, .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index b480d4207a4c..967633ad98c4 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -12,8 +12,8 @@ int xen_swiotlb __read_mostly; static struct dma_map_ops xen_swiotlb_dma_ops = { .mapping_error = xen_swiotlb_dma_mapping_error, - .alloc_coherent = xen_swiotlb_alloc_coherent, - .free_coherent = xen_swiotlb_free_coherent, + .alloc = xen_swiotlb_alloc_coherent, + .free = xen_swiotlb_free_coherent, .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu, .sync_single_for_device = xen_swiotlb_sync_single_for_device, .sync_sg_for_cpu = xen_swiotlb_sync_sg_for_cpu, -- cgit v1.2.1 From 5f054e31c63be774bf1ce252f20d56012a00f8a5 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 29 Mar 2012 15:38:31 +1030 Subject: documentation: remove references to cpu_*_map. This has been obsolescent for a while, fix documentation and misc comments. Signed-off-by: Rusty Russell --- arch/x86/xen/enlighten.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index b132ade26f77..4f51bebac02c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -967,7 +967,7 @@ void xen_setup_shared_info(void) xen_setup_mfn_list_list(); } -/* This is called once we have the cpu_possible_map */ +/* This is called once we have the cpu_possible_mask */ void xen_setup_vcpu_info_placement(void) { int cpu; -- cgit v1.2.1 From 99dd5497e5be4fe4194cad181d45fd6569a930db Mon Sep 17 00:00:00 2001 From: "Liu, Chuansheng" Date: Mon, 26 Mar 2012 07:11:50 +0000 Subject: x86: Preserve lazy irq disable semantics in fixup_irqs() The default irq_disable() sematics are to mark the interrupt disabled, but keep it unmasked. If the interrupt is delivered while marked disabled, the low level interrupt handler masks it and marks it pending. This is important for detecting wakeup interrupts during suspend and for edge type interrupts to avoid losing interrupts. fixup_irqs() moves the interrupts away from an offlined cpu. For certain interrupt types it needs to mask the interrupt line before changing the affinity. After affinity has changed the interrupt line is unmasked again, but only if it is not marked disabled. 
This breaks the lazy irq disable semantics and causes problems in suspend as the interrupt can be lost or wakeup functionality is broken. Check irqd_irq_masked() instead of irqd_irq_disabled() because irqd_irq_masked() is only set, when the core code actually masked the interrupt line. If it's not set, we unmask the interrupt and let the lazy irq disable logic deal with an eventually incoming interrupt. [ tglx: Massaged changelog and added a comment ] Signed-off-by: liu chuansheng Cc: Yanmin Zhang Link: http://lkml.kernel.org/r/27240C0AC20F114CBF8149A2696CBE4A05DFB3@SHSMSX101.ccr.corp.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/irq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 7943e0c21bde..3dafc6003b7c 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -282,8 +282,13 @@ void fixup_irqs(void) else if (!(warned++)) set_affinity = 0; + /* + * We unmask if the irq was not marked masked by the + * core code. That respects the lazy irq disable + * behaviour. + */ if (!irqd_can_move_in_process_context(data) && - !irqd_irq_disabled(data) && chip->irq_unmask) + !irqd_irq_masked(data) && chip->irq_unmask) chip->irq_unmask(data); raw_spin_unlock(&desc->lock); -- cgit v1.2.1 From 1d24fb3684f347226747c6b11ea426b7b992694e Mon Sep 17 00:00:00 2001 From: "zhuangfeiran@ict.ac.cn" Date: Wed, 28 Mar 2012 23:27:00 +0000 Subject: x86 bpf_jit: fix a bug in emitting the 16-bit immediate operand of AND When K >= 0xFFFF0000, AND needs the two least significant bytes of K as its operand, but EMIT2() gives it the least significant byte of K and 0x2. EMIT() should be used here to replace EMIT2(). Signed-off-by: Feiran Zhuang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- arch/x86/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 5671752f8d9c..5a5b6e4dd738 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -289,7 +289,7 @@ void bpf_jit_compile(struct sk_filter *fp) EMIT2(0x24, K & 0xFF); /* and imm8,%al */ } else if (K >= 0xFFFF0000) { EMIT2(0x66, 0x25); /* and imm16,%ax */ - EMIT2(K, 2); + EMIT(K, 2); } else { EMIT1_off32(0x25, K); /* and imm32,%eax */ } -- cgit v1.2.1 From 3751d3e85cf693e10e2c47c03c8caa65e171099b Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Fri, 23 Mar 2012 09:35:05 -0500 Subject: x86,kgdb: Fix DEBUG_RODATA limitation using text_poke() There has long been a limitation using software breakpoints with a kernel compiled with CONFIG_DEBUG_RODATA going back to 2.6.26. For this particular patch, it will apply cleanly and has been tested all the way back to 2.6.36. The kprobes code uses the text_poke() function which accommodates writing a breakpoint into a read-only page. The x86 kgdb code can solve the problem similarly by overriding the default breakpoint set/remove routines and using text_poke() directly. The x86 kgdb code will first attempt to use the traditional probe_kernel_write(), and next try using a the text_poke() function. The break point install method is tracked such that the correct break point removal routine will get called later on. Cc: x86@kernel.org Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. 
Peter Anvin Cc: stable@vger.kernel.org # >= 2.6.36 Inspried-by: Masami Hiramatsu Signed-off-by: Jason Wessel --- arch/x86/kernel/kgdb.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index fdc37b3d0ce3..b9bd9d8de665 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -43,6 +43,8 @@ #include #include #include +#include +#include #include #include @@ -742,6 +744,64 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip) regs->ip = ip; } +int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt) +{ + int err; + char opc[BREAK_INSTR_SIZE]; + + bpt->type = BP_BREAKPOINT; + err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr, + BREAK_INSTR_SIZE); + if (err) + return err; + err = probe_kernel_write((char *)bpt->bpt_addr, + arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE); +#ifdef CONFIG_DEBUG_RODATA + if (!err) + return err; + /* + * It is safe to call text_poke() because normal kernel execution + * is stopped on all cores, so long as the text_mutex is not locked. + */ + if (mutex_is_locked(&text_mutex)) + return -EBUSY; + text_poke((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr, + BREAK_INSTR_SIZE); + err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); + if (err) + return err; + if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE)) + return -EINVAL; + bpt->type = BP_POKE_BREAKPOINT; +#endif /* CONFIG_DEBUG_RODATA */ + return err; +} + +int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt) +{ +#ifdef CONFIG_DEBUG_RODATA + int err; + char opc[BREAK_INSTR_SIZE]; + + if (bpt->type != BP_POKE_BREAKPOINT) + goto knl_write; + /* + * It is safe to call text_poke() because normal kernel execution + * is stopped on all cores, so long as the text_mutex is not locked. + */ + if (mutex_is_locked(&text_mutex)) + goto knl_write; + text_poke((void *)bpt->bpt_addr, bpt->saved_instr, BREAK_INSTR_SIZE); + err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE); + if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE)) + goto knl_write; + return err; +knl_write: +#endif /* CONFIG_DEBUG_RODATA */ + return probe_kernel_write((char *)bpt->bpt_addr, + (char *)bpt->saved_instr, BREAK_INSTR_SIZE); +} + struct kgdb_arch arch_kgdb_ops = { /* Breakpoint instruction: */ .gdb_bpt_instr = { 0xcc }, -- cgit v1.2.1 From f6365201d8a21fb347260f89d6e9b3e718d63c70 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 29 Mar 2012 14:49:17 -0700 Subject: x86: Remove the ancient and deprecated disable_hlt() and enable_hlt() facility The X86_32-only disable_hlt/enable_hlt mechanism was used by the 32-bit floppy driver. Its effect was to replace the use of the HLT instruction inside default_idle() with cpu_relax() - essentially it turned off the use of HLT. This workaround was commented in the code as: "disable hlt during certain critical i/o operations" "This halt magic was a workaround for ancient floppy DMA wreckage. It should be safe to remove." H. Peter Anvin additionally adds: "To the best of my knowledge, no-hlt only existed because of flaky power distributions on 386/486 systems which were sold to run DOS. Since DOS did no power management of any kind, including HLT, the power draw was fairly uniform; when exposed to the much hhigher noise levels you got when Linux used HLT caused some of these systems to fail. They were by far in the minority even back then." 
Alan Cox further says: "Also for the Cyrix 5510 which tended to go castors up if a HLT occurred during a DMA cycle and on a few other boxes HLT during DMA tended to go astray. Do we care ? I doubt it. The 5510 was pretty obscure, the 5520 fixed it, the 5530 is probably the oldest still in any kind of use." So, let's finally drop this. Signed-off-by: Len Brown Signed-off-by: Josh Boyer Signed-off-by: Andrew Morton Acked-by: "H. Peter Anvin" Acked-by: Alan Cox Cc: Stephen Hemminger Cc: Link: http://lkml.kernel.org/n/tip-3rhk9bzf0x9rljkv488tloib@git.kernel.org [ If anyone cares then alternative instruction patching could be used to replace HLT with a one-byte NOP instruction. Much simpler. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 10 ---------- arch/x86/kernel/process.c | 24 ------------------------ 2 files changed, 34 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 7284c9a6a0b5..4fa7dcceb6c0 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -974,16 +974,6 @@ extern bool cpu_has_amd_erratum(const int *); #define cpu_has_amd_erratum(x) (false) #endif /* CONFIG_CPU_SUP_AMD */ -#ifdef CONFIG_X86_32 -/* - * disable hlt during certain critical i/o operations - */ -#define HAVE_DISABLE_HLT -#endif - -void disable_hlt(void); -void enable_hlt(void); - void cpu_idle_wait(void); extern unsigned long arch_align_stack(unsigned long sp); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index a33afaa5ddb7..1d92a5ab6e8b 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -362,34 +362,10 @@ void (*pm_idle)(void); EXPORT_SYMBOL(pm_idle); #endif -#ifdef CONFIG_X86_32 -/* - * This halt magic was a workaround for ancient floppy DMA - * wreckage. It should be safe to remove. - */ -static int hlt_counter; -void disable_hlt(void) -{ - hlt_counter++; -} -EXPORT_SYMBOL(disable_hlt); - -void enable_hlt(void) -{ - hlt_counter--; -} -EXPORT_SYMBOL(enable_hlt); - -static inline int hlt_use_halt(void) -{ - return (!hlt_counter && boot_cpu_data.hlt_works_ok); -} -#else static inline int hlt_use_halt(void) { return 1; } -#endif #ifndef CONFIG_SMP static inline void play_dead(void) -- cgit v1.2.1 From c0e9afc0da6cb0f11497e5ea83377b3c451450e0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 28 Mar 2012 11:51:17 -0700 Subject: x86: Use -mno-avx when available On gccs that support AVX it's a good idea to disable that too, similar to how SSE2, SSE1 etc. are already disabled. This prevents the compiler from generating AVX ever implicitely. No failure observed, just from review. [ hpa: Marking this for urgent and stable, simply because the patch will either have absolutely no effect *or* it will avoid potentially very hard to debug failures. ] Signed-off-by: Andi Kleen Link: http://lkml.kernel.org/r/1332960678-11879-1-git-send-email-andi@firstfloor.org Signed-off-by: H. 
Peter Anvin Cc: --- arch/x86/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 968dbe24a255..41a7237606a3 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -129,6 +129,7 @@ KBUILD_CFLAGS += -Wno-sign-compare KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # prevent gcc from generating any FP code by mistake KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) +KBUILD_CFLAGS += $(call cc-option,-mno-avx,) KBUILD_CFLAGS += $(mflags-y) KBUILD_AFLAGS += $(mflags-y) -- cgit v1.2.1 From dba69d1092e291e257fb5673a3ad0e4c87878ebc Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Sun, 1 Apr 2012 13:53:36 -0300 Subject: x86, kvm: Call restore_sched_clock_state() only after %gs is initialized s2ram broke due to this KVM commit: b74f05d61b73 x86: kvmclock: abstract save/restore sched_clock_state restore_sched_clock_state() methods use percpu data, therefore they must run after %gs is initialized, but before mtrr_bp_restore() (due to lockstat using sched_clock). Move it to the correct place. Reported-and-tested-by: Konstantin Khlebnikov Signed-off-by: Marcelo Tosatti Cc: Avi Kivity Signed-off-by: Ingo Molnar --- arch/x86/power/cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 47936830968c..218cdb16163c 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -225,13 +225,13 @@ static void __restore_processor_state(struct saved_context *ctxt) fix_processor_context(); do_fpu_end(); + x86_platform.restore_sched_clock_state(); mtrr_bp_restore(); } /* Needed by apm.c */ void restore_processor_state(void) { - x86_platform.restore_sched_clock_state(); __restore_processor_state(&saved_context); } #ifdef CONFIG_X86_32 -- cgit v1.2.1 From 7b8e6da46b921d30ac1553cac56d8fb74f0b431d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 27 Mar 2012 16:50:42 +0200 Subject: perf/x86/p4: Add format attributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Steven reported his P4 not booting properly, the missing format attributes cause a NULL ptr deref. Cure this by adding the missing format specification. I took the format description out of the comment near p4_config_pack*() and hope that comment is still relatively accurate. Reported-by: Steven Rostedt Reported-by: Bruno Prémont Tested-by: Steven Rostedt Signed-off-by: Peter Zijlstra Cc: Jiri Olsa Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1332859842.16159.227.camel@twins Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_p4.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index ef484d9d0a25..a2dfacfd7103 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -1271,6 +1271,17 @@ done: return num ? 
-EINVAL : 0; } +PMU_FORMAT_ATTR(cccr, "config:0-31" ); +PMU_FORMAT_ATTR(escr, "config:32-62"); +PMU_FORMAT_ATTR(ht, "config:63" ); + +static struct attribute *intel_p4_formats_attr[] = { + &format_attr_cccr.attr, + &format_attr_escr.attr, + &format_attr_ht.attr, + NULL, +}; + static __initconst const struct x86_pmu p4_pmu = { .name = "Netburst P4/Xeon", .handle_irq = p4_pmu_handle_irq, @@ -1305,6 +1316,8 @@ static __initconst const struct x86_pmu p4_pmu = { * the former idea is taken from OProfile code */ .perfctr_second_write = 1, + + .format_attrs = intel_p4_formats_attr, }; __init int p4_pmu_init(void) -- cgit v1.2.1 From a998d4342337c82dacdc0897d30a9364de1576a1 Mon Sep 17 00:00:00 2001 From: Jan Seiffert Date: Fri, 30 Mar 2012 05:24:05 +0000 Subject: bpf jit: Let the x86 jit handle negative offsets Now the helper function from filter.c for negative offsets is exported, it can be used it in the jit to handle negative offsets. First modify the asm load helper functions to handle: - know positive offsets - know negative offsets - any offset then the compiler can be modified to explicitly use these helper when appropriate. This fixes the case of a negative X register and allows to lift the restriction that bpf programs with negative offsets can't be jited. Signed-of-by: Jan Seiffert Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- arch/x86/net/bpf_jit.S | 122 +++++++++++++++++++++++++++++++++----------- arch/x86/net/bpf_jit_comp.c | 41 +++++++++------ 2 files changed, 115 insertions(+), 48 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S index 66870223f8c5..877b9a1b2152 100644 --- a/arch/x86/net/bpf_jit.S +++ b/arch/x86/net/bpf_jit.S @@ -18,17 +18,17 @@ * r9d : hlen = skb->len - skb->data_len */ #define SKBDATA %r8 - -sk_load_word_ind: - .globl sk_load_word_ind - - add %ebx,%esi /* offset += X */ -# test %esi,%esi /* if (offset < 0) goto bpf_error; */ - js bpf_error +#define SKF_MAX_NEG_OFF $(-0x200000) /* SKF_LL_OFF from filter.h */ sk_load_word: .globl sk_load_word + test %esi,%esi + js bpf_slow_path_word_neg + +sk_load_word_positive_offset: + .globl sk_load_word_positive_offset + mov %r9d,%eax # hlen sub %esi,%eax # hlen - offset cmp $3,%eax @@ -37,16 +37,15 @@ sk_load_word: bswap %eax /* ntohl() */ ret - -sk_load_half_ind: - .globl sk_load_half_ind - - add %ebx,%esi /* offset += X */ - js bpf_error - sk_load_half: .globl sk_load_half + test %esi,%esi + js bpf_slow_path_half_neg + +sk_load_half_positive_offset: + .globl sk_load_half_positive_offset + mov %r9d,%eax sub %esi,%eax # hlen - offset cmp $1,%eax @@ -55,14 +54,15 @@ sk_load_half: rol $8,%ax # ntohs() ret -sk_load_byte_ind: - .globl sk_load_byte_ind - add %ebx,%esi /* offset += X */ - js bpf_error - sk_load_byte: .globl sk_load_byte + test %esi,%esi + js bpf_slow_path_byte_neg + +sk_load_byte_positive_offset: + .globl sk_load_byte_positive_offset + cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */ jle bpf_slow_path_byte movzbl (SKBDATA,%rsi),%eax @@ -73,25 +73,21 @@ sk_load_byte: * * Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf) * Must preserve A accumulator (%eax) - * Inputs : %esi is the offset value, already known positive + * Inputs : %esi is the offset value */ -ENTRY(sk_load_byte_msh) - CFI_STARTPROC +sk_load_byte_msh: + .globl sk_load_byte_msh + test %esi,%esi + js bpf_slow_path_byte_msh_neg + +sk_load_byte_msh_positive_offset: + .globl sk_load_byte_msh_positive_offset cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */ jle 
bpf_slow_path_byte_msh movzbl (SKBDATA,%rsi),%ebx and $15,%bl shl $2,%bl ret - CFI_ENDPROC -ENDPROC(sk_load_byte_msh) - -bpf_error: -# force a return 0 from jit handler - xor %eax,%eax - mov -8(%rbp),%rbx - leaveq - ret /* rsi contains offset and can be scratched */ #define bpf_slow_path_common(LEN) \ @@ -138,3 +134,67 @@ bpf_slow_path_byte_msh: shl $2,%al xchg %eax,%ebx ret + +#define sk_negative_common(SIZE) \ + push %rdi; /* save skb */ \ + push %r9; \ + push SKBDATA; \ +/* rsi already has offset */ \ + mov $SIZE,%ecx; /* size */ \ + call bpf_internal_load_pointer_neg_helper; \ + test %rax,%rax; \ + pop SKBDATA; \ + pop %r9; \ + pop %rdi; \ + jz bpf_error + + +bpf_slow_path_word_neg: + cmp SKF_MAX_NEG_OFF, %esi /* test range */ + jl bpf_error /* offset lower -> error */ +sk_load_word_negative_offset: + .globl sk_load_word_negative_offset + sk_negative_common(4) + mov (%rax), %eax + bswap %eax + ret + +bpf_slow_path_half_neg: + cmp SKF_MAX_NEG_OFF, %esi + jl bpf_error +sk_load_half_negative_offset: + .globl sk_load_half_negative_offset + sk_negative_common(2) + mov (%rax),%ax + rol $8,%ax + movzwl %ax,%eax + ret + +bpf_slow_path_byte_neg: + cmp SKF_MAX_NEG_OFF, %esi + jl bpf_error +sk_load_byte_negative_offset: + .globl sk_load_byte_negative_offset + sk_negative_common(1) + movzbl (%rax), %eax + ret + +bpf_slow_path_byte_msh_neg: + cmp SKF_MAX_NEG_OFF, %esi + jl bpf_error +sk_load_byte_msh_negative_offset: + .globl sk_load_byte_msh_negative_offset + xchg %eax,%ebx /* dont lose A , X is about to be scratched */ + sk_negative_common(1) + movzbl (%rax),%eax + and $15,%al + shl $2,%al + xchg %eax,%ebx + ret + +bpf_error: +# force a return 0 from jit handler + xor %eax,%eax + mov -8(%rbp),%rbx + leaveq + ret diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 5a5b6e4dd738..0597f95b6da6 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -30,7 +30,10 @@ int bpf_jit_enable __read_mostly; * assembly code in arch/x86/net/bpf_jit.S */ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[], sk_load_byte_msh[]; -extern u8 sk_load_word_ind[], sk_load_half_ind[], sk_load_byte_ind[]; +extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[]; +extern u8 sk_load_byte_positive_offset[], sk_load_byte_msh_positive_offset[]; +extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[]; +extern u8 sk_load_byte_negative_offset[], sk_load_byte_msh_negative_offset[]; static inline u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) { @@ -117,6 +120,8 @@ static inline void bpf_flush_icache(void *start, void *end) set_fs(old_fs); } +#define CHOOSE_LOAD_FUNC(K, func) \ + ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) void bpf_jit_compile(struct sk_filter *fp) { @@ -473,44 +478,46 @@ void bpf_jit_compile(struct sk_filter *fp) #endif break; case BPF_S_LD_W_ABS: - func = sk_load_word; + func = CHOOSE_LOAD_FUNC(K, sk_load_word); common_load: seen |= SEEN_DATAREF; - if ((int)K < 0) { - /* Abort the JIT because __load_pointer() is needed. */ - goto out; - } t_offset = func - (image + addrs[i]); EMIT1_off32(0xbe, K); /* mov imm32,%esi */ EMIT1_off32(0xe8, t_offset); /* call */ break; case BPF_S_LD_H_ABS: - func = sk_load_half; + func = CHOOSE_LOAD_FUNC(K, sk_load_half); goto common_load; case BPF_S_LD_B_ABS: - func = sk_load_byte; + func = CHOOSE_LOAD_FUNC(K, sk_load_byte); goto common_load; case BPF_S_LDX_B_MSH: - if ((int)K < 0) { - /* Abort the JIT because __load_pointer() is needed. 
*/ - goto out; - } + func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh); seen |= SEEN_DATAREF | SEEN_XREG; - t_offset = sk_load_byte_msh - (image + addrs[i]); + t_offset = func - (image + addrs[i]); EMIT1_off32(0xbe, K); /* mov imm32,%esi */ EMIT1_off32(0xe8, t_offset); /* call sk_load_byte_msh */ break; case BPF_S_LD_W_IND: - func = sk_load_word_ind; + func = sk_load_word; common_load_ind: seen |= SEEN_DATAREF | SEEN_XREG; t_offset = func - (image + addrs[i]); - EMIT1_off32(0xbe, K); /* mov imm32,%esi */ + if (K) { + if (is_imm8(K)) { + EMIT3(0x8d, 0x73, K); /* lea imm8(%rbx), %esi */ + } else { + EMIT2(0x8d, 0xb3); /* lea imm32(%rbx),%esi */ + EMIT(K, 4); + } + } else { + EMIT2(0x89,0xde); /* mov %ebx,%esi */ + } EMIT1_off32(0xe8, t_offset); /* call sk_load_xxx_ind */ break; case BPF_S_LD_H_IND: - func = sk_load_half_ind; + func = sk_load_half; goto common_load_ind; case BPF_S_LD_B_IND: - func = sk_load_byte_ind; + func = sk_load_byte; goto common_load_ind; case BPF_S_JMP_JA: t_offset = addrs[i + K] - addrs[i]; -- cgit v1.2.1 From fea5295324ce7ce6ccfe0909cc6740a2d34aa5a3 Mon Sep 17 00:00:00 2001 From: Sasikantha babu Date: Wed, 21 Mar 2012 18:49:00 +0530 Subject: KVM: PMU: Fix integer constant is too large warning in kvm_pmu_set_msr() Signed-off-by: Sasikantha babu Signed-off-by: Avi Kivity --- arch/x86/kvm/pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index a73f0c104813..173df38dbda5 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -369,7 +369,7 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) case MSR_CORE_PERF_FIXED_CTR_CTRL: if (pmu->fixed_ctr_ctrl == data) return 0; - if (!(data & 0xfffffffffffff444)) { + if (!(data & 0xfffffffffffff444ull)) { reprogram_fixed_counters(pmu, data); return 0; } -- cgit v1.2.1 From 7a4f5ad051e02139a9f1c0f7f4b1acb88915852b Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 27 Mar 2012 19:47:26 -0300 Subject: KVM: VMX: vmx_set_cr0 expects kvm->srcu locked vmx_set_cr0 is called from vcpu run context, therefore it expects kvm->srcu to be held (for setting up the real-mode TSS). Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 280751c84724..ad85adfef843 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3906,7 +3906,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */ + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); vmx_set_cr4(&vmx->vcpu, 0); vmx_set_efer(&vmx->vcpu, 0); vmx_fpu_activate(&vmx->vcpu); -- cgit v1.2.1 From e08759215b7dcb7111e94f0f96918dd98e86ca6b Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 4 Apr 2012 15:30:33 +0300 Subject: KVM: Resolve RCU vs. async page fault problem "Page ready" async PF can kick vcpu out of idle state much like IRQ. We need to tell RCU about this. Reported-by: Sasha Levin Signed-off-by: Gleb Natapov Reviewed-by: Paul E. 
McKenney Signed-off-by: Avi Kivity --- arch/x86/kernel/kvm.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 694d801bf606..b8ba6e4a27e4 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -38,6 +38,7 @@ #include #include #include +#include static int kvmapf = 1; @@ -253,7 +254,10 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) kvm_async_pf_task_wait((u32)read_cr2()); break; case KVM_PV_REASON_PAGE_READY: + rcu_irq_enter(); + exit_idle(); kvm_async_pf_task_wake((u32)read_cr2()); + rcu_irq_exit(); break; } } -- cgit v1.2.1 From 234e340582901211f40d8c732afc49f0630ecf05 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 5 Apr 2012 14:25:11 -0700 Subject: simple_open: automatically convert to simple_open() Many users of debugfs copy the implementation of default_open() when they want to support a custom read/write function op. This leads to a proliferation of the default_open() implementation across the entire tree. Now that the common implementation has been consolidated into libfs we can replace all the users of this function with simple_open(). This replacement was done with the following semantic patch: @ open @ identifier open_f != simple_open; identifier i, f; @@ -int open_f(struct inode *i, struct file *f) -{ ( -if (i->i_private) -f->private_data = i->i_private; | -f->private_data = i->i_private; ) -return 0; -} @ has_open depends on open @ identifier fops; identifier open.open_f; @@ struct file_operations fops = { ... -.open = open_f, +.open = simple_open, ... }; [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Stephen Boyd Cc: Greg Kroah-Hartman Cc: Al Viro Cc: Julia Lawall Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/kdebugfs.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index 90fcf62854bb..1d5d31ea686b 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -68,16 +68,9 @@ static ssize_t setup_data_read(struct file *file, char __user *user_buf, return count; } -static int setup_data_open(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - - return 0; -} - static const struct file_operations fops_setup_data = { .read = setup_data_read, - .open = setup_data_open, + .open = simple_open, .llseek = default_llseek, }; -- cgit v1.2.1 From 46ed99d1b7c92920ce9e313152522847647aae4f Mon Sep 17 00:00:00 2001 From: Emil Goode Date: Sun, 1 Apr 2012 20:48:04 +0200 Subject: x86: vsyscall: Use NULL instead 0 for a pointer argument This patch silences the following sparse warning: arch/x86/kernel/vsyscall_64.c:250:34: warning: Using plain integer as NULL pointer Signed-off-by: Emil Goode Acked-by: Andy Lutomirski Cc: john.stultz@linaro.org Link: http://lkml.kernel.org/r/1333306084-3776-1-git-send-email-emilgoode@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/vsyscall_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index f386dc49f988..7515cf0e1805 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -216,9 +216,9 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) current_thread_info()->sig_on_uaccess_error = 1; /* - * 0 is a valid user pointer (in the access_ok sense) on 32-bit 
and + * NULL is a valid user pointer (in the access_ok sense) on 32-bit and * 64-bit, so we don't need to special-case it here. For all the - * vsyscalls, 0 means "don't write anything" not "write it at + * vsyscalls, NULL means "don't write anything" not "write it at * address 0". */ ret = -EFAULT; @@ -247,7 +247,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) ret = sys_getcpu((unsigned __user *)regs->di, (unsigned __user *)regs->si, - 0); + NULL); break; } -- cgit v1.2.1 From 2531d64b6fe2724dc432b67d8dc66bd45621da0b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 20 Mar 2012 15:04:18 -0400 Subject: xen/x86: Workaround 'x86/ioapic: Add register level checks to detect bogus io-apic entries' The above mentioned patch checks the IOAPIC and if it contains -1, then it unmaps said IOAPIC. But under Xen we get this: BUG: unable to handle kernel NULL pointer dereference at 0000000000000040 IP: [] xen_irq_init+0x1f/0xb0 PGD 0 Oops: 0002 [#1] SMP CPU 0 Modules linked in: Pid: 1, comm: swapper/0 Not tainted 3.2.10-3.fc16.x86_64 #1 Dell Inc. Inspiron 1525 /0U990C RIP: e030:[] [] xen_irq_init+0x1f/0xb0 RSP: e02b: ffff8800d42cbb70 EFLAGS: 00010202 RAX: 0000000000000000 RBX: 00000000ffffffef RCX: 0000000000000001 RDX: 0000000000000040 RSI: 00000000ffffffef RDI: 0000000000000001 RBP: ffff8800d42cbb80 R08: ffff8800d6400000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 00000000ffffffef R13: 0000000000000001 R14: 0000000000000001 R15: 0000000000000010 FS: 0000000000000000(0000) GS:ffff8800df5fe000(0000) knlGS:0000000000000000 CS: e033 DS: 0000 ES: 0000 CR0:000000008005003b CR2: 0000000000000040 CR3: 0000000001a05000 CR4: 0000000000002660 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process swapper/0 (pid: 1, threadinfo ffff8800d42ca000, task ffff8800d42d0000) Stack: 00000000ffffffef 0000000000000010 ffff8800d42cbbe0 ffffffff8134f157 ffffffff8100a9b2 ffffffff8182ffd1 00000000000000a0 00000000829e7384 0000000000000002 0000000000000010 00000000ffffffff 0000000000000000 Call Trace: [] xen_bind_pirq_gsi_to_irq+0x87/0x230 [] ? check_events+0x12+0x20 [] xen_register_pirq+0x82/0xe0 [] xen_register_gsi.part.2+0x4a/0xd0 [] acpi_register_gsi_xen+0x20/0x30 [] acpi_register_gsi+0xf/0x20 [] acpi_pci_irq_enable+0x12e/0x202 [] pcibios_enable_device+0x39/0x40 [] do_pci_enable_device+0x4b/0x70 [] __pci_enable_device_flags+0xa8/0xf0 [] pci_enable_device+0x13/0x20 The reason we are dying is b/c the call acpi_get_override_irq() is used, which returns the polarity and trigger for the IRQs. That function calls mp_find_ioapics to get the 'struct ioapic' structure - which along with the mp_irq[x] is used to figure out the default values and the polarity/trigger overrides. Since the mp_find_ioapics now returns -1 [b/c the IOAPIC is filled with 0xffffffff], the acpi_get_override_irq() stops trying to lookup in the mp_irq[x] the proper INT_SRV_OVR and we can't install the SCI interrupt. The proper fix for this is going in v3.5 and adds an x86_io_apic_ops struct so that platforms can override it. But for v3.4 lets carry this work-around. This patch does that by providing a slightly different variant of the fake IOAPIC entries. 
Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/mmu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 1a309ee2331e..91dc2871e336 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1859,6 +1859,7 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd, #endif /* CONFIG_X86_64 */ static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss; +static unsigned char fake_ioapic_mapping[PAGE_SIZE] __page_aligned_bss; static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) { @@ -1899,7 +1900,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) * We just don't map the IO APIC - all access is via * hypercalls. Keep the address in the pte for reference. */ - pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL); + pte = pfn_pte(PFN_DOWN(__pa(fake_ioapic_mapping)), PAGE_KERNEL); break; #endif @@ -2064,6 +2065,7 @@ void __init xen_init_mmu_ops(void) pv_mmu_ops = xen_mmu_ops; memset(dummy_mapping, 0xff, PAGE_SIZE); + memset(fake_ioapic_mapping, 0xfd, PAGE_SIZE); } /* Protected by xen_reservation_lock. */ -- cgit v1.2.1 From e8c9e788f493d3236809e955c9fc12625a461e09 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Thu, 22 Mar 2012 18:29:24 +0530 Subject: xen/smp: Remove unnecessary call to smp_processor_id() There is an extra and unnecessary call to smp_processor_id() in cpu_bringup(). Remove it. Signed-off-by: Srivatsa S. Bhat Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 240def438dc3..e845555ff486 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -59,7 +59,7 @@ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) static void __cpuinit cpu_bringup(void) { - int cpu = smp_processor_id(); + int cpu; cpu_init(); touch_softlockup_watchdog(); -- cgit v1.2.1 From 2ca052a3710fac208eee690faefdeb8bbd4586a1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 2 Apr 2012 16:15:33 -0700 Subject: x86: Use correct byte-sized register constraint in __xchg_op() x86-64 can access the low half of any register, but i386 can only do it with a subset of registers. 'r' causes compilation failures on i386, but 'q' expresses the constraint properly. Signed-off-by: Jeremy Fitzhardinge Link: http://lkml.kernel.org/r/4F7A3315.501@goop.org Reported-by: Leigh Scott Tested-by: Thomas Reitmayr Signed-off-by: H. Peter Anvin Cc: v3.3 --- arch/x86/include/asm/cmpxchg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index b3b733262909..bc18d0ed459a 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -43,7 +43,7 @@ extern void __add_wrong_size(void) switch (sizeof(*(ptr))) { \ case __X86_CASE_B: \ asm volatile (lock #op "b %b0, %1\n" \ - : "+r" (__ret), "+m" (*(ptr)) \ + : "+q" (__ret), "+m" (*(ptr)) \ : : "memory", "cc"); \ break; \ case __X86_CASE_W: \ -- cgit v1.2.1 From 8c91c5325e107ec17e40a59a47c6517387d64eb7 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Fri, 6 Apr 2012 09:30:57 -0700 Subject: x86: Use correct byte-sized register constraint in __add() Similar to: 2ca052a x86: Use correct byte-sized register constraint in __xchg_op() ... 
the __add() macro also needs to use a "q" constraint in the byte-sized case, lest we try to generate an illegal register. Link: http://lkml.kernel.org/r/4F7A3315.501@goop.org Signed-off-by: H. Peter Anvin Cc: Jeremy Fitzhardinge Cc: Leigh Scott Cc: Thomas Reitmayr Cc: v3.3 --- arch/x86/include/asm/cmpxchg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index bc18d0ed459a..99480e55973d 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -173,7 +173,7 @@ extern void __add_wrong_size(void) switch (sizeof(*(ptr))) { \ case __X86_CASE_B: \ asm volatile (lock "addb %b1, %0\n" \ - : "+m" (*(ptr)) : "ri" (inc) \ + : "+m" (*(ptr)) : "qi" (inc) \ : "memory", "cc"); \ break; \ case __X86_CASE_W: \ -- cgit v1.2.1 From f68e556e23d1a4176b563bcb25d8baf2c5313f91 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 6 Apr 2012 13:54:56 -0700 Subject: Make the "word-at-a-time" helper functions more commonly usable I have a new optimized x86 "strncpy_from_user()" that will use these same helper functions for all the same reasons the name lookup code uses them. This is preparation for that. This moves them into an architecture-specific header file. It's architecture-specific for two reasons: - some of the functions are likely to want architecture-specific implementations. Even if the current code happens to be "generic" in the sense that it should work on any little-endian machine, it's likely that the "multiply by a big constant and shift" implementation is less than optimal for an architecture that has a guaranteed fast bit count instruction, for example. - I expect that if architectures like sparc want to start playing around with this, we'll need to abstract out a few more details (in particular the actual unaligned accesses). So we're likely to have more architecture-specific stuff if non-x86 architectures start using this. (and if it turns out that non-x86 architectures don't start using this, then having it in an architecture-specific header is still the right thing to do, of course) Signed-off-by: Linus Torvalds --- arch/x86/include/asm/word-at-a-time.h | 46 +++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 arch/x86/include/asm/word-at-a-time.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h new file mode 100644 index 000000000000..6fe6767b7124 --- /dev/null +++ b/arch/x86/include/asm/word-at-a-time.h @@ -0,0 +1,46 @@ +#ifndef _ASM_WORD_AT_A_TIME_H +#define _ASM_WORD_AT_A_TIME_H + +/* + * This is largely generic for little-endian machines, but the + * optimal byte mask counting is probably going to be something + * that is architecture-specific. If you have a reliably fast + * bit count instruction, that might be better than the multiply + * and shift, for example. + */ + +#ifdef CONFIG_64BIT + +/* + * Jan Achrenius on G+: microoptimized version of + * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56" + * that works for the bytemasks without having to + * mask them first. 
+ */ +static inline long count_masked_bytes(unsigned long mask) +{ + return mask*0x0001020304050608ul >> 56; +} + +#else /* 32-bit case */ + +/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ +static inline long count_masked_bytes(long mask) +{ + /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ + long a = (0x0ff0001+mask) >> 23; + /* Fix the 1 for 00 case */ + return a & mask; +} + +#endif + +#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) + +/* Return the high bit set in the first byte that is a zero */ +static inline unsigned long has_zero(unsigned long a) +{ + return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80); +} + +#endif /* _ASM_WORD_AT_A_TIME_H */ -- cgit v1.2.1 From 3cb42092ff02edec34bf936b7400b1f1efc8ca43 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 9 Apr 2012 13:59:00 -0400 Subject: um: fix linker script generation while we can't just use -U$(SUBARCH), we still need to kill idiotic define (implicit -Di386=1), both for SUBARCH=i386 and SUBARCH=x86/CONFIG_64BIT=n builds. Signed-off-by: Al Viro --- arch/x86/Makefile.um | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/Makefile.um b/arch/x86/Makefile.um index 4be406abeefd..36b62bc52638 100644 --- a/arch/x86/Makefile.um +++ b/arch/x86/Makefile.um @@ -14,6 +14,9 @@ LINK-y += $(call cc-option,-m32) export LDFLAGS +LDS_EXTRA := -Ui386 +export LDS_EXTRA + # First of all, tune CFLAGS for the specific CPU. This actually sets cflags-y. include $(srctree)/arch/x86/Makefile_32.cpu -- cgit v1.2.1 From a3a85a763c399c0bf483a30d82d2d613e6f94cd3 Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Thu, 29 Mar 2012 18:47:46 +0200 Subject: um: Disintegrate asm/system.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Richard Weinberger Reported-by: Toralf Förster CC: dhowells@redhat.com --- arch/x86/um/asm/barrier.h | 75 ++++++++++++++++++++++++ arch/x86/um/asm/switch_to.h | 7 +++ arch/x86/um/asm/system.h | 135 -------------------------------------------- 3 files changed, 82 insertions(+), 135 deletions(-) create mode 100644 arch/x86/um/asm/barrier.h create mode 100644 arch/x86/um/asm/switch_to.h delete mode 100644 arch/x86/um/asm/system.h (limited to 'arch/x86') diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h new file mode 100644 index 000000000000..7d01b8c56c00 --- /dev/null +++ b/arch/x86/um/asm/barrier.h @@ -0,0 +1,75 @@ +#ifndef _ASM_UM_BARRIER_H_ +#define _ASM_UM_BARRIER_H_ + +#include +#include +#include +#include +#include + +#include +#include + +/* + * Force strict CPU ordering. + * And yes, this is required on UP too when we're talking + * to devices. 
+ */ +#ifdef CONFIG_X86_32 + +#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) +#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) +#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) + +#else /* CONFIG_X86_32 */ + +#define mb() asm volatile("mfence" : : : "memory") +#define rmb() asm volatile("lfence" : : : "memory") +#define wmb() asm volatile("sfence" : : : "memory") + +#endif /* CONFIG_X86_32 */ + +#define read_barrier_depends() do { } while (0) + +#ifdef CONFIG_SMP + +#define smp_mb() mb() +#ifdef CONFIG_X86_PPRO_FENCE +#define smp_rmb() rmb() +#else /* CONFIG_X86_PPRO_FENCE */ +#define smp_rmb() barrier() +#endif /* CONFIG_X86_PPRO_FENCE */ + +#ifdef CONFIG_X86_OOSTORE +#define smp_wmb() wmb() +#else /* CONFIG_X86_OOSTORE */ +#define smp_wmb() barrier() +#endif /* CONFIG_X86_OOSTORE */ + +#define smp_read_barrier_depends() read_barrier_depends() +#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) + +#else /* CONFIG_SMP */ + +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define smp_read_barrier_depends() do { } while (0) +#define set_mb(var, value) do { var = value; barrier(); } while (0) + +#endif /* CONFIG_SMP */ + +/* + * Stop RDTSC speculation. This is needed when you need to use RDTSC + * (or get_cycles or vread that possibly accesses the TSC) in a defined + * code region. + * + * (Could use an alternative three way for this if there was one.) + */ +static inline void rdtsc_barrier(void) +{ + alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); + alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); +} + +#endif diff --git a/arch/x86/um/asm/switch_to.h b/arch/x86/um/asm/switch_to.h new file mode 100644 index 000000000000..cf97d20da61f --- /dev/null +++ b/arch/x86/um/asm/switch_to.h @@ -0,0 +1,7 @@ +#ifndef _ASM_UM_SWITCH_TO_H_ +#define _ASM_UM_SWITCH_TO_H_ + +extern void *_switch_to(void *prev, void *next, void *last); +#define switch_to(prev, next, last) prev = _switch_to(prev, next, last) + +#endif diff --git a/arch/x86/um/asm/system.h b/arch/x86/um/asm/system.h deleted file mode 100644 index a459fd9b7598..000000000000 --- a/arch/x86/um/asm/system.h +++ /dev/null @@ -1,135 +0,0 @@ -#ifndef _ASM_X86_SYSTEM_H_ -#define _ASM_X86_SYSTEM_H_ - -#include -#include -#include -#include -#include - -#include -#include - -/* entries in ARCH_DLINFO: */ -#ifdef CONFIG_IA32_EMULATION -# define AT_VECTOR_SIZE_ARCH 2 -#else -# define AT_VECTOR_SIZE_ARCH 1 -#endif - -extern unsigned long arch_align_stack(unsigned long sp); - -void default_idle(void); - -/* - * Force strict CPU ordering. - * And yes, this is required on UP too when we're talking - * to devices. - */ -#ifdef CONFIG_X86_32 -/* - * Some non-Intel clones support out of order store. wmb() ceases to be a - * nop for these. - */ -#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) -#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) -#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) -#else -#define mb() asm volatile("mfence":::"memory") -#define rmb() asm volatile("lfence":::"memory") -#define wmb() asm volatile("sfence" ::: "memory") -#endif - -/** - * read_barrier_depends - Flush all pending reads that subsequents reads - * depend on. - * - * No data-dependent reads from memory-like regions are ever reordered - * over this barrier. 
All reads preceding this primitive are guaranteed - * to access memory (but not necessarily other CPUs' caches) before any - * reads following this primitive that depend on the data return by - * any of the preceding reads. This primitive is much lighter weight than - * rmb() on most CPUs, and is never heavier weight than is - * rmb(). - * - * These ordering constraints are respected by both the local CPU - * and the compiler. - * - * Ordering is not guaranteed by anything other than these primitives, - * not even by data dependencies. See the documentation for - * memory_barrier() for examples and URLs to more information. - * - * For example, the following code would force ordering (the initial - * value of "a" is zero, "b" is one, and "p" is "&a"): - * - * - * CPU 0 CPU 1 - * - * b = 2; - * memory_barrier(); - * p = &b; q = p; - * read_barrier_depends(); - * d = *q; - * - * - * because the read of "*q" depends on the read of "p" and these - * two reads are separated by a read_barrier_depends(). However, - * the following code, with the same initial values for "a" and "b": - * - * - * CPU 0 CPU 1 - * - * a = 2; - * memory_barrier(); - * b = 3; y = b; - * read_barrier_depends(); - * x = a; - * - * - * does not enforce ordering, since there is no data dependency between - * the read of "a" and the read of "b". Therefore, on some CPUs, such - * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() - * in cases like this where there are no data dependencies. - **/ - -#define read_barrier_depends() do { } while (0) - -#ifdef CONFIG_SMP -#define smp_mb() mb() -#ifdef CONFIG_X86_PPRO_FENCE -# define smp_rmb() rmb() -#else -# define smp_rmb() barrier() -#endif -#ifdef CONFIG_X86_OOSTORE -# define smp_wmb() wmb() -#else -# define smp_wmb() barrier() -#endif -#define smp_read_barrier_depends() read_barrier_depends() -#define set_mb(var, value) do { (void)xchg(&var, value); } while (0) -#else -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() -#define smp_read_barrier_depends() do { } while (0) -#define set_mb(var, value) do { var = value; barrier(); } while (0) -#endif - -/* - * Stop RDTSC speculation. This is needed when you need to use RDTSC - * (or get_cycles or vread that possibly accesses the TSC) in a defined - * code region. - * - * (Could use an alternative three way for this if there was one.) 
- */ -static inline void rdtsc_barrier(void) -{ - alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); - alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); -} - -extern void *_switch_to(void *prev, void *next, void *last); -#define switch_to(prev, next, last) prev = _switch_to(prev, next, last) - -#endif -- cgit v1.2.1 From 76b278edd99fb55525fcf2706095e388bd3d122c Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Thu, 29 Mar 2012 19:10:42 +0200 Subject: um: Use asm-generic/switch_to.h Signed-off-by: Richard Weinberger --- arch/x86/um/asm/switch_to.h | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 arch/x86/um/asm/switch_to.h (limited to 'arch/x86') diff --git a/arch/x86/um/asm/switch_to.h b/arch/x86/um/asm/switch_to.h deleted file mode 100644 index cf97d20da61f..000000000000 --- a/arch/x86/um/asm/switch_to.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _ASM_UM_SWITCH_TO_H_ -#define _ASM_UM_SWITCH_TO_H_ - -extern void *_switch_to(void *prev, void *next, void *last); -#define switch_to(prev, next, last) prev = _switch_to(prev, next, last) - -#endif -- cgit v1.2.1 From f19a0c2c2e6add90b7d6a1b7595abebfe2e4c37a Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 9 Apr 2012 17:38:35 +0300 Subject: KVM: PMU emulation: GLOBAL_CTRL MSR should be enabled on reset On reset all MPU counters should be enabled in GLOBAL_CTRL MSR. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/pmu.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 173df38dbda5..2e88438ffd83 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -459,17 +459,17 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu) pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1); if (pmu->version == 1) { - pmu->global_ctrl = (1 << pmu->nr_arch_gp_counters) - 1; - return; + pmu->nr_arch_fixed_counters = 0; + } else { + pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f), + X86_PMC_MAX_FIXED); + pmu->counter_bitmask[KVM_PMC_FIXED] = + ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1; } - pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f), - X86_PMC_MAX_FIXED); - pmu->counter_bitmask[KVM_PMC_FIXED] = - ((u64)1 << ((entry->edx >> 5) & 0xff)) - 1; - pmu->global_ctrl_mask = ~(((1 << pmu->nr_arch_gp_counters) - 1) - | (((1ull << pmu->nr_arch_fixed_counters) - 1) - << X86_PMC_IDX_FIXED)); + pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) | + (((1ull << pmu->nr_arch_fixed_counters) - 1) << X86_PMC_IDX_FIXED); + pmu->global_ctrl_mask = ~pmu->global_ctrl; } void kvm_pmu_init(struct kvm_vcpu *vcpu) -- cgit v1.2.1 From 92ae03f2ef99fbc23bfa9080d6b58f25227bd7ef Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 6 Apr 2012 14:32:32 -0700 Subject: x86: merge 32/64-bit versions of 'strncpy_from_user()' and speed it up This merges the 32- and 64-bit versions of the x86 strncpy_from_user() by just rewriting it in C rather than the ancient inline asm versions that used lodsb/stosb and had been duplicated for (trivial) differences between the 32-bit and 64-bit versions. While doing that, it also speeds them up by doing the accesses a word at a time. Finally, the new routines also properly handle the case of hitting the end of the address space, which we have never done correctly before (fs/namei.c has a hack around it for that reason). Despite all these improvements, it actually removes more lines than it adds, due to the de-duplication. 
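Illustrative sketch, not part of this commit: a worked example of how the word-at-a-time helpers used here locate the NUL terminator on a 64-bit little-endian machine. The string contents and the wrapper function are made up for the example; has_zero() and count_bytes() are the helpers defined in this series.

static long example_strlen_of_one_word(void)
{
	/* memory bytes: 'a' 'b' 'c' '\0' 'x' 'y' 'z' 'w', read as one little-endian word */
	unsigned long v    = 0x777a797800636261ul;
	unsigned long zero = has_zero(v);	/* 0x0000000080000000: high bit of the zero byte */

	/* count_bytes() turns that mask into 3, the number of bytes before the NUL */
	return count_bytes(zero);
}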
Also, we no longer export (or define) the legacy __strncpy_from_user() function (that was defined to not do the user permission checks), since it's not actually used anywhere, and the user address space checks are built in to the new code. Other architecture maintainers have been notified that the old hack in fs/namei.c will be going away in the 3.5 merge window, in case they copied the x86 approach of being a bit cavalier about the end of the address space. Cc: linux-arch@vger.kernel.org Cc: Ingo Molnar Cc: Peter Anvin" Signed-off-by: Linus Torvalds --- arch/x86/include/asm/uaccess.h | 2 + arch/x86/include/asm/uaccess_32.h | 5 -- arch/x86/include/asm/uaccess_64.h | 4 -- arch/x86/lib/usercopy.c | 103 ++++++++++++++++++++++++++++++++++++++ arch/x86/lib/usercopy_32.c | 87 -------------------------------- arch/x86/lib/usercopy_64.c | 49 ------------------ 6 files changed, 105 insertions(+), 145 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 8be5f54d9360..e0544597cfe7 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -557,6 +557,8 @@ struct __large_struct { unsigned long buf[100]; }; extern unsigned long copy_from_user_nmi(void *to, const void __user *from, unsigned long n); +extern __must_check long +strncpy_from_user(char *dst, const char __user *src, long count); /* * movsl can be slow when source and dest are not both 8-byte aligned diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 566e803cc602..8084bc73b18c 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -213,11 +213,6 @@ static inline unsigned long __must_check copy_from_user(void *to, return n; } -long __must_check strncpy_from_user(char *dst, const char __user *src, - long count); -long __must_check __strncpy_from_user(char *dst, - const char __user *src, long count); - /** * strlen_user: - Get the size of a string in user space. * @str: The string to measure. diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 1c66d30971ad..fcd4b6f3ef02 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -208,10 +208,6 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) } } -__must_check long -strncpy_from_user(char *dst, const char __user *src, long count); -__must_check long -__strncpy_from_user(char *dst, const char __user *src, long count); __must_check long strnlen_user(const char __user *str, long n); __must_check long __strnlen_user(const char __user *str, long n); __must_check long strlen_user(const char __user *str); diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index 97be9cb54483..57252c928f56 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -7,6 +7,8 @@ #include #include +#include + /* * best effort, GUP based copy_from_user() that is NMI-safe */ @@ -41,3 +43,104 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) return len; } EXPORT_SYMBOL_GPL(copy_from_user_nmi); + +static inline unsigned long count_bytes(unsigned long mask) +{ + mask = (mask - 1) & ~mask; + mask >>= 7; + return count_masked_bytes(mask); +} + +/* + * Do a strncpy, return length of string without final '\0'. + * 'count' is the user-supplied count (return 'count' if we + * hit it), 'max' is the address space maximum (and we return + * -EFAULT if we hit it). 
+ */ +static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, long max) +{ + long res = 0; + + /* + * Truncate 'max' to the user-specified limit, so that + * we only have one limit we need to check in the loop + */ + if (max > count) + max = count; + + while (max >= sizeof(unsigned long)) { + unsigned long c; + + /* Fall back to byte-at-a-time if we get a page fault */ + if (unlikely(__get_user(c,(unsigned long __user *)(src+res)))) + break; + /* This can write a few bytes past the NUL character, but that's ok */ + *(unsigned long *)(dst+res) = c; + c = has_zero(c); + if (c) + return res + count_bytes(c); + res += sizeof(unsigned long); + max -= sizeof(unsigned long); + } + + while (max) { + char c; + + if (unlikely(__get_user(c,src+res))) + return -EFAULT; + dst[res] = c; + if (!c) + return res; + res++; + max--; + } + + /* + * Uhhuh. We hit 'max'. But was that the user-specified maximum + * too? If so, that's ok - we got as much as the user asked for. + */ + if (res >= count) + return count; + + /* + * Nope: we hit the address space limit, and we still had more + * characters the caller would have wanted. That's an EFAULT. + */ + return -EFAULT; +} + +/** + * strncpy_from_user: - Copy a NUL terminated string from userspace. + * @dst: Destination address, in kernel space. This buffer must be at + * least @count bytes long. + * @src: Source address, in user space. + * @count: Maximum number of bytes to copy, including the trailing NUL. + * + * Copies a NUL-terminated string from userspace to kernel space. + * + * On success, returns the length of the string (not including the trailing + * NUL). + * + * If access to userspace fails, returns -EFAULT (some data may have been + * copied). + * + * If @count is smaller than the length of the string, copies @count bytes + * and returns @count. + */ +long +strncpy_from_user(char *dst, const char __user *src, long count) +{ + unsigned long max_addr, src_addr; + + if (unlikely(count <= 0)) + return 0; + + max_addr = current_thread_info()->addr_limit.seg; + src_addr = (unsigned long)src; + if (likely(src_addr < max_addr)) { + unsigned long max = max_addr - src_addr; + return do_strncpy_from_user(dst, src, count, max); + } + return -EFAULT; +} +EXPORT_SYMBOL(strncpy_from_user); diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index d9b094ca7aaa..ef2a6a5d78e3 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -32,93 +32,6 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon #define movsl_is_ok(a1, a2, n) \ __movsl_is_ok((unsigned long)(a1), (unsigned long)(a2), (n)) -/* - * Copy a null terminated string from userspace. - */ - -#define __do_strncpy_from_user(dst, src, count, res) \ -do { \ - int __d0, __d1, __d2; \ - might_fault(); \ - __asm__ __volatile__( \ - " testl %1,%1\n" \ - " jz 2f\n" \ - "0: lodsb\n" \ - " stosb\n" \ - " testb %%al,%%al\n" \ - " jz 1f\n" \ - " decl %1\n" \ - " jnz 0b\n" \ - "1: subl %1,%0\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl %5,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(0b,3b) \ - : "=&d"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \ - "=&D" (__d2) \ - : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ - : "memory"); \ -} while (0) - -/** - * __strncpy_from_user: - Copy a NUL terminated string from userspace, with less checking. - * @dst: Destination address, in kernel space. This buffer must be at - * least @count bytes long. - * @src: Source address, in user space. 
- * @count: Maximum number of bytes to copy, including the trailing NUL. - * - * Copies a NUL-terminated string from userspace to kernel space. - * Caller must check the specified block with access_ok() before calling - * this function. - * - * On success, returns the length of the string (not including the trailing - * NUL). - * - * If access to userspace fails, returns -EFAULT (some data may have been - * copied). - * - * If @count is smaller than the length of the string, copies @count bytes - * and returns @count. - */ -long -__strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res; - __do_strncpy_from_user(dst, src, count, res); - return res; -} -EXPORT_SYMBOL(__strncpy_from_user); - -/** - * strncpy_from_user: - Copy a NUL terminated string from userspace. - * @dst: Destination address, in kernel space. This buffer must be at - * least @count bytes long. - * @src: Source address, in user space. - * @count: Maximum number of bytes to copy, including the trailing NUL. - * - * Copies a NUL-terminated string from userspace to kernel space. - * - * On success, returns the length of the string (not including the trailing - * NUL). - * - * If access to userspace fails, returns -EFAULT (some data may have been - * copied). - * - * If @count is smaller than the length of the string, copies @count bytes - * and returns @count. - */ -long -strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res = -EFAULT; - if (access_ok(VERIFY_READ, src, 1)) - __do_strncpy_from_user(dst, src, count, res); - return res; -} -EXPORT_SYMBOL(strncpy_from_user); - /* * Zero Userspace */ diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index b7c2849ffb66..0d0326f388c0 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -8,55 +8,6 @@ #include #include -/* - * Copy a null terminated string from userspace. 
- */ - -#define __do_strncpy_from_user(dst,src,count,res) \ -do { \ - long __d0, __d1, __d2; \ - might_fault(); \ - __asm__ __volatile__( \ - " testq %1,%1\n" \ - " jz 2f\n" \ - "0: lodsb\n" \ - " stosb\n" \ - " testb %%al,%%al\n" \ - " jz 1f\n" \ - " decq %1\n" \ - " jnz 0b\n" \ - "1: subq %1,%0\n" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movq %5,%0\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(0b,3b) \ - : "=&r"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \ - "=&D" (__d2) \ - : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ - : "memory"); \ -} while (0) - -long -__strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res; - __do_strncpy_from_user(dst, src, count, res); - return res; -} -EXPORT_SYMBOL(__strncpy_from_user); - -long -strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res = -EFAULT; - if (access_ok(VERIFY_READ, src, 1)) - return __strncpy_from_user(dst, src, count); - return res; -} -EXPORT_SYMBOL(strncpy_from_user); - /* * Zero Userspace */ -- cgit v1.2.1 From a956bd6f8583326b18348ab1452b4686778f785d Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 12 Apr 2012 16:48:01 +0200 Subject: x86, microcode: Fix sysfs warning during module unload on unsupported CPUs Loading the microcode driver on an unsupported CPU and subsequently unloading the driver causes WARNING: at fs/sysfs/group.c:138 mc_device_remove+0x5f/0x70 [microcode]() Hardware name: 01972NG sysfs group ffffffffa00013d0 not found for kobject 'cpu0' Modules linked in: snd_hda_codec_hdmi snd_hda_codec_conexant snd_hda_intel btusb snd_hda_codec bluetooth thinkpad_acpi rfkill microcode(-) [last unloaded: cfg80211] Pid: 4560, comm: modprobe Not tainted 3.4.0-rc2-00002-g258f742 #5 Call Trace: [] ? warn_slowpath_common+0x7b/0xc0 [] ? warn_slowpath_fmt+0x45/0x50 [] ? sysfs_remove_group+0x34/0x120 [] ? mc_device_remove+0x5f/0x70 [microcode] [] ? subsys_interface_unregister+0x69/0xa0 [] ? mutex_lock+0x16/0x40 [] ? microcode_exit+0x50/0x92 [microcode] [] ? sys_delete_module+0x16d/0x260 [] ? wait_iff_congested+0x45/0x110 [] ? page_fault+0x1f/0x30 [] ? system_call_fastpath+0x16/0x1b on recent kernels. This is due to commit 8a25a2fd126c ("cpu: convert 'cpu' and 'machinecheck' sysdev_class to a regular subsystem") which renders commit 6c53cbfced04 ("x86, microcode: Correct sysdev_add error path") useless. See http://marc.info/?l=linux-kernel&m=133416246406478 Avoid above warning by restoring the old driver behaviour before 6c53cbfced04 ("x86, microcode: Correct sysdev_add error path"). 
Cc: stable@vger.kernel.org Cc: Tigran Aivazian Signed-off-by: Andreas Herrmann Acked-by: Greg Kroah-Hartman Link: http://lkml.kernel.org/r/20120411163849.GE4794@alberich.amd.com Signed-off-by: Borislav Petkov --- arch/x86/kernel/microcode_core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 87a0f8688301..d389e74342a3 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -419,10 +419,8 @@ static int mc_device_add(struct device *dev, struct subsys_interface *sif) if (err) return err; - if (microcode_init_cpu(cpu) == UCODE_ERROR) { - sysfs_remove_group(&dev->kobj, &mc_attr_group); + if (microcode_init_cpu(cpu) == UCODE_ERROR) return -EINVAL; - } return err; } -- cgit v1.2.1 From 283c1f2558ef4a4411fe908364b15b73b6ab44cf Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 12 Apr 2012 16:51:57 +0200 Subject: x86, microcode: Ensure that module is only loaded on supported AMD CPUs Exit early when there's no support for a particular CPU family. Also, fixup the "no support for this CPU vendor" to be issued only when the driver is attempted to be loaded on an unsupported vendor. Cc: stable@vger.kernel.org Cc: Tigran Aivazian Signed-off-by: Andreas Herrmann Acked-by: Greg Kroah-Hartman Link: http://lkml.kernel.org/r/20120411163849.GE4794@alberich.amd.com [Boris: add a commit msg because Andreas is lazy] Signed-off-by: Borislav Petkov --- arch/x86/kernel/microcode_amd.c | 12 +++++++----- arch/x86/kernel/microcode_core.c | 6 +++--- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 73465aab28f8..8a2ce8fd41c0 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -82,11 +82,6 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) { struct cpuinfo_x86 *c = &cpu_data(cpu); - if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { - pr_warning("CPU%d: family %d not supported\n", cpu, c->x86); - return -1; - } - csig->rev = c->microcode; pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); @@ -380,6 +375,13 @@ static struct microcode_ops microcode_amd_ops = { struct microcode_ops * __init init_amd_microcode(void) { + struct cpuinfo_x86 *c = &cpu_data(0); + + if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { + pr_warning("AMD CPU family 0x%x not supported\n", c->x86); + return NULL; + } + patch = (void *)get_zeroed_page(GFP_KERNEL); if (!patch) return NULL; diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index d389e74342a3..c9bda6d6035c 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -526,11 +526,11 @@ static int __init microcode_init(void) microcode_ops = init_intel_microcode(); else if (c->x86_vendor == X86_VENDOR_AMD) microcode_ops = init_amd_microcode(); - - if (!microcode_ops) { + else pr_err("no support for this CPU vendor\n"); + + if (!microcode_ops) return -ENODEV; - } microcode_pdev = platform_device_register_simple("microcode", -1, NULL, 0); -- cgit v1.2.1 From 12e993b89464707398e4209bd99983e376454985 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 15 Apr 2012 17:23:00 -0700 Subject: x86-32: fix up strncpy_from_user() sign error The 'max' range needs to be unsigned, since the size of the user address space is bigger than 2GB. 
We know that 'count' is positive in 'long' (that is checked in the caller), so we will truncate 'max' down to something that fits in a signed long, but before we actually do that, that comparison needs to be done in unsigned. Bug introduced in commit 92ae03f2ef99 ("x86: merge 32/64-bit versions of 'strncpy_from_user()' and speed it up"). On x86-64 you can't trigger this, since the user address space is much smaller than 63 bits, and on x86-32 it works in practice, since you would seldom hit the strncpy limits anyway. I had actually tested the corner-cases, I had only tested them on x86-64. Besides, I had only worried about the case of a pointer *close* to the end of the address space, rather than really far away from it ;) This also changes the "we hit the user-specified maximum" to return 'res', for the trivial reason that gcc seems to generate better code that way. 'res' and 'count' are the same in that case, so it really doesn't matter which one we return. Signed-off-by: Linus Torvalds --- arch/x86/lib/usercopy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index 57252c928f56..d6ae30bbd7bb 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -57,7 +57,7 @@ static inline unsigned long count_bytes(unsigned long mask) * hit it), 'max' is the address space maximum (and we return * -EFAULT if we hit it). */ -static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, long max) +static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, unsigned long max) { long res = 0; @@ -100,7 +100,7 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src, long * too? If so, that's ok - we got as much as the user asked for. */ if (res >= count) - return count; + return res; /* * Nope: we hit the address space limit, and we still had more -- cgit v1.2.1 From d7de8649f34d45041409d1af4ba4a521971a9075 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 11 Apr 2012 17:12:38 +0200 Subject: x86/amd: Remove broken links from comment and kernel message Signed-off-by: Andreas Herrmann Link: http://lkml.kernel.org/r/20120411151238.GA4794@alberich.amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 0a44b90602b0..1248f9ceabc1 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -26,7 +26,8 @@ * contact AMD for precise details and a CPU swap. * * See http://www.multimania.com/poulot/k6bug.html - * http://www.amd.com/K6/k6docs/revgd.html + * and section 2.6.2 of "AMD-K6 Processor Revision Guide - Model 6" + * (Publication # 21266 Issue Date: August 1998) * * The following test is erm.. interesting. 
AMD neglected to up * the chip setting when fixing the bug but they also tweaked some @@ -94,7 +95,6 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) "system stability may be impaired when more than 32 MB are used.\n"); else printk(KERN_CONT "probably OK (after B9730xxxx).\n"); - printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); } /* K6 with old style WHCR */ -- cgit v1.2.1 From 6c7b8e82aab75a25581c4d446fc87f96634e9ef9 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 13 Apr 2012 12:24:27 +0900 Subject: x86: Handle failures of parsing immediate operands in the instruction decoder This can happen if the instruction is much longer than the maximum length, or if insn->opnd_bytes is manually changed. This patch also fixes warnings from -Wswitch-default flag. Reported-by: Prashanth Nageshappa Signed-off-by: Masami Hiramatsu Cc: Linus Torvalds Cc: Ananth N Mavinakayanahalli Cc: Jim Keniston Cc: Linux-mm Cc: Oleg Nesterov Cc: Andi Kleen Cc: Christoph Hellwig Cc: Steven Rostedt Cc: Arnaldo Carvalho de Melo Cc: Anton Arapov Cc: Srikar Dronamraju Cc: yrl.pp-manager.tt@hitachi.com Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20120413032427.32577.42602.stgit@localhost.localdomain Signed-off-by: Ingo Molnar --- arch/x86/lib/insn.c | 53 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 36 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 25feb1ae71c5..b1e6c4b2e8eb 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -379,8 +379,8 @@ err_out: return; } -/* Decode moffset16/32/64 */ -static void __get_moffset(struct insn *insn) +/* Decode moffset16/32/64. Return 0 if failed */ +static int __get_moffset(struct insn *insn) { switch (insn->addr_bytes) { case 2: @@ -397,15 +397,19 @@ static void __get_moffset(struct insn *insn) insn->moffset2.value = get_next(int, insn); insn->moffset2.nbytes = 4; break; + default: /* opnd_bytes must be modified manually */ + goto err_out; } insn->moffset1.got = insn->moffset2.got = 1; + return 1; + err_out: - return; + return 0; } -/* Decode imm v32(Iz) */ -static void __get_immv32(struct insn *insn) +/* Decode imm v32(Iz). 
Return 0 if failed */ +static int __get_immv32(struct insn *insn) { switch (insn->opnd_bytes) { case 2: @@ -417,14 +421,18 @@ static void __get_immv32(struct insn *insn) insn->immediate.value = get_next(int, insn); insn->immediate.nbytes = 4; break; + default: /* opnd_bytes must be modified manually */ + goto err_out; } + return 1; + err_out: - return; + return 0; } -/* Decode imm v64(Iv/Ov) */ -static void __get_immv(struct insn *insn) +/* Decode imm v64(Iv/Ov), Return 0 if failed */ +static int __get_immv(struct insn *insn) { switch (insn->opnd_bytes) { case 2: @@ -441,15 +449,18 @@ static void __get_immv(struct insn *insn) insn->immediate2.value = get_next(int, insn); insn->immediate2.nbytes = 4; break; + default: /* opnd_bytes must be modified manually */ + goto err_out; } insn->immediate1.got = insn->immediate2.got = 1; + return 1; err_out: - return; + return 0; } /* Decode ptr16:16/32(Ap) */ -static void __get_immptr(struct insn *insn) +static int __get_immptr(struct insn *insn) { switch (insn->opnd_bytes) { case 2: @@ -462,14 +473,17 @@ static void __get_immptr(struct insn *insn) break; case 8: /* ptr16:64 is not exist (no segment) */ - return; + return 0; + default: /* opnd_bytes must be modified manually */ + goto err_out; } insn->immediate2.value = get_next(unsigned short, insn); insn->immediate2.nbytes = 2; insn->immediate1.got = insn->immediate2.got = 1; + return 1; err_out: - return; + return 0; } /** @@ -489,7 +503,8 @@ void insn_get_immediate(struct insn *insn) insn_get_displacement(insn); if (inat_has_moffset(insn->attr)) { - __get_moffset(insn); + if (!__get_moffset(insn)) + goto err_out; goto done; } @@ -517,16 +532,20 @@ void insn_get_immediate(struct insn *insn) insn->immediate2.nbytes = 4; break; case INAT_IMM_PTR: - __get_immptr(insn); + if (!__get_immptr(insn)) + goto err_out; break; case INAT_IMM_VWORD32: - __get_immv32(insn); + if (!__get_immv32(insn)) + goto err_out; break; case INAT_IMM_VWORD: - __get_immv(insn); + if (!__get_immv(insn)) + goto err_out; break; default: - break; + /* Here, insn must have an immediate, but failed */ + goto err_out; } if (inat_has_second_immediate(insn->attr)) { insn->immediate2.value = get_next(char, insn); -- cgit v1.2.1 From b1994304fc399f5d3a5368c81111d713490c4799 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sun, 15 Apr 2012 16:06:04 +0100 Subject: x86, efi: Add dedicated EFI stub entry point The method used to work out whether we were booted by EFI firmware or via a boot loader is broken. Because efi_main() is always executed when booting from a boot loader we will dereference invalid pointers either on the stack (CONFIG_X86_32) or contained in %rdx (CONFIG_X86_64) when searching for an EFI System Table signature. Instead of dereferencing these invalid system table pointers, add a new entry point that is only used when booting from EFI firmware, when we know the pointer arguments will be valid. With this change legacy boot loaders will no longer execute efi_main(), but will instead skip EFI stub initialisation completely. [ hpa: Marking this for urgent/stable since it is a regression when the option is enabled; without the option the patch has no effect ] Signed-off-by: Matt Fleming Link: http://lkml.kernel.org/r/1334584744.26997.14.camel@mfleming-mobl1.ger.corp.intel.com Reported-by: Jordan Justen Signed-off-by: H. 
Peter Anvin Cc: v3.3 --- arch/x86/boot/compressed/head_32.S | 14 +++++++++++--- arch/x86/boot/compressed/head_64.S | 22 ++++++++++++++++------ arch/x86/boot/tools/build.c | 15 +++++++++++---- 3 files changed, 38 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index a0559930a180..c85e3ac99bba 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -33,6 +33,9 @@ __HEAD ENTRY(startup_32) #ifdef CONFIG_EFI_STUB + jmp preferred_addr + + .balign 0x10 /* * We don't need the return address, so set up the stack so * efi_main() can find its arugments. @@ -41,12 +44,17 @@ ENTRY(startup_32) call efi_main cmpl $0, %eax - je preferred_addr movl %eax, %esi - call 1f + jne 2f 1: + /* EFI init failed, so hang. */ + hlt + jmp 1b +2: + call 3f +3: popl %eax - subl $1b, %eax + subl $3b, %eax subl BP_pref_address(%esi), %eax add BP_code32_start(%esi), %eax leal preferred_addr(%eax), %eax diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 558d76ce23bc..87e03a13d8e3 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -200,18 +200,28 @@ ENTRY(startup_64) * entire text+data+bss and hopefully all of memory. */ #ifdef CONFIG_EFI_STUB - pushq %rsi + /* + * The entry point for the PE/COFF executable is 0x210, so only + * legacy boot loaders will execute this jmp. + */ + jmp preferred_addr + + .org 0x210 mov %rcx, %rdi mov %rdx, %rsi call efi_main - popq %rsi - cmpq $0,%rax - je preferred_addr movq %rax,%rsi - call 1f + cmpq $0,%rax + jne 2f 1: + /* EFI init failed, so hang. */ + hlt + jmp 1b +2: + call 3f +3: popq %rax - subq $1b, %rax + subq $3b, %rax subq BP_pref_address(%rsi), %rax add BP_code32_start(%esi), %eax leaq preferred_addr(%rax), %rax diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index ed549767a231..24443a332083 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -205,8 +205,13 @@ int main(int argc, char ** argv) put_unaligned_le32(file_sz, &buf[pe_header + 0x50]); #ifdef CONFIG_X86_32 - /* Address of entry point */ - put_unaligned_le32(i, &buf[pe_header + 0x28]); + /* + * Address of entry point. + * + * The EFI stub entry point is +16 bytes from the start of + * the .text section. + */ + put_unaligned_le32(i + 16, &buf[pe_header + 0x28]); /* .text size */ put_unaligned_le32(file_sz, &buf[pe_header + 0xb0]); @@ -217,9 +222,11 @@ int main(int argc, char ** argv) /* * Address of entry point. startup_32 is at the beginning and * the 64-bit entry point (startup_64) is always 512 bytes - * after. + * after. The EFI stub entry point is 16 bytes after that, as + * the first instruction allows legacy loaders to jump over + * the EFI stub initialisation */ - put_unaligned_le32(i + 512, &buf[pe_header + 0x28]); + put_unaligned_le32(i + 528, &buf[pe_header + 0x28]); /* .text size */ put_unaligned_le32(file_sz, &buf[pe_header + 0xc0]); -- cgit v1.2.1 From 68894632afb2729a1d8785c877840953894c7283 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Mon, 2 Apr 2012 18:06:48 +0200 Subject: x86/platform: Remove incorrect error message in x86_default_fixup_cpu_id() It's only called from amd.c:srat_detect_node(). The introduced condition for calling the fixup code is true for all AMD multi-node processors, e.g. Magny-Cours and Interlagos. There we have 2 NUMA nodes on one socket. Thus there are cores having different numa-node-id but with equal phys_proc_id. 
There is no point to print error messages in such a situation. The confusing/misleading error message was introduced with commit 64be4c1c2428e148de6081af235e2418e6a66dda ("x86: Add x86_init platform override to fix up NUMA core numbering"). Remove the default fixup function (especially the error message) and replace it by a NULL pointer check, move the Numascale-specific condition for calling the fixup into the fixup-function itself and slightly adapt the comment. Signed-off-by: Andreas Herrmann Acked-by: Borislav Petkov Cc: Cc: Cc: Cc: Link: http://lkml.kernel.org/r/20120402160648.GR27684@alberich.amd.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/x86_init.h | 1 - arch/x86/kernel/apic/apic_numachip.c | 7 +++++-- arch/x86/kernel/cpu/amd.c | 7 ++++--- arch/x86/kernel/cpu/common.c | 9 --------- arch/x86/kernel/x86_init.c | 1 - 5 files changed, 9 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index baaca8defec8..764b66a4cf89 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -195,6 +195,5 @@ extern struct x86_msi_ops x86_msi; extern void x86_init_noop(void); extern void x86_init_uint_noop(unsigned int unused); -extern void x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node); #endif diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 899803e03214..23e75422e013 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c @@ -207,8 +207,11 @@ static void __init map_csrs(void) static void fixup_cpu_id(struct cpuinfo_x86 *c, int node) { - c->phys_proc_id = node; - per_cpu(cpu_llc_id, smp_processor_id()) = node; + + if (c->phys_proc_id != node) { + c->phys_proc_id = node; + per_cpu(cpu_llc_id, smp_processor_id()) = node; + } } static int __init numachip_system_init(void) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 1248f9ceabc1..1c67ca100e4c 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -353,10 +353,11 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) node = per_cpu(cpu_llc_id, cpu); /* - * If core numbers are inconsistent, it's likely a multi-fabric platform, - * so invoke platform-specific handler + * On multi-fabric platform (e.g. Numascale NumaChip) a + * platform-specific handler needs to be called to fixup some + * IDs of the CPU. */ - if (c->phys_proc_id != node) + if (x86_cpuinit.fixup_cpu_id) x86_cpuinit.fixup_cpu_id(c, node); if (!node_online(node)) { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 67e258362a3d..cf79302198a6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1162,15 +1162,6 @@ static void dbg_restore_debug_regs(void) #define dbg_restore_debug_regs() #endif /* ! CONFIG_KGDB */ -/* - * Prints an error where the NUMA and configured core-number mismatch and the - * platform didn't override this to fix it up - */ -void __cpuinit x86_default_fixup_cpu_id(struct cpuinfo_x86 *c, int node) -{ - pr_err("NUMA core number %d differs from configured core number %d\n", node, c->phys_proc_id); -} - /* * cpu_init() initializes state that is per-CPU. 
Some data is already * initialized (naturally) in the bootstrap process, such as the GDT diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index e9f265fd79ae..9cf71d0b2d37 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -93,7 +93,6 @@ struct x86_init_ops x86_init __initdata = { struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { .early_percpu_clock_init = x86_init_noop, .setup_percpu_clockev = setup_secondary_APIC_clock, - .fixup_cpu_id = x86_default_fixup_cpu_id, }; static void default_nmi_init(void) { }; -- cgit v1.2.1 From 089f9fba56faf33cc6dd2a6442b7ac92c58b8209 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 16 Apr 2012 22:48:15 +0200 Subject: i387: ptrace breaks the lazy-fpu-restore logic Starting from 7e16838d "i387: support lazy restore of FPU state" we assume that fpu_owner_task doesn't need restore_fpu_checking() on the context switch, its FPU state should match what we already have in the FPU on this CPU. However, debugger can change the tracee's FPU state, in this case we should reset fpu.last_cpu to ensure fpu_lazy_restore() can't return true. Change init_fpu() to do this, it is called by user_regset->set() methods. Reported-by: Jan Kratochvil Suggested-by: Linus Torvalds Signed-off-by: Oleg Nesterov Link: http://lkml.kernel.org/r/20120416204815.GB24884@redhat.com Cc: v3.3 Signed-off-by: H. Peter Anvin --- arch/x86/kernel/i387.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 7734bcbb5a3a..2d6e6498c176 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -235,6 +235,7 @@ int init_fpu(struct task_struct *tsk) if (tsk_used_math(tsk)) { if (HAVE_HWFP && tsk == current) unlazy_fpu(tsk); + tsk->thread.fpu.last_cpu = ~0; return 0; } -- cgit v1.2.1 From b960d6c43a63ebd2d8518b328da3816b833ee8cc Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Tue, 27 Mar 2012 14:52:44 +0100 Subject: xen/p2m: m2p_find_override: use list_for_each_entry_safe Use list_for_each_entry_safe and remove the spin_lock acquisition in m2p_find_override. Signed-off-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/p2m.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 1b267e75158d..7ed8cc3434c5 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -809,21 +809,17 @@ struct page *m2p_find_override(unsigned long mfn) { unsigned long flags; struct list_head *bucket = &m2p_overrides[mfn_hash(mfn)]; - struct page *p, *ret; + struct page *p, *t, *ret; ret = NULL; - spin_lock_irqsave(&m2p_override_lock, flags); - - list_for_each_entry(p, bucket, lru) { + list_for_each_entry_safe(p, t, bucket, lru) { if (page_private(p) == mfn) { ret = p; break; } } - spin_unlock_irqrestore(&m2p_override_lock, flags); - return ret; } -- cgit v1.2.1 From cbf2829b61c136edcba302a5e1b6b40e97d32c00 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Wed, 18 Apr 2012 17:37:39 +0100 Subject: x86, apic: APIC code touches invalid MSR on P5 class machines Current APIC code assumes MSR_IA32_APICBASE is present for all systems. Pentium Classic P5 and friends didn't have this MSR. MSR_IA32_APICBASE was introduced as an architectural MSR by Intel @ P6. Code paths that can touch this MSR invalidly are when vendor == Intel && cpu-family == 5 and APIC bit is set in CPUID - or when you simply pass lapic on the kernel command line, on a P5. 
The below patch stops Linux incorrectly interfering with the MSR_IA32_APICBASE for P5 class machines. Other code paths exist that touch the MSR - however those paths are not currently reachable for a conformant P5. Signed-off-by: Bryan O'Donoghue Link: http://lkml.kernel.org/r/4F8EEDD3.1080404@linux.intel.com Signed-off-by: H. Peter Anvin Cc: --- arch/x86/kernel/apic/apic.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 11544d8f1e97..edc24480469f 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1637,9 +1637,11 @@ static int __init apic_verify(void) mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; /* The BIOS may have set up the APIC at some other address */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (l & MSR_IA32_APICBASE_ENABLE) - mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; + if (boot_cpu_data.x86 >= 6) { + rdmsr(MSR_IA32_APICBASE, l, h); + if (l & MSR_IA32_APICBASE_ENABLE) + mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; + } pr_info("Found and enabled local APIC!\n"); return 0; @@ -1657,13 +1659,15 @@ int __init apic_force_enable(unsigned long addr) * MSR. This can only be done in software for Intel P6 or later * and AMD K7 (Model > 1) or later. */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (!(l & MSR_IA32_APICBASE_ENABLE)) { - pr_info("Local APIC disabled by BIOS -- reenabling.\n"); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | addr; - wrmsr(MSR_IA32_APICBASE, l, h); - enabled_via_apicbase = 1; + if (boot_cpu_data.x86 >= 6) { + rdmsr(MSR_IA32_APICBASE, l, h); + if (!(l & MSR_IA32_APICBASE_ENABLE)) { + pr_info("Local APIC disabled by BIOS -- reenabling.\n"); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | addr; + wrmsr(MSR_IA32_APICBASE, l, h); + enabled_via_apicbase = 1; + } } return apic_verify(); } @@ -2209,10 +2213,12 @@ static void lapic_resume(void) * FIXME! This will be wrong if we ever support suspend on * SMP! We'll need to do this as part of the CPU restore! */ - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); + if (boot_cpu_data.x86 >= 6) { + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; + wrmsr(MSR_IA32_APICBASE, l, h); + } } maxlvt = lapic_get_maxlvt(); -- cgit v1.2.1 From 2225fd56049643c1a7d645c0ce9d499d43c7974e Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 18 Apr 2012 15:03:04 +0300 Subject: KVM: VMX: Fix kvm_set_shared_msr() called in preemptible context kvm_set_shared_msr() may not be called in preemptible context, but vmx_set_msr() does so: BUG: using smp_processor_id() in preemptible [00000000] code: qemu-kvm/22713 caller is kvm_set_shared_msr+0x32/0xa0 [kvm] Pid: 22713, comm: qemu-kvm Not tainted 3.4.0-rc3+ #39 Call Trace: [] debug_smp_processor_id+0xe2/0x100 [] kvm_set_shared_msr+0x32/0xa0 [kvm] [] vmx_set_msr+0x28b/0x2d0 [kvm_intel] ... Making kvm_set_shared_msr() work in preemptible is cleaner, but it's used in the fast path. Making two variants is overkill, so this patch just disables preemption around the call. 
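The hazard being closed is the usual per-CPU one: without pinning, a task can pick per-CPU state on one CPU, get preempted and migrated, and then update another CPU's copy. A minimal sketch of the pattern the fix applies, as a toy module with made-up names (not the actual KVM code, which caches shared MSR values per CPU):

#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/preempt.h>

static DEFINE_PER_CPU(u64, cached_msr_value);

/* Choosing "this CPU" and writing its slot must happen on the same CPU,
 * so preemption is disabled across the (short) critical section. */
static void update_cached_msr(u64 val)
{
	preempt_disable();
	__this_cpu_write(cached_msr_value, val);
	preempt_enable();
}

static int __init preempt_demo_init(void)
{
	update_cached_msr(0x1234);
	return 0;
}

static void __exit preempt_demo_exit(void)
{
}

module_init(preempt_demo_init);
module_exit(preempt_demo_exit);
MODULE_LICENSE("GPL");

Keeping the disable/enable pair around the single call keeps the fast path cheap, which is the trade-off the message above describes.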
Reported-by: Dave Jones Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ad85adfef843..4ff0ab9bc3c8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2210,9 +2210,12 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) msr = find_msr_entry(vmx, msr_index); if (msr) { msr->data = data; - if (msr - vmx->guest_msrs < vmx->save_nmsrs) + if (msr - vmx->guest_msrs < vmx->save_nmsrs) { + preempt_disable(); kvm_set_shared_msr(msr->index, msr->data, msr->mask); + preempt_enable(); + } break; } ret = kvm_set_msr_common(vcpu, msr_index, data); -- cgit v1.2.1 From a720b2dd2470a52345df11dca8d6c1466599f812 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Thu, 19 Apr 2012 12:35:08 +0200 Subject: x86, intel_cacheinfo: Fix error return code in amd_set_l3_disable_slot() If the L3 disable slot is already in use, return -EEXIST instead of -EINVAL. The caller, store_cache_disable(), checks this return value to print an appropriate warning. Also, we want to signal with -EEXIST that the current index we're disabling has actually been already disabled on the node: $ echo 12 > /sys/devices/system/cpu/cpu3/cache/index3/cache_disable_0 $ echo 12 > /sys/devices/system/cpu/cpu3/cache/index3/cache_disable_0 -bash: echo: write error: File exists $ echo 12 > /sys/devices/system/cpu/cpu3/cache/index3/cache_disable_1 -bash: echo: write error: File exists $ echo 12 > /sys/devices/system/cpu/cpu5/cache/index3/cache_disable_1 -bash: echo: write error: File exists The old code would say -bash: echo: write error: Invalid argument for disable slot 1 when playing the example above with no output in dmesg, which is clearly misleading. Reported-by: Dan Carpenter Signed-off-by: Srivatsa S. Bhat Link: http://lkml.kernel.org/r/20120419070053.GB16645@elgon.mountain [Boris: add testing for the other index too] Signed-off-by: Borislav Petkov --- arch/x86/kernel/cpu/intel_cacheinfo.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 73d08ed98a64..b8f3653dddbc 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -433,14 +433,14 @@ int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, unsigned slot, /* check if @slot is already used or the index is already disabled */ ret = amd_get_l3_disable_slot(nb, slot); if (ret >= 0) - return -EINVAL; + return -EEXIST; if (index > nb->l3_cache.indices) return -EINVAL; /* check whether the other slot has disabled the same index already */ if (index == amd_get_l3_disable_slot(nb, !slot)) - return -EINVAL; + return -EEXIST; amd_l3_disable_index(nb, cpu, slot, index); @@ -468,8 +468,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val); if (err) { if (err == -EEXIST) - printk(KERN_WARNING "L3 disable slot %d in use!\n", - slot); + pr_warning("L3 slot %d in use/index already disabled!\n", + slot); return err; } return count; -- cgit v1.2.1 From 3d81acb1cdb242378a1acb3eb1bc28c6bb5895f1 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 20 Apr 2012 11:50:30 -0400 Subject: Revert "xen/p2m: m2p_find_override: use list_for_each_entry_safe" This reverts commit b960d6c43a63ebd2d8518b328da3816b833ee8cc. 
If we have another thread (very likely) touched the list, we end up hitting a problem "that the next element is wrong because we should be able to cope with that. The problem is that the next->next pointer would be set LIST_POISON1. " (Stefano's comment on the patch). Reverting for now. Suggested-by: Dan Carpenter Acked-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/p2m.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 7ed8cc3434c5..1b267e75158d 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -809,17 +809,21 @@ struct page *m2p_find_override(unsigned long mfn) { unsigned long flags; struct list_head *bucket = &m2p_overrides[mfn_hash(mfn)]; - struct page *p, *t, *ret; + struct page *p, *ret; ret = NULL; - list_for_each_entry_safe(p, t, bucket, lru) { + spin_lock_irqsave(&m2p_override_lock, flags); + + list_for_each_entry(p, bucket, lru) { if (page_private(p) == mfn) { ret = p; break; } } + spin_unlock_irqrestore(&m2p_override_lock, flags); + return ret; } -- cgit v1.2.1 From e4eb1ff61b323d6141614e5458a1f53c7046ff8e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 20 Apr 2012 15:35:40 -0700 Subject: VM: add "vm_brk()" helper function It does the same thing as "do_brk()", except it handles the VM locking too. It turns out that all external callers want that anyway, so we can make do_brk() static to just mm/mmap.c while at it. Signed-off-by: Linus Torvalds --- arch/x86/ia32/ia32_aout.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index d511d951a052..b6817ee9033f 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -119,9 +119,7 @@ static void set_brk(unsigned long start, unsigned long end) end = PAGE_ALIGN(end); if (end <= start) return; - down_write(&current->mm->mmap_sem); - do_brk(start, end - start); - up_write(&current->mm->mmap_sem); + vm_brk(start, end - start); } #ifdef CORE_DUMP @@ -332,9 +330,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) pos = 32; map_size = ex.a_text+ex.a_data; - down_write(&current->mm->mmap_sem); - error = do_brk(text_addr & PAGE_MASK, map_size); - up_write(&current->mm->mmap_sem); + error = vm_brk(text_addr & PAGE_MASK, map_size); if (error != (text_addr & PAGE_MASK)) { send_sig(SIGKILL, current, 0); @@ -373,9 +369,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (!bprm->file->f_op->mmap || (fd_offset & ~PAGE_MASK) != 0) { loff_t pos = fd_offset; - down_write(&current->mm->mmap_sem); - do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); - up_write(&current->mm->mmap_sem); + vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); bprm->file->f_op->read(bprm->file, (char __user *)N_TXTADDR(ex), ex.a_text+ex.a_data, &pos); @@ -476,9 +470,7 @@ static int load_aout_library(struct file *file) error_time = jiffies; } #endif - down_write(&current->mm->mmap_sem); - do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); - up_write(&current->mm->mmap_sem); + vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); file->f_op->read(file, (char __user *)start_addr, ex.a_text + ex.a_data, &pos); @@ -503,9 +495,7 @@ static int load_aout_library(struct file *file) len = PAGE_ALIGN(ex.a_text + ex.a_data); bss = ex.a_text + ex.a_data + ex.a_bss; if (bss > len) { - down_write(&current->mm->mmap_sem); - error = do_brk(start_addr + len, bss - len); - up_write(&current->mm->mmap_sem); + error = vm_brk(start_addr + 
len, bss - len); retval = error; if (error != start_addr + len) goto out; -- cgit v1.2.1 From a46ef99d80817a167477ed1c8b4d90ee0c2e726f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 20 Apr 2012 16:20:01 -0700 Subject: VM: add "vm_munmap()" helper function Like the vm_brk() function, this is the same as "do_munmap()", except it does the VM locking for the caller. Signed-off-by: Linus Torvalds --- arch/x86/kvm/x86.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4044ce0bf7c1..8beb9ce79364 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6366,10 +6366,8 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, if (!user_alloc && !old.user_alloc && old.rmap && !npages) { int ret; - down_write(&current->mm->mmap_sem); - ret = do_munmap(current->mm, old.userspace_addr, + ret = vm_munmap(current->mm, old.userspace_addr, old.npages * PAGE_SIZE); - up_write(&current->mm->mmap_sem); if (ret < 0) printk(KERN_WARNING "kvm_vm_ioctl_set_memory_region: " -- cgit v1.2.1 From 6be5ceb02e98eaf6cfc4f8b12a896d04023f340d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 20 Apr 2012 17:13:58 -0700 Subject: VM: add "vm_mmap()" helper function This continues the theme started with vm_brk() and vm_munmap(): vm_mmap() does the same thing as do_mmap(), but additionally does the required VM locking. This uninlines (and rewrites it to be clearer) do_mmap(), which sadly duplicates it in mm/mmap.c and mm/nommu.c. But that way we don't have to export our internal do_mmap_pgoff() function. Some day we hopefully don't have to export do_mmap() either, if all modular users can become the simpler vm_mmap() instead. We're actually very close to that already, with the notable exception of the (broken) use in i810, and a couple of stragglers in binfmt_elf. Signed-off-by: Linus Torvalds --- arch/x86/ia32/ia32_aout.c | 12 +++--------- arch/x86/kvm/x86.c | 4 +--- 2 files changed, 4 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index b6817ee9033f..4824fb45560f 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -379,26 +379,22 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) goto beyond_if; } - down_write(&current->mm->mmap_sem); - error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, + error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT, fd_offset); - up_write(&current->mm->mmap_sem); if (error != N_TXTADDR(ex)) { send_sig(SIGKILL, current, 0); return error; } - down_write(&current->mm->mmap_sem); - error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data, + error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE | MAP_32BIT, fd_offset + ex.a_text); - up_write(&current->mm->mmap_sem); if (error != N_DATADDR(ex)) { send_sig(SIGKILL, current, 0); return error; @@ -482,12 +478,10 @@ static int load_aout_library(struct file *file) goto out; } /* Now use mmap to map the library into memory. 
*/ - down_write(&current->mm->mmap_sem); - error = do_mmap(file, start_addr, ex.a_text + ex.a_data, + error = vm_mmap(file, start_addr, ex.a_text + ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_32BIT, N_TXTOFF(ex)); - up_write(&current->mm->mmap_sem); retval = error; if (error != start_addr) goto out; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8beb9ce79364..1457be305fb1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6336,13 +6336,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (npages && !old.rmap) { unsigned long userspace_addr; - down_write(&current->mm->mmap_sem); - userspace_addr = do_mmap(NULL, 0, + userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE, PROT_READ | PROT_WRITE, map_flags, 0); - up_write(&current->mm->mmap_sem); if (IS_ERR((void *)userspace_addr)) return PTR_ERR((void *)userspace_addr); -- cgit v1.2.1 From bfce281c287a427d0841fadf5d59242757b4e620 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 20 Apr 2012 21:57:04 -0400 Subject: kill mm argument of vm_munmap() it's always current->mm Signed-off-by: Al Viro --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1457be305fb1..91a5e989abcf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6364,7 +6364,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, if (!user_alloc && !old.user_alloc && old.rmap && !npages) { int ret; - ret = vm_munmap(current->mm, old.userspace_addr, + ret = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE); if (ret < 0) printk(KERN_WARNING -- cgit v1.2.1 From 2a14e541ed87bca0c125b82961ca3c6f808607d2 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Sun, 22 Apr 2012 23:03:17 -0400 Subject: ACPI: Convert wake_sleep_flags to a value instead of function With commit a2ef5c4fd44ce3922435139393b89f2cce47f576 "ACPI: Move module parameter gts and bfs to sleep.c" the wake_sleep_flags is required when calling acpi_enter_sleep_state, which means that if there are functions outside the sleep.c code they can't get the wake_sleep_flags values. This converts the function into an exported value and converts the module config operands to a function. Acked-by: Rafael J. Wysocki Acked-by: Lin Ming [v2: Parameters can be turned on/off dynamically] [v3: unsigned char -> u8] [v4: val -> kp->arg] Signed-off-by: Konrad Rzeszutek Wilk Link: http://lkml.kernel.org/r/1335150198-21899-2-git-send-email-konrad.wilk@oracle.com Signed-off-by: H. Peter Anvin --- arch/x86/kernel/acpi/sleep.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h index 416d4be13fef..fe5fdda5dcd7 100644 --- a/arch/x86/kernel/acpi/sleep.h +++ b/arch/x86/kernel/acpi/sleep.h @@ -9,6 +9,8 @@ extern long saved_magic; extern int wakeup_pmode_return; +extern u8 wake_sleep_flags; + extern unsigned long acpi_copy_wakeup_routine(unsigned long); extern void wakeup_long64(void); -- cgit v1.2.1 From cd74257b974d6d26442c97891c4d05772748b177 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Sun, 22 Apr 2012 23:03:18 -0400 Subject: x86, acpi: Call acpi_enter_sleep_state via an asmlinkage C function from assembler With commit a2ef5c4fd44ce3922435139393b89f2cce47f576 "ACPI: Move module parameter gts and bfs to sleep.c" the wake_sleep_flags is required when calling acpi_enter_sleep_state. The assembler code in wakeup_*.S did not do that. 
One solution is to call it from assembler and stick the wake_sleep_flags on the stack (for 32-bit) or in %esi (for 64-bit). hpa and rafael both suggested however to create a wrapper function to call acpi_enter_sleep_state and call said wrapper function ("acpi_enter_s3") from assembler. For 32-bit, the acpi_enter_s3 ends up looking as so: push %ebp mov %esp,%ebp sub $0x8,%esp movzbl 0xc1809314,%eax [wake_sleep_flags] movl $0x3,(%esp) mov %eax,0x4(%esp) call 0xc12d1fa0 leave ret And 64-bit: movzbl 0x9afde1(%rip),%esi [wake_sleep_flags] push %rbp mov $0x3,%edi mov %rsp,%rbp callq 0xffffffff812e9800 leaveq retq Reviewed-by: H. Peter Anvin Suggested-by: H. Peter Anvin [v2: Remove extra assembler operations, per hpa review] Signed-off-by: Konrad Rzeszutek Wilk Link: http://lkml.kernel.org/r/1335150198-21899-3-git-send-email-konrad.wilk@oracle.com Signed-off-by: H. Peter Anvin --- arch/x86/kernel/acpi/sleep.c | 4 ++++ arch/x86/kernel/acpi/sleep.h | 2 ++ arch/x86/kernel/acpi/wakeup_32.S | 4 +--- arch/x86/kernel/acpi/wakeup_64.S | 4 +--- 4 files changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 103b6ab368d3..146a49c763a4 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -24,6 +24,10 @@ unsigned long acpi_realmode_flags; static char temp_stack[4096]; #endif +asmlinkage void acpi_enter_s3(void) +{ + acpi_enter_sleep_state(3, wake_sleep_flags); +} /** * acpi_suspend_lowlevel - save kernel state * diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h index fe5fdda5dcd7..d68677a2a010 100644 --- a/arch/x86/kernel/acpi/sleep.h +++ b/arch/x86/kernel/acpi/sleep.h @@ -3,6 +3,7 @@ */ #include +#include extern unsigned long saved_video_mode; extern long saved_magic; @@ -10,6 +11,7 @@ extern long saved_magic; extern int wakeup_pmode_return; extern u8 wake_sleep_flags; +extern asmlinkage void acpi_enter_s3(void); extern unsigned long acpi_copy_wakeup_routine(unsigned long); extern void wakeup_long64(void); diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S index 13ab720573e3..72610839f03b 100644 --- a/arch/x86/kernel/acpi/wakeup_32.S +++ b/arch/x86/kernel/acpi/wakeup_32.S @@ -74,9 +74,7 @@ restore_registers: ENTRY(do_suspend_lowlevel) call save_processor_state call save_registers - pushl $3 - call acpi_enter_sleep_state - addl $4, %esp + call acpi_enter_s3 # In case of S3 failure, we'll emerge here. Jump # to ret_point to recover diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S index 8ea5164cbd04..014d1d28c397 100644 --- a/arch/x86/kernel/acpi/wakeup_64.S +++ b/arch/x86/kernel/acpi/wakeup_64.S @@ -71,9 +71,7 @@ ENTRY(do_suspend_lowlevel) movq %rsi, saved_rsi addq $8, %rsp - movl $3, %edi - xorl %eax, %eax - call acpi_enter_sleep_state + call acpi_enter_s3 /* in case something went wrong, restore the machine status and go on */ jmp resume_point -- cgit v1.2.1 From 98e5272fe70d62e193f70acf9951667beab27aba Mon Sep 17 00:00:00 2001 From: "H.J. Lu" Date: Mon, 23 Apr 2012 14:51:14 -0700 Subject: x32: Check __ILP32__ instead of __LP64__ for x32 Checking __LP64__ isn't a reliable way to tell if we are compiling for x32 since __LP64__ isn't specified by the x86-64 psABI. Not all x86-64 compilers define __LP64__, which was added to GCC 3.3. The updated x32 psABI: https://sites.google.com/site/x32abi/documents defines _ILP32 and __ILP32__ for x32. 
GCC trunk and 4.7 branch have been updated to define _ILP32 and __ILP32__ for x32. This patch replaces __LP64__ check with __ILP32__. Signed-off-by: H.J. Lu Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/posix_types.h | 6 +++--- arch/x86/include/asm/sigcontext.h | 2 +- arch/x86/include/asm/unistd.h | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/posix_types.h b/arch/x86/include/asm/posix_types.h index 3427b7798dbc..7ef7c3020e5c 100644 --- a/arch/x86/include/asm/posix_types.h +++ b/arch/x86/include/asm/posix_types.h @@ -7,9 +7,9 @@ #else # ifdef __i386__ # include "posix_types_32.h" -# elif defined(__LP64__) -# include "posix_types_64.h" -# else +# elif defined(__ILP32__) # include "posix_types_x32.h" +# else +# include "posix_types_64.h" # endif #endif diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h index 4a085383af27..5ca71c065eef 100644 --- a/arch/x86/include/asm/sigcontext.h +++ b/arch/x86/include/asm/sigcontext.h @@ -257,7 +257,7 @@ struct sigcontext { __u64 oldmask; __u64 cr2; struct _fpstate __user *fpstate; /* zero when no FPU context */ -#ifndef __LP64__ +#ifdef __ILP32__ __u32 __fpstate_pad; #endif __u64 reserved1[8]; diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index 37cdc9d99bb1..4437001d8e3d 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -63,10 +63,10 @@ #else # ifdef __i386__ # include -# elif defined(__LP64__) -# include -# else +# elif defined(__ILP32__) # include +# else +# include # endif #endif -- cgit v1.2.1 From 89b8835ec865dddd6673a8dd7003581bf2377176 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 23 Apr 2012 16:34:12 -0700 Subject: x32, siginfo: Provide proper overrides for x32 siginfo_t Provide the proper override macros for x32 siginfo_t. The combination of a special type here and an overall alignment constraint actually ends up with all the types being properly aligned, but the hack is needed to keep the substructures inside siginfo_t from adding padding. Note: use __attribute__((aligned())) since __aligned() is not exported to user space. [ v2: fix stray semicolon ] Reported-by: H.J. Lu Cc: Bruce J. Beare Cc: Arnd Bergmann Link: http://lkml.kernel.org/r/CAMe9rOqF6Kh6-NK7oP0Fpzkd4SBAWU%2BG53hwBbSD4iA2UzyxuA@mail.gmail.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/siginfo.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/siginfo.h b/arch/x86/include/asm/siginfo.h index fc1aa5535646..34c47b3341c0 100644 --- a/arch/x86/include/asm/siginfo.h +++ b/arch/x86/include/asm/siginfo.h @@ -2,7 +2,13 @@ #define _ASM_X86_SIGINFO_H #ifdef __x86_64__ -# define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) +# ifdef __ILP32__ /* x32 */ +typedef long long __kernel_si_clock_t __attribute__((aligned(4))); +# define __ARCH_SI_CLOCK_T __kernel_si_clock_t +# define __ARCH_SI_ATTRIBUTES __attribute__((aligned(8))) +# else /* x86-64 */ +# define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) +# endif #endif #include -- cgit v1.2.1 From d0d3bc65afcdd69bdd3b5bebdf8b3ee3680efa0e Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Tue, 24 Apr 2012 15:00:53 -0700 Subject: x86/mrst: Quiet sparse noise about plain integer as NULL pointer The second parameter to intel_scu_notifier_post is a void *, not an integer. 
This quiets the sparse noise: arch/x86/platform/mrst/mrst.c:808:48: warning: Using plain integer as NULL pointer arch/x86/platform/mrst/mrst.c:817:43: warning: Using plain integer as NULL pointer Signed-off-by: H Hartley Sweeten Acked-by: Alan Cox Link: http://lkml.kernel.org/r/201204241500.53685.hartleys@visionengravers.com Signed-off-by: Ingo Molnar --- arch/x86/platform/mrst/mrst.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index e0a37233c0af..e31bcd8f2eee 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c @@ -805,7 +805,7 @@ void intel_scu_devices_create(void) } else i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1); } - intel_scu_notifier_post(SCU_AVAILABLE, 0L); + intel_scu_notifier_post(SCU_AVAILABLE, NULL); } EXPORT_SYMBOL_GPL(intel_scu_devices_create); @@ -814,7 +814,7 @@ void intel_scu_devices_destroy(void) { int i; - intel_scu_notifier_post(SCU_DOWN, 0L); + intel_scu_notifier_post(SCU_DOWN, NULL); for (i = 0; i < ipc_next_dev; i++) platform_device_del(ipc_devs[i]); -- cgit v1.2.1 From ea0dcf903e7d76aa5d483d876215fedcfdfe140f Mon Sep 17 00:00:00 2001 From: Greg Pearson Date: Tue, 24 Apr 2012 18:23:56 -0600 Subject: x86/apic: Use x2apic physical mode based on FADT setting Provide systems that do not support x2apic cluster mode a mechanism to select x2apic physical mode using the FADT FORCE_APIC_PHYSICAL_DESTINATION_MODE bit. Changes from v1: (based on Suresh's comments) - removed #ifdef CONFIG_ACPI - removed #include Signed-off-by: Greg Pearson Acked-by: Suresh Siddha Link: http://lkml.kernel.org/r/1335313436-32020-1-git-send-email-greg.pearson@hp.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_phys.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 8a778db45e3a..991e315f4227 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -24,6 +24,12 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { if (x2apic_phys) return x2apic_enabled(); + else if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) && + (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) && + x2apic_enabled()) { + printk(KERN_DEBUG "System requires x2apic physical mode\n"); + return 1; + } else return 0; } -- cgit v1.2.1 From df88b2d96e36d9a9e325bfcd12eb45671cbbc937 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 26 Apr 2012 13:13:21 -0400 Subject: xen/enlighten: Disable MWAIT_LEAF so that acpi-pad won't be loaded. There are exactly four users of __monitor and __mwait: - cstate.c (which allows acpi_processor_ffh_cstate_enter to be called when the cpuidle API drivers are used). However, the patch "cpuidle: replace xen access to x86 pm_idle and default_idle" provides a mechanism to disable cpuidle and use safe_halt. - smpboot (which allows mwait_play_dead to be called). However safe_halt is always used so we skip that. - intel_idle (same deal as above). - acpi_pad.c. This is the one that we do not want to run as we will hit the below crash. Why do we want to expose MWAIT_LEAF in the first place? We want it for the xen-acpi-processor driver - which uploads C-states to the hypervisor. If MWAIT_LEAF is set, cstate.c sets the proper address in the C-states so that the hypervisor can benefit from using the MWAIT functionality. And that is the sole reason for using it.
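For reference, __monitor and __mwait are thin inline-assembly wrappers around the MONITOR and MWAIT instructions; the x86 header definitions of this era look roughly like the following sketch (paraphrased, not a verbatim copy):

    static inline void __monitor(const void *eax, unsigned long ecx, unsigned long edx)
    {
            /* MONITOR: arm the monitor on the cache line containing eax */
            asm volatile(".byte 0x0f, 0x01, 0xc8"
                         :: "a" (eax), "c" (ecx), "d" (edx));
    }

    static inline void __mwait(unsigned long eax, unsigned long ecx)
    {
            /* MWAIT: wait until the monitored line is written or an event arrives */
            asm volatile(".byte 0x0f, 0x01, 0xc9"
                         :: "a" (eax), "c" (ecx));
    }

Those opcode bytes (0f 01 c8 and 0f 01 c9) are exactly what appears in the Code: line of the oops below when a PV guest executes them.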
Without this patch, if a module performs mwait or monitor we get this: invalid opcode: 0000 [#1] SMP CPU 2 .. snip.. Pid: 5036, comm: insmod Tainted: G O 3.4.0-rc2upstream-dirty #2 Intel Corporation S2600CP/S2600CP RIP: e030:[] [] mwait_check_init+0x17/0x1000 [mwait_check] RSP: e02b:ffff8801c298bf18 EFLAGS: 00010282 RAX: ffff8801c298a010 RBX: ffffffffa03b2000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff8801c29800d8 RDI: ffff8801ff097200 RBP: ffff8801c298bf18 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 R13: ffffffffa000a000 R14: 0000005148db7294 R15: 0000000000000003 FS: 00007fbb364f2700(0000) GS:ffff8801ff08c000(0000) knlGS:0000000000000000 CS: e033 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 000000000179f038 CR3: 00000001c9469000 CR4: 0000000000002660 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process insmod (pid: 5036, threadinfo ffff8801c298a000, task ffff8801c29cd7e0) Stack: ffff8801c298bf48 ffffffff81002124 ffffffffa03b2000 00000000000081fd 000000000178f010 000000000178f030 ffff8801c298bf78 ffffffff810c41e6 00007fff3fb30db9 00007fff3fb30db9 00000000000081fd 0000000000010000 Call Trace: [] do_one_initcall+0x124/0x170 [] sys_init_module+0xc6/0x220 [] system_call_fastpath+0x16/0x1b Code: <0f> 01 c8 31 c0 0f 01 c9 c9 c3 00 00 00 00 00 00 00 00 00 00 00 00 RIP [] mwait_check_init+0x17/0x1000 [mwait_check] RSP ---[ end trace 16582fc8a3d1e29a ]--- Kernel panic - not syncing: Fatal exception With this module (which is what acpi_pad.c would hit): MODULE_AUTHOR("Konrad Rzeszutek Wilk "); MODULE_DESCRIPTION("mwait_check_and_back"); MODULE_LICENSE("GPL"); MODULE_VERSION(); static int __init mwait_check_init(void) { __monitor((void *)&current_thread_info()->flags, 0, 0); __mwait(0, 0); return 0; } static void __exit mwait_check_exit(void) { } module_init(mwait_check_init); module_exit(mwait_check_exit); Reported-by: Liu, Jinsong Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 4f51bebac02c..a8f8844b8d32 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -261,7 +261,8 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, static bool __init xen_check_mwait(void) { -#ifdef CONFIG_ACPI +#if defined(CONFIG_ACPI) && !defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR) && \ + !defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR_MODULE) struct xen_platform_op op = { .cmd = XENPF_set_processor_pminfo, .u.set_pminfo.id = -1, @@ -349,7 +350,6 @@ static void __init xen_init_cpuid_mask(void) /* Xen will set CR4.OSXSAVE if supported and not disabled by force */ if ((cx & xsave_mask) != xsave_mask) cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */ - if (xen_check_mwait()) cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32)); } -- cgit v1.2.1 From cf405ae612b0f7e2358db7ff594c0e94846137aa Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 26 Apr 2012 13:50:03 -0400 Subject: xen/smp: Fix crash when booting with ACPI hotplug CPUs.
When we boot on a machine that can hotplug CPUs and we are using 'dom0_max_vcpus=X' on the Xen hypervisor line to clip the amount of CPUs available to the initial domain, we get this: (XEN) Command line: com1=115200,8n1 dom0_mem=8G noreboot dom0_max_vcpus=8 sync_console mce_verbosity=verbose console=com1,vga loglvl=all guest_loglvl=all .. snip.. DMI: Intel Corporation S2600CP/S2600CP, BIOS SE5C600.86B.99.99.x032.072520111118 07/25/2011 .. snip. SMP: Allowing 64 CPUs, 32 hotplug CPUs installing Xen timer for CPU 7 cpu 7 spinlock event irq 361 NMI watchdog: disabled (cpu7): hardware events not enabled Brought up 8 CPUs .. snip.. [acpi processor finds the CPUs are not initialized and starts calling arch_register_cpu, which creates /sys/devices/system/cpu/cpu8/online] CPU 8 got hotplugged CPU 9 got hotplugged CPU 10 got hotplugged .. snip.. initcall 1_acpi_battery_init_async+0x0/0x1b returned 0 after 406 usecs calling erst_init+0x0/0x2bb @ 1 [and the scheduler sticks newly started tasks on the new CPUs, but said CPUs cannot be initialized b/c the hypervisor has limited the amount of vCPUS to 8 - as per the dom0_max_vcpus=8 flag. The spinlock tries to kick the other CPU, but the structure for that is not initialized and we crash.] BUG: unable to handle kernel paging request at fffffffffffffed8 IP: [] xen_spin_lock+0x29/0x60 PGD 180d067 PUD 180e067 PMD 0 Oops: 0002 [#1] SMP CPU 7 Modules linked in: Pid: 1, comm: swapper/0 Not tainted 3.4.0-rc2upstream-00001-gf5154e8 #1 Intel Corporation S2600CP/S2600CP RIP: e030:[] [] xen_spin_lock+0x29/0x60 RSP: e02b:ffff8801fb9b3a70 EFLAGS: 00010282 With this patch, we cap the amount of vCPUS that the initial domain can run, to exactly what dom0_max_vcpus=X has specified. In the future, if there is a hypercall that will allow a running domain to expand past its initial set of vCPUS, this patch should be re-evaluated. CC: stable@kernel.org Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/smp.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 5fac6919b957..0503c0c493a9 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -178,6 +178,7 @@ static void __init xen_fill_possible_map(void) static void __init xen_filter_cpu_maps(void) { int i, rc; + unsigned int subtract = 0; if (!xen_initial_domain()) return; @@ -192,8 +193,22 @@ static void __init xen_filter_cpu_maps(void) } else { set_cpu_possible(i, false); set_cpu_present(i, false); + subtract++; } } +#ifdef CONFIG_HOTPLUG_CPU + /* This is akin to using 'nr_cpus' on the Linux command line. + * Which is OK as when we use 'dom0_max_vcpus=X' we can only + * have up to X, while nr_cpu_ids is greater than X. This + * normally is not a problem, except when CPU hotplugging + * is involved and then there might be more than X CPUs + * in the guest - which will not work as there is no + * hypercall to expand the max number of VCPUs an already + * running guest has. So cap it up to X. */ + if (subtract) + nr_cpu_ids = nr_cpu_ids - subtract; +#endif + } static void __init xen_smp_prepare_boot_cpu(void) -- cgit v1.2.1 From f7f286a910221ae18b21c18d9d0f4cd88965829f Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Tue, 3 Apr 2012 12:13:07 +0200 Subject: x86/amd: Re-enable CPU topology extensions in case BIOS has disabled it BIOS will switch off the corresponding feature flag on family 15h models 10h-1fh non-desktop CPUs. The topology extension CPUID leafs are required to detect which cores belong to the same compute unit. 
(thread siblings mask is set accordingly and also correct information about L1i and L2 cache sharing depends on this). W/o this patch we wouldn't see which cores belong to the same compute unit and also cache sharing information for L1i and L2 would be incorrect on such systems. Signed-off-by: Andreas Herrmann Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 1c67ca100e4c..146bb6218eec 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -580,6 +580,24 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } } + /* re-enable TopologyExtensions if switched off by BIOS */ + if ((c->x86 == 0x15) && + (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) && + !cpu_has(c, X86_FEATURE_TOPOEXT)) { + u64 val; + + if (!rdmsrl_amd_safe(0xc0011005, &val)) { + val |= 1ULL << 54; + wrmsrl_amd_safe(0xc0011005, val); + rdmsrl(0xc0011005, val); + if (val & (1ULL << 54)) { + set_cpu_cap(c, X86_FEATURE_TOPOEXT); + printk(KERN_INFO FW_INFO "CPU: Re-enabling " + "disabled Topology Extensions Support\n"); + } + } + } + cpu_detect_cache_sizes(c); /* Multi core CPU? */ -- cgit v1.2.1 From 7eb7ce4d2e8991aff4ecb71a81949a907ca755ac Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 26 Apr 2012 19:44:06 +0100 Subject: xen: correctly check for pending events when restoring irq flags In xen_restore_fl_direct(), xen_force_evtchn_callback() was being called even if no events were pending. This resulted in (depending on workload) about a 100 times as many xen_version hypercalls as necessary. Fix this by correcting the sense of the conditional jump. This seems to give a significant performance benefit for some workloads. There is some subtle tricksy "..since the check here is trying to check both pending and masked in a single cmpw, but I think this is correct. It will call check_events now only when the combined mask+pending word is 0x0001 (aka unmasked, pending)." (Ian) CC: stable@kernel.org Acked-by: Ian Campbell Signed-off-by: David Vrabel Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/xen-asm.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 79d7362ad6d1..3e45aa000718 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -96,7 +96,7 @@ ENTRY(xen_restore_fl_direct) /* check for unmasked and pending */ cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending - jz 1f + jnz 1f 2: call check_events 1: ENDPATCH(xen_restore_fl_direct) -- cgit v1.2.1 From 7c77cda0fe742ed07622827ce80963bbeebd1e3f Mon Sep 17 00:00:00 2001 From: Kusanagi Kouichi Date: Sun, 1 Apr 2012 17:29:32 +0900 Subject: x86, relocs: Remove an unused variable sh_symtab is set but not used. [ hpa: putting this in urgent because of the sheer harmlessness of the patch: it quiets a build warning but does not change any generated code. ] Signed-off-by: Kusanagi Kouichi Link: http://lkml.kernel.org/r/20120401082932.D5E066FC03D@msa105.auone-net.jp Signed-off-by: H. 
Peter Anvin Cc: --- arch/x86/boot/compressed/relocs.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c index d3c0b0277666..fb7117a4ade1 100644 --- a/arch/x86/boot/compressed/relocs.c +++ b/arch/x86/boot/compressed/relocs.c @@ -403,13 +403,11 @@ static void print_absolute_symbols(void) for (i = 0; i < ehdr.e_shnum; i++) { struct section *sec = &secs[i]; char *sym_strtab; - Elf32_Sym *sh_symtab; int j; if (sec->shdr.sh_type != SHT_SYMTAB) { continue; } - sh_symtab = sec->symtab; sym_strtab = sec->link->strtab; for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Sym); j++) { Elf32_Sym *sym; -- cgit v1.2.1 From e419b4cc585680940bc42f8ca8a071d6023fb1bb Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 3 May 2012 10:16:43 -0700 Subject: vfs: make word-at-a-time accesses handle a non-existing page It turns out that there are more cases than CONFIG_DEBUG_PAGEALLOC that can have holes in the kernel address space: it seems to happen easily with Xen, and it looks like the AMD gart64 code will also punch holes dynamically. Actually hitting that case is still very unlikely, so just do the access, and take an exception and fix it up for the very unlikely case of it being a page-crosser with no next page. And hey, this abstraction might even help other architectures that have other issues with unaligned word accesses than the possible missing next page. IOW, this could do the byte order magic too. Peter Anvin fixed a thinko in the shifting for the exception case. Reported-and-tested-by: Jana Saout Cc: Peter Anvin Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 2 +- arch/x86/include/asm/word-at-a-time.h | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1d14cc6b79ad..c9866b0b77d8 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -81,7 +81,7 @@ config X86 select CLKEVT_I8253 select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_IOMAP - select DCACHE_WORD_ACCESS if !DEBUG_PAGEALLOC + select DCACHE_WORD_ACCESS config INSTRUCTION_DECODER def_bool (KPROBES || PERF_EVENTS) diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h index 6fe6767b7124..e58f03b206c3 100644 --- a/arch/x86/include/asm/word-at-a-time.h +++ b/arch/x86/include/asm/word-at-a-time.h @@ -43,4 +43,37 @@ static inline unsigned long has_zero(unsigned long a) return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80); } +/* + * Load an unaligned word from kernel space. + * + * In the (very unlikely) case of the word being a page-crosser + * and the next page not being mapped, take the exception and + * return zeroes in the non-existing part. 
+ */ +static inline unsigned long load_unaligned_zeropad(const void *addr) +{ + unsigned long ret, dummy; + + asm( + "1:\tmov %2,%0\n" + "2:\n" + ".section .fixup,\"ax\"\n" + "3:\t" + "lea %2,%1\n\t" + "and %3,%1\n\t" + "mov (%1),%0\n\t" + "leal %2,%%ecx\n\t" + "andl %4,%%ecx\n\t" + "shll $3,%%ecx\n\t" + "shr %%cl,%0\n\t" + "jmp 2b\n" + ".previous\n" + _ASM_EXTABLE(1b, 3b) + :"=&r" (ret),"=&c" (dummy) + :"m" (*(unsigned long *)addr), + "i" (-sizeof(unsigned long)), + "i" (sizeof(unsigned long)-1)); + return ret; +} + #endif /* _ASM_WORD_AT_A_TIME_H */ -- cgit v1.2.1 From d1d0589a565a2528a044cfd680141c3e2db18d0a Mon Sep 17 00:00:00 2001 From: Bjarke Istrup Pedersen Date: Fri, 4 May 2012 14:01:45 -0700 Subject: arch/x86/platform/geode/net5501.c: change active_low to 0 for LED driver It seems that there was an error with the active_low = 1 for the LED, since it should be set to 0 (meaning that active is high, since 0 is false, hence the confusion. The wiki article about it confuses it, since it contradicts itself, regarding what turns on the LED. I have tested 3.4-rc2 on my net5501 with this patch, and it makes the LED behave correctly, where "none" turns it off, and "default-on" turns it on, when echoed onto the trigger "file" in /sys/class/leds. Signed-off-by: Bjarke Istrup Pedersen Link: http://lkml.kernel.org/r/20120504210146.62186A018B@akpm.mtv.corp.google.com Cc: Philip Prindeville Signed-off-by: Andrew Morton Signed-off-by: H. Peter Anvin --- arch/x86/platform/geode/net5501.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/geode/net5501.c b/arch/x86/platform/geode/net5501.c index 66d377e334f7..646e3b5b4bb6 100644 --- a/arch/x86/platform/geode/net5501.c +++ b/arch/x86/platform/geode/net5501.c @@ -63,7 +63,7 @@ static struct gpio_led net5501_leds[] = { .name = "net5501:1", .gpio = 6, .default_trigger = "default-on", - .active_low = 1, + .active_low = 0, }, }; -- cgit v1.2.1 From ce7e5d2d19bc371e1b67826bfbc79bbcbaa9772f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 May 2012 17:20:00 +0100 Subject: x86: fix broken TASK_SIZE for ia32_aout Setting TIF_IA32 in load_aout_binary() used to be enough; these days TASK_SIZE is controlled by TIF_ADDR32 and that one doesn't get set there. Switch to use of set_personality_ia32()... Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/x86/ia32/ia32_aout.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 4824fb45560f..07b3a68d2d29 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -294,8 +294,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) /* OK, This is the point of no return */ set_personality(PER_LINUX); - set_thread_flag(TIF_IA32); - current->mm->context.ia32_compat = 1; + set_personality_ia32(false); setup_new_exec(bprm); -- cgit v1.2.1
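For context on why flipping TIF_IA32 alone stopped working: on kernels of this vintage the x86-64 TASK_SIZE definition keys off TIF_ADDR32, roughly as in the sketch below (paraphrased from asm/processor.h, not verbatim), and set_personality_ia32() is the helper that sets TIF_ADDR32 in addition to the TIF_IA32/TIF_X32 bookkeeping:

    /* 32-bit tasks get the compat address-space limit ... */
    #define IA32_PAGE_OFFSET        ((current->personality & ADDR_LIMIT_3GB) ? \
                                            0xc0000000 : 0xFFFFe000)
    /* ... selected by TIF_ADDR32, which load_aout_binary() was not setting. */
    #define TASK_SIZE               (test_thread_flag(TIF_ADDR32) ? \
                                            IA32_PAGE_OFFSET : TASK_SIZE_MAX)

With set_personality_ia32(false), the a.out loader gets the same 32-bit TASK_SIZE as the compat ELF path.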