diff options
79 files changed, 1594 insertions, 759 deletions
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt index 9691c7f5166c..0705040531a5 100644 --- a/Documentation/kdump/kdump.txt +++ b/Documentation/kdump/kdump.txt @@ -65,26 +65,26 @@ Install kexec-tools 2) Download the kexec-tools user-space package from the following URL: -http://www.kernel.org/pub/linux/kernel/people/horms/kexec-tools/kexec-tools-testing.tar.gz +http://www.kernel.org/pub/linux/kernel/people/horms/kexec-tools/kexec-tools.tar.gz -This is a symlink to the latest version, which at the time of writing is -20061214, the only release of kexec-tools-testing so far. As other versions -are released, the older ones will remain available at -http://www.kernel.org/pub/linux/kernel/people/horms/kexec-tools/ +This is a symlink to the latest version. -Note: Latest kexec-tools-testing git tree is available at +The latest kexec-tools git tree is available at: -git://git.kernel.org/pub/scm/linux/kernel/git/horms/kexec-tools-testing.git +git://git.kernel.org/pub/scm/linux/kernel/git/horms/kexec-tools.git or -http://www.kernel.org/git/?p=linux/kernel/git/horms/kexec-tools-testing.git;a=summary +http://www.kernel.org/git/?p=linux/kernel/git/horms/kexec-tools.git + +More information about kexec-tools can be found at +http://www.kernel.org/pub/linux/kernel/people/horms/kexec-tools/README.html 3) Unpack the tarball with the tar command, as follows: - tar xvpzf kexec-tools-testing.tar.gz + tar xvpzf kexec-tools.tar.gz 4) Change to the kexec-tools directory, as follows: - cd kexec-tools-testing-VERSION + cd kexec-tools-VERSION 5) Configure the package, as follows: diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index c8f528284a94..257033c691f2 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -17,6 +17,7 @@ config ARM select HAVE_KRETPROBES if (HAVE_KPROBES) select HAVE_FTRACE if (!XIP_KERNEL) select HAVE_DYNAMIC_FTRACE if (HAVE_FTRACE) + select HAVE_GENERIC_DMA_COHERENT help The ARM series is a line of low-power-consumption RISC chip designs licensed by ARM Ltd and targeted at embedded applications and @@ -234,6 +235,7 @@ config ARCH_VERSATILE config ARCH_AT91 bool "Atmel AT91" select GENERIC_GPIO + select HAVE_CLK help This enables support for systems based on the Atmel AT91RM9200, AT91SAM9 and AT91CAP9 processors. @@ -267,7 +269,6 @@ config ARCH_EP93XX select ARM_VIC select GENERIC_GPIO select HAVE_CLK - select HAVE_CLK select ARCH_REQUIRE_GPIOLIB help This enables support for the Cirrus EP93xx series of CPUs. diff --git a/arch/arm/mm/consistent.c b/arch/arm/mm/consistent.c index 333a82a3717e..db7b3e38ef1d 100644 --- a/arch/arm/mm/consistent.c +++ b/arch/arm/mm/consistent.c @@ -274,6 +274,11 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) { + void *memory; + + if (dma_alloc_from_coherent(dev, size, handle, &memory)) + return memory; + if (arch_is_coherent()) { void *virt; @@ -362,6 +367,9 @@ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr WARN_ON(irqs_disabled()); + if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) + return; + if (arch_is_coherent()) { kfree(cpu_addr); return; diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig index 2a92cb1886ca..7a64fcef9d07 100644 --- a/arch/cris/arch-v32/drivers/Kconfig +++ b/arch/cris/arch-v32/drivers/Kconfig @@ -641,6 +641,7 @@ config PCI bool depends on ETRAX_CARDBUS default y + select HAVE_GENERIC_DMA_COHERENT config ETRAX_IOP_FW_LOAD tristate "IO-processor hotplug firmware loading support" diff --git a/arch/cris/arch-v32/drivers/pci/dma.c b/arch/cris/arch-v32/drivers/pci/dma.c index e0364654fc44..fbe65954ee6c 100644 --- a/arch/cris/arch-v32/drivers/pci/dma.c +++ b/arch/cris/arch-v32/drivers/pci/dma.c @@ -15,35 +15,16 @@ #include <linux/pci.h> #include <asm/io.h> -struct dma_coherent_mem { - void *virt_base; - u32 device_base; - int size; - int flags; - unsigned long *bitmap; -}; - void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { void *ret; - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; int order = get_order(size); /* ignore region specifiers */ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); - if (mem) { - int page = bitmap_find_free_region(mem->bitmap, mem->size, - order); - if (page >= 0) { - *dma_handle = mem->device_base + (page << PAGE_SHIFT); - ret = mem->virt_base + (page << PAGE_SHIFT); - memset(ret, 0, size); - return ret; - } - if (mem->flags & DMA_MEMORY_EXCLUSIVE) - return NULL; - } + if (dma_alloc_from_coherent(dev, size, dma_handle, &ret)) + return ret; if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) gfp |= GFP_DMA; @@ -60,90 +41,9 @@ void *dma_alloc_coherent(struct device *dev, size_t size, void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) { - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; int order = get_order(size); - if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { - int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; - - bitmap_release_region(mem->bitmap, page, order); - } else + if (!dma_release_from_coherent(dev, order, vaddr)) free_pages((unsigned long)vaddr, order); } -int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, - dma_addr_t device_addr, size_t size, int flags) -{ - void __iomem *mem_base; - int pages = size >> PAGE_SHIFT; - int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); - - if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) - goto out; - if (!size) - goto out; - if (dev->dma_mem) - goto out; - - /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ - - mem_base = ioremap(bus_addr, size); - if (!mem_base) - goto out; - - dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); - if (!dev->dma_mem) - goto iounmap_out; - dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - if (!dev->dma_mem->bitmap) - goto free1_out; - - dev->dma_mem->virt_base = mem_base; - dev->dma_mem->device_base = device_addr; - dev->dma_mem->size = pages; - dev->dma_mem->flags = flags; - - if (flags & DMA_MEMORY_MAP) - return DMA_MEMORY_MAP; - - return DMA_MEMORY_IO; - - free1_out: - kfree(dev->dma_mem); - iounmap_out: - iounmap(mem_base); - out: - return 0; -} -EXPORT_SYMBOL(dma_declare_coherent_memory); - -void dma_release_declared_memory(struct device *dev) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - - if(!mem) - return; - dev->dma_mem = NULL; - iounmap(mem->virt_base); - kfree(mem->bitmap); - kfree(mem); -} -EXPORT_SYMBOL(dma_release_declared_memory); - -void *dma_mark_declared_memory_occupied(struct device *dev, - dma_addr_t device_addr, size_t size) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT; - int pos, err; - - if (!mem) - return ERR_PTR(-EINVAL); - - pos = (device_addr - mem->device_base) >> PAGE_SHIFT; - err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); - if (err != 0) - return ERR_PTR(err); - return mem->virt_base + (pos << PAGE_SHIFT); -} -EXPORT_SYMBOL(dma_mark_declared_memory_occupied); diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 7bfb0d219d67..0b88dc462d73 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -11,6 +11,7 @@ config SUPERH select HAVE_CLK select HAVE_IDE select HAVE_OPROFILE + select HAVE_GENERIC_DMA_COHERENT help The SuperH is a RISC processor targeted for use in embedded systems and consumer electronics; it was also used in the Sega Dreamcast diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c index 8277982d0938..b2ce014401b5 100644 --- a/arch/sh/mm/consistent.c +++ b/arch/sh/mm/consistent.c @@ -28,21 +28,10 @@ void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { void *ret, *ret_nocache; - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; int order = get_order(size); - if (mem) { - int page = bitmap_find_free_region(mem->bitmap, mem->size, - order); - if (page >= 0) { - *dma_handle = mem->device_base + (page << PAGE_SHIFT); - ret = mem->virt_base + (page << PAGE_SHIFT); - memset(ret, 0, size); - return ret; - } - if (mem->flags & DMA_MEMORY_EXCLUSIVE) - return NULL; - } + if (dma_alloc_from_coherent(dev, size, dma_handle, &ret)) + return ret; ret = (void *)__get_free_pages(gfp, order); if (!ret) @@ -72,11 +61,7 @@ void dma_free_coherent(struct device *dev, size_t size, struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; int order = get_order(size); - if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { - int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; - - bitmap_release_region(mem->bitmap, page, order); - } else { + if (!dma_release_from_coherent(dev, order, vaddr)) { WARN_ON(irqs_disabled()); /* for portability */ BUG_ON(mem && mem->flags & DMA_MEMORY_EXCLUSIVE); free_pages((unsigned long)phys_to_virt(dma_handle), order); @@ -85,83 +70,6 @@ void dma_free_coherent(struct device *dev, size_t size, } EXPORT_SYMBOL(dma_free_coherent); -int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, - dma_addr_t device_addr, size_t size, int flags) -{ - void __iomem *mem_base = NULL; - int pages = size >> PAGE_SHIFT; - int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); - - if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) - goto out; - if (!size) - goto out; - if (dev->dma_mem) - goto out; - - /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ - - mem_base = ioremap_nocache(bus_addr, size); - if (!mem_base) - goto out; - - dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); - if (!dev->dma_mem) - goto out; - dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - if (!dev->dma_mem->bitmap) - goto free1_out; - - dev->dma_mem->virt_base = mem_base; - dev->dma_mem->device_base = device_addr; - dev->dma_mem->size = pages; - dev->dma_mem->flags = flags; - - if (flags & DMA_MEMORY_MAP) - return DMA_MEMORY_MAP; - - return DMA_MEMORY_IO; - - free1_out: - kfree(dev->dma_mem); - out: - if (mem_base) - iounmap(mem_base); - return 0; -} -EXPORT_SYMBOL(dma_declare_coherent_memory); - -void dma_release_declared_memory(struct device *dev) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - - if (!mem) - return; - dev->dma_mem = NULL; - iounmap(mem->virt_base); - kfree(mem->bitmap); - kfree(mem); -} -EXPORT_SYMBOL(dma_release_declared_memory); - -void *dma_mark_declared_memory_occupied(struct device *dev, - dma_addr_t device_addr, size_t size) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT; - int pos, err; - - if (!mem) - return ERR_PTR(-EINVAL); - - pos = (device_addr - mem->device_base) >> PAGE_SHIFT; - err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); - if (err != 0) - return ERR_PTR(err); - return mem->virt_base + (pos << PAGE_SHIFT); -} -EXPORT_SYMBOL(dma_mark_declared_memory_occupied); - void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b6fa2877b173..3d0f2b6a5a16 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -30,6 +30,7 @@ config X86 select HAVE_FTRACE select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER + select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_EFFICIENT_UNALIGNED_ACCESS config ARCH_DEFCONFIG diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 74697408576f..22d7d050905d 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -29,9 +29,6 @@ #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) -#define to_pages(addr, size) \ - (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) - #define EXIT_LOOP_COUNT 10000000 static DEFINE_RWLOCK(amd_iommu_devtable_lock); @@ -185,7 +182,7 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, u64 address, size_t size) { int s = 0; - unsigned pages = to_pages(address, size); + unsigned pages = iommu_num_pages(address, size); address &= PAGE_MASK; @@ -557,8 +554,8 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, if (iommu->exclusion_start && iommu->exclusion_start < dma_dom->aperture_size) { unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; - int pages = to_pages(iommu->exclusion_start, - iommu->exclusion_length); + int pages = iommu_num_pages(iommu->exclusion_start, + iommu->exclusion_length); dma_ops_reserve_addresses(dma_dom, startpage, pages); } @@ -767,7 +764,7 @@ static dma_addr_t __map_single(struct device *dev, unsigned int pages; int i; - pages = to_pages(paddr, size); + pages = iommu_num_pages(paddr, size); paddr &= PAGE_MASK; address = dma_ops_alloc_addresses(dev, dma_dom, pages); @@ -802,7 +799,7 @@ static void __unmap_single(struct amd_iommu *iommu, if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) return; - pages = to_pages(dma_addr, size); + pages = iommu_num_pages(dma_addr, size); dma_addr &= PAGE_MASK; start = dma_addr; diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 37544123896d..8dbffb846de9 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -192,124 +192,6 @@ static __init int iommu_setup(char *p) } early_param("iommu", iommu_setup); -#ifdef CONFIG_X86_32 -int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, - dma_addr_t device_addr, size_t size, int flags) -{ - void __iomem *mem_base = NULL; - int pages = size >> PAGE_SHIFT; - int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); - - if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) - goto out; - if (!size) - goto out; - if (dev->dma_mem) - goto out; - - /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ - - mem_base = ioremap(bus_addr, size); - if (!mem_base) - goto out; - - dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); - if (!dev->dma_mem) - goto out; - dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - if (!dev->dma_mem->bitmap) - goto free1_out; - - dev->dma_mem->virt_base = mem_base; - dev->dma_mem->device_base = device_addr; - dev->dma_mem->size = pages; - dev->dma_mem->flags = flags; - - if (flags & DMA_MEMORY_MAP) - return DMA_MEMORY_MAP; - - return DMA_MEMORY_IO; - - free1_out: - kfree(dev->dma_mem); - out: - if (mem_base) - iounmap(mem_base); - return 0; -} -EXPORT_SYMBOL(dma_declare_coherent_memory); - -void dma_release_declared_memory(struct device *dev) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - - if (!mem) - return; - dev->dma_mem = NULL; - iounmap(mem->virt_base); - kfree(mem->bitmap); - kfree(mem); -} -EXPORT_SYMBOL(dma_release_declared_memory); - -void *dma_mark_declared_memory_occupied(struct device *dev, - dma_addr_t device_addr, size_t size) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - int pos, err; - int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1); - - pages >>= PAGE_SHIFT; - - if (!mem) - return ERR_PTR(-EINVAL); - - pos = (device_addr - mem->device_base) >> PAGE_SHIFT; - err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); - if (err != 0) - return ERR_PTR(err); - return mem->virt_base + (pos << PAGE_SHIFT); -} -EXPORT_SYMBOL(dma_mark_declared_memory_occupied); - -static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size, - dma_addr_t *dma_handle, void **ret) -{ - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; - int order = get_order(size); - - if (mem) { - int page = bitmap_find_free_region(mem->bitmap, mem->size, - order); - if (page >= 0) { - *dma_handle = mem->device_base + (page << PAGE_SHIFT); - *ret = mem->virt_base + (page << PAGE_SHIFT); - memset(*ret, 0, size); - } - if (mem->flags & DMA_MEMORY_EXCLUSIVE) - *ret = NULL; - } - return (mem != NULL); -} - -static int dma_release_coherent(struct device *dev, int order, void *vaddr) -{ - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; - - if (mem && vaddr >= mem->virt_base && vaddr < - (mem->virt_base + (mem->size << PAGE_SHIFT))) { - int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; - - bitmap_release_region(mem->bitmap, page, order); - return 1; - } - return 0; -} -#else -#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0) -#define dma_release_coherent(dev, order, vaddr) (0) -#endif /* CONFIG_X86_32 */ - int dma_supported(struct device *dev, u64 mask) { struct dma_mapping_ops *ops = get_dma_ops(dev); @@ -379,7 +261,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, /* ignore region specifiers */ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); - if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory)) + if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) return memory; if (!dev) { @@ -484,7 +366,7 @@ void dma_free_coherent(struct device *dev, size_t size, int order = get_order(size); WARN_ON(irqs_disabled()); /* for portability */ - if (dma_release_coherent(dev, order, vaddr)) + if (dma_release_from_coherent(dev, order, vaddr)) return; if (ops->unmap_single) ops->unmap_single(dev, bus, size, 0); diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 744126e64950..49285f8fd4d5 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -67,9 +67,6 @@ static u32 gart_unmapped_entry; (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) -#define to_pages(addr, size) \ - (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) - #define EMERGENCY_PAGES 32 /* = 128KB */ #ifdef CONFIG_AGP @@ -241,7 +238,7 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size) static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, size_t size, int dir) { - unsigned long npages = to_pages(phys_mem, size); + unsigned long npages = iommu_num_pages(phys_mem, size); unsigned long iommu_page = alloc_iommu(dev, npages); int i; @@ -304,7 +301,7 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, return; iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; - npages = to_pages(dma_addr, size); + npages = iommu_num_pages(dma_addr, size); for (i = 0; i < npages; i++) { iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; CLEAR_LEAK(iommu_page + i); @@ -387,7 +384,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, } addr = phys_addr; - pages = to_pages(s->offset, s->length); + pages = iommu_num_pages(s->offset, s->length); while (pages--) { iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); SET_LEAK(iommu_page); @@ -470,7 +467,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) seg_size += s->length; need = nextneed; - pages += to_pages(s->offset, s->length); + pages += iommu_num_pages(s->offset, s->length); ps = s; } if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 8d45fabc5f3b..ce3251ce5504 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -21,6 +21,7 @@ config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM select PREEMPT_NOTIFIERS + select MMU_NOTIFIER select ANON_INODES ---help--- Support hosting fully virtualized guest machines using hardware diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 3085f25b4355..007bb06c7504 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -223,14 +223,17 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { struct mm_struct *mm = current->mm; - unsigned long end = start + (nr_pages << PAGE_SHIFT); - unsigned long addr = start; + unsigned long addr, len, end; unsigned long next; pgd_t *pgdp; int nr = 0; + start &= PAGE_MASK; + addr = start; + len = (unsigned long) nr_pages << PAGE_SHIFT; + end = start + len; if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, - start, nr_pages*PAGE_SIZE))) + start, len))) goto slow_irqon; /* diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index ff3a6a336342..4bdaa590375d 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -23,7 +23,8 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d) pci_read_config_byte(d, reg++, &busno); pci_read_config_byte(d, reg++, &suba); pci_read_config_byte(d, reg++, &subb); - DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); + dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, + suba, subb); if (busno) pci_scan_bus_with_sysdata(busno); /* Bus A */ if (suba < subb) diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index a09505806b82..5807d1bc73f7 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -128,10 +128,8 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) pr = pci_find_parent_resource(dev, r); if (!r->start || !pr || request_resource(pr, r) < 0) { - printk(KERN_ERR "PCI: Cannot allocate " - "resource region %d " - "of bridge %s\n", - idx, pci_name(dev)); + dev_err(&dev->dev, "BAR %d: can't " + "allocate resource\n", idx); /* * Something is wrong with the region. * Invalidate the resource to prevent @@ -166,15 +164,15 @@ static void __init pcibios_allocate_resources(int pass) else disabled = !(command & PCI_COMMAND_MEMORY); if (pass == disabled) { - DBG("PCI: Resource %08lx-%08lx " - "(f=%lx, d=%d, p=%d)\n", - r->start, r->end, r->flags, disabled, pass); + dev_dbg(&dev->dev, "resource %#08llx-%#08llx " + "(f=%lx, d=%d, p=%d)\n", + (unsigned long long) r->start, + (unsigned long long) r->end, + r->flags, disabled, pass); pr = pci_find_parent_resource(dev, r); if (!pr || request_resource(pr, r) < 0) { - printk(KERN_ERR "PCI: Cannot allocate " - "resource region %d " - "of device %s\n", - idx, pci_name(dev)); + dev_err(&dev->dev, "BAR %d: can't " + "allocate resource\n", idx); /* We'll assign a new address later */ r->end -= r->start; r->start = 0; @@ -187,8 +185,7 @@ static void __init pcibios_allocate_resources(int pass) /* Turn the ROM off, leave the resource region, * but keep it unregistered. */ u32 reg; - DBG("PCI: Switching off ROM of %s\n", - pci_name(dev)); + dev_dbg(&dev->dev, "disabling ROM\n"); r->flags &= ~IORESOURCE_ROM_ENABLE; pci_read_config_dword(dev, dev->rom_base_reg, ®); @@ -257,8 +254,7 @@ void pcibios_set_master(struct pci_dev *dev) lat = pcibios_max_latency; else return; - printk(KERN_DEBUG "PCI: Setting latency timer of device %s to %d\n", - pci_name(dev), lat); + dev_printk(KERN_DEBUG, &dev->dev, "setting latency timer to %d\n", lat); pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); } diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 6a06a2eb0597..fec0123b33a9 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -436,7 +436,7 @@ static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq) { WARN_ON_ONCE(pirq >= 9); if (pirq > 8) { - printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); + dev_info(&dev->dev, "VLSI router PIRQ escape (%d)\n", pirq); return 0; } return read_config_nybble(router, 0x74, pirq-1); @@ -446,7 +446,7 @@ static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, { WARN_ON_ONCE(pirq >= 9); if (pirq > 8) { - printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); + dev_info(&dev->dev, "VLSI router PIRQ escape (%d)\n", pirq); return 0; } write_config_nybble(router, 0x74, pirq-1, irq); @@ -492,15 +492,17 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq irq = 0; if (pirq <= 4) irq = read_config_nybble(router, 0x56, pirq - 1); - printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n", - dev->vendor, dev->device, pirq, irq); + dev_info(&dev->dev, + "AMD756: dev [%04x/%04x], router PIRQ %d get IRQ %d\n", + dev->vendor, dev->device, pirq, irq); return irq; } static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { - printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", - dev->vendor, dev->device, pirq, irq); + dev_info(&dev->dev, + "AMD756: dev [%04x/%04x], router PIRQ %d set IRQ %d\n", + dev->vendor, dev->device, pirq, irq); if (pirq <= 4) write_config_nybble(router, 0x56, pirq - 1, irq); return 1; @@ -730,7 +732,6 @@ static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, switch (device) { case PCI_DEVICE_ID_AL_M1533: case PCI_DEVICE_ID_AL_M1563: - printk(KERN_DEBUG "PCI: Using ALI IRQ Router\n"); r->name = "ALI"; r->get = pirq_ali_get; r->set = pirq_ali_set; @@ -840,11 +841,9 @@ static void __init pirq_find_router(struct irq_router *r) h->probe(r, pirq_router_dev, pirq_router_dev->device)) break; } - printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n", - pirq_router.name, - pirq_router_dev->vendor, - pirq_router_dev->device, - pci_name(pirq_router_dev)); + dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x/%04x]\n", + pirq_router.name, + pirq_router_dev->vendor, pirq_router_dev->device); /* The device remains referenced for the kernel lifetime */ } @@ -877,7 +876,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) /* Find IRQ pin */ pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); if (!pin) { - DBG(KERN_DEBUG " -> no interrupt pin\n"); + dev_dbg(&dev->dev, "no interrupt pin\n"); return 0; } pin = pin - 1; @@ -887,20 +886,20 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) if (!pirq_table) return 0; - DBG(KERN_DEBUG "IRQ for %s[%c]", pci_name(dev), 'A' + pin); info = pirq_get_info(dev); if (!info) { - DBG(" -> not found in routing table\n" KERN_DEBUG); + dev_dbg(&dev->dev, "PCI INT %c not found in routing table\n", + 'A' + pin); return 0; } pirq = info->irq[pin].link; mask = info->irq[pin].bitmap; if (!pirq) { - DBG(" -> not routed\n" KERN_DEBUG); + dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + pin); return 0; } - DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, - pirq_table->exclusive_irqs); + dev_dbg(&dev->dev, "PCI INT %c -> PIRQ %02x, mask %04x, excl %04x", + 'A' + pin, pirq, mask, pirq_table->exclusive_irqs); mask &= pcibios_irq_mask; /* Work around broken HP Pavilion Notebooks which assign USB to @@ -930,10 +929,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) if (pci_probe & PCI_USE_PIRQ_MASK) newirq = 0; else - printk("\n" KERN_WARNING - "PCI: IRQ %i for device %s doesn't match PIRQ mask - try pci=usepirqmask\n" - KERN_DEBUG, newirq, - pci_name(dev)); + dev_warn(&dev->dev, "IRQ %d doesn't match PIRQ mask " + "%#x; try pci=usepirqmask\n", newirq, mask); } if (!newirq && assign) { for (i = 0; i < 16; i++) { @@ -944,39 +941,35 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) newirq = i; } } - DBG(" -> newirq=%d", newirq); + dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + pin, newirq); /* Check if it is hardcoded */ if ((pirq & 0xf0) == 0xf0) { irq = pirq & 0xf; - DBG(" -> hardcoded IRQ %d\n", irq); - msg = "Hardcoded"; + msg = "hardcoded"; } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) { - DBG(" -> got IRQ %d\n", irq); - msg = "Found"; + msg = "found"; eisa_set_level_irq(irq); } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { - DBG(" -> assigning IRQ %d", newirq); if (r->set(pirq_router_dev, dev, pirq, newirq)) { eisa_set_level_irq(newirq); - DBG(" ... OK\n"); - msg = "Assigned"; + msg = "assigned"; irq = newirq; } } if (!irq) { - DBG(" ... failed\n"); if (newirq && mask == (1 << newirq)) { - msg = "Guessed"; + msg = "guessed"; irq = newirq; - } else + } else { + dev_dbg(&dev->dev, "can't route interrupt\n"); return 0; + } } - printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, - pci_name(dev)); + dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin, irq); /* Update IRQ for all devices with the same pirq value */ while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { @@ -996,17 +989,17 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) (!(pci_probe & PCI_USE_PIRQ_MASK) || \ ((1 << dev2->irq) & mask))) { #ifndef CONFIG_PCI_MSI - printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n", - pci_name(dev2), dev2->irq, irq); + dev_info(&dev2->dev, "IRQ routing conflict: " + "have IRQ %d, want IRQ %d\n", + dev2->irq, irq); #endif continue; } dev2->irq = irq; pirq_penalty[irq]++; if (dev != dev2) - printk(KERN_INFO - "PCI: Sharing IRQ %d with %s\n", - irq, pci_name(dev2)); + dev_info(&dev->dev, "sharing IRQ %d with %s\n", + irq, pci_name(dev2)); } } return 1; @@ -1025,8 +1018,7 @@ static void __init pcibios_fixup_irqs(void) * already in use. */ if (dev->irq >= 16) { - DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n", - pci_name(dev), dev->irq); + dev_dbg(&dev->dev, "ignoring bogus IRQ %d\n", dev->irq); dev->irq = 0; } /* @@ -1070,12 +1062,12 @@ static void __init pcibios_fixup_irqs(void) irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, PCI_SLOT(bridge->devfn), pin); if (irq >= 0) - printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", - pci_name(bridge), 'A' + pin, irq); + dev_warn(&dev->dev, "using bridge %s INT %c to get IRQ %d\n", + pci_name(bridge), + 'A' + pin, irq); } if (irq >= 0) { - printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", - pci_name(dev), 'A' + pin, irq); + dev_info(&dev->dev, "PCI->APIC IRQ transform: INT %c -> IRQ %d\n", 'A' + pin, irq); dev->irq = irq; } } @@ -1231,25 +1223,24 @@ static int pirq_enable_irq(struct pci_dev *dev) irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, PCI_SLOT(bridge->devfn), pin); if (irq >= 0) - printk(KERN_WARNING - "PCI: using PPB %s[%c] to get irq %d\n", - pci_name(bridge), - 'A' + pin, irq); + dev_warn(&dev->dev, "using bridge %s " + "INT %c to get IRQ %d\n", + pci_name(bridge), 'A' + pin, + irq); dev = bridge; } dev = temp_dev; if (irq >= 0) { - printk(KERN_INFO - "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", - pci_name(dev), 'A' + pin, irq); + dev_info(&dev->dev, "PCI->APIC IRQ transform: " + "INT %c -> IRQ %d\n", 'A' + pin, irq); dev->irq = irq; return 0; } else - msg = " Probably buggy MP table."; + msg = "; probably buggy MP table"; } else if (pci_probe & PCI_BIOS_IRQ_SCAN) msg = ""; else - msg = " Please try using pci=biosirq."; + msg = "; please try using pci=biosirq"; /* * With IDE legacy devices the IRQ lookup failure is not @@ -1259,9 +1250,8 @@ static int pirq_enable_irq(struct pci_dev *dev) !(dev->class & 0x5)) return 0; - printk(KERN_WARNING - "PCI: No IRQ known for interrupt pin %c of device %s.%s\n", - 'A' + pin, pci_name(dev), msg); + dev_warn(&dev->dev, "can't find IRQ for PCI INT %c%s\n", + 'A' + pin, msg); } return 0; } diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index f4b16dc11dad..1177845d3186 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c @@ -131,13 +131,14 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d) u8 busno, suba, subb; int quad = BUS2QUAD(d->bus->number); - printk("PCI: Searching for i450NX host bridges on %s\n", pci_name(d)); + dev_info(&d->dev, "searching for i450NX host bridges\n"); reg = 0xd0; for(pxb=0; pxb<2; pxb++) { pci_read_config_byte(d, reg++, &busno); pci_read_config_byte(d, reg++, &suba); pci_read_config_byte(d, reg++, &subb); - DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); + dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n", + pxb, busno, suba, subb); if (busno) { /* Bus A */ pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno)); diff --git a/drivers/acpi/pci_slot.c b/drivers/acpi/pci_slot.c index dd376f7ad090..d5b4ef898879 100644 --- a/drivers/acpi/pci_slot.c +++ b/drivers/acpi/pci_slot.c @@ -76,9 +76,9 @@ static struct acpi_pci_driver acpi_pci_slot_driver = { }; static int -check_slot(acpi_handle handle, int *device, unsigned long *sun) +check_slot(acpi_handle handle, unsigned long *sun) { - int retval = 0; + int device = -1; unsigned long adr, sta; acpi_status status; struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; @@ -89,32 +89,27 @@ check_slot(acpi_handle handle, int *device, unsigned long *sun) if (check_sta_before_sun) { /* If SxFy doesn't have _STA, we just assume it's there */ status = acpi_evaluate_integer(handle, "_STA", NULL, &sta); - if (ACPI_SUCCESS(status) && !(sta & ACPI_STA_DEVICE_PRESENT)) { - retval = -1; + if (ACPI_SUCCESS(status) && !(sta & ACPI_STA_DEVICE_PRESENT)) goto out; - } } status = acpi_evaluate_integer(handle, "_ADR", NULL, &adr); if (ACPI_FAILURE(status)) { dbg("_ADR returned %d on %s\n", status, (char *)buffer.pointer); - retval = -1; goto out; } - *device = (adr >> 16) & 0xffff; - /* No _SUN == not a slot == bail */ status = acpi_evaluate_integer(handle, "_SUN", NULL, sun); if (ACPI_FAILURE(status)) { dbg("_SUN returned %d on %s\n", status, (char *)buffer.pointer); - retval = -1; goto out; } + device = (adr >> 16) & 0xffff; out: kfree(buffer.pointer); - return retval; + return device; } struct callback_args { @@ -144,7 +139,8 @@ register_slot(acpi_handle handle, u32 lvl, void *context, void **rv) struct callback_args *parent_context = context; struct pci_bus *pci_bus = parent_context->pci_bus; - if (check_slot(handle, &device, &sun)) + device = check_slot(handle, &sun); + if (device < 0) return AE_OK; slot = kmalloc(sizeof(*slot), GFP_KERNEL); diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index 0454be4266c1..9c9c126ed334 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -15,24 +15,24 @@ #include <linux/platform_device.h> #include <linux/mfd/core.h> -static int mfd_add_device(struct platform_device *parent, - const struct mfd_cell *cell, - struct resource *mem_base, - int irq_base) +static int mfd_add_device(struct device *parent, int id, + const struct mfd_cell *cell, + struct resource *mem_base, + int irq_base) { struct resource res[cell->num_resources]; struct platform_device *pdev; int ret = -ENOMEM; int r; - pdev = platform_device_alloc(cell->name, parent->id); + pdev = platform_device_alloc(cell->name, id); if (!pdev) goto fail_alloc; - pdev->dev.parent = &parent->dev; + pdev->dev.parent = parent; ret = platform_device_add_data(pdev, - cell, sizeof(struct mfd_cell)); + cell->platform_data, cell->data_size); if (ret) goto fail_device; @@ -75,17 +75,16 @@ fail_alloc: return ret; } -int mfd_add_devices( - struct platform_device *parent, - const struct mfd_cell *cells, int n_devs, - struct resource *mem_base, - int irq_base) +int mfd_add_devices(struct device *parent, int id, + const struct mfd_cell *cells, int n_devs, + struct resource *mem_base, + int irq_base) { int i; int ret = 0; for (i = 0; i < n_devs; i++) { - ret = mfd_add_device(parent, cells + i, mem_base, irq_base); + ret = mfd_add_device(parent, id, cells + i, mem_base, irq_base); if (ret) break; } @@ -99,14 +98,13 @@ EXPORT_SYMBOL(mfd_add_devices); static int mfd_remove_devices_fn(struct device *dev, void *unused) { - platform_device_unregister( - container_of(dev, struct platform_device, dev)); + platform_device_unregister(to_platform_device(dev)); return 0; } -void mfd_remove_devices(struct platform_device *parent) +void mfd_remove_devices(struct device *parent) { - device_for_each_child(&parent->dev, NULL, mfd_remove_devices_fn); + device_for_each_child(parent, NULL, mfd_remove_devices_fn); } EXPORT_SYMBOL(mfd_remove_devices); diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c index 94e55e8e7ce6..f4fd797c1590 100644 --- a/drivers/mfd/tc6393xb.c +++ b/drivers/mfd/tc6393xb.c @@ -466,8 +466,12 @@ static int __devinit tc6393xb_probe(struct platform_device *dev) tc6393xb_attach_irq(dev); tc6393xb_cells[TC6393XB_CELL_NAND].driver_data = tcpd->nand_data; + tc6393xb_cells[TC6393XB_CELL_NAND].platform_data = + &tc6393xb_cells[TC6393XB_CELL_NAND]; + tc6393xb_cells[TC6393XB_CELL_NAND].data_size = + sizeof(tc6393xb_cells[TC6393XB_CELL_NAND]); - retval = mfd_add_devices(dev, + retval = mfd_add_devices(&dev->dev, dev->id, tc6393xb_cells, ARRAY_SIZE(tc6393xb_cells), iomem, tcpd->irq_base); @@ -501,7 +505,7 @@ static int __devexit tc6393xb_remove(struct platform_device *dev) struct tc6393xb *tc6393xb = platform_get_drvdata(dev); int ret; - mfd_remove_devices(dev); + mfd_remove_devices(&dev->dev); if (tc6393xb->irq) tc6393xb_detach_irq(dev); diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 1323a43285d7..ad27e9e225a6 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -1103,7 +1103,7 @@ static inline void dbg_ctrl(struct controller *ctrl) dbg(" Power Indicator : %3s\n", PWR_LED(ctrl) ? "yes" : "no"); dbg(" Hot-Plug Surprise : %3s\n", HP_SUPR_RM(ctrl) ? "yes" : "no"); dbg(" EMI Present : %3s\n", EMI(ctrl) ? "yes" : "no"); - dbg(" Comamnd Completed : %3s\n", NO_CMD_CMPL(ctrl)? "no" : "yes"); + dbg(" Command Completed : %3s\n", NO_CMD_CMPL(ctrl)? "no" : "yes"); pciehp_readw(ctrl, SLOTSTATUS, ®16); dbg("Slot Status : 0x%04x\n", reg16); pciehp_readw(ctrl, SLOTCTRL, ®16); diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 15af618d36e2..18354817173c 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -126,7 +126,16 @@ static void msix_flush_writes(unsigned int irq) } } -static void msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag) +/* + * PCI 2.3 does not specify mask bits for each MSI interrupt. Attempting to + * mask all MSI interrupts by clearing the MSI enable bit does not work + * reliably as devices without an INTx disable bit will then generate a + * level IRQ which will never be cleared. + * + * Returns 1 if it succeeded in masking the interrupt and 0 if the device + * doesn't support MSI masking. + */ +static int msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag) { struct msi_desc *entry; @@ -144,8 +153,7 @@ static void msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag) mask_bits |= flag & mask; pci_write_config_dword(entry->dev, pos, mask_bits); } else { - __msi_set_enable(entry->dev, entry->msi_attrib.pos, - !flag); + return 0; } break; case PCI_CAP_ID_MSIX: @@ -161,6 +169,7 @@ static void msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag) break; } entry->msi_attrib.masked = !!flag; + return 1; } void read_msi_msg(unsigned int irq, struct msi_msg *msg) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 7764768b6a0e..89a2f0fa10f9 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -11,6 +11,7 @@ #include <linux/init.h> #include <linux/pci.h> #include <linux/module.h> +#include <linux/pci-aspm.h> #include <acpi/acpi.h> #include <acpi/acnamesp.h> #include <acpi/acresrc.h> @@ -372,6 +373,12 @@ static int __init acpi_pci_init(void) printk(KERN_INFO"ACPI FADT declares the system doesn't support MSI, so disable it\n"); pci_no_msi(); } + + if (acpi_gbl_FADT.boot_flags & BAF_PCIE_ASPM_CONTROL) { + printk(KERN_INFO"ACPI FADT declares the system doesn't support PCIe ASPM, so disable it\n"); + pcie_no_aspm(); + } + ret = register_acpi_bus_type(&acpi_pci_bus); if (ret) return 0; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e9c356236d27..0a3d856833fc 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -572,6 +572,10 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) if (!ret) pci_update_current_state(dev); } + /* This device is quirked not to be put into D3, so + don't put it in D3 */ + if (state == PCI_D3hot && (dev->dev_flags & PCI_DEV_FLAGS_NO_D3)) + return 0; error = pci_raw_set_power_state(dev, state); @@ -1123,6 +1127,12 @@ int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable) } /** + * pci_target_state - find an appropriate low power state for a given PCI dev + * @dev: PCI device + * + * Use underlying platform code to find a supported low power state for @dev. + * If the platform can't manage @dev, return the deepest state from which it + * can generate wake events, based on any available PME info. */ pci_power_t pci_target_state(struct pci_dev *dev) { diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index f82495583e63..9a7c9e1408a4 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -55,7 +55,7 @@ struct pcie_link_state { struct endpoint_state endpoints[8]; }; -static int aspm_disabled; +static int aspm_disabled, aspm_force; static DEFINE_MUTEX(aspm_lock); static LIST_HEAD(link_list); @@ -510,6 +510,7 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev) { struct pci_dev *child_dev; int child_pos; + u32 reg32; /* * Some functions in a slot might not all be PCIE functions, very @@ -519,6 +520,19 @@ static int pcie_aspm_sanity_check(struct pci_dev *pdev) child_pos = pci_find_capability(child_dev, PCI_CAP_ID_EXP); if (!child_pos) return -EINVAL; + + /* + * Disable ASPM for pre-1.1 PCIe device, we follow MS to use + * RBER bit to determine if a function is 1.1 version device + */ + pci_read_config_dword(child_dev, child_pos + PCI_EXP_DEVCAP, + ®32); + if (!(reg32 & PCI_EXP_DEVCAP_RBER && !aspm_force)) { + printk("Pre-1.1 PCIe device detected, " + "disable ASPM for %s. It can be enabled forcedly" + " with 'pcie_aspm=force'\n", pci_name(pdev)); + return -EINVAL; + } } return 0; } @@ -802,11 +816,23 @@ void pcie_aspm_remove_sysfs_dev_files(struct pci_dev *pdev) static int __init pcie_aspm_disable(char *str) { - aspm_disabled = 1; + if (!strcmp(str, "off")) { + aspm_disabled = 1; + printk(KERN_INFO "PCIe ASPM is disabled\n"); + } else if (!strcmp(str, "force")) { + aspm_force = 1; + printk(KERN_INFO "PCIe ASPM is forcedly enabled\n"); + } return 1; } -__setup("pcie_noaspm", pcie_aspm_disable); +__setup("pcie_aspm=", pcie_aspm_disable); + +void pcie_no_aspm(void) +{ + if (!aspm_force) + aspm_disabled = 1; +} #ifdef CONFIG_ACPI #include <acpi/acpi_bus.h> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index b1724cf31b66..7098dfb07449 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -163,12 +163,9 @@ static inline unsigned int pci_calc_resource_flags(unsigned int flags) return IORESOURCE_MEM; } -/* - * Find the extent of a PCI decode.. - */ -static u32 pci_size(u32 base, u32 maxbase, u32 mask) +static u64 pci_size(u64 base, u64 maxbase, u64 mask) { - u32 size = mask & maxbase; /* Find the significant bits */ + u64 size = mask & maxbase; /* Find the significant bits */ if (!size) return 0; @@ -184,135 +181,142 @@ static u32 pci_size(u32 base, u32 maxbase, u32 mask) return size; } -static u64 pci_size64(u64 base, u64 maxbase, u64 mask) -{ - u64 size = mask & maxbase; /* Find the significant bits */ - if (!size) - return 0; +enum pci_bar_type { + pci_bar_unknown, /* Standard PCI BAR probe */ + pci_bar_io, /* An io port BAR */ + pci_bar_mem32, /* A 32-bit memory BAR */ + pci_bar_mem64, /* A 64-bit memory BAR */ +}; - /* Get the lowest of them to find the decode size, and - from that the extent. */ - size = (size & ~(size-1)) - 1; +static inline enum pci_bar_type decode_bar(struct resource *res, u32 bar) +{ + if ((bar & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO) { + res->flags = bar & ~PCI_BASE_ADDRESS_IO_MASK; + return pci_bar_io; + } - /* base == maxbase can be valid only if the BAR has - already been programmed with all 1s. */ - if (base == maxbase && ((base | size) & mask) != mask) - return 0; + res->flags = bar & ~PCI_BASE_ADDRESS_MEM_MASK; - return size; + if (res->flags == PCI_BASE_ADDRESS_MEM_TYPE_64) + return pci_bar_mem64; + return pci_bar_mem32; } -static inline int is_64bit_memory(u32 mask) +/* + * If the type is not unknown, we assume that the lowest bit is 'enable'. + * Returns 1 if the BAR was 64-bit and 0 if it was 32-bit. + */ +static int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, + struct resource *res, unsigned int pos) { - if ((mask & (PCI_BASE_ADDRESS_SPACE|PCI_BASE_ADDRESS_MEM_TYPE_MASK)) == - (PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64)) - return 1; - return 0; -} + u32 l, sz, mask; -static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) -{ - unsigned int pos, reg, next; - u32 l, sz; - struct resource *res; + mask = type ? ~PCI_ROM_ADDRESS_ENABLE : ~0; - for(pos=0; pos<howmany; pos = next) { - u64 l64; - u64 sz64; - u32 raw_sz; + res->name = pci_name(dev); - next = pos+1; - res = &dev->resource[pos]; - res->name = pci_name(dev); - reg = PCI_BASE_ADDRESS_0 + (pos << 2); - pci_read_config_dword(dev, reg, &l); - pci_write_config_dword(dev, reg, ~0); - pci_read_config_dword(dev, reg, &sz); - pci_write_config_dword(dev, reg, l); - if (!sz || sz == 0xffffffff) - continue; - if (l == 0xffffffff) - l = 0; - raw_sz = sz; - if ((l & PCI_BASE_ADDRESS_SPACE) == - PCI_BASE_ADDRESS_SPACE_MEMORY) { - sz = pci_size(l, sz, (u32)PCI_BASE_ADDRESS_MEM_MASK); - /* - * For 64bit prefetchable memory sz could be 0, if the - * real size is bigger than 4G, so we need to check - * szhi for that. - */ - if (!is_64bit_memory(l) && !sz) - continue; - res->start = l & PCI_BASE_ADDRESS_MEM_MASK; - res->flags |= l & ~PCI_BASE_ADDRESS_MEM_MASK; + pci_read_config_dword(dev, pos, &l); + pci_write_config_dword(dev, pos, mask); + pci_read_config_dword(dev, pos, &sz); + pci_write_config_dword(dev, pos, l); + + /* + * All bits set in sz means the device isn't working properly. + * If the BAR isn't implemented, all bits must be 0. If it's a + * memory BAR or a ROM, bit 0 must be clear; if it's an io BAR, bit + * 1 must be clear. + */ + if (!sz || sz == 0xffffffff) + goto fail; + + /* + * I don't know how l can have all bits set. Copied from old code. + * Maybe it fixes a bug on some ancient platform. + */ + if (l == 0xffffffff) + l = 0; + + if (type == pci_bar_unknown) { + type = decode_bar(res, l); + res->flags |= pci_calc_resource_flags(l) | IORESOURCE_SIZEALIGN; + if (type == pci_bar_io) { + l &= PCI_BASE_ADDRESS_IO_MASK; + mask = PCI_BASE_ADDRESS_IO_MASK & 0xffff; } else { - sz = pci_size(l, sz, PCI_BASE_ADDRESS_IO_MASK & 0xffff); - if (!sz) - continue; - res->start = l & PCI_BASE_ADDRESS_IO_MASK; - res->flags |= l & ~PCI_BASE_ADDRESS_IO_MASK; + l &= PCI_BASE_ADDRESS_MEM_MASK; + mask = (u32)PCI_BASE_ADDRESS_MEM_MASK; } - res->end = res->start + (unsigned long) sz; - res->flags |= pci_calc_resource_flags(l) | IORESOURCE_SIZEALIGN; - if (is_64bit_memory(l)) { - u32 szhi, lhi; - - pci_read_config_dword(dev, reg+4, &lhi); - pci_write_config_dword(dev, reg+4, ~0); - pci_read_config_dword(dev, reg+4, &szhi); - pci_write_config_dword(dev, reg+4, lhi); - sz64 = ((u64)szhi << 32) | raw_sz; - l64 = ((u64)lhi << 32) | l; - sz64 = pci_size64(l64, sz64, PCI_BASE_ADDRESS_MEM_MASK); - next++; -#if BITS_PER_LONG == 64 - if (!sz64) { - res->start = 0; - res->end = 0; - res->flags = 0; - continue; - } - res->start = l64 & PCI_BASE_ADDRESS_MEM_MASK; - res->end = res->start + sz64; -#else - if (sz64 > 0x100000000ULL) { - dev_err(&dev->dev, "BAR %d: can't handle 64-bit" - " BAR\n", pos); - res->start = 0; - res->flags = 0; - } else if (lhi) { - /* 64-bit wide address, treat as disabled */ - pci_write_config_dword(dev, reg, - l & ~(u32)PCI_BASE_ADDRESS_MEM_MASK); - pci_write_config_dword(dev, reg+4, 0); - res->start = 0; - res->end = sz; - } -#endif + } else { + res->flags |= (l & IORESOURCE_ROM_ENABLE); + l &= PCI_ROM_ADDRESS_MASK; + mask = (u32)PCI_ROM_ADDRESS_MASK; + } + + if (type == pci_bar_mem64) { + u64 l64 = l; + u64 sz64 = sz; + u64 mask64 = mask | (u64)~0 << 32; + + pci_read_config_dword(dev, pos + 4, &l); + pci_write_config_dword(dev, pos + 4, ~0); + pci_read_config_dword(dev, pos + 4, &sz); + pci_write_config_dword(dev, pos + 4, l); + + l64 |= ((u64)l << 32); + sz64 |= ((u64)sz << 32); + + sz64 = pci_size(l64, sz64, mask64); + + if (!sz64) + goto fail; + + if ((sizeof(resource_size_t) < 8) && (sz64 > 0x100000000ULL)) { + dev_err(&dev->dev, "can't handle 64-bit BAR\n"); + goto fail; + } else if ((sizeof(resource_size_t) < 8) && l) { + /* Address above 32-bit boundary; disable the BAR */ + pci_write_config_dword(dev, pos, 0); + pci_write_config_dword(dev, pos + 4, 0); + res->start = 0; + res->end = sz64; + } else { + res->start = l64; + res->end = l64 + sz64; } + } else { + sz = pci_size(l, sz, mask); + + if (!sz) + goto fail; + + res->start = l; + res->end = l + sz; } + + out: + return (type == pci_bar_mem64) ? 1 : 0; + fail: + res->flags = 0; + goto out; +} + +static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) +{ + unsigned int pos, reg; + + for (pos = 0; pos < howmany; pos++) { + struct resource *res = &dev->resource[pos]; + reg = PCI_BASE_ADDRESS_0 + (pos << 2); + pos += __pci_read_base(dev, pci_bar_unknown, res, reg); + } + if (rom) { + struct resource *res = &dev->resource[PCI_ROM_RESOURCE]; dev->rom_base_reg = rom; - res = &dev->resource[PCI_ROM_RESOURCE]; - res->name = pci_name(dev); - pci_read_config_dword(dev, rom, &l); - pci_write_config_dword(dev, rom, ~PCI_ROM_ADDRESS_ENABLE); - pci_read_config_dword(dev, rom, &sz); - pci_write_config_dword(dev, rom, l); - if (l == 0xffffffff) - l = 0; - if (sz && sz != 0xffffffff) { - sz = pci_size(l, sz, (u32)PCI_ROM_ADDRESS_MASK); - if (sz) { - res->flags = (l & IORESOURCE_ROM_ENABLE) | - IORESOURCE_MEM | IORESOURCE_PREFETCH | - IORESOURCE_READONLY | IORESOURCE_CACHEABLE | - IORESOURCE_SIZEALIGN; - res->start = l & PCI_ROM_ADDRESS_MASK; - res->end = res->start + (unsigned long) sz; - } - } + res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH | + IORESOURCE_READONLY | IORESOURCE_CACHEABLE | + IORESOURCE_SIZEALIGN; + __pci_read_base(dev, pci_bar_mem32, res, rom); } } @@ -1053,7 +1057,8 @@ int pci_scan_slot(struct pci_bus *bus, int devfn) } } - if (bus->self) + /* only one slot has pcie device */ + if (bus->self && nr) pcie_aspm_init_link_state(bus->self); return nr; diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 12d489395fad..0fb365074288 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -923,6 +923,19 @@ static void __init quirk_ide_samemode(struct pci_dev *pdev) } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_10, quirk_ide_samemode); +/* + * Some ATA devices break if put into D3 + */ + +static void __devinit quirk_no_ata_d3(struct pci_dev *pdev) +{ + /* Quirk the legacy ATA devices only. The AHCI ones are ok */ + if ((pdev->class >> 8) == PCI_CLASS_STORAGE_IDE) + pdev->dev_flags |= PCI_DEV_FLAGS_NO_D3; +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, PCI_ANY_ID, quirk_no_ata_d3); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, PCI_ANY_ID, quirk_no_ata_d3); + /* This was originally an Alpha specific thing, but it really fits here. * The i82375 PCI/EISA bridge appears as non-classified. Fix that. */ diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 7a4409ab30ea..a319a20ed440 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -8,6 +8,7 @@ #include <linux/kthread.h> #include <linux/vmalloc.h> +#include <linux/delay.h> static int qla24xx_vport_disable(struct fc_vport *, bool); diff --git a/drivers/spi/mpc52xx_psc_spi.c b/drivers/spi/mpc52xx_psc_spi.c index 604e5f0a2d95..25eda71f4bf4 100644 --- a/drivers/spi/mpc52xx_psc_spi.c +++ b/drivers/spi/mpc52xx_psc_spi.c @@ -148,7 +148,6 @@ static int mpc52xx_psc_spi_transfer_rxtx(struct spi_device *spi, unsigned rfalarm; unsigned send_at_once = MPC52xx_PSC_BUFSIZE; unsigned recv_at_once; - unsigned bpw = mps->bits_per_word / 8; if (!t->tx_buf && !t->rx_buf && t->len) return -EINVAL; @@ -164,22 +163,15 @@ static int mpc52xx_psc_spi_transfer_rxtx(struct spi_device *spi, } dev_dbg(&spi->dev, "send %d bytes...\n", send_at_once); - if (tx_buf) { - for (; send_at_once; sb++, send_at_once--) { - /* set EOF flag */ - if (mps->bits_per_word - && (sb + 1) % bpw == 0) - out_8(&psc->ircr2, 0x01); + for (; send_at_once; sb++, send_at_once--) { + /* set EOF flag before the last word is sent */ + if (send_at_once == 1) + out_8(&psc->ircr2, 0x01); + + if (tx_buf) out_8(&psc->mpc52xx_psc_buffer_8, tx_buf[sb]); - } - } else { - for (; send_at_once; sb++, send_at_once--) { - /* set EOF flag */ - if (mps->bits_per_word - && ((sb + 1) % bpw) == 0) - out_8(&psc->ircr2, 0x01); + else out_8(&psc->mpc52xx_psc_buffer_8, 0); - } } diff --git a/drivers/spi/spi_s3c24xx.c b/drivers/spi/spi_s3c24xx.c index 0885cc357a37..1c643c9e1f15 100644 --- a/drivers/spi/spi_s3c24xx.c +++ b/drivers/spi/spi_s3c24xx.c @@ -270,6 +270,7 @@ static int __init s3c24xx_spi_probe(struct platform_device *pdev) /* setup the master state. */ master->num_chipselect = hw->pdata->num_cs; + master->bus_num = pdata->bus_num; /* setup the state for the bitbang driver */ diff --git a/drivers/video/sh7760fb.c b/drivers/video/sh7760fb.c index 4d0e28c5790b..8d0212da4514 100644 --- a/drivers/video/sh7760fb.c +++ b/drivers/video/sh7760fb.c @@ -152,6 +152,7 @@ static int sh7760fb_setcmap(struct fb_cmap *cmap, struct fb_info *info) col |= ((*g) & 0xff) << 8; col |= ((*b) & 0xff); col &= SH7760FB_PALETTE_MASK; + iowrite32(col, par->base + LDPR(s)); if (s < 16) ((u32 *) (info->pseudo_palette))[s] = s; diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 63e2ee63058d..c3e174b35fe6 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -705,7 +705,6 @@ void __init bio_integrity_init_slab(void) bio_integrity_slab = KMEM_CACHE(bio_integrity_payload, SLAB_HWCACHE_ALIGN|SLAB_PANIC); } -EXPORT_SYMBOL(bio_integrity_init_slab); static int __init integrity_init(void) { diff --git a/fs/buffer.c b/fs/buffer.c index f95805019639..ca12a6bb82b1 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2096,6 +2096,52 @@ int generic_write_end(struct file *file, struct address_space *mapping, EXPORT_SYMBOL(generic_write_end); /* + * block_is_partially_uptodate checks whether buffers within a page are + * uptodate or not. + * + * Returns true if all buffers which correspond to a file portion + * we want to read are uptodate. + */ +int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, + unsigned long from) +{ + struct inode *inode = page->mapping->host; + unsigned block_start, block_end, blocksize; + unsigned to; + struct buffer_head *bh, *head; + int ret = 1; + + if (!page_has_buffers(page)) + return 0; + + blocksize = 1 << inode->i_blkbits; + to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count); + to = from + to; + if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize) + return 0; + + head = page_buffers(page); + bh = head; + block_start = 0; + do { + block_end = block_start + blocksize; + if (block_end > from && block_start < to) { + if (!buffer_uptodate(bh)) { + ret = 0; + break; + } + if (block_end >= to) + break; + } + block_start = block_end; + bh = bh->b_this_page; + } while (bh != head); + + return ret; +} +EXPORT_SYMBOL(block_is_partially_uptodate); + +/* * Generic "read page" function for block devices that have the normal * get_block functionality. This is most of the block device filesystems. * Reads the page asynchronously --- the unlock_buffer() and diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 7b99917ffadc..06db79d05c12 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -475,8 +475,8 @@ int ecryptfs_encrypt_page(struct page *page) { struct inode *ecryptfs_inode; struct ecryptfs_crypt_stat *crypt_stat; - char *enc_extent_virt = NULL; - struct page *enc_extent_page; + char *enc_extent_virt; + struct page *enc_extent_page = NULL; loff_t extent_offset; int rc = 0; @@ -492,14 +492,14 @@ int ecryptfs_encrypt_page(struct page *page) page->index); goto out; } - enc_extent_virt = kmalloc(PAGE_CACHE_SIZE, GFP_USER); - if (!enc_extent_virt) { + enc_extent_page = alloc_page(GFP_USER); + if (!enc_extent_page) { rc = -ENOMEM; ecryptfs_printk(KERN_ERR, "Error allocating memory for " "encrypted extent\n"); goto out; } - enc_extent_page = virt_to_page(enc_extent_virt); + enc_extent_virt = kmap(enc_extent_page); for (extent_offset = 0; extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); extent_offset++) { @@ -527,7 +527,10 @@ int ecryptfs_encrypt_page(struct page *page) } } out: - kfree(enc_extent_virt); + if (enc_extent_page) { + kunmap(enc_extent_page); + __free_page(enc_extent_page); + } return rc; } @@ -609,8 +612,8 @@ int ecryptfs_decrypt_page(struct page *page) { struct inode *ecryptfs_inode; struct ecryptfs_crypt_stat *crypt_stat; - char *enc_extent_virt = NULL; - struct page *enc_extent_page; + char *enc_extent_virt; + struct page *enc_extent_page = NULL; unsigned long extent_offset; int rc = 0; @@ -627,14 +630,14 @@ int ecryptfs_decrypt_page(struct page *page) page->index); goto out; } - enc_extent_virt = kmalloc(PAGE_CACHE_SIZE, GFP_USER); - if (!enc_extent_virt) { + enc_extent_page = alloc_page(GFP_USER); + if (!enc_extent_page) { rc = -ENOMEM; ecryptfs_printk(KERN_ERR, "Error allocating memory for " "encrypted extent\n"); goto out; } - enc_extent_page = virt_to_page(enc_extent_virt); + enc_extent_virt = kmap(enc_extent_page); for (extent_offset = 0; extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); extent_offset++) { @@ -662,7 +665,10 @@ int ecryptfs_decrypt_page(struct page *page) } } out: - kfree(enc_extent_virt); + if (enc_extent_page) { + kunmap(enc_extent_page); + __free_page(enc_extent_page); + } return rc; } diff --git a/fs/exec.c b/fs/exec.c index 9696bbf0f0b1..32993beecbe9 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -32,6 +32,7 @@ #include <linux/swap.h> #include <linux/string.h> #include <linux/init.h> +#include <linux/pagemap.h> #include <linux/highmem.h> #include <linux/spinlock.h> #include <linux/key.h> diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 384fc0d1dd74..991d6dfeb51f 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -791,6 +791,7 @@ const struct address_space_operations ext2_aops = { .direct_IO = ext2_direct_IO, .writepages = ext2_writepages, .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, }; const struct address_space_operations ext2_aops_xip = { diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 3bf07d70b914..507d8689b111 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1767,44 +1767,47 @@ static int ext3_journalled_set_page_dirty(struct page *page) } static const struct address_space_operations ext3_ordered_aops = { - .readpage = ext3_readpage, - .readpages = ext3_readpages, - .writepage = ext3_ordered_writepage, - .sync_page = block_sync_page, - .write_begin = ext3_write_begin, - .write_end = ext3_ordered_write_end, - .bmap = ext3_bmap, - .invalidatepage = ext3_invalidatepage, - .releasepage = ext3_releasepage, - .direct_IO = ext3_direct_IO, - .migratepage = buffer_migrate_page, + .readpage = ext3_readpage, + .readpages = ext3_readpages, + .writepage = ext3_ordered_writepage, + .sync_page = block_sync_page, + .write_begin = ext3_write_begin, + .write_end = ext3_ordered_write_end, + .bmap = ext3_bmap, + .invalidatepage = ext3_invalidatepage, + .releasepage = ext3_releasepage, + .direct_IO = ext3_direct_IO, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, }; static const struct address_space_operations ext3_writeback_aops = { - .readpage = ext3_readpage, - .readpages = ext3_readpages, - .writepage = ext3_writeback_writepage, - .sync_page = block_sync_page, - .write_begin = ext3_write_begin, - .write_end = ext3_writeback_write_end, - .bmap = ext3_bmap, - .invalidatepage = ext3_invalidatepage, - .releasepage = ext3_releasepage, - .direct_IO = ext3_direct_IO, - .migratepage = buffer_migrate_page, + .readpage = ext3_readpage, + .readpages = ext3_readpages, + .writepage = ext3_writeback_writepage, + .sync_page = block_sync_page, + .write_begin = ext3_write_begin, + .write_end = ext3_writeback_write_end, + .bmap = ext3_bmap, + .invalidatepage = ext3_invalidatepage, + .releasepage = ext3_releasepage, + .direct_IO = ext3_direct_IO, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, }; static const struct address_space_operations ext3_journalled_aops = { - .readpage = ext3_readpage, - .readpages = ext3_readpages, - .writepage = ext3_journalled_writepage, - .sync_page = block_sync_page, - .write_begin = ext3_write_begin, - .write_end = ext3_journalled_write_end, - .set_page_dirty = ext3_journalled_set_page_dirty, - .bmap = ext3_bmap, - .invalidatepage = ext3_invalidatepage, - .releasepage = ext3_releasepage, + .readpage = ext3_readpage, + .readpages = ext3_readpages, + .writepage = ext3_journalled_writepage, + .sync_page = block_sync_page, + .write_begin = ext3_write_begin, + .write_end = ext3_journalled_write_end, + .set_page_dirty = ext3_journalled_set_page_dirty, + .bmap = ext3_bmap, + .invalidatepage = ext3_invalidatepage, + .releasepage = ext3_releasepage, + .is_partially_uptodate = block_is_partially_uptodate, }; void ext3_set_aops(struct inode *inode) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8ca2763df091..9843b046c235 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2806,59 +2806,63 @@ static int ext4_journalled_set_page_dirty(struct page *page) } static const struct address_space_operations ext4_ordered_aops = { - .readpage = ext4_readpage, - .readpages = ext4_readpages, - .writepage = ext4_normal_writepage, - .sync_page = block_sync_page, - .write_begin = ext4_write_begin, - .write_end = ext4_ordered_write_end, - .bmap = ext4_bmap, - .invalidatepage = ext4_invalidatepage, - .releasepage = ext4_releasepage, - .direct_IO = ext4_direct_IO, - .migratepage = buffer_migrate_page, + .readpage = ext4_readpage, + .readpages = ext4_readpages, + .writepage = ext4_normal_writepage, + .sync_page = block_sync_page, + .write_begin = ext4_write_begin, + .write_end = ext4_ordered_write_end, + .bmap = ext4_bmap, + .invalidatepage = ext4_invalidatepage, + .releasepage = ext4_releasepage, + .direct_IO = ext4_direct_IO, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, }; static const struct address_space_operations ext4_writeback_aops = { - .readpage = ext4_readpage, - .readpages = ext4_readpages, - .writepage = ext4_normal_writepage, - .sync_page = block_sync_page, - .write_begin = ext4_write_begin, - .write_end = ext4_writeback_write_end, - .bmap = ext4_bmap, - .invalidatepage = ext4_invalidatepage, - .releasepage = ext4_releasepage, - .direct_IO = ext4_direct_IO, - .migratepage = buffer_migrate_page, + .readpage = ext4_readpage, + .readpages = ext4_readpages, + .writepage = ext4_normal_writepage, + .sync_page = block_sync_page, + .write_begin = ext4_write_begin, + .write_end = ext4_writeback_write_end, + .bmap = ext4_bmap, + .invalidatepage = ext4_invalidatepage, + .releasepage = ext4_releasepage, + .direct_IO = ext4_direct_IO, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, }; static const struct address_space_operations ext4_journalled_aops = { - .readpage = ext4_readpage, - .readpages = ext4_readpages, - .writepage = ext4_journalled_writepage, - .sync_page = block_sync_page, - .write_begin = ext4_write_begin, - .write_end = ext4_journalled_write_end, - .set_page_dirty = ext4_journalled_set_page_dirty, - .bmap = ext4_bmap, - .invalidatepage = ext4_invalidatepage, - .releasepage = ext4_releasepage, + .readpage = ext4_readpage, + .readpages = ext4_readpages, + .writepage = ext4_journalled_writepage, + .sync_page = block_sync_page, + .write_begin = ext4_write_begin, + .write_end = ext4_journalled_write_end, + .set_page_dirty = ext4_journalled_set_page_dirty, + .bmap = ext4_bmap, + .invalidatepage = ext4_invalidatepage, + .releasepage = ext4_releasepage, + .is_partially_uptodate = block_is_partially_uptodate, }; static const struct address_space_operations ext4_da_aops = { - .readpage = ext4_readpage, - .readpages = ext4_readpages, - .writepage = ext4_da_writepage, - .writepages = ext4_da_writepages, - .sync_page = block_sync_page, - .write_begin = ext4_da_write_begin, - .write_end = ext4_da_write_end, - .bmap = ext4_bmap, - .invalidatepage = ext4_da_invalidatepage, - .releasepage = ext4_releasepage, - .direct_IO = ext4_direct_IO, - .migratepage = buffer_migrate_page, + .readpage = ext4_readpage, + .readpages = ext4_readpages, + .writepage = ext4_da_writepage, + .writepages = ext4_da_writepages, + .sync_page = block_sync_page, + .write_begin = ext4_da_write_begin, + .write_end = ext4_da_write_end, + .bmap = ext4_bmap, + .invalidatepage = ext4_da_invalidatepage, + .releasepage = ext4_releasepage, + .direct_IO = ext4_direct_IO, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, }; void ext4_set_aops(struct inode *inode) diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 1ebbe883f786..13a3d9ad92db 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -277,6 +277,7 @@ enum acpi_prefered_pm_profiles { #define BAF_LEGACY_DEVICES 0x0001 #define BAF_8042_KEYBOARD_CONTROLLER 0x0002 #define BAF_MSI_NOT_SUPPORTED 0x0008 +#define BAF_PCIE_ASPM_CONTROL 0x0010 #define FADT2_REVISION_ID 3 #define FADT2_MINUS_REVISION_ID 2 diff --git a/include/asm-arm/arch-s3c2410/spi.h b/include/asm-arm/arch-s3c2410/spi.h index 352d33860b63..442169887d3b 100644 --- a/include/asm-arm/arch-s3c2410/spi.h +++ b/include/asm-arm/arch-s3c2410/spi.h @@ -16,6 +16,7 @@ struct s3c2410_spi_info { unsigned long pin_cs; /* simple gpio cs */ unsigned int num_cs; /* total chipselects */ + int bus_num; /* bus number to use. */ void (*set_cs)(struct s3c2410_spi_info *spi, int cs, int pol); }; diff --git a/include/asm-arm/dma-mapping.h b/include/asm-arm/dma-mapping.h index f41335ba6337..45329fca1b64 100644 --- a/include/asm-arm/dma-mapping.h +++ b/include/asm-arm/dma-mapping.h @@ -7,6 +7,8 @@ #include <linux/scatterlist.h> +#include <asm-generic/dma-coherent.h> + /* * DMA-consistent mapping functions. These allocate/free a region of * uncached, unwrite-buffered mapped memory space for use with DMA diff --git a/include/asm-cris/dma-mapping.h b/include/asm-cris/dma-mapping.h index cb2fb25ff8d9..da8ef8e8f842 100644 --- a/include/asm-cris/dma-mapping.h +++ b/include/asm-cris/dma-mapping.h @@ -14,6 +14,8 @@ #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) #ifdef CONFIG_PCI +#include <asm-generic/dma-coherent.h> + void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag); diff --git a/include/asm-generic/dma-coherent.h b/include/asm-generic/dma-coherent.h new file mode 100644 index 000000000000..85a3ffaa0242 --- /dev/null +++ b/include/asm-generic/dma-coherent.h @@ -0,0 +1,32 @@ +#ifndef DMA_COHERENT_H +#define DMA_COHERENT_H + +#ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT +/* + * These two functions are only for dma allocator. + * Don't use them in device drivers. + */ +int dma_alloc_from_coherent(struct device *dev, ssize_t size, + dma_addr_t *dma_handle, void **ret); +int dma_release_from_coherent(struct device *dev, int order, void *vaddr); + +/* + * Standard interface + */ +#define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY +extern int +dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, + dma_addr_t device_addr, size_t size, int flags); + +extern void +dma_release_declared_memory(struct device *dev); + +extern void * +dma_mark_declared_memory_occupied(struct device *dev, + dma_addr_t device_addr, size_t size); +#else +#define dma_alloc_from_coherent(dev, size, handle, ret) (0) +#define dma_release_from_coherent(dev, order, vaddr) (0) +#endif + +#endif diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index c764a8fcb058..0f99ad38b012 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -2,6 +2,7 @@ #define _ASM_GENERIC_GPIO_H #include <linux/types.h> +#include <linux/errno.h> #ifdef CONFIG_GPIOLIB diff --git a/include/asm-generic/pgtable-nopmd.h b/include/asm-generic/pgtable-nopmd.h index 087325ede76c..a7cdc48e8b78 100644 --- a/include/asm-generic/pgtable-nopmd.h +++ b/include/asm-generic/pgtable-nopmd.h @@ -5,6 +5,8 @@ #include <asm-generic/pgtable-nopud.h> +struct mm_struct; + #define __PAGETABLE_PMD_FOLDED /* @@ -54,7 +56,9 @@ static inline pmd_t * pmd_offset(pud_t * pud, unsigned long address) * inside the pud, so has no extra memory associated with it. */ #define pmd_alloc_one(mm, address) NULL -#define pmd_free(mm, x) do { } while (0) +static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) +{ +} #define __pmd_free_tlb(tlb, x) do { } while (0) #undef pmd_addr_end diff --git a/include/asm-sh/dma-mapping.h b/include/asm-sh/dma-mapping.h index 6c0b8a2de143..627315ecdb52 100644 --- a/include/asm-sh/dma-mapping.h +++ b/include/asm-sh/dma-mapping.h @@ -5,6 +5,7 @@ #include <linux/scatterlist.h> #include <asm/cacheflush.h> #include <asm/io.h> +#include <asm-generic/dma-coherent.h> extern struct bus_type pci_bus_type; diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h index 0eaa9bf6011f..ad9cd6d49bfc 100644 --- a/include/asm-x86/dma-mapping.h +++ b/include/asm-x86/dma-mapping.h @@ -249,25 +249,5 @@ static inline int dma_get_cache_alignment(void) #define dma_is_consistent(d, h) (1) -#ifdef CONFIG_X86_32 -# define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY -struct dma_coherent_mem { - void *virt_base; - u32 device_base; - int size; - int flags; - unsigned long *bitmap; -}; - -extern int -dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, - dma_addr_t device_addr, size_t size, int flags); - -extern void -dma_release_declared_memory(struct device *dev); - -extern void * -dma_mark_declared_memory_occupied(struct device *dev, - dma_addr_t device_addr, size_t size); -#endif /* CONFIG_X86_32 */ +#include <asm-generic/dma-coherent.h> #endif diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 82aa36c53ea7..50cfe8ceb478 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -205,6 +205,8 @@ void block_invalidatepage(struct page *page, unsigned long offset); int block_write_full_page(struct page *page, get_block_t *get_block, struct writeback_control *wbc); int block_read_full_page(struct page*, get_block_t*); +int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, + unsigned long from); int block_write_begin(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page **, void **, get_block_t*); diff --git a/include/linux/fs.h b/include/linux/fs.h index 8252b045e624..580b513668fe 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -443,6 +443,27 @@ static inline size_t iov_iter_count(struct iov_iter *i) return i->count; } +/* + * "descriptor" for what we're up to with a read. + * This allows us to use the same read code yet + * have multiple different users of the data that + * we read from a file. + * + * The simplest case just copies the data to user + * mode. + */ +typedef struct { + size_t written; + size_t count; + union { + char __user *buf; + void *data; + } arg; + int error; +} read_descriptor_t; + +typedef int (*read_actor_t)(read_descriptor_t *, struct page *, + unsigned long, unsigned long); struct address_space_operations { int (*writepage)(struct page *page, struct writeback_control *wbc); @@ -484,6 +505,8 @@ struct address_space_operations { int (*migratepage) (struct address_space *, struct page *, struct page *); int (*launder_page) (struct page *); + int (*is_partially_uptodate) (struct page *, read_descriptor_t *, + unsigned long); }; /* @@ -1198,27 +1221,6 @@ struct block_device_operations { struct module *owner; }; -/* - * "descriptor" for what we're up to with a read. - * This allows us to use the same read code yet - * have multiple different users of the data that - * we read from a file. - * - * The simplest case just copies the data to user - * mode. - */ -typedef struct { - size_t written; - size_t count; - union { - char __user * buf; - void *data; - } arg; - int error; -} read_descriptor_t; - -typedef int (*read_actor_t)(read_descriptor_t *, struct page *, unsigned long, unsigned long); - /* These macros are for out of kernel modules to test that * the kernel supports the unlocked_ioctl and compat_ioctl * fields in struct file_operations. */ diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h index c975caf75385..f8598f583944 100644 --- a/include/linux/iommu-helper.h +++ b/include/linux/iommu-helper.h @@ -8,3 +8,4 @@ extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, unsigned long align_mask); extern void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr); +extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len); diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index bb3dd0545928..49ef857cdb2d 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -1,5 +1,3 @@ -#ifndef MFD_CORE_H -#define MFD_CORE_H /* * drivers/mfd/mfd-core.h * @@ -13,6 +11,9 @@ * */ +#ifndef MFD_CORE_H +#define MFD_CORE_H + #include <linux/platform_device.h> /* @@ -28,7 +29,13 @@ struct mfd_cell { int (*suspend)(struct platform_device *dev); int (*resume)(struct platform_device *dev); - void *driver_data; /* driver-specific data */ + /* driver-specific data for MFD-aware "cell" drivers */ + void *driver_data; + + /* platform_data can be used to either pass data to "generic" + driver or as a hook to mfd_cell for the "cell" drivers */ + void *platform_data; + size_t data_size; /* * This resources can be specified relatievly to the parent device. @@ -38,18 +45,11 @@ struct mfd_cell { const struct resource *resources; }; -static inline struct mfd_cell * -mfd_get_cell(struct platform_device *pdev) -{ - return (struct mfd_cell *)pdev->dev.platform_data; -} - -extern int mfd_add_devices( - struct platform_device *parent, - const struct mfd_cell *cells, int n_devs, - struct resource *mem_base, - int irq_base); +extern int mfd_add_devices(struct device *parent, int id, + const struct mfd_cell *cells, int n_devs, + struct resource *mem_base, + int irq_base); -extern void mfd_remove_devices(struct platform_device *parent); +extern void mfd_remove_devices(struct device *parent); #endif diff --git a/include/linux/mm.h b/include/linux/mm.h index 6e695eaab4ce..866a3dbe5c75 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1104,6 +1104,9 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **, unsigned long addr, unsigned long len, pgoff_t pgoff); extern void exit_mmap(struct mm_struct *); +extern int mm_take_all_locks(struct mm_struct *mm); +extern void mm_drop_all_locks(struct mm_struct *mm); + #ifdef CONFIG_PROC_FS /* From fs/proc/base.c. callers must _not_ hold the mm's exe_file_lock */ extern void added_exe_file_vma(struct mm_struct *mm); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 746f975b58ef..386edbe2cb4e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -10,6 +10,7 @@ #include <linux/rbtree.h> #include <linux/rwsem.h> #include <linux/completion.h> +#include <linux/cpumask.h> #include <asm/page.h> #include <asm/mmu.h> @@ -253,6 +254,9 @@ struct mm_struct { struct file *exe_file; unsigned long num_exe_file_vmas; #endif +#ifdef CONFIG_MMU_NOTIFIER + struct mmu_notifier_mm *mmu_notifier_mm; +#endif }; #endif /* _LINUX_MM_TYPES_H */ diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h new file mode 100644 index 000000000000..b77486d152cd --- /dev/null +++ b/include/linux/mmu_notifier.h @@ -0,0 +1,279 @@ +#ifndef _LINUX_MMU_NOTIFIER_H +#define _LINUX_MMU_NOTIFIER_H + +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/mm_types.h> + +struct mmu_notifier; +struct mmu_notifier_ops; + +#ifdef CONFIG_MMU_NOTIFIER + +/* + * The mmu notifier_mm structure is allocated and installed in + * mm->mmu_notifier_mm inside the mm_take_all_locks() protected + * critical section and it's released only when mm_count reaches zero + * in mmdrop(). + */ +struct mmu_notifier_mm { + /* all mmu notifiers registerd in this mm are queued in this list */ + struct hlist_head list; + /* to serialize the list modifications and hlist_unhashed */ + spinlock_t lock; +}; + +struct mmu_notifier_ops { + /* + * Called either by mmu_notifier_unregister or when the mm is + * being destroyed by exit_mmap, always before all pages are + * freed. This can run concurrently with other mmu notifier + * methods (the ones invoked outside the mm context) and it + * should tear down all secondary mmu mappings and freeze the + * secondary mmu. If this method isn't implemented you've to + * be sure that nothing could possibly write to the pages + * through the secondary mmu by the time the last thread with + * tsk->mm == mm exits. + * + * As side note: the pages freed after ->release returns could + * be immediately reallocated by the gart at an alias physical + * address with a different cache model, so if ->release isn't + * implemented because all _software_ driven memory accesses + * through the secondary mmu are terminated by the time the + * last thread of this mm quits, you've also to be sure that + * speculative _hardware_ operations can't allocate dirty + * cachelines in the cpu that could not be snooped and made + * coherent with the other read and write operations happening + * through the gart alias address, so leading to memory + * corruption. + */ + void (*release)(struct mmu_notifier *mn, + struct mm_struct *mm); + + /* + * clear_flush_young is called after the VM is + * test-and-clearing the young/accessed bitflag in the + * pte. This way the VM will provide proper aging to the + * accesses to the page through the secondary MMUs and not + * only to the ones through the Linux pte. + */ + int (*clear_flush_young)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address); + + /* + * Before this is invoked any secondary MMU is still ok to + * read/write to the page previously pointed to by the Linux + * pte because the page hasn't been freed yet and it won't be + * freed until this returns. If required set_page_dirty has to + * be called internally to this method. + */ + void (*invalidate_page)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address); + + /* + * invalidate_range_start() and invalidate_range_end() must be + * paired and are called only when the mmap_sem and/or the + * locks protecting the reverse maps are held. The subsystem + * must guarantee that no additional references are taken to + * the pages in the range established between the call to + * invalidate_range_start() and the matching call to + * invalidate_range_end(). + * + * Invalidation of multiple concurrent ranges may be + * optionally permitted by the driver. Either way the + * establishment of sptes is forbidden in the range passed to + * invalidate_range_begin/end for the whole duration of the + * invalidate_range_begin/end critical section. + * + * invalidate_range_start() is called when all pages in the + * range are still mapped and have at least a refcount of one. + * + * invalidate_range_end() is called when all pages in the + * range have been unmapped and the pages have been freed by + * the VM. + * + * The VM will remove the page table entries and potentially + * the page between invalidate_range_start() and + * invalidate_range_end(). If the page must not be freed + * because of pending I/O or other circumstances then the + * invalidate_range_start() callback (or the initial mapping + * by the driver) must make sure that the refcount is kept + * elevated. + * + * If the driver increases the refcount when the pages are + * initially mapped into an address space then either + * invalidate_range_start() or invalidate_range_end() may + * decrease the refcount. If the refcount is decreased on + * invalidate_range_start() then the VM can free pages as page + * table entries are removed. If the refcount is only + * droppped on invalidate_range_end() then the driver itself + * will drop the last refcount but it must take care to flush + * any secondary tlb before doing the final free on the + * page. Pages will no longer be referenced by the linux + * address space but may still be referenced by sptes until + * the last refcount is dropped. + */ + void (*invalidate_range_start)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end); + void (*invalidate_range_end)(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end); +}; + +/* + * The notifier chains are protected by mmap_sem and/or the reverse map + * semaphores. Notifier chains are only changed when all reverse maps and + * the mmap_sem locks are taken. + * + * Therefore notifier chains can only be traversed when either + * + * 1. mmap_sem is held. + * 2. One of the reverse map locks is held (i_mmap_lock or anon_vma->lock). + * 3. No other concurrent thread can access the list (release) + */ +struct mmu_notifier { + struct hlist_node hlist; + const struct mmu_notifier_ops *ops; +}; + +static inline int mm_has_notifiers(struct mm_struct *mm) +{ + return unlikely(mm->mmu_notifier_mm); +} + +extern int mmu_notifier_register(struct mmu_notifier *mn, + struct mm_struct *mm); +extern int __mmu_notifier_register(struct mmu_notifier *mn, + struct mm_struct *mm); +extern void mmu_notifier_unregister(struct mmu_notifier *mn, + struct mm_struct *mm); +extern void __mmu_notifier_mm_destroy(struct mm_struct *mm); +extern void __mmu_notifier_release(struct mm_struct *mm); +extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm, + unsigned long address); +extern void __mmu_notifier_invalidate_page(struct mm_struct *mm, + unsigned long address); +extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, + unsigned long start, unsigned long end); +extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, + unsigned long start, unsigned long end); + +static inline void mmu_notifier_release(struct mm_struct *mm) +{ + if (mm_has_notifiers(mm)) + __mmu_notifier_release(mm); +} + +static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, + unsigned long address) +{ + if (mm_has_notifiers(mm)) + return __mmu_notifier_clear_flush_young(mm, address); + return 0; +} + +static inline void mmu_notifier_invalidate_page(struct mm_struct *mm, + unsigned long address) +{ + if (mm_has_notifiers(mm)) + __mmu_notifier_invalidate_page(mm, address); +} + +static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + if (mm_has_notifiers(mm)) + __mmu_notifier_invalidate_range_start(mm, start, end); +} + +static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + if (mm_has_notifiers(mm)) + __mmu_notifier_invalidate_range_end(mm, start, end); +} + +static inline void mmu_notifier_mm_init(struct mm_struct *mm) +{ + mm->mmu_notifier_mm = NULL; +} + +static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) +{ + if (mm_has_notifiers(mm)) + __mmu_notifier_mm_destroy(mm); +} + +/* + * These two macros will sometime replace ptep_clear_flush. + * ptep_clear_flush is impleemnted as macro itself, so this also is + * implemented as a macro until ptep_clear_flush will converted to an + * inline function, to diminish the risk of compilation failure. The + * invalidate_page method over time can be moved outside the PT lock + * and these two macros can be later removed. + */ +#define ptep_clear_flush_notify(__vma, __address, __ptep) \ +({ \ + pte_t __pte; \ + struct vm_area_struct *___vma = __vma; \ + unsigned long ___address = __address; \ + __pte = ptep_clear_flush(___vma, ___address, __ptep); \ + mmu_notifier_invalidate_page(___vma->vm_mm, ___address); \ + __pte; \ +}) + +#define ptep_clear_flush_young_notify(__vma, __address, __ptep) \ +({ \ + int __young; \ + struct vm_area_struct *___vma = __vma; \ + unsigned long ___address = __address; \ + __young = ptep_clear_flush_young(___vma, ___address, __ptep); \ + __young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \ + ___address); \ + __young; \ +}) + +#else /* CONFIG_MMU_NOTIFIER */ + +static inline void mmu_notifier_release(struct mm_struct *mm) +{ +} + +static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm, + unsigned long address) +{ + return 0; +} + +static inline void mmu_notifier_invalidate_page(struct mm_struct *mm, + unsigned long address) +{ +} + +static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ +} + +static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ +} + +static inline void mmu_notifier_mm_init(struct mm_struct *mm) +{ +} + +static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) +{ +} + +#define ptep_clear_flush_young_notify ptep_clear_flush_young +#define ptep_clear_flush_notify ptep_clear_flush + +#endif /* CONFIG_MMU_NOTIFIER */ + +#endif /* _LINUX_MMU_NOTIFIER_H */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index a81d81890422..a39b38ccdc97 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -20,6 +20,7 @@ */ #define AS_EIO (__GFP_BITS_SHIFT + 0) /* IO error on async write */ #define AS_ENOSPC (__GFP_BITS_SHIFT + 1) /* ENOSPC on async write */ +#define AS_MM_ALL_LOCKS (__GFP_BITS_SHIFT + 2) /* under mm_take_all_locks() */ static inline void mapping_set_error(struct address_space *mapping, int error) { diff --git a/include/linux/pci-aspm.h b/include/linux/pci-aspm.h index a1a1e618e996..91ba0b338b47 100644 --- a/include/linux/pci-aspm.h +++ b/include/linux/pci-aspm.h @@ -27,6 +27,7 @@ extern void pcie_aspm_init_link_state(struct pci_dev *pdev); extern void pcie_aspm_exit_link_state(struct pci_dev *pdev); extern void pcie_aspm_pm_state_change(struct pci_dev *pdev); extern void pci_disable_link_state(struct pci_dev *pdev, int state); +extern void pcie_no_aspm(void); #else static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { @@ -40,6 +41,10 @@ static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev) static inline void pci_disable_link_state(struct pci_dev *pdev, int state) { } + +static inline void pcie_no_aspm(void) +{ +} #endif #ifdef CONFIG_PCIEASPM_DEBUG /* this depends on CONFIG_PCIEASPM */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 1d296d31abe0..825be3878f68 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -124,6 +124,8 @@ enum pci_dev_flags { * generation too. */ PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) 1, + /* Device configuration is irrevocably lost if disabled into D3 */ + PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2, }; typedef unsigned short __bitwise pci_bus_flags_t; diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index 19958b929905..450684f7eaac 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -374,6 +374,7 @@ #define PCI_EXP_DEVCAP_ATN_BUT 0x1000 /* Attention Button Present */ #define PCI_EXP_DEVCAP_ATN_IND 0x2000 /* Attention Indicator Present */ #define PCI_EXP_DEVCAP_PWR_IND 0x4000 /* Power Indicator Present */ +#define PCI_EXP_DEVCAP_RBER 0x8000 /* Role-Based Error Reporting */ #define PCI_EXP_DEVCAP_PWR_VAL 0x3fc0000 /* Slot Power Limit Value */ #define PCI_EXP_DEVCAP_PWR_SCL 0xc000000 /* Slot Power Limit Scale */ #define PCI_EXP_DEVCTL 8 /* Device Control */ diff --git a/include/linux/rculist.h b/include/linux/rculist.h index b0f39be08b6c..eb4443c7e05b 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -98,6 +98,34 @@ static inline void list_del_rcu(struct list_head *entry) } /** + * hlist_del_init_rcu - deletes entry from hash list with re-initialization + * @n: the element to delete from the hash list. + * + * Note: list_unhashed() on the node return true after this. It is + * useful for RCU based read lockfree traversal if the writer side + * must know if the list entry is still hashed or already unhashed. + * + * In particular, it means that we can not poison the forward pointers + * that may still be used for walking the hash list and we can only + * zero the pprev pointer so list_unhashed() will return true after + * this. + * + * The caller must take whatever precautions are necessary (such as + * holding appropriate locks) to avoid racing with another + * list-mutation primitive, such as hlist_add_head_rcu() or + * hlist_del_rcu(), running on this same list. However, it is + * perfectly legal to run concurrently with the _rcu list-traversal + * primitives, such as hlist_for_each_entry_rcu(). + */ +static inline void hlist_del_init_rcu(struct hlist_node *n) +{ + if (!hlist_unhashed(n)) { + __hlist_del(n); + n->pprev = NULL; + } +} + +/** * list_replace_rcu - replace old entry by new one * @old : the element to be replaced * @new : the new element to insert diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 1383692ac5bd..69407f85e10b 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -26,6 +26,14 @@ */ struct anon_vma { spinlock_t lock; /* Serialize access to vma list */ + /* + * NOTE: the LSB of the head.next is set by + * mm_take_all_locks() _after_ taking the above lock. So the + * head must only be read/written after taking the above lock + * to be sure to see a valid next pointer. The LSB bit itself + * is serialized by a system wide lock only visible to + * mm_take_all_locks() (mm_all_locks_mutex). + */ struct list_head head; /* List of private "related" vmas */ }; diff --git a/init/Kconfig b/init/Kconfig index 43d6989c275f..250e02c8f8f9 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -802,6 +802,10 @@ config PROC_PAGE_MONITOR endmenu # General setup +config HAVE_GENERIC_DMA_COHERENT + bool + default n + config SLABINFO bool depends on PROC_FS diff --git a/kernel/Makefile b/kernel/Makefile index 54f69837d35a..4e1d7df7c3e2 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -84,6 +84,7 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o obj-$(CONFIG_MARKERS) += marker.o obj-$(CONFIG_LATENCYTOP) += latencytop.o +obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o obj-$(CONFIG_FTRACE) += trace/ obj-$(CONFIG_TRACING) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o diff --git a/kernel/dma-coherent.c b/kernel/dma-coherent.c new file mode 100644 index 000000000000..7517115a8cce --- /dev/null +++ b/kernel/dma-coherent.c @@ -0,0 +1,154 @@ +/* + * Coherent per-device memory handling. + * Borrowed from i386 + */ +#include <linux/kernel.h> +#include <linux/dma-mapping.h> + +struct dma_coherent_mem { + void *virt_base; + u32 device_base; + int size; + int flags; + unsigned long *bitmap; +}; + +int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, + dma_addr_t device_addr, size_t size, int flags) +{ + void __iomem *mem_base = NULL; + int pages = size >> PAGE_SHIFT; + int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); + + if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) + goto out; + if (!size) + goto out; + if (dev->dma_mem) + goto out; + + /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ + + mem_base = ioremap(bus_addr, size); + if (!mem_base) + goto out; + + dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); + if (!dev->dma_mem) + goto out; + dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!dev->dma_mem->bitmap) + goto free1_out; + + dev->dma_mem->virt_base = mem_base; + dev->dma_mem->device_base = device_addr; + dev->dma_mem->size = pages; + dev->dma_mem->flags = flags; + + if (flags & DMA_MEMORY_MAP) + return DMA_MEMORY_MAP; + + return DMA_MEMORY_IO; + + free1_out: + kfree(dev->dma_mem); + out: + if (mem_base) + iounmap(mem_base); + return 0; +} +EXPORT_SYMBOL(dma_declare_coherent_memory); + +void dma_release_declared_memory(struct device *dev) +{ + struct dma_coherent_mem *mem = dev->dma_mem; + + if (!mem) + return; + dev->dma_mem = NULL; + iounmap(mem->virt_base); + kfree(mem->bitmap); + kfree(mem); +} +EXPORT_SYMBOL(dma_release_declared_memory); + +void *dma_mark_declared_memory_occupied(struct device *dev, + dma_addr_t device_addr, size_t size) +{ + struct dma_coherent_mem *mem = dev->dma_mem; + int pos, err; + int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1); + + pages >>= PAGE_SHIFT; + + if (!mem) + return ERR_PTR(-EINVAL); + + pos = (device_addr - mem->device_base) >> PAGE_SHIFT; + err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); + if (err != 0) + return ERR_PTR(err); + return mem->virt_base + (pos << PAGE_SHIFT); +} +EXPORT_SYMBOL(dma_mark_declared_memory_occupied); + +/** + * Try to allocate memory from the per-device coherent area. + * + * @dev: device from which we allocate memory + * @size: size of requested memory area + * @dma_handle: This will be filled with the correct dma handle + * @ret: This pointer will be filled with the virtual address + * to allocated area. + * + * This function should be only called from per-arch %dma_alloc_coherent() + * to support allocation from per-device coherent memory pools. + * + * Returns 0 if dma_alloc_coherent should continue with allocating from + * generic memory areas, or !0 if dma_alloc_coherent should return %ret. + */ +int dma_alloc_from_coherent(struct device *dev, ssize_t size, + dma_addr_t *dma_handle, void **ret) +{ + struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; + int order = get_order(size); + + if (mem) { + int page = bitmap_find_free_region(mem->bitmap, mem->size, + order); + if (page >= 0) { + *dma_handle = mem->device_base + (page << PAGE_SHIFT); + *ret = mem->virt_base + (page << PAGE_SHIFT); + memset(*ret, 0, size); + } else if (mem->flags & DMA_MEMORY_EXCLUSIVE) + *ret = NULL; + } + return (mem != NULL); +} + +/** + * Try to free the memory allocated from per-device coherent memory pool. + * @dev: device from which the memory was allocated + * @order: the order of pages allocated + * @vaddr: virtual address of allocated pages + * + * This checks whether the memory was allocated from the per-device + * coherent memory pool and if so, releases that memory. + * + * Returns 1 if we correctly released the memory, or 0 if + * %dma_release_coherent() should proceed with releasing memory from + * generic pools. + */ +int dma_release_from_coherent(struct device *dev, int order, void *vaddr) +{ + struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; + + if (mem && vaddr >= mem->virt_base && vaddr < + (mem->virt_base + (mem->size << PAGE_SHIFT))) { + int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; + + bitmap_release_region(mem->bitmap, page, order); + return 1; + } + return 0; +} diff --git a/kernel/fork.c b/kernel/fork.c index 8214ba7c8bb1..7ce2ebe84796 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -27,6 +27,7 @@ #include <linux/key.h> #include <linux/binfmts.h> #include <linux/mman.h> +#include <linux/mmu_notifier.h> #include <linux/fs.h> #include <linux/nsproxy.h> #include <linux/capability.h> @@ -414,6 +415,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) if (likely(!mm_alloc_pgd(mm))) { mm->def_flags = 0; + mmu_notifier_mm_init(mm); return mm; } @@ -446,6 +448,7 @@ void __mmdrop(struct mm_struct *mm) BUG_ON(mm == &init_mm); mm_free_pgd(mm); destroy_context(mm); + mmu_notifier_mm_destroy(mm); free_mm(mm); } EXPORT_SYMBOL_GPL(__mmdrop); diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c index a3b8d4c3f77a..889ddce2021e 100644 --- a/lib/iommu-helper.c +++ b/lib/iommu-helper.c @@ -80,3 +80,11 @@ void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr) } } EXPORT_SYMBOL(iommu_area_free); + +unsigned long iommu_num_pages(unsigned long addr, unsigned long len) +{ + unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE); + + return size >> PAGE_SHIFT; +} +EXPORT_SYMBOL(iommu_num_pages); diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 35136671b215..26187edcc7ea 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -15,7 +15,6 @@ #include <linux/module.h> static DEFINE_SPINLOCK(ratelimit_lock); -static unsigned long flags; /* * __ratelimit - rate limiting @@ -26,6 +25,8 @@ static unsigned long flags; */ int __ratelimit(struct ratelimit_state *rs) { + unsigned long flags; + if (!rs->interval) return 1; diff --git a/mm/Kconfig b/mm/Kconfig index efee5d379df4..446c6588c753 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -208,3 +208,6 @@ config NR_QUICK config VIRT_TO_BUS def_bool y depends on !ARCH_NO_VIRT_TO_BUS + +config MMU_NOTIFIER + bool diff --git a/mm/Makefile b/mm/Makefile index 06ca2381fef1..da4ccf015aea 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_SHMEM) += shmem.o obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o obj-$(CONFIG_SLOB) += slob.o +obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o obj-$(CONFIG_SLAB) += slab.o obj-$(CONFIG_SLUB) += slub.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o diff --git a/mm/filemap.c b/mm/filemap.c index 5de7633e1dbe..42bbc6909ba4 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1023,8 +1023,17 @@ find_page: ra, filp, page, index, last_index - index); } - if (!PageUptodate(page)) - goto page_not_up_to_date; + if (!PageUptodate(page)) { + if (inode->i_blkbits == PAGE_CACHE_SHIFT || + !mapping->a_ops->is_partially_uptodate) + goto page_not_up_to_date; + if (TestSetPageLocked(page)) + goto page_not_up_to_date; + if (!mapping->a_ops->is_partially_uptodate(page, + desc, offset)) + goto page_not_up_to_date_locked; + unlock_page(page); + } page_ok: /* * i_size must be checked after we know the page is Uptodate. @@ -1094,6 +1103,7 @@ page_not_up_to_date: if (lock_page_killable(page)) goto readpage_eio; +page_not_up_to_date_locked: /* Did it get truncated before we got the lock? */ if (!page->mapping) { unlock_page(page); diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index 98a3f31ccd6a..380ab402d711 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/uio.h> #include <linux/rmap.h> +#include <linux/mmu_notifier.h> #include <linux/sched.h> #include <asm/tlbflush.h> #include <asm/io.h> @@ -188,7 +189,7 @@ __xip_unmap (struct address_space * mapping, if (pte) { /* Nuke the page table entry. */ flush_cache_page(vma, address, pte_pfn(*pte)); - pteval = ptep_clear_flush(vma, address, pte); + pteval = ptep_clear_flush_notify(vma, address, pte); page_remove_rmap(page, vma); dec_mm_counter(mm, file_rss); BUG_ON(pte_dirty(pteval)); diff --git a/mm/fremap.c b/mm/fremap.c index 07a9c82ce1a3..7881638e4a12 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -15,6 +15,7 @@ #include <linux/rmap.h> #include <linux/module.h> #include <linux/syscalls.h> +#include <linux/mmu_notifier.h> #include <asm/mmu_context.h> #include <asm/cacheflush.h> @@ -214,7 +215,9 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, spin_unlock(&mapping->i_mmap_lock); } + mmu_notifier_invalidate_range_start(mm, start, start + size); err = populate_range(mm, vma, start, size, pgoff); + mmu_notifier_invalidate_range_end(mm, start, start + size); if (!err && !(flags & MAP_NONBLOCK)) { if (unlikely(has_write_lock)) { downgrade_write(&mm->mmap_sem); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3be79dc18c5c..254ce2b90158 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -9,6 +9,7 @@ #include <linux/mm.h> #include <linux/sysctl.h> #include <linux/highmem.h> +#include <linux/mmu_notifier.h> #include <linux/nodemask.h> #include <linux/pagemap.h> #include <linux/mempolicy.h> @@ -19,6 +20,7 @@ #include <asm/page.h> #include <asm/pgtable.h> +#include <asm/io.h> #include <linux/hugetlb.h> #include "internal.h" @@ -1672,6 +1674,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, BUG_ON(start & ~huge_page_mask(h)); BUG_ON(end & ~huge_page_mask(h)); + mmu_notifier_invalidate_range_start(mm, start, end); spin_lock(&mm->page_table_lock); for (address = start; address < end; address += sz) { ptep = huge_pte_offset(mm, address); @@ -1713,6 +1716,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, } spin_unlock(&mm->page_table_lock); flush_tlb_range(vma, start, end); + mmu_notifier_invalidate_range_end(mm, start, end); list_for_each_entry_safe(page, tmp, &page_list, lru) { list_del(&page->lru); put_page(page); diff --git a/mm/memory.c b/mm/memory.c index a8ca04faaea6..67f0ab9077d9 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -51,6 +51,7 @@ #include <linux/init.h> #include <linux/writeback.h> #include <linux/memcontrol.h> +#include <linux/mmu_notifier.h> #include <asm/pgalloc.h> #include <asm/uaccess.h> @@ -652,6 +653,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, unsigned long next; unsigned long addr = vma->vm_start; unsigned long end = vma->vm_end; + int ret; /* * Don't copy ptes where a page fault will fill them correctly. @@ -667,17 +669,33 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, if (is_vm_hugetlb_page(vma)) return copy_hugetlb_page_range(dst_mm, src_mm, vma); + /* + * We need to invalidate the secondary MMU mappings only when + * there could be a permission downgrade on the ptes of the + * parent mm. And a permission downgrade will only happen if + * is_cow_mapping() returns true. + */ + if (is_cow_mapping(vma->vm_flags)) + mmu_notifier_invalidate_range_start(src_mm, addr, end); + + ret = 0; dst_pgd = pgd_offset(dst_mm, addr); src_pgd = pgd_offset(src_mm, addr); do { next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(src_pgd)) continue; - if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd, - vma, addr, next)) - return -ENOMEM; + if (unlikely(copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd, + vma, addr, next))) { + ret = -ENOMEM; + break; + } } while (dst_pgd++, src_pgd++, addr = next, addr != end); - return 0; + + if (is_cow_mapping(vma->vm_flags)) + mmu_notifier_invalidate_range_end(src_mm, + vma->vm_start, end); + return ret; } static unsigned long zap_pte_range(struct mmu_gather *tlb, @@ -881,7 +899,9 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, unsigned long start = start_addr; spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL; int fullmm = (*tlbp)->fullmm; + struct mm_struct *mm = vma->vm_mm; + mmu_notifier_invalidate_range_start(mm, start_addr, end_addr); for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) { unsigned long end; @@ -946,6 +966,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, } } out: + mmu_notifier_invalidate_range_end(mm, start_addr, end_addr); return start; /* which is now the end (or restart) address */ } @@ -1616,10 +1637,11 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, { pgd_t *pgd; unsigned long next; - unsigned long end = addr + size; + unsigned long start = addr, end = addr + size; int err; BUG_ON(addr >= end); + mmu_notifier_invalidate_range_start(mm, start, end); pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); @@ -1627,6 +1649,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, if (err) break; } while (pgd++, addr = next, addr != end); + mmu_notifier_invalidate_range_end(mm, start, end); return err; } EXPORT_SYMBOL_GPL(apply_to_page_range); @@ -1839,7 +1862,7 @@ gotten: * seen in the presence of one thread doing SMC and another * thread doing COW. */ - ptep_clear_flush(vma, address, page_table); + ptep_clear_flush_notify(vma, address, page_table); set_pte_at(mm, address, page_table, entry); update_mmu_cache(vma, address, entry); lru_cache_add_active(new_page); diff --git a/mm/mmap.c b/mm/mmap.c index 5e0cc99e9cd5..245c3d69067b 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -26,6 +26,7 @@ #include <linux/mount.h> #include <linux/mempolicy.h> #include <linux/rmap.h> +#include <linux/mmu_notifier.h> #include <asm/uaccess.h> #include <asm/cacheflush.h> @@ -2061,6 +2062,7 @@ void exit_mmap(struct mm_struct *mm) /* mm's last user has gone, and its about to be pulled down */ arch_exit_mmap(mm); + mmu_notifier_release(mm); lru_add_drain(); flush_cache_mm(mm); @@ -2268,3 +2270,161 @@ int install_special_mapping(struct mm_struct *mm, return 0; } + +static DEFINE_MUTEX(mm_all_locks_mutex); + +static void vm_lock_anon_vma(struct anon_vma *anon_vma) +{ + if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) { + /* + * The LSB of head.next can't change from under us + * because we hold the mm_all_locks_mutex. + */ + spin_lock(&anon_vma->lock); + /* + * We can safely modify head.next after taking the + * anon_vma->lock. If some other vma in this mm shares + * the same anon_vma we won't take it again. + * + * No need of atomic instructions here, head.next + * can't change from under us thanks to the + * anon_vma->lock. + */ + if (__test_and_set_bit(0, (unsigned long *) + &anon_vma->head.next)) + BUG(); + } +} + +static void vm_lock_mapping(struct address_space *mapping) +{ + if (!test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) { + /* + * AS_MM_ALL_LOCKS can't change from under us because + * we hold the mm_all_locks_mutex. + * + * Operations on ->flags have to be atomic because + * even if AS_MM_ALL_LOCKS is stable thanks to the + * mm_all_locks_mutex, there may be other cpus + * changing other bitflags in parallel to us. + */ + if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags)) + BUG(); + spin_lock(&mapping->i_mmap_lock); + } +} + +/* + * This operation locks against the VM for all pte/vma/mm related + * operations that could ever happen on a certain mm. This includes + * vmtruncate, try_to_unmap, and all page faults. + * + * The caller must take the mmap_sem in write mode before calling + * mm_take_all_locks(). The caller isn't allowed to release the + * mmap_sem until mm_drop_all_locks() returns. + * + * mmap_sem in write mode is required in order to block all operations + * that could modify pagetables and free pages without need of + * altering the vma layout (for example populate_range() with + * nonlinear vmas). It's also needed in write mode to avoid new + * anon_vmas to be associated with existing vmas. + * + * A single task can't take more than one mm_take_all_locks() in a row + * or it would deadlock. + * + * The LSB in anon_vma->head.next and the AS_MM_ALL_LOCKS bitflag in + * mapping->flags avoid to take the same lock twice, if more than one + * vma in this mm is backed by the same anon_vma or address_space. + * + * We can take all the locks in random order because the VM code + * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never + * takes more than one of them in a row. Secondly we're protected + * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex. + * + * mm_take_all_locks() and mm_drop_all_locks are expensive operations + * that may have to take thousand of locks. + * + * mm_take_all_locks() can fail if it's interrupted by signals. + */ +int mm_take_all_locks(struct mm_struct *mm) +{ + struct vm_area_struct *vma; + int ret = -EINTR; + + BUG_ON(down_read_trylock(&mm->mmap_sem)); + + mutex_lock(&mm_all_locks_mutex); + + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (signal_pending(current)) + goto out_unlock; + if (vma->anon_vma) + vm_lock_anon_vma(vma->anon_vma); + if (vma->vm_file && vma->vm_file->f_mapping) + vm_lock_mapping(vma->vm_file->f_mapping); + } + ret = 0; + +out_unlock: + if (ret) + mm_drop_all_locks(mm); + + return ret; +} + +static void vm_unlock_anon_vma(struct anon_vma *anon_vma) +{ + if (test_bit(0, (unsigned long *) &anon_vma->head.next)) { + /* + * The LSB of head.next can't change to 0 from under + * us because we hold the mm_all_locks_mutex. + * + * We must however clear the bitflag before unlocking + * the vma so the users using the anon_vma->head will + * never see our bitflag. + * + * No need of atomic instructions here, head.next + * can't change from under us until we release the + * anon_vma->lock. + */ + if (!__test_and_clear_bit(0, (unsigned long *) + &anon_vma->head.next)) + BUG(); + spin_unlock(&anon_vma->lock); + } +} + +static void vm_unlock_mapping(struct address_space *mapping) +{ + if (test_bit(AS_MM_ALL_LOCKS, &mapping->flags)) { + /* + * AS_MM_ALL_LOCKS can't change to 0 from under us + * because we hold the mm_all_locks_mutex. + */ + spin_unlock(&mapping->i_mmap_lock); + if (!test_and_clear_bit(AS_MM_ALL_LOCKS, + &mapping->flags)) + BUG(); + } +} + +/* + * The mmap_sem cannot be released by the caller until + * mm_drop_all_locks() returns. + */ +void mm_drop_all_locks(struct mm_struct *mm) +{ + struct vm_area_struct *vma; + + BUG_ON(down_read_trylock(&mm->mmap_sem)); + BUG_ON(!mutex_is_locked(&mm_all_locks_mutex)); + + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (vma->anon_vma) + vm_unlock_anon_vma(vma->anon_vma); + if (vma->vm_file && vma->vm_file->f_mapping) + vm_unlock_mapping(vma->vm_file->f_mapping); + } + + mutex_unlock(&mm_all_locks_mutex); +} diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c new file mode 100644 index 000000000000..5f4ef0250bee --- /dev/null +++ b/mm/mmu_notifier.c @@ -0,0 +1,277 @@ +/* + * linux/mm/mmu_notifier.c + * + * Copyright (C) 2008 Qumranet, Inc. + * Copyright (C) 2008 SGI + * Christoph Lameter <clameter@sgi.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include <linux/rculist.h> +#include <linux/mmu_notifier.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/err.h> +#include <linux/rcupdate.h> +#include <linux/sched.h> + +/* + * This function can't run concurrently against mmu_notifier_register + * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap + * runs with mm_users == 0. Other tasks may still invoke mmu notifiers + * in parallel despite there being no task using this mm any more, + * through the vmas outside of the exit_mmap context, such as with + * vmtruncate. This serializes against mmu_notifier_unregister with + * the mmu_notifier_mm->lock in addition to RCU and it serializes + * against the other mmu notifiers with RCU. struct mmu_notifier_mm + * can't go away from under us as exit_mmap holds an mm_count pin + * itself. + */ +void __mmu_notifier_release(struct mm_struct *mm) +{ + struct mmu_notifier *mn; + + spin_lock(&mm->mmu_notifier_mm->lock); + while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) { + mn = hlist_entry(mm->mmu_notifier_mm->list.first, + struct mmu_notifier, + hlist); + /* + * We arrived before mmu_notifier_unregister so + * mmu_notifier_unregister will do nothing other than + * to wait ->release to finish and + * mmu_notifier_unregister to return. + */ + hlist_del_init_rcu(&mn->hlist); + /* + * RCU here will block mmu_notifier_unregister until + * ->release returns. + */ + rcu_read_lock(); + spin_unlock(&mm->mmu_notifier_mm->lock); + /* + * if ->release runs before mmu_notifier_unregister it + * must be handled as it's the only way for the driver + * to flush all existing sptes and stop the driver + * from establishing any more sptes before all the + * pages in the mm are freed. + */ + if (mn->ops->release) + mn->ops->release(mn, mm); + rcu_read_unlock(); + spin_lock(&mm->mmu_notifier_mm->lock); + } + spin_unlock(&mm->mmu_notifier_mm->lock); + + /* + * synchronize_rcu here prevents mmu_notifier_release to + * return to exit_mmap (which would proceed freeing all pages + * in the mm) until the ->release method returns, if it was + * invoked by mmu_notifier_unregister. + * + * The mmu_notifier_mm can't go away from under us because one + * mm_count is hold by exit_mmap. + */ + synchronize_rcu(); +} + +/* + * If no young bitflag is supported by the hardware, ->clear_flush_young can + * unmap the address and return 1 or 0 depending if the mapping previously + * existed or not. + */ +int __mmu_notifier_clear_flush_young(struct mm_struct *mm, + unsigned long address) +{ + struct mmu_notifier *mn; + struct hlist_node *n; + int young = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { + if (mn->ops->clear_flush_young) + young |= mn->ops->clear_flush_young(mn, mm, address); + } + rcu_read_unlock(); + + return young; +} + +void __mmu_notifier_invalidate_page(struct mm_struct *mm, + unsigned long address) +{ + struct mmu_notifier *mn; + struct hlist_node *n; + + rcu_read_lock(); + hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { + if (mn->ops->invalidate_page) + mn->ops->invalidate_page(mn, mm, address); + } + rcu_read_unlock(); +} + +void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct mmu_notifier *mn; + struct hlist_node *n; + + rcu_read_lock(); + hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { + if (mn->ops->invalidate_range_start) + mn->ops->invalidate_range_start(mn, mm, start, end); + } + rcu_read_unlock(); +} + +void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct mmu_notifier *mn; + struct hlist_node *n; + + rcu_read_lock(); + hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { + if (mn->ops->invalidate_range_end) + mn->ops->invalidate_range_end(mn, mm, start, end); + } + rcu_read_unlock(); +} + +static int do_mmu_notifier_register(struct mmu_notifier *mn, + struct mm_struct *mm, + int take_mmap_sem) +{ + struct mmu_notifier_mm *mmu_notifier_mm; + int ret; + + BUG_ON(atomic_read(&mm->mm_users) <= 0); + + ret = -ENOMEM; + mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL); + if (unlikely(!mmu_notifier_mm)) + goto out; + + if (take_mmap_sem) + down_write(&mm->mmap_sem); + ret = mm_take_all_locks(mm); + if (unlikely(ret)) + goto out_cleanup; + + if (!mm_has_notifiers(mm)) { + INIT_HLIST_HEAD(&mmu_notifier_mm->list); + spin_lock_init(&mmu_notifier_mm->lock); + mm->mmu_notifier_mm = mmu_notifier_mm; + mmu_notifier_mm = NULL; + } + atomic_inc(&mm->mm_count); + + /* + * Serialize the update against mmu_notifier_unregister. A + * side note: mmu_notifier_release can't run concurrently with + * us because we hold the mm_users pin (either implicitly as + * current->mm or explicitly with get_task_mm() or similar). + * We can't race against any other mmu notifier method either + * thanks to mm_take_all_locks(). + */ + spin_lock(&mm->mmu_notifier_mm->lock); + hlist_add_head(&mn->hlist, &mm->mmu_notifier_mm->list); + spin_unlock(&mm->mmu_notifier_mm->lock); + + mm_drop_all_locks(mm); +out_cleanup: + if (take_mmap_sem) + up_write(&mm->mmap_sem); + /* kfree() does nothing if mmu_notifier_mm is NULL */ + kfree(mmu_notifier_mm); +out: + BUG_ON(atomic_read(&mm->mm_users) <= 0); + return ret; +} + +/* + * Must not hold mmap_sem nor any other VM related lock when calling + * this registration function. Must also ensure mm_users can't go down + * to zero while this runs to avoid races with mmu_notifier_release, + * so mm has to be current->mm or the mm should be pinned safely such + * as with get_task_mm(). If the mm is not current->mm, the mm_users + * pin should be released by calling mmput after mmu_notifier_register + * returns. mmu_notifier_unregister must be always called to + * unregister the notifier. mm_count is automatically pinned to allow + * mmu_notifier_unregister to safely run at any time later, before or + * after exit_mmap. ->release will always be called before exit_mmap + * frees the pages. + */ +int mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm) +{ + return do_mmu_notifier_register(mn, mm, 1); +} +EXPORT_SYMBOL_GPL(mmu_notifier_register); + +/* + * Same as mmu_notifier_register but here the caller must hold the + * mmap_sem in write mode. + */ +int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm) +{ + return do_mmu_notifier_register(mn, mm, 0); +} +EXPORT_SYMBOL_GPL(__mmu_notifier_register); + +/* this is called after the last mmu_notifier_unregister() returned */ +void __mmu_notifier_mm_destroy(struct mm_struct *mm) +{ + BUG_ON(!hlist_empty(&mm->mmu_notifier_mm->list)); + kfree(mm->mmu_notifier_mm); + mm->mmu_notifier_mm = LIST_POISON1; /* debug */ +} + +/* + * This releases the mm_count pin automatically and frees the mm + * structure if it was the last user of it. It serializes against + * running mmu notifiers with RCU and against mmu_notifier_unregister + * with the unregister lock + RCU. All sptes must be dropped before + * calling mmu_notifier_unregister. ->release or any other notifier + * method may be invoked concurrently with mmu_notifier_unregister, + * and only after mmu_notifier_unregister returned we're guaranteed + * that ->release or any other method can't run anymore. + */ +void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm) +{ + BUG_ON(atomic_read(&mm->mm_count) <= 0); + + spin_lock(&mm->mmu_notifier_mm->lock); + if (!hlist_unhashed(&mn->hlist)) { + hlist_del_rcu(&mn->hlist); + + /* + * RCU here will force exit_mmap to wait ->release to finish + * before freeing the pages. + */ + rcu_read_lock(); + spin_unlock(&mm->mmu_notifier_mm->lock); + /* + * exit_mmap will block in mmu_notifier_release to + * guarantee ->release is called before freeing the + * pages. + */ + if (mn->ops->release) + mn->ops->release(mn, mm); + rcu_read_unlock(); + } else + spin_unlock(&mm->mmu_notifier_mm->lock); + + /* + * Wait any running method to finish, of course including + * ->release if it was run by mmu_notifier_relase instead of us. + */ + synchronize_rcu(); + + BUG_ON(atomic_read(&mm->mm_count) <= 0); + + mmdrop(mm); +} +EXPORT_SYMBOL_GPL(mmu_notifier_unregister); diff --git a/mm/mprotect.c b/mm/mprotect.c index abd645a3b0a0..fded06f923f4 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -21,6 +21,7 @@ #include <linux/syscalls.h> #include <linux/swap.h> #include <linux/swapops.h> +#include <linux/mmu_notifier.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/cacheflush.h> @@ -203,10 +204,12 @@ success: dirty_accountable = 1; } + mmu_notifier_invalidate_range_start(mm, start, end); if (is_vm_hugetlb_page(vma)) hugetlb_change_protection(vma, start, end, vma->vm_page_prot); else change_protection(vma, start, end, vma->vm_page_prot, dirty_accountable); + mmu_notifier_invalidate_range_end(mm, start, end); vm_stat_account(mm, oldflags, vma->vm_file, -nrpages); vm_stat_account(mm, newflags, vma->vm_file, nrpages); return 0; diff --git a/mm/mremap.c b/mm/mremap.c index 08e3c7f2bd15..1a7743923c8c 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -18,6 +18,7 @@ #include <linux/highmem.h> #include <linux/security.h> #include <linux/syscalls.h> +#include <linux/mmu_notifier.h> #include <asm/uaccess.h> #include <asm/cacheflush.h> @@ -74,7 +75,11 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, struct mm_struct *mm = vma->vm_mm; pte_t *old_pte, *new_pte, pte; spinlock_t *old_ptl, *new_ptl; + unsigned long old_start; + old_start = old_addr; + mmu_notifier_invalidate_range_start(vma->vm_mm, + old_start, old_end); if (vma->vm_file) { /* * Subtle point from Rajesh Venkatasubramanian: before @@ -116,6 +121,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, pte_unmap_unlock(old_pte - 1, old_ptl); if (mapping) spin_unlock(&mapping->i_mmap_lock); + mmu_notifier_invalidate_range_end(vma->vm_mm, old_start, old_end); } #define LATENCY_LIMIT (64 * PAGE_SIZE) diff --git a/mm/rmap.c b/mm/rmap.c index 39ae5a9bf382..99bc3f9cd796 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -49,6 +49,7 @@ #include <linux/module.h> #include <linux/kallsyms.h> #include <linux/memcontrol.h> +#include <linux/mmu_notifier.h> #include <asm/tlbflush.h> @@ -287,7 +288,7 @@ static int page_referenced_one(struct page *page, if (vma->vm_flags & VM_LOCKED) { referenced++; *mapcount = 1; /* break early from loop */ - } else if (ptep_clear_flush_young(vma, address, pte)) + } else if (ptep_clear_flush_young_notify(vma, address, pte)) referenced++; /* Pretend the page is referenced if the task has the @@ -457,7 +458,7 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma) pte_t entry; flush_cache_page(vma, address, pte_pfn(*pte)); - entry = ptep_clear_flush(vma, address, pte); + entry = ptep_clear_flush_notify(vma, address, pte); entry = pte_wrprotect(entry); entry = pte_mkclean(entry); set_pte_at(mm, address, pte, entry); @@ -705,14 +706,14 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * skipped over this mm) then we should reactivate it. */ if (!migration && ((vma->vm_flags & VM_LOCKED) || - (ptep_clear_flush_young(vma, address, pte)))) { + (ptep_clear_flush_young_notify(vma, address, pte)))) { ret = SWAP_FAIL; goto out_unmap; } /* Nuke the page table entry. */ flush_cache_page(vma, address, page_to_pfn(page)); - pteval = ptep_clear_flush(vma, address, pte); + pteval = ptep_clear_flush_notify(vma, address, pte); /* Move the dirty bit to the physical page now the pte is gone. */ if (pte_dirty(pteval)) @@ -837,12 +838,12 @@ static void try_to_unmap_cluster(unsigned long cursor, page = vm_normal_page(vma, address, *pte); BUG_ON(!page || PageAnon(page)); - if (ptep_clear_flush_young(vma, address, pte)) + if (ptep_clear_flush_young_notify(vma, address, pte)) continue; /* Nuke the page table entry. */ flush_cache_page(vma, address, pte_pfn(*pte)); - pteval = ptep_clear_flush(vma, address, pte); + pteval = ptep_clear_flush_notify(vma, address, pte); /* If nonlinear, store the file page offset in the pte. */ if (page->index != linear_page_index(vma, address)) diff --git a/mm/shmem.c b/mm/shmem.c index 952d361774bb..c1e5a3b4f758 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1513,7 +1513,6 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev) inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; inode->i_blocks = 0; - inode->i_mapping->a_ops = &shmem_aops; inode->i_mapping->backing_dev_info = &shmem_backing_dev_info; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_generation = get_seconds(); @@ -1528,6 +1527,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev) init_special_inode(inode, mode, dev); break; case S_IFREG: + inode->i_mapping->a_ops = &shmem_aops; inode->i_op = &shmem_inode_operations; inode->i_fop = &shmem_file_operations; mpol_shared_policy_init(&info->policy, @@ -1929,6 +1929,7 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s return error; } unlock_page(page); + inode->i_mapping->a_ops = &shmem_aops; inode->i_op = &shmem_symlink_inode_operations; kaddr = kmap_atomic(page, KM_USER0); memcpy(kaddr, symname, len); |