diff options
41 files changed, 2898 insertions, 780 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index b69cfdc12112..f1959b7d13d0 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -508,6 +508,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Also note the kernel might malfunction if you disable some critical bits. + cma=nn[MG] [ARM,KNL] + Sets the size of kernel global memory area for contiguous + memory allocations. For more information, see + include/linux/dma-contiguous.h + cmo_free_hint= [PPC] Format: { yes | no } Specify whether pages are marked as being inactive when they are freed. This is used in CMO environments @@ -515,6 +520,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. a hypervisor. Default: yes + coherent_pool=nn[KMG] [ARM,KNL] + Sets the size of memory pool for coherent, atomic dma + allocations if Contiguous Memory Allocator (CMA) is used. + code_bytes [X86] How many bytes of object code to print in an oops report. Range: 0 - 8192 diff --git a/arch/Kconfig b/arch/Kconfig index e9a910876cda..8c3d957fa8e2 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -159,6 +159,9 @@ config HAVE_ARCH_TRACEHOOK config HAVE_DMA_ATTRS bool +config HAVE_DMA_CONTIGUOUS + bool + config USE_GENERIC_SMP_HELPERS bool diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 5458aa9db067..3ca1ba981efb 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -5,6 +5,9 @@ config ARM select HAVE_AOUT select HAVE_DMA_API_DEBUG select HAVE_IDE if PCI || ISA || PCMCIA + select HAVE_DMA_ATTRS + select HAVE_DMA_CONTIGUOUS if (CPU_V6 || CPU_V6K || CPU_V7) + select CMA if (CPU_V6 || CPU_V6K || CPU_V7) select HAVE_MEMBLOCK select RTC_LIB select SYS_SUPPORTS_APM_EMULATION @@ -54,6 +57,14 @@ config ARM config ARM_HAS_SG_CHAIN bool +config NEED_SG_DMA_LENGTH + bool + +config ARM_DMA_USE_IOMMU + select NEED_SG_DMA_LENGTH + select ARM_HAS_SG_CHAIN + bool + config HAVE_PWM bool diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c index 595ecd290ebf..9d7eb530f95f 100644 --- a/arch/arm/common/dmabounce.c +++ b/arch/arm/common/dmabounce.c @@ -173,7 +173,8 @@ find_safe_buffer(struct dmabounce_device_info *device_info, dma_addr_t safe_dma_ read_lock_irqsave(&device_info->lock, flags); list_for_each_entry(b, &device_info->safe_buffers, node) - if (b->safe_dma_addr == safe_dma_addr) { + if (b->safe_dma_addr <= safe_dma_addr && + b->safe_dma_addr + b->size > safe_dma_addr) { rb = b; break; } @@ -254,7 +255,7 @@ static inline dma_addr_t map_single(struct device *dev, void *ptr, size_t size, if (buf == NULL) { dev_err(dev, "%s: unable to map unsafe buffer %p!\n", __func__, ptr); - return ~0; + return DMA_ERROR_CODE; } dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", @@ -307,8 +308,9 @@ static inline void unmap_single(struct device *dev, struct safe_buffer *buf, * substitute the safe buffer for the unsafe one. * (basically move the buffer from an unsafe area to a safe one) */ -dma_addr_t __dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir) +static dma_addr_t dmabounce_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) { dma_addr_t dma_addr; int ret; @@ -320,21 +322,20 @@ dma_addr_t __dma_map_page(struct device *dev, struct page *page, ret = needs_bounce(dev, dma_addr, size); if (ret < 0) - return ~0; + return DMA_ERROR_CODE; if (ret == 0) { - __dma_page_cpu_to_dev(page, offset, size, dir); + arm_dma_ops.sync_single_for_device(dev, dma_addr, size, dir); return dma_addr; } if (PageHighMem(page)) { dev_err(dev, "DMA buffer bouncing of HIGHMEM pages is not supported\n"); - return ~0; + return DMA_ERROR_CODE; } return map_single(dev, page_address(page) + offset, size, dir); } -EXPORT_SYMBOL(__dma_map_page); /* * see if a mapped address was really a "safe" buffer and if so, copy @@ -342,8 +343,8 @@ EXPORT_SYMBOL(__dma_map_page); * the safe buffer. (basically return things back to the way they * should be) */ -void __dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction dir) +static void dmabounce_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction dir, struct dma_attrs *attrs) { struct safe_buffer *buf; @@ -352,19 +353,18 @@ void __dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, buf = find_safe_buffer_dev(dev, dma_addr, __func__); if (!buf) { - __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, dma_addr)), - dma_addr & ~PAGE_MASK, size, dir); + arm_dma_ops.sync_single_for_cpu(dev, dma_addr, size, dir); return; } unmap_single(dev, buf, size, dir); } -EXPORT_SYMBOL(__dma_unmap_page); -int dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr, - unsigned long off, size_t sz, enum dma_data_direction dir) +static int __dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr, + size_t sz, enum dma_data_direction dir) { struct safe_buffer *buf; + unsigned long off; dev_dbg(dev, "%s(dma=%#x,off=%#lx,sz=%zx,dir=%x)\n", __func__, addr, off, sz, dir); @@ -373,6 +373,8 @@ int dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr, if (!buf) return 1; + off = addr - buf->safe_dma_addr; + BUG_ON(buf->direction != dir); dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", @@ -388,12 +390,21 @@ int dmabounce_sync_for_cpu(struct device *dev, dma_addr_t addr, } return 0; } -EXPORT_SYMBOL(dmabounce_sync_for_cpu); -int dmabounce_sync_for_device(struct device *dev, dma_addr_t addr, - unsigned long off, size_t sz, enum dma_data_direction dir) +static void dmabounce_sync_for_cpu(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + if (!__dmabounce_sync_for_cpu(dev, handle, size, dir)) + return; + + arm_dma_ops.sync_single_for_cpu(dev, handle, size, dir); +} + +static int __dmabounce_sync_for_device(struct device *dev, dma_addr_t addr, + size_t sz, enum dma_data_direction dir) { struct safe_buffer *buf; + unsigned long off; dev_dbg(dev, "%s(dma=%#x,off=%#lx,sz=%zx,dir=%x)\n", __func__, addr, off, sz, dir); @@ -402,6 +413,8 @@ int dmabounce_sync_for_device(struct device *dev, dma_addr_t addr, if (!buf) return 1; + off = addr - buf->safe_dma_addr; + BUG_ON(buf->direction != dir); dev_dbg(dev, "%s: unsafe buffer %p (dma=%#x) mapped to %p (dma=%#x)\n", @@ -417,7 +430,38 @@ int dmabounce_sync_for_device(struct device *dev, dma_addr_t addr, } return 0; } -EXPORT_SYMBOL(dmabounce_sync_for_device); + +static void dmabounce_sync_for_device(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + if (!__dmabounce_sync_for_device(dev, handle, size, dir)) + return; + + arm_dma_ops.sync_single_for_device(dev, handle, size, dir); +} + +static int dmabounce_set_mask(struct device *dev, u64 dma_mask) +{ + if (dev->archdata.dmabounce) + return 0; + + return arm_dma_ops.set_dma_mask(dev, dma_mask); +} + +static struct dma_map_ops dmabounce_ops = { + .alloc = arm_dma_alloc, + .free = arm_dma_free, + .mmap = arm_dma_mmap, + .map_page = dmabounce_map_page, + .unmap_page = dmabounce_unmap_page, + .sync_single_for_cpu = dmabounce_sync_for_cpu, + .sync_single_for_device = dmabounce_sync_for_device, + .map_sg = arm_dma_map_sg, + .unmap_sg = arm_dma_unmap_sg, + .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu, + .sync_sg_for_device = arm_dma_sync_sg_for_device, + .set_dma_mask = dmabounce_set_mask, +}; static int dmabounce_init_pool(struct dmabounce_pool *pool, struct device *dev, const char *name, unsigned long size) @@ -479,6 +523,7 @@ int dmabounce_register_dev(struct device *dev, unsigned long small_buffer_size, #endif dev->archdata.dmabounce = device_info; + set_dma_ops(dev, &dmabounce_ops); dev_info(dev, "dmabounce: registered device\n"); @@ -497,6 +542,7 @@ void dmabounce_unregister_dev(struct device *dev) struct dmabounce_device_info *device_info = dev->archdata.dmabounce; dev->archdata.dmabounce = NULL; + set_dma_ops(dev, NULL); if (!device_info) { dev_warn(dev, diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h index 7aa368003b05..b69c0d3285f8 100644 --- a/arch/arm/include/asm/device.h +++ b/arch/arm/include/asm/device.h @@ -7,12 +7,16 @@ #define ASMARM_DEVICE_H struct dev_archdata { + struct dma_map_ops *dma_ops; #ifdef CONFIG_DMABOUNCE struct dmabounce_device_info *dmabounce; #endif #ifdef CONFIG_IOMMU_API void *iommu; /* private IOMMU data */ #endif +#ifdef CONFIG_ARM_DMA_USE_IOMMU + struct dma_iommu_mapping *mapping; +#endif }; struct omap_device; diff --git a/arch/arm/include/asm/dma-contiguous.h b/arch/arm/include/asm/dma-contiguous.h new file mode 100644 index 000000000000..3ed37b4d93da --- /dev/null +++ b/arch/arm/include/asm/dma-contiguous.h @@ -0,0 +1,15 @@ +#ifndef ASMARM_DMA_CONTIGUOUS_H +#define ASMARM_DMA_CONTIGUOUS_H + +#ifdef __KERNEL__ +#ifdef CONFIG_CMA + +#include <linux/types.h> +#include <asm-generic/dma-contiguous.h> + +void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size); + +#endif +#endif + +#endif diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h new file mode 100644 index 000000000000..799b09409fad --- /dev/null +++ b/arch/arm/include/asm/dma-iommu.h @@ -0,0 +1,34 @@ +#ifndef ASMARM_DMA_IOMMU_H +#define ASMARM_DMA_IOMMU_H + +#ifdef __KERNEL__ + +#include <linux/mm_types.h> +#include <linux/scatterlist.h> +#include <linux/dma-debug.h> +#include <linux/kmemcheck.h> + +struct dma_iommu_mapping { + /* iommu specific data */ + struct iommu_domain *domain; + + void *bitmap; + size_t bits; + unsigned int order; + dma_addr_t base; + + spinlock_t lock; + struct kref kref; +}; + +struct dma_iommu_mapping * +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size, + int order); + +void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping); + +int arm_iommu_attach_device(struct device *dev, + struct dma_iommu_mapping *mapping); + +#endif /* __KERNEL__ */ +#endif diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index cb3b7c981c4b..bbef15d04890 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -5,11 +5,35 @@ #include <linux/mm_types.h> #include <linux/scatterlist.h> +#include <linux/dma-attrs.h> #include <linux/dma-debug.h> #include <asm-generic/dma-coherent.h> #include <asm/memory.h> +#define DMA_ERROR_CODE (~0) +extern struct dma_map_ops arm_dma_ops; + +static inline struct dma_map_ops *get_dma_ops(struct device *dev) +{ + if (dev && dev->archdata.dma_ops) + return dev->archdata.dma_ops; + return &arm_dma_ops; +} + +static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops) +{ + BUG_ON(!dev); + dev->archdata.dma_ops = ops; +} + +#include <asm-generic/dma-mapping-common.h> + +static inline int dma_set_mask(struct device *dev, u64 mask) +{ + return get_dma_ops(dev)->set_dma_mask(dev, mask); +} + #ifdef __arch_page_to_dma #error Please update to __arch_pfn_to_dma #endif @@ -62,68 +86,11 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) #endif /* - * The DMA API is built upon the notion of "buffer ownership". A buffer - * is either exclusively owned by the CPU (and therefore may be accessed - * by it) or exclusively owned by the DMA device. These helper functions - * represent the transitions between these two ownership states. - * - * Note, however, that on later ARMs, this notion does not work due to - * speculative prefetches. We model our approach on the assumption that - * the CPU does do speculative prefetches, which means we clean caches - * before transfers and delay cache invalidation until transfer completion. - * - * Private support functions: these are not part of the API and are - * liable to change. Drivers must not use these. - */ -static inline void __dma_single_cpu_to_dev(const void *kaddr, size_t size, - enum dma_data_direction dir) -{ - extern void ___dma_single_cpu_to_dev(const void *, size_t, - enum dma_data_direction); - - if (!arch_is_coherent()) - ___dma_single_cpu_to_dev(kaddr, size, dir); -} - -static inline void __dma_single_dev_to_cpu(const void *kaddr, size_t size, - enum dma_data_direction dir) -{ - extern void ___dma_single_dev_to_cpu(const void *, size_t, - enum dma_data_direction); - - if (!arch_is_coherent()) - ___dma_single_dev_to_cpu(kaddr, size, dir); -} - -static inline void __dma_page_cpu_to_dev(struct page *page, unsigned long off, - size_t size, enum dma_data_direction dir) -{ - extern void ___dma_page_cpu_to_dev(struct page *, unsigned long, - size_t, enum dma_data_direction); - - if (!arch_is_coherent()) - ___dma_page_cpu_to_dev(page, off, size, dir); -} - -static inline void __dma_page_dev_to_cpu(struct page *page, unsigned long off, - size_t size, enum dma_data_direction dir) -{ - extern void ___dma_page_dev_to_cpu(struct page *, unsigned long, - size_t, enum dma_data_direction); - - if (!arch_is_coherent()) - ___dma_page_dev_to_cpu(page, off, size, dir); -} - -extern int dma_supported(struct device *, u64); -extern int dma_set_mask(struct device *, u64); - -/* * DMA errors are defined by all-bits-set in the DMA address. */ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { - return dma_addr == ~0; + return dma_addr == DMA_ERROR_CODE; } /* @@ -141,69 +108,118 @@ static inline void dma_free_noncoherent(struct device *dev, size_t size, { } +extern int dma_supported(struct device *dev, u64 mask); + /** - * dma_alloc_coherent - allocate consistent memory for DMA + * arm_dma_alloc - allocate consistent memory for DMA * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices * @size: required memory size * @handle: bus-specific DMA address + * @attrs: optinal attributes that specific mapping properties * - * Allocate some uncached, unbuffered memory for a device for - * performing DMA. This function allocates pages, and will - * return the CPU-viewed address, and sets @handle to be the - * device-viewed address. + * Allocate some memory for a device for performing DMA. This function + * allocates pages, and will return the CPU-viewed address, and sets @handle + * to be the device-viewed address. */ -extern void *dma_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t); +extern void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp, struct dma_attrs *attrs); + +#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL) + +static inline void *dma_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag, + struct dma_attrs *attrs) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + void *cpu_addr; + BUG_ON(!ops); + + cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs); + debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); + return cpu_addr; +} /** - * dma_free_coherent - free memory allocated by dma_alloc_coherent + * arm_dma_free - free memory allocated by arm_dma_alloc * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices * @size: size of memory originally requested in dma_alloc_coherent * @cpu_addr: CPU-view address returned from dma_alloc_coherent * @handle: device-view address returned from dma_alloc_coherent + * @attrs: optinal attributes that specific mapping properties * * Free (and unmap) a DMA buffer previously allocated by - * dma_alloc_coherent(). + * arm_dma_alloc(). * * References to memory and mappings associated with cpu_addr/handle * during and after this call executing are illegal. */ -extern void dma_free_coherent(struct device *, size_t, void *, dma_addr_t); +extern void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs); + +#define dma_free_coherent(d, s, c, h) dma_free_attrs(d, s, c, h, NULL) + +static inline void dma_free_attrs(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle, + struct dma_attrs *attrs) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + BUG_ON(!ops); + + debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); + ops->free(dev, size, cpu_addr, dma_handle, attrs); +} /** - * dma_mmap_coherent - map a coherent DMA allocation into user space + * arm_dma_mmap - map a coherent DMA allocation into user space * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices * @vma: vm_area_struct describing requested user mapping * @cpu_addr: kernel CPU-view address returned from dma_alloc_coherent * @handle: device-view address returned from dma_alloc_coherent * @size: size of memory originally requested in dma_alloc_coherent + * @attrs: optinal attributes that specific mapping properties * * Map a coherent DMA buffer previously allocated by dma_alloc_coherent * into user space. The coherent DMA buffer must not be freed by the * driver until the user space mapping has been released. */ -int dma_mmap_coherent(struct device *, struct vm_area_struct *, - void *, dma_addr_t, size_t); +extern int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs); +#define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, NULL) -/** - * dma_alloc_writecombine - allocate writecombining memory for DMA - * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices - * @size: required memory size - * @handle: bus-specific DMA address - * - * Allocate some uncached, buffered memory for a device for - * performing DMA. This function allocates pages, and will - * return the CPU-viewed address, and sets @handle to be the - * device-viewed address. - */ -extern void *dma_alloc_writecombine(struct device *, size_t, dma_addr_t *, - gfp_t); +static inline int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, + size_t size, struct dma_attrs *attrs) +{ + struct dma_map_ops *ops = get_dma_ops(dev); + BUG_ON(!ops); + return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); +} + +static inline void *dma_alloc_writecombine(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag) +{ + DEFINE_DMA_ATTRS(attrs); + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + return dma_alloc_attrs(dev, size, dma_handle, flag, &attrs); +} -#define dma_free_writecombine(dev,size,cpu_addr,handle) \ - dma_free_coherent(dev,size,cpu_addr,handle) +static inline void dma_free_writecombine(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle) +{ + DEFINE_DMA_ATTRS(attrs); + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + return dma_free_attrs(dev, size, cpu_addr, dma_handle, &attrs); +} -int dma_mmap_writecombine(struct device *, struct vm_area_struct *, - void *, dma_addr_t, size_t); +static inline int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size) +{ + DEFINE_DMA_ATTRS(attrs); + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs); +} /* * This can be called during boot to increase the size of the consistent @@ -212,8 +228,6 @@ int dma_mmap_writecombine(struct device *, struct vm_area_struct *, */ extern void __init init_consistent_dma_size(unsigned long size); - -#ifdef CONFIG_DMABOUNCE /* * For SA-1111, IXP425, and ADI systems the dma-mapping functions are "magic" * and utilize bounce buffers as needed to work around limited DMA windows. @@ -253,222 +267,19 @@ extern int dmabounce_register_dev(struct device *, unsigned long, */ extern void dmabounce_unregister_dev(struct device *); -/* - * The DMA API, implemented by dmabounce.c. See below for descriptions. - */ -extern dma_addr_t __dma_map_page(struct device *, struct page *, - unsigned long, size_t, enum dma_data_direction); -extern void __dma_unmap_page(struct device *, dma_addr_t, size_t, - enum dma_data_direction); - -/* - * Private functions - */ -int dmabounce_sync_for_cpu(struct device *, dma_addr_t, unsigned long, - size_t, enum dma_data_direction); -int dmabounce_sync_for_device(struct device *, dma_addr_t, unsigned long, - size_t, enum dma_data_direction); -#else -static inline int dmabounce_sync_for_cpu(struct device *d, dma_addr_t addr, - unsigned long offset, size_t size, enum dma_data_direction dir) -{ - return 1; -} -static inline int dmabounce_sync_for_device(struct device *d, dma_addr_t addr, - unsigned long offset, size_t size, enum dma_data_direction dir) -{ - return 1; -} - - -static inline dma_addr_t __dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir) -{ - __dma_page_cpu_to_dev(page, offset, size, dir); - return pfn_to_dma(dev, page_to_pfn(page)) + offset; -} - -static inline void __dma_unmap_page(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir) -{ - __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)), - handle & ~PAGE_MASK, size, dir); -} -#endif /* CONFIG_DMABOUNCE */ - -/** - * dma_map_single - map a single buffer for streaming DMA - * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices - * @cpu_addr: CPU direct mapped address of buffer - * @size: size of buffer to map - * @dir: DMA transfer direction - * - * Ensure that any data held in the cache is appropriately discarded - * or written back. - * - * The device owns this memory once this call has completed. The CPU - * can regain ownership by calling dma_unmap_single() or - * dma_sync_single_for_cpu(). - */ -static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, - size_t size, enum dma_data_direction dir) -{ - unsigned long offset; - struct page *page; - dma_addr_t addr; - - BUG_ON(!virt_addr_valid(cpu_addr)); - BUG_ON(!virt_addr_valid(cpu_addr + size - 1)); - BUG_ON(!valid_dma_direction(dir)); - - page = virt_to_page(cpu_addr); - offset = (unsigned long)cpu_addr & ~PAGE_MASK; - addr = __dma_map_page(dev, page, offset, size, dir); - debug_dma_map_page(dev, page, offset, size, dir, addr, true); - - return addr; -} - -/** - * dma_map_page - map a portion of a page for streaming DMA - * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices - * @page: page that buffer resides in - * @offset: offset into page for start of buffer - * @size: size of buffer to map - * @dir: DMA transfer direction - * - * Ensure that any data held in the cache is appropriately discarded - * or written back. - * - * The device owns this memory once this call has completed. The CPU - * can regain ownership by calling dma_unmap_page(). - */ -static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir) -{ - dma_addr_t addr; - - BUG_ON(!valid_dma_direction(dir)); - - addr = __dma_map_page(dev, page, offset, size, dir); - debug_dma_map_page(dev, page, offset, size, dir, addr, false); - - return addr; -} - -/** - * dma_unmap_single - unmap a single buffer previously mapped - * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices - * @handle: DMA address of buffer - * @size: size of buffer (same as passed to dma_map_single) - * @dir: DMA transfer direction (same as passed to dma_map_single) - * - * Unmap a single streaming mode DMA translation. The handle and size - * must match what was provided in the previous dma_map_single() call. - * All other usages are undefined. - * - * After this call, reads by the CPU to the buffer are guaranteed to see - * whatever the device wrote there. - */ -static inline void dma_unmap_single(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir) -{ - debug_dma_unmap_page(dev, handle, size, dir, true); - __dma_unmap_page(dev, handle, size, dir); -} - -/** - * dma_unmap_page - unmap a buffer previously mapped through dma_map_page() - * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices - * @handle: DMA address of buffer - * @size: size of buffer (same as passed to dma_map_page) - * @dir: DMA transfer direction (same as passed to dma_map_page) - * - * Unmap a page streaming mode DMA translation. The handle and size - * must match what was provided in the previous dma_map_page() call. - * All other usages are undefined. - * - * After this call, reads by the CPU to the buffer are guaranteed to see - * whatever the device wrote there. - */ -static inline void dma_unmap_page(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir) -{ - debug_dma_unmap_page(dev, handle, size, dir, false); - __dma_unmap_page(dev, handle, size, dir); -} - -/** - * dma_sync_single_range_for_cpu - * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices - * @handle: DMA address of buffer - * @offset: offset of region to start sync - * @size: size of region to sync - * @dir: DMA transfer direction (same as passed to dma_map_single) - * - * Make physical memory consistent for a single streaming mode DMA - * translation after a transfer. - * - * If you perform a dma_map_single() but wish to interrogate the - * buffer using the cpu, yet do not wish to teardown the PCI dma - * mapping, you must call this function before doing so. At the - * next point you give the PCI dma address back to the card, you - * must first the perform a dma_sync_for_device, and then the - * device again owns the buffer. - */ -static inline void dma_sync_single_range_for_cpu(struct device *dev, - dma_addr_t handle, unsigned long offset, size_t size, - enum dma_data_direction dir) -{ - BUG_ON(!valid_dma_direction(dir)); - - debug_dma_sync_single_for_cpu(dev, handle + offset, size, dir); - - if (!dmabounce_sync_for_cpu(dev, handle, offset, size, dir)) - return; - - __dma_single_dev_to_cpu(dma_to_virt(dev, handle) + offset, size, dir); -} - -static inline void dma_sync_single_range_for_device(struct device *dev, - dma_addr_t handle, unsigned long offset, size_t size, - enum dma_data_direction dir) -{ - BUG_ON(!valid_dma_direction(dir)); - - debug_dma_sync_single_for_device(dev, handle + offset, size, dir); - - if (!dmabounce_sync_for_device(dev, handle, offset, size, dir)) - return; - - __dma_single_cpu_to_dev(dma_to_virt(dev, handle) + offset, size, dir); -} - -static inline void dma_sync_single_for_cpu(struct device *dev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - dma_sync_single_range_for_cpu(dev, handle, 0, size, dir); -} - -static inline void dma_sync_single_for_device(struct device *dev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - dma_sync_single_range_for_device(dev, handle, 0, size, dir); -} /* * The scatter list versions of the above methods. */ -extern int dma_map_sg(struct device *, struct scatterlist *, int, - enum dma_data_direction); -extern void dma_unmap_sg(struct device *, struct scatterlist *, int, +extern int arm_dma_map_sg(struct device *, struct scatterlist *, int, + enum dma_data_direction, struct dma_attrs *attrs); +extern void arm_dma_unmap_sg(struct device *, struct scatterlist *, int, + enum dma_data_direction, struct dma_attrs *attrs); +extern void arm_dma_sync_sg_for_cpu(struct device *, struct scatterlist *, int, enum dma_data_direction); -extern void dma_sync_sg_for_cpu(struct device *, struct scatterlist *, int, +extern void arm_dma_sync_sg_for_device(struct device *, struct scatterlist *, int, enum dma_data_direction); -extern void dma_sync_sg_for_device(struct device *, struct scatterlist *, int, - enum dma_data_direction); - #endif /* __KERNEL__ */ #endif diff --git a/arch/arm/include/asm/mach/map.h b/arch/arm/include/asm/mach/map.h index b36f3654bf54..a6efcdd6fd25 100644 --- a/arch/arm/include/asm/mach/map.h +++ b/arch/arm/include/asm/mach/map.h @@ -30,6 +30,7 @@ struct map_desc { #define MT_MEMORY_DTCM 12 #define MT_MEMORY_ITCM 13 #define MT_MEMORY_SO 14 +#define MT_MEMORY_DMA_READY 15 #ifdef CONFIG_MMU extern void iotable_init(struct map_desc *, int); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index ebfac782593f..1b3096dfb964 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -81,6 +81,7 @@ __setup("fpe=", fpe_setup); extern void paging_init(struct machine_desc *desc); extern void sanity_check_meminfo(void); extern void reboot_setup(char *str); +extern void setup_dma_zone(struct machine_desc *desc); unsigned int processor_id; EXPORT_SYMBOL(processor_id); @@ -939,12 +940,8 @@ void __init setup_arch(char **cmdline_p) machine_desc = mdesc; machine_name = mdesc->name; -#ifdef CONFIG_ZONE_DMA - if (mdesc->dma_zone_size) { - extern unsigned long arm_dma_zone_size; - arm_dma_zone_size = mdesc->dma_zone_size; - } -#endif + setup_dma_zone(mdesc); + if (mdesc->restart_mode) reboot_setup(&mdesc->restart_mode); diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index db23ae4aaaab..ea6b43154090 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -17,8 +17,12 @@ #include <linux/init.h> #include <linux/device.h> #include <linux/dma-mapping.h> +#include <linux/dma-contiguous.h> #include <linux/highmem.h> +#include <linux/memblock.h> #include <linux/slab.h> +#include <linux/iommu.h> +#include <linux/vmalloc.h> #include <asm/memory.h> #include <asm/highmem.h> @@ -26,9 +30,112 @@ #include <asm/tlbflush.h> #include <asm/sizes.h> #include <asm/mach/arch.h> +#include <asm/dma-iommu.h> +#include <asm/mach/map.h> +#include <asm/system_info.h> +#include <asm/dma-contiguous.h> #include "mm.h" +/* + * The DMA API is built upon the notion of "buffer ownership". A buffer + * is either exclusively owned by the CPU (and therefore may be accessed + * by it) or exclusively owned by the DMA device. These helper functions + * represent the transitions between these two ownership states. + * + * Note, however, that on later ARMs, this notion does not work due to + * speculative prefetches. We model our approach on the assumption that + * the CPU does do speculative prefetches, which means we clean caches + * before transfers and delay cache invalidation until transfer completion. + * + */ +static void __dma_page_cpu_to_dev(struct page *, unsigned long, + size_t, enum dma_data_direction); +static void __dma_page_dev_to_cpu(struct page *, unsigned long, + size_t, enum dma_data_direction); + +/** + * arm_dma_map_page - map a portion of a page for streaming DMA + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @page: page that buffer resides in + * @offset: offset into page for start of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * Ensure that any data held in the cache is appropriately discarded + * or written back. + * + * The device owns this memory once this call has completed. The CPU + * can regain ownership by calling dma_unmap_page(). + */ +static dma_addr_t arm_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + if (!arch_is_coherent()) + __dma_page_cpu_to_dev(page, offset, size, dir); + return pfn_to_dma(dev, page_to_pfn(page)) + offset; +} + +/** + * arm_dma_unmap_page - unmap a buffer previously mapped through dma_map_page() + * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices + * @handle: DMA address of buffer + * @size: size of buffer (same as passed to dma_map_page) + * @dir: DMA transfer direction (same as passed to dma_map_page) + * + * Unmap a page streaming mode DMA translation. The handle and size + * must match what was provided in the previous dma_map_page() call. + * All other usages are undefined. + * + * After this call, reads by the CPU to the buffer are guaranteed to see + * whatever the device wrote there. + */ +static void arm_dma_unmap_page(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + if (!arch_is_coherent()) + __dma_page_dev_to_cpu(pfn_to_page(dma_to_pfn(dev, handle)), + handle & ~PAGE_MASK, size, dir); +} + +static void arm_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + unsigned int offset = handle & (PAGE_SIZE - 1); + struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); + if (!arch_is_coherent()) + __dma_page_dev_to_cpu(page, offset, size, dir); +} + +static void arm_dma_sync_single_for_device(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + unsigned int offset = handle & (PAGE_SIZE - 1); + struct page *page = pfn_to_page(dma_to_pfn(dev, handle-offset)); + if (!arch_is_coherent()) + __dma_page_cpu_to_dev(page, offset, size, dir); +} + +static int arm_dma_set_mask(struct device *dev, u64 dma_mask); + +struct dma_map_ops arm_dma_ops = { + .alloc = arm_dma_alloc, + .free = arm_dma_free, + .mmap = arm_dma_mmap, + .map_page = arm_dma_map_page, + .unmap_page = arm_dma_unmap_page, + .map_sg = arm_dma_map_sg, + .unmap_sg = arm_dma_unmap_sg, + .sync_single_for_cpu = arm_dma_sync_single_for_cpu, + .sync_single_for_device = arm_dma_sync_single_for_device, + .sync_sg_for_cpu = arm_dma_sync_sg_for_cpu, + .sync_sg_for_device = arm_dma_sync_sg_for_device, + .set_dma_mask = arm_dma_set_mask, +}; +EXPORT_SYMBOL(arm_dma_ops); + static u64 get_coherent_dma_mask(struct device *dev) { u64 mask = (u64)arm_dma_limit; @@ -56,6 +163,21 @@ static u64 get_coherent_dma_mask(struct device *dev) return mask; } +static void __dma_clear_buffer(struct page *page, size_t size) +{ + void *ptr; + /* + * Ensure that the allocated pages are zeroed, and that any data + * lurking in the kernel direct-mapped region is invalidated. + */ + ptr = page_address(page); + if (ptr) { + memset(ptr, 0, size); + dmac_flush_range(ptr, ptr + size); + outer_flush_range(__pa(ptr), __pa(ptr) + size); + } +} + /* * Allocate a DMA buffer for 'dev' of size 'size' using the * specified gfp mask. Note that 'size' must be page aligned. @@ -64,23 +186,6 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf { unsigned long order = get_order(size); struct page *page, *p, *e; - void *ptr; - u64 mask = get_coherent_dma_mask(dev); - -#ifdef CONFIG_DMA_API_DEBUG - u64 limit = (mask + 1) & ~mask; - if (limit && size >= limit) { - dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n", - size, mask); - return NULL; - } -#endif - - if (!mask) - return NULL; - - if (mask < 0xffffffffULL) - gfp |= GFP_DMA; page = alloc_pages(gfp, order); if (!page) @@ -93,14 +198,7 @@ static struct page *__dma_alloc_buffer(struct device *dev, size_t size, gfp_t gf for (p = page + (size >> PAGE_SHIFT), e = page + (1 << order); p < e; p++) __free_page(p); - /* - * Ensure that the allocated pages are zeroed, and that any data - * lurking in the kernel direct-mapped region is invalidated. - */ - ptr = page_address(page); - memset(ptr, 0, size); - dmac_flush_range(ptr, ptr + size); - outer_flush_range(__pa(ptr), __pa(ptr) + size); + __dma_clear_buffer(page, size); return page; } @@ -170,6 +268,11 @@ static int __init consistent_init(void) unsigned long base = consistent_base; unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT; +#ifndef CONFIG_ARM_DMA_USE_IOMMU + if (cpu_architecture() >= CPU_ARCH_ARMv6) + return 0; +#endif + consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL); if (!consistent_pte) { pr_err("%s: no memory\n", __func__); @@ -184,14 +287,14 @@ static int __init consistent_init(void) pud = pud_alloc(&init_mm, pgd, base); if (!pud) { - printk(KERN_ERR "%s: no pud tables\n", __func__); + pr_err("%s: no pud tables\n", __func__); ret = -ENOMEM; break; } pmd = pmd_alloc(&init_mm, pud, base); if (!pmd) { - printk(KERN_ERR "%s: no pmd tables\n", __func__); + pr_err("%s: no pmd tables\n", __func__); ret = -ENOMEM; break; } @@ -199,7 +302,7 @@ static int __init consistent_init(void) pte = pte_alloc_kernel(pmd, base); if (!pte) { - printk(KERN_ERR "%s: no pte tables\n", __func__); + pr_err("%s: no pte tables\n", __func__); ret = -ENOMEM; break; } @@ -210,9 +313,101 @@ static int __init consistent_init(void) return ret; } - core_initcall(consistent_init); +static void *__alloc_from_contiguous(struct device *dev, size_t size, + pgprot_t prot, struct page **ret_page); + +static struct arm_vmregion_head coherent_head = { + .vm_lock = __SPIN_LOCK_UNLOCKED(&coherent_head.vm_lock), + .vm_list = LIST_HEAD_INIT(coherent_head.vm_list), +}; + +size_t coherent_pool_size = DEFAULT_CONSISTENT_DMA_SIZE / 8; + +static int __init early_coherent_pool(char *p) +{ + coherent_pool_size = memparse(p, &p); + return 0; +} +early_param("coherent_pool", early_coherent_pool); + +/* + * Initialise the coherent pool for atomic allocations. + */ +static int __init coherent_init(void) +{ + pgprot_t prot = pgprot_dmacoherent(pgprot_kernel); + size_t size = coherent_pool_size; + struct page *page; + void *ptr; + + if (cpu_architecture() < CPU_ARCH_ARMv6) + return 0; + + ptr = __alloc_from_contiguous(NULL, size, prot, &page); + if (ptr) { + coherent_head.vm_start = (unsigned long) ptr; + coherent_head.vm_end = (unsigned long) ptr + size; + printk(KERN_INFO "DMA: preallocated %u KiB pool for atomic coherent allocations\n", + (unsigned)size / 1024); + return 0; + } + printk(KERN_ERR "DMA: failed to allocate %u KiB pool for atomic coherent allocation\n", + (unsigned)size / 1024); + return -ENOMEM; +} +/* + * CMA is activated by core_initcall, so we must be called after it. + */ +postcore_initcall(coherent_init); + +struct dma_contig_early_reserve { + phys_addr_t base; + unsigned long size; +}; + +static struct dma_contig_early_reserve dma_mmu_remap[MAX_CMA_AREAS] __initdata; + +static int dma_mmu_remap_num __initdata; + +void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) +{ + dma_mmu_remap[dma_mmu_remap_num].base = base; + dma_mmu_remap[dma_mmu_remap_num].size = size; + dma_mmu_remap_num++; +} + +void __init dma_contiguous_remap(void) +{ + int i; + for (i = 0; i < dma_mmu_remap_num; i++) { + phys_addr_t start = dma_mmu_remap[i].base; + phys_addr_t end = start + dma_mmu_remap[i].size; + struct map_desc map; + unsigned long addr; + + if (end > arm_lowmem_limit) + end = arm_lowmem_limit; + if (start >= end) + return; + + map.pfn = __phys_to_pfn(start); + map.virtual = __phys_to_virt(start); + map.length = end - start; + map.type = MT_MEMORY_DMA_READY; + + /* + * Clear previous low-memory mapping + */ + for (addr = __phys_to_virt(start); addr < __phys_to_virt(end); + addr += PMD_SIZE) + pmd_clear(pmd_off_k(addr)); + + iotable_init(&map, 1); + } +} + static void * __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, const void *caller) @@ -222,7 +417,7 @@ __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, int bit; if (!consistent_pte) { - printk(KERN_ERR "%s: not initialised\n", __func__); + pr_err("%s: not initialised\n", __func__); dump_stack(); return NULL; } @@ -249,7 +444,7 @@ __dma_alloc_remap(struct page *page, size_t size, gfp_t gfp, pgprot_t prot, u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); pte = consistent_pte[idx] + off; - c->vm_pages = page; + c->priv = page; do { BUG_ON(!pte_none(*pte)); @@ -281,14 +476,14 @@ static void __dma_free_remap(void *cpu_addr, size_t size) c = arm_vmregion_find_remove(&consistent_head, (unsigned long)cpu_addr); if (!c) { - printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n", + pr_err("%s: trying to free invalid coherent area: %p\n", __func__, cpu_addr); dump_stack(); return; } if ((c->vm_end - c->vm_start) != size) { - printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", + pr_err("%s: freeing wrong coherent size (%ld != %d)\n", __func__, c->vm_end - c->vm_start, size); dump_stack(); size = c->vm_end - c->vm_start; @@ -310,8 +505,8 @@ static void __dma_free_remap(void *cpu_addr, size_t size) } if (pte_none(pte) || !pte_present(pte)) - printk(KERN_CRIT "%s: bad page in kernel page table\n", - __func__); + pr_crit("%s: bad page in kernel page table\n", + __func__); } while (size -= PAGE_SIZE); flush_tlb_kernel_range(c->vm_start, c->vm_end); @@ -319,20 +514,182 @@ static void __dma_free_remap(void *cpu_addr, size_t size) arm_vmregion_free(&consistent_head, c); } +static int __dma_update_pte(pte_t *pte, pgtable_t token, unsigned long addr, + void *data) +{ + struct page *page = virt_to_page(addr); + pgprot_t prot = *(pgprot_t *)data; + + set_pte_ext(pte, mk_pte(page, prot), 0); + return 0; +} + +static void __dma_remap(struct page *page, size_t size, pgprot_t prot) +{ + unsigned long start = (unsigned long) page_address(page); + unsigned end = start + size; + + apply_to_page_range(&init_mm, start, size, __dma_update_pte, &prot); + dsb(); + flush_tlb_kernel_range(start, end); +} + +static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, + pgprot_t prot, struct page **ret_page, + const void *caller) +{ + struct page *page; + void *ptr; + page = __dma_alloc_buffer(dev, size, gfp); + if (!page) + return NULL; + + ptr = __dma_alloc_remap(page, size, gfp, prot, caller); + if (!ptr) { + __dma_free_buffer(page, size); + return NULL; + } + + *ret_page = page; + return ptr; +} + +static void *__alloc_from_pool(struct device *dev, size_t size, + struct page **ret_page, const void *caller) +{ + struct arm_vmregion *c; + size_t align; + + if (!coherent_head.vm_start) { + printk(KERN_ERR "%s: coherent pool not initialised!\n", + __func__); + dump_stack(); + return NULL; + } + + /* + * Align the region allocation - allocations from pool are rather + * small, so align them to their order in pages, minimum is a page + * size. This helps reduce fragmentation of the DMA space. + */ + align = PAGE_SIZE << get_order(size); + c = arm_vmregion_alloc(&coherent_head, align, size, 0, caller); + if (c) { + void *ptr = (void *)c->vm_start; + struct page *page = virt_to_page(ptr); + *ret_page = page; + return ptr; + } + return NULL; +} + +static int __free_from_pool(void *cpu_addr, size_t size) +{ + unsigned long start = (unsigned long)cpu_addr; + unsigned long end = start + size; + struct arm_vmregion *c; + + if (start < coherent_head.vm_start || end > coherent_head.vm_end) + return 0; + + c = arm_vmregion_find_remove(&coherent_head, (unsigned long)start); + + if ((c->vm_end - c->vm_start) != size) { + printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n", + __func__, c->vm_end - c->vm_start, size); + dump_stack(); + size = c->vm_end - c->vm_start; + } + + arm_vmregion_free(&coherent_head, c); + return 1; +} + +static void *__alloc_from_contiguous(struct device *dev, size_t size, + pgprot_t prot, struct page **ret_page) +{ + unsigned long order = get_order(size); + size_t count = size >> PAGE_SHIFT; + struct page *page; + + page = dma_alloc_from_contiguous(dev, count, order); + if (!page) + return NULL; + + __dma_clear_buffer(page, size); + __dma_remap(page, size, prot); + + *ret_page = page; + return page_address(page); +} + +static void __free_from_contiguous(struct device *dev, struct page *page, + size_t size) +{ + __dma_remap(page, size, pgprot_kernel); + dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); +} + +static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot) +{ + prot = dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs) ? + pgprot_writecombine(prot) : + pgprot_dmacoherent(prot); + return prot; +} + +#define nommu() 0 + #else /* !CONFIG_MMU */ -#define __dma_alloc_remap(page, size, gfp, prot, c) page_address(page) -#define __dma_free_remap(addr, size) do { } while (0) +#define nommu() 1 + +#define __get_dma_pgprot(attrs, prot) __pgprot(0) +#define __alloc_remap_buffer(dev, size, gfp, prot, ret, c) NULL +#define __alloc_from_pool(dev, size, ret_page, c) NULL +#define __alloc_from_contiguous(dev, size, prot, ret) NULL +#define __free_from_pool(cpu_addr, size) 0 +#define __free_from_contiguous(dev, page, size) do { } while (0) +#define __dma_free_remap(cpu_addr, size) do { } while (0) #endif /* CONFIG_MMU */ -static void * -__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, - pgprot_t prot, const void *caller) +static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp, + struct page **ret_page) +{ + struct page *page; + page = __dma_alloc_buffer(dev, size, gfp); + if (!page) + return NULL; + + *ret_page = page; + return page_address(page); +} + + + +static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp, pgprot_t prot, const void *caller) { + u64 mask = get_coherent_dma_mask(dev); struct page *page; void *addr; +#ifdef CONFIG_DMA_API_DEBUG + u64 limit = (mask + 1) & ~mask; + if (limit && size >= limit) { + dev_warn(dev, "coherent allocation too big (requested %#x mask %#llx)\n", + size, mask); + return NULL; + } +#endif + + if (!mask) + return NULL; + + if (mask < 0xffffffffULL) + gfp |= GFP_DMA; + /* * Following is a work-around (a.k.a. hack) to prevent pages * with __GFP_COMP being passed to split_page() which cannot @@ -342,22 +699,20 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, */ gfp &= ~(__GFP_COMP); - *handle = ~0; + *handle = DMA_ERROR_CODE; size = PAGE_ALIGN(size); - page = __dma_alloc_buffer(dev, size, gfp); - if (!page) - return NULL; - - if (!arch_is_coherent()) - addr = __dma_alloc_remap(page, size, gfp, prot, caller); + if (arch_is_coherent() || nommu()) + addr = __alloc_simple_buffer(dev, size, gfp, &page); + else if (cpu_architecture() < CPU_ARCH_ARMv6) + addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); + else if (gfp & GFP_ATOMIC) + addr = __alloc_from_pool(dev, size, &page, caller); else - addr = page_address(page); + addr = __alloc_from_contiguous(dev, size, prot, &page); if (addr) *handle = pfn_to_dma(dev, page_to_pfn(page)); - else - __dma_free_buffer(page, size); return addr; } @@ -366,138 +721,71 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, * Allocate DMA-coherent memory space and return both the kernel remapped * virtual and bus address for that space. */ -void * -dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) +void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp, struct dma_attrs *attrs) { + pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel); void *memory; if (dma_alloc_from_coherent(dev, size, handle, &memory)) return memory; - return __dma_alloc(dev, size, handle, gfp, - pgprot_dmacoherent(pgprot_kernel), + return __dma_alloc(dev, size, handle, gfp, prot, __builtin_return_address(0)); } -EXPORT_SYMBOL(dma_alloc_coherent); /* - * Allocate a writecombining region, in much the same way as - * dma_alloc_coherent above. + * Create userspace mapping for the DMA-coherent memory. */ -void * -dma_alloc_writecombine(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) -{ - return __dma_alloc(dev, size, handle, gfp, - pgprot_writecombine(pgprot_kernel), - __builtin_return_address(0)); -} -EXPORT_SYMBOL(dma_alloc_writecombine); - -static int dma_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size) +int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) { int ret = -ENXIO; #ifdef CONFIG_MMU - unsigned long user_size, kern_size; - struct arm_vmregion *c; + unsigned long pfn = dma_to_pfn(dev, dma_addr); + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); - user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) + return ret; - c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); - if (c) { - unsigned long off = vma->vm_pgoff; - - kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT; - - if (off < kern_size && - user_size <= (kern_size - off)) { - ret = remap_pfn_range(vma, vma->vm_start, - page_to_pfn(c->vm_pages) + off, - user_size << PAGE_SHIFT, - vma->vm_page_prot); - } - } + ret = remap_pfn_range(vma, vma->vm_start, + pfn + vma->vm_pgoff, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); #endif /* CONFIG_MMU */ return ret; } -int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size) -{ - vma->vm_page_prot = pgprot_dmacoherent(vma->vm_page_prot); - return dma_mmap(dev, vma, cpu_addr, dma_addr, size); -} -EXPORT_SYMBOL(dma_mmap_coherent); - -int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size) -{ - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); - return dma_mmap(dev, vma, cpu_addr, dma_addr, size); -} -EXPORT_SYMBOL(dma_mmap_writecombine); - /* - * free a page as defined by the above mapping. - * Must not be called with IRQs disabled. + * Free a buffer as defined by the above mapping. */ -void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle) +void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs) { - WARN_ON(irqs_disabled()); + struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) return; size = PAGE_ALIGN(size); - if (!arch_is_coherent()) + if (arch_is_coherent() || nommu()) { + __dma_free_buffer(page, size); + } else if (cpu_architecture() < CPU_ARCH_ARMv6) { __dma_free_remap(cpu_addr, size); - - __dma_free_buffer(pfn_to_page(dma_to_pfn(dev, handle)), size); -} -EXPORT_SYMBOL(dma_free_coherent); - -/* - * Make an area consistent for devices. - * Note: Drivers should NOT use this function directly, as it will break - * platforms with CONFIG_DMABOUNCE. - * Use the driver DMA support - see dma-mapping.h (dma_sync_*) - */ -void ___dma_single_cpu_to_dev(const void *kaddr, size_t size, - enum dma_data_direction dir) -{ - unsigned long paddr; - - BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1)); - - dmac_map_area(kaddr, size, dir); - - paddr = __pa(kaddr); - if (dir == DMA_FROM_DEVICE) { - outer_inv_range(paddr, paddr + size); + __dma_free_buffer(page, size); } else { - outer_clean_range(paddr, paddr + size); - } - /* FIXME: non-speculating: flush on bidirectional mappings? */ -} -EXPORT_SYMBOL(___dma_single_cpu_to_dev); - -void ___dma_single_dev_to_cpu(const void *kaddr, size_t size, - enum dma_data_direction dir) -{ - BUG_ON(!virt_addr_valid(kaddr) || !virt_addr_valid(kaddr + size - 1)); - - /* FIXME: non-speculating: not required */ - /* don't bother invalidating if DMA to device */ - if (dir != DMA_TO_DEVICE) { - unsigned long paddr = __pa(kaddr); - outer_inv_range(paddr, paddr + size); + if (__free_from_pool(cpu_addr, size)) + return; + /* + * Non-atomic allocations cannot be freed with IRQs disabled + */ + WARN_ON(irqs_disabled()); + __free_from_contiguous(dev, page, size); } - - dmac_unmap_area(kaddr, size, dir); } -EXPORT_SYMBOL(___dma_single_dev_to_cpu); static void dma_cache_maint_page(struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, @@ -543,7 +831,13 @@ static void dma_cache_maint_page(struct page *page, unsigned long offset, } while (left); } -void ___dma_page_cpu_to_dev(struct page *page, unsigned long off, +/* + * Make an area consistent for devices. + * Note: Drivers should NOT use this function directly, as it will break + * platforms with CONFIG_DMABOUNCE. + * Use the driver DMA support - see dma-mapping.h (dma_sync_*) + */ +static void __dma_page_cpu_to_dev(struct page *page, unsigned long off, size_t size, enum dma_data_direction dir) { unsigned long paddr; @@ -558,9 +852,8 @@ void ___dma_page_cpu_to_dev(struct page *page, unsigned long off, } /* FIXME: non-speculating: flush on bidirectional mappings? */ } -EXPORT_SYMBOL(___dma_page_cpu_to_dev); -void ___dma_page_dev_to_cpu(struct page *page, unsigned long off, +static void __dma_page_dev_to_cpu(struct page *page, unsigned long off, size_t size, enum dma_data_direction dir) { unsigned long paddr = page_to_phys(page) + off; @@ -578,10 +871,9 @@ void ___dma_page_dev_to_cpu(struct page *page, unsigned long off, if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE) set_bit(PG_dcache_clean, &page->flags); } -EXPORT_SYMBOL(___dma_page_dev_to_cpu); /** - * dma_map_sg - map a set of SG buffers for streaming mode DMA + * arm_dma_map_sg - map a set of SG buffers for streaming mode DMA * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices * @sg: list of buffers * @nents: number of buffers to map @@ -596,32 +888,32 @@ EXPORT_SYMBOL(___dma_page_dev_to_cpu); * Device ownership issues as mentioned for dma_map_single are the same * here. */ -int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir) +int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) { + struct dma_map_ops *ops = get_dma_ops(dev); struct scatterlist *s; int i, j; - BUG_ON(!valid_dma_direction(dir)); - for_each_sg(sg, s, nents, i) { - s->dma_address = __dma_map_page(dev, sg_page(s), s->offset, - s->length, dir); +#ifdef CONFIG_NEED_SG_DMA_LENGTH + s->dma_length = s->length; +#endif + s->dma_address = ops->map_page(dev, sg_page(s), s->offset, + s->length, dir, attrs); if (dma_mapping_error(dev, s->dma_address)) goto bad_mapping; } - debug_dma_map_sg(dev, sg, nents, nents, dir); return nents; bad_mapping: for_each_sg(sg, s, i, j) - __dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir); + ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs); return 0; } -EXPORT_SYMBOL(dma_map_sg); /** - * dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * arm_dma_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices * @sg: list of buffers * @nents: number of buffers to unmap (same as was passed to dma_map_sg) @@ -630,70 +922,55 @@ EXPORT_SYMBOL(dma_map_sg); * Unmap a set of streaming mode DMA translations. Again, CPU access * rules concerning calls here are the same as for dma_unmap_single(). */ -void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir) +void arm_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) { + struct dma_map_ops *ops = get_dma_ops(dev); struct scatterlist *s; - int i; - debug_dma_unmap_sg(dev, sg, nents, dir); + int i; for_each_sg(sg, s, nents, i) - __dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir); + ops->unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, attrs); } -EXPORT_SYMBOL(dma_unmap_sg); /** - * dma_sync_sg_for_cpu + * arm_dma_sync_sg_for_cpu * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices * @sg: list of buffers * @nents: number of buffers to map (returned from dma_map_sg) * @dir: DMA transfer direction (same as was passed to dma_map_sg) */ -void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, +void arm_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir) { + struct dma_map_ops *ops = get_dma_ops(dev); struct scatterlist *s; int i; - for_each_sg(sg, s, nents, i) { - if (!dmabounce_sync_for_cpu(dev, sg_dma_address(s), 0, - sg_dma_len(s), dir)) - continue; - - __dma_page_dev_to_cpu(sg_page(s), s->offset, - s->length, dir); - } - - debug_dma_sync_sg_for_cpu(dev, sg, nents, dir); + for_each_sg(sg, s, nents, i) + ops->sync_single_for_cpu(dev, sg_dma_address(s), s->length, + dir); } -EXPORT_SYMBOL(dma_sync_sg_for_cpu); /** - * dma_sync_sg_for_device + * arm_dma_sync_sg_for_device * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices * @sg: list of buffers * @nents: number of buffers to map (returned from dma_map_sg) * @dir: DMA transfer direction (same as was passed to dma_map_sg) */ -void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, +void arm_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir) { + struct dma_map_ops *ops = get_dma_ops(dev); struct scatterlist *s; int i; - for_each_sg(sg, s, nents, i) { - if (!dmabounce_sync_for_device(dev, sg_dma_address(s), 0, - sg_dma_len(s), dir)) - continue; - - __dma_page_cpu_to_dev(sg_page(s), s->offset, - s->length, dir); - } - - debug_dma_sync_sg_for_device(dev, sg, nents, dir); + for_each_sg(sg, s, nents, i) + ops->sync_single_for_device(dev, sg_dma_address(s), s->length, + dir); } -EXPORT_SYMBOL(dma_sync_sg_for_device); /* * Return whether the given device DMA address mask can be supported @@ -709,18 +986,15 @@ int dma_supported(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_supported); -int dma_set_mask(struct device *dev, u64 dma_mask) +static int arm_dma_set_mask(struct device *dev, u64 dma_mask) { if (!dev->dma_mask || !dma_supported(dev, dma_mask)) return -EIO; -#ifndef CONFIG_DMABOUNCE *dev->dma_mask = dma_mask; -#endif return 0; } -EXPORT_SYMBOL(dma_set_mask); #define PREALLOC_DMA_DEBUG_ENTRIES 4096 @@ -733,3 +1007,679 @@ static int __init dma_debug_do_init(void) return 0; } fs_initcall(dma_debug_do_init); + +#ifdef CONFIG_ARM_DMA_USE_IOMMU + +/* IOMMU */ + +static inline dma_addr_t __alloc_iova(struct dma_iommu_mapping *mapping, + size_t size) +{ + unsigned int order = get_order(size); + unsigned int align = 0; + unsigned int count, start; + unsigned long flags; + + count = ((PAGE_ALIGN(size) >> PAGE_SHIFT) + + (1 << mapping->order) - 1) >> mapping->order; + + if (order > mapping->order) + align = (1 << (order - mapping->order)) - 1; + + spin_lock_irqsave(&mapping->lock, flags); + start = bitmap_find_next_zero_area(mapping->bitmap, mapping->bits, 0, + count, align); + if (start > mapping->bits) { + spin_unlock_irqrestore(&mapping->lock, flags); + return DMA_ERROR_CODE; + } + + bitmap_set(mapping->bitmap, start, count); + spin_unlock_irqrestore(&mapping->lock, flags); + + return mapping->base + (start << (mapping->order + PAGE_SHIFT)); +} + +static inline void __free_iova(struct dma_iommu_mapping *mapping, + dma_addr_t addr, size_t size) +{ + unsigned int start = (addr - mapping->base) >> + (mapping->order + PAGE_SHIFT); + unsigned int count = ((size >> PAGE_SHIFT) + + (1 << mapping->order) - 1) >> mapping->order; + unsigned long flags; + + spin_lock_irqsave(&mapping->lock, flags); + bitmap_clear(mapping->bitmap, start, count); + spin_unlock_irqrestore(&mapping->lock, flags); +} + +static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, gfp_t gfp) +{ + struct page **pages; + int count = size >> PAGE_SHIFT; + int array_size = count * sizeof(struct page *); + int i = 0; + + if (array_size <= PAGE_SIZE) + pages = kzalloc(array_size, gfp); + else + pages = vzalloc(array_size); + if (!pages) + return NULL; + + while (count) { + int j, order = __ffs(count); + + pages[i] = alloc_pages(gfp | __GFP_NOWARN, order); + while (!pages[i] && order) + pages[i] = alloc_pages(gfp | __GFP_NOWARN, --order); + if (!pages[i]) + goto error; + + if (order) + split_page(pages[i], order); + j = 1 << order; + while (--j) + pages[i + j] = pages[i] + j; + + __dma_clear_buffer(pages[i], PAGE_SIZE << order); + i += 1 << order; + count -= 1 << order; + } + + return pages; +error: + while (--i) + if (pages[i]) + __free_pages(pages[i], 0); + if (array_size < PAGE_SIZE) + kfree(pages); + else + vfree(pages); + return NULL; +} + +static int __iommu_free_buffer(struct device *dev, struct page **pages, size_t size) +{ + int count = size >> PAGE_SHIFT; + int array_size = count * sizeof(struct page *); + int i; + for (i = 0; i < count; i++) + if (pages[i]) + __free_pages(pages[i], 0); + if (array_size < PAGE_SIZE) + kfree(pages); + else + vfree(pages); + return 0; +} + +/* + * Create a CPU mapping for a specified pages + */ +static void * +__iommu_alloc_remap(struct page **pages, size_t size, gfp_t gfp, pgprot_t prot) +{ + struct arm_vmregion *c; + size_t align; + size_t count = size >> PAGE_SHIFT; + int bit; + + if (!consistent_pte[0]) { + pr_err("%s: not initialised\n", __func__); + dump_stack(); + return NULL; + } + + /* + * Align the virtual region allocation - maximum alignment is + * a section size, minimum is a page size. This helps reduce + * fragmentation of the DMA space, and also prevents allocations + * smaller than a section from crossing a section boundary. + */ + bit = fls(size - 1); + if (bit > SECTION_SHIFT) + bit = SECTION_SHIFT; + align = 1 << bit; + + /* + * Allocate a virtual address in the consistent mapping region. + */ + c = arm_vmregion_alloc(&consistent_head, align, size, + gfp & ~(__GFP_DMA | __GFP_HIGHMEM), NULL); + if (c) { + pte_t *pte; + int idx = CONSISTENT_PTE_INDEX(c->vm_start); + int i = 0; + u32 off = CONSISTENT_OFFSET(c->vm_start) & (PTRS_PER_PTE-1); + + pte = consistent_pte[idx] + off; + c->priv = pages; + + do { + BUG_ON(!pte_none(*pte)); + + set_pte_ext(pte, mk_pte(pages[i], prot), 0); + pte++; + off++; + i++; + if (off >= PTRS_PER_PTE) { + off = 0; + pte = consistent_pte[++idx]; + } + } while (i < count); + + dsb(); + + return (void *)c->vm_start; + } + return NULL; +} + +/* + * Create a mapping in device IO address space for specified pages + */ +static dma_addr_t +__iommu_create_mapping(struct device *dev, struct page **pages, size_t size) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + dma_addr_t dma_addr, iova; + int i, ret = DMA_ERROR_CODE; + + dma_addr = __alloc_iova(mapping, size); + if (dma_addr == DMA_ERROR_CODE) + return dma_addr; + + iova = dma_addr; + for (i = 0; i < count; ) { + unsigned int next_pfn = page_to_pfn(pages[i]) + 1; + phys_addr_t phys = page_to_phys(pages[i]); + unsigned int len, j; + + for (j = i + 1; j < count; j++, next_pfn++) + if (page_to_pfn(pages[j]) != next_pfn) + break; + + len = (j - i) << PAGE_SHIFT; + ret = iommu_map(mapping->domain, iova, phys, len, 0); + if (ret < 0) + goto fail; + iova += len; + i = j; + } + return dma_addr; +fail: + iommu_unmap(mapping->domain, dma_addr, iova-dma_addr); + __free_iova(mapping, dma_addr, size); + return DMA_ERROR_CODE; +} + +static int __iommu_remove_mapping(struct device *dev, dma_addr_t iova, size_t size) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + + /* + * add optional in-page offset from iova to size and align + * result to page size + */ + size = PAGE_ALIGN((iova & ~PAGE_MASK) + size); + iova &= PAGE_MASK; + + iommu_unmap(mapping->domain, iova, size); + __free_iova(mapping, iova, size); + return 0; +} + +static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, + dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) +{ + pgprot_t prot = __get_dma_pgprot(attrs, pgprot_kernel); + struct page **pages; + void *addr = NULL; + + *handle = DMA_ERROR_CODE; + size = PAGE_ALIGN(size); + + pages = __iommu_alloc_buffer(dev, size, gfp); + if (!pages) + return NULL; + + *handle = __iommu_create_mapping(dev, pages, size); + if (*handle == DMA_ERROR_CODE) + goto err_buffer; + + addr = __iommu_alloc_remap(pages, size, gfp, prot); + if (!addr) + goto err_mapping; + + return addr; + +err_mapping: + __iommu_remove_mapping(dev, *handle, size); +err_buffer: + __iommu_free_buffer(dev, pages, size); + return NULL; +} + +static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) +{ + struct arm_vmregion *c; + + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); + c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); + + if (c) { + struct page **pages = c->priv; + + unsigned long uaddr = vma->vm_start; + unsigned long usize = vma->vm_end - vma->vm_start; + int i = 0; + + do { + int ret; + + ret = vm_insert_page(vma, uaddr, pages[i++]); + if (ret) { + pr_err("Remapping memory, error: %d\n", ret); + return ret; + } + + uaddr += PAGE_SIZE; + usize -= PAGE_SIZE; + } while (usize > 0); + } + return 0; +} + +/* + * free a page as defined by the above mapping. + * Must not be called with IRQs disabled. + */ +void arm_iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, struct dma_attrs *attrs) +{ + struct arm_vmregion *c; + size = PAGE_ALIGN(size); + + c = arm_vmregion_find(&consistent_head, (unsigned long)cpu_addr); + if (c) { + struct page **pages = c->priv; + __dma_free_remap(cpu_addr, size); + __iommu_remove_mapping(dev, handle, size); + __iommu_free_buffer(dev, pages, size); + } +} + +/* + * Map a part of the scatter-gather list into contiguous io address space + */ +static int __map_sg_chunk(struct device *dev, struct scatterlist *sg, + size_t size, dma_addr_t *handle, + enum dma_data_direction dir) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova, iova_base; + int ret = 0; + unsigned int count; + struct scatterlist *s; + + size = PAGE_ALIGN(size); + *handle = DMA_ERROR_CODE; + + iova_base = iova = __alloc_iova(mapping, size); + if (iova == DMA_ERROR_CODE) + return -ENOMEM; + + for (count = 0, s = sg; count < (size >> PAGE_SHIFT); s = sg_next(s)) { + phys_addr_t phys = page_to_phys(sg_page(s)); + unsigned int len = PAGE_ALIGN(s->offset + s->length); + + if (!arch_is_coherent()) + __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); + + ret = iommu_map(mapping->domain, iova, phys, len, 0); + if (ret < 0) + goto fail; + count += len >> PAGE_SHIFT; + iova += len; + } + *handle = iova_base; + + return 0; +fail: + iommu_unmap(mapping->domain, iova_base, count * PAGE_SIZE); + __free_iova(mapping, iova_base, size); + return ret; +} + +/** + * arm_iommu_map_sg - map a set of SG buffers for streaming mode DMA + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to map + * @dir: DMA transfer direction + * + * Map a set of buffers described by scatterlist in streaming mode for DMA. + * The scatter gather list elements are merged together (if possible) and + * tagged with the appropriate dma address and length. They are obtained via + * sg_dma_{address,length}. + */ +int arm_iommu_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + struct scatterlist *s = sg, *dma = sg, *start = sg; + int i, count = 0; + unsigned int offset = s->offset; + unsigned int size = s->offset + s->length; + unsigned int max = dma_get_max_seg_size(dev); + + for (i = 1; i < nents; i++) { + s = sg_next(s); + + s->dma_address = DMA_ERROR_CODE; + s->dma_length = 0; + + if (s->offset || (size & ~PAGE_MASK) || size + s->length > max) { + if (__map_sg_chunk(dev, start, size, &dma->dma_address, + dir) < 0) + goto bad_mapping; + + dma->dma_address += offset; + dma->dma_length = size - offset; + + size = offset = s->offset; + start = s; + dma = sg_next(dma); + count += 1; + } + size += s->length; + } + if (__map_sg_chunk(dev, start, size, &dma->dma_address, dir) < 0) + goto bad_mapping; + + dma->dma_address += offset; + dma->dma_length = size - offset; + + return count+1; + +bad_mapping: + for_each_sg(sg, s, count, i) + __iommu_remove_mapping(dev, sg_dma_address(s), sg_dma_len(s)); + return 0; +} + +/** + * arm_iommu_unmap_sg - unmap a set of SG buffers mapped by dma_map_sg + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to unmap (same as was passed to dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + * + * Unmap a set of streaming mode DMA translations. Again, CPU access + * rules concerning calls here are the same as for dma_unmap_single(). + */ +void arm_iommu_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) { + if (sg_dma_len(s)) + __iommu_remove_mapping(dev, sg_dma_address(s), + sg_dma_len(s)); + if (!arch_is_coherent()) + __dma_page_dev_to_cpu(sg_page(s), s->offset, + s->length, dir); + } +} + +/** + * arm_iommu_sync_sg_for_cpu + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to map (returned from dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + */ +void arm_iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) + if (!arch_is_coherent()) + __dma_page_dev_to_cpu(sg_page(s), s->offset, s->length, dir); + +} + +/** + * arm_iommu_sync_sg_for_device + * @dev: valid struct device pointer + * @sg: list of buffers + * @nents: number of buffers to map (returned from dma_map_sg) + * @dir: DMA transfer direction (same as was passed to dma_map_sg) + */ +void arm_iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir) +{ + struct scatterlist *s; + int i; + + for_each_sg(sg, s, nents, i) + if (!arch_is_coherent()) + __dma_page_cpu_to_dev(sg_page(s), s->offset, s->length, dir); +} + + +/** + * arm_iommu_map_page + * @dev: valid struct device pointer + * @page: page that buffer resides in + * @offset: offset into page for start of buffer + * @size: size of buffer to map + * @dir: DMA transfer direction + * + * IOMMU aware version of arm_dma_map_page() + */ +static dma_addr_t arm_iommu_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t dma_addr; + int ret, len = PAGE_ALIGN(size + offset); + + if (!arch_is_coherent()) + __dma_page_cpu_to_dev(page, offset, size, dir); + + dma_addr = __alloc_iova(mapping, len); + if (dma_addr == DMA_ERROR_CODE) + return dma_addr; + + ret = iommu_map(mapping->domain, dma_addr, page_to_phys(page), len, 0); + if (ret < 0) + goto fail; + + return dma_addr + offset; +fail: + __free_iova(mapping, dma_addr, len); + return DMA_ERROR_CODE; +} + +/** + * arm_iommu_unmap_page + * @dev: valid struct device pointer + * @handle: DMA address of buffer + * @size: size of buffer (same as passed to dma_map_page) + * @dir: DMA transfer direction (same as passed to dma_map_page) + * + * IOMMU aware version of arm_dma_unmap_page() + */ +static void arm_iommu_unmap_page(struct device *dev, dma_addr_t handle, + size_t size, enum dma_data_direction dir, + struct dma_attrs *attrs) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova = handle & PAGE_MASK; + struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); + int offset = handle & ~PAGE_MASK; + int len = PAGE_ALIGN(size + offset); + + if (!iova) + return; + + if (!arch_is_coherent()) + __dma_page_dev_to_cpu(page, offset, size, dir); + + iommu_unmap(mapping->domain, iova, len); + __free_iova(mapping, iova, len); +} + +static void arm_iommu_sync_single_for_cpu(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova = handle & PAGE_MASK; + struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); + unsigned int offset = handle & ~PAGE_MASK; + + if (!iova) + return; + + if (!arch_is_coherent()) + __dma_page_dev_to_cpu(page, offset, size, dir); +} + +static void arm_iommu_sync_single_for_device(struct device *dev, + dma_addr_t handle, size_t size, enum dma_data_direction dir) +{ + struct dma_iommu_mapping *mapping = dev->archdata.mapping; + dma_addr_t iova = handle & PAGE_MASK; + struct page *page = phys_to_page(iommu_iova_to_phys(mapping->domain, iova)); + unsigned int offset = handle & ~PAGE_MASK; + + if (!iova) + return; + + __dma_page_cpu_to_dev(page, offset, size, dir); +} + +struct dma_map_ops iommu_ops = { + .alloc = arm_iommu_alloc_attrs, + .free = arm_iommu_free_attrs, + .mmap = arm_iommu_mmap_attrs, + + .map_page = arm_iommu_map_page, + .unmap_page = arm_iommu_unmap_page, + .sync_single_for_cpu = arm_iommu_sync_single_for_cpu, + .sync_single_for_device = arm_iommu_sync_single_for_device, + + .map_sg = arm_iommu_map_sg, + .unmap_sg = arm_iommu_unmap_sg, + .sync_sg_for_cpu = arm_iommu_sync_sg_for_cpu, + .sync_sg_for_device = arm_iommu_sync_sg_for_device, +}; + +/** + * arm_iommu_create_mapping + * @bus: pointer to the bus holding the client device (for IOMMU calls) + * @base: start address of the valid IO address space + * @size: size of the valid IO address space + * @order: accuracy of the IO addresses allocations + * + * Creates a mapping structure which holds information about used/unused + * IO address ranges, which is required to perform memory allocation and + * mapping with IOMMU aware functions. + * + * The client device need to be attached to the mapping with + * arm_iommu_attach_device function. + */ +struct dma_iommu_mapping * +arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size, + int order) +{ + unsigned int count = size >> (PAGE_SHIFT + order); + unsigned int bitmap_size = BITS_TO_LONGS(count) * sizeof(long); + struct dma_iommu_mapping *mapping; + int err = -ENOMEM; + + if (!count) + return ERR_PTR(-EINVAL); + + mapping = kzalloc(sizeof(struct dma_iommu_mapping), GFP_KERNEL); + if (!mapping) + goto err; + + mapping->bitmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!mapping->bitmap) + goto err2; + + mapping->base = base; + mapping->bits = BITS_PER_BYTE * bitmap_size; + mapping->order = order; + spin_lock_init(&mapping->lock); + + mapping->domain = iommu_domain_alloc(bus); + if (!mapping->domain) + goto err3; + + kref_init(&mapping->kref); + return mapping; +err3: + kfree(mapping->bitmap); +err2: + kfree(mapping); +err: + return ERR_PTR(err); +} + +static void release_iommu_mapping(struct kref *kref) +{ + struct dma_iommu_mapping *mapping = + container_of(kref, struct dma_iommu_mapping, kref); + + iommu_domain_free(mapping->domain); + kfree(mapping->bitmap); + kfree(mapping); +} + +void arm_iommu_release_mapping(struct dma_iommu_mapping *mapping) +{ + if (mapping) + kref_put(&mapping->kref, release_iommu_mapping); +} + +/** + * arm_iommu_attach_device + * @dev: valid struct device pointer + * @mapping: io address space mapping structure (returned from + * arm_iommu_create_mapping) + * + * Attaches specified io address space mapping to the provided device, + * this replaces the dma operations (dma_map_ops pointer) with the + * IOMMU aware version. More than one client might be attached to + * the same io address space mapping. + */ +int arm_iommu_attach_device(struct device *dev, + struct dma_iommu_mapping *mapping) +{ + int err; + + err = iommu_attach_device(mapping->domain, dev); + if (err) + return err; + + kref_get(&mapping->kref); + dev->archdata.mapping = mapping; + set_dma_ops(dev, &iommu_ops); + + pr_info("Attached IOMMU controller to %s device.\n", dev_name(dev)); + return 0; +} + +#endif diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 8f5813bbffb5..c21d06c7dd7e 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -20,6 +20,7 @@ #include <linux/highmem.h> #include <linux/gfp.h> #include <linux/memblock.h> +#include <linux/dma-contiguous.h> #include <asm/mach-types.h> #include <asm/memblock.h> @@ -226,6 +227,17 @@ static void __init arm_adjust_dma_zone(unsigned long *size, unsigned long *hole, } #endif +void __init setup_dma_zone(struct machine_desc *mdesc) +{ +#ifdef CONFIG_ZONE_DMA + if (mdesc->dma_zone_size) { + arm_dma_zone_size = mdesc->dma_zone_size; + arm_dma_limit = PHYS_OFFSET + arm_dma_zone_size - 1; + } else + arm_dma_limit = 0xffffffff; +#endif +} + static void __init arm_bootmem_free(unsigned long min, unsigned long max_low, unsigned long max_high) { @@ -273,12 +285,9 @@ static void __init arm_bootmem_free(unsigned long min, unsigned long max_low, * Adjust the sizes according to any special requirements for * this machine type. */ - if (arm_dma_zone_size) { + if (arm_dma_zone_size) arm_adjust_dma_zone(zone_size, zhole_size, arm_dma_zone_size >> PAGE_SHIFT); - arm_dma_limit = PHYS_OFFSET + arm_dma_zone_size - 1; - } else - arm_dma_limit = 0xffffffff; #endif free_area_init_node(0, zone_size, min, zhole_size); @@ -364,6 +373,12 @@ void __init arm_memblock_init(struct meminfo *mi, struct machine_desc *mdesc) if (mdesc->reserve) mdesc->reserve(); + /* + * reserve memory for DMA contigouos allocations, + * must come from DMA area inside low memory + */ + dma_contiguous_reserve(min(arm_dma_limit, arm_lowmem_limit)); + arm_memblock_steal_permitted = false; memblock_allow_resize(); memblock_dump_all(); diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h index 27f4a619b35d..93dc0c17cdcb 100644 --- a/arch/arm/mm/mm.h +++ b/arch/arm/mm/mm.h @@ -67,5 +67,8 @@ extern u32 arm_dma_limit; #define arm_dma_limit ((u32)~0) #endif +extern phys_addr_t arm_lowmem_limit; + void __init bootmem_init(void); void arm_mm_memblock_reserve(void); +void dma_contiguous_remap(void); diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index aa78de8bfdd3..e5dad60b558b 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -288,6 +288,11 @@ static struct mem_type mem_types[] = { PMD_SECT_UNCACHED | PMD_SECT_XN, .domain = DOMAIN_KERNEL, }, + [MT_MEMORY_DMA_READY] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, + .prot_l1 = PMD_TYPE_TABLE, + .domain = DOMAIN_KERNEL, + }, }; const struct mem_type *get_mem_type(unsigned int type) @@ -429,6 +434,7 @@ static void __init build_mem_type_table(void) if (arch_is_coherent() && cpu_is_xsc3()) { mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; } @@ -460,6 +466,7 @@ static void __init build_mem_type_table(void) mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; } @@ -512,6 +519,7 @@ static void __init build_mem_type_table(void) mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask; mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd; mem_types[MT_MEMORY].prot_pte |= kern_pgprot; + mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot; mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask; mem_types[MT_ROM].prot_sect |= cp->pmd; @@ -596,7 +604,7 @@ static void __init alloc_init_section(pud_t *pud, unsigned long addr, * L1 entries, whereas PGDs refer to a group of L1 entries making * up one logical pointer to an L2 table. */ - if (((addr | end | phys) & ~SECTION_MASK) == 0) { + if (type->prot_sect && ((addr | end | phys) & ~SECTION_MASK) == 0) { pmd_t *p = pmd; #ifndef CONFIG_ARM_LPAE @@ -814,7 +822,7 @@ static int __init early_vmalloc(char *arg) } early_param("vmalloc", early_vmalloc); -static phys_addr_t lowmem_limit __initdata = 0; +phys_addr_t arm_lowmem_limit __initdata = 0; void __init sanity_check_meminfo(void) { @@ -897,8 +905,8 @@ void __init sanity_check_meminfo(void) bank->size = newsize; } #endif - if (!bank->highmem && bank->start + bank->size > lowmem_limit) - lowmem_limit = bank->start + bank->size; + if (!bank->highmem && bank->start + bank->size > arm_lowmem_limit) + arm_lowmem_limit = bank->start + bank->size; j++; } @@ -923,8 +931,8 @@ void __init sanity_check_meminfo(void) } #endif meminfo.nr_banks = j; - high_memory = __va(lowmem_limit - 1) + 1; - memblock_set_current_limit(lowmem_limit); + high_memory = __va(arm_lowmem_limit - 1) + 1; + memblock_set_current_limit(arm_lowmem_limit); } static inline void prepare_page_table(void) @@ -949,8 +957,8 @@ static inline void prepare_page_table(void) * Find the end of the first block of lowmem. */ end = memblock.memory.regions[0].base + memblock.memory.regions[0].size; - if (end >= lowmem_limit) - end = lowmem_limit; + if (end >= arm_lowmem_limit) + end = arm_lowmem_limit; /* * Clear out all the kernel space mappings, except for the first @@ -1093,8 +1101,8 @@ static void __init map_lowmem(void) phys_addr_t end = start + reg->size; struct map_desc map; - if (end > lowmem_limit) - end = lowmem_limit; + if (end > arm_lowmem_limit) + end = arm_lowmem_limit; if (start >= end) break; @@ -1115,11 +1123,12 @@ void __init paging_init(struct machine_desc *mdesc) { void *zero_page; - memblock_set_current_limit(lowmem_limit); + memblock_set_current_limit(arm_lowmem_limit); build_mem_type_table(); prepare_page_table(); map_lowmem(); + dma_contiguous_remap(); devicemaps_init(mdesc); kmap_init(); diff --git a/arch/arm/mm/vmregion.h b/arch/arm/mm/vmregion.h index 162be662c088..bf312c354a21 100644 --- a/arch/arm/mm/vmregion.h +++ b/arch/arm/mm/vmregion.h @@ -17,7 +17,7 @@ struct arm_vmregion { struct list_head vm_list; unsigned long vm_start; unsigned long vm_end; - struct page *vm_pages; + void *priv; int vm_active; const void *caller; }; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 66cc380bebf0..81c3e8be789a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -32,6 +32,7 @@ config X86 select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_FRAME_POINTERS select HAVE_DMA_ATTRS + select HAVE_DMA_CONTIGUOUS if !SWIOTLB select HAVE_KRETPROBES select HAVE_OPTPROBES select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/x86/include/asm/dma-contiguous.h b/arch/x86/include/asm/dma-contiguous.h new file mode 100644 index 000000000000..c09241659971 --- /dev/null +++ b/arch/x86/include/asm/dma-contiguous.h @@ -0,0 +1,13 @@ +#ifndef ASMX86_DMA_CONTIGUOUS_H +#define ASMX86_DMA_CONTIGUOUS_H + +#ifdef __KERNEL__ + +#include <linux/types.h> +#include <asm-generic/dma-contiguous.h> + +static inline void +dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { } + +#endif +#endif diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 61c0bd25845a..f7b4c7903e7e 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -13,6 +13,7 @@ #include <asm/io.h> #include <asm/swiotlb.h> #include <asm-generic/dma-coherent.h> +#include <linux/dma-contiguous.h> #ifdef CONFIG_ISA # define ISA_DMA_BIT_MASK DMA_BIT_MASK(24) @@ -62,6 +63,10 @@ extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag, struct dma_attrs *attrs); +extern void dma_generic_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_addr, + struct dma_attrs *attrs); + #ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */ extern bool dma_capable(struct device *dev, dma_addr_t addr, size_t size); extern dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 3003250ac51d..62c9457ccd2f 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -100,14 +100,18 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size, struct dma_attrs *attrs) { unsigned long dma_mask; - struct page *page; + struct page *page = NULL; + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; dma_addr_t addr; dma_mask = dma_alloc_coherent_mask(dev, flag); flag |= __GFP_ZERO; again: - page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); + if (!(flag & GFP_ATOMIC)) + page = dma_alloc_from_contiguous(dev, count, get_order(size)); + if (!page) + page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); if (!page) return NULL; @@ -127,6 +131,16 @@ again: return page_address(page); } +void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_addr, struct dma_attrs *attrs) +{ + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + struct page *page = virt_to_page(vaddr); + + if (!dma_release_from_contiguous(dev, page, count)) + free_pages((unsigned long)vaddr, get_order(size)); +} + /* * See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel * parameter documentation. diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index f96050685b46..871be4a84c7d 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -74,12 +74,6 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, return nents; } -static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr, struct dma_attrs *attrs) -{ - free_pages((unsigned long)vaddr, get_order(size)); -} - static void nommu_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) @@ -97,7 +91,7 @@ static void nommu_sync_sg_for_device(struct device *dev, struct dma_map_ops nommu_dma_ops = { .alloc = dma_generic_alloc_coherent, - .free = nommu_free_coherent, + .free = dma_generic_free_coherent, .map_sg = nommu_map_sg, .map_page = nommu_map_page, .sync_single_for_device = nommu_sync_single_for_device, diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 366c688d619e..f2afee6a19c1 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -49,6 +49,7 @@ #include <asm/pci-direct.h> #include <linux/init_ohci1394_dma.h> #include <linux/kvm_para.h> +#include <linux/dma-contiguous.h> #include <linux/errno.h> #include <linux/kernel.h> @@ -925,6 +926,7 @@ void __init setup_arch(char **cmdline_p) } #endif memblock.current_limit = get_max_mapped(); + dma_contiguous_reserve(0); /* * NOTE: On x86-32, only from this point on, fixmaps are ready for use. diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 9aa618acfe97..9b21469482ae 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -192,4 +192,93 @@ config DMA_SHARED_BUFFER APIs extension; the file's descriptor can then be passed on to other driver. +config CMA + bool "Contiguous Memory Allocator (EXPERIMENTAL)" + depends on HAVE_DMA_CONTIGUOUS && HAVE_MEMBLOCK && EXPERIMENTAL + select MIGRATION + help + This enables the Contiguous Memory Allocator which allows drivers + to allocate big physically-contiguous blocks of memory for use with + hardware components that do not support I/O map nor scatter-gather. + + For more information see <include/linux/dma-contiguous.h>. + If unsure, say "n". + +if CMA + +config CMA_DEBUG + bool "CMA debug messages (DEVELOPMENT)" + depends on DEBUG_KERNEL + help + Turns on debug messages in CMA. This produces KERN_DEBUG + messages for every CMA call as well as various messages while + processing calls such as dma_alloc_from_contiguous(). + This option does not affect warning and error messages. + +comment "Default contiguous memory area size:" + +config CMA_SIZE_MBYTES + int "Size in Mega Bytes" + depends on !CMA_SIZE_SEL_PERCENTAGE + default 16 + help + Defines the size (in MiB) of the default memory area for Contiguous + Memory Allocator. + +config CMA_SIZE_PERCENTAGE + int "Percentage of total memory" + depends on !CMA_SIZE_SEL_MBYTES + default 10 + help + Defines the size of the default memory area for Contiguous Memory + Allocator as a percentage of the total memory in the system. + +choice + prompt "Selected region size" + default CMA_SIZE_SEL_ABSOLUTE + +config CMA_SIZE_SEL_MBYTES + bool "Use mega bytes value only" + +config CMA_SIZE_SEL_PERCENTAGE + bool "Use percentage value only" + +config CMA_SIZE_SEL_MIN + bool "Use lower value (minimum)" + +config CMA_SIZE_SEL_MAX + bool "Use higher value (maximum)" + +endchoice + +config CMA_ALIGNMENT + int "Maximum PAGE_SIZE order of alignment for contiguous buffers" + range 4 9 + default 8 + help + DMA mapping framework by default aligns all buffers to the smallest + PAGE_SIZE order which is greater than or equal to the requested buffer + size. This works well for buffers up to a few hundreds kilobytes, but + for larger buffers it just a memory waste. With this parameter you can + specify the maximum PAGE_SIZE order for contiguous buffers. Larger + buffers will be aligned only to this specified order. The order is + expressed as a power of two multiplied by the PAGE_SIZE. + + For example, if your system defaults to 4KiB pages, the order value + of 8 means that the buffers will be aligned up to 1MiB only. + + If unsure, leave the default value "8". + +config CMA_AREAS + int "Maximum count of the CMA device-private areas" + default 7 + help + CMA allows to create CMA areas for particular devices. This parameter + sets the maximum number of such device private CMA areas in the + system. + + If unsure, leave the default value "7". + +endif + endmenu diff --git a/drivers/base/Makefile b/drivers/base/Makefile index b6d1b9c4200c..5aa2d703d19f 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -6,6 +6,7 @@ obj-y := core.o bus.o dd.o syscore.o \ attribute_container.o transport_class.o \ topology.o obj-$(CONFIG_DEVTMPFS) += devtmpfs.o +obj-$(CONFIG_CMA) += dma-contiguous.o obj-y += power/ obj-$(CONFIG_HAS_DMA) += dma-mapping.o obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o diff --git a/drivers/base/dma-coherent.c b/drivers/base/dma-coherent.c index bb0025c510b3..1b85949e3d2f 100644 --- a/drivers/base/dma-coherent.c +++ b/drivers/base/dma-coherent.c @@ -10,6 +10,7 @@ struct dma_coherent_mem { void *virt_base; dma_addr_t device_base; + phys_addr_t pfn_base; int size; int flags; unsigned long *bitmap; @@ -44,6 +45,7 @@ int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, dev->dma_mem->virt_base = mem_base; dev->dma_mem->device_base = device_addr; + dev->dma_mem->pfn_base = PFN_DOWN(bus_addr); dev->dma_mem->size = pages; dev->dma_mem->flags = flags; @@ -176,3 +178,43 @@ int dma_release_from_coherent(struct device *dev, int order, void *vaddr) return 0; } EXPORT_SYMBOL(dma_release_from_coherent); + +/** + * dma_mmap_from_coherent() - try to mmap the memory allocated from + * per-device coherent memory pool to userspace + * @dev: device from which the memory was allocated + * @vma: vm_area for the userspace memory + * @vaddr: cpu address returned by dma_alloc_from_coherent + * @size: size of the memory buffer allocated by dma_alloc_from_coherent + * + * This checks whether the memory was allocated from the per-device + * coherent memory pool and if so, maps that memory to the provided vma. + * + * Returns 1 if we correctly mapped the memory, or 0 if + * dma_release_coherent() should proceed with mapping memory from + * generic pools. + */ +int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, + void *vaddr, size_t size, int *ret) +{ + struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; + + if (mem && vaddr >= mem->virt_base && vaddr + size <= + (mem->virt_base + (mem->size << PAGE_SHIFT))) { + unsigned long off = vma->vm_pgoff; + int start = (vaddr - mem->virt_base) >> PAGE_SHIFT; + int user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + int count = size >> PAGE_SHIFT; + + *ret = -ENXIO; + if (off < count && user_count <= count - off) { + unsigned pfn = mem->pfn_base + start + off; + *ret = remap_pfn_range(vma, vma->vm_start, pfn, + user_count << PAGE_SHIFT, + vma->vm_page_prot); + } + return 1; + } + return 0; +} +EXPORT_SYMBOL(dma_mmap_from_coherent); diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c new file mode 100644 index 000000000000..78efb0306a44 --- /dev/null +++ b/drivers/base/dma-contiguous.c @@ -0,0 +1,401 @@ +/* + * Contiguous Memory Allocator for DMA mapping framework + * Copyright (c) 2010-2011 by Samsung Electronics. + * Written by: + * Marek Szyprowski <m.szyprowski@samsung.com> + * Michal Nazarewicz <mina86@mina86.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License or (at your optional) any later version of the license. + */ + +#define pr_fmt(fmt) "cma: " fmt + +#ifdef CONFIG_CMA_DEBUG +#ifndef DEBUG +# define DEBUG +#endif +#endif + +#include <asm/page.h> +#include <asm/dma-contiguous.h> + +#include <linux/memblock.h> +#include <linux/err.h> +#include <linux/mm.h> +#include <linux/mutex.h> +#include <linux/page-isolation.h> +#include <linux/slab.h> +#include <linux/swap.h> +#include <linux/mm_types.h> +#include <linux/dma-contiguous.h> + +#ifndef SZ_1M +#define SZ_1M (1 << 20) +#endif + +struct cma { + unsigned long base_pfn; + unsigned long count; + unsigned long *bitmap; +}; + +struct cma *dma_contiguous_default_area; + +#ifdef CONFIG_CMA_SIZE_MBYTES +#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES +#else +#define CMA_SIZE_MBYTES 0 +#endif + +/* + * Default global CMA area size can be defined in kernel's .config. + * This is usefull mainly for distro maintainers to create a kernel + * that works correctly for most supported systems. + * The size can be set in bytes or as a percentage of the total memory + * in the system. + * + * Users, who want to set the size of global CMA area for their system + * should use cma= kernel parameter. + */ +static const unsigned long size_bytes = CMA_SIZE_MBYTES * SZ_1M; +static long size_cmdline = -1; + +static int __init early_cma(char *p) +{ + pr_debug("%s(%s)\n", __func__, p); + size_cmdline = memparse(p, &p); + return 0; +} +early_param("cma", early_cma); + +#ifdef CONFIG_CMA_SIZE_PERCENTAGE + +static unsigned long __init __maybe_unused cma_early_percent_memory(void) +{ + struct memblock_region *reg; + unsigned long total_pages = 0; + + /* + * We cannot use memblock_phys_mem_size() here, because + * memblock_analyze() has not been called yet. + */ + for_each_memblock(memory, reg) + total_pages += memblock_region_memory_end_pfn(reg) - + memblock_region_memory_base_pfn(reg); + + return (total_pages * CONFIG_CMA_SIZE_PERCENTAGE / 100) << PAGE_SHIFT; +} + +#else + +static inline __maybe_unused unsigned long cma_early_percent_memory(void) +{ + return 0; +} + +#endif + +/** + * dma_contiguous_reserve() - reserve area for contiguous memory handling + * @limit: End address of the reserved memory (optional, 0 for any). + * + * This function reserves memory from early allocator. It should be + * called by arch specific code once the early allocator (memblock or bootmem) + * has been activated and all other subsystems have already allocated/reserved + * memory. + */ +void __init dma_contiguous_reserve(phys_addr_t limit) +{ + unsigned long selected_size = 0; + + pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit); + + if (size_cmdline != -1) { + selected_size = size_cmdline; + } else { +#ifdef CONFIG_CMA_SIZE_SEL_MBYTES + selected_size = size_bytes; +#elif defined(CONFIG_CMA_SIZE_SEL_PERCENTAGE) + selected_size = cma_early_percent_memory(); +#elif defined(CONFIG_CMA_SIZE_SEL_MIN) + selected_size = min(size_bytes, cma_early_percent_memory()); +#elif defined(CONFIG_CMA_SIZE_SEL_MAX) + selected_size = max(size_bytes, cma_early_percent_memory()); +#endif + } + + if (selected_size) { + pr_debug("%s: reserving %ld MiB for global area\n", __func__, + selected_size / SZ_1M); + + dma_declare_contiguous(NULL, selected_size, 0, limit); + } +}; + +static DEFINE_MUTEX(cma_mutex); + +static __init int cma_activate_area(unsigned long base_pfn, unsigned long count) +{ + unsigned long pfn = base_pfn; + unsigned i = count >> pageblock_order; + struct zone *zone; + + WARN_ON_ONCE(!pfn_valid(pfn)); + zone = page_zone(pfn_to_page(pfn)); + + do { + unsigned j; + base_pfn = pfn; + for (j = pageblock_nr_pages; j; --j, pfn++) { + WARN_ON_ONCE(!pfn_valid(pfn)); + if (page_zone(pfn_to_page(pfn)) != zone) + return -EINVAL; + } + init_cma_reserved_pageblock(pfn_to_page(base_pfn)); + } while (--i); + return 0; +} + +static __init struct cma *cma_create_area(unsigned long base_pfn, + unsigned long count) +{ + int bitmap_size = BITS_TO_LONGS(count) * sizeof(long); + struct cma *cma; + int ret = -ENOMEM; + + pr_debug("%s(base %08lx, count %lx)\n", __func__, base_pfn, count); + + cma = kmalloc(sizeof *cma, GFP_KERNEL); + if (!cma) + return ERR_PTR(-ENOMEM); + + cma->base_pfn = base_pfn; + cma->count = count; + cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL); + + if (!cma->bitmap) + goto no_mem; + + ret = cma_activate_area(base_pfn, count); + if (ret) + goto error; + + pr_debug("%s: returned %p\n", __func__, (void *)cma); + return cma; + +error: + kfree(cma->bitmap); +no_mem: + kfree(cma); + return ERR_PTR(ret); +} + +static struct cma_reserved { + phys_addr_t start; + unsigned long size; + struct device *dev; +} cma_reserved[MAX_CMA_AREAS] __initdata; +static unsigned cma_reserved_count __initdata; + +static int __init cma_init_reserved_areas(void) +{ + struct cma_reserved *r = cma_reserved; + unsigned i = cma_reserved_count; + + pr_debug("%s()\n", __func__); + + for (; i; --i, ++r) { + struct cma *cma; + cma = cma_create_area(PFN_DOWN(r->start), + r->size >> PAGE_SHIFT); + if (!IS_ERR(cma)) + dev_set_cma_area(r->dev, cma); + } + return 0; +} +core_initcall(cma_init_reserved_areas); + +/** + * dma_declare_contiguous() - reserve area for contiguous memory handling + * for particular device + * @dev: Pointer to device structure. + * @size: Size of the reserved memory. + * @base: Start address of the reserved memory (optional, 0 for any). + * @limit: End address of the reserved memory (optional, 0 for any). + * + * This function reserves memory for specified device. It should be + * called by board specific code when early allocator (memblock or bootmem) + * is still activate. + */ +int __init dma_declare_contiguous(struct device *dev, unsigned long size, + phys_addr_t base, phys_addr_t limit) +{ + struct cma_reserved *r = &cma_reserved[cma_reserved_count]; + unsigned long alignment; + + pr_debug("%s(size %lx, base %08lx, limit %08lx)\n", __func__, + (unsigned long)size, (unsigned long)base, + (unsigned long)limit); + + /* Sanity checks */ + if (cma_reserved_count == ARRAY_SIZE(cma_reserved)) { + pr_err("Not enough slots for CMA reserved regions!\n"); + return -ENOSPC; + } + + if (!size) + return -EINVAL; + + /* Sanitise input arguments */ + alignment = PAGE_SIZE << max(MAX_ORDER, pageblock_order); + base = ALIGN(base, alignment); + size = ALIGN(size, alignment); + limit &= ~(alignment - 1); + + /* Reserve memory */ + if (base) { + if (memblock_is_region_reserved(base, size) || + memblock_reserve(base, size) < 0) { + base = -EBUSY; + goto err; + } + } else { + /* + * Use __memblock_alloc_base() since + * memblock_alloc_base() panic()s. + */ + phys_addr_t addr = __memblock_alloc_base(size, alignment, limit); + if (!addr) { + base = -ENOMEM; + goto err; + } else if (addr + size > ~(unsigned long)0) { + memblock_free(addr, size); + base = -EINVAL; + goto err; + } else { + base = addr; + } + } + + /* + * Each reserved area must be initialised later, when more kernel + * subsystems (like slab allocator) are available. + */ + r->start = base; + r->size = size; + r->dev = dev; + cma_reserved_count++; + pr_info("CMA: reserved %ld MiB at %08lx\n", size / SZ_1M, + (unsigned long)base); + + /* Architecture specific contiguous memory fixup. */ + dma_contiguous_early_fixup(base, size); + return 0; +err: + pr_err("CMA: failed to reserve %ld MiB\n", size / SZ_1M); + return base; +} + +/** + * dma_alloc_from_contiguous() - allocate pages from contiguous area + * @dev: Pointer to device for which the allocation is performed. + * @count: Requested number of pages. + * @align: Requested alignment of pages (in PAGE_SIZE order). + * + * This function allocates memory buffer for specified device. It uses + * device specific contiguous memory area if available or the default + * global one. Requires architecture specific get_dev_cma_area() helper + * function. + */ +struct page *dma_alloc_from_contiguous(struct device *dev, int count, + unsigned int align) +{ + unsigned long mask, pfn, pageno, start = 0; + struct cma *cma = dev_get_cma_area(dev); + int ret; + + if (!cma || !cma->count) + return NULL; + + if (align > CONFIG_CMA_ALIGNMENT) + align = CONFIG_CMA_ALIGNMENT; + + pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma, + count, align); + + if (!count) + return NULL; + + mask = (1 << align) - 1; + + mutex_lock(&cma_mutex); + + for (;;) { + pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count, + start, count, mask); + if (pageno >= cma->count) { + ret = -ENOMEM; + goto error; + } + + pfn = cma->base_pfn + pageno; + ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA); + if (ret == 0) { + bitmap_set(cma->bitmap, pageno, count); + break; + } else if (ret != -EBUSY) { + goto error; + } + pr_debug("%s(): memory range at %p is busy, retrying\n", + __func__, pfn_to_page(pfn)); + /* try again with a bit different memory target */ + start = pageno + mask + 1; + } + + mutex_unlock(&cma_mutex); + + pr_debug("%s(): returned %p\n", __func__, pfn_to_page(pfn)); + return pfn_to_page(pfn); +error: + mutex_unlock(&cma_mutex); + return NULL; +} + +/** + * dma_release_from_contiguous() - release allocated pages + * @dev: Pointer to device for which the pages were allocated. + * @pages: Allocated pages. + * @count: Number of allocated pages. + * + * This function releases memory allocated by dma_alloc_from_contiguous(). + * It returns false when provided pages do not belong to contiguous area and + * true otherwise. + */ +bool dma_release_from_contiguous(struct device *dev, struct page *pages, + int count) +{ + struct cma *cma = dev_get_cma_area(dev); + unsigned long pfn; + + if (!cma || !pages) + return false; + + pr_debug("%s(page %p)\n", __func__, (void *)pages); + + pfn = page_to_pfn(pages); + + if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) + return false; + + VM_BUG_ON(pfn + count > cma->base_pfn + cma->count); + + mutex_lock(&cma_mutex); + bitmap_clear(cma->bitmap, pfn - cma->base_pfn, count); + free_contig_range(pfn, count); + mutex_unlock(&cma_mutex); + + return true; +} diff --git a/include/asm-generic/dma-coherent.h b/include/asm-generic/dma-coherent.h index 85a3ffaa0242..abfb2682de7f 100644 --- a/include/asm-generic/dma-coherent.h +++ b/include/asm-generic/dma-coherent.h @@ -3,13 +3,15 @@ #ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT /* - * These two functions are only for dma allocator. + * These three functions are only for dma allocator. * Don't use them in device drivers. */ int dma_alloc_from_coherent(struct device *dev, ssize_t size, dma_addr_t *dma_handle, void **ret); int dma_release_from_coherent(struct device *dev, int order, void *vaddr); +int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, size_t size, int *ret); /* * Standard interface */ diff --git a/include/asm-generic/dma-contiguous.h b/include/asm-generic/dma-contiguous.h new file mode 100644 index 000000000000..c544356b374b --- /dev/null +++ b/include/asm-generic/dma-contiguous.h @@ -0,0 +1,28 @@ +#ifndef ASM_DMA_CONTIGUOUS_H +#define ASM_DMA_CONTIGUOUS_H + +#ifdef __KERNEL__ +#ifdef CONFIG_CMA + +#include <linux/device.h> +#include <linux/dma-contiguous.h> + +static inline struct cma *dev_get_cma_area(struct device *dev) +{ + if (dev && dev->cma_area) + return dev->cma_area; + return dma_contiguous_default_area; +} + +static inline void dev_set_cma_area(struct device *dev, struct cma *cma) +{ + if (dev) + dev->cma_area = cma; + if (!dev || !dma_contiguous_default_area) + dma_contiguous_default_area = cma; +} + +#endif +#endif + +#endif diff --git a/include/linux/device.h b/include/linux/device.h index e04f5776f6d0..161d96241b1b 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -667,6 +667,10 @@ struct device { struct dma_coherent_mem *dma_mem; /* internal for coherent mem override */ +#ifdef CONFIG_CMA + struct cma *cma_area; /* contiguous memory area for dma + allocations */ +#endif /* arch specific additions */ struct dev_archdata archdata; diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h new file mode 100644 index 000000000000..2f303e4b7ed3 --- /dev/null +++ b/include/linux/dma-contiguous.h @@ -0,0 +1,110 @@ +#ifndef __LINUX_CMA_H +#define __LINUX_CMA_H + +/* + * Contiguous Memory Allocator for DMA mapping framework + * Copyright (c) 2010-2011 by Samsung Electronics. + * Written by: + * Marek Szyprowski <m.szyprowski@samsung.com> + * Michal Nazarewicz <mina86@mina86.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License or (at your optional) any later version of the license. + */ + +/* + * Contiguous Memory Allocator + * + * The Contiguous Memory Allocator (CMA) makes it possible to + * allocate big contiguous chunks of memory after the system has + * booted. + * + * Why is it needed? + * + * Various devices on embedded systems have no scatter-getter and/or + * IO map support and require contiguous blocks of memory to + * operate. They include devices such as cameras, hardware video + * coders, etc. + * + * Such devices often require big memory buffers (a full HD frame + * is, for instance, more then 2 mega pixels large, i.e. more than 6 + * MB of memory), which makes mechanisms such as kmalloc() or + * alloc_page() ineffective. + * + * At the same time, a solution where a big memory region is + * reserved for a device is suboptimal since often more memory is + * reserved then strictly required and, moreover, the memory is + * inaccessible to page system even if device drivers don't use it. + * + * CMA tries to solve this issue by operating on memory regions + * where only movable pages can be allocated from. This way, kernel + * can use the memory for pagecache and when device driver requests + * it, allocated pages can be migrated. + * + * Driver usage + * + * CMA should not be used by the device drivers directly. It is + * only a helper framework for dma-mapping subsystem. + * + * For more information, see kernel-docs in drivers/base/dma-contiguous.c + */ + +#ifdef __KERNEL__ + +struct cma; +struct page; +struct device; + +#ifdef CONFIG_CMA + +/* + * There is always at least global CMA area and a few optional device + * private areas configured in kernel .config. + */ +#define MAX_CMA_AREAS (1 + CONFIG_CMA_AREAS) + +extern struct cma *dma_contiguous_default_area; + +void dma_contiguous_reserve(phys_addr_t addr_limit); +int dma_declare_contiguous(struct device *dev, unsigned long size, + phys_addr_t base, phys_addr_t limit); + +struct page *dma_alloc_from_contiguous(struct device *dev, int count, + unsigned int order); +bool dma_release_from_contiguous(struct device *dev, struct page *pages, + int count); + +#else + +#define MAX_CMA_AREAS (0) + +static inline void dma_contiguous_reserve(phys_addr_t limit) { } + +static inline +int dma_declare_contiguous(struct device *dev, unsigned long size, + phys_addr_t base, phys_addr_t limit) +{ + return -ENOSYS; +} + +static inline +struct page *dma_alloc_from_contiguous(struct device *dev, int count, + unsigned int order) +{ + return NULL; +} + +static inline +bool dma_release_from_contiguous(struct device *dev, struct page *pages, + int count) +{ + return false; +} + +#endif + +#endif + +#endif diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 581e74b7df95..1e49be49d324 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -391,4 +391,16 @@ static inline bool pm_suspended_storage(void) } #endif /* CONFIG_PM_SLEEP */ +#ifdef CONFIG_CMA + +/* The below functions must be run on a range from a single zone. */ +extern int alloc_contig_range(unsigned long start, unsigned long end, + unsigned migratetype); +extern void free_contig_range(unsigned long pfn, unsigned nr_pages); + +/* CMA stuff */ +extern void init_cma_reserved_pageblock(struct page *page); + +#endif + #endif /* __LINUX_GFP_H */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 41aa49b74821..4871e31ae277 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -35,13 +35,39 @@ */ #define PAGE_ALLOC_COSTLY_ORDER 3 -#define MIGRATE_UNMOVABLE 0 -#define MIGRATE_RECLAIMABLE 1 -#define MIGRATE_MOVABLE 2 -#define MIGRATE_PCPTYPES 3 /* the number of types on the pcp lists */ -#define MIGRATE_RESERVE 3 -#define MIGRATE_ISOLATE 4 /* can't allocate from here */ -#define MIGRATE_TYPES 5 +enum { + MIGRATE_UNMOVABLE, + MIGRATE_RECLAIMABLE, + MIGRATE_MOVABLE, + MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ + MIGRATE_RESERVE = MIGRATE_PCPTYPES, +#ifdef CONFIG_CMA + /* + * MIGRATE_CMA migration type is designed to mimic the way + * ZONE_MOVABLE works. Only movable pages can be allocated + * from MIGRATE_CMA pageblocks and page allocator never + * implicitly change migration type of MIGRATE_CMA pageblock. + * + * The way to use it is to change migratetype of a range of + * pageblocks to MIGRATE_CMA which can be done by + * __free_pageblock_cma() function. What is important though + * is that a range of pageblocks must be aligned to + * MAX_ORDER_NR_PAGES should biggest page be bigger then + * a single pageblock. + */ + MIGRATE_CMA, +#endif + MIGRATE_ISOLATE, /* can't allocate from here */ + MIGRATE_TYPES +}; + +#ifdef CONFIG_CMA +# define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) +# define cma_wmark_pages(zone) zone->min_cma_pages +#else +# define is_migrate_cma(migratetype) false +# define cma_wmark_pages(zone) 0 +#endif #define for_each_migratetype_order(order, type) \ for (order = 0; order < MAX_ORDER; order++) \ @@ -347,6 +373,13 @@ struct zone { /* see spanned/present_pages for more description */ seqlock_t span_seqlock; #endif +#ifdef CONFIG_CMA + /* + * CMA needs to increase watermark levels during the allocation + * process to make sure that the system is not starved. + */ + unsigned long min_cma_pages; +#endif struct free_area free_area[MAX_ORDER]; #ifndef CONFIG_SPARSEMEM diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 051c1b1ede4e..3bdcab30ca41 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -3,7 +3,7 @@ /* * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE. - * If specified range includes migrate types other than MOVABLE, + * If specified range includes migrate types other than MOVABLE or CMA, * this will fail with -EBUSY. * * For isolating all pages in the range finally, the caller have to @@ -11,27 +11,27 @@ * test it. */ extern int -start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn); +start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, + unsigned migratetype); /* * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE. * target range is [start_pfn, end_pfn) */ extern int -undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn); +undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, + unsigned migratetype); /* - * test all pages in [start_pfn, end_pfn)are isolated or not. + * Test all pages in [start_pfn, end_pfn) are isolated or not. */ -extern int -test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn); +int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn); /* - * Internal funcs.Changes pageblock's migrate type. - * Please use make_pagetype_isolated()/make_pagetype_movable(). + * Internal functions. Changes pageblock's migrate type. */ extern int set_migratetype_isolate(struct page *page); -extern void unset_migratetype_isolate(struct page *page); +extern void unset_migratetype_isolate(struct page *page, unsigned migratetype); #endif diff --git a/mm/Kconfig b/mm/Kconfig index e338407f1225..39220026c797 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -198,7 +198,7 @@ config COMPACTION config MIGRATION bool "Page migration" def_bool y - depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION + depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION || CMA help Allows the migration of the physical location of pages of processes while the virtual addresses are not changed. This is useful in diff --git a/mm/Makefile b/mm/Makefile index 50ec00ef2a0e..8aada89efbbb 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -13,7 +13,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ page_isolation.o mm_init.o mmu_context.o percpu.o \ - $(mmu-y) + compaction.o $(mmu-y) obj-y += init-mm.o ifdef CONFIG_NO_BOOTMEM @@ -32,7 +32,6 @@ obj-$(CONFIG_NUMA) += mempolicy.o obj-$(CONFIG_SPARSEMEM) += sparse.o obj-$(CONFIG_SPARSEMEM_VMEMMAP) += sparse-vmemmap.o obj-$(CONFIG_SLOB) += slob.o -obj-$(CONFIG_COMPACTION) += compaction.o obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o obj-$(CONFIG_KSM) += ksm.o obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o diff --git a/mm/compaction.c b/mm/compaction.c index 74a8c825ff28..da7d35ea5103 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -16,30 +16,11 @@ #include <linux/sysfs.h> #include "internal.h" +#if defined CONFIG_COMPACTION || defined CONFIG_CMA + #define CREATE_TRACE_POINTS #include <trace/events/compaction.h> -/* - * compact_control is used to track pages being migrated and the free pages - * they are being migrated to during memory compaction. The free_pfn starts - * at the end of a zone and migrate_pfn begins at the start. Movable pages - * are moved to the end of a zone during a compaction run and the run - * completes when free_pfn <= migrate_pfn - */ -struct compact_control { - struct list_head freepages; /* List of free pages to migrate to */ - struct list_head migratepages; /* List of pages being migrated */ - unsigned long nr_freepages; /* Number of isolated free pages */ - unsigned long nr_migratepages; /* Number of pages to migrate */ - unsigned long free_pfn; /* isolate_freepages search base */ - unsigned long migrate_pfn; /* isolate_migratepages search base */ - bool sync; /* Synchronous migration */ - - int order; /* order a direct compactor needs */ - int migratetype; /* MOVABLE, RECLAIMABLE etc */ - struct zone *zone; -}; - static unsigned long release_freepages(struct list_head *freelist) { struct page *page, *next; @@ -54,24 +35,35 @@ static unsigned long release_freepages(struct list_head *freelist) return count; } -/* Isolate free pages onto a private freelist. Must hold zone->lock */ -static unsigned long isolate_freepages_block(struct zone *zone, - unsigned long blockpfn, - struct list_head *freelist) +static void map_pages(struct list_head *list) +{ + struct page *page; + + list_for_each_entry(page, list, lru) { + arch_alloc_page(page, 0); + kernel_map_pages(page, 1, 1); + } +} + +static inline bool migrate_async_suitable(int migratetype) +{ + return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE; +} + +/* + * Isolate free pages onto a private freelist. Caller must hold zone->lock. + * If @strict is true, will abort returning 0 on any invalid PFNs or non-free + * pages inside of the pageblock (even though it may still end up isolating + * some pages). + */ +static unsigned long isolate_freepages_block(unsigned long blockpfn, + unsigned long end_pfn, + struct list_head *freelist, + bool strict) { - unsigned long zone_end_pfn, end_pfn; int nr_scanned = 0, total_isolated = 0; struct page *cursor; - /* Get the last PFN we should scan for free pages at */ - zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; - end_pfn = min(blockpfn + pageblock_nr_pages, zone_end_pfn); - - /* Find the first usable PFN in the block to initialse page cursor */ - for (; blockpfn < end_pfn; blockpfn++) { - if (pfn_valid_within(blockpfn)) - break; - } cursor = pfn_to_page(blockpfn); /* Isolate free pages. This assumes the block is valid */ @@ -79,15 +71,23 @@ static unsigned long isolate_freepages_block(struct zone *zone, int isolated, i; struct page *page = cursor; - if (!pfn_valid_within(blockpfn)) + if (!pfn_valid_within(blockpfn)) { + if (strict) + return 0; continue; + } nr_scanned++; - if (!PageBuddy(page)) + if (!PageBuddy(page)) { + if (strict) + return 0; continue; + } /* Found a free page, break it into order-0 pages */ isolated = split_free_page(page); + if (!isolated && strict) + return 0; total_isolated += isolated; for (i = 0; i < isolated; i++) { list_add(&page->lru, freelist); @@ -105,114 +105,71 @@ static unsigned long isolate_freepages_block(struct zone *zone, return total_isolated; } -/* Returns true if the page is within a block suitable for migration to */ -static bool suitable_migration_target(struct page *page) -{ - - int migratetype = get_pageblock_migratetype(page); - - /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ - if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) - return false; - - /* If the page is a large free page, then allow migration */ - if (PageBuddy(page) && page_order(page) >= pageblock_order) - return true; - - /* If the block is MIGRATE_MOVABLE, allow migration */ - if (migratetype == MIGRATE_MOVABLE) - return true; - - /* Otherwise skip the block */ - return false; -} - -/* - * Based on information in the current compact_control, find blocks - * suitable for isolating free pages from and then isolate them. +/** + * isolate_freepages_range() - isolate free pages. + * @start_pfn: The first PFN to start isolating. + * @end_pfn: The one-past-last PFN. + * + * Non-free pages, invalid PFNs, or zone boundaries within the + * [start_pfn, end_pfn) range are considered errors, cause function to + * undo its actions and return zero. + * + * Otherwise, function returns one-past-the-last PFN of isolated page + * (which may be greater then end_pfn if end fell in a middle of + * a free page). */ -static void isolate_freepages(struct zone *zone, - struct compact_control *cc) +unsigned long +isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn) { - struct page *page; - unsigned long high_pfn, low_pfn, pfn; - unsigned long flags; - int nr_freepages = cc->nr_freepages; - struct list_head *freelist = &cc->freepages; - - /* - * Initialise the free scanner. The starting point is where we last - * scanned from (or the end of the zone if starting). The low point - * is the end of the pageblock the migration scanner is using. - */ - pfn = cc->free_pfn; - low_pfn = cc->migrate_pfn + pageblock_nr_pages; + unsigned long isolated, pfn, block_end_pfn, flags; + struct zone *zone = NULL; + LIST_HEAD(freelist); - /* - * Take care that if the migration scanner is at the end of the zone - * that the free scanner does not accidentally move to the next zone - * in the next isolation cycle. - */ - high_pfn = min(low_pfn, pfn); - - /* - * Isolate free pages until enough are available to migrate the - * pages on cc->migratepages. We stop searching if the migrate - * and free page scanners meet or enough free pages are isolated. - */ - for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages; - pfn -= pageblock_nr_pages) { - unsigned long isolated; + if (pfn_valid(start_pfn)) + zone = page_zone(pfn_to_page(start_pfn)); - if (!pfn_valid(pfn)) - continue; + for (pfn = start_pfn; pfn < end_pfn; pfn += isolated) { + if (!pfn_valid(pfn) || zone != page_zone(pfn_to_page(pfn))) + break; /* - * Check for overlapping nodes/zones. It's possible on some - * configurations to have a setup like - * node0 node1 node0 - * i.e. it's possible that all pages within a zones range of - * pages do not belong to a single zone. + * On subsequent iterations ALIGN() is actually not needed, + * but we keep it that we not to complicate the code. */ - page = pfn_to_page(pfn); - if (page_zone(page) != zone) - continue; + block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); + block_end_pfn = min(block_end_pfn, end_pfn); - /* Check the block is suitable for migration */ - if (!suitable_migration_target(page)) - continue; + spin_lock_irqsave(&zone->lock, flags); + isolated = isolate_freepages_block(pfn, block_end_pfn, + &freelist, true); + spin_unlock_irqrestore(&zone->lock, flags); /* - * Found a block suitable for isolating free pages from. Now - * we disabled interrupts, double check things are ok and - * isolate the pages. This is to minimise the time IRQs - * are disabled + * In strict mode, isolate_freepages_block() returns 0 if + * there are any holes in the block (ie. invalid PFNs or + * non-free pages). */ - isolated = 0; - spin_lock_irqsave(&zone->lock, flags); - if (suitable_migration_target(page)) { - isolated = isolate_freepages_block(zone, pfn, freelist); - nr_freepages += isolated; - } - spin_unlock_irqrestore(&zone->lock, flags); + if (!isolated) + break; /* - * Record the highest PFN we isolated pages from. When next - * looking for free pages, the search will restart here as - * page migration may have returned some pages to the allocator + * If we managed to isolate pages, it is always (1 << n) * + * pageblock_nr_pages for some non-negative n. (Max order + * page may span two pageblocks). */ - if (isolated) - high_pfn = max(high_pfn, pfn); } /* split_free_page does not map the pages */ - list_for_each_entry(page, freelist, lru) { - arch_alloc_page(page, 0); - kernel_map_pages(page, 1, 1); + map_pages(&freelist); + + if (pfn < end_pfn) { + /* Loop terminated early, cleanup. */ + release_freepages(&freelist); + return 0; } - cc->free_pfn = high_pfn; - cc->nr_freepages = nr_freepages; + /* We don't use freelists for anything. */ + return pfn; } /* Update the number of anon and file isolated pages in the zone */ @@ -243,38 +200,34 @@ static bool too_many_isolated(struct zone *zone) return isolated > (inactive + active) / 2; } -/* possible outcome of isolate_migratepages */ -typedef enum { - ISOLATE_ABORT, /* Abort compaction now */ - ISOLATE_NONE, /* No pages isolated, continue scanning */ - ISOLATE_SUCCESS, /* Pages isolated, migrate */ -} isolate_migrate_t; - -/* - * Isolate all pages that can be migrated from the block pointed to by - * the migrate scanner within compact_control. +/** + * isolate_migratepages_range() - isolate all migrate-able pages in range. + * @zone: Zone pages are in. + * @cc: Compaction control structure. + * @low_pfn: The first PFN of the range. + * @end_pfn: The one-past-the-last PFN of the range. + * + * Isolate all pages that can be migrated from the range specified by + * [low_pfn, end_pfn). Returns zero if there is a fatal signal + * pending), otherwise PFN of the first page that was not scanned + * (which may be both less, equal to or more then end_pfn). + * + * Assumes that cc->migratepages is empty and cc->nr_migratepages is + * zero. + * + * Apart from cc->migratepages and cc->nr_migratetypes this function + * does not modify any cc's fields, in particular it does not modify + * (or read for that matter) cc->migrate_pfn. */ -static isolate_migrate_t isolate_migratepages(struct zone *zone, - struct compact_control *cc) +unsigned long +isolate_migratepages_range(struct zone *zone, struct compact_control *cc, + unsigned long low_pfn, unsigned long end_pfn) { - unsigned long low_pfn, end_pfn; unsigned long last_pageblock_nr = 0, pageblock_nr; unsigned long nr_scanned = 0, nr_isolated = 0; struct list_head *migratelist = &cc->migratepages; isolate_mode_t mode = ISOLATE_ACTIVE|ISOLATE_INACTIVE; - /* Do not scan outside zone boundaries */ - low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn); - - /* Only scan within a pageblock boundary */ - end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages); - - /* Do not cross the free scanner or scan within a memory hole */ - if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) { - cc->migrate_pfn = end_pfn; - return ISOLATE_NONE; - } - /* * Ensure that there are not too many pages isolated from the LRU * list by either parallel reclaimers or compaction. If there are, @@ -283,12 +236,12 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, while (unlikely(too_many_isolated(zone))) { /* async migration should just abort */ if (!cc->sync) - return ISOLATE_ABORT; + return 0; congestion_wait(BLK_RW_ASYNC, HZ/10); if (fatal_signal_pending(current)) - return ISOLATE_ABORT; + return 0; } /* Time to isolate some pages for migration */ @@ -351,7 +304,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, */ pageblock_nr = low_pfn >> pageblock_order; if (!cc->sync && last_pageblock_nr != pageblock_nr && - get_pageblock_migratetype(page) != MIGRATE_MOVABLE) { + !migrate_async_suitable(get_pageblock_migratetype(page))) { low_pfn += pageblock_nr_pages; low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1; last_pageblock_nr = pageblock_nr; @@ -396,11 +349,124 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, acct_isolated(zone, cc); spin_unlock_irq(&zone->lru_lock); - cc->migrate_pfn = low_pfn; trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated); - return ISOLATE_SUCCESS; + return low_pfn; +} + +#endif /* CONFIG_COMPACTION || CONFIG_CMA */ +#ifdef CONFIG_COMPACTION + +/* Returns true if the page is within a block suitable for migration to */ +static bool suitable_migration_target(struct page *page) +{ + + int migratetype = get_pageblock_migratetype(page); + + /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ + if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) + return false; + + /* If the page is a large free page, then allow migration */ + if (PageBuddy(page) && page_order(page) >= pageblock_order) + return true; + + /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */ + if (migrate_async_suitable(migratetype)) + return true; + + /* Otherwise skip the block */ + return false; +} + +/* + * Based on information in the current compact_control, find blocks + * suitable for isolating free pages from and then isolate them. + */ +static void isolate_freepages(struct zone *zone, + struct compact_control *cc) +{ + struct page *page; + unsigned long high_pfn, low_pfn, pfn, zone_end_pfn, end_pfn; + unsigned long flags; + int nr_freepages = cc->nr_freepages; + struct list_head *freelist = &cc->freepages; + + /* + * Initialise the free scanner. The starting point is where we last + * scanned from (or the end of the zone if starting). The low point + * is the end of the pageblock the migration scanner is using. + */ + pfn = cc->free_pfn; + low_pfn = cc->migrate_pfn + pageblock_nr_pages; + + /* + * Take care that if the migration scanner is at the end of the zone + * that the free scanner does not accidentally move to the next zone + * in the next isolation cycle. + */ + high_pfn = min(low_pfn, pfn); + + zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; + + /* + * Isolate free pages until enough are available to migrate the + * pages on cc->migratepages. We stop searching if the migrate + * and free page scanners meet or enough free pages are isolated. + */ + for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages; + pfn -= pageblock_nr_pages) { + unsigned long isolated; + + if (!pfn_valid(pfn)) + continue; + + /* + * Check for overlapping nodes/zones. It's possible on some + * configurations to have a setup like + * node0 node1 node0 + * i.e. it's possible that all pages within a zones range of + * pages do not belong to a single zone. + */ + page = pfn_to_page(pfn); + if (page_zone(page) != zone) + continue; + + /* Check the block is suitable for migration */ + if (!suitable_migration_target(page)) + continue; + + /* + * Found a block suitable for isolating free pages from. Now + * we disabled interrupts, double check things are ok and + * isolate the pages. This is to minimise the time IRQs + * are disabled + */ + isolated = 0; + spin_lock_irqsave(&zone->lock, flags); + if (suitable_migration_target(page)) { + end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn); + isolated = isolate_freepages_block(pfn, end_pfn, + freelist, false); + nr_freepages += isolated; + } + spin_unlock_irqrestore(&zone->lock, flags); + + /* + * Record the highest PFN we isolated pages from. When next + * looking for free pages, the search will restart here as + * page migration may have returned some pages to the allocator + */ + if (isolated) + high_pfn = max(high_pfn, pfn); + } + + /* split_free_page does not map the pages */ + map_pages(freelist); + + cc->free_pfn = high_pfn; + cc->nr_freepages = nr_freepages; } /* @@ -449,6 +515,44 @@ static void update_nr_listpages(struct compact_control *cc) cc->nr_freepages = nr_freepages; } +/* possible outcome of isolate_migratepages */ +typedef enum { + ISOLATE_ABORT, /* Abort compaction now */ + ISOLATE_NONE, /* No pages isolated, continue scanning */ + ISOLATE_SUCCESS, /* Pages isolated, migrate */ +} isolate_migrate_t; + +/* + * Isolate all pages that can be migrated from the block pointed to by + * the migrate scanner within compact_control. + */ +static isolate_migrate_t isolate_migratepages(struct zone *zone, + struct compact_control *cc) +{ + unsigned long low_pfn, end_pfn; + + /* Do not scan outside zone boundaries */ + low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn); + + /* Only scan within a pageblock boundary */ + end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages); + + /* Do not cross the free scanner or scan within a memory hole */ + if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) { + cc->migrate_pfn = end_pfn; + return ISOLATE_NONE; + } + + /* Perform the isolation */ + low_pfn = isolate_migratepages_range(zone, cc, low_pfn, end_pfn); + if (!low_pfn) + return ISOLATE_ABORT; + + cc->migrate_pfn = low_pfn; + + return ISOLATE_SUCCESS; +} + static int compact_finished(struct zone *zone, struct compact_control *cc) { @@ -795,3 +899,5 @@ void compaction_unregister_node(struct node *node) return device_remove_file(&node->dev, &dev_attr_compact); } #endif /* CONFIG_SYSFS && CONFIG_NUMA */ + +#endif /* CONFIG_COMPACTION */ diff --git a/mm/internal.h b/mm/internal.h index 2189af491783..aee4761cf9a9 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -100,6 +100,39 @@ extern void prep_compound_page(struct page *page, unsigned long order); extern bool is_free_buddy_page(struct page *page); #endif +#if defined CONFIG_COMPACTION || defined CONFIG_CMA + +/* + * in mm/compaction.c + */ +/* + * compact_control is used to track pages being migrated and the free pages + * they are being migrated to during memory compaction. The free_pfn starts + * at the end of a zone and migrate_pfn begins at the start. Movable pages + * are moved to the end of a zone during a compaction run and the run + * completes when free_pfn <= migrate_pfn + */ +struct compact_control { + struct list_head freepages; /* List of free pages to migrate to */ + struct list_head migratepages; /* List of pages being migrated */ + unsigned long nr_freepages; /* Number of isolated free pages */ + unsigned long nr_migratepages; /* Number of pages to migrate */ + unsigned long free_pfn; /* isolate_freepages search base */ + unsigned long migrate_pfn; /* isolate_migratepages search base */ + bool sync; /* Synchronous migration */ + + int order; /* order a direct compactor needs */ + int migratetype; /* MOVABLE, RECLAIMABLE etc */ + struct zone *zone; +}; + +unsigned long +isolate_freepages_range(unsigned long start_pfn, unsigned long end_pfn); +unsigned long +isolate_migratepages_range(struct zone *zone, struct compact_control *cc, + unsigned long low_pfn, unsigned long end_pfn); + +#endif /* * function for dealing with page's order in buddy system. diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 97cc2733551a..c99ad4e6b88c 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1404,7 +1404,7 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) /* Not a free page */ ret = 1; } - unset_migratetype_isolate(p); + unset_migratetype_isolate(p, MIGRATE_MOVABLE); unlock_memory_hotplug(); return ret; } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 6629fafd6ce4..fc898cb4fe8f 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -891,7 +891,7 @@ static int __ref offline_pages(unsigned long start_pfn, nr_pages = end_pfn - start_pfn; /* set above range as isolated */ - ret = start_isolate_page_range(start_pfn, end_pfn); + ret = start_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); if (ret) goto out; @@ -956,7 +956,7 @@ repeat: We cannot do rollback at this point. */ offline_isolated_pages(start_pfn, end_pfn); /* reset pagetype flags and makes migrate type to be MOVABLE */ - undo_isolate_page_range(start_pfn, end_pfn); + undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); /* removal success */ zone->present_pages -= offlined_pages; zone->zone_pgdat->node_present_pages -= offlined_pages; @@ -981,7 +981,7 @@ failed_removal: start_pfn, end_pfn); memory_notify(MEM_CANCEL_OFFLINE, &arg); /* pushback to free area */ - undo_isolate_page_range(start_pfn, end_pfn); + undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); out: unlock_memory_hotplug(); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1851df600438..bab8e3bc4202 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -57,6 +57,7 @@ #include <linux/ftrace_event.h> #include <linux/memcontrol.h> #include <linux/prefetch.h> +#include <linux/migrate.h> #include <linux/page-debug-flags.h> #include <asm/tlbflush.h> @@ -513,10 +514,10 @@ static inline int page_is_buddy(struct page *page, struct page *buddy, * free pages of length of (1 << order) and marked with _mapcount -2. Page's * order is recorded in page_private(page) field. * So when we are allocating or freeing one, we can derive the state of the - * other. That is, if we allocate a small block, and both were - * free, the remainder of the region must be split into blocks. + * other. That is, if we allocate a small block, and both were + * free, the remainder of the region must be split into blocks. * If a block is freed, and its buddy is also free, then this - * triggers coalescing into a block of larger size. + * triggers coalescing into a block of larger size. * * -- wli */ @@ -749,6 +750,24 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order) __free_pages(page, order); } +#ifdef CONFIG_CMA +/* Free whole pageblock and set it's migration type to MIGRATE_CMA. */ +void __init init_cma_reserved_pageblock(struct page *page) +{ + unsigned i = pageblock_nr_pages; + struct page *p = page; + + do { + __ClearPageReserved(p); + set_page_count(p, 0); + } while (++p, --i); + + set_page_refcounted(page); + set_pageblock_migratetype(page, MIGRATE_CMA); + __free_pages(page, pageblock_order); + totalram_pages += pageblock_nr_pages; +} +#endif /* * The order of subdivision here is critical for the IO subsystem. @@ -874,11 +893,17 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, * This array describes the order lists are fallen back to when * the free lists for the desirable migrate type are depleted */ -static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = { - [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, - [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, - [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, - [MIGRATE_RESERVE] = { MIGRATE_RESERVE, MIGRATE_RESERVE, MIGRATE_RESERVE }, /* Never used */ +static int fallbacks[MIGRATE_TYPES][4] = { + [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, + [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RESERVE }, +#ifdef CONFIG_CMA + [MIGRATE_MOVABLE] = { MIGRATE_CMA, MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, + [MIGRATE_CMA] = { MIGRATE_RESERVE }, /* Never used */ +#else + [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_RESERVE }, +#endif + [MIGRATE_RESERVE] = { MIGRATE_RESERVE }, /* Never used */ + [MIGRATE_ISOLATE] = { MIGRATE_RESERVE }, /* Never used */ }; /* @@ -973,12 +998,12 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) /* Find the largest possible block of pages in the other list */ for (current_order = MAX_ORDER-1; current_order >= order; --current_order) { - for (i = 0; i < MIGRATE_TYPES - 1; i++) { + for (i = 0;; i++) { migratetype = fallbacks[start_migratetype][i]; /* MIGRATE_RESERVE handled later if necessary */ if (migratetype == MIGRATE_RESERVE) - continue; + break; area = &(zone->free_area[current_order]); if (list_empty(&area->free_list[migratetype])) @@ -993,11 +1018,18 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) * pages to the preferred allocation list. If falling * back for a reclaimable kernel allocation, be more * aggressive about taking ownership of free pages + * + * On the other hand, never change migration + * type of MIGRATE_CMA pageblocks nor move CMA + * pages on different free lists. We don't + * want unmovable pages to be allocated from + * MIGRATE_CMA areas. */ - if (unlikely(current_order >= (pageblock_order >> 1)) || - start_migratetype == MIGRATE_RECLAIMABLE || - page_group_by_mobility_disabled) { - unsigned long pages; + if (!is_migrate_cma(migratetype) && + (unlikely(current_order >= pageblock_order / 2) || + start_migratetype == MIGRATE_RECLAIMABLE || + page_group_by_mobility_disabled)) { + int pages; pages = move_freepages_block(zone, page, start_migratetype); @@ -1015,11 +1047,14 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype) rmv_page_order(page); /* Take ownership for orders >= pageblock_order */ - if (current_order >= pageblock_order) + if (current_order >= pageblock_order && + !is_migrate_cma(migratetype)) change_pageblock_range(page, current_order, start_migratetype); - expand(zone, page, order, current_order, area, migratetype); + expand(zone, page, order, current_order, area, + is_migrate_cma(migratetype) + ? migratetype : start_migratetype); trace_mm_page_alloc_extfrag(page, order, current_order, start_migratetype, migratetype); @@ -1061,17 +1096,17 @@ retry_reserve: return page; } -/* +/* * Obtain a specified number of elements from the buddy allocator, all under * a single hold of the lock, for efficiency. Add them to the supplied list. * Returns the number of new pages which were placed at *list. */ -static int rmqueue_bulk(struct zone *zone, unsigned int order, +static int rmqueue_bulk(struct zone *zone, unsigned int order, unsigned long count, struct list_head *list, int migratetype, int cold) { - int i; - + int mt = migratetype, i; + spin_lock(&zone->lock); for (i = 0; i < count; ++i) { struct page *page = __rmqueue(zone, order, migratetype); @@ -1091,7 +1126,12 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, list_add(&page->lru, list); else list_add_tail(&page->lru, list); - set_page_private(page, migratetype); + if (IS_ENABLED(CONFIG_CMA)) { + mt = get_pageblock_migratetype(page); + if (!is_migrate_cma(mt) && mt != MIGRATE_ISOLATE) + mt = migratetype; + } + set_page_private(page, mt); list = &page->lru; } __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); @@ -1371,8 +1411,12 @@ int split_free_page(struct page *page) if (order >= pageblock_order - 1) { struct page *endpage = page + (1 << order) - 1; - for (; page < endpage; page += pageblock_nr_pages) - set_pageblock_migratetype(page, MIGRATE_MOVABLE); + for (; page < endpage; page += pageblock_nr_pages) { + int mt = get_pageblock_migratetype(page); + if (mt != MIGRATE_ISOLATE && !is_migrate_cma(mt)) + set_pageblock_migratetype(page, + MIGRATE_MOVABLE); + } } return 1 << order; @@ -2086,16 +2130,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order, } #endif /* CONFIG_COMPACTION */ -/* The really slow allocator path where we enter direct reclaim */ -static inline struct page * -__alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, - struct zonelist *zonelist, enum zone_type high_zoneidx, - nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, - int migratetype, unsigned long *did_some_progress) +/* Perform direct synchronous page reclaim */ +static int +__perform_reclaim(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist, + nodemask_t *nodemask) { - struct page *page = NULL; struct reclaim_state reclaim_state; - bool drained = false; + int progress; cond_resched(); @@ -2106,7 +2147,7 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, reclaim_state.reclaimed_slab = 0; current->reclaim_state = &reclaim_state; - *did_some_progress = try_to_free_pages(zonelist, order, gfp_mask, nodemask); + progress = try_to_free_pages(zonelist, order, gfp_mask, nodemask); current->reclaim_state = NULL; lockdep_clear_current_reclaim_state(); @@ -2114,6 +2155,21 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, cond_resched(); + return progress; +} + +/* The really slow allocator path where we enter direct reclaim */ +static inline struct page * +__alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, + struct zonelist *zonelist, enum zone_type high_zoneidx, + nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone, + int migratetype, unsigned long *did_some_progress) +{ + struct page *page = NULL; + bool drained = false; + + *did_some_progress = __perform_reclaim(gfp_mask, order, zonelist, + nodemask); if (unlikely(!(*did_some_progress))) return NULL; @@ -4301,7 +4357,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, init_waitqueue_head(&pgdat->kswapd_wait); pgdat->kswapd_max_order = 0; pgdat_page_cgroup_init(pgdat); - + for (j = 0; j < MAX_NR_ZONES; j++) { struct zone *zone = pgdat->node_zones + j; unsigned long size, realsize, memmap_pages; @@ -4976,14 +5032,7 @@ static void setup_per_zone_lowmem_reserve(void) calculate_totalreserve_pages(); } -/** - * setup_per_zone_wmarks - called when min_free_kbytes changes - * or when memory is hot-{added|removed} - * - * Ensures that the watermark[min,low,high] values for each zone are set - * correctly with respect to min_free_kbytes. - */ -void setup_per_zone_wmarks(void) +static void __setup_per_zone_wmarks(void) { unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10); unsigned long lowmem_pages = 0; @@ -5030,6 +5079,11 @@ void setup_per_zone_wmarks(void) zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2); zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1); + + zone->watermark[WMARK_MIN] += cma_wmark_pages(zone); + zone->watermark[WMARK_LOW] += cma_wmark_pages(zone); + zone->watermark[WMARK_HIGH] += cma_wmark_pages(zone); + setup_zone_migrate_reserve(zone); spin_unlock_irqrestore(&zone->lock, flags); } @@ -5038,6 +5092,20 @@ void setup_per_zone_wmarks(void) calculate_totalreserve_pages(); } +/** + * setup_per_zone_wmarks - called when min_free_kbytes changes + * or when memory is hot-{added|removed} + * + * Ensures that the watermark[min,low,high] values for each zone are set + * correctly with respect to min_free_kbytes. + */ +void setup_per_zone_wmarks(void) +{ + mutex_lock(&zonelists_mutex); + __setup_per_zone_wmarks(); + mutex_unlock(&zonelists_mutex); +} + /* * The inactive anon list should be small enough that the VM never has to * do too much work, but large enough that each inactive page has a chance @@ -5415,14 +5483,16 @@ static int __count_immobile_pages(struct zone *zone, struct page *page, int count) { unsigned long pfn, iter, found; + int mt; + /* * For avoiding noise data, lru_add_drain_all() should be called * If ZONE_MOVABLE, the zone never contains immobile pages */ if (zone_idx(zone) == ZONE_MOVABLE) return true; - - if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE) + mt = get_pageblock_migratetype(page); + if (mt == MIGRATE_MOVABLE || is_migrate_cma(mt)) return true; pfn = page_to_pfn(page); @@ -5539,7 +5609,7 @@ out: return ret; } -void unset_migratetype_isolate(struct page *page) +void unset_migratetype_isolate(struct page *page, unsigned migratetype) { struct zone *zone; unsigned long flags; @@ -5547,12 +5617,259 @@ void unset_migratetype_isolate(struct page *page) spin_lock_irqsave(&zone->lock, flags); if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE) goto out; - set_pageblock_migratetype(page, MIGRATE_MOVABLE); - move_freepages_block(zone, page, MIGRATE_MOVABLE); + set_pageblock_migratetype(page, migratetype); + move_freepages_block(zone, page, migratetype); out: spin_unlock_irqrestore(&zone->lock, flags); } +#ifdef CONFIG_CMA + +static unsigned long pfn_max_align_down(unsigned long pfn) +{ + return pfn & ~(max_t(unsigned long, MAX_ORDER_NR_PAGES, + pageblock_nr_pages) - 1); +} + +static unsigned long pfn_max_align_up(unsigned long pfn) +{ + return ALIGN(pfn, max_t(unsigned long, MAX_ORDER_NR_PAGES, + pageblock_nr_pages)); +} + +static struct page * +__alloc_contig_migrate_alloc(struct page *page, unsigned long private, + int **resultp) +{ + return alloc_page(GFP_HIGHUSER_MOVABLE); +} + +/* [start, end) must belong to a single zone. */ +static int __alloc_contig_migrate_range(unsigned long start, unsigned long end) +{ + /* This function is based on compact_zone() from compaction.c. */ + + unsigned long pfn = start; + unsigned int tries = 0; + int ret = 0; + + struct compact_control cc = { + .nr_migratepages = 0, + .order = -1, + .zone = page_zone(pfn_to_page(start)), + .sync = true, + }; + INIT_LIST_HEAD(&cc.migratepages); + + migrate_prep_local(); + + while (pfn < end || !list_empty(&cc.migratepages)) { + if (fatal_signal_pending(current)) { + ret = -EINTR; + break; + } + + if (list_empty(&cc.migratepages)) { + cc.nr_migratepages = 0; + pfn = isolate_migratepages_range(cc.zone, &cc, + pfn, end); + if (!pfn) { + ret = -EINTR; + break; + } + tries = 0; + } else if (++tries == 5) { + ret = ret < 0 ? ret : -EBUSY; + break; + } + + ret = migrate_pages(&cc.migratepages, + __alloc_contig_migrate_alloc, + 0, false, MIGRATE_SYNC); + } + + putback_lru_pages(&cc.migratepages); + return ret > 0 ? 0 : ret; +} + +/* + * Update zone's cma pages counter used for watermark level calculation. + */ +static inline void __update_cma_watermarks(struct zone *zone, int count) +{ + unsigned long flags; + spin_lock_irqsave(&zone->lock, flags); + zone->min_cma_pages += count; + spin_unlock_irqrestore(&zone->lock, flags); + setup_per_zone_wmarks(); +} + +/* + * Trigger memory pressure bump to reclaim some pages in order to be able to + * allocate 'count' pages in single page units. Does similar work as + *__alloc_pages_slowpath() function. + */ +static int __reclaim_pages(struct zone *zone, gfp_t gfp_mask, int count) +{ + enum zone_type high_zoneidx = gfp_zone(gfp_mask); + struct zonelist *zonelist = node_zonelist(0, gfp_mask); + int did_some_progress = 0; + int order = 1; + + /* + * Increase level of watermarks to force kswapd do his job + * to stabilise at new watermark level. + */ + __update_cma_watermarks(zone, count); + + /* Obey watermarks as if the page was being allocated */ + while (!zone_watermark_ok(zone, 0, low_wmark_pages(zone), 0, 0)) { + wake_all_kswapd(order, zonelist, high_zoneidx, zone_idx(zone)); + + did_some_progress = __perform_reclaim(gfp_mask, order, zonelist, + NULL); + if (!did_some_progress) { + /* Exhausted what can be done so it's blamo time */ + out_of_memory(zonelist, gfp_mask, order, NULL, false); + } + } + + /* Restore original watermark levels. */ + __update_cma_watermarks(zone, -count); + + return count; +} + +/** + * alloc_contig_range() -- tries to allocate given range of pages + * @start: start PFN to allocate + * @end: one-past-the-last PFN to allocate + * @migratetype: migratetype of the underlaying pageblocks (either + * #MIGRATE_MOVABLE or #MIGRATE_CMA). All pageblocks + * in range must have the same migratetype and it must + * be either of the two. + * + * The PFN range does not have to be pageblock or MAX_ORDER_NR_PAGES + * aligned, however it's the caller's responsibility to guarantee that + * we are the only thread that changes migrate type of pageblocks the + * pages fall in. + * + * The PFN range must belong to a single zone. + * + * Returns zero on success or negative error code. On success all + * pages which PFN is in [start, end) are allocated for the caller and + * need to be freed with free_contig_range(). + */ +int alloc_contig_range(unsigned long start, unsigned long end, + unsigned migratetype) +{ + struct zone *zone = page_zone(pfn_to_page(start)); + unsigned long outer_start, outer_end; + int ret = 0, order; + + /* + * What we do here is we mark all pageblocks in range as + * MIGRATE_ISOLATE. Because pageblock and max order pages may + * have different sizes, and due to the way page allocator + * work, we align the range to biggest of the two pages so + * that page allocator won't try to merge buddies from + * different pageblocks and change MIGRATE_ISOLATE to some + * other migration type. + * + * Once the pageblocks are marked as MIGRATE_ISOLATE, we + * migrate the pages from an unaligned range (ie. pages that + * we are interested in). This will put all the pages in + * range back to page allocator as MIGRATE_ISOLATE. + * + * When this is done, we take the pages in range from page + * allocator removing them from the buddy system. This way + * page allocator will never consider using them. + * + * This lets us mark the pageblocks back as + * MIGRATE_CMA/MIGRATE_MOVABLE so that free pages in the + * aligned range but not in the unaligned, original range are + * put back to page allocator so that buddy can use them. + */ + + ret = start_isolate_page_range(pfn_max_align_down(start), + pfn_max_align_up(end), migratetype); + if (ret) + goto done; + + ret = __alloc_contig_migrate_range(start, end); + if (ret) + goto done; + + /* + * Pages from [start, end) are within a MAX_ORDER_NR_PAGES + * aligned blocks that are marked as MIGRATE_ISOLATE. What's + * more, all pages in [start, end) are free in page allocator. + * What we are going to do is to allocate all pages from + * [start, end) (that is remove them from page allocator). + * + * The only problem is that pages at the beginning and at the + * end of interesting range may be not aligned with pages that + * page allocator holds, ie. they can be part of higher order + * pages. Because of this, we reserve the bigger range and + * once this is done free the pages we are not interested in. + * + * We don't have to hold zone->lock here because the pages are + * isolated thus they won't get removed from buddy. + */ + + lru_add_drain_all(); + drain_all_pages(); + + order = 0; + outer_start = start; + while (!PageBuddy(pfn_to_page(outer_start))) { + if (++order >= MAX_ORDER) { + ret = -EBUSY; + goto done; + } + outer_start &= ~0UL << order; + } + + /* Make sure the range is really isolated. */ + if (test_pages_isolated(outer_start, end)) { + pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n", + outer_start, end); + ret = -EBUSY; + goto done; + } + + /* + * Reclaim enough pages to make sure that contiguous allocation + * will not starve the system. + */ + __reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start); + + /* Grab isolated pages from freelists. */ + outer_end = isolate_freepages_range(outer_start, end); + if (!outer_end) { + ret = -EBUSY; + goto done; + } + + /* Free head and tail (if any) */ + if (start != outer_start) + free_contig_range(outer_start, start - outer_start); + if (end != outer_end) + free_contig_range(end, outer_end - end); + +done: + undo_isolate_page_range(pfn_max_align_down(start), + pfn_max_align_up(end), migratetype); + return ret; +} + +void free_contig_range(unsigned long pfn, unsigned nr_pages) +{ + for (; nr_pages--; ++pfn) + __free_page(pfn_to_page(pfn)); +} +#endif + #ifdef CONFIG_MEMORY_HOTREMOVE /* * All pages in the range must be isolated before calling this. diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 4ae42bb40892..c9f04774f2b8 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -24,6 +24,7 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) * to be MIGRATE_ISOLATE. * @start_pfn: The lower PFN of the range to be isolated. * @end_pfn: The upper PFN of the range to be isolated. + * @migratetype: migrate type to set in error recovery. * * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in * the range will never be allocated. Any free pages and pages freed in the @@ -32,8 +33,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) * start_pfn/end_pfn must be aligned to pageblock_order. * Returns 0 on success and -EBUSY if any part of range cannot be isolated. */ -int -start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) +int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, + unsigned migratetype) { unsigned long pfn; unsigned long undo_pfn; @@ -56,7 +57,7 @@ undo: for (pfn = start_pfn; pfn < undo_pfn; pfn += pageblock_nr_pages) - unset_migratetype_isolate(pfn_to_page(pfn)); + unset_migratetype_isolate(pfn_to_page(pfn), migratetype); return -EBUSY; } @@ -64,8 +65,8 @@ undo: /* * Make isolated pages available again. */ -int -undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) +int undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, + unsigned migratetype) { unsigned long pfn; struct page *page; @@ -77,7 +78,7 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) page = __first_valid_page(pfn, pageblock_nr_pages); if (!page || get_pageblock_migratetype(page) != MIGRATE_ISOLATE) continue; - unset_migratetype_isolate(page); + unset_migratetype_isolate(page, migratetype); } return 0; } @@ -86,7 +87,7 @@ undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn) * all pages in [start_pfn...end_pfn) must be in the same zone. * zone->lock must be held before call this. * - * Returns 1 if all pages in the range is isolated. + * Returns 1 if all pages in the range are isolated. */ static int __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn) diff --git a/mm/vmstat.c b/mm/vmstat.c index 7db1b9bab492..0dad31dc1618 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -613,6 +613,9 @@ static char * const migratetype_names[MIGRATE_TYPES] = { "Reclaimable", "Movable", "Reserve", +#ifdef CONFIG_CMA + "CMA", +#endif "Isolate", }; |