From 1f6a6cc82ed305c09385753c80bb7b3bc9eea864 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Wed, 6 Aug 2014 16:05:11 -0700 Subject: mem-hotplug: avoid illegal state prefixed with legal state when changing state of memory_block We use the following command to online a memory_block: echo online|online_kernel|online_movable > /sys/devices/system/memory/memoryXXX/state But, if we do the following: echo online_fhsjkghfkd > /sys/devices/system/memory/memoryXXX/state the block will also be onlined. This is because the following code in store_mem_state() does not compare the whole string, but only the prefix of the string. store_mem_state() { ...... 328 if (!strncmp(buf, "online_kernel", min_t(int, count, 13))) Here, only compare the first 13 letters of the string. If we give "online_kernelXXXXXX", it will be recognized as online_kernel, which is incorrect. 329 online_type = ONLINE_KERNEL; 330 else if (!strncmp(buf, "online_movable", min_t(int, count, 14))) We have the same problem here, 331 online_type = ONLINE_MOVABLE; 332 else if (!strncmp(buf, "online", min_t(int, count, 6))) here, (Here is more problematic. If we give online_movalbe, which is a typo of online_movable, it will be recognized as online without noticing the author.) 333 online_type = ONLINE_KEEP; 334 else if (!strncmp(buf, "offline", min_t(int, count, 7))) and here. 335 online_type = -1; 336 else { 337 ret = -EINVAL; 338 goto err; 339 } ...... } This patch fixes this problem by using sysfs_streq() to compare the whole string. Signed-off-by: Tang Chen Reported-by: Hu Tao Cc: Greg Kroah-Hartman Cc: Lai Jiangshan Cc: Yasuaki Ishimatsu Cc: Gu Zheng Acked-by: Toshi Kani Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 89f752dd8465..c6707dfb5284 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -315,13 +315,13 @@ store_mem_state(struct device *dev, if (ret) return ret; - if (!strncmp(buf, "online_kernel", min_t(int, count, 13))) + if (sysfs_streq(buf, "online_kernel")) online_type = ONLINE_KERNEL; - else if (!strncmp(buf, "online_movable", min_t(int, count, 14))) + else if (sysfs_streq(buf, "online_movable")) online_type = ONLINE_MOVABLE; - else if (!strncmp(buf, "online", min_t(int, count, 6))) + else if (sysfs_streq(buf, "online")) online_type = ONLINE_KEEP; - else if (!strncmp(buf, "offline", min_t(int, count, 7))) + else if (sysfs_streq(buf, "offline")) online_type = -1; else { ret = -EINVAL; -- cgit v1.2.1 From 4f7c6b49c45a398d72763d1f0e64ddff8b3653c7 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Wed, 6 Aug 2014 16:05:13 -0700 Subject: mem-hotplug: introduce MMOP_OFFLINE to replace the hard coding -1 In store_mem_state(), we have: ... 334 else if (!strncmp(buf, "offline", min_t(int, count, 7))) 335 online_type = -1; ... 355 case -1: 356 ret = device_offline(&mem->dev); 357 break; ... Here, "offline" is hard coded as -1. This patch does the following renaming: ONLINE_KEEP -> MMOP_ONLINE_KEEP ONLINE_KERNEL -> MMOP_ONLINE_KERNEL ONLINE_MOVABLE -> MMOP_ONLINE_MOVABLE and introduces MMOP_OFFLINE = -1 to avoid hard coding. 
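For reference, a minimal sketch of what the accepted state strings now map to (the enum below is assumed to sit alongside the existing ONLINE_* definitions in the memory-hotplug header; it is shown only to make the -1 -> MMOP_OFFLINE change explicit):

	enum {
		MMOP_OFFLINE = -1,	/* offline the memory block */
		MMOP_ONLINE_KEEP,	/* online, keep the default zone */
		MMOP_ONLINE_KERNEL,	/* online into a kernel zone */
		MMOP_ONLINE_MOVABLE,	/* online into ZONE_MOVABLE */
	};

Note also that sysfs_streq() compares the whole string while tolerating a single trailing newline, so the "\n" appended by echo still matches but garbage such as "online_fhsjkghfkd" is now rejected with -EINVAL.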
Signed-off-by: Tang Chen Cc: Hu Tao Cc: Greg Kroah-Hartman Cc: Lai Jiangshan Cc: Yasuaki Ishimatsu Cc: Gu Zheng Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index c6707dfb5284..7c60ed27e711 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -284,7 +284,7 @@ static int memory_subsys_online(struct device *dev) * attribute and need to set the online_type. */ if (mem->online_type < 0) - mem->online_type = ONLINE_KEEP; + mem->online_type = MMOP_ONLINE_KEEP; ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); @@ -316,22 +316,22 @@ store_mem_state(struct device *dev, return ret; if (sysfs_streq(buf, "online_kernel")) - online_type = ONLINE_KERNEL; + online_type = MMOP_ONLINE_KERNEL; else if (sysfs_streq(buf, "online_movable")) - online_type = ONLINE_MOVABLE; + online_type = MMOP_ONLINE_MOVABLE; else if (sysfs_streq(buf, "online")) - online_type = ONLINE_KEEP; + online_type = MMOP_ONLINE_KEEP; else if (sysfs_streq(buf, "offline")) - online_type = -1; + online_type = MMOP_OFFLINE; else { ret = -EINVAL; goto err; } switch (online_type) { - case ONLINE_KERNEL: - case ONLINE_MOVABLE: - case ONLINE_KEEP: + case MMOP_ONLINE_KERNEL: + case MMOP_ONLINE_MOVABLE: + case MMOP_ONLINE_KEEP: /* * mem->online_type is not protected so there can be a * race here. However, when racing online, the first @@ -342,7 +342,7 @@ store_mem_state(struct device *dev, mem->online_type = online_type; ret = device_online(&mem->dev); break; - case -1: + case MMOP_OFFLINE: ret = device_offline(&mem->dev); break; default: -- cgit v1.2.1 From 3162bbd7e65b9cc57b660796dd3409807bfc9070 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 6 Aug 2014 16:05:19 -0700 Subject: DMA, CMA: separate core CMA management codes from DMA APIs To prepare future generalization work on CMA area management code, we need to separate core CMA management codes from DMA APIs. We will extend these core functions to cover requirements of PPC KVM's CMA area management functionality in following patches. This separation helps us not to touch DMA APIs while extending core functions. Signed-off-by: Joonsoo Kim Acked-by: Michal Nazarewicz Reviewed-by: Aneesh Kumar K.V Cc: Alexander Graf Cc: Aneesh Kumar K.V Cc: Gleb Natapov Cc: Minchan Kim Cc: Paolo Bonzini Cc: Zhang Yanfei Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Acked-by: Marek Szyprowski Tested-by: Marek Szyprowski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/dma-contiguous.c | 125 ++++++++++++++++++++++++++---------------- 1 file changed, 77 insertions(+), 48 deletions(-) (limited to 'drivers') diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c index 6467c919c509..9021762227a7 100644 --- a/drivers/base/dma-contiguous.c +++ b/drivers/base/dma-contiguous.c @@ -213,26 +213,9 @@ static int __init cma_init_reserved_areas(void) } core_initcall(cma_init_reserved_areas); -/** - * dma_contiguous_reserve_area() - reserve custom contiguous area - * @size: Size of the reserved area (in bytes), - * @base: Base address of the reserved area optional, use 0 for any - * @limit: End address of the reserved memory (optional, 0 for any). - * @res_cma: Pointer to store the created cma region. - * @fixed: hint about where to place the reserved area - * - * This function reserves memory from early allocator. 
It should be - * called by arch specific code once the early allocator (memblock or bootmem) - * has been activated and all other subsystems have already allocated/reserved - * memory. This function allows to create custom reserved areas for specific - * devices. - * - * If @fixed is true, reserve contiguous area at exactly @base. If false, - * reserve in range from @base to @limit. - */ -int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, - phys_addr_t limit, struct cma **res_cma, - bool fixed) +static int __init __dma_contiguous_reserve_area(phys_addr_t size, + phys_addr_t base, phys_addr_t limit, + struct cma **res_cma, bool fixed) { struct cma *cma = &cma_areas[cma_area_count]; phys_addr_t alignment; @@ -286,15 +269,47 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, pr_info("CMA: reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M, (unsigned long)base); - - /* Architecture specific contiguous memory fixup. */ - dma_contiguous_early_fixup(base, size); return 0; + err: pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M); return ret; } +/** + * dma_contiguous_reserve_area() - reserve custom contiguous area + * @size: Size of the reserved area (in bytes), + * @base: Base address of the reserved area optional, use 0 for any + * @limit: End address of the reserved memory (optional, 0 for any). + * @res_cma: Pointer to store the created cma region. + * @fixed: hint about where to place the reserved area + * + * This function reserves memory from early allocator. It should be + * called by arch specific code once the early allocator (memblock or bootmem) + * has been activated and all other subsystems have already allocated/reserved + * memory. This function allows to create custom reserved areas for specific + * devices. + * + * If @fixed is true, reserve contiguous area at exactly @base. If false, + * reserve in range from @base to @limit. + */ +int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, + phys_addr_t limit, struct cma **res_cma, + bool fixed) +{ + int ret; + + ret = __dma_contiguous_reserve_area(size, base, limit, res_cma, fixed); + if (ret) + return ret; + + /* Architecture specific contiguous memory fixup. */ + dma_contiguous_early_fixup(PFN_PHYS((*res_cma)->base_pfn), + (*res_cma)->count << PAGE_SHIFT); + + return 0; +} + static void clear_cma_bitmap(struct cma *cma, unsigned long pfn, int count) { mutex_lock(&cma->lock); @@ -302,31 +317,16 @@ static void clear_cma_bitmap(struct cma *cma, unsigned long pfn, int count) mutex_unlock(&cma->lock); } -/** - * dma_alloc_from_contiguous() - allocate pages from contiguous area - * @dev: Pointer to device for which the allocation is performed. - * @count: Requested number of pages. - * @align: Requested alignment of pages (in PAGE_SIZE order). - * - * This function allocates memory buffer for specified device. It uses - * device specific contiguous memory area if available or the default - * global one. Requires architecture specific dev_get_cma_area() helper - * function. 
- */ -struct page *dma_alloc_from_contiguous(struct device *dev, int count, +static struct page *__dma_alloc_from_contiguous(struct cma *cma, int count, unsigned int align) { unsigned long mask, pfn, pageno, start = 0; - struct cma *cma = dev_get_cma_area(dev); struct page *page = NULL; int ret; if (!cma || !cma->count) return NULL; - if (align > CONFIG_CMA_ALIGNMENT) - align = CONFIG_CMA_ALIGNMENT; - pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma, count, align); @@ -375,19 +375,30 @@ struct page *dma_alloc_from_contiguous(struct device *dev, int count, } /** - * dma_release_from_contiguous() - release allocated pages - * @dev: Pointer to device for which the pages were allocated. - * @pages: Allocated pages. - * @count: Number of allocated pages. + * dma_alloc_from_contiguous() - allocate pages from contiguous area + * @dev: Pointer to device for which the allocation is performed. + * @count: Requested number of pages. + * @align: Requested alignment of pages (in PAGE_SIZE order). * - * This function releases memory allocated by dma_alloc_from_contiguous(). - * It returns false when provided pages do not belong to contiguous area and - * true otherwise. + * This function allocates memory buffer for specified device. It uses + * device specific contiguous memory area if available or the default + * global one. Requires architecture specific dev_get_cma_area() helper + * function. */ -bool dma_release_from_contiguous(struct device *dev, struct page *pages, - int count) +struct page *dma_alloc_from_contiguous(struct device *dev, int count, + unsigned int align) { struct cma *cma = dev_get_cma_area(dev); + + if (align > CONFIG_CMA_ALIGNMENT) + align = CONFIG_CMA_ALIGNMENT; + + return __dma_alloc_from_contiguous(cma, count, align); +} + +static bool __dma_release_from_contiguous(struct cma *cma, struct page *pages, + int count) +{ unsigned long pfn; if (!cma || !pages) @@ -407,3 +418,21 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages, return true; } + +/** + * dma_release_from_contiguous() - release allocated pages + * @dev: Pointer to device for which the pages were allocated. + * @pages: Allocated pages. + * @count: Number of allocated pages. + * + * This function releases memory allocated by dma_alloc_from_contiguous(). + * It returns false when provided pages do not belong to contiguous area and + * true otherwise. + */ +bool dma_release_from_contiguous(struct device *dev, struct page *pages, + int count) +{ + struct cma *cma = dev_get_cma_area(dev); + + return __dma_release_from_contiguous(cma, pages, count); +} -- cgit v1.2.1 From a15bc0b89e8812d0db297bc771a85812c4fa83c1 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 6 Aug 2014 16:05:21 -0700 Subject: DMA, CMA: support alignment constraint on CMA region PPC KVM's CMA area management needs alignment constraint on CMA region. So support it to prepare generalization of CMA area management functionality. Additionally, add some comments which tell us why alignment constraint is needed on CMA region. 
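To make the new constraint concrete, a worked example (illustration only, not part of the patch): on a common x86-64 configuration (4 KiB pages, pageblock_order = 9, MAX_ORDER = 11) the implicit floor is PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order) = 4 KiB << 10 = 4 MiB, so a caller-supplied alignment is only ever raised, never lowered:

	requested alignment    effective alignment
	0 (none)               4 MiB    (floor applies, same behaviour as before)
	1 MiB                  4 MiB    (power of two, but below the floor)
	16 MiB                 16 MiB   (power of two, honoured)
	24 MiB                 -EINVAL  (rejected: not a power of two)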
Signed-off-by: Joonsoo Kim Acked-by: Michal Nazarewicz Reviewed-by: Aneesh Kumar K.V Cc: Alexander Graf Cc: Aneesh Kumar K.V Cc: Gleb Natapov Acked-by: Marek Szyprowski Tested-by: Marek Szyprowski Cc: Minchan Kim Cc: Paolo Bonzini Cc: Zhang Yanfei Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/dma-contiguous.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c index 9021762227a7..5f62c284072c 100644 --- a/drivers/base/dma-contiguous.c +++ b/drivers/base/dma-contiguous.c @@ -32,6 +32,7 @@ #include #include #include +#include struct cma { unsigned long base_pfn; @@ -215,17 +216,16 @@ core_initcall(cma_init_reserved_areas); static int __init __dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, phys_addr_t limit, + phys_addr_t alignment, struct cma **res_cma, bool fixed) { struct cma *cma = &cma_areas[cma_area_count]; - phys_addr_t alignment; int ret = 0; - pr_debug("%s(size %lx, base %08lx, limit %08lx)\n", __func__, - (unsigned long)size, (unsigned long)base, - (unsigned long)limit); + pr_debug("%s(size %lx, base %08lx, limit %08lx alignment %08lx)\n", + __func__, (unsigned long)size, (unsigned long)base, + (unsigned long)limit, (unsigned long)alignment); - /* Sanity checks */ if (cma_area_count == ARRAY_SIZE(cma_areas)) { pr_err("Not enough slots for CMA reserved regions!\n"); return -ENOSPC; @@ -234,8 +234,17 @@ static int __init __dma_contiguous_reserve_area(phys_addr_t size, if (!size) return -EINVAL; - /* Sanitise input arguments */ - alignment = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order); + if (alignment && !is_power_of_2(alignment)) + return -EINVAL; + + /* + * Sanitise input arguments. + * Pages both ends in CMA area could be merged into adjacent unmovable + * migratetype page by page allocator's buddy algorithm. In the case, + * you couldn't get a contiguous memory, which is not what we want. + */ + alignment = max(alignment, + (phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order)); base = ALIGN(base, alignment); size = ALIGN(size, alignment); limit &= ~(alignment - 1); @@ -299,7 +308,8 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, { int ret; - ret = __dma_contiguous_reserve_area(size, base, limit, res_cma, fixed); + ret = __dma_contiguous_reserve_area(size, base, limit, 0, + res_cma, fixed); if (ret) return ret; -- cgit v1.2.1 From e0bdb37d95dd44086159607e571fd70f6b62dc2d Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 6 Aug 2014 16:05:23 -0700 Subject: DMA, CMA: support arbitrary bitmap granularity PPC KVM's CMA area management requires arbitrary bitmap granularity, since they want to reserve very large memory and manage this region with bitmap that one bit for several pages to reduce management overheads. So support arbitrary bitmap granularity for following generalization. 
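A worked example of what this buys (a sketch assuming 4 KiB pages; the helper names match the ones added below): with order_per_bit = 6 one bitmap bit covers 64 pages, i.e. 256 KiB, so a 16 GiB CMA region needs a 65536-bit (8 KiB) bitmap instead of the 4194304-bit (512 KiB) bitmap that per-page granularity would require. The DMA path passes order_per_bit = 0, so its behaviour is unchanged.

	/* one bit represents 2^order_per_bit pages */
	bitmap_maxno = cma->count >> cma->order_per_bit;	/* 16 GiB, order 6 -> 65536 bits */
	/* bits used by an allocation of 'pages' pages, rounded up to the bit granularity */
	bitmap_count = ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit;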
[akpm@linux-foundation.org: s/1/1UL/] Signed-off-by: Joonsoo Kim Acked-by: Michal Nazarewicz Acked-by: Zhang Yanfei Acked-by: Minchan Kim Reviewed-by: Aneesh Kumar K.V Cc: Alexander Graf Cc: Aneesh Kumar K.V Cc: Gleb Natapov Acked-by: Marek Szyprowski Tested-by: Marek Szyprowski Cc: Paolo Bonzini Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/dma-contiguous.c | 77 +++++++++++++++++++++++++++++-------------- 1 file changed, 53 insertions(+), 24 deletions(-) (limited to 'drivers') diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c index 5f62c284072c..ad8a85bf852f 100644 --- a/drivers/base/dma-contiguous.c +++ b/drivers/base/dma-contiguous.c @@ -38,6 +38,7 @@ struct cma { unsigned long base_pfn; unsigned long count; unsigned long *bitmap; + unsigned int order_per_bit; /* Order of pages represented by one bit */ struct mutex lock; }; @@ -157,9 +158,37 @@ void __init dma_contiguous_reserve(phys_addr_t limit) static DEFINE_MUTEX(cma_mutex); +static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order) +{ + return (1UL << (align_order >> cma->order_per_bit)) - 1; +} + +static unsigned long cma_bitmap_maxno(struct cma *cma) +{ + return cma->count >> cma->order_per_bit; +} + +static unsigned long cma_bitmap_pages_to_bits(struct cma *cma, + unsigned long pages) +{ + return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit; +} + +static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, int count) +{ + unsigned long bitmap_no, bitmap_count; + + bitmap_no = (pfn - cma->base_pfn) >> cma->order_per_bit; + bitmap_count = cma_bitmap_pages_to_bits(cma, count); + + mutex_lock(&cma->lock); + bitmap_clear(cma->bitmap, bitmap_no, bitmap_count); + mutex_unlock(&cma->lock); +} + static int __init cma_activate_area(struct cma *cma) { - int bitmap_size = BITS_TO_LONGS(cma->count) * sizeof(long); + int bitmap_size = BITS_TO_LONGS(cma_bitmap_maxno(cma)) * sizeof(long); unsigned long base_pfn = cma->base_pfn, pfn = base_pfn; unsigned i = cma->count >> pageblock_order; struct zone *zone; @@ -215,9 +244,9 @@ static int __init cma_init_reserved_areas(void) core_initcall(cma_init_reserved_areas); static int __init __dma_contiguous_reserve_area(phys_addr_t size, - phys_addr_t base, phys_addr_t limit, - phys_addr_t alignment, - struct cma **res_cma, bool fixed) + phys_addr_t base, phys_addr_t limit, + phys_addr_t alignment, unsigned int order_per_bit, + struct cma **res_cma, bool fixed) { struct cma *cma = &cma_areas[cma_area_count]; int ret = 0; @@ -249,6 +278,10 @@ static int __init __dma_contiguous_reserve_area(phys_addr_t size, size = ALIGN(size, alignment); limit &= ~(alignment - 1); + /* size should be aligned with order_per_bit */ + if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit)) + return -EINVAL; + /* Reserve memory */ if (base && fixed) { if (memblock_is_region_reserved(base, size) || @@ -273,6 +306,7 @@ static int __init __dma_contiguous_reserve_area(phys_addr_t size, */ cma->base_pfn = PFN_DOWN(base); cma->count = size >> PAGE_SHIFT; + cma->order_per_bit = order_per_bit; *res_cma = cma; cma_area_count++; @@ -308,7 +342,7 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, { int ret; - ret = __dma_contiguous_reserve_area(size, base, limit, 0, + ret = __dma_contiguous_reserve_area(size, base, limit, 0, 0, res_cma, fixed); if (ret) return ret; @@ -320,17 +354,11 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, return 
0; } -static void clear_cma_bitmap(struct cma *cma, unsigned long pfn, int count) -{ - mutex_lock(&cma->lock); - bitmap_clear(cma->bitmap, pfn - cma->base_pfn, count); - mutex_unlock(&cma->lock); -} - static struct page *__dma_alloc_from_contiguous(struct cma *cma, int count, unsigned int align) { - unsigned long mask, pfn, pageno, start = 0; + unsigned long mask, pfn, start = 0; + unsigned long bitmap_maxno, bitmap_no, bitmap_count; struct page *page = NULL; int ret; @@ -343,18 +371,19 @@ static struct page *__dma_alloc_from_contiguous(struct cma *cma, int count, if (!count) return NULL; - mask = (1 << align) - 1; - + mask = cma_bitmap_aligned_mask(cma, align); + bitmap_maxno = cma_bitmap_maxno(cma); + bitmap_count = cma_bitmap_pages_to_bits(cma, count); for (;;) { mutex_lock(&cma->lock); - pageno = bitmap_find_next_zero_area(cma->bitmap, cma->count, - start, count, mask); - if (pageno >= cma->count) { + bitmap_no = bitmap_find_next_zero_area(cma->bitmap, + bitmap_maxno, start, bitmap_count, mask); + if (bitmap_no >= bitmap_maxno) { mutex_unlock(&cma->lock); break; } - bitmap_set(cma->bitmap, pageno, count); + bitmap_set(cma->bitmap, bitmap_no, bitmap_count); /* * It's safe to drop the lock here. We've marked this region for * our exclusive use. If the migration fails we will take the @@ -362,7 +391,7 @@ static struct page *__dma_alloc_from_contiguous(struct cma *cma, int count, */ mutex_unlock(&cma->lock); - pfn = cma->base_pfn + pageno; + pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit); mutex_lock(&cma_mutex); ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA); mutex_unlock(&cma_mutex); @@ -370,14 +399,14 @@ static struct page *__dma_alloc_from_contiguous(struct cma *cma, int count, page = pfn_to_page(pfn); break; } else if (ret != -EBUSY) { - clear_cma_bitmap(cma, pfn, count); + cma_clear_bitmap(cma, pfn, count); break; } - clear_cma_bitmap(cma, pfn, count); + cma_clear_bitmap(cma, pfn, count); pr_debug("%s(): memory range at %p is busy, retrying\n", __func__, pfn_to_page(pfn)); /* try again with a bit different memory target */ - start = pageno + mask + 1; + start = bitmap_no + mask + 1; } pr_debug("%s(): returned %p\n", __func__, page); @@ -424,7 +453,7 @@ static bool __dma_release_from_contiguous(struct cma *cma, struct page *pages, VM_BUG_ON(pfn + count > cma->base_pfn + cma->count); free_contig_range(pfn, count); - clear_cma_bitmap(cma, pfn, count); + cma_clear_bitmap(cma, pfn, count); return true; } -- cgit v1.2.1 From a254129e8686bff7a340b58f35241b04927e81c0 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 6 Aug 2014 16:05:25 -0700 Subject: CMA: generalize CMA reserved area management functionality Currently, there are two users on CMA functionality, one is the DMA subsystem and the other is the KVM on powerpc. They have their own code to manage CMA reserved area even if they looks really similar. From my guess, it is caused by some needs on bitmap management. KVM side wants to maintain bitmap not for 1 page, but for more size. Eventually it use bitmap where one bit represents 64 pages. When I implement CMA related patches, I should change those two places to apply my change and it seem to be painful to me. I want to change this situation and reduce future code management overhead through this patch. This change could also help developer who want to use CMA in their new feature development, since they can use CMA easily without copying & pasting this reserved area management code. 
In previous patches, we have prepared some features to generalize CMA reserved area management and now it's time to do it. This patch moves core functions to mm/cma.c and change DMA APIs to use these functions. There is no functional change in DMA APIs. Signed-off-by: Joonsoo Kim Acked-by: Michal Nazarewicz Acked-by: Zhang Yanfei Acked-by: Minchan Kim Reviewed-by: Aneesh Kumar K.V Cc: Alexander Graf Cc: Aneesh Kumar K.V Cc: Gleb Natapov Acked-by: Marek Szyprowski Tested-by: Marek Szyprowski Cc: Paolo Bonzini Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/Kconfig | 10 -- drivers/base/dma-contiguous.c | 280 ++---------------------------------------- 2 files changed, 8 insertions(+), 282 deletions(-) (limited to 'drivers') diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index 88500fed3c7a..4e7f0ff83ae7 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -289,16 +289,6 @@ config CMA_ALIGNMENT If unsure, leave the default value "8". -config CMA_AREAS - int "Maximum count of the CMA device-private areas" - default 7 - help - CMA allows to create CMA areas for particular devices. This parameter - sets the maximum number of such device private CMA areas in the - system. - - If unsure, leave the default value "7". - endif endmenu diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c index ad8a85bf852f..0411c1c57005 100644 --- a/drivers/base/dma-contiguous.c +++ b/drivers/base/dma-contiguous.c @@ -24,25 +24,9 @@ #include #include -#include -#include -#include #include -#include -#include -#include #include -#include - -struct cma { - unsigned long base_pfn; - unsigned long count; - unsigned long *bitmap; - unsigned int order_per_bit; /* Order of pages represented by one bit */ - struct mutex lock; -}; - -struct cma *dma_contiguous_default_area; +#include #ifdef CONFIG_CMA_SIZE_MBYTES #define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES @@ -50,6 +34,8 @@ struct cma *dma_contiguous_default_area; #define CMA_SIZE_MBYTES 0 #endif +struct cma *dma_contiguous_default_area; + /* * Default global CMA area size can be defined in kernel's .config. 
* This is useful mainly for distro maintainers to create a kernel @@ -156,169 +142,6 @@ void __init dma_contiguous_reserve(phys_addr_t limit) } } -static DEFINE_MUTEX(cma_mutex); - -static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order) -{ - return (1UL << (align_order >> cma->order_per_bit)) - 1; -} - -static unsigned long cma_bitmap_maxno(struct cma *cma) -{ - return cma->count >> cma->order_per_bit; -} - -static unsigned long cma_bitmap_pages_to_bits(struct cma *cma, - unsigned long pages) -{ - return ALIGN(pages, 1UL << cma->order_per_bit) >> cma->order_per_bit; -} - -static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, int count) -{ - unsigned long bitmap_no, bitmap_count; - - bitmap_no = (pfn - cma->base_pfn) >> cma->order_per_bit; - bitmap_count = cma_bitmap_pages_to_bits(cma, count); - - mutex_lock(&cma->lock); - bitmap_clear(cma->bitmap, bitmap_no, bitmap_count); - mutex_unlock(&cma->lock); -} - -static int __init cma_activate_area(struct cma *cma) -{ - int bitmap_size = BITS_TO_LONGS(cma_bitmap_maxno(cma)) * sizeof(long); - unsigned long base_pfn = cma->base_pfn, pfn = base_pfn; - unsigned i = cma->count >> pageblock_order; - struct zone *zone; - - cma->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - - if (!cma->bitmap) - return -ENOMEM; - - WARN_ON_ONCE(!pfn_valid(pfn)); - zone = page_zone(pfn_to_page(pfn)); - - do { - unsigned j; - base_pfn = pfn; - for (j = pageblock_nr_pages; j; --j, pfn++) { - WARN_ON_ONCE(!pfn_valid(pfn)); - /* - * alloc_contig_range requires the pfn range - * specified to be in the same zone. Make this - * simple by forcing the entire CMA resv range - * to be in the same zone. - */ - if (page_zone(pfn_to_page(pfn)) != zone) - goto err; - } - init_cma_reserved_pageblock(pfn_to_page(base_pfn)); - } while (--i); - - mutex_init(&cma->lock); - return 0; - -err: - kfree(cma->bitmap); - return -EINVAL; -} - -static struct cma cma_areas[MAX_CMA_AREAS]; -static unsigned cma_area_count; - -static int __init cma_init_reserved_areas(void) -{ - int i; - - for (i = 0; i < cma_area_count; i++) { - int ret = cma_activate_area(&cma_areas[i]); - if (ret) - return ret; - } - - return 0; -} -core_initcall(cma_init_reserved_areas); - -static int __init __dma_contiguous_reserve_area(phys_addr_t size, - phys_addr_t base, phys_addr_t limit, - phys_addr_t alignment, unsigned int order_per_bit, - struct cma **res_cma, bool fixed) -{ - struct cma *cma = &cma_areas[cma_area_count]; - int ret = 0; - - pr_debug("%s(size %lx, base %08lx, limit %08lx alignment %08lx)\n", - __func__, (unsigned long)size, (unsigned long)base, - (unsigned long)limit, (unsigned long)alignment); - - if (cma_area_count == ARRAY_SIZE(cma_areas)) { - pr_err("Not enough slots for CMA reserved regions!\n"); - return -ENOSPC; - } - - if (!size) - return -EINVAL; - - if (alignment && !is_power_of_2(alignment)) - return -EINVAL; - - /* - * Sanitise input arguments. - * Pages both ends in CMA area could be merged into adjacent unmovable - * migratetype page by page allocator's buddy algorithm. In the case, - * you couldn't get a contiguous memory, which is not what we want. 
- */ - alignment = max(alignment, - (phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order)); - base = ALIGN(base, alignment); - size = ALIGN(size, alignment); - limit &= ~(alignment - 1); - - /* size should be aligned with order_per_bit */ - if (!IS_ALIGNED(size >> PAGE_SHIFT, 1 << order_per_bit)) - return -EINVAL; - - /* Reserve memory */ - if (base && fixed) { - if (memblock_is_region_reserved(base, size) || - memblock_reserve(base, size) < 0) { - ret = -EBUSY; - goto err; - } - } else { - phys_addr_t addr = memblock_alloc_range(size, alignment, base, - limit); - if (!addr) { - ret = -ENOMEM; - goto err; - } else { - base = addr; - } - } - - /* - * Each reserved area must be initialised later, when more kernel - * subsystems (like slab allocator) are available. - */ - cma->base_pfn = PFN_DOWN(base); - cma->count = size >> PAGE_SHIFT; - cma->order_per_bit = order_per_bit; - *res_cma = cma; - cma_area_count++; - - pr_info("CMA: reserved %ld MiB at %08lx\n", (unsigned long)size / SZ_1M, - (unsigned long)base); - return 0; - -err: - pr_err("CMA: failed to reserve %ld MiB\n", (unsigned long)size / SZ_1M); - return ret; -} - /** * dma_contiguous_reserve_area() - reserve custom contiguous area * @size: Size of the reserved area (in bytes), @@ -342,77 +165,17 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, { int ret; - ret = __dma_contiguous_reserve_area(size, base, limit, 0, 0, - res_cma, fixed); + ret = cma_declare_contiguous(size, base, limit, 0, 0, res_cma, fixed); if (ret) return ret; /* Architecture specific contiguous memory fixup. */ - dma_contiguous_early_fixup(PFN_PHYS((*res_cma)->base_pfn), - (*res_cma)->count << PAGE_SHIFT); + dma_contiguous_early_fixup(cma_get_base(*res_cma), + cma_get_size(*res_cma)); return 0; } -static struct page *__dma_alloc_from_contiguous(struct cma *cma, int count, - unsigned int align) -{ - unsigned long mask, pfn, start = 0; - unsigned long bitmap_maxno, bitmap_no, bitmap_count; - struct page *page = NULL; - int ret; - - if (!cma || !cma->count) - return NULL; - - pr_debug("%s(cma %p, count %d, align %d)\n", __func__, (void *)cma, - count, align); - - if (!count) - return NULL; - - mask = cma_bitmap_aligned_mask(cma, align); - bitmap_maxno = cma_bitmap_maxno(cma); - bitmap_count = cma_bitmap_pages_to_bits(cma, count); - - for (;;) { - mutex_lock(&cma->lock); - bitmap_no = bitmap_find_next_zero_area(cma->bitmap, - bitmap_maxno, start, bitmap_count, mask); - if (bitmap_no >= bitmap_maxno) { - mutex_unlock(&cma->lock); - break; - } - bitmap_set(cma->bitmap, bitmap_no, bitmap_count); - /* - * It's safe to drop the lock here. We've marked this region for - * our exclusive use. If the migration fails we will take the - * lock again and unmark it. - */ - mutex_unlock(&cma->lock); - - pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit); - mutex_lock(&cma_mutex); - ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA); - mutex_unlock(&cma_mutex); - if (ret == 0) { - page = pfn_to_page(pfn); - break; - } else if (ret != -EBUSY) { - cma_clear_bitmap(cma, pfn, count); - break; - } - cma_clear_bitmap(cma, pfn, count); - pr_debug("%s(): memory range at %p is busy, retrying\n", - __func__, pfn_to_page(pfn)); - /* try again with a bit different memory target */ - start = bitmap_no + mask + 1; - } - - pr_debug("%s(): returned %p\n", __func__, page); - return page; -} - /** * dma_alloc_from_contiguous() - allocate pages from contiguous area * @dev: Pointer to device for which the allocation is performed. 
@@ -427,35 +190,10 @@ static struct page *__dma_alloc_from_contiguous(struct cma *cma, int count, struct page *dma_alloc_from_contiguous(struct device *dev, int count, unsigned int align) { - struct cma *cma = dev_get_cma_area(dev); - if (align > CONFIG_CMA_ALIGNMENT) align = CONFIG_CMA_ALIGNMENT; - return __dma_alloc_from_contiguous(cma, count, align); -} - -static bool __dma_release_from_contiguous(struct cma *cma, struct page *pages, - int count) -{ - unsigned long pfn; - - if (!cma || !pages) - return false; - - pr_debug("%s(page %p)\n", __func__, (void *)pages); - - pfn = page_to_pfn(pages); - - if (pfn < cma->base_pfn || pfn >= cma->base_pfn + cma->count) - return false; - - VM_BUG_ON(pfn + count > cma->base_pfn + cma->count); - - free_contig_range(pfn, count); - cma_clear_bitmap(cma, pfn, count); - - return true; + return cma_alloc(dev_get_cma_area(dev), count, align); } /** @@ -471,7 +209,5 @@ static bool __dma_release_from_contiguous(struct cma *cma, struct page *pages, bool dma_release_from_contiguous(struct device *dev, struct page *pages, int count) { - struct cma *cma = dev_get_cma_area(dev); - - return __dma_release_from_contiguous(cma, pages, count); + return cma_release(dev_get_cma_area(dev), pages, count); } -- cgit v1.2.1 From c1f733aaaf30a0068a3126d5aa9d5b4c25ba4c0c Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 6 Aug 2014 16:05:32 -0700 Subject: mm, CMA: change cma_declare_contiguous() to obey coding convention Conventionally, we put output param to the end of param list and put the 'base' ahead of 'size', but cma_declare_contiguous() doesn't look like that, so change it. Additionally, move down cma_areas reference code to the position where it is really needed. Signed-off-by: Joonsoo Kim Acked-by: Michal Nazarewicz Reviewed-by: Aneesh Kumar K.V Cc: Alexander Graf Cc: Aneesh Kumar K.V Cc: Gleb Natapov Acked-by: Marek Szyprowski Tested-by: Marek Szyprowski Cc: Minchan Kim Cc: Paolo Bonzini Cc: Zhang Yanfei Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/dma-contiguous.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c index 0411c1c57005..6606abdf880c 100644 --- a/drivers/base/dma-contiguous.c +++ b/drivers/base/dma-contiguous.c @@ -165,7 +165,7 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, { int ret; - ret = cma_declare_contiguous(size, base, limit, 0, 0, res_cma, fixed); + ret = cma_declare_contiguous(base, size, limit, 0, 0, fixed, res_cma); if (ret) return ret; -- cgit v1.2.1 From b69deb2b7e13f04da5c0684c7ce19e788736ab0d Mon Sep 17 00:00:00 2001 From: Zhang Zhen Date: Wed, 6 Aug 2014 16:06:06 -0700 Subject: mm/mem-hotplug: replace simple_strtoull() with kstrtoull() Use the newer and more pleasant kstrtoull() to replace simple_strtoull(), because simple_strtoull() is marked for obsoletion. 
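For context, the behavioural difference (a sketch, not part of the patch): simple_strtoull() has no way to report a malformed string, while kstrtoull() returns 0 on success or -EINVAL/-ERANGE on failure and therefore forces the caller to check:

	u64 phys_addr;
	int ret;

	ret = kstrtoull(buf, 0, &phys_addr);	/* base 0: accepts 0x... hex, 0... octal, or decimal */
	if (ret)
		return ret;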
Signed-off-by: Zhang Zhen Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 7c60ed27e711..a2e13e250bba 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -406,7 +406,9 @@ memory_probe_store(struct device *dev, struct device_attribute *attr, int i, ret; unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block; - phys_addr = simple_strtoull(buf, NULL, 0); + ret = kstrtoull(buf, 0, &phys_addr); + if (ret) + return ret; if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1)) return -EINVAL; -- cgit v1.2.1 From cc7452b6dca384400960d40090a98d0eb920ab22 Mon Sep 17 00:00:00 2001 From: Rafael Aquini Date: Wed, 6 Aug 2014 16:06:38 -0700 Subject: mm: export NR_SHMEM via sysinfo(2) / si_meminfo() interfaces Historically, we exported shared pages to userspace via sysinfo(2) sharedram and /proc/meminfo's "MemShared" fields. With the advent of tmpfs, from kernel v2.4 onward, that old way for accounting shared mem was deemed inaccurate and we started to export a hard-coded 0 for sysinfo.sharedram. Later on, during the 2.6 timeframe, "MemShared" got re-introduced to /proc/meminfo re-branded as "Shmem", but we're still reporting sysinfo.sharedmem as that old hard-coded zero, which makes the "shared memory" report inconsistent across interfaces. This patch leverages the addition of explicit accounting for pages used by shmem/tmpfs -- "4b02108 mm: oom analysis: add shmem vmstat" -- in order to make the users of sysinfo(2) and si_meminfo*() friends aware of that vmstat entry and make them report it consistently across the interfaces, as well to make sysinfo(2) returned data consistent with our current API documentation states. Signed-off-by: Rafael Aquini Acked-by: Rik van Riel Cc: Mel Gorman Cc: Johannes Weiner Cc: KOSAKI Motohiro Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/base/node.c b/drivers/base/node.c index 8f7ed9933a7c..c6d3ae05f1ca 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -126,7 +126,7 @@ static ssize_t node_read_meminfo(struct device *dev, nid, K(node_page_state(nid, NR_FILE_PAGES)), nid, K(node_page_state(nid, NR_FILE_MAPPED)), nid, K(node_page_state(nid, NR_ANON_PAGES)), - nid, K(node_page_state(nid, NR_SHMEM)), + nid, K(i.sharedram), nid, node_page_state(nid, NR_KERNEL_STACK) * THREAD_SIZE / 1024, nid, K(node_page_state(nid, NR_PAGETABLE)), -- cgit v1.2.1 From f6f8ed47353597dcb895eb4a15a28af657392e72 Mon Sep 17 00:00:00 2001 From: WANG Chao Date: Wed, 6 Aug 2014 16:06:58 -0700 Subject: mm/vmalloc.c: clean up map_vm_area third argument Currently map_vm_area() takes (struct page *** pages) as third argument, and after mapping, it moves (*pages) to point to (*pages + nr_mappped_pages). It looks like this kind of increment is useless to its caller these days. The callers don't care about the increments and actually they're trying to avoid this by passing another copy to map_vm_area(). The caller can always guarantee all the pages can be mapped into vm_area as specified in first argument and the caller only cares about whether map_vm_area() fails or not. This patch cleans up the pointer movement in map_vm_area() and updates its callers accordingly. 
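The resulting prototype change, for reference (inferred from the updated callers below rather than quoted from the header):

	/* before: the third argument was advanced past the mapped pages */
	int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages);

	/* after: a plain page array, left untouched by the call */
	int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages);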
Signed-off-by: WANG Chao Cc: Zhang Yanfei Acked-by: Greg Kroah-Hartman Cc: Minchan Kim Cc: Nitin Gupta Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/lguest/core.c | 7 ++----- drivers/staging/android/binder.c | 4 +--- 2 files changed, 3 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index 0bf1e4edf04d..6590558d1d31 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -42,7 +42,6 @@ DEFINE_MUTEX(lguest_lock); static __init int map_switcher(void) { int i, err; - struct page **pagep; /* * Map the Switcher in to high memory. @@ -110,11 +109,9 @@ static __init int map_switcher(void) * This code actually sets up the pages we've allocated to appear at * switcher_addr. map_vm_area() takes the vma we allocated above, the * kind of pages we're mapping (kernel pages), and a pointer to our - * array of struct pages. It increments that pointer, but we don't - * care. + * array of struct pages. */ - pagep = lg_switcher_pages; - err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep); + err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, lg_switcher_pages); if (err) { printk("lguest: map_vm_area failed: %i\n", err); goto free_vma; diff --git a/drivers/staging/android/binder.c b/drivers/staging/android/binder.c index 02b0379ae550..4f34dc0095b5 100644 --- a/drivers/staging/android/binder.c +++ b/drivers/staging/android/binder.c @@ -585,7 +585,6 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) { int ret; - struct page **page_array_ptr; page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE]; @@ -598,8 +597,7 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate, } tmp_area.addr = page_addr; tmp_area.size = PAGE_SIZE + PAGE_SIZE /* guard page? */; - page_array_ptr = page; - ret = map_vm_area(&tmp_area, PAGE_KERNEL, &page_array_ptr); + ret = map_vm_area(&tmp_area, PAGE_KERNEL, page); if (ret) { pr_err("%d: binder_alloc_buf failed to map page at %p in kernel\n", proc->pid, page_addr); -- cgit v1.2.1 From 49c8b24d00b6cb06a9c3fb959a957319cc770d71 Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Wed, 6 Aug 2014 16:07:00 -0700 Subject: drivers/firmware/memmap.c: pass the correct argument to firmware_map_find_entry_bootmem() firmware_map_add_hotplug() calls firmware_map_find_entry_bootmem() to get free firmware_map_entry. But end arguments is not correct. So firmware_map_find_entry_bootmem() cannot not find firmware_map_entry. The patch passes the correct end argument to firmware_map_find_entry_bootmem(). 
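The reasoning behind the off-by-one, spelled out (an inference from the surrounding code, not stated in the changelog): firmware map entries record an inclusive end address -- which is also why the memmap sysfs dump quoted in the next patch ends in ...fff -- while firmware_map_add_hotplug() is passed an exclusive end, so the lookup has to convert:

	/* the entry covers [start, end - 1]; 'end' passed in here is exclusive */
	entry = firmware_map_find_entry_bootmem(start, end - 1, type);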
Signed-off-by: Yasuaki Ishimatsu Cc: Santosh Shilimkar Cc: Toshi Kani Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/firmware/memmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c index 17cf96c45f2b..1815849f83cb 100644 --- a/drivers/firmware/memmap.c +++ b/drivers/firmware/memmap.c @@ -286,7 +286,7 @@ int __meminit firmware_map_add_hotplug(u64 start, u64 end, const char *type) { struct firmware_map_entry *entry; - entry = firmware_map_find_entry_bootmem(start, end, type); + entry = firmware_map_find_entry_bootmem(start, end - 1, type); if (!entry) { entry = kzalloc(sizeof(struct firmware_map_entry), GFP_ATOMIC); if (!entry) -- cgit v1.2.1 From f0093ede9b726ccb1876d43574f5b45c79940aca Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Wed, 6 Aug 2014 16:07:03 -0700 Subject: drivers/firmware/memmap.c: don't allocate firmware_map_entry of same memory range When limiting memory by mem= and ACPI DSDT table has PNP0C80, firmware_map_entrys of same memory range are allocated and memmap X sysfses which have same memory range are created as follows: # cat /sys/firmware/memmap/0/* 0x407ffffffff 0x40000000000 System RAM # cat /sys/firmware/memmap/33/* 0x407ffffffff 0x40000000000 System RAM # cat /sys/firmware/memmap/35/* 0x407ffffffff 0x40000000000 System RAM In this case, when hot-removing memory, kernel panic occurs, showing following call trace: BUG: unable to handle kernel paging request at 00000001003e000b IP: sysfs_open_file+0x46/0x2b0 PGD 203a89fe067 PUD 0 Oops: 0000 [#1] SMP ... Call Trace: do_dentry_open+0x1ef/0x2a0 finish_open+0x31/0x40 do_last+0x57c/0x1220 path_openat+0xc2/0x4c0 do_filp_open+0x4b/0xb0 do_sys_open+0xf3/0x1f0 SyS_open+0x1e/0x20 system_call_fastpath+0x16/0x1b The problem occurs as follows: When calling e820_reserve_resources(), firmware_map_entrys of all e820 memory map are allocated. And all firmware_map_entrys is added map_entries list as follows: map_entries -> +--- entry A --------+ -> ... | start 0x407ffffffff| | end 0x40000000000| | type System RAM | +--------------------+ After that, if ACPI DSDT table has PNP0C80 and the memory range is limited by mem=, the PNP0C80 is hot-added. Then firmware_map_entry of PNP0C80 is allocated and added map_entries list as follows: map_entries -> +--- entry A --------+ -> ... -> +--- entry B --------+ | start 0x407ffffffff| | start 0x407ffffffff| | end 0x40000000000| | end 0x40000000000| | type System RAM | | type System RAM | +--------------------+ +--------------------+ Then memmap 0 sysfs for entry B is created. After that, firmware_memmap_init() creates memmap sysfses of all firmware_map_entrys in map_entries list. As a result, memmap 33 sysfs for entry A and memmap 35 sysfs for entry B are created. But kobject of entry B has been used by memmap 0 sysfs. So when creating memmap 35 sysfs, the kobject is broken. If hot-removing memory, memmap 0 sysfs is destroyed and kobject of memmap 0 sysfs is freed. But the kobject can be accessed via memmap 35 sysfs. So when open memmap 35 sysfs, kernel panic occurs. This patch checks whether there is firmware_map_entry of same memory range in map_entries list and don't allocate firmware_map_entry of same memroy range. 
Signed-off-by: Yasuaki Ishimatsu Cc: Santosh Shilimkar Cc: Toshi Kani Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/firmware/memmap.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers') diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c index 1815849f83cb..79f18e6d9c4f 100644 --- a/drivers/firmware/memmap.c +++ b/drivers/firmware/memmap.c @@ -286,6 +286,10 @@ int __meminit firmware_map_add_hotplug(u64 start, u64 end, const char *type) { struct firmware_map_entry *entry; + entry = firmware_map_find_entry(start, end - 1, type); + if (entry) + return 0; + entry = firmware_map_find_entry_bootmem(start, end - 1, type); if (!entry) { entry = kzalloc(sizeof(struct firmware_map_entry), GFP_ATOMIC); -- cgit v1.2.1 From 8d060bf490930f305c4efc45724e861a268f4d2f Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 6 Aug 2014 16:07:50 -0700 Subject: mm, oom: ensure memoryless node zonelist always includes zones With memoryless node support being worked on, it's possible that for optimizations that a node may not have a non-NULL zonelist. When CONFIG_NUMA is enabled and node 0 is memoryless, this means the zonelist for first_online_node may become NULL. The oom killer requires a zonelist that includes all memory zones for the sysrq trigger and pagefault out of memory handler. Ensure that a non-NULL zonelist is always passed to the oom killer. [akpm@linux-foundation.org: fix non-numa build] Signed-off-by: David Rientjes Cc: "Kirill A. Shutemov" Cc: Johannes Weiner Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/tty/sysrq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 454b65898e2c..42bad18c66c9 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -355,7 +355,7 @@ static struct sysrq_key_op sysrq_term_op = { static void moom_callback(struct work_struct *ignored) { - out_of_memory(node_zonelist(first_online_node, GFP_KERNEL), GFP_KERNEL, + out_of_memory(node_zonelist(first_memory_node, GFP_KERNEL), GFP_KERNEL, 0, NULL, true); } -- cgit v1.2.1 From cb8f2eec3c5c87e31219c5e58625b8e890004e48 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 6 Aug 2014 16:08:25 -0700 Subject: zram: rename struct `table' to `zram_table_entry' Andrew Morton has recently noted that `struct table' actually represents table entry and, thus, should be renamed. Rename to `zram_table_entry'. 
Signed-off-by: Sergey Senozhatsky Cc: Minchan Kim Cc: Nitin Gupta Cc: Weijie Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 7f21c145e317..8909f86caf0d 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -62,7 +62,7 @@ enum zram_pageflags { /*-- Data structures */ /* Allocated for each disk page */ -struct table { +struct zram_table_entry { unsigned long handle; u16 size; /* object size (excluding header) */ u8 flags; @@ -82,7 +82,7 @@ struct zram_stats { struct zram_meta { rwlock_t tb_lock; /* protect table */ - struct table *table; + struct zram_table_entry *table; struct zs_pool *mem_pool; }; -- cgit v1.2.1 From a830eff749eb2bf906783f6bf74a74dad3de3aea Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 6 Aug 2014 16:08:27 -0700 Subject: zram: remove unused SECTOR_SIZE define Drop SECTOR_SIZE define, because it's not used. Signed-off-by: Sergey Senozhatsky Cc: Minchan Kim Cc: Nitin Gupta Cc: Weijie Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.h | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 8909f86caf0d..c8161bd8969c 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -43,7 +43,6 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; /*-- End of configurable params */ #define SECTOR_SHIFT 9 -#define SECTOR_SIZE (1 << SECTOR_SHIFT) #define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) #define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT) #define ZRAM_LOGICAL_BLOCK_SHIFT 12 -- cgit v1.2.1 From 023b409f9dac4cdea3322009f2e592068558690c Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 6 Aug 2014 16:08:29 -0700 Subject: zram: use size_t instead of u16 Some architectures (eg, hexagon and PowerPC) could use PAGE_SHIFT of 16 or more. In these cases u16 is not sufficiently large to represent a compressed page's size so use size_t. Signed-off-by: Minchan Kim Reported-by: Weijie Yang Acked-by: Sergey Senozhatsky Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 36e54be402df..40743972eaf7 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -337,7 +337,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) unsigned char *cmem; struct zram_meta *meta = zram->meta; unsigned long handle; - u16 size; + size_t size; read_lock(&meta->tb_lock); handle = meta->table[index].handle; -- cgit v1.2.1 From d2d5e762c8990c4031890e03565983a05febd64a Mon Sep 17 00:00:00 2001 From: Weijie Yang Date: Wed, 6 Aug 2014 16:08:31 -0700 Subject: zram: replace global tb_lock with fine grain lock Currently, we use a rwlock tb_lock to protect concurrent access to the whole zram meta table. However, according to the actual access model, there is only a small chance for upper user to access the same table[index], so the current lock granularity is too big. 
The idea of the optimization is to change the lock granularity from the whole meta table to a per-table-entry lock (table -> table[index]), so that concurrent access to the same table[index] is still protected while the maximum concurrency is allowed. With this in mind, several kinds of locks that could serve as a per-entry lock were tested and compared:

Test environment: x86-64 Intel Core2 Q8400, system memory 4GB, Ubuntu 12.04,
kernel v3.15.0-rc3 as base, zram with 4 max_comp_streams LZO.

iozone test:
iozone -t 4 -R -r 16K -s 200M -I +Z
(1GB zram with ext4 filesystem, take the average of 10 tests, KB/s)

 Test            base        CAS   spinlock     rwlock  bit_spinlock
-------------------------------------------------------------------
 Initial write  1381094   1425435   1422860   1423075   1421521
 Rewrite        1529479   1641199   1668762   1672855   1654910
 Read           8468009  11324979  11305569  11117273  10997202
 Re-read        8467476  11260914  11248059  11145336  10906486
 Reverse Read   6821393   8106334   8282174   8279195   8109186
 Stride read    7191093   8994306   9153982   8961224   9004434
 Random read    7156353   8957932   9167098   8980465   8940476
 Mixed workload 4172747   5680814   5927825   5489578   5972253
 Random write   1483044   1605588   1594329   1600453   1596010
 Pwrite         1276644   1303108   1311612   1314228   1300960
 Pread          4324337   4632869   4618386   4457870   4500166

To increase the likelihood of concurrent access to the same table[index], set zram to a small disksize (10MB) and let the threads run with a large loop count.

fio test:
fio --bs=32k --randrepeat=1 --randseed=100 --refill_buffers
    --scramble_buffers=1 --direct=1 --loops=3000 --numjobs=4
    --filename=/dev/zram0 --name=seq-write --rw=write --stonewall
    --name=seq-read --rw=read --stonewall --name=seq-readwrite
    --rw=rw --stonewall --name=rand-readwrite --rw=randrw --stonewall
(10MB zram raw block device, take the average of 10 tests, KB/s)

 Test          base       CAS   spinlock    rwlock  bit_spinlock
-------------------------------------------------------------
 seq-write    933789    999357   1003298    995961   1001958
 seq-read    5634130   6577930   6380861   6243912   6230006
 seq-rw      1405687   1638117   1640256   1633903   1634459
 rand-rw     1386119   1614664   1617211   1609267   1612471

All the optimization methods show higher performance than the base; however, it is hard to say which one is the most appropriate. On the other hand, zram is mostly used on small embedded systems, so we don't want to increase the memory footprint. This patch picks the bit_spinlock method and packs the object size and the page flags into a single unsigned long table.value, so as not to add any memory overhead on either 32-bit or 64-bit systems.

Even though the different kinds of locks perform differently, we can ignore the difference here: if zram is used as a swap device, the swap subsystem already prevents concurrent access to the same swap slot; if zram is used as a block device with a filesystem on top, the filesystem and the page cache likewise prevent most concurrent access to the same block. So we can ignore the performance differences among the locks.
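To make the chosen packing concrete (a condensed sketch of what the patch below does; the constants are taken from the zram_drv.h hunk): the low ZRAM_FLAG_SHIFT (24) bits of table[index].value hold the compressed object size and the zram_pageflags bits live above them, with the ZRAM_ACCESS bit doubling as the per-entry lock:

	/* table[index].value layout:
	 *   bits 0..23 : compressed object size
	 *   bits 24+   : zram_pageflags (ZRAM_ZERO, ZRAM_ACCESS, ...)
	 */
	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);	/* lock only this entry */
	zram_free_page(zram, index);				/* read/update handle, size, flags */
	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);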
Acked-by: Sergey Senozhatsky Reviewed-by: Davidlohr Bueso Signed-off-by: Weijie Yang Signed-off-by: Minchan Kim Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 69 ++++++++++++++++++++++++++----------------- drivers/block/zram/zram_drv.h | 24 +++++++++++---- 2 files changed, 60 insertions(+), 33 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 40743972eaf7..dfa4024c448a 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -183,19 +183,32 @@ static ssize_t comp_algorithm_store(struct device *dev, static int zram_test_flag(struct zram_meta *meta, u32 index, enum zram_pageflags flag) { - return meta->table[index].flags & BIT(flag); + return meta->table[index].value & BIT(flag); } static void zram_set_flag(struct zram_meta *meta, u32 index, enum zram_pageflags flag) { - meta->table[index].flags |= BIT(flag); + meta->table[index].value |= BIT(flag); } static void zram_clear_flag(struct zram_meta *meta, u32 index, enum zram_pageflags flag) { - meta->table[index].flags &= ~BIT(flag); + meta->table[index].value &= ~BIT(flag); +} + +static size_t zram_get_obj_size(struct zram_meta *meta, u32 index) +{ + return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1); +} + +static void zram_set_obj_size(struct zram_meta *meta, + u32 index, size_t size) +{ + unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT; + + meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size; } static inline int is_partial_io(struct bio_vec *bvec) @@ -255,7 +268,6 @@ static struct zram_meta *zram_meta_alloc(u64 disksize) goto free_table; } - rwlock_init(&meta->tb_lock); return meta; free_table: @@ -304,7 +316,12 @@ static void handle_zero_page(struct bio_vec *bvec) flush_dcache_page(page); } -/* NOTE: caller should hold meta->tb_lock with write-side */ + +/* + * To protect concurrent access to the same index entry, + * caller should hold this table index entry's bit_spinlock to + * indicate this index entry is accessing. + */ static void zram_free_page(struct zram *zram, size_t index) { struct zram_meta *meta = zram->meta; @@ -324,11 +341,12 @@ static void zram_free_page(struct zram *zram, size_t index) zs_free(meta->mem_pool, handle); - atomic64_sub(meta->table[index].size, &zram->stats.compr_data_size); + atomic64_sub(zram_get_obj_size(meta, index), + &zram->stats.compr_data_size); atomic64_dec(&zram->stats.pages_stored); meta->table[index].handle = 0; - meta->table[index].size = 0; + zram_set_obj_size(meta, index, 0); } static int zram_decompress_page(struct zram *zram, char *mem, u32 index) @@ -339,12 +357,12 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) unsigned long handle; size_t size; - read_lock(&meta->tb_lock); + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); handle = meta->table[index].handle; - size = meta->table[index].size; + size = zram_get_obj_size(meta, index); if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { - read_unlock(&meta->tb_lock); + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); clear_page(mem); return 0; } @@ -355,7 +373,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) else ret = zcomp_decompress(zram->comp, cmem, size, mem); zs_unmap_object(meta->mem_pool, handle); - read_unlock(&meta->tb_lock); + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); /* Should NEVER happen. Return bio error if it does. 
*/ if (unlikely(ret)) { @@ -376,14 +394,14 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, struct zram_meta *meta = zram->meta; page = bvec->bv_page; - read_lock(&meta->tb_lock); + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); if (unlikely(!meta->table[index].handle) || zram_test_flag(meta, index, ZRAM_ZERO)) { - read_unlock(&meta->tb_lock); + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); handle_zero_page(bvec); return 0; } - read_unlock(&meta->tb_lock); + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); if (is_partial_io(bvec)) /* Use a temporary buffer to decompress the page */ @@ -461,10 +479,10 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, if (page_zero_filled(uncmem)) { kunmap_atomic(user_mem); /* Free memory associated with this sector now. */ - write_lock(&zram->meta->tb_lock); + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); zram_free_page(zram, index); zram_set_flag(meta, index, ZRAM_ZERO); - write_unlock(&zram->meta->tb_lock); + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); atomic64_inc(&zram->stats.zero_pages); ret = 0; @@ -514,12 +532,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, * Free memory associated with this sector * before overwriting unused sectors. */ - write_lock(&zram->meta->tb_lock); + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); zram_free_page(zram, index); meta->table[index].handle = handle; - meta->table[index].size = clen; - write_unlock(&zram->meta->tb_lock); + zram_set_obj_size(meta, index, clen); + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); /* Update stats */ atomic64_add(clen, &zram->stats.compr_data_size); @@ -560,6 +578,7 @@ static void zram_bio_discard(struct zram *zram, u32 index, int offset, struct bio *bio) { size_t n = bio->bi_iter.bi_size; + struct zram_meta *meta = zram->meta; /* * zram manages data in physical block size units. Because logical block @@ -580,13 +599,9 @@ static void zram_bio_discard(struct zram *zram, u32 index, } while (n >= PAGE_SIZE) { - /* - * Discard request can be large so the lock hold times could be - * lengthy. So take the lock once per page. - */ - write_lock(&zram->meta->tb_lock); + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); zram_free_page(zram, index); - write_unlock(&zram->meta->tb_lock); + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); index++; n -= PAGE_SIZE; } @@ -821,9 +836,9 @@ static void zram_slot_free_notify(struct block_device *bdev, zram = bdev->bd_disk->private_data; meta = zram->meta; - write_lock(&meta->tb_lock); + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); zram_free_page(zram, index); - write_unlock(&meta->tb_lock); + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); atomic64_inc(&zram->stats.notify_free); } diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index c8161bd8969c..5b0afde729cd 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -50,10 +50,24 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; #define ZRAM_SECTOR_PER_LOGICAL_BLOCK \ (1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT)) -/* Flags for zram pages (table[page_no].flags) */ + +/* + * The lower ZRAM_FLAG_SHIFT bits of table.value is for + * object size (excluding header), the higher bits is for + * zram_pageflags. + * + * zram is mainly used for memory efficiency so we want to keep memory + * footprint small so we can squeeze size and flags into a field. 
+ * The lower ZRAM_FLAG_SHIFT bits is for object size (excluding header), + * the higher bits is for zram_pageflags. + */ +#define ZRAM_FLAG_SHIFT 24 + +/* Flags for zram pages (table[page_no].value) */ enum zram_pageflags { /* Page consists entirely of zeros */ - ZRAM_ZERO, + ZRAM_ZERO = ZRAM_FLAG_SHIFT + 1, + ZRAM_ACCESS, /* page in now accessed */ __NR_ZRAM_PAGEFLAGS, }; @@ -63,9 +77,8 @@ enum zram_pageflags { /* Allocated for each disk page */ struct zram_table_entry { unsigned long handle; - u16 size; /* object size (excluding header) */ - u8 flags; -} __aligned(4); + unsigned long value; +}; struct zram_stats { atomic64_t compr_data_size; /* compressed size of pages stored */ @@ -80,7 +93,6 @@ struct zram_stats { }; struct zram_meta { - rwlock_t tb_lock; /* protect table */ struct zram_table_entry *table; struct zs_pool *mem_pool; }; -- cgit v1.2.1 From 68be302963230fa76600cd598935a830ac95dca2 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 6 Aug 2014 16:08:45 -0700 Subject: fs.h, drivers/hwmon/asus_atk0110.c: fix DEFINE_SIMPLE_ATTRIBUTE semicolon definition and use The DEFINE_SIMPLE_ATTRIBUTE macro should not end in a ; Fix the one use in the kernel tree that did not have a semicolon. Signed-off-by: Joe Perches Acked-by: Guenter Roeck Acked-by: Luca Tettamanti Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/hwmon/asus_atk0110.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/hwmon/asus_atk0110.c b/drivers/hwmon/asus_atk0110.c index ae208f612198..cccef87963e0 100644 --- a/drivers/hwmon/asus_atk0110.c +++ b/drivers/hwmon/asus_atk0110.c @@ -688,7 +688,7 @@ static int atk_debugfs_gitm_get(void *p, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(atk_debugfs_gitm, atk_debugfs_gitm_get, NULL, - "0x%08llx\n") + "0x%08llx\n"); static int atk_acpi_print(char *buf, size_t sz, union acpi_object *obj) { -- cgit v1.2.1 From 1d023284c31a4e40a94d5bbcb7dbb7a35ee0bcbc Mon Sep 17 00:00:00 2001 From: Ken Helias Date: Wed, 6 Aug 2014 16:09:16 -0700 Subject: list: fix order of arguments for hlist_add_after(_rcu) All other add functions for lists have the new item as first argument and the position where it is added as second argument. This was changed for no good reason in this function and makes using it unnecessary confusing. The name was changed to hlist_add_behind() to cause unconverted code to generate a compile error instead of using the wrong parameter order. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Ken Helias Cc: "Paul E. 
McKenney" Acked-by: Jeff Kirsher [intel driver bits] Cc: Hugh Dickins Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpu/drm/drm_hashtab.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_ethtool.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 2 +- drivers/staging/lustre/lustre/libcfs/hash.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/drm_hashtab.c b/drivers/gpu/drm/drm_hashtab.c index 7e4bae760e27..c3b80fd65d62 100644 --- a/drivers/gpu/drm/drm_hashtab.c +++ b/drivers/gpu/drm/drm_hashtab.c @@ -125,7 +125,7 @@ int drm_ht_insert_item(struct drm_open_hash *ht, struct drm_hash_item *item) parent = &entry->head; } if (parent) { - hlist_add_after_rcu(parent, &item->head); + hlist_add_behind_rcu(&item->head, parent); } else { hlist_add_head_rcu(&item->head, h_list); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 681a9e81ff51..e8ba7470700a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1948,7 +1948,7 @@ static int i40e_update_ethtool_fdir_entry(struct i40e_vsi *vsi, /* add filter to the list */ if (parent) - hlist_add_after(&parent->fdir_node, &input->fdir_node); + hlist_add_behind(&input->fdir_node, &parent->fdir_node); else hlist_add_head(&input->fdir_node, &pf->fdir_filter_list); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 94a1c07efeb0..e4100b5737b6 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -2517,7 +2517,7 @@ static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter, /* add filter to the list */ if (parent) - hlist_add_after(&parent->fdir_node, &input->fdir_node); + hlist_add_behind(&input->fdir_node, &parent->fdir_node); else hlist_add_head(&input->fdir_node, &adapter->fdir_filter_list); diff --git a/drivers/staging/lustre/lustre/libcfs/hash.c b/drivers/staging/lustre/lustre/libcfs/hash.c index 5dde79418297..8ef1deb59d4a 100644 --- a/drivers/staging/lustre/lustre/libcfs/hash.c +++ b/drivers/staging/lustre/lustre/libcfs/hash.c @@ -351,7 +351,7 @@ cfs_hash_dh_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, cfs_hash_dhead_t, dh_head); if (dh->dh_tail != NULL) /* not empty */ - hlist_add_after(dh->dh_tail, hnode); + hlist_add_behind(hnode, dh->dh_tail); else /* empty list */ hlist_add_head(hnode, &dh->dh_head); dh->dh_tail = hnode; @@ -406,7 +406,7 @@ cfs_hash_dd_hnode_add(struct cfs_hash *hs, struct cfs_hash_bd *bd, cfs_hash_dhead_dep_t, dd_head); if (dh->dd_tail != NULL) /* not empty */ - hlist_add_after(dh->dd_tail, hnode); + hlist_add_behind(hnode, dh->dd_tail); else /* empty list */ hlist_add_head(hnode, &dh->dd_head); dh->dd_tail = hnode; -- cgit v1.2.1 From 428ac5fc056e06dc0b4ed82d5979add9a8c62b35 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Wed, 6 Aug 2014 16:09:27 -0700 Subject: libata: Use glob_match from lib/glob.c The function may be useful for other drivers, so export it. (Suggested by Tejun Heo.) Note that I inverted the return value of glob_match; returning true on match seemed to make more sense. 
Signed-off-by: George Spelvin Cc: Randy Dunlap Cc: Tejun Heo Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/ata/Kconfig | 1 + drivers/ata/libata-core.c | 72 ++--------------------------------------------- 2 files changed, 4 insertions(+), 69 deletions(-) (limited to 'drivers') diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index e65d400efd44..e1b92788c225 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -16,6 +16,7 @@ menuconfig ATA depends on BLOCK depends on !(M32R || M68K || S390) || BROKEN select SCSI + select GLOB ---help--- If you want to use an ATA hard disk, ATA tape drive, ATA CD-ROM or any other ATA device under Linux, say Y and make sure that you know diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 677c0c1b03bd..dbdc5d32343f 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #include #include @@ -4250,73 +4251,6 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { } }; -/** - * glob_match - match a text string against a glob-style pattern - * @text: the string to be examined - * @pattern: the glob-style pattern to be matched against - * - * Either/both of text and pattern can be empty strings. - * - * Match text against a glob-style pattern, with wildcards and simple sets: - * - * ? matches any single character. - * * matches any run of characters. - * [xyz] matches a single character from the set: x, y, or z. - * [a-d] matches a single character from the range: a, b, c, or d. - * [a-d0-9] matches a single character from either range. - * - * The special characters ?, [, -, or *, can be matched using a set, eg. [*] - * Behaviour with malformed patterns is undefined, though generally reasonable. - * - * Sample patterns: "SD1?", "SD1[0-5]", "*R0", "SD*1?[012]*xx" - * - * This function uses one level of recursion per '*' in pattern. - * Since it calls _nothing_ else, and has _no_ explicit local variables, - * this will not cause stack problems for any reasonable use here. - * - * RETURNS: - * 0 on match, 1 otherwise. - */ -static int glob_match (const char *text, const char *pattern) -{ - do { - /* Match single character or a '?' 
wildcard */ - if (*text == *pattern || *pattern == '?') { - if (!*pattern++) - return 0; /* End of both strings: match */ - } else { - /* Match single char against a '[' bracketed ']' pattern set */ - if (!*text || *pattern != '[') - break; /* Not a pattern set */ - while (*++pattern && *pattern != ']' && *text != *pattern) { - if (*pattern == '-' && *(pattern - 1) != '[') - if (*text > *(pattern - 1) && *text < *(pattern + 1)) { - ++pattern; - break; - } - } - if (!*pattern || *pattern == ']') - return 1; /* No match */ - while (*pattern && *pattern++ != ']'); - } - } while (*++text && *pattern); - - /* Match any run of chars against a '*' wildcard */ - if (*pattern == '*') { - if (!*++pattern) - return 0; /* Match: avoid recursion at end of pattern */ - /* Loop to handle additional pattern chars after the wildcard */ - while (*text) { - if (glob_match(text, pattern) == 0) - return 0; /* Remainder matched */ - ++text; /* Absorb (match) this char and try again */ - } - } - if (!*text && !*pattern) - return 0; /* End of both strings: match */ - return 1; /* No match */ -} - static unsigned long ata_dev_blacklisted(const struct ata_device *dev) { unsigned char model_num[ATA_ID_PROD_LEN + 1]; @@ -4327,10 +4261,10 @@ static unsigned long ata_dev_blacklisted(const struct ata_device *dev) ata_id_c_string(dev->id, model_rev, ATA_ID_FW_REV, sizeof(model_rev)); while (ad->model_num) { - if (!glob_match(model_num, ad->model_num)) { + if (glob_match(model_num, ad->model_num)) { if (ad->model_rev == NULL) return ad->horkage; - if (!glob_match(model_rev, ad->model_rev)) + if (glob_match(model_rev, ad->model_rev)) return ad->horkage; } ad++; -- cgit v1.2.1
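
Editorial note, not part of any patch above: since the commit message says the matcher "may be useful for other drivers", here is a minimal sketch of how another driver could reuse it after selecting GLOB in its Kconfig, as libata now does. The prototype (pattern argument first, returns true on match) is the one exported via <linux/glob.h> by this series, but double-check it against lib/glob.c; the quirk table, model-string handling and function name below are invented purely for illustration, and the sample patterns are taken from the documentation comment removed from libata-core.c above.

	#include <linux/glob.h>	/* bool glob_match(char const *pat, char const *str); */

	/* Hypothetical quirk list, reusing the sample patterns from the old libata comment */
	static const char * const quirky_models[] = {
		"SD1?",		/* '?' matches any single character */
		"SD1[0-5]",	/* '[...]' matches one character from a set or range */
		"*R0",		/* '*' matches any run of characters */
		NULL
	};

	static bool model_is_quirky(const char *model_num)
	{
		const char * const *pat;

		for (pat = quirky_models; *pat; pat++)
			if (glob_match(*pat, model_num))	/* true on match */
				return true;
		return false;
	}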