Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r-- | mm/hugetlb.c | 119
1 file changed, 104 insertions, 15 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 87e11d8ad536..418bf01a50ed 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -567,13 +567,13 @@ retry:
  * appear as a "reserved" entry instead of simply dangling with incorrect
  * counts.
  */
-void hugetlb_fix_reserve_counts(struct inode *inode, bool restore_reserve)
+void hugetlb_fix_reserve_counts(struct inode *inode)
 {
 	struct hugepage_subpool *spool = subpool_inode(inode);
 	long rsv_adjust;
 
 	rsv_adjust = hugepage_subpool_get_pages(spool, 1);
-	if (restore_reserve && rsv_adjust) {
+	if (rsv_adjust) {
 		struct hstate *h = hstate_inode(inode);
 
 		hugetlb_acct_memory(h, 1);
@@ -1022,7 +1022,7 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
 		((node = hstate_next_node_to_free(hs, mask)) || 1);	\
 		nr_nodes--)
 
-#if (defined(CONFIG_X86_64) || defined(CONFIG_S390)) && \
+#if defined(CONFIG_ARCH_HAS_GIGANTIC_PAGE) && \
 	((defined(CONFIG_MEMORY_ISOLATION) && defined(CONFIG_COMPACTION)) || \
 	defined(CONFIG_CMA))
 static void destroy_compound_gigantic_page(struct page *page,
@@ -1437,38 +1437,61 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
 
 /*
  * Dissolve a given free hugepage into free buddy pages. This function does
- * nothing for in-use (including surplus) hugepages.
+ * nothing for in-use (including surplus) hugepages. Returns -EBUSY if the
+ * number of free hugepages would be reduced below the number of reserved
+ * hugepages.
  */
-static void dissolve_free_huge_page(struct page *page)
+static int dissolve_free_huge_page(struct page *page)
 {
+	int rc = 0;
+
 	spin_lock(&hugetlb_lock);
 	if (PageHuge(page) && !page_count(page)) {
-		struct hstate *h = page_hstate(page);
-		int nid = page_to_nid(page);
-		list_del(&page->lru);
+		struct page *head = compound_head(page);
+		struct hstate *h = page_hstate(head);
+		int nid = page_to_nid(head);
+		if (h->free_huge_pages - h->resv_huge_pages == 0) {
+			rc = -EBUSY;
+			goto out;
+		}
+		list_del(&head->lru);
 		h->free_huge_pages--;
 		h->free_huge_pages_node[nid]--;
 		h->max_huge_pages--;
-		update_and_free_page(h, page);
+		update_and_free_page(h, head);
 	}
+out:
 	spin_unlock(&hugetlb_lock);
+	return rc;
 }
 
 /*
  * Dissolve free hugepages in a given pfn range. Used by memory hotplug to
  * make specified memory blocks removable from the system.
- * Note that start_pfn should aligned with (minimum) hugepage size.
+ * Note that this will dissolve a free gigantic hugepage completely, if any
+ * part of it lies within the given range.
+ * Also note that if dissolve_free_huge_page() returns with an error, all
+ * free hugepages that were dissolved before that error are lost.
  */
-void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
+int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
 {
 	unsigned long pfn;
+	struct page *page;
+	int rc = 0;
 
 	if (!hugepages_supported())
-		return;
+		return rc;
 
-	VM_BUG_ON(!IS_ALIGNED(start_pfn, 1 << minimum_order));
-	for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order)
-		dissolve_free_huge_page(pfn_to_page(pfn));
+	for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order) {
+		page = pfn_to_page(pfn);
+		if (PageHuge(page) && !page_count(page)) {
+			rc = dissolve_free_huge_page(page);
+			if (rc)
+				break;
+		}
+	}
+
+	return rc;
 }
 
 /*
@@ -1803,11 +1826,17 @@ static void return_unused_surplus_pages(struct hstate *h,
  * is not the case is if a reserve map was changed between calls. It
  * is the responsibility of the caller to notice the difference and
  * take appropriate action.
+ *
+ * vma_add_reservation is used in error paths where a reservation must
+ * be restored when a newly allocated huge page must be freed. It is
+ * to be called after calling vma_needs_reservation to determine if a
+ * reservation exists.
  */
 enum vma_resv_mode {
 	VMA_NEEDS_RESV,
 	VMA_COMMIT_RESV,
 	VMA_END_RESV,
+	VMA_ADD_RESV,
 };
 static long __vma_reservation_common(struct hstate *h,
 				struct vm_area_struct *vma, unsigned long addr,
@@ -1833,6 +1862,14 @@ static long __vma_reservation_common(struct hstate *h,
 		region_abort(resv, idx, idx + 1);
 		ret = 0;
 		break;
+	case VMA_ADD_RESV:
+		if (vma->vm_flags & VM_MAYSHARE)
+			ret = region_add(resv, idx, idx + 1);
+		else {
+			region_abort(resv, idx, idx + 1);
+			ret = region_del(resv, idx, idx + 1);
+		}
+		break;
 	default:
 		BUG();
 	}
@@ -1880,6 +1917,56 @@ static void vma_end_reservation(struct hstate *h,
 	(void)__vma_reservation_common(h, vma, addr, VMA_END_RESV);
 }
 
+static long vma_add_reservation(struct hstate *h,
+			struct vm_area_struct *vma, unsigned long addr)
+{
+	return __vma_reservation_common(h, vma, addr, VMA_ADD_RESV);
+}
+
+/*
+ * This routine is called to restore a reservation on error paths. In the
+ * specific error paths, a huge page was allocated (via alloc_huge_page)
+ * and is about to be freed. If a reservation for the page existed,
+ * alloc_huge_page would have consumed the reservation and set PagePrivate
+ * in the newly allocated page. When the page is freed via free_huge_page,
+ * the global reservation count will be incremented if PagePrivate is set.
+ * However, free_huge_page can not adjust the reserve map. Adjust the
+ * reserve map here to be consistent with global reserve count adjustments
+ * to be made by free_huge_page.
+ */
+static void restore_reserve_on_error(struct hstate *h,
+			struct vm_area_struct *vma, unsigned long address,
+			struct page *page)
+{
+	if (unlikely(PagePrivate(page))) {
+		long rc = vma_needs_reservation(h, vma, address);
+
+		if (unlikely(rc < 0)) {
+			/*
+			 * Rare out of memory condition in reserve map
+			 * manipulation. Clear PagePrivate so that
+			 * global reserve count will not be incremented
+			 * by free_huge_page. This will make it appear
+			 * as though the reservation for this page was
+			 * consumed. This may prevent the task from
+			 * faulting in the page at a later time. This
+			 * is better than inconsistent global huge page
+			 * accounting of reserve counts.
+			 */
+			ClearPagePrivate(page);
+		} else if (rc) {
+			rc = vma_add_reservation(h, vma, address);
+			if (unlikely(rc < 0))
+				/*
+				 * See above comment about rare out of
+				 * memory condition.
+				 */
+				ClearPagePrivate(page);
+		} else
+			vma_end_reservation(h, vma, address);
+	}
+}
+
 struct page *alloc_huge_page(struct vm_area_struct *vma,
 				    unsigned long addr, int avoid_reserve)
 {
@@ -3475,6 +3562,7 @@ retry_avoidcopy:
 	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 out_release_all:
+	restore_reserve_on_error(h, vma, address, new_page);
 	put_page(new_page);
 out_release_old:
 	put_page(old_page);
@@ -3657,6 +3745,7 @@ backout:
 	spin_unlock(ptl);
 backout_unlocked:
 	unlock_page(page);
+	restore_reserve_on_error(h, vma, address, page);
 	put_page(page);
 	goto out;
 }
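Usage note (not part of the diff): with dissolve_free_huge_pages() now returning int, a caller such as a memory-hotplug offline path is expected to check the result and abort the operation when -EBUSY comes back. The sketch below is illustrative only; offline_pfn_range_example() is a hypothetical caller, and only dissolve_free_huge_pages() comes from this patch.

	#include <linux/hugetlb.h>

	/* Sketch of caller-side handling of the new return value. */
	static int offline_pfn_range_example(unsigned long start_pfn,
					     unsigned long end_pfn)
	{
		int ret;

		/*
		 * Dissolve any free hugepages in the range being offlined.
		 * A non-zero return (-EBUSY) means dissolving would drop the
		 * free hugepage count below the reserved count, so the
		 * offline attempt must be aborted rather than silently
		 * consuming reserved pages.
		 */
		ret = dissolve_free_huge_pages(start_pfn, end_pfn);
		if (ret)
			return ret;

		/* ... continue offlining the range ... */
		return 0;
	}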