Diffstat (limited to 'include/linux/mm.h')
-rw-r--r--	include/linux/mm.h	294
1 file changed, 160 insertions(+), 134 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0334ca97c584..52269e56c514 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -70,11 +70,6 @@ static inline void totalram_pages_add(long count)
atomic_long_add(count, &_totalram_pages);
}
-static inline void totalram_pages_set(long val)
-{
- atomic_long_set(&_totalram_pages, val);
-}
-
extern void * high_memory;
extern int page_cluster;
@@ -564,21 +559,6 @@ int vma_is_stack_for_current(struct vm_area_struct *vma);
struct mmu_gather;
struct inode;
-#if !defined(CONFIG_ARCH_HAS_PTE_DEVMAP) || !defined(CONFIG_TRANSPARENT_HUGEPAGE)
-static inline int pmd_devmap(pmd_t pmd)
-{
- return 0;
-}
-static inline int pud_devmap(pud_t pud)
-{
- return 0;
-}
-static inline int pgd_devmap(pgd_t pgd)
-{
- return 0;
-}
-#endif
-
/*
* FIXME: take this include out, include page-flags.h in
* files which need it (119 of them)
@@ -640,24 +620,19 @@ unsigned long vmalloc_to_pfn(const void *addr);
* On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there
* is no special casing required.
*/
-static inline bool is_vmalloc_addr(const void *x)
-{
-#ifdef CONFIG_MMU
- unsigned long addr = (unsigned long)x;
-
- return addr >= VMALLOC_START && addr < VMALLOC_END;
-#else
- return false;
-#endif
-}
#ifndef is_ioremap_addr
#define is_ioremap_addr(x) is_vmalloc_addr(x)
#endif
#ifdef CONFIG_MMU
+extern bool is_vmalloc_addr(const void *x);
extern int is_vmalloc_or_module_addr(const void *x);
#else
+static inline bool is_vmalloc_addr(const void *x)
+{
+ return false;
+}
static inline int is_vmalloc_or_module_addr(const void *x)
{
return 0;
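
For context, is_vmalloc_addr() (now out of line under CONFIG_MMU) is the predicate that lets mixed allocators pick the matching free routine; kvfree(), declared further down in this header, is built on exactly this test. A minimal sketch of the pattern — the helper name my_kvfree() is hypothetical:

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

/* Hedged sketch: free a buffer that may have come from either
 * kmalloc() or vmalloc(); this mirrors what kvfree() does. */
static void my_kvfree(const void *addr)
{
	if (is_vmalloc_addr(addr))
		vfree(addr);
	else
		kfree(addr);
}
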
@@ -695,11 +670,6 @@ static inline void *kvcalloc(size_t n, size_t size, gfp_t flags)
extern void kvfree(const void *addr);
-static inline atomic_t *compound_mapcount_ptr(struct page *page)
-{
- return &page[1].compound_mapcount;
-}
-
static inline int compound_mapcount(struct page *page)
{
VM_BUG_ON_PAGE(!PageCompound(page), page);
@@ -805,6 +775,24 @@ static inline void set_compound_order(struct page *page, unsigned int order)
page[1].compound_order = order;
}
+/* Returns the number of pages in this potentially compound page. */
+static inline unsigned long compound_nr(struct page *page)
+{
+ return 1UL << compound_order(page);
+}
+
+/* Returns the number of bytes in this potentially compound page. */
+static inline unsigned long page_size(struct page *page)
+{
+ return PAGE_SIZE << compound_order(page);
+}
+
+/* Returns the number of bits needed for the number of bytes in a page */
+static inline unsigned int page_shift(struct page *page)
+{
+ return PAGE_SHIFT + compound_order(page);
+}
+
void free_compound_page(struct page *page);
#ifdef CONFIG_MMU
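
The three helpers added above (compound_nr(), page_size(), page_shift()) replace open-coded "1UL << compound_order(page)" arithmetic at call sites. A small illustrative use — the function name is made up:

/* Illustrative only: report the geometry of a possibly-compound page. */
static void show_page_geometry(struct page *page)
{
	pr_info("page %p: %lu pages, %lu bytes, shift %u\n",
		page, compound_nr(page), page_size(page), page_shift(page));
}
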
@@ -923,10 +911,6 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf);
#define ZONEID_PGSHIFT (ZONEID_PGOFF * (ZONEID_SHIFT != 0))
-#if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS
-#error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS
-#endif
-
#define ZONES_MASK ((1UL << ZONES_WIDTH) - 1)
#define NODES_MASK ((1UL << NODES_WIDTH) - 1)
#define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1)
@@ -954,9 +938,10 @@ static inline bool is_zone_device_page(const struct page *page)
#endif
#ifdef CONFIG_DEV_PAGEMAP_OPS
-void __put_devmap_managed_page(struct page *page);
+void free_devmap_managed_page(struct page *page);
DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
-static inline bool put_devmap_managed_page(struct page *page)
+
+static inline bool page_is_devmap_managed(struct page *page)
{
if (!static_branch_unlikely(&devmap_managed_key))
return false;
@@ -965,7 +950,6 @@ static inline bool put_devmap_managed_page(struct page *page)
switch (page->pgmap->type) {
case MEMORY_DEVICE_PRIVATE:
case MEMORY_DEVICE_FS_DAX:
- __put_devmap_managed_page(page);
return true;
default:
break;
@@ -973,11 +957,17 @@ static inline bool put_devmap_managed_page(struct page *page)
return false;
}
+void put_devmap_managed_page(struct page *page);
+
#else /* CONFIG_DEV_PAGEMAP_OPS */
-static inline bool put_devmap_managed_page(struct page *page)
+static inline bool page_is_devmap_managed(struct page *page)
{
return false;
}
+
+static inline void put_devmap_managed_page(struct page *page)
+{
+}
#endif /* CONFIG_DEV_PAGEMAP_OPS */
static inline bool is_device_private_page(const struct page *page)
@@ -1030,36 +1020,37 @@ static inline void put_page(struct page *page)
* need to inform the device driver through callback. See
* include/linux/memremap.h and HMM for details.
*/
- if (put_devmap_managed_page(page))
+ if (page_is_devmap_managed(page)) {
+ put_devmap_managed_page(page);
return;
+ }
if (put_page_testzero(page))
__put_page(page);
}
/**
- * put_user_page() - release a gup-pinned page
+ * unpin_user_page() - release a gup-pinned page
* @page: pointer to page to be released
*
- * Pages that were pinned via get_user_pages*() must be released via
- * either put_user_page(), or one of the put_user_pages*() routines
- * below. This is so that eventually, pages that are pinned via
- * get_user_pages*() can be separately tracked and uniquely handled. In
- * particular, interactions with RDMA and filesystems need special
- * handling.
+ * Pages that were pinned via pin_user_pages*() must be released via either
+ * unpin_user_page(), or one of the unpin_user_pages*() routines. This is so
+ * that eventually such pages can be separately tracked and uniquely handled. In
+ * particular, interactions with RDMA and filesystems need special handling.
*
- * put_user_page() and put_page() are not interchangeable, despite this early
- * implementation that makes them look the same. put_user_page() calls must
- * be perfectly matched up with get_user_page() calls.
+ * unpin_user_page() and put_page() are not interchangeable, despite this early
+ * implementation that makes them look the same. unpin_user_page() calls must
+ * be perfectly matched up with pin*() calls.
*/
-static inline void put_user_page(struct page *page)
+static inline void unpin_user_page(struct page *page)
{
put_page(page);
}
-void put_user_pages_dirty(struct page **pages, unsigned long npages);
-void put_user_pages_dirty_lock(struct page **pages, unsigned long npages);
-void put_user_pages(struct page **pages, unsigned long npages);
+void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
+ bool make_dirty);
+
+void unpin_user_pages(struct page **pages, unsigned long npages);
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
#define SECTION_IN_PAGE_FLAGS
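
The renamed release routines pair strictly with the pin_user_pages*() entry points declared later in this header. A hedged round-trip sketch for the common DMA case — the helper name and error handling are illustrative:

/* Sketch: pin user pages as a DMA target, then release them,
 * dirtying only if the device actually wrote into them. */
static int dma_into_user(unsigned long uaddr, int nr_pages,
			 struct page **pages, bool device_wrote)
{
	int pinned = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE, pages);

	if (pinned <= 0)
		return pinned ? pinned : -EFAULT;

	/* ... program the device to write into @pages ... */

	unpin_user_pages_dirty_lock(pages, pinned, device_wrote);
	return 0;
}
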
@@ -1405,7 +1396,11 @@ extern void pagefault_out_of_memory(void);
extern void show_free_areas(unsigned int flags, nodemask_t *nodemask);
+#ifdef CONFIG_MMU
extern bool can_do_mlock(void);
+#else
+static inline bool can_do_mlock(void) { return false; }
+#endif
extern int user_shm_lock(size_t, struct user_struct *);
extern void user_shm_unlock(size_t, struct user_struct *);
@@ -1430,54 +1425,8 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long address,
void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
unsigned long start, unsigned long end);
-/**
- * mm_walk - callbacks for walk_page_range
- * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry
- * this handler should only handle pud_trans_huge() puds.
- * the pmd_entry or pte_entry callbacks will be used for
- * regular PUDs.
- * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry
- * this handler is required to be able to handle
- * pmd_trans_huge() pmds. They may simply choose to
- * split_huge_page() instead of handling it explicitly.
- * @pte_entry: if set, called for each non-empty PTE (4th-level) entry
- * @pte_hole: if set, called for each hole at all levels
- * @hugetlb_entry: if set, called for each hugetlb entry
- * @test_walk: caller specific callback function to determine whether
- * we walk over the current vma or not. Returning 0
- * value means "do page table walk over the current vma,"
- * and a negative one means "abort current page table walk
- * right now." 1 means "skip the current vma."
- * @mm: mm_struct representing the target process of page table walk
- * @vma: vma currently walked (NULL if walking outside vmas)
- * @private: private data for callbacks' usage
- *
- * (see the comment on walk_page_range() for more details)
- */
-struct mm_walk {
- int (*pud_entry)(pud_t *pud, unsigned long addr,
- unsigned long next, struct mm_walk *walk);
- int (*pmd_entry)(pmd_t *pmd, unsigned long addr,
- unsigned long next, struct mm_walk *walk);
- int (*pte_entry)(pte_t *pte, unsigned long addr,
- unsigned long next, struct mm_walk *walk);
- int (*pte_hole)(unsigned long addr, unsigned long next,
- struct mm_walk *walk);
- int (*hugetlb_entry)(pte_t *pte, unsigned long hmask,
- unsigned long addr, unsigned long next,
- struct mm_walk *walk);
- int (*test_walk)(unsigned long addr, unsigned long next,
- struct mm_walk *walk);
- struct mm_struct *mm;
- struct vm_area_struct *vma;
- void *private;
-};
-
struct mmu_notifier_range;
-int walk_page_range(unsigned long addr, unsigned long end,
- struct mm_walk *walk);
-int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk);
void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
unsigned long end, unsigned long floor, unsigned long ceiling);
int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
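
The removed mm_walk machinery was not dropped outright: the walker API moved to <linux/pagewalk.h>, with the callbacks split out into a const struct mm_walk_ops and walk_page_range() growing an explicit mm argument. A hedged sketch against that relocated API — the callback and counter are illustrative:

#include <linux/pagewalk.h>

/* Sketch: count non-empty PTEs in a range using the relocated API. */
static int count_pte_entry(pte_t *pte, unsigned long addr,
			   unsigned long next, struct mm_walk *walk)
{
	unsigned long *count = walk->private;

	if (!pte_none(*pte))
		(*count)++;
	return 0;
}

static const struct mm_walk_ops count_ops = {
	.pte_entry	= count_pte_entry,
};

/* Caller holds mmap_sem:
 *	walk_page_range(mm, start, end, &count_ops, &count);
 */
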
@@ -1549,9 +1498,16 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *locked);
+long pin_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long start, unsigned long nr_pages,
+ unsigned int gup_flags, struct page **pages,
+ struct vm_area_struct **vmas, int *locked);
long get_user_pages(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas);
+long pin_user_pages(unsigned long start, unsigned long nr_pages,
+ unsigned int gup_flags, struct page **pages,
+ struct vm_area_struct **vmas);
long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages, int *locked);
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
@@ -1559,6 +1515,8 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
int get_user_pages_fast(unsigned long start, int nr_pages,
unsigned int gup_flags, struct page **pages);
+int pin_user_pages_fast(unsigned long start, int nr_pages,
+ unsigned int gup_flags, struct page **pages);
int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc);
int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
@@ -1671,19 +1629,27 @@ static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
return (unsigned long)val;
}
+void mm_trace_rss_stat(struct mm_struct *mm, int member, long count);
+
static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
{
- atomic_long_add(value, &mm->rss_stat.count[member]);
+ long count = atomic_long_add_return(value, &mm->rss_stat.count[member]);
+
+ mm_trace_rss_stat(mm, member, count);
}
static inline void inc_mm_counter(struct mm_struct *mm, int member)
{
- atomic_long_inc(&mm->rss_stat.count[member]);
+ long count = atomic_long_inc_return(&mm->rss_stat.count[member]);
+
+ mm_trace_rss_stat(mm, member, count);
}
static inline void dec_mm_counter(struct mm_struct *mm, int member)
{
- atomic_long_dec(&mm->rss_stat.count[member]);
+ long count = atomic_long_dec_return(&mm->rss_stat.count[member]);
+
+ mm_trace_rss_stat(mm, member, count);
}
/* Optimized variant when page is already known not to be PageAnon */
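
The switch from atomic_long_add() to atomic_long_add_return() (and friends) is what feeds the new hook: the post-update value comes back from the same atomic operation, with no extra read. mm_trace_rss_stat() is only declared here; presumably it forwards to a tracepoint out of line, roughly:

/* Assumption: approximate shape of the out-of-line hook; the real
 * definition lives in mm/memory.c, not in this header. */
void mm_trace_rss_stat(struct mm_struct *mm, int member, long count)
{
	trace_rss_stat(mm, member, count);
}
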
@@ -1873,12 +1839,12 @@ static inline void mm_dec_nr_ptes(struct mm_struct *mm) {}
int __pte_alloc(struct mm_struct *mm, pmd_t *pmd);
int __pte_alloc_kernel(pmd_t *pmd);
+#if defined(CONFIG_MMU)
+
/*
- * The following ifdef needed to get the 4level-fixup.h header to work.
- * Remove it when 4level-fixup.h has been removed.
+ * The following ifdef needed to get the 5level-fixup.h header to work.
+ * Remove it when 5level-fixup.h has been removed.
*/
-#if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)
-
#ifndef __ARCH_HAS_5LEVEL_HACK
static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
unsigned long address)
@@ -1900,7 +1866,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
NULL: pmd_offset(pud, address);
}
-#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
+#endif /* CONFIG_MMU */
#if USE_SPLIT_PTE_PTLOCKS
#if ALLOC_SPLIT_PTLOCKS
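
For context, these on-demand allocators are used when descending the page-table tree in the fault path. A minimal descent sketch, with error handling trimmed to the essentials:

/* Sketch: allocate intermediate levels down to the PMD for @addr. */
static pmd_t *walk_to_pmd(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	p4d_t *p4d = p4d_alloc(mm, pgd, addr);
	pud_t *pud;

	if (!p4d)
		return NULL;
	pud = pud_alloc(mm, p4d, addr);
	if (!pud)
		return NULL;
	return pmd_alloc(mm, pud, addr);
}
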
@@ -1972,7 +1938,7 @@ static inline void pgtable_init(void)
pgtable_cache_init();
}
-static inline bool pgtable_page_ctor(struct page *page)
+static inline bool pgtable_pte_page_ctor(struct page *page)
{
if (!ptlock_init(page))
return false;
@@ -1981,7 +1947,7 @@ static inline bool pgtable_page_ctor(struct page *page)
return true;
}
-static inline void pgtable_page_dtor(struct page *page)
+static inline void pgtable_pte_page_dtor(struct page *page)
{
ptlock_free(page);
__ClearPageTable(page);
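
The _pte_ infix aligns the PTE constructor/destructor with their existing pgtable_pmd_page_ctor()/dtor() siblings; the call pattern in pte_alloc_one() implementations is unchanged. A hedged sketch of that pairing:

/* Sketch: allocate a PTE page and register it for split-ptlock
 * accounting, mirroring the generic pte_alloc_one() helpers. */
static struct page *alloc_pte_page(gfp_t gfp)
{
	struct page *page = alloc_page(gfp);

	if (!page)
		return NULL;
	if (!pgtable_pte_page_ctor(page)) {
		__free_page(page);
		return NULL;
	}
	return page;
}
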
@@ -2216,12 +2182,6 @@ extern int __meminit __early_pfn_to_nid(unsigned long pfn,
struct mminit_pfnnid_cache *state);
#endif
-#if !defined(CONFIG_FLAT_NODE_MEM_MAP)
-void zero_resv_unavail(void);
-#else
-static inline void zero_resv_unavail(void) {}
-#endif
-
extern void set_dma_reserve(unsigned long new_dma_reserve);
extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long,
enum memmap_context, struct vmem_altmap *);
@@ -2242,9 +2202,6 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...);
extern void setup_per_cpu_pageset(void);
-extern void zone_pcp_update(struct zone *zone);
-extern void zone_pcp_reset(struct zone *zone);
-
/* page_alloc.c */
extern int min_free_kbytes;
extern int watermark_boost_factor;
@@ -2351,6 +2308,8 @@ extern int install_special_mapping(struct mm_struct *mm,
unsigned long addr, unsigned long len,
unsigned long flags, struct page **pages);
+unsigned long randomize_stack_top(unsigned long stack_top);
+
extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
extern unsigned long mmap_region(struct file *file, unsigned long addr,
@@ -2364,6 +2323,7 @@ extern int __do_munmap(struct mm_struct *, unsigned long, size_t,
struct list_head *uf, bool downgrade);
extern int do_munmap(struct mm_struct *, unsigned long, size_t,
struct list_head *uf);
+extern int do_madvise(unsigned long start, size_t len_in, int behavior);
static inline unsigned long
do_mmap_pgoff(struct file *file, unsigned long addr,
@@ -2569,6 +2529,8 @@ vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn, pgprot_t pgprot);
vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
pfn_t pfn);
+vm_fault_t vmf_insert_mixed_prot(struct vm_area_struct *vma, unsigned long addr,
+ pfn_t pfn, pgprot_t pgprot);
vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma,
unsigned long addr, pfn_t pfn);
int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len);
@@ -2614,13 +2576,16 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
#define FOLL_COW 0x4000 /* internal GUP flag */
#define FOLL_ANON 0x8000 /* don't do file mappings */
#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */
+#define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */
+#define FOLL_PIN 0x40000 /* pages must be released via unpin_user_page */
/*
- * NOTE on FOLL_LONGTERM:
+ * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each
+ * other. Here is what they mean, and how to use them:
*
* FOLL_LONGTERM indicates that the page will be held for an indefinite time
- * period _often_ under userspace control. This is contrasted with
- * iov_iter_get_pages() where usages which are transient.
+ * period _often_ under userspace control. This is in contrast to
+ * iov_iter_get_pages(), whose usages are transient.
*
* FIXME: For pages which are part of a filesystem, mappings are subject to the
* lifetime enforced by the filesystem and we need guarantees that longterm
@@ -2635,11 +2600,39 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
* Currently only get_user_pages() and get_user_pages_fast() support this flag
* and calls to get_user_pages_[un]locked are specifically not allowed. This
* is due to an incompatibility with the FS DAX check and
- * FAULT_FLAG_ALLOW_RETRY
+ * FAULT_FLAG_ALLOW_RETRY.
*
- * In the CMA case: longterm pins in a CMA region would unnecessarily fragment
- * that region. And so CMA attempts to migrate the page before pinning when
+ * In the CMA case: long term pins in a CMA region would unnecessarily fragment
+ * that region. And so, CMA attempts to migrate the page before pinning, when
* FOLL_LONGTERM is specified.
+ *
+ * FOLL_PIN indicates that a special kind of tracking (not just page->_refcount,
+ * but an additional pin counting system) will be invoked. This is intended for
+ * anything that gets a page reference and then touches page data (for example,
+ * Direct IO). This lets the filesystem know that some non-file-system entity is
+ * potentially changing the pages' data. In contrast to FOLL_GET (whose pages
+ * are released via put_page()), FOLL_PIN pages must be released, ultimately, by
+ * a call to unpin_user_page().
+ *
+ * FOLL_PIN is similar to FOLL_GET: both of these pin pages. They use different
+ * and separate refcounting mechanisms, however, and that means that each has
+ * its own acquire and release mechanisms:
+ *
+ * FOLL_GET: get_user_pages*() to acquire, and put_page() to release.
+ *
+ * FOLL_PIN: pin_user_pages*() to acquire, and unpin_user_pages to release.
+ *
+ * FOLL_PIN and FOLL_GET are mutually exclusive for a given function call.
+ * (The underlying pages may experience both FOLL_GET-based and FOLL_PIN-based
+ * calls applied to them, and that's perfectly OK. This is a constraint on the
+ * callers, not on the pages.)
+ *
+ * FOLL_PIN should be set internally by the pin_user_pages*() APIs, never
+ * directly by the caller. That's in order to help avoid mismatches when
+ * releasing pages: get_user_pages*() pages must be released via put_page(),
+ * while pin_user_pages*() pages must be released via unpin_user_page().
+ *
+ * Please see Documentation/vm/pin_user_pages.rst for more information.
*/
static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
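
The pairing rule in the comment above reduces to: one acquire family, one release family, never crossed. Two illustrative release helpers make the contrast concrete:

/* Sketch: each acquire API has exactly one matching release API.
 * Crossing them (e.g. put_page() on a FOLL_PIN page) corrupts the
 * pin accounting layered on top of page->_refcount. */
static void release_got_pages(struct page **pages, long n)
{
	while (n--)
		put_page(pages[n]);		/* pairs with get_user_pages*() */
}

static void release_pinned_pages(struct page **pages, long n)
{
	while (n--)
		unpin_user_page(pages[n]);	/* pairs with pin_user_pages*() */
}
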
@@ -2656,7 +2649,9 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);
extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
unsigned long size, pte_fn_t fn, void *data);
-
+extern int apply_to_existing_page_range(struct mm_struct *mm,
+ unsigned long address, unsigned long size,
+ pte_fn_t fn, void *data);
#ifdef CONFIG_PAGE_POISONING
extern bool page_poisoning_enabled(void);
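
Both walkers take the same pte_fn_t callback; the new apply_to_existing_page_range() differs only in refusing to allocate missing intermediate tables. A minimal callback sketch:

/* Sketch: a pte_fn_t callback counting present entries in a range. */
static int count_present_pte(pte_t *pte, unsigned long addr, void *data)
{
	unsigned long *count = data;

	if (pte_present(*pte))
		(*count)++;
	return 0;
}

/* Usage:
 *	apply_to_existing_page_range(mm, start, size,
 *				     count_present_pte, &count);
 */
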
@@ -2691,14 +2686,26 @@ static inline bool want_init_on_free(void)
!page_poisoning_enabled();
}
-#ifdef CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT
-DECLARE_STATIC_KEY_TRUE(_debug_pagealloc_enabled);
+#ifdef CONFIG_DEBUG_PAGEALLOC
+extern void init_debug_pagealloc(void);
#else
-DECLARE_STATIC_KEY_FALSE(_debug_pagealloc_enabled);
+static inline void init_debug_pagealloc(void) {}
#endif
+extern bool _debug_pagealloc_enabled_early;
+DECLARE_STATIC_KEY_FALSE(_debug_pagealloc_enabled);
static inline bool debug_pagealloc_enabled(void)
{
+ return IS_ENABLED(CONFIG_DEBUG_PAGEALLOC) &&
+ _debug_pagealloc_enabled_early;
+}
+
+/*
+ * For use in fast paths after init_debug_pagealloc() has run, or when a
+ * false negative result is not harmful when called too early.
+ */
+static inline bool debug_pagealloc_enabled_static(void)
+{
if (!IS_ENABLED(CONFIG_DEBUG_PAGEALLOC))
return false;
@@ -2806,7 +2813,7 @@ extern int sysctl_memory_failure_early_kill;
extern int sysctl_memory_failure_recovery;
extern void shake_page(struct page *p, int access);
extern atomic_long_t num_poisoned_pages __read_mostly;
-extern int soft_offline_page(struct page *page, int flags);
+extern int soft_offline_page(unsigned long pfn, int flags);
/*
@@ -2891,5 +2898,24 @@ void __init setup_nr_node_ids(void);
static inline void setup_nr_node_ids(void) {}
#endif
+extern int memcmp_pages(struct page *page1, struct page *page2);
+
+static inline int pages_identical(struct page *page1, struct page *page2)
+{
+ return !memcmp_pages(page1, page2);
+}
+
+#ifdef CONFIG_MAPPING_DIRTY_HELPERS
+unsigned long clean_record_shared_mapping_range(struct address_space *mapping,
+ pgoff_t first_index, pgoff_t nr,
+ pgoff_t bitmap_pgoff,
+ unsigned long *bitmap,
+ pgoff_t *start,
+ pgoff_t *end);
+
+unsigned long wp_shared_mapping_range(struct address_space *mapping,
+ pgoff_t first_index, pgoff_t nr);
+#endif
+
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */
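
The CONFIG_MAPPING_DIRTY_HELPERS pair near the end supports write-protect-based dirty tracking of shared mappings, useful for drivers that want to upload only modified pages. A hedged usage sketch; the helper name and the semantics of the in/out parameters are inferred from the prototypes:

/* Sketch: clean the range's PTEs and record which pages were dirty
 * into @bitmap; afterwards bits in [start, end) flag pages that need
 * re-upload. All names here are illustrative. */
static void harvest_dirty(struct address_space *mapping, pgoff_t first,
			  pgoff_t nr, unsigned long *bitmap)
{
	pgoff_t start = 0, end = nr;

	clean_record_shared_mapping_range(mapping, first, nr, first,
					  bitmap, &start, &end);
}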