path: root/mm/gup.c
Diffstat (limited to 'mm/gup.c')
-rw-r--r--  mm/gup.c | 101
 1 file changed, 72 insertions(+), 29 deletions(-)
diff --git a/mm/gup.c b/mm/gup.c
index ec4f82704b6f..04aa405350dc 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -10,7 +10,7 @@
#include <linux/swap.h>
#include <linux/swapops.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/rwsem.h>
#include <linux/hugetlb.h>
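
The sched.h include becomes sched/signal.h because the signal helpers this file relies on moved there during the sched.h header split. A minimal illustration of the dependency (assumed context, not part of this patch):

	#include <linux/sched/signal.h>

	/* gup's slow path aborts on fatal signals; after the header
	 * split, fatal_signal_pending() is declared here rather than
	 * being available via <linux/sched.h>. */
	if (fatal_signal_pending(current))
		return i ? i : -ERESTARTSYS;
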
@@ -226,6 +226,7 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
unsigned int *page_mask)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
spinlock_t *ptl;
@@ -243,8 +244,13 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
pgd = pgd_offset(mm, address);
if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
return no_page_table(vma, flags);
-
- pud = pud_offset(pgd, address);
+ p4d = p4d_offset(pgd, address);
+ if (p4d_none(*p4d))
+ return no_page_table(vma, flags);
+ BUILD_BUG_ON(p4d_huge(*p4d));
+ if (unlikely(p4d_bad(*p4d)))
+ return no_page_table(vma, flags);
+ pud = pud_offset(p4d, address);
if (pud_none(*pud))
return no_page_table(vma, flags);
if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
@@ -253,6 +259,13 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
return page;
return no_page_table(vma, flags);
}
+ if (pud_devmap(*pud)) {
+ ptl = pud_lock(mm, pud);
+ page = follow_devmap_pud(vma, address, pud, flags);
+ spin_unlock(ptl);
+ if (page)
+ return page;
+ }
if (unlikely(pud_bad(*pud)))
return no_page_table(vma, flags);
@@ -265,8 +278,6 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
return page;
return no_page_table(vma, flags);
}
- if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
- return no_page_table(vma, flags);
if (pmd_devmap(*pmd)) {
ptl = pmd_lock(mm, pmd);
page = follow_devmap_pmd(vma, address, pmd, flags);
@@ -277,6 +288,9 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
if (likely(!pmd_trans_huge(*pmd)))
return follow_page_pte(vma, address, pmd, flags);
+ if ((flags & FOLL_NUMA) && pmd_protnone(*pmd))
+ return no_page_table(vma, flags);
+
ptl = pmd_lock(mm, pmd);
if (unlikely(!pmd_trans_huge(*pmd))) {
spin_unlock(ptl);
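
Taken together, the hunks above interpose the new p4d level between pgd and pud in follow_page_mask(). A condensed sketch of the resulting walk order (error handling and huge/devmap cases elided; on 4-level architectures the p4d step folds away):

	pgd = pgd_offset(mm, address);
	p4d = p4d_offset(pgd, address);	/* new level */
	pud = pud_offset(p4d, address);	/* now takes a p4d, not a pgd */
	pmd = pmd_offset(pud, address);
	pte = pte_offset_map(pmd, address);
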
@@ -317,6 +331,7 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
struct page **page)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -330,7 +345,9 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address,
else
pgd = pgd_offset_gate(mm, address);
BUG_ON(pgd_none(*pgd));
- pud = pud_offset(pgd, address);
+ p4d = p4d_offset(pgd, address);
+ BUG_ON(p4d_none(*p4d));
+ pud = pud_offset(p4d, address);
BUG_ON(pud_none(*pud));
pmd = pmd_offset(pud, address);
if (pmd_none(*pmd))
@@ -572,7 +589,7 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
if (is_vm_hugetlb_page(vma)) {
i = follow_hugetlb_page(mm, vma, pages, vmas,
&start, &nr_pages, i,
- gup_flags);
+ gup_flags, nonblocking);
continue;
}
}
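
follow_hugetlb_page() now receives the caller's nonblocking pointer, so hugetlb faults can participate in VM_FAULT_RETRY instead of always sleeping with mmap_sem held. A hedged sketch of the callee side (simplified; the exact hugetlb fault code differs):

	if (nonblocking)
		fault_flags |= FAULT_FLAG_ALLOW_RETRY;
	ret = hugetlb_fault(mm, vma, vaddr, fault_flags);
	if (ret & VM_FAULT_RETRY) {
		if (nonblocking)
			*nonblocking = 0;	/* mmap_sem was dropped */
	}
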
@@ -632,7 +649,8 @@ next_page:
return i;
}
-bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags)
+static bool vma_permits_fault(struct vm_area_struct *vma,
+ unsigned int fault_flags)
{
bool write = !!(fault_flags & FAULT_FLAG_WRITE);
bool foreign = !!(fault_flags & FAULT_FLAG_REMOTE);
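
Only the linkage changes here; vma_permits_fault() becomes static with its logic untouched. For context, the rest of the helper (paraphrased from the function body, not part of this hunk) boils down to:

	vm_flags_t vm_flags = write ? VM_WRITE : VM_READ;

	if (!(vm_flags & vma->vm_flags))	/* VMA lacks the permission */
		return false;
	/* architectures (e.g. pkeys) may further restrict access */
	return arch_vma_access_permitted(vma, write, false, foreign);
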
@@ -857,18 +875,17 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
EXPORT_SYMBOL(get_user_pages_locked);
/*
- * Same as get_user_pages_unlocked(...., FOLL_TOUCH) but it allows to
- * pass additional gup_flags as last parameter (like FOLL_HWPOISON).
+ * Same as get_user_pages_unlocked(...., FOLL_TOUCH) but it allows for
+ * tsk, mm to be specified.
*
* NOTE: here FOLL_TOUCH is not set implicitly and must be set by the
- * caller if required (just like with __get_user_pages). "FOLL_GET",
- * "FOLL_WRITE" and "FOLL_FORCE" are set implicitly as needed
- * according to the parameters "pages", "write", "force"
- * respectively.
+ * caller if required (just like with __get_user_pages). "FOLL_GET"
+ * is set implicitly if "pages" is non-NULL.
*/
-__always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
- unsigned long start, unsigned long nr_pages,
- struct page **pages, unsigned int gup_flags)
+static __always_inline long __get_user_pages_unlocked(struct task_struct *tsk,
+ struct mm_struct *mm, unsigned long start,
+ unsigned long nr_pages, struct page **pages,
+ unsigned int gup_flags)
{
long ret;
int locked = 1;
@@ -880,7 +897,6 @@ __always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct m
up_read(&mm->mmap_sem);
return ret;
}
-EXPORT_SYMBOL(__get_user_pages_unlocked);
/*
* get_user_pages_unlocked() is suitable to replace the form:
@@ -894,10 +910,8 @@ EXPORT_SYMBOL(__get_user_pages_unlocked);
* get_user_pages_unlocked(tsk, mm, ..., pages);
*
* It is functionally equivalent to get_user_pages_fast so
- * get_user_pages_fast should be used instead, if the two parameters
- * "tsk" and "mm" are respectively equal to current and current->mm,
- * or if "force" shall be set to 1 (get_user_pages_fast misses the
- * "force" parameter).
+ * get_user_pages_fast should be used instead if specific gup_flags
+ * (e.g. FOLL_FORCE) are not required.
*/
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
struct page **pages, unsigned int gup_flags)
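
With gup_flags explicit in the signature, a typical caller (illustrative only, not taken from this patch) looks like:

	struct page *page;
	long ret;

	/* no mmap_sem held here; the helper takes and drops it */
	ret = get_user_pages_unlocked(addr, 1, &page, FOLL_WRITE);
	if (ret == 1) {
		/* ... access the page ... */
		put_page(page);
	}
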
@@ -920,6 +934,9 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
* only intends to ensure the pages are faulted in.
* @vmas: array of pointers to vmas corresponding to each page.
* Or NULL if the caller does not require them.
+ * @locked: pointer to lock flag indicating whether lock is held and
+ * subsequently whether VM_FAULT_RETRY functionality can be
+ * utilised. Lock must initially be held.
*
* Returns number of pages pinned. This may be fewer than the number
* requested. If nr_pages is 0 or negative, returns 0. If no pages
@@ -963,10 +980,10 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
- struct vm_area_struct **vmas)
+ struct vm_area_struct **vmas, int *locked)
{
return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
- NULL, false,
+ locked, true,
gup_flags | FOLL_TOUCH | FOLL_REMOTE);
}
EXPORT_SYMBOL(get_user_pages_remote);
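
The new locked argument lets callers opt in to VM_FAULT_RETRY handling: if the fault path drops mmap_sem, *locked is cleared so the caller knows not to unlock again. A hedged usage sketch (variable names assumed):

	int locked = 1;
	long ret;

	down_read(&mm->mmap_sem);
	ret = get_user_pages_remote(tsk, mm, start, 1, FOLL_WRITE,
				    &page, NULL, &locked);
	if (locked)
		up_read(&mm->mmap_sem);	/* only if not already dropped */
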
@@ -974,8 +991,9 @@ EXPORT_SYMBOL(get_user_pages_remote);
/*
* This is the same as get_user_pages_remote(), just with a
* less-flexible calling convention where we assume that the task
- * and mm being operated on are the current task's. We also
- * obviously don't pass FOLL_REMOTE in here.
+ * and mm being operated on are the current task's and don't allow
+ * passing of a locked parameter. We also obviously don't pass
+ * FOLL_REMOTE in here.
*/
long get_user_pages(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
@@ -1391,13 +1409,13 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
return 1;
}
-static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
+static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
{
unsigned long next;
pud_t *pudp;
- pudp = pud_offset(&pgd, addr);
+ pudp = pud_offset(&p4d, addr);
do {
pud_t pud = READ_ONCE(*pudp);
@@ -1419,6 +1437,31 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
return 1;
}
+static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
+ int write, struct page **pages, int *nr)
+{
+ unsigned long next;
+ p4d_t *p4dp;
+
+ p4dp = p4d_offset(&pgd, addr);
+ do {
+ p4d_t p4d = READ_ONCE(*p4dp);
+
+ next = p4d_addr_end(addr, end);
+ if (p4d_none(p4d))
+ return 0;
+ BUILD_BUG_ON(p4d_huge(p4d));
+ if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {
+ if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
+ P4D_SHIFT, next, write, pages, nr))
+ return 0;
+ } else if (!gup_pud_range(p4d, addr, next, write, pages, nr))
+ return 0;
+ } while (p4dp++, addr = next, addr != end);
+
+ return 1;
+}
+
/*
* Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
* the regular GUP. It will only return non-negative values.
@@ -1469,7 +1512,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
PGDIR_SHIFT, next, write, pages, &nr))
break;
- } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+ } else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
break;
} while (pgdp++, addr = next, addr != end);
local_irq_restore(flags);
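
On architectures that still use four page-table levels, the extra gup_p4d_range() step costs nothing: with the p4d folded into the pgd, the offset helper is effectively an identity cast, p4d_addr_end() returns end, and the new do/while loop collapses to a single iteration. A sketch of the folded helper (per the generic no-p4d pattern; simplified):

	static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
	{
		return (p4d_t *)pgd;	/* p4d shares the pgd entry */
	}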