summaryrefslogtreecommitdiffstats
path: root/arch/x86/mm/fault.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm/fault.c')
-rw-r--r--arch/x86/mm/fault.c125
1 files changed, 53 insertions, 72 deletions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 2a1fa10c6a98..e2baeaa053a5 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -192,8 +192,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
* 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really
* faulted on a pte with its pkey=4.
*/
-static void fill_sig_info_pkey(int si_code, siginfo_t *info,
- struct vm_area_struct *vma)
+static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
{
/* This is effectively an #ifdef */
if (!boot_cpu_has(X86_FEATURE_OSPKE))
@@ -209,7 +208,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info,
* valid VMA, so we should never reach this without a
* valid VMA.
*/
- if (!vma) {
+ if (!pkey) {
WARN_ONCE(1, "PKU fault with no VMA passed in");
info->si_pkey = 0;
return;
@@ -219,13 +218,12 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info,
* absolutely guranteed to be 100% accurate because of
* the race explained above.
*/
- info->si_pkey = vma_pkey(vma);
+ info->si_pkey = *pkey;
}
static void
force_sig_info_fault(int si_signo, int si_code, unsigned long address,
- struct task_struct *tsk, struct vm_area_struct *vma,
- int fault)
+ struct task_struct *tsk, u32 *pkey, int fault)
{
unsigned lsb = 0;
siginfo_t info;
@@ -240,7 +238,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
lsb = PAGE_SHIFT;
info.si_addr_lsb = lsb;
- fill_sig_info_pkey(si_code, &info, vma);
+ fill_sig_info_pkey(si_code, &info, pkey);
force_sig_info(si_signo, &info, tsk);
}
@@ -396,14 +394,18 @@ static void dump_pagetable(unsigned long address)
pte_t *pte;
#ifdef CONFIG_X86_PAE
- printk("*pdpt = %016Lx ", pgd_val(*pgd));
+ pr_info("*pdpt = %016Lx ", pgd_val(*pgd));
if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd))
goto out;
+#define pr_pde pr_cont
+#else
+#define pr_pde pr_info
#endif
p4d = p4d_offset(pgd, address);
pud = pud_offset(p4d, address);
pmd = pmd_offset(pud, address);
- printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd));
+ pr_pde("*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd));
+#undef pr_pde
/*
* We must not directly access the pte in the highpte
@@ -415,9 +417,9 @@ static void dump_pagetable(unsigned long address)
goto out;
pte = pte_offset_kernel(pmd, address);
- printk("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte));
+ pr_cont("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte));
out:
- printk("\n");
+ pr_cont("\n");
}
#else /* CONFIG_X86_64: */
@@ -565,7 +567,7 @@ static void dump_pagetable(unsigned long address)
if (bad_address(pgd))
goto bad;
- printk("PGD %lx ", pgd_val(*pgd));
+ pr_info("PGD %lx ", pgd_val(*pgd));
if (!pgd_present(*pgd))
goto out;
@@ -574,7 +576,7 @@ static void dump_pagetable(unsigned long address)
if (bad_address(p4d))
goto bad;
- printk("P4D %lx ", p4d_val(*p4d));
+ pr_cont("P4D %lx ", p4d_val(*p4d));
if (!p4d_present(*p4d) || p4d_large(*p4d))
goto out;
@@ -582,7 +584,7 @@ static void dump_pagetable(unsigned long address)
if (bad_address(pud))
goto bad;
- printk("PUD %lx ", pud_val(*pud));
+ pr_cont("PUD %lx ", pud_val(*pud));
if (!pud_present(*pud) || pud_large(*pud))
goto out;
@@ -590,7 +592,7 @@ static void dump_pagetable(unsigned long address)
if (bad_address(pmd))
goto bad;
- printk("PMD %lx ", pmd_val(*pmd));
+ pr_cont("PMD %lx ", pmd_val(*pmd));
if (!pmd_present(*pmd) || pmd_large(*pmd))
goto out;
@@ -598,12 +600,12 @@ static void dump_pagetable(unsigned long address)
if (bad_address(pte))
goto bad;
- printk("PTE %lx", pte_val(*pte));
+ pr_cont("PTE %lx", pte_val(*pte));
out:
- printk("\n");
+ pr_cont("\n");
return;
bad:
- printk("BAD\n");
+ pr_info("BAD\n");
}
#endif /* CONFIG_X86_64 */
@@ -758,8 +760,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
struct task_struct *tsk = current;
unsigned long flags;
int sig;
- /* No context means no VMA to pass down */
- struct vm_area_struct *vma = NULL;
/* Are we prepared to handle this kernel fault? */
if (fixup_exception(regs, X86_TRAP_PF)) {
@@ -784,7 +784,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
/* XXX: hwpoison faults will set the wrong code. */
force_sig_info_fault(signal, si_code, address,
- tsk, vma, 0);
+ tsk, NULL, 0);
}
/*
@@ -802,7 +802,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
if (is_vmalloc_addr((void *)address) &&
(((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
- register void *__sp asm("rsp");
unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
/*
* We're likely to be running with very little stack space
@@ -817,7 +816,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
asm volatile ("movq %[stack], %%rsp\n\t"
"call handle_stack_overflow\n\t"
"1: jmp 1b"
- : "+r" (__sp)
+ : ASM_CALL_CONSTRAINT
: "D" ("kernel stack overflow (page fault)"),
"S" (regs), "d" (address),
[stack] "rm" (stack));
@@ -893,8 +892,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
static void
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
- unsigned long address, struct vm_area_struct *vma,
- int si_code)
+ unsigned long address, u32 *pkey, int si_code)
{
struct task_struct *tsk = current;
@@ -942,7 +940,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_PF;
- force_sig_info_fault(SIGSEGV, si_code, address, tsk, vma, 0);
+ force_sig_info_fault(SIGSEGV, si_code, address, tsk, pkey, 0);
return;
}
@@ -955,9 +953,9 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
static noinline void
bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
- unsigned long address, struct vm_area_struct *vma)
+ unsigned long address, u32 *pkey)
{
- __bad_area_nosemaphore(regs, error_code, address, vma, SEGV_MAPERR);
+ __bad_area_nosemaphore(regs, error_code, address, pkey, SEGV_MAPERR);
}
static void
@@ -965,6 +963,10 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
unsigned long address, struct vm_area_struct *vma, int si_code)
{
struct mm_struct *mm = current->mm;
+ u32 pkey;
+
+ if (vma)
+ pkey = vma_pkey(vma);
/*
* Something tried to access memory that isn't in our memory map..
@@ -972,7 +974,8 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
*/
up_read(&mm->mmap_sem);
- __bad_area_nosemaphore(regs, error_code, address, vma, si_code);
+ __bad_area_nosemaphore(regs, error_code, address,
+ (vma) ? &pkey : NULL, si_code);
}
static noinline void
@@ -1015,7 +1018,7 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
static void
do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
- struct vm_area_struct *vma, unsigned int fault)
+ u32 *pkey, unsigned int fault)
{
struct task_struct *tsk = current;
int code = BUS_ADRERR;
@@ -1042,13 +1045,12 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
code = BUS_MCEERR_AR;
}
#endif
- force_sig_info_fault(SIGBUS, code, address, tsk, vma, fault);
+ force_sig_info_fault(SIGBUS, code, address, tsk, pkey, fault);
}
static noinline void
mm_fault_error(struct pt_regs *regs, unsigned long error_code,
- unsigned long address, struct vm_area_struct *vma,
- unsigned int fault)
+ unsigned long address, u32 *pkey, unsigned int fault)
{
if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
no_context(regs, error_code, address, 0, 0);
@@ -1072,9 +1074,9 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
} else {
if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
VM_FAULT_HWPOISON_LARGE))
- do_sigbus(regs, error_code, address, vma, fault);
+ do_sigbus(regs, error_code, address, pkey, fault);
else if (fault & VM_FAULT_SIGSEGV)
- bad_area_nosemaphore(regs, error_code, address, vma);
+ bad_area_nosemaphore(regs, error_code, address, pkey);
else
BUG();
}
@@ -1254,10 +1256,6 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
* routines.
- *
- * This function must have noinline because both callers
- * {,trace_}do_page_fault() have notrace on. Having this an actual function
- * guarantees there's a function trace entry.
*/
static noinline void
__do_page_fault(struct pt_regs *regs, unsigned long error_code,
@@ -1268,6 +1266,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
struct mm_struct *mm;
int fault, major = 0;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+ u32 pkey;
tsk = current;
mm = tsk->mm;
@@ -1468,9 +1467,10 @@ good_area:
return;
}
+ pkey = vma_pkey(vma);
up_read(&mm->mmap_sem);
if (unlikely(fault & VM_FAULT_ERROR)) {
- mm_fault_error(regs, error_code, address, vma, fault);
+ mm_fault_error(regs, error_code, address, &pkey, fault);
return;
}
@@ -1490,27 +1490,6 @@ good_area:
}
NOKPROBE_SYMBOL(__do_page_fault);
-dotraplinkage void notrace
-do_page_fault(struct pt_regs *regs, unsigned long error_code)
-{
- unsigned long address = read_cr2(); /* Get the faulting address */
- enum ctx_state prev_state;
-
- /*
- * We must have this function tagged with __kprobes, notrace and call
- * read_cr2() before calling anything else. To avoid calling any kind
- * of tracing machinery before we've observed the CR2 value.
- *
- * exception_{enter,exit}() contain all sorts of tracepoints.
- */
-
- prev_state = exception_enter();
- __do_page_fault(regs, error_code, address);
- exception_exit(prev_state);
-}
-NOKPROBE_SYMBOL(do_page_fault);
-
-#ifdef CONFIG_TRACING
static nokprobe_inline void
trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
unsigned long error_code)
@@ -1521,22 +1500,24 @@ trace_page_fault_entries(unsigned long address, struct pt_regs *regs,
trace_page_fault_kernel(address, regs, error_code);
}
+/*
+ * We must have this function blacklisted from kprobes, tagged with notrace
+ * and call read_cr2() before calling anything else. To avoid calling any
+ * kind of tracing machinery before we've observed the CR2 value.
+ *
+ * exception_{enter,exit}() contains all sorts of tracepoints.
+ */
dotraplinkage void notrace
-trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
+do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
- /*
- * The exception_enter and tracepoint processing could
- * trigger another page faults (user space callchain
- * reading) and destroy the original cr2 value, so read
- * the faulting address now.
- */
- unsigned long address = read_cr2();
+ unsigned long address = read_cr2(); /* Get the faulting address */
enum ctx_state prev_state;
prev_state = exception_enter();
- trace_page_fault_entries(address, regs, error_code);
+ if (trace_pagefault_enabled())
+ trace_page_fault_entries(address, regs, error_code);
+
__do_page_fault(regs, error_code, address);
exception_exit(prev_state);
}
-NOKPROBE_SYMBOL(trace_do_page_fault);
-#endif /* CONFIG_TRACING */
+NOKPROBE_SYMBOL(do_page_fault);
OpenPOWER on IntegriCloud