summaryrefslogtreecommitdiffstats
path: root/arch/s390/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/mm')
-rw-r--r--arch/s390/mm/Makefile6
-rw-r--r--arch/s390/mm/cmm.c74
-rw-r--r--arch/s390/mm/dump_pagetables.c58
-rw-r--r--arch/s390/mm/extmem.c8
-rw-r--r--arch/s390/mm/fault.c74
-rw-r--r--arch/s390/mm/gmap.c468
-rw-r--r--arch/s390/mm/gup.c2
-rw-r--r--arch/s390/mm/hugetlbpage.c24
-rw-r--r--arch/s390/mm/init.c10
-rw-r--r--arch/s390/mm/kasan_init.c387
-rw-r--r--arch/s390/mm/maccess.c25
-rw-r--r--arch/s390/mm/mem_detect.c62
-rw-r--r--arch/s390/mm/mmap.c15
-rw-r--r--arch/s390/mm/page-states.c2
-rw-r--r--arch/s390/mm/pageattr.c6
-rw-r--r--arch/s390/mm/pgalloc.c321
-rw-r--r--arch/s390/mm/pgtable.c163
-rw-r--r--arch/s390/mm/vmem.c7
18 files changed, 1450 insertions, 262 deletions
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 33fe418506bc..f5880bfd1b0c 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -4,10 +4,12 @@
#
obj-y := init.o fault.o extmem.o mmap.o vmem.o maccess.o
-obj-y += page-states.o gup.o pageattr.o mem_detect.o
-obj-y += pgtable.o pgalloc.o
+obj-y += page-states.o gup.o pageattr.o pgtable.o pgalloc.o
obj-$(CONFIG_CMM) += cmm.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_S390_PTDUMP) += dump_pagetables.o
obj-$(CONFIG_PGSTE) += gmap.o
+
+KASAN_SANITIZE_kasan_init.o := n
+obj-$(CONFIG_KASAN) += kasan_init.o
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index 6cf024eb2085..510a18299196 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -191,12 +191,7 @@ static void cmm_set_timer(void)
del_timer(&cmm_timer);
return;
}
- if (timer_pending(&cmm_timer)) {
- if (mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds*HZ))
- return;
- }
- cmm_timer.expires = jiffies + cmm_timeout_seconds*HZ;
- add_timer(&cmm_timer);
+ mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds * HZ);
}
static void cmm_timer_fn(struct timer_list *unused)
@@ -251,45 +246,42 @@ static int cmm_skip_blanks(char *cp, char **endp)
return str != cp;
}
-static struct ctl_table cmm_table[];
-
static int cmm_pages_handler(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- char buf[16], *p;
- unsigned int len;
- long nr;
+ long nr = cmm_get_pages();
+ struct ctl_table ctl_entry = {
+ .procname = ctl->procname,
+ .data = &nr,
+ .maxlen = sizeof(long),
+ };
+ int rc;
- if (!*lenp || (*ppos && !write)) {
- *lenp = 0;
- return 0;
- }
+ rc = proc_doulongvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write)
+ return rc;
- if (write) {
- len = *lenp;
- if (copy_from_user(buf, buffer,
- len > sizeof(buf) ? sizeof(buf) : len))
- return -EFAULT;
- buf[sizeof(buf) - 1] = '\0';
- cmm_skip_blanks(buf, &p);
- nr = simple_strtoul(p, &p, 0);
- if (ctl == &cmm_table[0])
- cmm_set_pages(nr);
- else
- cmm_add_timed_pages(nr);
- } else {
- if (ctl == &cmm_table[0])
- nr = cmm_get_pages();
- else
- nr = cmm_get_timed_pages();
- len = sprintf(buf, "%ld\n", nr);
- if (len > *lenp)
- len = *lenp;
- if (copy_to_user(buffer, buf, len))
- return -EFAULT;
- }
- *lenp = len;
- *ppos += len;
+ cmm_set_pages(nr);
+ return 0;
+}
+
+static int cmm_timed_pages_handler(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ long nr = cmm_get_timed_pages();
+ struct ctl_table ctl_entry = {
+ .procname = ctl->procname,
+ .data = &nr,
+ .maxlen = sizeof(long),
+ };
+ int rc;
+
+ rc = proc_doulongvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
+ if (rc < 0 || !write)
+ return rc;
+
+ cmm_add_timed_pages(nr);
return 0;
}
@@ -338,7 +330,7 @@ static struct ctl_table cmm_table[] = {
{
.procname = "cmm_timed_pages",
.mode = 0644,
- .proc_handler = cmm_pages_handler,
+ .proc_handler = cmm_timed_pages_handler,
},
{
.procname = "cmm_timeout",
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 507f23ba2034..363f6470d742 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -3,6 +3,8 @@
#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/mm.h>
+#include <linux/kasan.h>
+#include <asm/kasan.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
@@ -17,18 +19,26 @@ enum address_markers_idx {
IDENTITY_NR = 0,
KERNEL_START_NR,
KERNEL_END_NR,
+#ifdef CONFIG_KASAN
+ KASAN_SHADOW_START_NR,
+ KASAN_SHADOW_END_NR,
+#endif
VMEMMAP_NR,
VMALLOC_NR,
MODULES_NR,
};
static struct addr_marker address_markers[] = {
- [IDENTITY_NR] = {0, "Identity Mapping"},
- [KERNEL_START_NR] = {(unsigned long)&_stext, "Kernel Image Start"},
- [KERNEL_END_NR] = {(unsigned long)&_end, "Kernel Image End"},
- [VMEMMAP_NR] = {0, "vmemmap Area"},
- [VMALLOC_NR] = {0, "vmalloc Area"},
- [MODULES_NR] = {0, "Modules Area"},
+ [IDENTITY_NR] = {0, "Identity Mapping"},
+ [KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"},
+ [KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"},
+#ifdef CONFIG_KASAN
+ [KASAN_SHADOW_START_NR] = {KASAN_SHADOW_START, "Kasan Shadow Start"},
+ [KASAN_SHADOW_END_NR] = {KASAN_SHADOW_END, "Kasan Shadow End"},
+#endif
+ [VMEMMAP_NR] = {0, "vmemmap Area"},
+ [VMALLOC_NR] = {0, "vmalloc Area"},
+ [MODULES_NR] = {0, "Modules Area"},
{ -1, NULL }
};
@@ -80,7 +90,7 @@ static void note_page(struct seq_file *m, struct pg_state *st,
} else if (prot != cur || level != st->level ||
st->current_address >= st->marker[1].start_address) {
/* Print the actual finished series */
- seq_printf(m, "0x%0*lx-0x%0*lx",
+ seq_printf(m, "0x%0*lx-0x%0*lx ",
width, st->start_address,
width, st->current_address);
delta = (st->current_address - st->start_address) >> 10;
@@ -90,7 +100,7 @@ static void note_page(struct seq_file *m, struct pg_state *st,
}
seq_printf(m, "%9lu%c ", delta, *unit);
print_prot(m, st->current_prot, st->level);
- if (st->current_address >= st->marker[1].start_address) {
+ while (st->current_address >= st->marker[1].start_address) {
st->marker++;
seq_printf(m, "---[ %s ]---\n", st->marker->name);
}
@@ -100,6 +110,17 @@ static void note_page(struct seq_file *m, struct pg_state *st,
}
}
+#ifdef CONFIG_KASAN
+static void note_kasan_zero_page(struct seq_file *m, struct pg_state *st)
+{
+ unsigned int prot;
+
+ prot = pte_val(*kasan_zero_pte) &
+ (_PAGE_PROTECT | _PAGE_INVALID | _PAGE_NOEXEC);
+ note_page(m, st, prot, 4);
+}
+#endif
+
/*
* The actual page table walker functions. In order to keep the
* implementation of print_prot() short, we only check and pass
@@ -132,6 +153,13 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
pmd_t *pmd;
int i;
+#ifdef CONFIG_KASAN
+ if ((pud_val(*pud) & PAGE_MASK) == __pa(kasan_zero_pmd)) {
+ note_kasan_zero_page(m, st);
+ return;
+ }
+#endif
+
for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++) {
st->current_address = addr;
pmd = pmd_offset(pud, addr);
@@ -156,6 +184,13 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st,
pud_t *pud;
int i;
+#ifdef CONFIG_KASAN
+ if ((p4d_val(*p4d) & PAGE_MASK) == __pa(kasan_zero_pud)) {
+ note_kasan_zero_page(m, st);
+ return;
+ }
+#endif
+
for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) {
st->current_address = addr;
pud = pud_offset(p4d, addr);
@@ -179,6 +214,13 @@ static void walk_p4d_level(struct seq_file *m, struct pg_state *st,
p4d_t *p4d;
int i;
+#ifdef CONFIG_KASAN
+ if ((pgd_val(*pgd) & PAGE_MASK) == __pa(kasan_zero_p4d)) {
+ note_kasan_zero_page(m, st);
+ return;
+ }
+#endif
+
for (i = 0; i < PTRS_PER_P4D && addr < max_addr; i++) {
st->current_address = addr;
p4d = p4d_offset(pgd, addr);
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 920d40894535..eba2def3414d 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -16,7 +16,7 @@
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/export.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/ctype.h>
#include <linux/ioport.h>
#include <asm/diag.h>
@@ -80,7 +80,7 @@ struct qin64 {
struct dcss_segment {
struct list_head list;
char dcss_name[8];
- char res_name[15];
+ char res_name[16];
unsigned long start_addr;
unsigned long end;
atomic_t ref_count;
@@ -103,7 +103,7 @@ static int scode_set;
static int
dcss_set_subcodes(void)
{
- char *name = kmalloc(8 * sizeof(char), GFP_KERNEL | GFP_DMA);
+ char *name = kmalloc(8, GFP_KERNEL | GFP_DMA);
unsigned long rx, ry;
int rc;
@@ -433,7 +433,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
memcpy(&seg->res_name, seg->dcss_name, 8);
EBCASC(seg->res_name, 8);
seg->res_name[8] = '\0';
- strncat(seg->res_name, " (DCSS)", 7);
+ strlcat(seg->res_name, " (DCSS)", sizeof(seg->res_name));
seg->res->name = seg->res_name;
rc = seg->vm_segtype;
if (rc == SEG_TYPE_SC ||
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 93faeca52284..2b8f32f56e0c 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -265,14 +265,10 @@ void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault)
*/
static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
{
- struct siginfo si;
-
report_user_fault(regs, SIGSEGV, 1);
- si.si_signo = SIGSEGV;
- si.si_errno = 0;
- si.si_code = si_code;
- si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK);
- force_sig_info(SIGSEGV, &si, current);
+ force_sig_fault(SIGSEGV, si_code,
+ (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK),
+ current);
}
static noinline void do_no_context(struct pt_regs *regs)
@@ -316,18 +312,13 @@ static noinline void do_low_address(struct pt_regs *regs)
static noinline void do_sigbus(struct pt_regs *regs)
{
- struct task_struct *tsk = current;
- struct siginfo si;
-
/*
* Send a sigbus, regardless of whether we were in kernel
* or user mode.
*/
- si.si_signo = SIGBUS;
- si.si_errno = 0;
- si.si_code = BUS_ADRERR;
- si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK);
- force_sig_info(SIGBUS, &si, tsk);
+ force_sig_fault(SIGBUS, BUS_ADRERR,
+ (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK),
+ current);
}
static noinline int signal_return(struct pt_regs *regs)
@@ -350,7 +341,8 @@ static noinline int signal_return(struct pt_regs *regs)
return -EACCES;
}
-static noinline void do_fault_error(struct pt_regs *regs, int access, int fault)
+static noinline void do_fault_error(struct pt_regs *regs, int access,
+ vm_fault_t fault)
{
int si_code;
@@ -410,7 +402,7 @@ static noinline void do_fault_error(struct pt_regs *regs, int access, int fault)
* 11 Page translation -> Not present (nullification)
* 3b Region third trans. -> Not present (nullification)
*/
-static inline int do_exception(struct pt_regs *regs, int access)
+static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
{
struct gmap *gmap;
struct task_struct *tsk;
@@ -420,7 +412,7 @@ static inline int do_exception(struct pt_regs *regs, int access)
unsigned long trans_exc_code;
unsigned long address;
unsigned int flags;
- int fault;
+ vm_fault_t fault;
tsk = current;
/*
@@ -511,6 +503,8 @@ retry:
/* No reason to continue if interrupted by SIGKILL. */
if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
fault = VM_FAULT_SIGNAL;
+ if (flags & FAULT_FLAG_RETRY_NOWAIT)
+ goto out_up;
goto out;
}
if (unlikely(fault & VM_FAULT_ERROR))
@@ -571,7 +565,8 @@ out:
void do_protection_exception(struct pt_regs *regs)
{
unsigned long trans_exc_code;
- int access, fault;
+ int access;
+ vm_fault_t fault;
trans_exc_code = regs->int_parm_long;
/*
@@ -606,7 +601,8 @@ NOKPROBE_SYMBOL(do_protection_exception);
void do_dat_exception(struct pt_regs *regs)
{
- int access, fault;
+ int access;
+ vm_fault_t fault;
access = VM_READ | VM_EXEC | VM_WRITE;
fault = do_exception(regs, access);
@@ -640,17 +636,19 @@ struct pfault_refbk {
u64 reserved;
} __attribute__ ((packed, aligned(8)));
+static struct pfault_refbk pfault_init_refbk = {
+ .refdiagc = 0x258,
+ .reffcode = 0,
+ .refdwlen = 5,
+ .refversn = 2,
+ .refgaddr = __LC_LPP,
+ .refselmk = 1ULL << 48,
+ .refcmpmk = 1ULL << 48,
+ .reserved = __PF_RES_FIELD
+};
+
int pfault_init(void)
{
- struct pfault_refbk refbk = {
- .refdiagc = 0x258,
- .reffcode = 0,
- .refdwlen = 5,
- .refversn = 2,
- .refgaddr = __LC_LPP,
- .refselmk = 1ULL << 48,
- .refcmpmk = 1ULL << 48,
- .reserved = __PF_RES_FIELD };
int rc;
if (pfault_disable)
@@ -662,18 +660,20 @@ int pfault_init(void)
"1: la %0,8\n"
"2:\n"
EX_TABLE(0b,1b)
- : "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc");
+ : "=d" (rc)
+ : "a" (&pfault_init_refbk), "m" (pfault_init_refbk) : "cc");
return rc;
}
+static struct pfault_refbk pfault_fini_refbk = {
+ .refdiagc = 0x258,
+ .reffcode = 1,
+ .refdwlen = 5,
+ .refversn = 2,
+};
+
void pfault_fini(void)
{
- struct pfault_refbk refbk = {
- .refdiagc = 0x258,
- .reffcode = 1,
- .refdwlen = 5,
- .refversn = 2,
- };
if (pfault_disable)
return;
@@ -682,7 +682,7 @@ void pfault_fini(void)
" diag %0,0,0x258\n"
"0: nopr %%r7\n"
EX_TABLE(0b,0b)
- : : "a" (&refbk), "m" (refbk) : "cc");
+ : : "a" (&pfault_fini_refbk), "m" (pfault_fini_refbk) : "cc");
}
static DEFINE_SPINLOCK(pfault_lock);
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 2c55a2b9d6c6..1e668b95e0c6 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2,8 +2,10 @@
/*
* KVM guest address space mapping code
*
- * Copyright IBM Corp. 2007, 2016
+ * Copyright IBM Corp. 2007, 2016, 2018
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ * David Hildenbrand <david@redhat.com>
+ * Janosch Frank <frankja@linux.vnet.ibm.com>
*/
#include <linux/kernel.h>
@@ -521,6 +523,9 @@ void gmap_unlink(struct mm_struct *mm, unsigned long *table,
rcu_read_unlock();
}
+static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new,
+ unsigned long gaddr);
+
/**
* gmap_link - set up shadow page tables to connect a host to a guest address
* @gmap: pointer to guest mapping meta data structure
@@ -541,6 +546,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
+ u64 unprot;
int rc;
BUG_ON(gmap_is_shadow(gmap));
@@ -584,8 +590,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
return -EFAULT;
pmd = pmd_offset(pud, vmaddr);
VM_BUG_ON(pmd_none(*pmd));
- /* large pmds cannot yet be handled */
- if (pmd_large(*pmd))
+ /* Are we allowed to use huge pages? */
+ if (pmd_large(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
return -EFAULT;
/* Link gmap segment table entry location to page table. */
rc = radix_tree_preload(GFP_KERNEL);
@@ -596,10 +602,22 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
if (*table == _SEGMENT_ENTRY_EMPTY) {
rc = radix_tree_insert(&gmap->host_to_guest,
vmaddr >> PMD_SHIFT, table);
- if (!rc)
- *table = pmd_val(*pmd);
- } else
- rc = 0;
+ if (!rc) {
+ if (pmd_large(*pmd)) {
+ *table = (pmd_val(*pmd) &
+ _SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
+ | _SEGMENT_ENTRY_GMAP_UC;
+ } else
+ *table = pmd_val(*pmd) &
+ _SEGMENT_ENTRY_HARDWARE_BITS;
+ }
+ } else if (*table & _SEGMENT_ENTRY_PROTECT &&
+ !(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) {
+ unprot = (u64)*table;
+ unprot &= ~_SEGMENT_ENTRY_PROTECT;
+ unprot |= _SEGMENT_ENTRY_GMAP_UC;
+ gmap_pmdp_xchg(gmap, (pmd_t *)table, __pmd(unprot), gaddr);
+ }
spin_unlock(&gmap->guest_table_lock);
spin_unlock(ptl);
radix_tree_preload_end();
@@ -690,6 +708,14 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
vmaddr |= gaddr & ~PMD_MASK;
/* Find vma in the parent mm */
vma = find_vma(gmap->mm, vmaddr);
+ if (!vma)
+ continue;
+ /*
+ * We do not discard pages that are backed by
+ * hugetlbfs, so we don't have to refault them.
+ */
+ if (is_vm_hugetlb_page(vma))
+ continue;
size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
zap_page_range(vma, vmaddr, size);
}
@@ -864,7 +890,134 @@ static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,
*/
static void gmap_pte_op_end(spinlock_t *ptl)
{
- spin_unlock(ptl);
+ if (ptl)
+ spin_unlock(ptl);
+}
+
+/**
+ * gmap_pmd_op_walk - walk the gmap tables, get the guest table lock
+ * and return the pmd pointer
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ *
+ * Returns a pointer to the pmd for a guest address, or NULL
+ */
+static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
+{
+ pmd_t *pmdp;
+
+ BUG_ON(gmap_is_shadow(gmap));
+ pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1);
+ if (!pmdp)
+ return NULL;
+
+ /* without huge pages, there is no need to take the table lock */
+ if (!gmap->mm->context.allow_gmap_hpage_1m)
+ return pmd_none(*pmdp) ? NULL : pmdp;
+
+ spin_lock(&gmap->guest_table_lock);
+ if (pmd_none(*pmdp)) {
+ spin_unlock(&gmap->guest_table_lock);
+ return NULL;
+ }
+
+ /* 4k page table entries are locked via the pte (pte_alloc_map_lock). */
+ if (!pmd_large(*pmdp))
+ spin_unlock(&gmap->guest_table_lock);
+ return pmdp;
+}
+
+/**
+ * gmap_pmd_op_end - release the guest_table_lock if needed
+ * @gmap: pointer to the guest mapping meta data structure
+ * @pmdp: pointer to the pmd
+ */
+static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp)
+{
+ if (pmd_large(*pmdp))
+ spin_unlock(&gmap->guest_table_lock);
+}
+
+/*
+ * gmap_protect_pmd - remove access rights to memory and set pmd notification bits
+ * @pmdp: pointer to the pmd to be protected
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ * @bits: notification bits to set
+ *
+ * Returns:
+ * 0 if successfully protected
+ * -EAGAIN if a fixup is needed
+ * -EINVAL if unsupported notifier bits have been specified
+ *
+ * Expected to be called with sg->mm->mmap_sem in read and
+ * guest_table_lock held.
+ */
+static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr,
+ pmd_t *pmdp, int prot, unsigned long bits)
+{
+ int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID;
+ int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT;
+ pmd_t new = *pmdp;
+
+ /* Fixup needed */
+ if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE)))
+ return -EAGAIN;
+
+ if (prot == PROT_NONE && !pmd_i) {
+ pmd_val(new) |= _SEGMENT_ENTRY_INVALID;
+ gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
+ }
+
+ if (prot == PROT_READ && !pmd_p) {
+ pmd_val(new) &= ~_SEGMENT_ENTRY_INVALID;
+ pmd_val(new) |= _SEGMENT_ENTRY_PROTECT;
+ gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
+ }
+
+ if (bits & GMAP_NOTIFY_MPROT)
+ pmd_val(*pmdp) |= _SEGMENT_ENTRY_GMAP_IN;
+
+ /* Shadow GMAP protection needs split PMDs */
+ if (bits & GMAP_NOTIFY_SHADOW)
+ return -EINVAL;
+
+ return 0;
+}
+
+/*
+ * gmap_protect_pte - remove access rights to memory and set pgste bits
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: virtual address in the guest address space
+ * @pmdp: pointer to the pmd associated with the pte
+ * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE
+ * @bits: notification bits to set
+ *
+ * Returns 0 if successfully protected, -ENOMEM if out of memory and
+ * -EAGAIN if a fixup is needed.
+ *
+ * Expected to be called with sg->mm->mmap_sem in read
+ */
+static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr,
+ pmd_t *pmdp, int prot, unsigned long bits)
+{
+ int rc;
+ pte_t *ptep;
+ spinlock_t *ptl = NULL;
+ unsigned long pbits = 0;
+
+ if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
+ return -EAGAIN;
+
+ ptep = pte_alloc_map_lock(gmap->mm, pmdp, gaddr, &ptl);
+ if (!ptep)
+ return -ENOMEM;
+
+ pbits |= (bits & GMAP_NOTIFY_MPROT) ? PGSTE_IN_BIT : 0;
+ pbits |= (bits & GMAP_NOTIFY_SHADOW) ? PGSTE_VSIE_BIT : 0;
+ /* Protect and unlock. */
+ rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, pbits);
+ gmap_pte_op_end(ptl);
+ return rc;
}
/*
@@ -883,30 +1036,45 @@ static void gmap_pte_op_end(spinlock_t *ptl)
static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
unsigned long len, int prot, unsigned long bits)
{
- unsigned long vmaddr;
- spinlock_t *ptl;
- pte_t *ptep;
+ unsigned long vmaddr, dist;
+ pmd_t *pmdp;
int rc;
BUG_ON(gmap_is_shadow(gmap));
while (len) {
rc = -EAGAIN;
- ptep = gmap_pte_op_walk(gmap, gaddr, &ptl);
- if (ptep) {
- rc = ptep_force_prot(gmap->mm, gaddr, ptep, prot, bits);
- gmap_pte_op_end(ptl);
+ pmdp = gmap_pmd_op_walk(gmap, gaddr);
+ if (pmdp) {
+ if (!pmd_large(*pmdp)) {
+ rc = gmap_protect_pte(gmap, gaddr, pmdp, prot,
+ bits);
+ if (!rc) {
+ len -= PAGE_SIZE;
+ gaddr += PAGE_SIZE;
+ }
+ } else {
+ rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot,
+ bits);
+ if (!rc) {
+ dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK);
+ len = len < dist ? 0 : len - dist;
+ gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE;
+ }
+ }
+ gmap_pmd_op_end(gmap, pmdp);
}
if (rc) {
+ if (rc == -EINVAL)
+ return rc;
+
+ /* -EAGAIN, fixup of userspace mm and gmap */
vmaddr = __gmap_translate(gmap, gaddr);
if (IS_ERR_VALUE(vmaddr))
return vmaddr;
rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot);
if (rc)
return rc;
- continue;
}
- gaddr += PAGE_SIZE;
- len -= PAGE_SIZE;
}
return 0;
}
@@ -935,7 +1103,7 @@ int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr,
if (!MACHINE_HAS_ESOP && prot == PROT_READ)
return -EINVAL;
down_read(&gmap->mm->mmap_sem);
- rc = gmap_protect_range(gmap, gaddr, len, prot, PGSTE_IN_BIT);
+ rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT);
up_read(&gmap->mm->mmap_sem);
return rc;
}
@@ -1474,6 +1642,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
unsigned long limit;
int rc;
+ BUG_ON(parent->mm->context.allow_gmap_hpage_1m);
BUG_ON(gmap_is_shadow(parent));
spin_lock(&parent->shadow_lock);
sg = gmap_find_shadow(parent, asce, edat_level);
@@ -1526,7 +1695,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
down_read(&parent->mm->mmap_sem);
rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN,
((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE,
- PROT_READ, PGSTE_VSIE_BIT);
+ PROT_READ, GMAP_NOTIFY_SHADOW);
up_read(&parent->mm->mmap_sem);
spin_lock(&parent->shadow_lock);
new->initialized = true;
@@ -2092,6 +2261,225 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr,
}
EXPORT_SYMBOL_GPL(ptep_notify);
+static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp,
+ unsigned long gaddr)
+{
+ pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_IN;
+ gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1);
+}
+
+/**
+ * gmap_pmdp_xchg - exchange a gmap pmd with another
+ * @gmap: pointer to the guest address space structure
+ * @pmdp: pointer to the pmd entry
+ * @new: replacement entry
+ * @gaddr: the affected guest address
+ *
+ * This function is assumed to be called with the guest_table_lock
+ * held.
+ */
+static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
+ unsigned long gaddr)
+{
+ gaddr &= HPAGE_MASK;
+ pmdp_notify_gmap(gmap, pmdp, gaddr);
+ pmd_val(new) &= ~_SEGMENT_ENTRY_GMAP_IN;
+ if (MACHINE_HAS_TLB_GUEST)
+ __pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce,
+ IDTE_GLOBAL);
+ else if (MACHINE_HAS_IDTE)
+ __pmdp_idte(gaddr, (pmd_t *)pmdp, 0, 0, IDTE_GLOBAL);
+ else
+ __pmdp_csp(pmdp);
+ *pmdp = new;
+}
+
+static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
+ int purge)
+{
+ pmd_t *pmdp;
+ struct gmap *gmap;
+ unsigned long gaddr;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
+ spin_lock(&gmap->guest_table_lock);
+ pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest,
+ vmaddr >> PMD_SHIFT);
+ if (pmdp) {
+ gaddr = __gmap_segment_gaddr((unsigned long *)pmdp);
+ pmdp_notify_gmap(gmap, pmdp, gaddr);
+ WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
+ _SEGMENT_ENTRY_GMAP_UC));
+ if (purge)
+ __pmdp_csp(pmdp);
+ pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY;
+ }
+ spin_unlock(&gmap->guest_table_lock);
+ }
+ rcu_read_unlock();
+}
+
+/**
+ * gmap_pmdp_invalidate - invalidate all affected guest pmd entries without
+ * flushing
+ * @mm: pointer to the process mm_struct
+ * @vmaddr: virtual address in the process address space
+ */
+void gmap_pmdp_invalidate(struct mm_struct *mm, unsigned long vmaddr)
+{
+ gmap_pmdp_clear(mm, vmaddr, 0);
+}
+EXPORT_SYMBOL_GPL(gmap_pmdp_invalidate);
+
+/**
+ * gmap_pmdp_csp - csp all affected guest pmd entries
+ * @mm: pointer to the process mm_struct
+ * @vmaddr: virtual address in the process address space
+ */
+void gmap_pmdp_csp(struct mm_struct *mm, unsigned long vmaddr)
+{
+ gmap_pmdp_clear(mm, vmaddr, 1);
+}
+EXPORT_SYMBOL_GPL(gmap_pmdp_csp);
+
+/**
+ * gmap_pmdp_idte_local - invalidate and clear a guest pmd entry
+ * @mm: pointer to the process mm_struct
+ * @vmaddr: virtual address in the process address space
+ */
+void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr)
+{
+ unsigned long *entry, gaddr;
+ struct gmap *gmap;
+ pmd_t *pmdp;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
+ spin_lock(&gmap->guest_table_lock);
+ entry = radix_tree_delete(&gmap->host_to_guest,
+ vmaddr >> PMD_SHIFT);
+ if (entry) {
+ pmdp = (pmd_t *)entry;
+ gaddr = __gmap_segment_gaddr(entry);
+ pmdp_notify_gmap(gmap, pmdp, gaddr);
+ WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
+ _SEGMENT_ENTRY_GMAP_UC));
+ if (MACHINE_HAS_TLB_GUEST)
+ __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
+ gmap->asce, IDTE_LOCAL);
+ else if (MACHINE_HAS_IDTE)
+ __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL);
+ *entry = _SEGMENT_ENTRY_EMPTY;
+ }
+ spin_unlock(&gmap->guest_table_lock);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local);
+
+/**
+ * gmap_pmdp_idte_global - invalidate and clear a guest pmd entry
+ * @mm: pointer to the process mm_struct
+ * @vmaddr: virtual address in the process address space
+ */
+void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
+{
+ unsigned long *entry, gaddr;
+ struct gmap *gmap;
+ pmd_t *pmdp;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
+ spin_lock(&gmap->guest_table_lock);
+ entry = radix_tree_delete(&gmap->host_to_guest,
+ vmaddr >> PMD_SHIFT);
+ if (entry) {
+ pmdp = (pmd_t *)entry;
+ gaddr = __gmap_segment_gaddr(entry);
+ pmdp_notify_gmap(gmap, pmdp, gaddr);
+ WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE |
+ _SEGMENT_ENTRY_GMAP_UC));
+ if (MACHINE_HAS_TLB_GUEST)
+ __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE,
+ gmap->asce, IDTE_GLOBAL);
+ else if (MACHINE_HAS_IDTE)
+ __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL);
+ else
+ __pmdp_csp(pmdp);
+ *entry = _SEGMENT_ENTRY_EMPTY;
+ }
+ spin_unlock(&gmap->guest_table_lock);
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global);
+
+/**
+ * gmap_test_and_clear_dirty_pmd - test and reset segment dirty status
+ * @gmap: pointer to guest address space
+ * @pmdp: pointer to the pmd to be tested
+ * @gaddr: virtual address in the guest address space
+ *
+ * This function is assumed to be called with the guest_table_lock
+ * held.
+ */
+bool gmap_test_and_clear_dirty_pmd(struct gmap *gmap, pmd_t *pmdp,
+ unsigned long gaddr)
+{
+ if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
+ return false;
+
+ /* Already protected memory, which did not change is clean */
+ if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT &&
+ !(pmd_val(*pmdp) & _SEGMENT_ENTRY_GMAP_UC))
+ return false;
+
+ /* Clear UC indication and reset protection */
+ pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_UC;
+ gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0);
+ return true;
+}
+
+/**
+ * gmap_sync_dirty_log_pmd - set bitmap based on dirty status of segment
+ * @gmap: pointer to guest address space
+ * @bitmap: dirty bitmap for this pmd
+ * @gaddr: virtual address in the guest address space
+ * @vmaddr: virtual address in the host address space
+ *
+ * This function is assumed to be called with the guest_table_lock
+ * held.
+ */
+void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
+ unsigned long gaddr, unsigned long vmaddr)
+{
+ int i;
+ pmd_t *pmdp;
+ pte_t *ptep;
+ spinlock_t *ptl;
+
+ pmdp = gmap_pmd_op_walk(gmap, gaddr);
+ if (!pmdp)
+ return;
+
+ if (pmd_large(*pmdp)) {
+ if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr))
+ bitmap_fill(bitmap, _PAGE_ENTRIES);
+ } else {
+ for (i = 0; i < _PAGE_ENTRIES; i++, vmaddr += PAGE_SIZE) {
+ ptep = pte_alloc_map_lock(gmap->mm, pmdp, vmaddr, &ptl);
+ if (!ptep)
+ continue;
+ if (ptep_test_and_clear_uc(gmap->mm, vmaddr, ptep))
+ set_bit(i, bitmap);
+ spin_unlock(ptl);
+ }
+ }
+ gmap_pmd_op_end(gmap, pmdp);
+}
+EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd);
+
static inline void thp_split_mm(struct mm_struct *mm)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -2168,30 +2556,58 @@ EXPORT_SYMBOL_GPL(s390_enable_sie);
* Enable storage key handling from now on and initialize the storage
* keys with the default key.
*/
-static int __s390_enable_skey(pte_t *pte, unsigned long addr,
- unsigned long next, struct mm_walk *walk)
+static int __s390_enable_skey_pte(pte_t *pte, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
{
/* Clear storage key */
ptep_zap_key(walk->mm, addr, pte);
return 0;
}
+static int __s390_enable_skey_hugetlb(pte_t *pte, unsigned long addr,
+ unsigned long hmask, unsigned long next,
+ struct mm_walk *walk)
+{
+ pmd_t *pmd = (pmd_t *)pte;
+ unsigned long start, end;
+ struct page *page = pmd_page(*pmd);
+
+ /*
+ * The write check makes sure we do not set a key on shared
+ * memory. This is needed as the walker does not differentiate
+ * between actual guest memory and the process executable or
+ * shared libraries.
+ */
+ if (pmd_val(*pmd) & _SEGMENT_ENTRY_INVALID ||
+ !(pmd_val(*pmd) & _SEGMENT_ENTRY_WRITE))
+ return 0;
+
+ start = pmd_val(*pmd) & HPAGE_MASK;
+ end = start + HPAGE_SIZE - 1;
+ __storage_key_init_range(start, end);
+ set_bit(PG_arch_1, &page->flags);
+ return 0;
+}
+
int s390_enable_skey(void)
{
- struct mm_walk walk = { .pte_entry = __s390_enable_skey };
+ struct mm_walk walk = {
+ .hugetlb_entry = __s390_enable_skey_hugetlb,
+ .pte_entry = __s390_enable_skey_pte,
+ };
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
int rc = 0;
down_write(&mm->mmap_sem);
- if (mm_use_skey(mm))
+ if (mm_uses_skeys(mm))
goto out_up;
- mm->context.use_skey = 1;
+ mm->context.uses_skeys = 1;
for (vma = mm->mmap; vma; vma = vma->vm_next) {
if (ksm_madvise(vma, vma->vm_start, vma->vm_end,
MADV_UNMERGEABLE, &vma->vm_flags)) {
- mm->context.use_skey = 0;
+ mm->context.uses_skeys = 0;
rc = -ENOMEM;
goto out_up;
}
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 05c8abd864f1..2809d11c7a28 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -220,6 +220,8 @@ static inline int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
/*
* Like get_user_pages_fast() except its IRQ-safe in that it won't fall
* back to the regular GUP.
+ * Note a difference with get_user_pages_fast: this always returns the
+ * number of pages pinned, 0 if no pages were pinned.
*/
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index e804090f4470..b0246c705a19 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -123,6 +123,29 @@ static inline pte_t __rste_to_pte(unsigned long rste)
return pte;
}
+static void clear_huge_pte_skeys(struct mm_struct *mm, unsigned long rste)
+{
+ struct page *page;
+ unsigned long size, paddr;
+
+ if (!mm_uses_skeys(mm) ||
+ rste & _SEGMENT_ENTRY_INVALID)
+ return;
+
+ if ((rste & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3) {
+ page = pud_page(__pud(rste));
+ size = PUD_SIZE;
+ paddr = rste & PUD_MASK;
+ } else {
+ page = pmd_page(__pmd(rste));
+ size = PMD_SIZE;
+ paddr = rste & PMD_MASK;
+ }
+
+ if (!test_and_set_bit(PG_arch_1, &page->flags))
+ __storage_key_init_range(paddr, paddr + size - 1);
+}
+
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
@@ -137,6 +160,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
rste |= _REGION_ENTRY_TYPE_R3 | _REGION3_ENTRY_LARGE;
else
rste |= _SEGMENT_ENTRY_LARGE;
+ clear_huge_pte_skeys(mm, rste);
pte_val(*ptep) = rste;
}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 3fa3e5323612..76d0708438e9 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -21,7 +21,7 @@
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/memory.h>
#include <linux/pfn.h>
#include <linux/poison.h>
@@ -29,7 +29,6 @@
#include <linux/export.h>
#include <linux/cma.h>
#include <linux/gfp.h>
-#include <linux/memblock.h>
#include <asm/processor.h>
#include <linux/uaccess.h>
#include <asm/pgtable.h>
@@ -42,6 +41,7 @@
#include <asm/ctl_reg.h>
#include <asm/sclp.h>
#include <asm/set_memory.h>
+#include <asm/kasan.h>
pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir);
@@ -98,8 +98,9 @@ void __init paging_init(void)
S390_lowcore.user_asce = S390_lowcore.kernel_asce;
crst_table_init((unsigned long *) init_mm.pgd, pgd_type);
vmem_map_init();
+ kasan_copy_shadow(init_mm.pgd);
- /* enable virtual mapping in kernel mode */
+ /* enable virtual mapping in kernel mode */
__ctl_load(S390_lowcore.kernel_asce, 1, 1);
__ctl_load(S390_lowcore.kernel_asce, 7, 7);
__ctl_load(S390_lowcore.kernel_asce, 13, 13);
@@ -107,6 +108,7 @@ void __init paging_init(void)
psw_bits(psw).dat = 1;
psw_bits(psw).as = PSW_BITS_AS_HOME;
__load_psw_mask(psw.mask);
+ kasan_free_early_identity();
sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
@@ -136,7 +138,7 @@ void __init mem_init(void)
cmma_init();
/* this will put all low memory onto the freelists */
- free_all_bootmem();
+ memblock_free_all();
setup_zero_pages(); /* Setup zeroed pages. */
cmma_init_nodat();
diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c
new file mode 100644
index 000000000000..acb9645b762b
--- /dev/null
+++ b/arch/s390/mm/kasan_init.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kasan.h>
+#include <linux/sched/task.h>
+#include <linux/memblock.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/kasan.h>
+#include <asm/mem_detect.h>
+#include <asm/processor.h>
+#include <asm/sclp.h>
+#include <asm/facility.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+
+static unsigned long segment_pos __initdata;
+static unsigned long segment_low __initdata;
+static unsigned long pgalloc_pos __initdata;
+static unsigned long pgalloc_low __initdata;
+static unsigned long pgalloc_freeable __initdata;
+static bool has_edat __initdata;
+static bool has_nx __initdata;
+
+#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x))
+
+static pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+
+static void __init kasan_early_panic(const char *reason)
+{
+ sclp_early_printk("The Linux kernel failed to boot with the KernelAddressSanitizer:\n");
+ sclp_early_printk(reason);
+ disabled_wait(0);
+}
+
+static void * __init kasan_early_alloc_segment(void)
+{
+ segment_pos -= _SEGMENT_SIZE;
+
+ if (segment_pos < segment_low)
+ kasan_early_panic("out of memory during initialisation\n");
+
+ return (void *)segment_pos;
+}
+
+static void * __init kasan_early_alloc_pages(unsigned int order)
+{
+ pgalloc_pos -= (PAGE_SIZE << order);
+
+ if (pgalloc_pos < pgalloc_low)
+ kasan_early_panic("out of memory during initialisation\n");
+
+ return (void *)pgalloc_pos;
+}
+
+static void * __init kasan_early_crst_alloc(unsigned long val)
+{
+ unsigned long *table;
+
+ table = kasan_early_alloc_pages(CRST_ALLOC_ORDER);
+ if (table)
+ crst_table_init(table, val);
+ return table;
+}
+
+static pte_t * __init kasan_early_pte_alloc(void)
+{
+ static void *pte_leftover;
+ pte_t *pte;
+
+ BUILD_BUG_ON(_PAGE_TABLE_SIZE * 2 != PAGE_SIZE);
+
+ if (!pte_leftover) {
+ pte_leftover = kasan_early_alloc_pages(0);
+ pte = pte_leftover + _PAGE_TABLE_SIZE;
+ } else {
+ pte = pte_leftover;
+ pte_leftover = NULL;
+ }
+ memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
+ return pte;
+}
+
+enum populate_mode {
+ POPULATE_ONE2ONE,
+ POPULATE_MAP,
+ POPULATE_ZERO_SHADOW
+};
+static void __init kasan_early_vmemmap_populate(unsigned long address,
+ unsigned long end,
+ enum populate_mode mode)
+{
+ unsigned long pgt_prot_zero, pgt_prot, sgt_prot;
+ pgd_t *pg_dir;
+ p4d_t *p4_dir;
+ pud_t *pu_dir;
+ pmd_t *pm_dir;
+ pte_t *pt_dir;
+
+ pgt_prot_zero = pgprot_val(PAGE_KERNEL_RO);
+ if (!has_nx)
+ pgt_prot_zero &= ~_PAGE_NOEXEC;
+ pgt_prot = pgprot_val(PAGE_KERNEL_EXEC);
+ sgt_prot = pgprot_val(SEGMENT_KERNEL_EXEC);
+
+ while (address < end) {
+ pg_dir = pgd_offset_k(address);
+ if (pgd_none(*pg_dir)) {
+ if (mode == POPULATE_ZERO_SHADOW &&
+ IS_ALIGNED(address, PGDIR_SIZE) &&
+ end - address >= PGDIR_SIZE) {
+ pgd_populate(&init_mm, pg_dir, kasan_zero_p4d);
+ address = (address + PGDIR_SIZE) & PGDIR_MASK;
+ continue;
+ }
+ p4_dir = kasan_early_crst_alloc(_REGION2_ENTRY_EMPTY);
+ pgd_populate(&init_mm, pg_dir, p4_dir);
+ }
+
+ p4_dir = p4d_offset(pg_dir, address);
+ if (p4d_none(*p4_dir)) {
+ if (mode == POPULATE_ZERO_SHADOW &&
+ IS_ALIGNED(address, P4D_SIZE) &&
+ end - address >= P4D_SIZE) {
+ p4d_populate(&init_mm, p4_dir, kasan_zero_pud);
+ address = (address + P4D_SIZE) & P4D_MASK;
+ continue;
+ }
+ pu_dir = kasan_early_crst_alloc(_REGION3_ENTRY_EMPTY);
+ p4d_populate(&init_mm, p4_dir, pu_dir);
+ }
+
+ pu_dir = pud_offset(p4_dir, address);
+ if (pud_none(*pu_dir)) {
+ if (mode == POPULATE_ZERO_SHADOW &&
+ IS_ALIGNED(address, PUD_SIZE) &&
+ end - address >= PUD_SIZE) {
+ pud_populate(&init_mm, pu_dir, kasan_zero_pmd);
+ address = (address + PUD_SIZE) & PUD_MASK;
+ continue;
+ }
+ pm_dir = kasan_early_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+ pud_populate(&init_mm, pu_dir, pm_dir);
+ }
+
+ pm_dir = pmd_offset(pu_dir, address);
+ if (pmd_none(*pm_dir)) {
+ if (mode == POPULATE_ZERO_SHADOW &&
+ IS_ALIGNED(address, PMD_SIZE) &&
+ end - address >= PMD_SIZE) {
+ pmd_populate(&init_mm, pm_dir, kasan_zero_pte);
+ address = (address + PMD_SIZE) & PMD_MASK;
+ continue;
+ }
+ /* the first megabyte of 1:1 is mapped with 4k pages */
+ if (has_edat && address && end - address >= PMD_SIZE &&
+ mode != POPULATE_ZERO_SHADOW) {
+ void *page;
+
+ if (mode == POPULATE_ONE2ONE) {
+ page = (void *)address;
+ } else {
+ page = kasan_early_alloc_segment();
+ memset(page, 0, _SEGMENT_SIZE);
+ }
+ pmd_val(*pm_dir) = __pa(page) | sgt_prot;
+ address = (address + PMD_SIZE) & PMD_MASK;
+ continue;
+ }
+
+ pt_dir = kasan_early_pte_alloc();
+ pmd_populate(&init_mm, pm_dir, pt_dir);
+ } else if (pmd_large(*pm_dir)) {
+ address = (address + PMD_SIZE) & PMD_MASK;
+ continue;
+ }
+
+ pt_dir = pte_offset_kernel(pm_dir, address);
+ if (pte_none(*pt_dir)) {
+ void *page;
+
+ switch (mode) {
+ case POPULATE_ONE2ONE:
+ page = (void *)address;
+ pte_val(*pt_dir) = __pa(page) | pgt_prot;
+ break;
+ case POPULATE_MAP:
+ page = kasan_early_alloc_pages(0);
+ memset(page, 0, PAGE_SIZE);
+ pte_val(*pt_dir) = __pa(page) | pgt_prot;
+ break;
+ case POPULATE_ZERO_SHADOW:
+ page = kasan_zero_page;
+ pte_val(*pt_dir) = __pa(page) | pgt_prot_zero;
+ break;
+ }
+ }
+ address += PAGE_SIZE;
+ }
+}
+
+static void __init kasan_set_pgd(pgd_t *pgd, unsigned long asce_type)
+{
+ unsigned long asce_bits;
+
+ asce_bits = asce_type | _ASCE_TABLE_LENGTH;
+ S390_lowcore.kernel_asce = (__pa(pgd) & PAGE_MASK) | asce_bits;
+ S390_lowcore.user_asce = S390_lowcore.kernel_asce;
+
+ __ctl_load(S390_lowcore.kernel_asce, 1, 1);
+ __ctl_load(S390_lowcore.kernel_asce, 7, 7);
+ __ctl_load(S390_lowcore.kernel_asce, 13, 13);
+}
+
+static void __init kasan_enable_dat(void)
+{
+ psw_t psw;
+
+ psw.mask = __extract_psw();
+ psw_bits(psw).dat = 1;
+ psw_bits(psw).as = PSW_BITS_AS_HOME;
+ __load_psw_mask(psw.mask);
+}
+
+static void __init kasan_early_detect_facilities(void)
+{
+ __stfle(S390_lowcore.stfle_fac_list,
+ ARRAY_SIZE(S390_lowcore.stfle_fac_list));
+ if (test_facility(8)) {
+ has_edat = true;
+ __ctl_set_bit(0, 23);
+ }
+ if (!noexec_disabled && test_facility(130)) {
+ has_nx = true;
+ __ctl_set_bit(0, 20);
+ }
+}
+
+static unsigned long __init get_mem_detect_end(void)
+{
+ unsigned long start;
+ unsigned long end;
+
+ if (mem_detect.count) {
+ __get_mem_detect_block(mem_detect.count - 1, &start, &end);
+ return end;
+ }
+ return 0;
+}
+
+void __init kasan_early_init(void)
+{
+ unsigned long untracked_mem_end;
+ unsigned long shadow_alloc_size;
+ unsigned long initrd_end;
+ unsigned long asce_type;
+ unsigned long memsize;
+ unsigned long vmax;
+ unsigned long pgt_prot = pgprot_val(PAGE_KERNEL_RO);
+ pte_t pte_z;
+ pmd_t pmd_z = __pmd(__pa(kasan_zero_pte) | _SEGMENT_ENTRY);
+ pud_t pud_z = __pud(__pa(kasan_zero_pmd) | _REGION3_ENTRY);
+ p4d_t p4d_z = __p4d(__pa(kasan_zero_pud) | _REGION2_ENTRY);
+
+ kasan_early_detect_facilities();
+ if (!has_nx)
+ pgt_prot &= ~_PAGE_NOEXEC;
+ pte_z = __pte(__pa(kasan_zero_page) | pgt_prot);
+
+ memsize = get_mem_detect_end();
+ if (!memsize)
+ kasan_early_panic("cannot detect physical memory size\n");
+ /* respect mem= cmdline parameter */
+ if (memory_end_set && memsize > memory_end)
+ memsize = memory_end;
+ memsize = min(memsize, KASAN_SHADOW_START);
+
+ if (IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING)) {
+ /* 4 level paging */
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, P4D_SIZE));
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE));
+ crst_table_init((unsigned long *)early_pg_dir,
+ _REGION2_ENTRY_EMPTY);
+ untracked_mem_end = vmax = _REGION1_SIZE;
+ asce_type = _ASCE_TYPE_REGION2;
+ } else {
+ /* 3 level paging */
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PUD_SIZE));
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE));
+ crst_table_init((unsigned long *)early_pg_dir,
+ _REGION3_ENTRY_EMPTY);
+ untracked_mem_end = vmax = _REGION2_SIZE;
+ asce_type = _ASCE_TYPE_REGION3;
+ }
+
+ /* init kasan zero shadow */
+ crst_table_init((unsigned long *)kasan_zero_p4d, p4d_val(p4d_z));
+ crst_table_init((unsigned long *)kasan_zero_pud, pud_val(pud_z));
+ crst_table_init((unsigned long *)kasan_zero_pmd, pmd_val(pmd_z));
+ memset64((u64 *)kasan_zero_pte, pte_val(pte_z), PTRS_PER_PTE);
+
+ shadow_alloc_size = memsize >> KASAN_SHADOW_SCALE_SHIFT;
+ pgalloc_low = round_up((unsigned long)_end, _SEGMENT_SIZE);
+ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) {
+ initrd_end =
+ round_up(INITRD_START + INITRD_SIZE, _SEGMENT_SIZE);
+ pgalloc_low = max(pgalloc_low, initrd_end);
+ }
+
+ if (pgalloc_low + shadow_alloc_size > memsize)
+ kasan_early_panic("out of memory during initialisation\n");
+
+ if (has_edat) {
+ segment_pos = round_down(memsize, _SEGMENT_SIZE);
+ segment_low = segment_pos - shadow_alloc_size;
+ pgalloc_pos = segment_low;
+ } else {
+ pgalloc_pos = memsize;
+ }
+ init_mm.pgd = early_pg_dir;
+ /*
+ * Current memory layout:
+ * +- 0 -------------+ +- shadow start -+
+ * | 1:1 ram mapping | /| 1/8 ram |
+ * +- end of ram ----+ / +----------------+
+ * | ... gap ... |/ | kasan |
+ * +- shadow start --+ | zero |
+ * | 1/8 addr space | | page |
+ * +- shadow end -+ | mapping |
+ * | ... gap ... |\ | (untracked) |
+ * +- modules vaddr -+ \ +----------------+
+ * | 2Gb | \| unmapped | allocated per module
+ * +-----------------+ +- shadow end ---+
+ */
+ /* populate kasan shadow (for identity mapping and zero page mapping) */
+ kasan_early_vmemmap_populate(__sha(0), __sha(memsize), POPULATE_MAP);
+ if (IS_ENABLED(CONFIG_MODULES))
+ untracked_mem_end = vmax - MODULES_LEN;
+ kasan_early_vmemmap_populate(__sha(max_physmem_end),
+ __sha(untracked_mem_end),
+ POPULATE_ZERO_SHADOW);
+ /* memory allocated for identity mapping structs will be freed later */
+ pgalloc_freeable = pgalloc_pos;
+ /* populate identity mapping */
+ kasan_early_vmemmap_populate(0, memsize, POPULATE_ONE2ONE);
+ kasan_set_pgd(early_pg_dir, asce_type);
+ kasan_enable_dat();
+ /* enable kasan */
+ init_task.kasan_depth = 0;
+ memblock_reserve(pgalloc_pos, memsize - pgalloc_pos);
+ sclp_early_printk("KernelAddressSanitizer initialized\n");
+}
+
+void __init kasan_copy_shadow(pgd_t *pg_dir)
+{
+ /*
+ * At this point we are still running on early pages setup early_pg_dir,
+ * while swapper_pg_dir has just been initialized with identity mapping.
+ * Carry over shadow memory region from early_pg_dir to swapper_pg_dir.
+ */
+
+ pgd_t *pg_dir_src;
+ pgd_t *pg_dir_dst;
+ p4d_t *p4_dir_src;
+ p4d_t *p4_dir_dst;
+ pud_t *pu_dir_src;
+ pud_t *pu_dir_dst;
+
+ pg_dir_src = pgd_offset_raw(early_pg_dir, KASAN_SHADOW_START);
+ pg_dir_dst = pgd_offset_raw(pg_dir, KASAN_SHADOW_START);
+ p4_dir_src = p4d_offset(pg_dir_src, KASAN_SHADOW_START);
+ p4_dir_dst = p4d_offset(pg_dir_dst, KASAN_SHADOW_START);
+ if (!p4d_folded(*p4_dir_src)) {
+ /* 4 level paging */
+ memcpy(p4_dir_dst, p4_dir_src,
+ (KASAN_SHADOW_SIZE >> P4D_SHIFT) * sizeof(p4d_t));
+ return;
+ }
+ /* 3 level paging */
+ pu_dir_src = pud_offset(p4_dir_src, KASAN_SHADOW_START);
+ pu_dir_dst = pud_offset(p4_dir_dst, KASAN_SHADOW_START);
+ memcpy(pu_dir_dst, pu_dir_src,
+ (KASAN_SHADOW_SIZE >> PUD_SHIFT) * sizeof(pud_t));
+}
+
+void __init kasan_free_early_identity(void)
+{
+ memblock_free(pgalloc_pos, pgalloc_freeable - pgalloc_pos);
+}
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 7be06475809b..97b3ee53852b 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -89,10 +89,8 @@ static int __memcpy_real(void *dest, void *src, size_t count)
return rc;
}
-/*
- * Copy memory in real mode (kernel to kernel)
- */
-int memcpy_real(void *dest, void *src, size_t count)
+static unsigned long _memcpy_real(unsigned long dest, unsigned long src,
+ unsigned long count)
{
int irqs_disabled, rc;
unsigned long flags;
@@ -103,7 +101,7 @@ int memcpy_real(void *dest, void *src, size_t count)
irqs_disabled = arch_irqs_disabled_flags(flags);
if (!irqs_disabled)
trace_hardirqs_off();
- rc = __memcpy_real(dest, src, count);
+ rc = __memcpy_real((void *) dest, (void *) src, (size_t) count);
if (!irqs_disabled)
trace_hardirqs_on();
__arch_local_irq_ssm(flags);
@@ -111,6 +109,23 @@ int memcpy_real(void *dest, void *src, size_t count)
}
/*
+ * Copy memory in real mode (kernel to kernel)
+ */
+int memcpy_real(void *dest, void *src, size_t count)
+{
+ if (S390_lowcore.nodat_stack != 0)
+ return CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack,
+ 3, dest, src, count);
+ /*
+ * This is a really early memcpy_real call, the stacks are
+ * not set up yet. Just call _memcpy_real on the early boot
+ * stack
+ */
+ return _memcpy_real((unsigned long) dest,(unsigned long) src,
+ (unsigned long) count);
+}
+
+/*
* Copy memory in absolute mode (kernel to kernel)
*/
void memcpy_absolute(void *dest, void *src, size_t count)
diff --git a/arch/s390/mm/mem_detect.c b/arch/s390/mm/mem_detect.c
deleted file mode 100644
index 21f6c82c8296..000000000000
--- a/arch/s390/mm/mem_detect.c
+++ /dev/null
@@ -1,62 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright IBM Corp. 2008, 2009
- *
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/memblock.h>
-#include <linux/init.h>
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-#include <asm/ipl.h>
-#include <asm/sclp.h>
-#include <asm/setup.h>
-
-#define CHUNK_READ_WRITE 0
-#define CHUNK_READ_ONLY 1
-
-static inline void memblock_physmem_add(phys_addr_t start, phys_addr_t size)
-{
- memblock_dbg("memblock_physmem_add: [%#016llx-%#016llx]\n",
- start, start + size - 1);
- memblock_add_range(&memblock.memory, start, size, 0, 0);
- memblock_add_range(&memblock.physmem, start, size, 0, 0);
-}
-
-void __init detect_memory_memblock(void)
-{
- unsigned long memsize, rnmax, rzm, addr, size;
- int type;
-
- rzm = sclp.rzm;
- rnmax = sclp.rnmax;
- memsize = rzm * rnmax;
- if (!rzm)
- rzm = 1UL << 17;
- max_physmem_end = memsize;
- addr = 0;
- /* keep memblock lists close to the kernel */
- memblock_set_bottom_up(true);
- do {
- size = 0;
- /* assume lowcore is writable */
- type = addr ? tprot(addr) : CHUNK_READ_WRITE;
- do {
- size += rzm;
- if (max_physmem_end && addr + size >= max_physmem_end)
- break;
- } while (type == tprot(addr + size));
- if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) {
- if (max_physmem_end && (addr + size > max_physmem_end))
- size = max_physmem_end - addr;
- memblock_physmem_add(addr, size);
- }
- addr += size;
- } while (addr < max_physmem_end);
- memblock_set_bottom_up(false);
- if (!max_physmem_end)
- max_physmem_end = memblock_end_of_DRAM();
- memblock_dump_all();
-}
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 831bdcf407bb..0a7627cdb34e 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -37,11 +37,11 @@ static unsigned long stack_maxrandom_size(void)
#define MIN_GAP (32*1024*1024)
#define MAX_GAP (STACK_TOP/6*5)
-static inline int mmap_is_legacy(void)
+static inline int mmap_is_legacy(struct rlimit *rlim_stack)
{
if (current->personality & ADDR_COMPAT_LAYOUT)
return 1;
- if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
+ if (rlim_stack->rlim_cur == RLIM_INFINITY)
return 1;
return sysctl_legacy_va_layout;
}
@@ -56,9 +56,10 @@ static unsigned long mmap_base_legacy(unsigned long rnd)
return TASK_UNMAPPED_BASE + rnd;
}
-static inline unsigned long mmap_base(unsigned long rnd)
+static inline unsigned long mmap_base(unsigned long rnd,
+ struct rlimit *rlim_stack)
{
- unsigned long gap = rlimit(RLIMIT_STACK);
+ unsigned long gap = rlim_stack->rlim_cur;
if (gap < MIN_GAP)
gap = MIN_GAP;
@@ -184,7 +185,7 @@ check_asce_limit:
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
*/
-void arch_pick_mmap_layout(struct mm_struct *mm)
+void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
{
unsigned long random_factor = 0UL;
@@ -195,11 +196,11 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
* Fall back to the standard layout if the personality
* bit is set, or if the expected stack growth is unlimited:
*/
- if (mmap_is_legacy()) {
+ if (mmap_is_legacy(rlim_stack)) {
mm->mmap_base = mmap_base_legacy(random_factor);
mm->get_unmapped_area = arch_get_unmapped_area;
} else {
- mm->mmap_base = mmap_base(random_factor);
+ mm->mmap_base = mmap_base(random_factor, rlim_stack);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
}
}
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index 382153ff17e3..dc3cede7f2ec 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -271,7 +271,7 @@ void arch_set_page_states(int make_stable)
list_for_each(l, &zone->free_area[order].free_list[t]) {
page = list_entry(l, struct page, lru);
if (make_stable)
- set_page_stable_dat(page, 0);
+ set_page_stable_dat(page, order);
else
set_page_unused(page, order);
}
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index c44171588d08..f8c6faab41f4 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -14,7 +14,7 @@
static inline unsigned long sske_frame(unsigned long addr, unsigned char skey)
{
- asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],9,0"
+ asm volatile(".insn rrf,0xb22b0000,%[skey],%[addr],1,0"
: [addr] "+a" (addr) : [skey] "d" (skey));
return addr;
}
@@ -23,8 +23,6 @@ void __storage_key_init_range(unsigned long start, unsigned long end)
{
unsigned long boundary, size;
- if (!PAGE_DEFAULT_KEY)
- return;
while (start < end) {
if (MACHINE_HAS_EDAT1) {
/* set storage keys for a 1MB frame */
@@ -37,7 +35,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end)
continue;
}
}
- page_set_storage_key(start, PAGE_DEFAULT_KEY, 0);
+ page_set_storage_key(start, PAGE_DEFAULT_KEY, 1);
start += PAGE_SIZE;
}
}
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index cb364153c43c..814f26520aa2 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -6,8 +6,9 @@
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
*/
-#include <linux/mm.h>
#include <linux/sysctl.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
@@ -27,7 +28,7 @@ static struct ctl_table page_table_sysctl[] = {
.data = &page_table_allocate_pgste,
.maxlen = sizeof(int),
.mode = S_IRUGO | S_IWUSR,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = &page_table_allocate_pgste_min,
.extra2 = &page_table_allocate_pgste_max,
},
@@ -100,6 +101,7 @@ int crst_table_upgrade(struct mm_struct *mm, unsigned long end)
mm->context.asce_limit = _REGION1_SIZE;
mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH |
_ASCE_USER_BITS | _ASCE_TYPE_REGION2;
+ mm_inc_nr_puds(mm);
} else {
crst_table_init(table, _REGION1_ENTRY_EMPTY);
pgd_populate(mm, (pgd_t *) table, (p4d_t *) pgd);
@@ -189,14 +191,15 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
if (!list_empty(&mm->context.pgtable_list)) {
page = list_first_entry(&mm->context.pgtable_list,
struct page, lru);
- mask = atomic_read(&page->_mapcount);
+ mask = atomic_read(&page->_refcount) >> 24;
mask = (mask | (mask >> 4)) & 3;
if (mask != 3) {
table = (unsigned long *) page_to_phys(page);
bit = mask & 1; /* =1 -> second 2K */
if (bit)
table += PTRS_PER_PTE;
- atomic_xor_bits(&page->_mapcount, 1U << bit);
+ atomic_xor_bits(&page->_refcount,
+ 1U << (bit + 24));
list_del(&page->lru);
}
}
@@ -217,12 +220,12 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
table = (unsigned long *) page_to_phys(page);
if (mm_alloc_pgste(mm)) {
/* Return 4K page table with PGSTEs */
- atomic_set(&page->_mapcount, 3);
+ atomic_xor_bits(&page->_refcount, 3 << 24);
memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
} else {
/* Return the first 2K fragment of the page */
- atomic_set(&page->_mapcount, 1);
+ atomic_xor_bits(&page->_refcount, 1 << 24);
memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE);
spin_lock_bh(&mm->context.lock);
list_add(&page->lru, &mm->context.pgtable_list);
@@ -241,7 +244,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
/* Free 2K page table fragment of a 4K page */
bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
spin_lock_bh(&mm->context.lock);
- mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
+ mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24));
+ mask >>= 24;
if (mask & 3)
list_add(&page->lru, &mm->context.pgtable_list);
else
@@ -249,10 +253,11 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
spin_unlock_bh(&mm->context.lock);
if (mask != 0)
return;
+ } else {
+ atomic_xor_bits(&page->_refcount, 3U << 24);
}
pgtable_page_dtor(page);
- atomic_set(&page->_mapcount, -1);
__free_page(page);
}
@@ -273,7 +278,8 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
}
bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
spin_lock_bh(&mm->context.lock);
- mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
+ mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
+ mask >>= 24;
if (mask & 3)
list_add_tail(&page->lru, &mm->context.pgtable_list);
else
@@ -295,12 +301,15 @@ static void __tlb_remove_table(void *_table)
break;
case 1: /* lower 2K of a 4K page table */
case 2: /* higher 2K of a 4K page table */
- if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0)
+ mask = atomic_xor_bits(&page->_refcount, mask << (4 + 24));
+ mask >>= 24;
+ if (mask != 0)
break;
/* fallthrough */
case 3: /* 4K page table with pgstes */
+ if (mask & 3)
+ atomic_xor_bits(&page->_refcount, 3 << 24);
pgtable_page_dtor(page);
- atomic_set(&page->_mapcount, -1);
__free_page(page);
break;
}
@@ -366,3 +375,293 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
if ((*batch)->nr == MAX_TABLE_BATCH)
tlb_flush_mmu(tlb);
}
+
+/*
+ * Base infrastructure required to generate basic asces, region, segment,
+ * and page tables that do not make use of enhanced features like EDAT1.
+ */
+
+static struct kmem_cache *base_pgt_cache;
+
+static unsigned long base_pgt_alloc(void)
+{
+ u64 *table;
+
+ table = kmem_cache_alloc(base_pgt_cache, GFP_KERNEL);
+ if (table)
+ memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
+ return (unsigned long) table;
+}
+
+static void base_pgt_free(unsigned long table)
+{
+ kmem_cache_free(base_pgt_cache, (void *) table);
+}
+
+static unsigned long base_crst_alloc(unsigned long val)
+{
+ unsigned long table;
+
+ table = __get_free_pages(GFP_KERNEL, CRST_ALLOC_ORDER);
+ if (table)
+ crst_table_init((unsigned long *)table, val);
+ return table;
+}
+
+static void base_crst_free(unsigned long table)
+{
+ free_pages(table, CRST_ALLOC_ORDER);
+}
+
+#define BASE_ADDR_END_FUNC(NAME, SIZE) \
+static inline unsigned long base_##NAME##_addr_end(unsigned long addr, \
+ unsigned long end) \
+{ \
+ unsigned long next = (addr + (SIZE)) & ~((SIZE) - 1); \
+ \
+ return (next - 1) < (end - 1) ? next : end; \
+}
+
+BASE_ADDR_END_FUNC(page, _PAGE_SIZE)
+BASE_ADDR_END_FUNC(segment, _SEGMENT_SIZE)
+BASE_ADDR_END_FUNC(region3, _REGION3_SIZE)
+BASE_ADDR_END_FUNC(region2, _REGION2_SIZE)
+BASE_ADDR_END_FUNC(region1, _REGION1_SIZE)
+
+static inline unsigned long base_lra(unsigned long address)
+{
+ unsigned long real;
+
+ asm volatile(
+ " lra %0,0(%1)\n"
+ : "=d" (real) : "a" (address) : "cc");
+ return real;
+}
+
+static int base_page_walk(unsigned long origin, unsigned long addr,
+ unsigned long end, int alloc)
+{
+ unsigned long *pte, next;
+
+ if (!alloc)
+ return 0;
+ pte = (unsigned long *) origin;
+ pte += (addr & _PAGE_INDEX) >> _PAGE_SHIFT;
+ do {
+ next = base_page_addr_end(addr, end);
+ *pte = base_lra(addr);
+ } while (pte++, addr = next, addr < end);
+ return 0;
+}
+
+static int base_segment_walk(unsigned long origin, unsigned long addr,
+ unsigned long end, int alloc)
+{
+ unsigned long *ste, next, table;
+ int rc;
+
+ ste = (unsigned long *) origin;
+ ste += (addr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
+ do {
+ next = base_segment_addr_end(addr, end);
+ if (*ste & _SEGMENT_ENTRY_INVALID) {
+ if (!alloc)
+ continue;
+ table = base_pgt_alloc();
+ if (!table)
+ return -ENOMEM;
+ *ste = table | _SEGMENT_ENTRY;
+ }
+ table = *ste & _SEGMENT_ENTRY_ORIGIN;
+ rc = base_page_walk(table, addr, next, alloc);
+ if (rc)
+ return rc;
+ if (!alloc)
+ base_pgt_free(table);
+ cond_resched();
+ } while (ste++, addr = next, addr < end);
+ return 0;
+}
+
+static int base_region3_walk(unsigned long origin, unsigned long addr,
+ unsigned long end, int alloc)
+{
+ unsigned long *rtte, next, table;
+ int rc;
+
+ rtte = (unsigned long *) origin;
+ rtte += (addr & _REGION3_INDEX) >> _REGION3_SHIFT;
+ do {
+ next = base_region3_addr_end(addr, end);
+ if (*rtte & _REGION_ENTRY_INVALID) {
+ if (!alloc)
+ continue;
+ table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+ if (!table)
+ return -ENOMEM;
+ *rtte = table | _REGION3_ENTRY;
+ }
+ table = *rtte & _REGION_ENTRY_ORIGIN;
+ rc = base_segment_walk(table, addr, next, alloc);
+ if (rc)
+ return rc;
+ if (!alloc)
+ base_crst_free(table);
+ } while (rtte++, addr = next, addr < end);
+ return 0;
+}
+
+static int base_region2_walk(unsigned long origin, unsigned long addr,
+ unsigned long end, int alloc)
+{
+ unsigned long *rste, next, table;
+ int rc;
+
+ rste = (unsigned long *) origin;
+ rste += (addr & _REGION2_INDEX) >> _REGION2_SHIFT;
+ do {
+ next = base_region2_addr_end(addr, end);
+ if (*rste & _REGION_ENTRY_INVALID) {
+ if (!alloc)
+ continue;
+ table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
+ if (!table)
+ return -ENOMEM;
+ *rste = table | _REGION2_ENTRY;
+ }
+ table = *rste & _REGION_ENTRY_ORIGIN;
+ rc = base_region3_walk(table, addr, next, alloc);
+ if (rc)
+ return rc;
+ if (!alloc)
+ base_crst_free(table);
+ } while (rste++, addr = next, addr < end);
+ return 0;
+}
+
+static int base_region1_walk(unsigned long origin, unsigned long addr,
+ unsigned long end, int alloc)
+{
+ unsigned long *rfte, next, table;
+ int rc;
+
+ rfte = (unsigned long *) origin;
+ rfte += (addr & _REGION1_INDEX) >> _REGION1_SHIFT;
+ do {
+ next = base_region1_addr_end(addr, end);
+ if (*rfte & _REGION_ENTRY_INVALID) {
+ if (!alloc)
+ continue;
+ table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
+ if (!table)
+ return -ENOMEM;
+ *rfte = table | _REGION1_ENTRY;
+ }
+ table = *rfte & _REGION_ENTRY_ORIGIN;
+ rc = base_region2_walk(table, addr, next, alloc);
+ if (rc)
+ return rc;
+ if (!alloc)
+ base_crst_free(table);
+ } while (rfte++, addr = next, addr < end);
+ return 0;
+}
+
+/**
+ * base_asce_free - free asce and tables returned from base_asce_alloc()
+ * @asce: asce to be freed
+ *
+ * Frees all region, segment, and page tables that were allocated with a
+ * corresponding base_asce_alloc() call.
+ */
+void base_asce_free(unsigned long asce)
+{
+ unsigned long table = asce & _ASCE_ORIGIN;
+
+ if (!asce)
+ return;
+ switch (asce & _ASCE_TYPE_MASK) {
+ case _ASCE_TYPE_SEGMENT:
+ base_segment_walk(table, 0, _REGION3_SIZE, 0);
+ break;
+ case _ASCE_TYPE_REGION3:
+ base_region3_walk(table, 0, _REGION2_SIZE, 0);
+ break;
+ case _ASCE_TYPE_REGION2:
+ base_region2_walk(table, 0, _REGION1_SIZE, 0);
+ break;
+ case _ASCE_TYPE_REGION1:
+ base_region1_walk(table, 0, -_PAGE_SIZE, 0);
+ break;
+ }
+ base_crst_free(table);
+}
+
+static int base_pgt_cache_init(void)
+{
+ static DEFINE_MUTEX(base_pgt_cache_mutex);
+ unsigned long sz = _PAGE_TABLE_SIZE;
+
+ if (base_pgt_cache)
+ return 0;
+ mutex_lock(&base_pgt_cache_mutex);
+ if (!base_pgt_cache)
+ base_pgt_cache = kmem_cache_create("base_pgt", sz, sz, 0, NULL);
+ mutex_unlock(&base_pgt_cache_mutex);
+ return base_pgt_cache ? 0 : -ENOMEM;
+}
+
+/**
+ * base_asce_alloc - create kernel mapping without enhanced DAT features
+ * @addr: virtual start address of kernel mapping
+ * @num_pages: number of consecutive pages
+ *
+ * Generate an asce, including all required region, segment and page tables,
+ * that can be used to access the virtual kernel mapping. The difference is
+ * that the returned asce does not make use of any enhanced DAT features like
+ * e.g. large pages. This is required for some I/O functions that pass an
+ * asce, like e.g. some service call requests.
+ *
+ * Note: the returned asce may NEVER be attached to any cpu. It may only be
+ * used for I/O requests. tlb entries that might result because the
+ * asce was attached to a cpu won't be cleared.
+ */
+unsigned long base_asce_alloc(unsigned long addr, unsigned long num_pages)
+{
+ unsigned long asce, table, end;
+ int rc;
+
+ if (base_pgt_cache_init())
+ return 0;
+ end = addr + num_pages * PAGE_SIZE;
+ if (end <= _REGION3_SIZE) {
+ table = base_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+ if (!table)
+ return 0;
+ rc = base_segment_walk(table, addr, end, 1);
+ asce = table | _ASCE_TYPE_SEGMENT | _ASCE_TABLE_LENGTH;
+ } else if (end <= _REGION2_SIZE) {
+ table = base_crst_alloc(_REGION3_ENTRY_EMPTY);
+ if (!table)
+ return 0;
+ rc = base_region3_walk(table, addr, end, 1);
+ asce = table | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+ } else if (end <= _REGION1_SIZE) {
+ table = base_crst_alloc(_REGION2_ENTRY_EMPTY);
+ if (!table)
+ return 0;
+ rc = base_region2_walk(table, addr, end, 1);
+ asce = table | _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
+ } else {
+ table = base_crst_alloc(_REGION1_ENTRY_EMPTY);
+ if (!table)
+ return 0;
+ rc = base_region1_walk(table, addr, end, 1);
+ asce = table | _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH;
+ }
+ if (rc) {
+ base_asce_free(asce);
+ asce = 0;
+ }
+ return asce;
+}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 4f2b65d01a70..f2cc7da473e4 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -158,7 +158,7 @@ static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste,
#ifdef CONFIG_PGSTE
unsigned long address, bits, skey;
- if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID)
+ if (!mm_uses_skeys(mm) || pte_val(pte) & _PAGE_INVALID)
return pgste;
address = pte_val(pte) & PAGE_MASK;
skey = (unsigned long) page_get_storage_key(address);
@@ -180,7 +180,7 @@ static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry,
unsigned long address;
unsigned long nkey;
- if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID)
+ if (!mm_uses_skeys(mm) || pte_val(entry) & _PAGE_INVALID)
return;
VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID));
address = pte_val(entry) & PAGE_MASK;
@@ -347,18 +347,27 @@ static inline void pmdp_idte_local(struct mm_struct *mm,
mm->context.asce, IDTE_LOCAL);
else
__pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
+ if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
+ gmap_pmdp_idte_local(mm, addr);
}
static inline void pmdp_idte_global(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
{
- if (MACHINE_HAS_TLB_GUEST)
+ if (MACHINE_HAS_TLB_GUEST) {
__pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
mm->context.asce, IDTE_GLOBAL);
- else if (MACHINE_HAS_IDTE)
+ if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
+ gmap_pmdp_idte_global(mm, addr);
+ } else if (MACHINE_HAS_IDTE) {
__pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
- else
+ if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
+ gmap_pmdp_idte_global(mm, addr);
+ } else {
__pmdp_csp(pmdp);
+ if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
+ gmap_pmdp_csp(mm, addr);
+ }
}
static inline pmd_t pmdp_flush_direct(struct mm_struct *mm,
@@ -392,6 +401,8 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
cpumask_of(smp_processor_id()))) {
pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
mm->context.flush_mm = 1;
+ if (mm_has_pgste(mm))
+ gmap_pmdp_invalidate(mm, addr);
} else {
pmdp_idte_global(mm, addr, pmdp);
}
@@ -399,6 +410,24 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
return old;
}
+static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr)
+{
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ pgd = pgd_offset(mm, addr);
+ p4d = p4d_alloc(mm, pgd, addr);
+ if (!p4d)
+ return NULL;
+ pud = pud_alloc(mm, p4d, addr);
+ if (!pud)
+ return NULL;
+ pmd = pmd_alloc(mm, pud, addr);
+ return pmd;
+}
+
pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t new)
{
@@ -693,40 +722,14 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
/*
* Test and reset if a guest page is dirty
*/
-bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
+bool ptep_test_and_clear_uc(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
{
- spinlock_t *ptl;
- pgd_t *pgd;
- p4d_t *p4d;
- pud_t *pud;
- pmd_t *pmd;
pgste_t pgste;
- pte_t *ptep;
pte_t pte;
bool dirty;
int nodat;
- pgd = pgd_offset(mm, addr);
- p4d = p4d_alloc(mm, pgd, addr);
- if (!p4d)
- return false;
- pud = pud_alloc(mm, p4d, addr);
- if (!pud)
- return false;
- pmd = pmd_alloc(mm, pud, addr);
- if (!pmd)
- return false;
- /* We can't run guests backed by huge pages, but userspace can
- * still set them up and then try to migrate them without any
- * migration support.
- */
- if (pmd_large(*pmd))
- return true;
-
- ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
- if (unlikely(!ptep))
- return false;
-
pgste = pgste_get_lock(ptep);
dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT);
pgste_val(pgste) &= ~PGSTE_UC_BIT;
@@ -742,21 +745,43 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
*ptep = pte;
}
pgste_set_unlock(ptep, pgste);
-
- spin_unlock(ptl);
return dirty;
}
-EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);
+EXPORT_SYMBOL_GPL(ptep_test_and_clear_uc);
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
unsigned char key, bool nq)
{
- unsigned long keyul;
+ unsigned long keyul, paddr;
spinlock_t *ptl;
pgste_t old, new;
+ pmd_t *pmdp;
pte_t *ptep;
- ptep = get_locked_pte(mm, addr, &ptl);
+ pmdp = pmd_alloc_map(mm, addr);
+ if (unlikely(!pmdp))
+ return -EFAULT;
+
+ ptl = pmd_lock(mm, pmdp);
+ if (!pmd_present(*pmdp)) {
+ spin_unlock(ptl);
+ return -EFAULT;
+ }
+
+ if (pmd_large(*pmdp)) {
+ paddr = pmd_val(*pmdp) & HPAGE_MASK;
+ paddr |= addr & ~HPAGE_MASK;
+ /*
+ * Huge pmds need quiescing operations, they are
+ * always mapped.
+ */
+ page_set_storage_key(paddr, key, 1);
+ spin_unlock(ptl);
+ return 0;
+ }
+ spin_unlock(ptl);
+
+ ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
if (unlikely(!ptep))
return -EFAULT;
@@ -767,14 +792,14 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
if (!(pte_val(*ptep) & _PAGE_INVALID)) {
- unsigned long address, bits, skey;
+ unsigned long bits, skey;
- address = pte_val(*ptep) & PAGE_MASK;
- skey = (unsigned long) page_get_storage_key(address);
+ paddr = pte_val(*ptep) & PAGE_MASK;
+ skey = (unsigned long) page_get_storage_key(paddr);
bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED);
skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT);
/* Set storage key ACC and FP */
- page_set_storage_key(address, skey, !nq);
+ page_set_storage_key(paddr, skey, !nq);
/* Merge host changed & referenced into pgste */
pgste_val(new) |= bits << 52;
}
@@ -830,11 +855,32 @@ EXPORT_SYMBOL(cond_set_guest_storage_key);
int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
{
spinlock_t *ptl;
+ unsigned long paddr;
pgste_t old, new;
+ pmd_t *pmdp;
pte_t *ptep;
int cc = 0;
- ptep = get_locked_pte(mm, addr, &ptl);
+ pmdp = pmd_alloc_map(mm, addr);
+ if (unlikely(!pmdp))
+ return -EFAULT;
+
+ ptl = pmd_lock(mm, pmdp);
+ if (!pmd_present(*pmdp)) {
+ spin_unlock(ptl);
+ return -EFAULT;
+ }
+
+ if (pmd_large(*pmdp)) {
+ paddr = pmd_val(*pmdp) & HPAGE_MASK;
+ paddr |= addr & ~HPAGE_MASK;
+ cc = page_reset_referenced(paddr);
+ spin_unlock(ptl);
+ return cc;
+ }
+ spin_unlock(ptl);
+
+ ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
if (unlikely(!ptep))
return -EFAULT;
@@ -843,7 +889,8 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
pgste_val(new) &= ~PGSTE_GR_BIT;
if (!(pte_val(*ptep) & _PAGE_INVALID)) {
- cc = page_reset_referenced(pte_val(*ptep) & PAGE_MASK);
+ paddr = pte_val(*ptep) & PAGE_MASK;
+ cc = page_reset_referenced(paddr);
/* Merge real referenced bit into host-set */
pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT;
}
@@ -862,18 +909,42 @@ EXPORT_SYMBOL(reset_guest_reference_bit);
int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
unsigned char *key)
{
+ unsigned long paddr;
spinlock_t *ptl;
pgste_t pgste;
+ pmd_t *pmdp;
pte_t *ptep;
- ptep = get_locked_pte(mm, addr, &ptl);
+ pmdp = pmd_alloc_map(mm, addr);
+ if (unlikely(!pmdp))
+ return -EFAULT;
+
+ ptl = pmd_lock(mm, pmdp);
+ if (!pmd_present(*pmdp)) {
+ /* Not yet mapped memory has a zero key */
+ spin_unlock(ptl);
+ *key = 0;
+ return 0;
+ }
+
+ if (pmd_large(*pmdp)) {
+ paddr = pmd_val(*pmdp) & HPAGE_MASK;
+ paddr |= addr & ~HPAGE_MASK;
+ *key = page_get_storage_key(paddr);
+ spin_unlock(ptl);
+ return 0;
+ }
+ spin_unlock(ptl);
+
+ ptep = pte_alloc_map_lock(mm, pmdp, addr, &ptl);
if (unlikely(!ptep))
return -EFAULT;
pgste = pgste_get_lock(ptep);
*key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
+ paddr = pte_val(*ptep) & PAGE_MASK;
if (!(pte_val(*ptep) & _PAGE_INVALID))
- *key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);
+ *key = page_get_storage_key(paddr);
/* Reflect guest's logical view, not physical */
*key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
pgste_set_unlock(ptep, pgste);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index db55561c5981..0472e27febdf 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -4,14 +4,13 @@
* Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
*/
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
-#include <linux/memblock.h>
#include <asm/cacheflush.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
@@ -36,7 +35,7 @@ static void __ref *vmem_alloc_pages(unsigned int order)
if (slab_is_available())
return (void *)__get_free_pages(GFP_KERNEL, order);
- return (void *) memblock_alloc(size, size);
+ return (void *) memblock_phys_alloc(size, size);
}
void *vmem_crst_alloc(unsigned long val)
@@ -57,7 +56,7 @@ pte_t __ref *vmem_pte_alloc(void)
if (slab_is_available())
pte = (pte_t *) page_table_alloc(&init_mm);
else
- pte = (pte_t *) memblock_alloc(size, size);
+ pte = (pte_t *) memblock_phys_alloc(size, size);
if (!pte)
return NULL;
memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
OpenPOWER on IntegriCloud