diff options
Diffstat (limited to 'arch/sparc/kernel')
28 files changed, 780 insertions, 233 deletions
diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index cc97545737f0..76cb57750dda 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -69,6 +69,7 @@ obj-$(CONFIG_SPARC64) += visemul.o obj-$(CONFIG_SPARC64) += hvapi.o obj-$(CONFIG_SPARC64) += sstate.o obj-$(CONFIG_SPARC64) += mdesc.o +obj-$(CONFIG_SPARC64) += adi_64.o obj-$(CONFIG_SPARC64) += pcr.o obj-$(CONFIG_SPARC64) += nmi.o obj-$(CONFIG_SPARC64_SMP) += cpumap.o diff --git a/arch/sparc/kernel/adi_64.c b/arch/sparc/kernel/adi_64.c new file mode 100644 index 000000000000..d0a2ac975b42 --- /dev/null +++ b/arch/sparc/kernel/adi_64.c @@ -0,0 +1,397 @@ +/* adi_64.c: support for ADI (Application Data Integrity) feature on + * sparc m7 and newer processors. This feature is also known as + * SSM (Silicon Secured Memory). + * + * Copyright (C) 2016 Oracle and/or its affiliates. All rights reserved. + * Author: Khalid Aziz (khalid.aziz@oracle.com) + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/mm_types.h> +#include <asm/mdesc.h> +#include <asm/adi_64.h> +#include <asm/mmu_64.h> +#include <asm/pgtable_64.h> + +/* Each page of storage for ADI tags can accommodate tags for 128 + * pages. When ADI enabled pages are being swapped out, it would be + * prudent to allocate at least enough tag storage space to accommodate + * SWAPFILE_CLUSTER number of pages. Allocate enough tag storage to + * store tags for four SWAPFILE_CLUSTER pages to reduce need for + * further allocations for same vma. + */ +#define TAG_STORAGE_PAGES 8 + +struct adi_config adi_state; +EXPORT_SYMBOL(adi_state); + +/* mdesc_adi_init() : Parse machine description provided by the + * hypervisor to detect ADI capabilities + * + * Hypervisor reports ADI capabilities of platform in "hwcap-list" property + * for "cpu" node. If the platform supports ADI, "hwcap-list" property + * contains the keyword "adp". 
If the platform supports ADI, "platform" + * node will contain "adp-blksz", "adp-nbits" and "ue-on-adp" properties + * to describe the ADI capabilities. + */ +void __init mdesc_adi_init(void) +{ + struct mdesc_handle *hp = mdesc_grab(); + const char *prop; + u64 pn, *val; + int len; + + if (!hp) + goto adi_not_found; + + pn = mdesc_node_by_name(hp, MDESC_NODE_NULL, "cpu"); + if (pn == MDESC_NODE_NULL) + goto adi_not_found; + + prop = mdesc_get_property(hp, pn, "hwcap-list", &len); + if (!prop) + goto adi_not_found; + + /* + * Look for "adp" keyword in hwcap-list which would indicate + * ADI support + */ + adi_state.enabled = false; + while (len) { + int plen; + + if (!strcmp(prop, "adp")) { + adi_state.enabled = true; + break; + } + + plen = strlen(prop) + 1; + prop += plen; + len -= plen; + } + + if (!adi_state.enabled) + goto adi_not_found; + + /* Find the ADI properties in "platform" node. If any ADI + * property is not found, ADI support is incomplete, so + * do not enable ADI in the kernel. + */ + pn = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform"); + if (pn == MDESC_NODE_NULL) + goto adi_not_found; + + val = (u64 *) mdesc_get_property(hp, pn, "adp-blksz", &len); + if (!val) + goto adi_not_found; + adi_state.caps.blksz = *val; + + val = (u64 *) mdesc_get_property(hp, pn, "adp-nbits", &len); + if (!val) + goto adi_not_found; + adi_state.caps.nbits = *val; + + val = (u64 *) mdesc_get_property(hp, pn, "ue-on-adp", &len); + if (!val) + goto adi_not_found; + adi_state.caps.ue_on_adi = *val; + + /* Some of the code to support swapping ADI tags is written on the + * assumption that two ADI tags can fit inside one byte. If + * this assumption is broken by a future architecture change, + * that code will have to be revisited.
If that were to happen, + * disable ADI support so we do not get unpredictable results + * with programs trying to use ADI and their pages getting + * swapped out + */ + if (adi_state.caps.nbits > 4) { + pr_warn("WARNING: ADI tag size >4 on this platform. Disabling AADI support\n"); + adi_state.enabled = false; + } + + mdesc_release(hp); + return; + +adi_not_found: + adi_state.enabled = false; + adi_state.caps.blksz = 0; + adi_state.caps.nbits = 0; + if (hp) + mdesc_release(hp); +} + +tag_storage_desc_t *find_tag_store(struct mm_struct *mm, + struct vm_area_struct *vma, + unsigned long addr) +{ + tag_storage_desc_t *tag_desc = NULL; + unsigned long i, max_desc, flags; + + /* Check if this vma already has tag storage descriptor + * allocated for it. + */ + max_desc = PAGE_SIZE/sizeof(tag_storage_desc_t); + if (mm->context.tag_store) { + tag_desc = mm->context.tag_store; + spin_lock_irqsave(&mm->context.tag_lock, flags); + for (i = 0; i < max_desc; i++) { + if ((addr >= tag_desc->start) && + ((addr + PAGE_SIZE - 1) <= tag_desc->end)) + break; + tag_desc++; + } + spin_unlock_irqrestore(&mm->context.tag_lock, flags); + + /* If no matching entries were found, this must be a + * freshly allocated page + */ + if (i >= max_desc) + tag_desc = NULL; + } + + return tag_desc; +} + +tag_storage_desc_t *alloc_tag_store(struct mm_struct *mm, + struct vm_area_struct *vma, + unsigned long addr) +{ + unsigned char *tags; + unsigned long i, size, max_desc, flags; + tag_storage_desc_t *tag_desc, *open_desc; + unsigned long end_addr, hole_start, hole_end; + + max_desc = PAGE_SIZE/sizeof(tag_storage_desc_t); + open_desc = NULL; + hole_start = 0; + hole_end = ULONG_MAX; + end_addr = addr + PAGE_SIZE - 1; + + /* Check if this vma already has tag storage descriptor + * allocated for it. + */ + spin_lock_irqsave(&mm->context.tag_lock, flags); + if (mm->context.tag_store) { + tag_desc = mm->context.tag_store; + + /* Look for a matching entry for this address. 
While doing + * that, look for the first open slot as well and find + * the hole in already allocated range where this request + * will fit in. + */ + for (i = 0; i < max_desc; i++) { + if (tag_desc->tag_users == 0) { + if (open_desc == NULL) + open_desc = tag_desc; + } else { + if ((addr >= tag_desc->start) && + (tag_desc->end >= (addr + PAGE_SIZE - 1))) { + tag_desc->tag_users++; + goto out; + } + } + if ((tag_desc->start > end_addr) && + (tag_desc->start < hole_end)) + hole_end = tag_desc->start; + if ((tag_desc->end < addr) && + (tag_desc->end > hole_start)) + hole_start = tag_desc->end; + tag_desc++; + } + + } else { + size = sizeof(tag_storage_desc_t)*max_desc; + mm->context.tag_store = kzalloc(size, GFP_NOWAIT|__GFP_NOWARN); + if (mm->context.tag_store == NULL) { + tag_desc = NULL; + goto out; + } + tag_desc = mm->context.tag_store; + for (i = 0; i < max_desc; i++, tag_desc++) + tag_desc->tag_users = 0; + open_desc = mm->context.tag_store; + i = 0; + } + + /* Check if we ran out of tag storage descriptors */ + if (open_desc == NULL) { + tag_desc = NULL; + goto out; + } + + /* Mark this tag descriptor slot in use and then initialize it */ + tag_desc = open_desc; + tag_desc->tag_users = 1; + + /* Tag storage has not been allocated for this vma and space + * is available in tag storage descriptor. Since this page is + * being swapped out, there is high probability subsequent pages + * in the VMA will be swapped out as well. Allocate pages to + * store tags for as many pages in this vma as possible but not + * more than TAG_STORAGE_PAGES. Each byte in tag space holds + * two ADI tags since each ADI tag is 4 bits. Each ADI tag + * covers adi_blksize() worth of addresses. Check if the hole is + * big enough to accommodate full address range for using + * TAG_STORAGE_PAGES number of tag pages. + */ + size = TAG_STORAGE_PAGES * PAGE_SIZE; + end_addr = addr + (size*2*adi_blksize()) - 1; + /* Check for overflow. 
If overflow occurs, allocate only one page */ + if (end_addr < addr) { + size = PAGE_SIZE; + end_addr = addr + (size*2*adi_blksize()) - 1; + /* If overflow happens with the minimum tag storage + * allocation as well, adjust ending address for this + * tag storage. + */ + if (end_addr < addr) + end_addr = ULONG_MAX; + } + if (hole_end < end_addr) { + /* Available hole is too small on the upper end of + * address. Can we expand the range towards the lower + * address and maximize use of this slot? + */ + unsigned long tmp_addr; + + end_addr = hole_end - 1; + tmp_addr = end_addr - (size*2*adi_blksize()) + 1; + /* Check for underflow. If underflow occurs, allocate + * only one page for storing ADI tags + */ + if (tmp_addr > addr) { + size = PAGE_SIZE; + tmp_addr = end_addr - (size*2*adi_blksize()) - 1; + /* If underflow happens with the minimum tag storage + * allocation as well, adjust starting address for + * this tag storage. + */ + if (tmp_addr > addr) + tmp_addr = 0; + } + if (tmp_addr < hole_start) { + /* Available hole is restricted on lower address + * end as well + */ + tmp_addr = hole_start + 1; + } + addr = tmp_addr; + size = (end_addr + 1 - addr)/(2*adi_blksize()); + size = (size + (PAGE_SIZE-adi_blksize()))/PAGE_SIZE; + size = size * PAGE_SIZE; + } + tags = kzalloc(size, GFP_NOWAIT|__GFP_NOWARN); + if (tags == NULL) { + tag_desc->tag_users = 0; + tag_desc = NULL; + goto out; + } + tag_desc->start = addr; + tag_desc->tags = tags; + tag_desc->end = end_addr; + +out: + spin_unlock_irqrestore(&mm->context.tag_lock, flags); + return tag_desc; +} + +void del_tag_store(tag_storage_desc_t *tag_desc, struct mm_struct *mm) +{ + unsigned long flags; + unsigned char *tags = NULL; + + spin_lock_irqsave(&mm->context.tag_lock, flags); + tag_desc->tag_users--; + if (tag_desc->tag_users == 0) { + tag_desc->start = tag_desc->end = 0; + /* Do not free up the tag storage space allocated + * by the first descriptor. 
This is persistent + * emergency tag storage space for the task. + */ + if (tag_desc != mm->context.tag_store) { + tags = tag_desc->tags; + tag_desc->tags = NULL; + } + } + spin_unlock_irqrestore(&mm->context.tag_lock, flags); + kfree(tags); +} + +#define tag_start(addr, tag_desc) \ + ((tag_desc)->tags + ((addr - (tag_desc)->start)/(2*adi_blksize()))) + +/* Retrieve any saved ADI tags for the page being swapped back in and + * restore these tags to the newly allocated physical page. + */ +void adi_restore_tags(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, pte_t pte) +{ + unsigned char *tag; + tag_storage_desc_t *tag_desc; + unsigned long paddr, tmp, version1, version2; + + /* Check if the swapped out page has an ADI version + * saved. If yes, restore version tag to the newly + * allocated page. + */ + tag_desc = find_tag_store(mm, vma, addr); + if (tag_desc == NULL) + return; + + tag = tag_start(addr, tag_desc); + paddr = pte_val(pte) & _PAGE_PADDR_4V; + for (tmp = paddr; tmp < (paddr+PAGE_SIZE); tmp += adi_blksize()) { + version1 = (*tag) >> 4; + version2 = (*tag) & 0x0f; + *tag++ = 0; + asm volatile("stxa %0, [%1] %2\n\t" + : + : "r" (version1), "r" (tmp), + "i" (ASI_MCD_REAL)); + tmp += adi_blksize(); + asm volatile("stxa %0, [%1] %2\n\t" + : + : "r" (version2), "r" (tmp), + "i" (ASI_MCD_REAL)); + } + asm volatile("membar #Sync\n\t"); + + /* Check and mark this tag space for release later if + * the swapped in page was the last user of tag space + */ + del_tag_store(tag_desc, mm); +} + +/* A page is about to be swapped out. Save any ADI tags associated with + * this physical page so they can be restored later when the page is swapped + * back in. 
+ */ +int adi_save_tags(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, pte_t oldpte) +{ + unsigned char *tag; + tag_storage_desc_t *tag_desc; + unsigned long version1, version2, paddr, tmp; + + tag_desc = alloc_tag_store(mm, vma, addr); + if (tag_desc == NULL) + return -1; + + tag = tag_start(addr, tag_desc); + paddr = pte_val(oldpte) & _PAGE_PADDR_4V; + for (tmp = paddr; tmp < (paddr+PAGE_SIZE); tmp += adi_blksize()) { + asm volatile("ldxa [%1] %2, %0\n\t" + : "=r" (version1) + : "r" (tmp), "i" (ASI_MCD_REAL)); + tmp += adi_blksize(); + asm volatile("ldxa [%1] %2, %0\n\t" + : "=r" (version2) + : "r" (tmp), "i" (ASI_MCD_REAL)); + *tag = (version1 << 4) | version2; + tag++; + } + + return 0; +} diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S index 358fe4ef08a2..4d3696973325 100644 --- a/arch/sparc/kernel/entry.S +++ b/arch/sparc/kernel/entry.S @@ -801,27 +801,12 @@ SUN_PI_(lda [%l4] ASI_M_MMUREGS, %l5) ! read sfsr last RESTORE_ALL .align 4 - .globl sys_nis_syscall -sys_nis_syscall: - mov %o7, %l5 - add %sp, STACKFRAME_SZ, %o0 ! pt_regs *regs arg - call c_sys_nis_syscall - mov %l5, %o7 - sunos_execv: .globl sunos_execv b sys_execve clr %i2 .align 4 - .globl sys_sparc_pipe -sys_sparc_pipe: - mov %o7, %l5 - add %sp, STACKFRAME_SZ, %o0 ! 
pt_regs *regs arg - call sparc_pipe - mov %l5, %o7 - - .align 4 .globl sys_sigstack sys_sigstack: mov %o7, %l5 diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h index 7378567b601f..c746c0fd5d6b 100644 --- a/arch/sparc/kernel/entry.h +++ b/arch/sparc/kernel/entry.h @@ -160,6 +160,9 @@ void sun4v_resum_overflow(struct pt_regs *regs); void sun4v_nonresum_error(struct pt_regs *regs, unsigned long offset); void sun4v_nonresum_overflow(struct pt_regs *regs); +void sun4v_mem_corrupt_detect_precise(struct pt_regs *regs, + unsigned long addr, + unsigned long context); extern unsigned long sun4v_err_itlb_vaddr; extern unsigned long sun4v_err_itlb_ctx; diff --git a/arch/sparc/kernel/etrap_64.S b/arch/sparc/kernel/etrap_64.S index 5c77a2e0e991..08cc41f64725 100644 --- a/arch/sparc/kernel/etrap_64.S +++ b/arch/sparc/kernel/etrap_64.S @@ -151,7 +151,32 @@ etrap_save: save %g2, -STACK_BIAS, %sp stx %g6, [%sp + PTREGS_OFF + PT_V9_G6] stx %g7, [%sp + PTREGS_OFF + PT_V9_G7] or %l7, %l0, %l7 - sethi %hi(TSTATE_TSO | TSTATE_PEF), %l0 +661: sethi %hi(TSTATE_TSO | TSTATE_PEF), %l0 + /* If userspace is using ADI, it could potentially pass + * a pointer with version tag embedded in it. To maintain + * the ADI security, we must enable PSTATE.mcde. Userspace + * would have already set TTE.mcd in an earlier call to + * kernel and set the version tag for the address being + * dereferenced. Setting PSTATE.mcde would ensure any + * access to userspace data through a system call honors + * ADI and does not allow a rogue app to bypass ADI by + * using system calls. Setting PSTATE.mcde only affects + * accesses to virtual addresses that have TTE.mcd set. + * Set PMCDPER to ensure any exceptions caused by ADI + * version tag mismatch are exposed before system call + * returns to userspace. Setting PMCDPER affects only + * writes to virtual addresses that have TTE.mcd set and + * have a version tag set as well. 
+ */ + .section .sun_m7_1insn_patch, "ax" + .word 661b + sethi %hi(TSTATE_TSO | TSTATE_PEF | TSTATE_MCDE), %l0 + .previous +661: nop + .section .sun_m7_1insn_patch, "ax" + .word 661b + .word 0xaf902001 /* wrpr %g0, 1, %pmcdper */ + .previous or %l7, %l0, %l7 wrpr %l2, %tnpc wrpr %l7, (TSTATE_PRIV | TSTATE_IE), %tstate diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index a41e6e16eb36..540bfc98472c 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -897,6 +897,7 @@ sparc64_boot_end: #include "syscalls.S" #include "helpers.S" #include "sun4v_tlb_miss.S" +#include "sun4v_mcd.S" #include "sun4v_ivec.S" #include "ktlb.S" #include "tsb.S" diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index d66dde833f5e..713670e6d13d 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -22,7 +22,6 @@ #include <linux/seq_file.h> #include <linux/ftrace.h> #include <linux/irq.h> -#include <linux/kmemleak.h> #include <asm/ptrace.h> #include <asm/processor.h> diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 418592a09b41..39a2503fa3e1 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -22,6 +22,7 @@ #include <linux/uaccess.h> #include <asm/oplib.h> #include <asm/smp.h> +#include <asm/adi.h> /* Unlike the OBP device tree, the machine description is a full-on * DAG. 
An arbitrary number of ARCs are possible from one @@ -1345,5 +1346,6 @@ void __init sun4v_mdesc_init(void) cur_mdesc = hp; + mdesc_adi_init(); report_platform_properties(); } diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 220d0f36560a..41b20edb427d 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -664,12 +664,12 @@ struct pci_bus *pci_scan_one_pbm(struct pci_pbm_info *pbm, printk("PCI: Scanning PBM %s\n", node->full_name); pci_add_resource_offset(&resources, &pbm->io_space, - pbm->io_space.start); + pbm->io_offset); pci_add_resource_offset(&resources, &pbm->mem_space, - pbm->mem_space.start); + pbm->mem_offset); if (pbm->mem64_space.flags) pci_add_resource_offset(&resources, &pbm->mem64_space, - pbm->mem_space.start); + pbm->mem64_offset); pbm->busn.start = pbm->pci_first_busno; pbm->busn.end = pbm->pci_last_busno; pbm->busn.flags = IORESOURCE_BUS; diff --git a/arch/sparc/kernel/pci_common.c b/arch/sparc/kernel/pci_common.c index 1e10fb26fa88..38d46bcc8634 100644 --- a/arch/sparc/kernel/pci_common.c +++ b/arch/sparc/kernel/pci_common.c @@ -344,26 +344,6 @@ static void pci_register_legacy_regions(struct resource *io_res, p->end = p->start + 0x1ffffUL; p->flags = IORESOURCE_BUSY; request_resource(mem_res, p); - - p = kzalloc(sizeof(*p), GFP_KERNEL); - if (!p) - return; - - p->name = "System ROM"; - p->start = mem_res->start + 0xf0000UL; - p->end = p->start + 0xffffUL; - p->flags = IORESOURCE_BUSY; - request_resource(mem_res, p); - - p = kzalloc(sizeof(*p), GFP_KERNEL); - if (!p) - return; - - p->name = "Video ROM"; - p->start = mem_res->start + 0xc0000UL; - p->end = p->start + 0x7fffUL; - p->flags = IORESOURCE_BUSY; - request_resource(mem_res, p); } static void pci_register_iommu_region(struct pci_pbm_info *pbm) @@ -397,6 +377,8 @@ void pci_determine_mem_io_space(struct pci_pbm_info *pbm) int i, saw_mem, saw_io; int num_pbm_ranges; + /* Corresponding generic code in of_pci_get_host_bridge_resources() */ + saw_mem = saw_io = 
0; pbm_ranges = of_get_property(pbm->op->dev.of_node, "ranges", &i); if (!pbm_ranges) { @@ -411,13 +393,16 @@ void pci_determine_mem_io_space(struct pci_pbm_info *pbm) for (i = 0; i < num_pbm_ranges; i++) { const struct linux_prom_pci_ranges *pr = &pbm_ranges[i]; - unsigned long a, size; + unsigned long a, size, region_a; u32 parent_phys_hi, parent_phys_lo; + u32 child_phys_mid, child_phys_lo; u32 size_hi, size_lo; int type; parent_phys_hi = pr->parent_phys_hi; parent_phys_lo = pr->parent_phys_lo; + child_phys_mid = pr->child_phys_mid; + child_phys_lo = pr->child_phys_lo; if (tlb_type == hypervisor) parent_phys_hi &= 0x0fffffff; @@ -427,6 +412,8 @@ void pci_determine_mem_io_space(struct pci_pbm_info *pbm) type = (pr->child_phys_hi >> 24) & 0x3; a = (((unsigned long)parent_phys_hi << 32UL) | ((unsigned long)parent_phys_lo << 0UL)); + region_a = (((unsigned long)child_phys_mid << 32UL) | + ((unsigned long)child_phys_lo << 0UL)); size = (((unsigned long)size_hi << 32UL) | ((unsigned long)size_lo << 0UL)); @@ -441,6 +428,7 @@ void pci_determine_mem_io_space(struct pci_pbm_info *pbm) pbm->io_space.start = a; pbm->io_space.end = a + size - 1UL; pbm->io_space.flags = IORESOURCE_IO; + pbm->io_offset = a - region_a; saw_io = 1; break; @@ -449,6 +437,7 @@ void pci_determine_mem_io_space(struct pci_pbm_info *pbm) pbm->mem_space.start = a; pbm->mem_space.end = a + size - 1UL; pbm->mem_space.flags = IORESOURCE_MEM; + pbm->mem_offset = a - region_a; saw_mem = 1; break; @@ -457,6 +446,7 @@ void pci_determine_mem_io_space(struct pci_pbm_info *pbm) pbm->mem64_space.start = a; pbm->mem64_space.end = a + size - 1UL; pbm->mem64_space.flags = IORESOURCE_MEM; + pbm->mem64_offset = a - region_a; saw_mem = 1; break; @@ -472,14 +462,22 @@ void pci_determine_mem_io_space(struct pci_pbm_info *pbm) prom_halt(); } - printk("%s: PCI IO[%llx] MEM[%llx]", - pbm->name, - pbm->io_space.start, - pbm->mem_space.start); + if (pbm->io_space.flags) + printk("%s: PCI IO %pR offset %llx\n", + pbm->name, 
&pbm->io_space, pbm->io_offset); + if (pbm->mem_space.flags) + printk("%s: PCI MEM %pR offset %llx\n", + pbm->name, &pbm->mem_space, pbm->mem_offset); + if (pbm->mem64_space.flags && pbm->mem_space.flags) { + if (pbm->mem64_space.start <= pbm->mem_space.end) + pbm->mem64_space.start = pbm->mem_space.end + 1; + if (pbm->mem64_space.start > pbm->mem64_space.end) + pbm->mem64_space.flags = 0; + } + if (pbm->mem64_space.flags) - printk(" MEM64[%llx]", - pbm->mem64_space.start); - printk("\n"); + printk("%s: PCI MEM64 %pR offset %llx\n", + pbm->name, &pbm->mem64_space, pbm->mem64_offset); pbm->io_space.name = pbm->mem_space.name = pbm->name; pbm->mem64_space.name = pbm->name; diff --git a/arch/sparc/kernel/pci_impl.h b/arch/sparc/kernel/pci_impl.h index ac172961d276..4e3d15189fa9 100644 --- a/arch/sparc/kernel/pci_impl.h +++ b/arch/sparc/kernel/pci_impl.h @@ -100,6 +100,10 @@ struct pci_pbm_info { struct resource mem_space; struct resource mem64_space; struct resource busn; + /* offset */ + resource_size_t io_offset; + resource_size_t mem_offset; + resource_size_t mem64_offset; /* Base of PCI Config space, can be per-PBM or shared. */ unsigned long config_space; diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 318efd784a0b..454a8af28f13 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -670,6 +670,31 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, return 0; } +/* TIF_MCDPER in thread info flags for current task is updated lazily upon + * a context switch. Update this flag in current task's thread flags + * before dup so the dup'd task will inherit the current TIF_MCDPER flag. 
+ */ +int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) +{ + if (adi_capable()) { + register unsigned long tmp_mcdper; + + __asm__ __volatile__( + ".word 0x83438000\n\t" /* rd %mcdper, %g1 */ + "mov %%g1, %0\n\t" + : "=r" (tmp_mcdper) + : + : "g1"); + if (tmp_mcdper) + set_thread_flag(TIF_MCDPER); + else + clear_thread_flag(TIF_MCDPER); + } + + *dst = *src; + return 0; +} + typedef struct { union { unsigned int pr_regs[32]; diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index 0b21042ab181..f6528884a2c8 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -25,13 +25,31 @@ .align 32 __handle_preemption: call SCHEDULE_USER - wrpr %g0, RTRAP_PSTATE, %pstate +661: wrpr %g0, RTRAP_PSTATE, %pstate + /* If userspace is using ADI, it could potentially pass + * a pointer with version tag embedded in it. To maintain + * the ADI security, we must re-enable PSTATE.mcde before + * we continue execution in the kernel for another thread. + */ + .section .sun_m7_1insn_patch, "ax" + .word 661b + wrpr %g0, RTRAP_PSTATE|PSTATE_MCDE, %pstate + .previous ba,pt %xcc, __handle_preemption_continue wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate __handle_user_windows: call fault_in_user_windows - wrpr %g0, RTRAP_PSTATE, %pstate +661: wrpr %g0, RTRAP_PSTATE, %pstate + /* If userspace is using ADI, it could potentially pass + * a pointer with version tag embedded in it. To maintain + * the ADI security, we must re-enable PSTATE.mcde before + * we continue execution in the kernel for another thread. 
+ */ + .section .sun_m7_1insn_patch, "ax" + .word 661b + wrpr %g0, RTRAP_PSTATE|PSTATE_MCDE, %pstate + .previous ba,pt %xcc, __handle_preemption_continue wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate @@ -48,7 +66,16 @@ __handle_signal: add %sp, PTREGS_OFF, %o0 mov %l0, %o2 call do_notify_resume - wrpr %g0, RTRAP_PSTATE, %pstate +661: wrpr %g0, RTRAP_PSTATE, %pstate + /* If userspace is using ADI, it could potentially pass + * a pointer with version tag embedded in it. To maintain + * the ADI security, we must re-enable PSTATE.mcde before + * we continue execution in the kernel for another thread. + */ + .section .sun_m7_1insn_patch, "ax" + .word 661b + wrpr %g0, RTRAP_PSTATE|PSTATE_MCDE, %pstate + .previous wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate /* Signal delivery can modify pt_regs tstate, so we must diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c index 34f7a533a74f..7944b3ca216a 100644 --- a/arch/sparc/kernel/setup_64.c +++ b/arch/sparc/kernel/setup_64.c @@ -294,6 +294,8 @@ static void __init sun4v_patch(void) case SUN4V_CHIP_SPARC_M7: case SUN4V_CHIP_SPARC_M8: case SUN4V_CHIP_SPARC_SN: + sun4v_patch_1insn_range(&__sun_m7_1insn_patch, + &__sun_m7_1insn_patch_end); sun_m7_patch_2insn_range(&__sun_m7_2insn_patch, &__sun_m7_2insn_patch_end); break; diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index c50182cd2f64..d3ea1f3c06a0 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -929,9 +929,9 @@ static inline void __local_flush_dcache_page(struct page *page) #ifdef DCACHE_ALIASING_POSSIBLE __flush_dcache_page(page_address(page), ((tlb_type == spitfire) && - page_mapping(page) != NULL)); + page_mapping_file(page) != NULL)); #else - if (page_mapping(page) != NULL && + if (page_mapping_file(page) != NULL && tlb_type == spitfire) __flush_icache_page(__pa(page_address(page))); #endif @@ -958,7 +958,7 @@ void smp_flush_dcache_page_impl(struct page *page, int cpu) if (tlb_type == spitfire) { data0 = 
((u64)&xcall_flush_dcache_page_spitfire); - if (page_mapping(page) != NULL) + if (page_mapping_file(page) != NULL) data0 |= ((u64)1 << 32); } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { #ifdef DCACHE_ALIASING_POSSIBLE @@ -994,7 +994,7 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page) pg_addr = page_address(page); if (tlb_type == spitfire) { data0 = ((u64)&xcall_flush_dcache_page_spitfire); - if (page_mapping(page) != NULL) + if (page_mapping_file(page) != NULL) data0 |= ((u64)1 << 32); } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { #ifdef DCACHE_ALIASING_POSSIBLE diff --git a/arch/sparc/kernel/sun4v_mcd.S b/arch/sparc/kernel/sun4v_mcd.S new file mode 100644 index 000000000000..d6c69ebca110 --- /dev/null +++ b/arch/sparc/kernel/sun4v_mcd.S @@ -0,0 +1,18 @@ +/* sun4v_mcd.S: Sun4v memory corruption detected precise exception handler + * + * Copyright (c) 2015 Oracle and/or its affiliates. All rights reserved. + * Authors: Bob Picco <bob.picco@oracle.com>, + * Khalid Aziz <khalid.aziz@oracle.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. 
+ */ + .text + .align 32 + +sun4v_mcd_detect_precise: + mov %l4, %o1 + mov %l5, %o2 + call sun4v_mem_corrupt_detect_precise + add %sp, PTREGS_OFF, %o0 + ba,a,pt %xcc, rtrap + nop diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S index 7e7011a1e712..489ffab918a8 100644 --- a/arch/sparc/kernel/sys32.S +++ b/arch/sparc/kernel/sys32.S @@ -13,44 +13,6 @@ .text -#define SIGN1(STUB,SYSCALL,REG1) \ - .align 32; \ - .globl STUB; \ -STUB: sethi %hi(SYSCALL), %g1; \ - jmpl %g1 + %lo(SYSCALL), %g0; \ - sra REG1, 0, REG1 - -#define SIGN2(STUB,SYSCALL,REG1,REG2) \ - .align 32; \ - .globl STUB; \ -STUB: sethi %hi(SYSCALL), %g1; \ - sra REG1, 0, REG1; \ - jmpl %g1 + %lo(SYSCALL), %g0; \ - sra REG2, 0, REG2 - -#define SIGN3(STUB,SYSCALL,REG1,REG2,REG3) \ - .align 32; \ - .globl STUB; \ -STUB: sra REG1, 0, REG1; \ - sethi %hi(SYSCALL), %g1; \ - sra REG2, 0, REG2; \ - jmpl %g1 + %lo(SYSCALL), %g0; \ - sra REG3, 0, REG3 - -SIGN1(sys32_readahead, compat_sys_readahead, %o0) -SIGN2(sys32_fadvise64, compat_sys_fadvise64, %o0, %o4) -SIGN2(sys32_fadvise64_64, compat_sys_fadvise64_64, %o0, %o5) -SIGN1(sys32_clock_nanosleep, compat_sys_clock_nanosleep, %o1) -SIGN1(sys32_timer_settime, compat_sys_timer_settime, %o1) -SIGN1(sys32_io_submit, compat_sys_io_submit, %o1) -SIGN1(sys32_mq_open, compat_sys_mq_open, %o1) -SIGN1(sys32_select, compat_sys_select, %o0) -SIGN1(sys32_futex, compat_sys_futex, %o1) -SIGN1(sys32_recvfrom, compat_sys_recvfrom, %o0) -SIGN1(sys32_recvmsg, compat_sys_recvmsg, %o0) -SIGN1(sys32_sendmsg, compat_sys_sendmsg, %o0) -SIGN2(sys32_renameat2, sys_renameat2, %o0, %o2) - .globl sys32_mmap2 sys32_mmap2: sethi %hi(sys_mmap), %g1 diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index f166e5bbf506..b5da3bfdc225 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -52,20 +52,14 @@ #include "systbls.h" -asmlinkage long sys32_truncate64(const char __user * path, unsigned long high, unsigned long low) 
+COMPAT_SYSCALL_DEFINE3(truncate64, const char __user *, path, u32, high, u32, low) { - if ((int)high < 0) - return -EINVAL; - else - return ksys_truncate(path, (high << 32) | low); + return ksys_truncate(path, ((u64)high << 32) | low); } -asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long high, unsigned long low) +COMPAT_SYSCALL_DEFINE3(ftruncate64, unsigned int, fd, u32, high, u32, low) { - if ((int)high < 0) - return -EINVAL; - else - return ksys_ftruncate(fd, (high << 32) | low); + return ksys_ftruncate(fd, ((u64)high << 32) | low); } static int cp_compat_stat64(struct kstat *stat, @@ -98,8 +92,8 @@ static int cp_compat_stat64(struct kstat *stat, return err; } -asmlinkage long compat_sys_stat64(const char __user * filename, - struct compat_stat64 __user *statbuf) +COMPAT_SYSCALL_DEFINE2(stat64, const char __user *, filename, + struct compat_stat64 __user *, statbuf) { struct kstat stat; int error = vfs_stat(filename, &stat); @@ -109,8 +103,8 @@ asmlinkage long compat_sys_stat64(const char __user * filename, return error; } -asmlinkage long compat_sys_lstat64(const char __user * filename, - struct compat_stat64 __user *statbuf) +COMPAT_SYSCALL_DEFINE2(lstat64, const char __user *, filename, + struct compat_stat64 __user *, statbuf) { struct kstat stat; int error = vfs_lstat(filename, &stat); @@ -120,8 +114,8 @@ asmlinkage long compat_sys_lstat64(const char __user * filename, return error; } -asmlinkage long compat_sys_fstat64(unsigned int fd, - struct compat_stat64 __user * statbuf) +COMPAT_SYSCALL_DEFINE2(fstat64, unsigned int, fd, + struct compat_stat64 __user *, statbuf) { struct kstat stat; int error = vfs_fstat(fd, &stat); @@ -131,9 +125,9 @@ asmlinkage long compat_sys_fstat64(unsigned int fd, return error; } -asmlinkage long compat_sys_fstatat64(unsigned int dfd, - const char __user *filename, - struct compat_stat64 __user * statbuf, int flag) +COMPAT_SYSCALL_DEFINE4(fstatat64, unsigned int, dfd, + const char __user *, filename, + struct 
compat_stat64 __user *, statbuf, int, flag) { struct kstat stat; int error; @@ -194,61 +188,50 @@ COMPAT_SYSCALL_DEFINE5(rt_sigaction, int, sig, return ret; } -asmlinkage compat_ssize_t sys32_pread64(unsigned int fd, - char __user *ubuf, - compat_size_t count, - unsigned long poshi, - unsigned long poslo) +COMPAT_SYSCALL_DEFINE5(pread64, unsigned int, fd, char __user *, ubuf, + compat_size_t, count, u32, poshi, u32, poslo) { - return ksys_pread64(fd, ubuf, count, (poshi << 32) | poslo); + return ksys_pread64(fd, ubuf, count, ((u64)poshi << 32) | poslo); } -asmlinkage compat_ssize_t sys32_pwrite64(unsigned int fd, - char __user *ubuf, - compat_size_t count, - unsigned long poshi, - unsigned long poslo) +COMPAT_SYSCALL_DEFINE5(pwrite64, unsigned int, fd, char __user *, ubuf, + compat_size_t, count, u32, poshi, u32, poslo) { - return ksys_pwrite64(fd, ubuf, count, (poshi << 32) | poslo); + return ksys_pwrite64(fd, ubuf, count, ((u64)poshi << 32) | poslo); } -asmlinkage long compat_sys_readahead(int fd, - unsigned long offhi, - unsigned long offlo, - compat_size_t count) +COMPAT_SYSCALL_DEFINE4(readahead, int, fd, u32, offhi, u32, offlo, + compat_size_t, count) { - return ksys_readahead(fd, (offhi << 32) | offlo, count); + return ksys_readahead(fd, ((u64)offhi << 32) | offlo, count); } -long compat_sys_fadvise64(int fd, - unsigned long offhi, - unsigned long offlo, - compat_size_t len, int advice) +COMPAT_SYSCALL_DEFINE5(fadvise64, int, fd, u32, offhi, u32, offlo, + compat_size_t, len, int, advice) { - return ksys_fadvise64_64(fd, (offhi << 32) | offlo, len, advice); + return ksys_fadvise64_64(fd, ((u64)offhi << 32) | offlo, len, advice); } -long compat_sys_fadvise64_64(int fd, - unsigned long offhi, unsigned long offlo, - unsigned long lenhi, unsigned long lenlo, - int advice) +COMPAT_SYSCALL_DEFINE6(fadvise64_64, int, fd, u32, offhi, u32, offlo, + u32, lenhi, u32, lenlo, int, advice) { return ksys_fadvise64_64(fd, - (offhi << 32) | offlo, - (lenhi << 32) | lenlo, + 
((u64)offhi << 32) | offlo, + ((u64)lenhi << 32) | lenlo, advice); } -long sys32_sync_file_range(unsigned int fd, unsigned long off_high, unsigned long off_low, unsigned long nb_high, unsigned long nb_low, unsigned int flags) +COMPAT_SYSCALL_DEFINE6(sync_file_range, unsigned int, fd, u32, off_high, u32, off_low, + u32, nb_high, u32, nb_low, unsigned int, flags) { return ksys_sync_file_range(fd, - (off_high << 32) | off_low, - (nb_high << 32) | nb_low, - flags); + ((u64)off_high << 32) | off_low, + ((u64)nb_high << 32) | nb_low, + flags); } -asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo, - u32 lenhi, u32 lenlo) +COMPAT_SYSCALL_DEFINE6(fallocate, int, fd, int, mode, u32, offhi, u32, offlo, + u32, lenhi, u32, lenlo) { return ksys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo, ((loff_t)lenhi << 32) | lenlo); diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index d980da4ffd7b..e8c3cb6b6d08 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -34,7 +34,7 @@ /* XXX Make this per-binary type, this way we can detect the type of * XXX a binary. Every Sparc executable calls this very early on. */ -asmlinkage unsigned long sys_getpagesize(void) +SYSCALL_DEFINE0(getpagesize) { return PAGE_SIZE; /* Possibly older binaries want 8192 on sun4's? */ } @@ -73,7 +73,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi * sys_pipe() is the normal C calling standard for creating * a pipe. It's not the way unix traditionally does this, though. 
*/ -asmlinkage long sparc_pipe(struct pt_regs *regs) +SYSCALL_DEFINE0(sparc_pipe) { int fd[2]; int error; @@ -81,7 +81,7 @@ asmlinkage long sparc_pipe(struct pt_regs *regs) error = do_pipe_flags(fd, 0); if (error) goto out; - regs->u_regs[UREG_I1] = fd[1]; + current_pt_regs()->u_regs[UREG_I1] = fd[1]; error = fd[0]; out: return error; @@ -98,9 +98,9 @@ int sparc_mmap_check(unsigned long addr, unsigned long len) /* Linux version of mmap */ -asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, unsigned long fd, - unsigned long pgoff) +SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, unsigned long, fd, + unsigned long, pgoff) { /* Make sure the shift for mmap2 is constant (12), no matter what PAGE_SIZE we have. */ @@ -108,17 +108,17 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, pgoff >> (PAGE_SHIFT - 12)); } -asmlinkage long sys_mmap(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, unsigned long fd, - unsigned long off) +SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, unsigned long, fd, + unsigned long, off) { /* no alignment check? */ return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); } -long sparc_remap_file_pages(unsigned long start, unsigned long size, - unsigned long prot, unsigned long pgoff, - unsigned long flags) +SYSCALL_DEFINE5(sparc_remap_file_pages, unsigned long, start, unsigned long, size, + unsigned long, prot, unsigned long, pgoff, + unsigned long, flags) { /* This works on an existing mmap so we don't need to validate * the range as that was done at the original mmap call. 
@@ -127,11 +127,10 @@ long sparc_remap_file_pages(unsigned long start, unsigned long size, (pgoff >> (PAGE_SHIFT - 12)), flags); } -/* we come to here via sys_nis_syscall so it can setup the regs argument */ -asmlinkage unsigned long -c_sys_nis_syscall (struct pt_regs *regs) +SYSCALL_DEFINE0(nis_syscall) { static int count = 0; + struct pt_regs *regs = current_pt_regs(); if (count++ > 5) return -ENOSYS; @@ -202,7 +201,7 @@ SYSCALL_DEFINE5(rt_sigaction, int, sig, return ret; } -asmlinkage long sys_getdomainname(char __user *name, int len) +SYSCALL_DEFINE2(getdomainname, char __user *, name, int, len) { int nlen, err; diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index ebb84dc8a5a7..9ef8de63f28b 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -39,7 +39,7 @@ /* #define DEBUG_UNIMP_SYSCALL */ -asmlinkage unsigned long sys_getpagesize(void) +SYSCALL_DEFINE0(getpagesize) { return PAGE_SIZE; } @@ -276,7 +276,7 @@ static unsigned long mmap_rnd(void) return rnd << PAGE_SHIFT; } -void arch_pick_mmap_layout(struct mm_struct *mm) +void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) { unsigned long random_factor = mmap_rnd(); unsigned long gap; @@ -285,7 +285,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) * Fall back to the standard layout if the personality * bit is set, or if the expected stack growth is unlimited: */ - gap = rlimit(RLIMIT_STACK); + gap = rlim_stack->rlim_cur; if (!test_thread_flag(TIF_32BIT) || (current->personality & ADDR_COMPAT_LAYOUT) || gap == RLIM_INFINITY || @@ -310,7 +310,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm) * sys_pipe() is the normal C calling standard for creating * a pipe. It's not the way unix traditionally does this, though. 
*/ -SYSCALL_DEFINE1(sparc_pipe_real, struct pt_regs *, regs) +SYSCALL_DEFINE0(sparc_pipe) { int fd[2]; int error; @@ -318,7 +318,7 @@ SYSCALL_DEFINE1(sparc_pipe_real, struct pt_regs *, regs) error = do_pipe_flags(fd, 0); if (error) goto out; - regs->u_regs[UREG_I1] = fd[1]; + current_pt_regs()->u_regs[UREG_I1] = fd[1]; error = fd[0]; out: return error; @@ -480,10 +480,10 @@ SYSCALL_DEFINE5(64_mremap, unsigned long, addr, unsigned long, old_len, return sys_mremap(addr, old_len, new_len, flags, new_addr); } -/* we come to here via sys_nis_syscall so it can setup the regs argument */ -asmlinkage unsigned long c_sys_nis_syscall(struct pt_regs *regs) +SYSCALL_DEFINE0(nis_syscall) { static int count; + struct pt_regs *regs = current_pt_regs(); /* Don't make the system unusable, if someone goes stuck */ if (count++ > 5) @@ -523,8 +523,6 @@ asmlinkage void sparc_breakpoint(struct pt_regs *regs) exception_exit(prev_state); } -extern void check_pending(int signum); - SYSCALL_DEFINE2(getdomainname, char __user *, name, int, len) { int nlen, err; @@ -608,9 +606,9 @@ SYSCALL_DEFINE5(utrap_install, utrap_entry_t, type, return 0; } -asmlinkage long sparc_memory_ordering(unsigned long model, - struct pt_regs *regs) +SYSCALL_DEFINE1(memory_ordering, unsigned long, model) { + struct pt_regs *regs = current_pt_regs(); if (model >= 3) return -EINVAL; regs->tstate = (regs->tstate & ~TSTATE_MM) | (model << 14); @@ -644,7 +642,7 @@ SYSCALL_DEFINE5(rt_sigaction, int, sig, const struct sigaction __user *, act, return ret; } -asmlinkage long sys_kern_features(void) +SYSCALL_DEFINE0(kern_features) { return KERN_FEATURE_MIXED_MODE_STACK; } diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S index c5f9ec8c52eb..db42b4fb3708 100644 --- a/arch/sparc/kernel/syscalls.S +++ b/arch/sparc/kernel/syscalls.S @@ -27,15 +27,6 @@ sys32_execveat: #endif .align 32 -sys_sparc_pipe: - ba,pt %xcc, sys_sparc_pipe_real - add %sp, PTREGS_OFF, %o0 -sys_nis_syscall: - ba,pt %xcc, 
c_sys_nis_syscall - add %sp, PTREGS_OFF, %o0 -sys_memory_ordering: - ba,pt %xcc, sparc_memory_ordering - add %sp, PTREGS_OFF, %o1 #ifdef CONFIG_COMPAT sys32_sigstack: ba,pt %xcc, do_sys32_sigstack diff --git a/arch/sparc/kernel/systbls.h b/arch/sparc/kernel/systbls.h index 5a01cfe19a0e..bf014267d619 100644 --- a/arch/sparc/kernel/systbls.h +++ b/arch/sparc/kernel/systbls.h @@ -9,9 +9,9 @@ #include <asm/utrap.h> -asmlinkage unsigned long sys_getpagesize(void); -asmlinkage long sparc_pipe(struct pt_regs *regs); -asmlinkage unsigned long c_sys_nis_syscall(struct pt_regs *regs); +asmlinkage long sys_getpagesize(void); +asmlinkage long sys_sparc_pipe(void); +asmlinkage long sys_nis_syscall(void); asmlinkage long sys_getdomainname(char __user *name, int len); void do_rt_sigreturn(struct pt_regs *regs); asmlinkage long sys_mmap(unsigned long addr, unsigned long len, @@ -23,7 +23,7 @@ asmlinkage void sparc_breakpoint(struct pt_regs *regs); asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff); -long sparc_remap_file_pages(unsigned long start, unsigned long size, +long sys_sparc_remap_file_pages(unsigned long start, unsigned long size, unsigned long prot, unsigned long pgoff, unsigned long flags); @@ -46,16 +46,15 @@ asmlinkage long sys_utrap_install(utrap_entry_t type, utrap_handler_t new_d, utrap_handler_t __user *old_p, utrap_handler_t __user *old_d); -asmlinkage long sparc_memory_ordering(unsigned long model, - struct pt_regs *regs); +asmlinkage long sys_memory_ordering(unsigned long model); asmlinkage void sparc64_set_context(struct pt_regs *regs); asmlinkage void sparc64_get_context(struct pt_regs *regs); -asmlinkage long sys32_truncate64(const char __user * path, - unsigned long high, - unsigned long low); -asmlinkage long sys32_ftruncate64(unsigned int fd, - unsigned long high, - unsigned long low); +asmlinkage long compat_sys_truncate64(const char __user * path, + u32 high, + 
u32 low); +asmlinkage long compat_sys_ftruncate64(unsigned int fd, + u32 high, + u32 low); struct compat_stat64; asmlinkage long compat_sys_stat64(const char __user * filename, struct compat_stat64 __user *statbuf); @@ -66,31 +65,31 @@ asmlinkage long compat_sys_fstat64(unsigned int fd, asmlinkage long compat_sys_fstatat64(unsigned int dfd, const char __user *filename, struct compat_stat64 __user * statbuf, int flag); -asmlinkage compat_ssize_t sys32_pread64(unsigned int fd, +asmlinkage long compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count, - unsigned long poshi, - unsigned long poslo); -asmlinkage compat_ssize_t sys32_pwrite64(unsigned int fd, + u32 poshi, + u32 poslo); +asmlinkage long compat_sys_pwrite64(unsigned int fd, char __user *ubuf, compat_size_t count, - unsigned long poshi, - unsigned long poslo); + u32 poshi, + u32 poslo); asmlinkage long compat_sys_readahead(int fd, - unsigned long offhi, - unsigned long offlo, + unsigned offhi, + unsigned offlo, compat_size_t count); long compat_sys_fadvise64(int fd, - unsigned long offhi, - unsigned long offlo, + unsigned offhi, + unsigned offlo, compat_size_t len, int advice); long compat_sys_fadvise64_64(int fd, - unsigned long offhi, unsigned long offlo, - unsigned long lenhi, unsigned long lenlo, + unsigned offhi, unsigned offlo, + unsigned lenhi, unsigned lenlo, int advice); -long sys32_sync_file_range(unsigned int fd, - unsigned long off_high, unsigned long off_low, - unsigned long nb_high, unsigned long nb_low, +long compat_sys_sync_file_range(unsigned int fd, + unsigned off_high, unsigned off_low, + unsigned nb_high, unsigned nb_low, unsigned int flags); asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo, u32 lenhi, u32 lenlo); diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 731b25d572a1..12bee14b552c 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -55,7 +55,7 @@ sys_call_table: /*175*/ 
.long sys_setsid, sys_fchdir, sys_fgetxattr, sys_listxattr, sys_llistxattr /*180*/ .long sys_flistxattr, sys_removexattr, sys_lremovexattr, sys_sigpending, sys_ni_syscall /*185*/ .long sys_setpgid, sys_fremovexattr, sys_tkill, sys_exit_group, sys_newuname -/*190*/ .long sys_init_module, sys_personality, sparc_remap_file_pages, sys_epoll_create, sys_epoll_ctl +/*190*/ .long sys_init_module, sys_personality, sys_sparc_remap_file_pages, sys_epoll_create, sys_epoll_ctl /*195*/ .long sys_epoll_wait, sys_ioprio_set, sys_getppid, sys_sparc_sigaction, sys_sgetmask /*200*/ .long sys_ssetmask, sys_sigsuspend, sys_newlstat, sys_uselib, sys_old_readdir /*205*/ .long sys_readahead, sys_socketcall, sys_syslog, sys_lookup_dcookie, sys_fadvise64 diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 293c1cb31262..387ef993880a 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -32,12 +32,12 @@ sys_call_table32: /*50*/ .word sys_getegid16, sys_acct, sys_nis_syscall, sys_getgid, compat_sys_ioctl .word sys_reboot, sys32_mmap2, sys_symlink, sys_readlink, sys32_execve /*60*/ .word sys_umask, sys_chroot, compat_sys_newfstat, compat_sys_fstat64, sys_getpagesize - .word sys_msync, sys_vfork, sys32_pread64, sys32_pwrite64, sys_geteuid + .word sys_msync, sys_vfork, compat_sys_pread64, compat_sys_pwrite64, sys_geteuid /*70*/ .word sys_getegid, sys_mmap, sys_setreuid, sys_munmap, sys_mprotect - .word sys_madvise, sys_vhangup, sys32_truncate64, sys_mincore, sys_getgroups16 -/*80*/ .word sys_setgroups16, sys_getpgrp, sys_setgroups, compat_sys_setitimer, sys32_ftruncate64 + .word sys_madvise, sys_vhangup, compat_sys_truncate64, sys_mincore, sys_getgroups16 +/*80*/ .word sys_setgroups16, sys_getpgrp, sys_setgroups, compat_sys_setitimer, compat_sys_ftruncate64 .word sys_swapon, compat_sys_getitimer, sys_setuid, sys_sethostname, sys_setgid -/*90*/ .word sys_dup2, sys_setfsuid, compat_sys_fcntl, sys32_select, sys_setfsgid +/*90*/ .word 
sys_dup2, sys_setfsuid, compat_sys_fcntl, compat_sys_select, sys_setfsgid .word sys_fsync, sys_setpriority, sys_socket, sys_connect, sys_accept /*100*/ .word sys_getpriority, sys32_rt_sigreturn, compat_sys_rt_sigaction, compat_sys_rt_sigprocmask, compat_sys_rt_sigpending .word compat_sys_rt_sigtimedwait, compat_sys_rt_sigqueueinfo, compat_sys_rt_sigsuspend, sys_setresuid, sys_getresuid @@ -47,7 +47,7 @@ sys_call_table32: .word sys_recvfrom, sys_setreuid16, sys_setregid16, sys_rename, compat_sys_truncate /*130*/ .word compat_sys_ftruncate, sys_flock, compat_sys_lstat64, sys_sendto, sys_shutdown .word sys_socketpair, sys_mkdir, sys_rmdir, compat_sys_utimes, compat_sys_stat64 -/*140*/ .word sys_sendfile64, sys_nis_syscall, sys32_futex, sys_gettid, compat_sys_getrlimit +/*140*/ .word sys_sendfile64, sys_nis_syscall, compat_sys_futex, sys_gettid, compat_sys_getrlimit .word compat_sys_setrlimit, sys_pivot_root, sys_prctl, sys_pciconfig_read, sys_pciconfig_write /*150*/ .word sys_nis_syscall, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64 .word compat_sys_fcntl64, sys_inotify_rm_watch, compat_sys_statfs, compat_sys_fstatfs, sys_oldumount @@ -60,20 +60,20 @@ sys_call_table32: /*190*/ .word sys_init_module, sys_sparc64_personality, sys_remap_file_pages, sys_epoll_create, sys_epoll_ctl .word sys_epoll_wait, sys_ioprio_set, sys_getppid, compat_sys_sparc_sigaction, sys_sgetmask /*200*/ .word sys_ssetmask, sys_sigsuspend, compat_sys_newlstat, sys_uselib, compat_sys_old_readdir - .word sys32_readahead, sys32_socketcall, sys_syslog, compat_sys_lookup_dcookie, sys32_fadvise64 -/*210*/ .word sys32_fadvise64_64, sys_tgkill, sys_waitpid, sys_swapoff, compat_sys_sysinfo + .word compat_sys_readahead, sys32_socketcall, sys_syslog, compat_sys_lookup_dcookie, compat_sys_fadvise64 +/*210*/ .word compat_sys_fadvise64_64, sys_tgkill, sys_waitpid, sys_swapoff, compat_sys_sysinfo .word compat_sys_ipc, sys32_sigreturn, sys_clone, sys_ioprio_get, compat_sys_adjtimex /*220*/ 
.word compat_sys_sigprocmask, sys_ni_syscall, sys_delete_module, sys_ni_syscall, sys_getpgid .word sys_bdflush, sys_sysfs, sys_nis_syscall, sys_setfsuid16, sys_setfsgid16 -/*230*/ .word sys32_select, compat_sys_time, sys_splice, compat_sys_stime, compat_sys_statfs64 +/*230*/ .word compat_sys_select, compat_sys_time, sys_splice, compat_sys_stime, compat_sys_statfs64 .word compat_sys_fstatfs64, sys_llseek, sys_mlock, sys_munlock, sys_mlockall /*240*/ .word sys_munlockall, sys_sched_setparam, sys_sched_getparam, sys_sched_setscheduler, sys_sched_getscheduler .word sys_sched_yield, sys_sched_get_priority_max, sys_sched_get_priority_min, compat_sys_sched_rr_get_interval, compat_sys_nanosleep /*250*/ .word sys_mremap, compat_sys_sysctl, sys_getsid, sys_fdatasync, sys_nis_syscall - .word sys32_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, sys32_clock_nanosleep -/*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, sys32_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun + .word compat_sys_sync_file_range, compat_sys_clock_settime, compat_sys_clock_gettime, compat_sys_clock_getres, compat_sys_clock_nanosleep +/*260*/ .word compat_sys_sched_getaffinity, compat_sys_sched_setaffinity, compat_sys_timer_settime, compat_sys_timer_gettime, sys_timer_getoverrun .word sys_timer_delete, compat_sys_timer_create, sys_ni_syscall, compat_sys_io_setup, sys_io_destroy -/*270*/ .word sys32_io_submit, sys_io_cancel, compat_sys_io_getevents, sys32_mq_open, sys_mq_unlink +/*270*/ .word compat_sys_io_submit, sys_io_cancel, compat_sys_io_getevents, compat_sys_mq_open, sys_mq_unlink .word compat_sys_mq_timedsend, compat_sys_mq_timedreceive, compat_sys_mq_notify, compat_sys_mq_getsetattr, compat_sys_waitid /*280*/ .word sys_tee, sys_add_key, sys_request_key, compat_sys_keyctl, compat_sys_openat .word sys_mkdirat, sys_mknodat, sys_fchownat, compat_sys_futimesat, compat_sys_fstatat64 @@ -88,7 +88,7 @@ sys_call_table32: 
/*330*/ .word compat_sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime .word sys_syncfs, compat_sys_sendmmsg, sys_setns, compat_sys_process_vm_readv, compat_sys_process_vm_writev /*340*/ .word sys_kern_features, sys_kcmp, sys_finit_module, sys_sched_setattr, sys_sched_getattr - .word sys32_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf + .word sys_renameat2, sys_seccomp, sys_getrandom, sys_memfd_create, sys_bpf /*350*/ .word sys32_execveat, sys_membarrier, sys_userfaultfd, sys_bind, sys_listen .word compat_sys_setsockopt, sys_mlock2, sys_copy_file_range, compat_sys_preadv2, compat_sys_pwritev2 /*360*/ .word sys_statx diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 0a56dc257cb9..462a21abd105 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -362,7 +362,6 @@ void sun4v_data_access_exception(struct pt_regs *regs, unsigned long addr, unsig { unsigned short type = (type_ctx >> 16); unsigned short ctx = (type_ctx & 0xffff); - siginfo_t info; if (notify_die(DIE_TRAP, "data access exception", regs, 0, 0x8, SIGTRAP) == NOTIFY_STOP) @@ -397,12 +396,29 @@ void sun4v_data_access_exception(struct pt_regs *regs, unsigned long addr, unsig if (is_no_fault_exception(regs)) return; - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = SEGV_MAPERR; - info.si_addr = (void __user *) addr; - info.si_trapno = 0; - force_sig_info(SIGSEGV, &info, current); + /* MCD (Memory Corruption Detection) disabled trap (TT=0x19) in HV + * is vectored through data access exception trap with fault type + * set to HV_FAULT_TYPE_MCD_DIS. Check for MCD disabled trap. + * Accessing an address with invalid ASI for the address, for + * example setting an ADI tag on an address with ASI_MCD_PRIMARY + * when TTE.mcd is not set for the VA, is also vectored into + * kernel by HV as data access exception with fault type set to + * HV_FAULT_TYPE_INV_ASI. 
+ */ + switch (type) { + case HV_FAULT_TYPE_INV_ASI: + force_sig_fault(SIGILL, ILL_ILLADR, (void __user *)addr, 0, + current); + break; + case HV_FAULT_TYPE_MCD_DIS: + force_sig_fault(SIGSEGV, SEGV_ACCADI, (void __user *)addr, 0, + current); + break; + default: + force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)addr, 0, + current); + break; + } } void sun4v_data_access_exception_tl1(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx) @@ -1847,6 +1863,7 @@ struct sun4v_error_entry { #define SUN4V_ERR_ATTRS_ASI 0x00000080 #define SUN4V_ERR_ATTRS_PRIV_REG 0x00000100 #define SUN4V_ERR_ATTRS_SPSTATE_MSK 0x00000600 +#define SUN4V_ERR_ATTRS_MCD 0x00000800 #define SUN4V_ERR_ATTRS_SPSTATE_SHFT 9 #define SUN4V_ERR_ATTRS_MODE_MSK 0x03000000 #define SUN4V_ERR_ATTRS_MODE_SHFT 24 @@ -2044,6 +2061,50 @@ static void sun4v_log_error(struct pt_regs *regs, struct sun4v_error_entry *ent, } } +/* Handle memory corruption detected error which is vectored in + * through resumable error trap. + */ +void do_mcd_err(struct pt_regs *regs, struct sun4v_error_entry ent) +{ + if (notify_die(DIE_TRAP, "MCD error", regs, 0, 0x34, + SIGSEGV) == NOTIFY_STOP) + return; + + if (regs->tstate & TSTATE_PRIV) { + /* MCD exception could happen because the task was + * running a system call with MCD enabled and passed a + * non-versioned pointer or pointer with bad version + * tag to the system call. In such cases, hypervisor + * places the address of offending instruction in the + * resumable error report. This is a deferred error, + * so the read/write that caused the trap was potentially + * retired long time back and we may have no choice + * but to send SIGSEGV to the process. 
+ */ + const struct exception_table_entry *entry; + + entry = search_exception_tables(regs->tpc); + if (entry) { + /* Looks like a bad syscall parameter */ +#ifdef DEBUG_EXCEPTIONS + pr_emerg("Exception: PC<%016lx> faddr<UNKNOWN>\n", + regs->tpc); + pr_emerg("EX_TABLE: insn<%016lx> fixup<%016lx>\n", + ent.err_raddr, entry->fixup); +#endif + regs->tpc = entry->fixup; + regs->tnpc = regs->tpc + 4; + return; + } + } + + /* Send SIGSEGV to the userspace process with the right signal + * code + */ + force_sig_fault(SIGSEGV, SEGV_ADIDERR, (void __user *)ent.err_raddr, + 0, current); +} + /* We run with %pil set to PIL_NORMAL_MAX and PSTATE_IE enabled in %pstate. * Log the event and clear the first word of the entry. */ @@ -2081,6 +2142,14 @@ void sun4v_resum_error(struct pt_regs *regs, unsigned long offset) goto out; } + /* If this is a memory corruption detected error vectored in + * by HV through resumable error trap, call the handler + */ + if (local_copy.err_attrs & SUN4V_ERR_ATTRS_MCD) { + do_mcd_err(regs, local_copy); + return; + } + sun4v_log_error(regs, &local_copy, cpu, KERN_ERR "RESUMABLE ERROR", &sun4v_resum_oflow_cnt); @@ -2656,6 +2725,53 @@ void sun4v_do_mna(struct pt_regs *regs, unsigned long addr, unsigned long type_c force_sig_info(SIGBUS, &info, current); } +/* sun4v_mem_corrupt_detect_precise() - Handle precise exception on an ADI + * tag mismatch. + * + * ADI version tag mismatch on a load from memory always results in a + * precise exception. Tag mismatch on a store to memory will result in + * precise exception if MCDPER or PMCDPER is set to 1. 
+ */ +void sun4v_mem_corrupt_detect_precise(struct pt_regs *regs, unsigned long addr, + unsigned long context) +{ + if (notify_die(DIE_TRAP, "memory corruption precise exception", regs, + 0, 0x8, SIGSEGV) == NOTIFY_STOP) + return; + + if (regs->tstate & TSTATE_PRIV) { + /* MCD exception could happen because the task was running + * a system call with MCD enabled and passed a non-versioned + * pointer or pointer with bad version tag to the system + * call. + */ + const struct exception_table_entry *entry; + + entry = search_exception_tables(regs->tpc); + if (entry) { + /* Looks like a bad syscall parameter */ +#ifdef DEBUG_EXCEPTIONS + pr_emerg("Exception: PC<%016lx> faddr<UNKNOWN>\n", + regs->tpc); + pr_emerg("EX_TABLE: insn<%016lx> fixup<%016lx>\n", + regs->tpc, entry->fixup); +#endif + regs->tpc = entry->fixup; + regs->tnpc = regs->tpc + 4; + return; + } + pr_emerg("%s: ADDR[%016lx] CTX[%lx], going.\n", + __func__, addr, context); + die_if_kernel("MCD precise", regs); + } + + if (test_thread_flag(TIF_32BIT)) { + regs->tpc &= 0xffffffff; + regs->tnpc &= 0xffffffff; + } + force_sig_fault(SIGSEGV, SEGV_ADIPERR, (void __user *)addr, 0, current); +} + void do_privop(struct pt_regs *regs) { enum ctx_state prev_state = exception_enter(); diff --git a/arch/sparc/kernel/ttable_64.S b/arch/sparc/kernel/ttable_64.S index 18685fe69b91..86e737e59c7e 100644 --- a/arch/sparc/kernel/ttable_64.S +++ b/arch/sparc/kernel/ttable_64.S @@ -26,8 +26,10 @@ tl0_ill: membar #Sync TRAP_7INSNS(do_illegal_instruction) tl0_privop: TRAP(do_privop) tl0_resv012: BTRAP(0x12) BTRAP(0x13) BTRAP(0x14) BTRAP(0x15) BTRAP(0x16) BTRAP(0x17) -tl0_resv018: BTRAP(0x18) BTRAP(0x19) BTRAP(0x1a) BTRAP(0x1b) BTRAP(0x1c) BTRAP(0x1d) -tl0_resv01e: BTRAP(0x1e) BTRAP(0x1f) +tl0_resv018: BTRAP(0x18) BTRAP(0x19) +tl0_mcd: SUN4V_MCD_PRECISE +tl0_resv01b: BTRAP(0x1b) +tl0_resv01c: BTRAP(0x1c) BTRAP(0x1d) BTRAP(0x1e) BTRAP(0x1f) tl0_fpdis: TRAP_NOSAVE(do_fpdis) tl0_fpieee: TRAP_SAVEFPU(do_fpieee) tl0_fpother: 
TRAP_NOSAVE(do_fpother_check_fitos) diff --git a/arch/sparc/kernel/urtt_fill.S b/arch/sparc/kernel/urtt_fill.S index 44183aa59168..e4cee7be5cd0 100644 --- a/arch/sparc/kernel/urtt_fill.S +++ b/arch/sparc/kernel/urtt_fill.S @@ -50,7 +50,12 @@ user_rtt_fill_fixup_common: SET_GL(0) .previous - wrpr %g0, RTRAP_PSTATE, %pstate +661: wrpr %g0, RTRAP_PSTATE, %pstate + .section .sun_m7_1insn_patch, "ax" + .word 661b + /* Re-enable PSTATE.mcde to maintain ADI security */ + wrpr %g0, RTRAP_PSTATE|PSTATE_MCDE, %pstate + .previous mov %l1, %g6 ldx [%g6 + TI_TASK], %g4 diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 5a2344574f39..61afd787bd0c 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -145,6 +145,11 @@ SECTIONS *(.pause_3insn_patch) __pause_3insn_patch_end = .; } + .sun_m7_1insn_patch : { + __sun_m7_1insn_patch = .; + *(.sun_m7_1insn_patch) + __sun_m7_1insn_patch_end = .; + } .sun_m7_2insn_patch : { __sun_m7_2insn_patch = .; *(.sun_m7_2insn_patch) |