diff options
Diffstat (limited to 'arch/arm64/mm')
-rw-r--r-- | arch/arm64/mm/Makefile | 4 | ||||
-rw-r--r-- | arch/arm64/mm/cache.S | 52 | ||||
-rw-r--r-- | arch/arm64/mm/context.c | 38 | ||||
-rw-r--r-- | arch/arm64/mm/dma-mapping.c | 24 | ||||
-rw-r--r-- | arch/arm64/mm/dump.c | 171 | ||||
-rw-r--r-- | arch/arm64/mm/fault.c | 113 | ||||
-rw-r--r-- | arch/arm64/mm/flush.c | 3 | ||||
-rw-r--r-- | arch/arm64/mm/init.c | 119 | ||||
-rw-r--r-- | arch/arm64/mm/ioremap.c | 4 | ||||
-rw-r--r-- | arch/arm64/mm/kasan_init.c | 9 | ||||
-rw-r--r-- | arch/arm64/mm/mmap.c | 72 | ||||
-rw-r--r-- | arch/arm64/mm/mmu.c | 49 | ||||
-rw-r--r-- | arch/arm64/mm/numa.c | 2 | ||||
-rw-r--r-- | arch/arm64/mm/pageattr.c | 4 | ||||
-rw-r--r-- | arch/arm64/mm/pgd.c | 2 | ||||
-rw-r--r-- | arch/arm64/mm/proc.S | 132 | ||||
-rw-r--r-- | arch/arm64/mm/ptdump_debugfs.c | 2 |
17 files changed, 374 insertions, 426 deletions
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 849c1df3d214..d91030f0ffee 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -4,8 +4,8 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ ioremap.o mmap.o pgd.o mmu.o \ context.o proc.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_ARM64_PTDUMP_CORE) += dump.o -obj-$(CONFIG_ARM64_PTDUMP_DEBUGFS) += ptdump_debugfs.o +obj-$(CONFIG_PTDUMP_CORE) += dump.o +obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o KASAN_SANITIZE_physaddr.o += n diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index db767b072601..2d881f34dd9d 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -24,7 +24,7 @@ * - start - virtual start address of region * - end - virtual end address of region */ -ENTRY(__flush_icache_range) +SYM_FUNC_START(__flush_icache_range) /* FALLTHROUGH */ /* @@ -37,7 +37,7 @@ ENTRY(__flush_icache_range) * - start - virtual start address of region * - end - virtual end address of region */ -ENTRY(__flush_cache_user_range) +SYM_FUNC_START(__flush_cache_user_range) uaccess_ttbr0_enable x2, x3, x4 alternative_if ARM64_HAS_CACHE_IDC dsb ishst @@ -66,8 +66,8 @@ alternative_else_nop_endif 9: mov x0, #-EFAULT b 1b -ENDPROC(__flush_icache_range) -ENDPROC(__flush_cache_user_range) +SYM_FUNC_END(__flush_icache_range) +SYM_FUNC_END(__flush_cache_user_range) /* * invalidate_icache_range(start,end) @@ -77,7 +77,7 @@ ENDPROC(__flush_cache_user_range) * - start - virtual start address of region * - end - virtual end address of region */ -ENTRY(invalidate_icache_range) +SYM_FUNC_START(invalidate_icache_range) alternative_if ARM64_HAS_CACHE_DIC mov x0, xzr isb @@ -94,7 +94,7 @@ alternative_else_nop_endif 2: mov x0, #-EFAULT b 1b -ENDPROC(invalidate_icache_range) +SYM_FUNC_END(invalidate_icache_range) /* * __flush_dcache_area(kaddr, size) @@ -105,10 +105,10 @@ ENDPROC(invalidate_icache_range) * - kaddr - kernel address * - size - size in question */ -ENTRY(__flush_dcache_area) +SYM_FUNC_START_PI(__flush_dcache_area) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -ENDPIPROC(__flush_dcache_area) +SYM_FUNC_END_PI(__flush_dcache_area) /* * __clean_dcache_area_pou(kaddr, size) @@ -119,14 +119,14 @@ ENDPIPROC(__flush_dcache_area) * - kaddr - kernel address * - size - size in question */ -ENTRY(__clean_dcache_area_pou) +SYM_FUNC_START(__clean_dcache_area_pou) alternative_if ARM64_HAS_CACHE_IDC dsb ishst ret alternative_else_nop_endif dcache_by_line_op cvau, ish, x0, x1, x2, x3 ret -ENDPROC(__clean_dcache_area_pou) +SYM_FUNC_END(__clean_dcache_area_pou) /* * __inval_dcache_area(kaddr, size) @@ -138,7 +138,8 @@ ENDPROC(__clean_dcache_area_pou) * - kaddr - kernel address * - size - size in question */ -ENTRY(__inval_dcache_area) +SYM_FUNC_START_LOCAL(__dma_inv_area) +SYM_FUNC_START_PI(__inval_dcache_area) /* FALLTHROUGH */ /* @@ -146,7 +147,6 @@ ENTRY(__inval_dcache_area) * - start - virtual start address of region * - size - size in question */ -__dma_inv_area: add x1, x1, x0 dcache_line_size x2, x3 sub x3, x2, #1 @@ -165,8 +165,8 @@ __dma_inv_area: b.lo 2b dsb sy ret -ENDPIPROC(__inval_dcache_area) -ENDPROC(__dma_inv_area) +SYM_FUNC_END_PI(__inval_dcache_area) +SYM_FUNC_END(__dma_inv_area) /* * __clean_dcache_area_poc(kaddr, size) @@ -177,7 +177,8 @@ ENDPROC(__dma_inv_area) * - kaddr - kernel address * - size - size in question */ -ENTRY(__clean_dcache_area_poc) +SYM_FUNC_START_LOCAL(__dma_clean_area) +SYM_FUNC_START_PI(__clean_dcache_area_poc) /* FALLTHROUGH */ /* @@ -185,11 +186,10 @@ ENTRY(__clean_dcache_area_poc) * - start - virtual start address of region * - size - size in question */ -__dma_clean_area: dcache_by_line_op cvac, sy, x0, x1, x2, x3 ret -ENDPIPROC(__clean_dcache_area_poc) -ENDPROC(__dma_clean_area) +SYM_FUNC_END_PI(__clean_dcache_area_poc) +SYM_FUNC_END(__dma_clean_area) /* * __clean_dcache_area_pop(kaddr, size) @@ -200,13 +200,13 @@ ENDPROC(__dma_clean_area) * - kaddr - kernel address * - size - size in question */ -ENTRY(__clean_dcache_area_pop) +SYM_FUNC_START_PI(__clean_dcache_area_pop) alternative_if_not ARM64_HAS_DCPOP b __clean_dcache_area_poc alternative_else_nop_endif dcache_by_line_op cvap, sy, x0, x1, x2, x3 ret -ENDPIPROC(__clean_dcache_area_pop) +SYM_FUNC_END_PI(__clean_dcache_area_pop) /* * __dma_flush_area(start, size) @@ -216,10 +216,10 @@ ENDPIPROC(__clean_dcache_area_pop) * - start - virtual start address of region * - size - size in question */ -ENTRY(__dma_flush_area) +SYM_FUNC_START_PI(__dma_flush_area) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -ENDPIPROC(__dma_flush_area) +SYM_FUNC_END_PI(__dma_flush_area) /* * __dma_map_area(start, size, dir) @@ -227,11 +227,11 @@ ENDPIPROC(__dma_flush_area) * - size - size of region * - dir - DMA direction */ -ENTRY(__dma_map_area) +SYM_FUNC_START_PI(__dma_map_area) cmp w2, #DMA_FROM_DEVICE b.eq __dma_inv_area b __dma_clean_area -ENDPIPROC(__dma_map_area) +SYM_FUNC_END_PI(__dma_map_area) /* * __dma_unmap_area(start, size, dir) @@ -239,8 +239,8 @@ ENDPIPROC(__dma_map_area) * - size - size of region * - dir - DMA direction */ -ENTRY(__dma_unmap_area) +SYM_FUNC_START_PI(__dma_unmap_area) cmp w2, #DMA_TO_DEVICE b.ne __dma_inv_area ret -ENDPIPROC(__dma_unmap_area) +SYM_FUNC_END_PI(__dma_unmap_area) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index b5e329fde2dd..8ef73e89d514 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -29,15 +29,9 @@ static cpumask_t tlb_flush_pending; #define ASID_MASK (~GENMASK(asid_bits - 1, 0)) #define ASID_FIRST_VERSION (1UL << asid_bits) -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -#define NUM_USER_ASIDS (ASID_FIRST_VERSION >> 1) -#define asid2idx(asid) (((asid) & ~ASID_MASK) >> 1) -#define idx2asid(idx) (((idx) << 1) & ~ASID_MASK) -#else -#define NUM_USER_ASIDS (ASID_FIRST_VERSION) +#define NUM_USER_ASIDS ASID_FIRST_VERSION #define asid2idx(asid) ((asid) & ~ASID_MASK) #define idx2asid(idx) asid2idx(idx) -#endif /* Get the ASIDBits supported by the current CPU */ static u32 get_cpu_asid_bits(void) @@ -77,13 +71,33 @@ void verify_cpu_asid_bits(void) } } +static void set_kpti_asid_bits(void) +{ + unsigned int len = BITS_TO_LONGS(NUM_USER_ASIDS) * sizeof(unsigned long); + /* + * In case of KPTI kernel/user ASIDs are allocated in + * pairs, the bottom bit distinguishes the two: if it + * is set, then the ASID will map only userspace. Thus + * mark even as reserved for kernel. + */ + memset(asid_map, 0xaa, len); +} + +static void set_reserved_asid_bits(void) +{ + if (arm64_kernel_unmapped_at_el0()) + set_kpti_asid_bits(); + else + bitmap_clear(asid_map, 0, NUM_USER_ASIDS); +} + static void flush_context(void) { int i; u64 asid; /* Update the list of reserved ASIDs and the ASID bitmap. */ - bitmap_clear(asid_map, 0, NUM_USER_ASIDS); + set_reserved_asid_bits(); for_each_possible_cpu(i) { asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0); @@ -261,6 +275,14 @@ static int asids_init(void) panic("Failed to allocate bitmap for %lu ASIDs\n", NUM_USER_ASIDS); + /* + * We cannot call set_reserved_asid_bits() here because CPU + * caps are not finalized yet, so it is safer to assume KPTI + * and reserve kernel ASID's from beginning. + */ + if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) + set_kpti_asid_bits(); + pr_info("ASID allocator initialised with %lu entries\n", NUM_USER_ASIDS); return 0; } diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index bd2b039f43a6..6c45350e33aa 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -8,23 +8,19 @@ #include <linux/cache.h> #include <linux/dma-noncoherent.h> #include <linux/dma-iommu.h> +#include <xen/xen.h> +#include <xen/swiotlb-xen.h> #include <asm/cacheflush.h> -pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, - unsigned long attrs) -{ - return pgprot_writecombine(prot); -} - -void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, - size_t size, enum dma_data_direction dir) +void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) { __dma_map_area(phys_to_virt(paddr), size, dir); } -void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, - size_t size, enum dma_data_direction dir) +void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) { __dma_unmap_area(phys_to_virt(paddr), size, dir); } @@ -34,12 +30,6 @@ void arch_dma_prep_coherent(struct page *page, size_t size) __dma_flush_area(page_address(page), size); } -static int __init arm64_dma_init(void) -{ - return dma_atomic_pool_init(GFP_DMA32, __pgprot(PROT_NORMAL_NC)); -} -arch_initcall(arm64_dma_init); - #ifdef CONFIG_IOMMU_DMA void arch_teardown_dma_ops(struct device *dev) { @@ -64,6 +54,6 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, #ifdef CONFIG_XEN if (xen_initial_domain()) - dev->dma_ops = xen_dma_ops; + dev->dma_ops = &xen_swiotlb_dma_ops; #endif } diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 82b3a7fdb4a6..860c00ec8bd3 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -15,6 +15,7 @@ #include <linux/io.h> #include <linux/init.h> #include <linux/mm.h> +#include <linux/ptdump.h> #include <linux/sched.h> #include <linux/seq_file.h> @@ -25,9 +26,20 @@ #include <asm/pgtable-hwdef.h> #include <asm/ptdump.h> -static const struct addr_marker address_markers[] = { + +enum address_markers_idx { + PAGE_OFFSET_NR = 0, + PAGE_END_NR, +#ifdef CONFIG_KASAN + KASAN_START_NR, +#endif +}; + +static struct addr_marker address_markers[] = { + { PAGE_OFFSET, "Linear Mapping start" }, + { 0 /* PAGE_END */, "Linear Mapping end" }, #ifdef CONFIG_KASAN - { KASAN_SHADOW_START, "Kasan shadow start" }, + { 0 /* KASAN_SHADOW_START */, "Kasan shadow start" }, { KASAN_SHADOW_END, "Kasan shadow end" }, #endif { MODULES_VADDR, "Modules start" }, @@ -42,7 +54,6 @@ static const struct addr_marker address_markers[] = { { VMEMMAP_START, "vmemmap start" }, { VMEMMAP_START + VMEMMAP_SIZE, "vmemmap end" }, #endif - { PAGE_OFFSET, "Linear mapping" }, { -1, NULL }, }; @@ -65,10 +76,11 @@ static const struct addr_marker address_markers[] = { * dumps out a description of the range. */ struct pg_state { + struct ptdump_state ptdump; struct seq_file *seq; const struct addr_marker *marker; unsigned long start_address; - unsigned level; + int level; u64 current_prot; bool check_wx; unsigned long wx_pages; @@ -132,6 +144,7 @@ static const struct prot_bits pte_bits[] = { .mask = PTE_UXN, .val = PTE_UXN, .set = "UXN", + .clear = " ", }, { .mask = PTE_ATTRINDX_MASK, .val = PTE_ATTRINDX(MT_DEVICE_nGnRnE), @@ -163,11 +176,14 @@ struct pg_level { }; static struct pg_level pg_level[] = { - { - }, { /* pgd */ + { /* pgd */ .name = "PGD", .bits = pte_bits, .num = ARRAY_SIZE(pte_bits), + }, { /* p4d */ + .name = "P4D", + .bits = pte_bits, + .num = ARRAY_SIZE(pte_bits), }, { /* pud */ .name = (CONFIG_PGTABLE_LEVELS > 3) ? "PUD" : "PGD", .bits = pte_bits, @@ -230,13 +246,17 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) st->wx_pages += (addr - st->start_address) / PAGE_SIZE; } -static void note_page(struct pg_state *st, unsigned long addr, unsigned level, - u64 val) +static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, + unsigned long val) { + struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); static const char units[] = "KMGTPE"; - u64 prot = val & pg_level[level].mask; + u64 prot = 0; + + if (level >= 0) + prot = val & pg_level[level].mask; - if (!st->level) { + if (st->level == -1) { st->level = level; st->current_prot = prot; st->start_address = addr; @@ -249,21 +269,22 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, if (st->current_prot) { note_prot_uxn(st, addr); note_prot_wx(st, addr); - pt_dump_seq_printf(st->seq, "0x%016lx-0x%016lx ", + } + + pt_dump_seq_printf(st->seq, "0x%016lx-0x%016lx ", st->start_address, addr); - delta = (addr - st->start_address) >> 10; - while (!(delta & 1023) && unit[1]) { - delta >>= 10; - unit++; - } - pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit, - pg_level[st->level].name); - if (pg_level[st->level].bits) - dump_prot(st, pg_level[st->level].bits, - pg_level[st->level].num); - pt_dump_seq_puts(st->seq, "\n"); + delta = (addr - st->start_address) >> 10; + while (!(delta & 1023) && unit[1]) { + delta >>= 10; + unit++; } + pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit, + pg_level[st->level].name); + if (st->current_prot && pg_level[st->level].bits) + dump_prot(st, pg_level[st->level].bits, + pg_level[st->level].num); + pt_dump_seq_puts(st->seq, "\n"); if (addr >= st->marker[1].start_address) { st->marker++; @@ -282,85 +303,27 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, } -static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start, - unsigned long end) -{ - unsigned long addr = start; - pte_t *ptep = pte_offset_kernel(pmdp, start); - - do { - note_page(st, addr, 4, READ_ONCE(pte_val(*ptep))); - } while (ptep++, addr += PAGE_SIZE, addr != end); -} - -static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start, - unsigned long end) +void ptdump_walk(struct seq_file *s, struct ptdump_info *info) { - unsigned long next, addr = start; - pmd_t *pmdp = pmd_offset(pudp, start); - - do { - pmd_t pmd = READ_ONCE(*pmdp); - next = pmd_addr_end(addr, end); - - if (pmd_none(pmd) || pmd_sect(pmd)) { - note_page(st, addr, 3, pmd_val(pmd)); - } else { - BUG_ON(pmd_bad(pmd)); - walk_pte(st, pmdp, addr, next); - } - } while (pmdp++, addr = next, addr != end); -} - -static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start, - unsigned long end) -{ - unsigned long next, addr = start; - pud_t *pudp = pud_offset(pgdp, start); - - do { - pud_t pud = READ_ONCE(*pudp); - next = pud_addr_end(addr, end); - - if (pud_none(pud) || pud_sect(pud)) { - note_page(st, addr, 2, pud_val(pud)); - } else { - BUG_ON(pud_bad(pud)); - walk_pmd(st, pudp, addr, next); - } - } while (pudp++, addr = next, addr != end); -} + unsigned long end = ~0UL; + struct pg_state st; -static void walk_pgd(struct pg_state *st, struct mm_struct *mm, - unsigned long start) -{ - unsigned long end = (start < TASK_SIZE_64) ? TASK_SIZE_64 : 0; - unsigned long next, addr = start; - pgd_t *pgdp = pgd_offset(mm, start); - - do { - pgd_t pgd = READ_ONCE(*pgdp); - next = pgd_addr_end(addr, end); - - if (pgd_none(pgd)) { - note_page(st, addr, 1, pgd_val(pgd)); - } else { - BUG_ON(pgd_bad(pgd)); - walk_pud(st, pgdp, addr, next); - } - } while (pgdp++, addr = next, addr != end); -} + if (info->base_addr < TASK_SIZE_64) + end = TASK_SIZE_64; -void ptdump_walk_pgd(struct seq_file *m, struct ptdump_info *info) -{ - struct pg_state st = { - .seq = m, + st = (struct pg_state){ + .seq = s, .marker = info->markers, + .ptdump = { + .note_page = note_page, + .range = (struct ptdump_range[]){ + {info->base_addr, end}, + {0, 0} + } + } }; - walk_pgd(&st, info->mm, info->base_addr); - - note_page(&st, 0, 0, 0); + ptdump_walk_pgd(&st.ptdump, info->mm, NULL); } static void ptdump_initialize(void) @@ -376,7 +339,7 @@ static void ptdump_initialize(void) static struct ptdump_info kernel_ptdump_info = { .mm = &init_mm, .markers = address_markers, - .base_addr = VA_START, + .base_addr = PAGE_OFFSET, }; void ptdump_check_wx(void) @@ -387,11 +350,19 @@ void ptdump_check_wx(void) { 0, NULL}, { -1, NULL}, }, + .level = -1, .check_wx = true, + .ptdump = { + .note_page = note_page, + .range = (struct ptdump_range[]) { + {PAGE_OFFSET, ~0UL}, + {0, 0} + } + } }; - walk_pgd(&st, &init_mm, VA_START); - note_page(&st, 0, 0, 0); + ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); + if (st.wx_pages || st.uxn_pages) pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found, %lu non-UXN pages found\n", st.wx_pages, st.uxn_pages); @@ -401,6 +372,10 @@ void ptdump_check_wx(void) static int ptdump_init(void) { + address_markers[PAGE_END_NR].start_address = PAGE_END; +#ifdef CONFIG_KASAN + address_markers[KASAN_START_NR].start_address = KASAN_SHADOW_START; +#endif ptdump_initialize(); ptdump_debugfs_register(&kernel_ptdump_info, "kernel_page_tables"); return 0; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index cfd65b63f36f..85566d32958f 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -8,6 +8,7 @@ */ #include <linux/acpi.h> +#include <linux/bitfield.h> #include <linux/extable.h> #include <linux/signal.h> #include <linux/mm.h> @@ -31,7 +32,8 @@ #include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/esr.h> -#include <asm/kasan.h> +#include <asm/kprobes.h> +#include <asm/processor.h> #include <asm/sysreg.h> #include <asm/system_misc.h> #include <asm/pgtable.h> @@ -86,8 +88,8 @@ static void mem_abort_decode(unsigned int esr) pr_alert("Mem abort info:\n"); pr_alert(" ESR = 0x%08x\n", esr); - pr_alert(" Exception class = %s, IL = %u bits\n", - esr_get_class_string(esr), + pr_alert(" EC = 0x%02lx: %s, IL = %u bits\n", + ESR_ELx_EC(esr), esr_get_class_string(esr), (esr & ESR_ELx_IL) ? 32 : 16); pr_alert(" SET = %lu, FnV = %lu\n", (esr & ESR_ELx_SET_MASK) >> ESR_ELx_SET_SHIFT, @@ -100,16 +102,13 @@ static void mem_abort_decode(unsigned int esr) data_abort_decode(esr); } -static inline bool is_ttbr0_addr(unsigned long addr) +static inline unsigned long mm_to_pgd_phys(struct mm_struct *mm) { - /* entry assembly clears tags for TTBR0 addrs */ - return addr < TASK_SIZE; -} + /* Either init_pg_dir or swapper_pg_dir */ + if (mm == &init_mm) + return __pa_symbol(mm->pgd); -static inline bool is_ttbr1_addr(unsigned long addr) -{ - /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */ - return arch_kasan_reset_tag(addr) >= VA_START; + return (unsigned long)virt_to_phys(mm->pgd); } /* @@ -138,10 +137,9 @@ static void show_pte(unsigned long addr) return; } - pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp=%016lx\n", + pr_alert("%s pgtable: %luk pages, %llu-bit VAs, pgdp=%016lx\n", mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K, - mm == &init_mm ? VA_BITS : (int)vabits_user, - (unsigned long)virt_to_phys(mm->pgd)); + vabits_actual, mm_to_pgd_phys(mm)); pgdp = pgd_offset(mm, addr); pgd = READ_ONCE(*pgdp); pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd)); @@ -242,6 +240,38 @@ static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr, return false; } +static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr, + unsigned int esr, + struct pt_regs *regs) +{ + unsigned long flags; + u64 par, dfsc; + + if (ESR_ELx_EC(esr) != ESR_ELx_EC_DABT_CUR || + (esr & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT) + return false; + + local_irq_save(flags); + asm volatile("at s1e1r, %0" :: "r" (addr)); + isb(); + par = read_sysreg(par_el1); + local_irq_restore(flags); + + /* + * If we now have a valid translation, treat the translation fault as + * spurious. + */ + if (!(par & SYS_PAR_EL1_F)) + return true; + + /* + * If we got a different type of fault from the AT instruction, + * treat the translation fault as spurious. + */ + dfsc = FIELD_GET(SYS_PAR_EL1_FST, par); + return (dfsc & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT; +} + static void die_kernel_fault(const char *msg, unsigned long addr, unsigned int esr, struct pt_regs *regs) { @@ -270,9 +300,15 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, if (!is_el1_instruction_abort(esr) && fixup_exception(regs)) return; + if (WARN_RATELIMIT(is_spurious_el1_translation_fault(addr, esr, regs), + "Ignoring spurious kernel translation fault at virtual address %016lx\n", addr)) + return; + if (is_el1_permission_fault(addr, esr, regs)) { if (esr & ESR_ELx_WNR) msg = "write to read-only memory"; + else if (is_el1_instruction_abort(esr)) + msg = "execute from non-executable memory"; else msg = "read from unreadable memory"; } else if (addr < PAGE_SIZE) { @@ -409,7 +445,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, const struct fault_info *inf; struct mm_struct *mm = current->mm; vm_fault_t fault, major = 0; - unsigned long vm_flags = VM_READ | VM_WRITE; + unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; if (kprobe_page_fault(regs, esr)) @@ -691,8 +727,7 @@ static const struct fault_info fault_info[] = { { do_bad, SIGKILL, SI_KERNEL, "unknown 63" }, }; -asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, - struct pt_regs *regs) +void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) { const struct fault_info *inf = esr_to_fault_info(esr); @@ -708,43 +743,21 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, arm64_notify_die(inf->name, regs, inf->sig, inf->code, (void __user *)addr, esr); } +NOKPROBE_SYMBOL(do_mem_abort); -asmlinkage void __exception do_el0_irq_bp_hardening(void) +void do_el0_irq_bp_hardening(void) { /* PC has already been checked in entry.S */ arm64_apply_bp_hardening(); } +NOKPROBE_SYMBOL(do_el0_irq_bp_hardening); -asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr, - unsigned int esr, - struct pt_regs *regs) -{ - /* - * We've taken an instruction abort from userspace and not yet - * re-enabled IRQs. If the address is a kernel address, apply - * BP hardening prior to enabling IRQs and pre-emption. - */ - if (!is_ttbr0_addr(addr)) - arm64_apply_bp_hardening(); - - local_daif_restore(DAIF_PROCCTX); - do_mem_abort(addr, esr, regs); -} - - -asmlinkage void __exception do_sp_pc_abort(unsigned long addr, - unsigned int esr, - struct pt_regs *regs) +void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) { - if (user_mode(regs)) { - if (!is_ttbr0_addr(instruction_pointer(regs))) - arm64_apply_bp_hardening(); - local_daif_restore(DAIF_PROCCTX); - } - arm64_notify_die("SP/PC alignment exception", regs, SIGBUS, BUS_ADRALN, (void __user *)addr, esr); } +NOKPROBE_SYMBOL(do_sp_pc_abort); int __init early_brk64(unsigned long addr, unsigned int esr, struct pt_regs *regs); @@ -827,8 +840,7 @@ NOKPROBE_SYMBOL(debug_exception_exit); #ifdef CONFIG_ARM64_ERRATUM_1463225 DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); -static int __exception -cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) +static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) { if (user_mode(regs)) return 0; @@ -847,16 +859,15 @@ cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) return 1; } #else -static int __exception -cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) +static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) { return 0; } #endif /* CONFIG_ARM64_ERRATUM_1463225 */ +NOKPROBE_SYMBOL(cortex_a76_erratum_1463225_debug_handler); -asmlinkage void __exception do_debug_exception(unsigned long addr_if_watchpoint, - unsigned int esr, - struct pt_regs *regs) +void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, + struct pt_regs *regs) { const struct fault_info *inf = esr_to_debug_fault_info(esr); unsigned long pc = instruction_pointer(regs); diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index dc19300309d2..ac485163a4a7 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -56,8 +56,7 @@ void __sync_icache_dcache(pte_t pte) struct page *page = pte_page(pte); if (!test_and_set_bit(PG_dcache_clean, &page->flags)) - sync_icache_aliases(page_address(page), - PAGE_SIZE << compound_order(page)); + sync_icache_aliases(page_address(page), page_size(page)); } EXPORT_SYMBOL_GPL(__sync_icache_dcache); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f3c795278def..b65dffdfb201 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -20,6 +20,7 @@ #include <linux/sort.h> #include <linux/of.h> #include <linux/of_fdt.h> +#include <linux/dma-direct.h> #include <linux/dma-mapping.h> #include <linux/dma-contiguous.h> #include <linux/efi.h> @@ -41,6 +42,8 @@ #include <asm/tlb.h> #include <asm/alternative.h> +#define ARM64_ZONE_DMA_BITS 30 + /* * We need to be able to catch inadvertent references to memstart_addr * that occur (potentially in generic code) before arm64_memblock_init() @@ -50,7 +53,20 @@ s64 memstart_addr __ro_after_init = -1; EXPORT_SYMBOL(memstart_addr); +s64 physvirt_offset __ro_after_init; +EXPORT_SYMBOL(physvirt_offset); + +struct page *vmemmap __ro_after_init; +EXPORT_SYMBOL(vmemmap); + +/* + * We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA covers the first 1G of + * memory as some devices, namely the Raspberry Pi 4, have peripherals with + * this limited view of the memory. ZONE_DMA32 will cover the rest of the 32 + * bit addressable memory area. + */ phys_addr_t arm64_dma_phys_limit __ro_after_init; +static phys_addr_t arm64_dma32_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE /* @@ -75,7 +91,7 @@ static void __init reserve_crashkernel(void) if (crash_base == 0) { /* Current arm64 boot protocol requires 2MB alignment */ - crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT, + crash_base = memblock_find_in_range(0, arm64_dma32_phys_limit, crash_size, SZ_2M); if (crash_base == 0) { pr_warn("cannot allocate crashkernel (size:0x%llx)\n", @@ -163,15 +179,16 @@ static void __init reserve_elfcorehdr(void) { } #endif /* CONFIG_CRASH_DUMP */ + /* - * Return the maximum physical address for ZONE_DMA32 (DMA_BIT_MASK(32)). It - * currently assumes that for memory starting above 4G, 32-bit devices will - * use a DMA offset. + * Return the maximum physical address for a zone with a given address size + * limit. It currently assumes that for memory starting above 4G, 32-bit + * devices will use a DMA offset. */ -static phys_addr_t __init max_zone_dma_phys(void) +static phys_addr_t __init max_zone_phys(unsigned int zone_bits) { - phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32); - return min(offset + (1ULL << 32), memblock_end_of_DRAM()); + phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, zone_bits); + return min(offset + (1ULL << zone_bits), memblock_end_of_DRAM()); } #ifdef CONFIG_NUMA @@ -180,8 +197,11 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) { unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; +#ifdef CONFIG_ZONE_DMA + max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit); +#endif #ifdef CONFIG_ZONE_DMA32 - max_zone_pfns[ZONE_DMA32] = PFN_DOWN(max_zone_dma_phys()); + max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma32_phys_limit); #endif max_zone_pfns[ZONE_NORMAL] = max; @@ -194,16 +214,20 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) { struct memblock_region *reg; unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; - unsigned long max_dma = min; + unsigned long __maybe_unused max_dma, max_dma32; memset(zone_size, 0, sizeof(zone_size)); - /* 4GB maximum for 32-bit only capable devices */ + max_dma = max_dma32 = min; +#ifdef CONFIG_ZONE_DMA + max_dma = max_dma32 = PFN_DOWN(arm64_dma_phys_limit); + zone_size[ZONE_DMA] = max_dma - min; +#endif #ifdef CONFIG_ZONE_DMA32 - max_dma = PFN_DOWN(arm64_dma_phys_limit); - zone_size[ZONE_DMA32] = max_dma - min; + max_dma32 = PFN_DOWN(arm64_dma32_phys_limit); + zone_size[ZONE_DMA32] = max_dma32 - max_dma; #endif - zone_size[ZONE_NORMAL] = max - max_dma; + zone_size[ZONE_NORMAL] = max - max_dma32; memcpy(zhole_size, zone_size, sizeof(zhole_size)); @@ -211,19 +235,23 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) unsigned long start = memblock_region_memory_base_pfn(reg); unsigned long end = memblock_region_memory_end_pfn(reg); - if (start >= max) - continue; - -#ifdef CONFIG_ZONE_DMA32 - if (start < max_dma) { +#ifdef CONFIG_ZONE_DMA + if (start >= min && start < max_dma) { unsigned long dma_end = min(end, max_dma); - zhole_size[ZONE_DMA32] -= dma_end - start; + zhole_size[ZONE_DMA] -= dma_end - start; + start = dma_end; } #endif - if (end > max_dma) { +#ifdef CONFIG_ZONE_DMA32 + if (start >= max_dma && start < max_dma32) { + unsigned long dma32_end = min(end, max_dma32); + zhole_size[ZONE_DMA32] -= dma32_end - start; + start = dma32_end; + } +#endif + if (start >= max_dma32 && start < max) { unsigned long normal_end = min(end, max); - unsigned long normal_start = max(start, max_dma); - zhole_size[ZONE_NORMAL] -= normal_end - normal_start; + zhole_size[ZONE_NORMAL] -= normal_end - start; } } @@ -301,7 +329,7 @@ static void __init fdt_enforce_memory_region(void) void __init arm64_memblock_init(void) { - const s64 linear_region_size = -(s64)PAGE_OFFSET; + const s64 linear_region_size = BIT(vabits_actual - 1); /* Handle linux,usable-memory-range property */ fdt_enforce_memory_region(); @@ -310,18 +338,25 @@ void __init arm64_memblock_init(void) memblock_remove(1ULL << PHYS_MASK_SHIFT, ULLONG_MAX); /* - * Ensure that the linear region takes up exactly half of the kernel - * virtual address space. This way, we can distinguish a linear address - * from a kernel/module/vmalloc address by testing a single bit. - */ - BUILD_BUG_ON(linear_region_size != BIT(VA_BITS - 1)); - - /* * Select a suitable value for the base of physical memory. */ memstart_addr = round_down(memblock_start_of_DRAM(), ARM64_MEMSTART_ALIGN); + physvirt_offset = PHYS_OFFSET - PAGE_OFFSET; + + vmemmap = ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT)); + + /* + * If we are running with a 52-bit kernel VA config on a system that + * does not support it, we have to offset our vmemmap and physvirt_offset + * s.t. we avoid the 52-bit portion of the direct linear map + */ + if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52) && (vabits_actual != 52)) { + vmemmap += (_PAGE_OFFSET(48) - _PAGE_OFFSET(52)) >> PAGE_SHIFT; + physvirt_offset = PHYS_OFFSET - _PAGE_OFFSET(48); + } + /* * Remove the memory that we will not be able to cover with the * linear mapping. Take care not to clip the kernel which may be @@ -405,11 +440,15 @@ void __init arm64_memblock_init(void) early_init_fdt_scan_reserved_mem(); - /* 4GB maximum for 32-bit only capable devices */ + if (IS_ENABLED(CONFIG_ZONE_DMA)) { + zone_dma_bits = ARM64_ZONE_DMA_BITS; + arm64_dma_phys_limit = max_zone_phys(ARM64_ZONE_DMA_BITS); + } + if (IS_ENABLED(CONFIG_ZONE_DMA32)) - arm64_dma_phys_limit = max_zone_dma_phys(); + arm64_dma32_phys_limit = max_zone_phys(32); else - arm64_dma_phys_limit = PHYS_MASK + 1; + arm64_dma32_phys_limit = PHYS_MASK + 1; reserve_crashkernel(); @@ -417,7 +456,7 @@ void __init arm64_memblock_init(void) high_memory = __va(memblock_end_of_DRAM() - 1) + 1; - dma_contiguous_reserve(arm64_dma_phys_limit); + dma_contiguous_reserve(arm64_dma32_phys_limit); } void __init bootmem_init(void) @@ -521,7 +560,7 @@ static void __init free_unused_memmap(void) void __init mem_init(void) { if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) + max_pfn > PFN_DOWN(arm64_dma_phys_limit ? : arm64_dma32_phys_limit)) swiotlb_init(1); else swiotlb_force = SWIOTLB_NO_FORCE; @@ -558,7 +597,7 @@ void free_initmem(void) { free_reserved_area(lm_alias(__init_begin), lm_alias(__init_end), - 0, "unused kernel"); + POISON_FREE_INITMEM, "unused kernel"); /* * Unmap the __init region but leave the VM area in place. This * prevents the region from being reused for kernel modules, which @@ -567,14 +606,6 @@ void free_initmem(void) unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin)); } -#ifdef CONFIG_BLK_DEV_INITRD -void __init free_initrd_mem(unsigned long start, unsigned long end) -{ - free_reserved_area((void *)start, (void *)end, 0, "initrd"); - memblock_free(__virt_to_phys(start), end - start); -} -#endif - /* * Dump out memory limit information on panic. */ diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index fdb595a5d65f..9be71bee902c 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -69,7 +69,7 @@ void __iomem *__ioremap(phys_addr_t phys_addr, size_t size, pgprot_t prot) } EXPORT_SYMBOL(__ioremap); -void __iounmap(volatile void __iomem *io_addr) +void iounmap(volatile void __iomem *io_addr) { unsigned long addr = (unsigned long)io_addr & PAGE_MASK; @@ -80,7 +80,7 @@ void __iounmap(volatile void __iomem *io_addr) if (is_vmalloc_addr((void *)addr)) vunmap((void *)addr); } -EXPORT_SYMBOL(__iounmap); +EXPORT_SYMBOL(iounmap); void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) { diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 6cf97b904ebb..f87a32484ea8 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -156,7 +156,8 @@ asmlinkage void __init kasan_early_init(void) { BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT))); - BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(_KASAN_SHADOW_START(VA_BITS), PGDIR_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(_KASAN_SHADOW_START(VA_BITS_MIN), PGDIR_SIZE)); BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)); kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE, true); @@ -225,10 +226,10 @@ void __init kasan_init(void) kasan_map_populate(kimg_shadow_start, kimg_shadow_end, early_pfn_to_nid(virt_to_pfn(lm_alias(_text)))); - kasan_populate_early_shadow((void *)KASAN_SHADOW_START, - (void *)mod_shadow_start); + kasan_populate_early_shadow(kasan_mem_to_shadow((void *)PAGE_END), + (void *)mod_shadow_start); kasan_populate_early_shadow((void *)kimg_shadow_end, - kasan_mem_to_shadow((void *)PAGE_OFFSET)); + (void *)KASAN_SHADOW_END); if (kimg_shadow_start > mod_shadow_end) kasan_populate_early_shadow((void *)mod_shadow_end, diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index b050641b5139..3028bacbc4e9 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -21,78 +21,6 @@ #include <asm/cputype.h> /* - * Leave enough space between the mmap area and the stack to honour ulimit in - * the face of randomisation. - */ -#define MIN_GAP (SZ_128M) -#define MAX_GAP (STACK_TOP/6*5) - -static int mmap_is_legacy(struct rlimit *rlim_stack) -{ - if (current->personality & ADDR_COMPAT_LAYOUT) - return 1; - - if (rlim_stack->rlim_cur == RLIM_INFINITY) - return 1; - - return sysctl_legacy_va_layout; -} - -unsigned long arch_mmap_rnd(void) -{ - unsigned long rnd; - -#ifdef CONFIG_COMPAT - if (test_thread_flag(TIF_32BIT)) - rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1); - else -#endif - rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1); - return rnd << PAGE_SHIFT; -} - -static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) -{ - unsigned long gap = rlim_stack->rlim_cur; - unsigned long pad = (STACK_RND_MASK << PAGE_SHIFT) + stack_guard_gap; - - /* Values close to RLIM_INFINITY can overflow. */ - if (gap + pad > gap) - gap += pad; - - if (gap < MIN_GAP) - gap = MIN_GAP; - else if (gap > MAX_GAP) - gap = MAX_GAP; - - return PAGE_ALIGN(STACK_TOP - gap - rnd); -} - -/* - * This function, called very early during the creation of a new process VM - * image, sets up which VM layout function to use: - */ -void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) -{ - unsigned long random_factor = 0UL; - - if (current->flags & PF_RANDOMIZE) - random_factor = arch_mmap_rnd(); - - /* - * Fall back to the standard layout if the personality bit is set, or - * if the expected stack growth is unlimited: - */ - if (mmap_is_legacy(rlim_stack)) { - mm->mmap_base = TASK_UNMAPPED_BASE + random_factor; - mm->get_unmapped_area = arch_get_unmapped_area; - } else { - mm->mmap_base = mmap_base(random_factor, rlim_stack); - mm->get_unmapped_area = arch_get_unmapped_area_topdown; - } -} - -/* * You really shouldn't be using read() or write() on /dev/mem. This might go * away in the future. */ diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 750a69dde39b..128f70852bf3 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -40,8 +40,9 @@ u64 idmap_t0sz = TCR_T0SZ(VA_BITS); u64 idmap_ptrs_per_pgd = PTRS_PER_PGD; -u64 vabits_user __ro_after_init; -EXPORT_SYMBOL(vabits_user); + +u64 __section(".mmuoff.data.write") vabits_actual; +EXPORT_SYMBOL(vabits_actual); u64 kimage_voffset __ro_after_init; EXPORT_SYMBOL(kimage_voffset); @@ -337,7 +338,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, phys_addr_t (*pgtable_alloc)(int), int flags) { - unsigned long addr, length, end, next; + unsigned long addr, end, next; pgd_t *pgdp = pgd_offset_raw(pgdir, virt); /* @@ -349,9 +350,8 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, phys &= PAGE_MASK; addr = virt & PAGE_MASK; - length = PAGE_ALIGN(size + (virt & ~PAGE_MASK)); + end = PAGE_ALIGN(virt + size); - end = addr + length; do { next = pgd_addr_end(addr, end); alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc, @@ -383,7 +383,7 @@ static phys_addr_t pgd_pgtable_alloc(int shift) * folded, and if so pgtable_pmd_page_ctor() becomes nop. */ if (shift == PAGE_SHIFT) - BUG_ON(!pgtable_page_ctor(phys_to_page(pa))); + BUG_ON(!pgtable_pte_page_ctor(phys_to_page(pa))); else if (shift == PMD_SHIFT) BUG_ON(!pgtable_pmd_page_ctor(phys_to_page(pa))); @@ -398,7 +398,7 @@ static phys_addr_t pgd_pgtable_alloc(int shift) static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - if (virt < VMALLOC_START) { + if ((virt >= PAGE_END) && (virt < VMALLOC_START)) { pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", &phys, virt); return; @@ -425,7 +425,7 @@ void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, static void update_mapping_prot(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - if (virt < VMALLOC_START) { + if ((virt >= PAGE_END) && (virt < VMALLOC_START)) { pr_warn("BUG: not updating mapping for %pa at 0x%016lx - outside kernel range\n", &phys, virt); return; @@ -646,6 +646,8 @@ static void __init map_kernel(pgd_t *pgdp) set_pgd(pgd_offset_raw(pgdp, FIXADDR_START), READ_ONCE(*pgd_offset_k(FIXADDR_START))); } else if (CONFIG_PGTABLE_LEVELS > 3) { + pgd_t *bm_pgdp; + pud_t *bm_pudp; /* * The fixmap shares its top level pgd entry with the kernel * mapping. This can really only occur when we are running @@ -653,9 +655,9 @@ static void __init map_kernel(pgd_t *pgdp) * entry instead. */ BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); - pud_populate(&init_mm, - pud_set_fixmap_offset(pgdp, FIXADDR_START), - lm_alias(bm_pmd)); + bm_pgdp = pgd_offset_raw(pgdp, FIXADDR_START); + bm_pudp = pud_set_fixmap_offset(bm_pgdp, FIXADDR_START); + pud_populate(&init_mm, bm_pudp, lm_alias(bm_pmd)); pud_clear_fixmap(); } else { BUG(); @@ -876,7 +878,7 @@ void __set_fixmap(enum fixed_addresses idx, } } -void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) +void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) { const u64 dt_virt_base = __fix_to_virt(FIX_FDT); int offset; @@ -929,19 +931,6 @@ void *__init __fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) return dt_virt; } -void *__init fixmap_remap_fdt(phys_addr_t dt_phys) -{ - void *dt_virt; - int size; - - dt_virt = __fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO); - if (!dt_virt) - return NULL; - - memblock_reserve(dt_phys, size); - return dt_virt; -} - int __init arch_ioremap_p4d_supported(void) { return 0; @@ -954,13 +943,13 @@ int __init arch_ioremap_pud_supported(void) * SW table walks can't handle removal of intermediate entries. */ return IS_ENABLED(CONFIG_ARM64_4K_PAGES) && - !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS); + !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS); } int __init arch_ioremap_pmd_supported(void) { /* See arch_ioremap_pud_supported() */ - return !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS); + return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS); } int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) @@ -1071,6 +1060,8 @@ int arch_add_memory(int nid, u64 start, u64 size, __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), size, PAGE_KERNEL, __pgd_pgtable_alloc, flags); + memblock_clear_nomap(start, size); + return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, restrictions); } @@ -1079,7 +1070,6 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone; /* * FIXME: Cleanup page tables (also in arch_add_memory() in case @@ -1088,7 +1078,6 @@ void arch_remove_memory(int nid, u64 start, u64 size, * unplug. ARCH_ENABLE_MEMORY_HOTREMOVE must not be * unlocked yet. */ - zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); } #endif diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 4f241cc7cc3b..4decf1659700 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -29,7 +29,7 @@ static __init int numa_parse_early_param(char *opt) { if (!opt) return -EINVAL; - if (!strncmp(opt, "off", 3)) + if (str_has_prefix(opt, "off")) numa_off = true; return 0; diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 03c53f16ee77..250c49008d73 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -54,7 +54,7 @@ static int change_memory_common(unsigned long addr, int numpages, pgprot_t set_mask, pgprot_t clear_mask) { unsigned long start = addr; - unsigned long size = PAGE_SIZE*numpages; + unsigned long size = PAGE_SIZE * numpages; unsigned long end = start + size; struct vm_struct *area; int i; @@ -128,7 +128,6 @@ int set_memory_nx(unsigned long addr, int numpages) __pgprot(PTE_PXN), __pgprot(0)); } -EXPORT_SYMBOL_GPL(set_memory_nx); int set_memory_x(unsigned long addr, int numpages) { @@ -136,7 +135,6 @@ int set_memory_x(unsigned long addr, int numpages) __pgprot(0), __pgprot(PTE_PXN)); } -EXPORT_SYMBOL_GPL(set_memory_x); int set_memory_valid(unsigned long addr, int numpages, int enable) { diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 7548f9ca1f11..4a64089e5771 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -35,7 +35,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) kmem_cache_free(pgd_cache, pgd); } -void __init pgd_cache_init(void) +void __init pgtable_cache_init(void) { if (PGD_SIZE == PAGE_SIZE) return; diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 7dbf2be470f6..aafed6902411 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -42,7 +42,14 @@ #define TCR_KASAN_FLAGS 0 #endif -#define MAIR(attr, mt) ((attr) << ((mt) * 8)) +/* Default MAIR_EL1 */ +#define MAIR_EL1_SET \ + (MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) | \ + MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) | \ + MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \ + MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT)) #ifdef CONFIG_CPU_PM /** @@ -50,7 +57,7 @@ * * x0: virtual address of context pointer */ -ENTRY(cpu_do_suspend) +SYM_FUNC_START(cpu_do_suspend) mrs x2, tpidr_el0 mrs x3, tpidrro_el0 mrs x4, contextidr_el1 @@ -74,7 +81,7 @@ alternative_endif stp x10, x11, [x0, #64] stp x12, x13, [x0, #80] ret -ENDPROC(cpu_do_suspend) +SYM_FUNC_END(cpu_do_suspend) /** * cpu_do_resume - restore CPU register context @@ -82,7 +89,7 @@ ENDPROC(cpu_do_suspend) * x0: Address of context pointer */ .pushsection ".idmap.text", "awx" -ENTRY(cpu_do_resume) +SYM_FUNC_START(cpu_do_resume) ldp x2, x3, [x0] ldp x4, x5, [x0, #16] ldp x6, x8, [x0, #32] @@ -131,7 +138,7 @@ alternative_else_nop_endif isb ret -ENDPROC(cpu_do_resume) +SYM_FUNC_END(cpu_do_resume) .popsection #endif @@ -142,7 +149,7 @@ ENDPROC(cpu_do_resume) * * - pgd_phys - physical address of new TTB */ -ENTRY(cpu_do_switch_mm) +SYM_FUNC_START(cpu_do_switch_mm) mrs x2, ttbr1_el1 mmid x1, x1 // get mm->context.id phys_to_ttbr x3, x0 @@ -161,14 +168,14 @@ alternative_else_nop_endif msr ttbr0_el1, x3 // now update TTBR0 isb b post_ttbr_update_workaround // Back to C code... -ENDPROC(cpu_do_switch_mm) +SYM_FUNC_END(cpu_do_switch_mm) .pushsection ".idmap.text", "awx" .macro __idmap_cpu_set_reserved_ttbr1, tmp1, tmp2 adrp \tmp1, empty_zero_page phys_to_ttbr \tmp2, \tmp1 - offset_ttbr1 \tmp2 + offset_ttbr1 \tmp2, \tmp1 msr ttbr1_el1, \tmp2 isb tlbi vmalle1 @@ -182,19 +189,19 @@ ENDPROC(cpu_do_switch_mm) * This is the low-level counterpart to cpu_replace_ttbr1, and should not be * called by anything else. It can only be executed from a TTBR0 mapping. */ -ENTRY(idmap_cpu_replace_ttbr1) +SYM_FUNC_START(idmap_cpu_replace_ttbr1) save_and_disable_daif flags=x2 __idmap_cpu_set_reserved_ttbr1 x1, x3 - offset_ttbr1 x0 + offset_ttbr1 x0, x3 msr ttbr1_el1, x0 isb restore_daif x2 ret -ENDPROC(idmap_cpu_replace_ttbr1) +SYM_FUNC_END(idmap_cpu_replace_ttbr1) .popsection #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 @@ -222,7 +229,7 @@ ENDPROC(idmap_cpu_replace_ttbr1) */ __idmap_kpti_flag: .long 1 -ENTRY(idmap_kpti_install_ng_mappings) +SYM_FUNC_START(idmap_kpti_install_ng_mappings) cpu .req w0 num_cpus .req w1 swapper_pa .req x2 @@ -250,15 +257,15 @@ ENTRY(idmap_kpti_install_ng_mappings) /* We're the boot CPU. Wait for the others to catch up */ sevl 1: wfe - ldaxr w18, [flag_ptr] - eor w18, w18, num_cpus - cbnz w18, 1b + ldaxr w17, [flag_ptr] + eor w17, w17, num_cpus + cbnz w17, 1b /* We need to walk swapper, so turn off the MMU. */ pre_disable_mmu_workaround - mrs x18, sctlr_el1 - bic x18, x18, #SCTLR_ELx_M - msr sctlr_el1, x18 + mrs x17, sctlr_el1 + bic x17, x17, #SCTLR_ELx_M + msr sctlr_el1, x17 isb /* Everybody is enjoying the idmap, so we can rewrite swapper. */ @@ -281,9 +288,18 @@ skip_pgd: isb /* We're done: fire up the MMU again */ - mrs x18, sctlr_el1 - orr x18, x18, #SCTLR_ELx_M - msr sctlr_el1, x18 + mrs x17, sctlr_el1 + orr x17, x17, #SCTLR_ELx_M + msr sctlr_el1, x17 + isb + + /* + * Invalidate the local I-cache so that any instructions fetched + * speculatively from the PoC are discarded, since they may have + * been dynamically patched at the PoU. + */ + ic iallu + dsb nsh isb /* Set the flag to zero to indicate that we're all done */ @@ -344,47 +360,48 @@ skip_pte: b.ne do_pte b next_pmd + .unreq cpu + .unreq num_cpus + .unreq swapper_pa + .unreq cur_pgdp + .unreq end_pgdp + .unreq pgd + .unreq cur_pudp + .unreq end_pudp + .unreq pud + .unreq cur_pmdp + .unreq end_pmdp + .unreq pmd + .unreq cur_ptep + .unreq end_ptep + .unreq pte + /* Secondary CPUs end up here */ __idmap_kpti_secondary: /* Uninstall swapper before surgery begins */ - __idmap_cpu_set_reserved_ttbr1 x18, x17 + __idmap_cpu_set_reserved_ttbr1 x16, x17 /* Increment the flag to let the boot CPU we're ready */ -1: ldxr w18, [flag_ptr] - add w18, w18, #1 - stxr w17, w18, [flag_ptr] +1: ldxr w16, [flag_ptr] + add w16, w16, #1 + stxr w17, w16, [flag_ptr] cbnz w17, 1b /* Wait for the boot CPU to finish messing around with swapper */ sevl 1: wfe - ldxr w18, [flag_ptr] - cbnz w18, 1b + ldxr w16, [flag_ptr] + cbnz w16, 1b /* All done, act like nothing happened */ - offset_ttbr1 swapper_ttb + offset_ttbr1 swapper_ttb, x16 msr ttbr1_el1, swapper_ttb isb ret - .unreq cpu - .unreq num_cpus - .unreq swapper_pa .unreq swapper_ttb .unreq flag_ptr - .unreq cur_pgdp - .unreq end_pgdp - .unreq pgd - .unreq cur_pudp - .unreq end_pudp - .unreq pud - .unreq cur_pmdp - .unreq end_pmdp - .unreq pmd - .unreq cur_ptep - .unreq end_ptep - .unreq pte -ENDPROC(idmap_kpti_install_ng_mappings) +SYM_FUNC_END(idmap_kpti_install_ng_mappings) .popsection #endif @@ -395,7 +412,7 @@ ENDPROC(idmap_kpti_install_ng_mappings) * value of the SCTLR_EL1 register. */ .pushsection ".idmap.text", "awx" -ENTRY(__cpu_setup) +SYM_FUNC_START(__cpu_setup) tlbi vmalle1 // Invalidate local TLB dsb nsh @@ -407,23 +424,9 @@ ENTRY(__cpu_setup) enable_dbg // since this is per-cpu reset_pmuserenr_el0 x0 // Disable PMU access from EL0 /* - * Memory region attributes for LPAE: - * - * n = AttrIndx[2:0] - * n MAIR - * DEVICE_nGnRnE 000 00000000 - * DEVICE_nGnRE 001 00000100 - * DEVICE_GRE 010 00001100 - * NORMAL_NC 011 01000100 - * NORMAL 100 11111111 - * NORMAL_WT 101 10111011 + * Memory region attributes */ - ldr x5, =MAIR(0x00, MT_DEVICE_nGnRnE) | \ - MAIR(0x04, MT_DEVICE_nGnRE) | \ - MAIR(0x0c, MT_DEVICE_GRE) | \ - MAIR(0x44, MT_NORMAL_NC) | \ - MAIR(0xff, MT_NORMAL) | \ - MAIR(0xbb, MT_NORMAL_WT) + mov_q x5, MAIR_EL1_SET msr mair_el1, x5 /* * Prepare SCTLR @@ -438,10 +441,11 @@ ENTRY(__cpu_setup) TCR_TBI0 | TCR_A1 | TCR_KASAN_FLAGS tcr_clear_errata_bits x10, x9, x5 -#ifdef CONFIG_ARM64_USER_VA_BITS_52 - ldr_l x9, vabits_user +#ifdef CONFIG_ARM64_VA_BITS_52 + ldr_l x9, vabits_actual sub x9, xzr, x9 add x9, x9, #64 + tcr_set_t1sz x10, x9 #else ldr_l x9, idmap_t0sz #endif @@ -465,4 +469,4 @@ ENTRY(__cpu_setup) #endif /* CONFIG_ARM64_HW_AFDBM */ msr tcr_el1, x10 ret // return to head.S -ENDPROC(__cpu_setup) +SYM_FUNC_END(__cpu_setup) diff --git a/arch/arm64/mm/ptdump_debugfs.c b/arch/arm64/mm/ptdump_debugfs.c index 064163f25592..1f2eae3e988b 100644 --- a/arch/arm64/mm/ptdump_debugfs.c +++ b/arch/arm64/mm/ptdump_debugfs.c @@ -7,7 +7,7 @@ static int ptdump_show(struct seq_file *m, void *v) { struct ptdump_info *info = m->private; - ptdump_walk_pgd(m, info); + ptdump_walk(m, info); return 0; } DEFINE_SHOW_ATTRIBUTE(ptdump); |