From 6083fe74b7bfffc2c7be8c711596608bda0cda6e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 15 Jan 2015 16:42:14 +0000 Subject: arm64: respect mem= for EFI When booting with EFI, we acquire the EFI memory map after parsing the early params. This unfortuantely renders the option useless as we call memblock_enforce_memory_limit (which uses memblock_remove_range behind the scenes) before we've added any memblocks. We end up removing nothing, then adding all of memory later when efi_init calls reserve_regions. Instead, we can log the limit and apply this later when we do the rest of the memblock work in memblock_init, which should work regardless of the presence of EFI. At the same time we may as well move the early parameter into arm64's mm/init.c, close to arm64_memblock_init. Any memory which must be mapped (e.g. for use by EFI runtime services) must be mapped explicitly reather than relying on the linear mapping, which may be truncated as a result of a mem= option passed on the kernel command line. Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Acked-by: Ard Biesheuvel Tested-by: Ard Biesheuvel Cc: Leif Lindholm Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch/arm64/mm/init.c') diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index bac492c12fcc..11c7b701b681 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -136,10 +136,29 @@ static void arm64_memory_present(void) } #endif +static phys_addr_t memory_limit = (phys_addr_t)ULLONG_MAX; + +/* + * Limit the memory size that was specified via FDT. + */ +static int __init early_mem(char *p) +{ + if (!p) + return 1; + + memory_limit = memparse(p, &p) & PAGE_MASK; + pr_notice("Memory limited to %lldMB\n", memory_limit >> 20); + + return 0; +} +early_param("mem", early_mem); + void __init arm64_memblock_init(void) { phys_addr_t dma_phys_limit = 0; + memblock_enforce_memory_limit(memory_limit); + /* * Register the kernel text, kernel data, initrd, and initial * pagetables with memblock. -- cgit v1.2.3 From da141706aea52c1a9fbd28cb8d289b78819f5436 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 21 Jan 2015 17:36:06 -0800 Subject: arm64: add better page protections to arm64 Add page protections for arm64 similar to those in arm. This is for security reasons to prevent certain classes of exploits. The current method: - Map all memory as either RWX or RW. We round to the nearest section to avoid creating page tables before everything is mapped - Once everything is mapped, if either end of the RWX section should not be X, we split the PMD and remap as necessary - When initmem is to be freed, we change the permissions back to RW (using stop machine if necessary to flush the TLB) - If CONFIG_DEBUG_RODATA is set, the read only sections are set read only. Acked-by: Ard Biesheuvel Tested-by: Kees Cook Tested-by: Ard Biesheuvel Signed-off-by: Laura Abbott Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig.debug | 23 ++++ arch/arm64/include/asm/cacheflush.h | 5 + arch/arm64/kernel/vmlinux.lds.S | 17 ++- arch/arm64/mm/init.c | 1 + arch/arm64/mm/mm.h | 2 + arch/arm64/mm/mmu.c | 211 ++++++++++++++++++++++++++++++++---- 6 files changed, 233 insertions(+), 26 deletions(-) (limited to 'arch/arm64/mm/init.c') diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index 5fdd6dce8061..4a8741073c90 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -66,4 +66,27 @@ config DEBUG_SET_MODULE_RONX against certain classes of kernel exploits. If in doubt, say "N". +config DEBUG_RODATA + bool "Make kernel text and rodata read-only" + help + If this is set, kernel text and rodata will be made read-only. This + is to help catch accidental or malicious attempts to change the + kernel's executable code. Additionally splits rodata from kernel + text so it can be made explicitly non-executable. + + If in doubt, say Y + +config DEBUG_ALIGN_RODATA + depends on DEBUG_RODATA && !ARM64_64K_PAGES + bool "Align linker sections up to SECTION_SIZE" + help + If this option is enabled, sections that may potentially be marked as + read only or non-executable will be aligned up to the section size of + the kernel. This prevents sections from being split into pages and + avoids a potential TLB penalty. The downside is an increase in + alignment and potentially wasted space. Turn on this option if + performance is more important than memory pressure. + + If in doubt, say N + endmenu diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 7ae31a2cc6c0..67d309cc3b6b 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -152,4 +152,9 @@ int set_memory_ro(unsigned long addr, int numpages); int set_memory_rw(unsigned long addr, int numpages); int set_memory_x(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); + +#ifdef CONFIG_DEBUG_RODATA +void mark_rodata_ro(void); +#endif + #endif diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 9965ec87cbec..5d9d2dca530d 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -8,6 +8,7 @@ #include #include #include +#include #include "image.h" @@ -49,6 +50,14 @@ PECOFF_FILE_ALIGNMENT = 0x200; #define PECOFF_EDATA_PADDING #endif +#ifdef CONFIG_DEBUG_ALIGN_RODATA +#define ALIGN_DEBUG_RO . = ALIGN(1< #include #include +#include #include #include @@ -59,21 +60,43 @@ EXPORT_SYMBOL(phys_mem_access_prot); static void __init *early_alloc(unsigned long sz) { void *ptr = __va(memblock_alloc(sz, sz)); + BUG_ON(!ptr); memset(ptr, 0, sz); return ptr; } -static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, +/* + * remap a PMD into pages + */ +static void split_pmd(pmd_t *pmd, pte_t *pte) +{ + unsigned long pfn = pmd_pfn(*pmd); + int i = 0; + + do { + /* + * Need to have the least restrictive permissions available + * permissions will be fixed up later + */ + set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); + pfn++; + } while (pte++, i++, i < PTRS_PER_PTE); +} + +static void alloc_init_pte(pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, - pgprot_t prot) + pgprot_t prot, + void *(*alloc)(unsigned long size)) { pte_t *pte; - if (pmd_none(*pmd)) { - pte = early_alloc(PTRS_PER_PTE * sizeof(pte_t)); + if (pmd_none(*pmd) || pmd_bad(*pmd)) { + pte = alloc(PTRS_PER_PTE * sizeof(pte_t)); + if (pmd_sect(*pmd)) + split_pmd(pmd, pte); __pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE); + flush_tlb_all(); } - BUG_ON(pmd_bad(*pmd)); pte = pte_offset_kernel(pmd, addr); do { @@ -82,9 +105,22 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, } while (pte++, addr += PAGE_SIZE, addr != end); } -static void __init alloc_init_pmd(struct mm_struct *mm, pud_t *pud, +void split_pud(pud_t *old_pud, pmd_t *pmd) +{ + unsigned long addr = pud_pfn(*old_pud) << PAGE_SHIFT; + pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr); + int i = 0; + + do { + set_pmd(pmd, __pmd(addr | prot)); + addr += PMD_SIZE; + } while (pmd++, i++, i < PTRS_PER_PMD); +} + +static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, - phys_addr_t phys, pgprot_t prot) + phys_addr_t phys, pgprot_t prot, + void *(*alloc)(unsigned long size)) { pmd_t *pmd; unsigned long next; @@ -93,8 +129,16 @@ static void __init alloc_init_pmd(struct mm_struct *mm, pud_t *pud, * Check for initial section mappings in the pgd/pud and remove them. */ if (pud_none(*pud) || pud_bad(*pud)) { - pmd = early_alloc(PTRS_PER_PMD * sizeof(pmd_t)); + pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t)); + if (pud_sect(*pud)) { + /* + * need to have the 1G of mappings continue to be + * present + */ + split_pud(pud, pmd); + } pud_populate(mm, pud, pmd); + flush_tlb_all(); } pmd = pmd_offset(pud, addr); @@ -113,21 +157,34 @@ static void __init alloc_init_pmd(struct mm_struct *mm, pud_t *pud, flush_tlb_all(); } else { alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), - prot); + prot, alloc); } phys += next - addr; } while (pmd++, addr = next, addr != end); } -static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, +static inline bool use_1G_block(unsigned long addr, unsigned long next, + unsigned long phys) +{ + if (PAGE_SHIFT != 12) + return false; + + if (((addr | next | phys) & ~PUD_MASK) != 0) + return false; + + return true; +} + +static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, - phys_addr_t phys, pgprot_t prot) + phys_addr_t phys, pgprot_t prot, + void *(*alloc)(unsigned long size)) { pud_t *pud; unsigned long next; if (pgd_none(*pgd)) { - pud = early_alloc(PTRS_PER_PUD * sizeof(pud_t)); + pud = alloc(PTRS_PER_PUD * sizeof(pud_t)); pgd_populate(mm, pgd, pud); } BUG_ON(pgd_bad(*pgd)); @@ -139,8 +196,7 @@ static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, /* * For 4K granule only, attempt to put down a 1GB block */ - if ((PAGE_SHIFT == 12) && - ((addr | next | phys) & ~PUD_MASK) == 0) { + if (use_1G_block(addr, next, phys)) { pud_t old_pud = *pud; set_pud(pud, __pud(phys | pgprot_val(mk_sect_prot(prot)))); @@ -158,7 +214,7 @@ static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, flush_tlb_all(); } } else { - alloc_init_pmd(mm, pud, addr, next, phys, prot); + alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc); } phys += next - addr; } while (pud++, addr = next, addr != end); @@ -168,9 +224,10 @@ static void __init alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, * Create the page directory entries and any necessary page tables for the * mapping specified by 'md'. */ -static void __init __create_mapping(struct mm_struct *mm, pgd_t *pgd, +static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, phys_addr_t phys, unsigned long virt, - phys_addr_t size, pgprot_t prot) + phys_addr_t size, pgprot_t prot, + void *(*alloc)(unsigned long size)) { unsigned long addr, length, end, next; @@ -180,13 +237,23 @@ static void __init __create_mapping(struct mm_struct *mm, pgd_t *pgd, end = addr + length; do { next = pgd_addr_end(addr, end); - alloc_init_pud(mm, pgd, addr, next, phys, prot); + alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc); phys += next - addr; } while (pgd++, addr = next, addr != end); } -static void __init create_mapping(phys_addr_t phys, unsigned long virt, - phys_addr_t size) +static void *late_alloc(unsigned long size) +{ + void *ptr; + + BUG_ON(size > PAGE_SIZE); + ptr = (void *)__get_free_page(PGALLOC_GFP); + BUG_ON(!ptr); + return ptr; +} + +static void __ref create_mapping(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot) { if (virt < VMALLOC_START) { pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", @@ -194,15 +261,71 @@ static void __init create_mapping(phys_addr_t phys, unsigned long virt, return; } __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt, - size, PAGE_KERNEL_EXEC); + size, prot, early_alloc); } void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { - __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot); + __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot, + early_alloc); +} + +static void create_mapping_late(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot) +{ + if (virt < VMALLOC_START) { + pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", + &phys, virt); + return; + } + + return __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), + phys, virt, size, prot, late_alloc); +} + +#ifdef CONFIG_DEBUG_RODATA +static void __init __map_memblock(phys_addr_t start, phys_addr_t end) +{ + /* + * Set up the executable regions using the existing section mappings + * for now. This will get more fine grained later once all memory + * is mapped + */ + unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE); + unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); + + if (end < kernel_x_start) { + create_mapping(start, __phys_to_virt(start), + end - start, PAGE_KERNEL); + } else if (start >= kernel_x_end) { + create_mapping(start, __phys_to_virt(start), + end - start, PAGE_KERNEL); + } else { + if (start < kernel_x_start) + create_mapping(start, __phys_to_virt(start), + kernel_x_start - start, + PAGE_KERNEL); + create_mapping(kernel_x_start, + __phys_to_virt(kernel_x_start), + kernel_x_end - kernel_x_start, + PAGE_KERNEL_EXEC); + if (kernel_x_end < end) + create_mapping(kernel_x_end, + __phys_to_virt(kernel_x_end), + end - kernel_x_end, + PAGE_KERNEL); + } + +} +#else +static void __init __map_memblock(phys_addr_t start, phys_addr_t end) +{ + create_mapping(start, __phys_to_virt(start), end - start, + PAGE_KERNEL_EXEC); } +#endif static void __init map_mem(void) { @@ -248,14 +371,53 @@ static void __init map_mem(void) memblock_set_current_limit(limit); } #endif - - create_mapping(start, __phys_to_virt(start), end - start); + __map_memblock(start, end); } /* Limit no longer required. */ memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); } +void __init fixup_executable(void) +{ +#ifdef CONFIG_DEBUG_RODATA + /* now that we are actually fully mapped, make the start/end more fine grained */ + if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) { + unsigned long aligned_start = round_down(__pa(_stext), + SECTION_SIZE); + + create_mapping(aligned_start, __phys_to_virt(aligned_start), + __pa(_stext) - aligned_start, + PAGE_KERNEL); + } + + if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) { + unsigned long aligned_end = round_up(__pa(__init_end), + SECTION_SIZE); + create_mapping(__pa(__init_end), (unsigned long)__init_end, + aligned_end - __pa(__init_end), + PAGE_KERNEL); + } +#endif +} + +#ifdef CONFIG_DEBUG_RODATA +void mark_rodata_ro(void) +{ + create_mapping_late(__pa(_stext), (unsigned long)_stext, + (unsigned long)_etext - (unsigned long)_stext, + PAGE_KERNEL_EXEC | PTE_RDONLY); + +} +#endif + +void fixup_init(void) +{ + create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin, + (unsigned long)__init_end - (unsigned long)__init_begin, + PAGE_KERNEL); +} + /* * paging_init() sets up the page tables, initialises the zone memory * maps and sets up the zero page. @@ -265,6 +427,7 @@ void __init paging_init(void) void *zero_page; map_mem(); + fixup_executable(); /* * Finally flush the caches and tlb to ensure that we're in a -- cgit v1.2.3 From aa03c428e67881795f4190f9ffdb62fc14978608 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 22 Jan 2015 18:20:35 +0000 Subject: arm64: Fix overlapping VA allocations PCI IO space was intended to be 16MiB, at 32MiB below MODULES_VADDR, but commit d1e6dc91b532d3d3 ("arm64: Add architectural support for PCI") extended this to cover the full 32MiB. The final 8KiB of this 32MiB is also allocated for the fixmap, allowing for potential clashes between the two. This change was masked by assumptions in mem_init and the page table dumping code, which assumed the I/O space to be 16MiB long through seaparte hard-coded definitions. This patch changes the definition of the PCI I/O space allocation to live in asm/memory.h, along with the other VA space allocations. As the fixmap allocation depends on the number of fixmap entries, this is moved below the PCI I/O space allocation. Both the fixmap and PCI I/O space are guarded with 2MB of padding. Sites assuming the I/O space was 16MiB are moved over use new PCI_IO_{START,END} definitions, which will keep in sync with the size of the IO space (now restored to 16MiB). As a useful side effect, the use of the new PCI_IO_{START,END} definitions prevents a build issue in the dumping code due to a (now redundant) missing include of io.h for PCI_IOBASE. Signed-off-by: Mark Rutland Cc: Kees Cook Cc: Laura Abbott Cc: Liviu Dudau Cc: Steve Capper Cc: Will Deacon [catalin.marinas@arm.com: reorder FIXADDR and PCI_IO address_markers_idx enum] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/io.h | 5 +++-- arch/arm64/include/asm/memory.h | 10 +++++++++- arch/arm64/mm/dump.c | 8 ++++---- arch/arm64/mm/init.c | 5 +++-- 4 files changed, 19 insertions(+), 9 deletions(-) (limited to 'arch/arm64/mm/init.c') diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 949c406d4df4..540f7c0aea82 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -26,6 +26,7 @@ #include #include +#include #include #include #include @@ -145,8 +146,8 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) * I/O port access primitives. */ #define arch_has_dev_port() (1) -#define IO_SPACE_LIMIT (SZ_32M - 1) -#define PCI_IOBASE ((void __iomem *)(MODULES_VADDR - SZ_32M)) +#define IO_SPACE_LIMIT (PCI_IO_SIZE - 1) +#define PCI_IOBASE ((void __iomem *)PCI_IO_START) /* * String version of I/O memory access operations. diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 6486b2bfd562..f800d45ea226 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -32,6 +32,12 @@ */ #define UL(x) _AC(x, UL) +/* + * Size of the PCI I/O space. This must remain a power of two so that + * IO_SPACE_LIMIT acts as a mask for the low bits of I/O addresses. + */ +#define PCI_IO_SIZE SZ_16M + /* * PAGE_OFFSET - the virtual address of the start of the kernel image (top * (VA_BITS - 1)) @@ -45,7 +51,9 @@ #define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1)) #define MODULES_END (PAGE_OFFSET) #define MODULES_VADDR (MODULES_END - SZ_64M) -#define FIXADDR_TOP (MODULES_VADDR - SZ_2M - PAGE_SIZE) +#define PCI_IO_END (MODULES_VADDR - SZ_2M) +#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) +#define FIXADDR_TOP (PCI_IO_START - SZ_2M) #define TASK_SIZE_64 (UL(1) << VA_BITS) #ifdef CONFIG_COMPAT diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index cf33f33333cc..77d39a1b6f8d 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -36,10 +36,10 @@ enum address_markers_idx { VMEMMAP_START_NR, VMEMMAP_END_NR, #endif - PCI_START_NR, - PCI_END_NR, FIXADDR_START_NR, FIXADDR_END_NR, + PCI_START_NR, + PCI_END_NR, MODULES_START_NR, MODUELS_END_NR, KERNEL_SPACE_NR, @@ -52,10 +52,10 @@ static struct addr_marker address_markers[] = { { 0, "vmemmap start" }, { 0, "vmemmap end" }, #endif - { (unsigned long) PCI_IOBASE, "PCI I/O start" }, - { (unsigned long) PCI_IOBASE + SZ_16M, "PCI I/O end" }, { FIXADDR_START, "Fixmap start" }, { FIXADDR_TOP, "Fixmap end" }, + { PCI_IO_START, "PCI I/O start" }, + { PCI_IO_END, "PCI I/O end" }, { MODULES_VADDR, "Modules start" }, { MODULES_END, "Modules end" }, { PAGE_OFFSET, "Kernel Mapping" }, diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 43cccb5101c0..a8dfb40eefa8 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -296,8 +297,8 @@ void __init mem_init(void) " vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n" " 0x%16lx - 0x%16lx (%6ld MB actual)\n" #endif - " PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n" " fixed : 0x%16lx - 0x%16lx (%6ld KB)\n" + " PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n" " modules : 0x%16lx - 0x%16lx (%6ld MB)\n" " memory : 0x%16lx - 0x%16lx (%6ld MB)\n" " .init : 0x%p" " - 0x%p" " (%6ld KB)\n" @@ -310,8 +311,8 @@ void __init mem_init(void) MLM((unsigned long)virt_to_page(PAGE_OFFSET), (unsigned long)virt_to_page(high_memory)), #endif - MLM((unsigned long)PCI_IOBASE, (unsigned long)PCI_IOBASE + SZ_16M), MLK(FIXADDR_START, FIXADDR_TOP), + MLM(PCI_IO_START, PCI_IO_END), MLM(MODULES_VADDR, MODULES_END), MLM(PAGE_OFFSET, (unsigned long)high_memory), MLK_ROUNDUP(__init_begin, __init_end), -- cgit v1.2.3