From 65fe935dd2387a4faf15314c73f5e6d31ef0217e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 13 Jun 2016 15:10:02 -0700 Subject: x86/KASLR, x86/power: Remove x86 hibernation restrictions With the following fix: 70595b479ce1 ("x86/power/64: Fix crash whan the hibernation code passes control to the image kernel") ... there is no longer a problem with hibernation resuming a KASLR-booted kernel image, so remove the restriction. Signed-off-by: Kees Cook Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jonathan Corbet Cc: Len Brown Cc: Linus Torvalds Cc: Linux PM list Cc: Logan Gunthorpe Cc: Pavel Machek Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Yinghai Lu Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/20160613221002.GA29719@www.outflux.net Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index cfeb0259ed81..dff42177cb0c 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -471,17 +471,10 @@ unsigned char *choose_random_location(unsigned long input, unsigned long choice = output; unsigned long random_addr; -#ifdef CONFIG_HIBERNATION - if (!cmdline_find_option_bool("kaslr")) { - warn("KASLR disabled: 'kaslr' not on cmdline (hibernation selected)."); - goto out; - } -#else if (cmdline_find_option_bool("nokaslr")) { warn("KASLR disabled: 'nokaslr' on cmdline."); goto out; } -#endif boot_params->hdr.loadflags |= KASLR_FLAG; -- cgit v1.2.1 From 98f78525371b55ccd1c480207ce10296c72fa340 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 May 2016 15:45:30 -0700 Subject: x86/boot: Refuse to build with data relocations The compressed kernel is built with -fPIC/-fPIE so that it can run in any location a bootloader happens to put it. However, since ELF relocation processing is not happening (and all the relocation information has already been stripped at link time), none of the code can use data relocations (e.g. static assignments of pointers). This is already noted in a warning comment at the top of misc.c, but this adds an explicit check for the condition during the linking stage to block any such bugs from appearing. If this was in place with the earlier bug in pagetable.c, the build would fail like this: ... CC arch/x86/boot/compressed/pagetable.o DATAREL arch/x86/boot/compressed/vmlinux error: arch/x86/boot/compressed/pagetable.o has data relocations! make[2]: *** [arch/x86/boot/compressed/vmlinux] Error 1 ... A clean build shows: ... CC arch/x86/boot/compressed/pagetable.o DATAREL arch/x86/boot/compressed/vmlinux LD arch/x86/boot/compressed/vmlinux ... Suggested-by: Ingo Molnar Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1464216334-17200-2-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/Makefile | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index f1356889204e..536ccfcc01c6 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -85,7 +85,25 @@ vmlinux-objs-$(CONFIG_EFI_STUB) += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o \ $(objtree)/drivers/firmware/efi/libstub/lib.a vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o +# The compressed kernel is built with -fPIC/-fPIE so that a boot loader +# can place it anywhere in memory and it will still run. However, since +# it is executed as-is without any ELF relocation processing performed +# (and has already had all relocation sections stripped from the binary), +# none of the code can use data relocations (e.g. static assignments of +# pointer values), since they will be meaningless at runtime. This check +# will refuse to link the vmlinux if any of these relocations are found. +quiet_cmd_check_data_rel = DATAREL $@ +define cmd_check_data_rel + for obj in $(filter %.o,$^); do \ + readelf -S $$obj | grep -qF .rel.local && { \ + echo "error: $$obj has data relocations!" >&2; \ + exit 1; \ + } || true; \ + done +endef + $(obj)/vmlinux: $(vmlinux-objs-y) FORCE + $(call if_changed,check_data_rel) $(call if_changed,ld) OBJCOPYFLAGS_vmlinux.bin := -R .comment -S -- cgit v1.2.1 From 11fdf97a3cd1a5a27625f820ceb74e1caba4fd26 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 May 2016 15:45:31 -0700 Subject: x86/KASLR: Clarify identity map interface This extracts the call to prepare_level4() into a top-level function that the user of the pagetable.c interface must call to initialize the new page tables. For clarity and to match the "finalize" function, it has been renamed to initialize_identity_maps(). This function also gains the initialization of mapping_info so we don't have to do it each time in add_identity_map(). Additionally add copyright notice to the top, to make it clear that the bulk of the pagetable.c code was written by Yinghai, and that I just added bugs later. :) Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1464216334-17200-3-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 3 +++ arch/x86/boot/compressed/misc.h | 3 +++ arch/x86/boot/compressed/pagetable.c | 26 ++++++++++++++++---------- 3 files changed, 22 insertions(+), 10 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index dff42177cb0c..54037c9f2def 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -478,6 +478,9 @@ unsigned char *choose_random_location(unsigned long input, boot_params->hdr.loadflags |= KASLR_FLAG; + /* Prepare to add new identity pagetables on demand. */ + initialize_identity_maps(); + /* Record the various known unsafe memory ranges. */ mem_avoid_init(input, input_size, output); diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index b6fec1ff10e4..09c4ddd02ac6 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -85,10 +85,13 @@ unsigned char *choose_random_location(unsigned long input_ptr, #endif #ifdef CONFIG_X86_64 +void initialize_identity_maps(void); void add_identity_map(unsigned long start, unsigned long size); void finalize_identity_maps(void); extern unsigned char _pgtable[]; #else +static inline void initialize_identity_maps(void) +{ } static inline void add_identity_map(unsigned long start, unsigned long size) { } static inline void finalize_identity_maps(void) diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c index 34b95df14e69..6e31a6aac4d3 100644 --- a/arch/x86/boot/compressed/pagetable.c +++ b/arch/x86/boot/compressed/pagetable.c @@ -2,6 +2,9 @@ * This code is used on x86_64 to create page table identity mappings on * demand by building up a new set of page tables (or appending to the * existing ones), and then switching over to them when ready. + * + * Copyright (C) 2015-2016 Yinghai Lu + * Copyright (C) 2016 Kees Cook */ /* @@ -59,9 +62,21 @@ static struct alloc_pgt_data pgt_data; /* The top level page table entry pointer. */ static unsigned long level4p; +/* + * Mapping information structure passed to kernel_ident_mapping_init(). + * Due to relocation, pointers must be assigned at run time not build time. + */ +static struct x86_mapping_info mapping_info = { + .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, +}; + /* Locates and clears a region for a new top level page table. */ -static void prepare_level4(void) +void initialize_identity_maps(void) { + /* Init mapping_info with run-time function/buffer pointers. */ + mapping_info.alloc_pgt_page = alloc_pgt_page; + mapping_info.context = &pgt_data; + /* * It should be impossible for this not to already be true, * but since calling this a second time would rewind the other @@ -96,17 +111,8 @@ static void prepare_level4(void) */ void add_identity_map(unsigned long start, unsigned long size) { - struct x86_mapping_info mapping_info = { - .alloc_pgt_page = alloc_pgt_page, - .context = &pgt_data, - .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, - }; unsigned long end = start + size; - /* Make sure we have a top level page table ready to use. */ - if (!level4p) - prepare_level4(); - /* Align boundary to 2M. */ start = round_down(start, PMD_SIZE); end = round_up(end, PMD_SIZE); -- cgit v1.2.1 From 8391c73c96f28d4e8c40fd401fd0c9c04391b44a Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 25 May 2016 15:45:32 -0700 Subject: x86/KASLR: Randomize virtual address separately The current KASLR implementation randomizes the physical and virtual addresses of the kernel together (both are offset by the same amount). It calculates the delta of the physical address where vmlinux was linked to load and where it is finally loaded. If the delta is not equal to 0 (i.e. the kernel was relocated), relocation handling needs be done. On 64-bit, this patch randomizes both the physical address where kernel is decompressed and the virtual address where kernel text is mapped and will execute from. We now have two values being chosen, so the function arguments are reorganized to pass by pointer so they can be directly updated. Since relocation handling only depends on the virtual address, we must check the virtual delta, not the physical delta for processing kernel relocations. This also populates the page table for the new virtual address range. 32-bit does not support a separate virtual address, so it continues to use the physical offset for its virtual offset. Additionally updates the sanity checks done on the resulting kernel addresses since they are potentially separate now. [kees: rewrote changelog, limited virtual split to 64-bit only, update checks] [kees: fix CONFIG_RANDOMIZE_BASE=n boot failure] Signed-off-by: Baoquan He Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1464216334-17200-4-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 41 +++++++++++++++++---------------- arch/x86/boot/compressed/misc.c | 49 ++++++++++++++++++++++++---------------- arch/x86/boot/compressed/misc.h | 22 ++++++++++-------- 3 files changed, 64 insertions(+), 48 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 54037c9f2def..5550546916be 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -463,17 +463,20 @@ static unsigned long find_random_virt_addr(unsigned long minimum, * Since this function examines addresses much more numerically, * it takes the input and output pointers as 'unsigned long'. */ -unsigned char *choose_random_location(unsigned long input, - unsigned long input_size, - unsigned long output, - unsigned long output_size) +void choose_random_location(unsigned long input, + unsigned long input_size, + unsigned long *output, + unsigned long output_size, + unsigned long *virt_addr) { - unsigned long choice = output; unsigned long random_addr; + /* By default, keep output position unchanged. */ + *virt_addr = *output; + if (cmdline_find_option_bool("nokaslr")) { warn("KASLR disabled: 'nokaslr' on cmdline."); - goto out; + return; } boot_params->hdr.loadflags |= KASLR_FLAG; @@ -482,25 +485,25 @@ unsigned char *choose_random_location(unsigned long input, initialize_identity_maps(); /* Record the various known unsafe memory ranges. */ - mem_avoid_init(input, input_size, output); + mem_avoid_init(input, input_size, *output); /* Walk e820 and find a random address. */ - random_addr = find_random_phys_addr(output, output_size); + random_addr = find_random_phys_addr(*output, output_size); if (!random_addr) { warn("KASLR disabled: could not find suitable E820 region!"); - goto out; + } else { + /* Update the new physical address location. */ + if (*output != random_addr) { + add_identity_map(random_addr, output_size); + *output = random_addr; + } } - /* Always enforce the minimum. */ - if (random_addr < choice) - goto out; - - choice = random_addr; - - add_identity_map(choice, output_size); - /* This actually loads the identity pagetable on x86_64. */ finalize_identity_maps(); -out: - return (unsigned char *)choice; + + /* Pick random virtual address starting from LOAD_PHYSICAL_ADDR. */ + if (IS_ENABLED(CONFIG_X86_64)) + random_addr = find_random_virt_addr(LOAD_PHYSICAL_ADDR, output_size); + *virt_addr = random_addr; } diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index f14db4e21654..b3c5a5f030ce 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -170,7 +170,8 @@ void __puthex(unsigned long value) } #if CONFIG_X86_NEED_RELOCS -static void handle_relocations(void *output, unsigned long output_len) +static void handle_relocations(void *output, unsigned long output_len, + unsigned long virt_addr) { int *reloc; unsigned long delta, map, ptr; @@ -182,11 +183,6 @@ static void handle_relocations(void *output, unsigned long output_len) * and where it was actually loaded. */ delta = min_addr - LOAD_PHYSICAL_ADDR; - if (!delta) { - debug_putstr("No relocation needed... "); - return; - } - debug_putstr("Performing relocations... "); /* * The kernel contains a table of relocation addresses. Those @@ -197,6 +193,20 @@ static void handle_relocations(void *output, unsigned long output_len) */ map = delta - __START_KERNEL_map; + /* + * 32-bit always performs relocations. 64-bit relocations are only + * needed if KASLR has chosen a different starting address offset + * from __START_KERNEL_map. + */ + if (IS_ENABLED(CONFIG_X86_64)) + delta = virt_addr - LOAD_PHYSICAL_ADDR; + + if (!delta) { + debug_putstr("No relocation needed... "); + return; + } + debug_putstr("Performing relocations... "); + /* * Process relocations: 32 bit relocations first then 64 bit after. * Three sets of binary relocations are added to the end of the kernel @@ -250,7 +260,8 @@ static void handle_relocations(void *output, unsigned long output_len) #endif } #else -static inline void handle_relocations(void *output, unsigned long output_len) +static inline void handle_relocations(void *output, unsigned long output_len, + unsigned long virt_addr) { } #endif @@ -327,7 +338,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, unsigned long output_len) { const unsigned long kernel_total_size = VO__end - VO__text; - unsigned char *output_orig = output; + unsigned long virt_addr = (unsigned long)output; /* Retain x86 boot parameters pointer passed from startup_32/64. */ boot_params = rmode; @@ -366,13 +377,16 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, * the entire decompressed kernel plus relocation table, or the * entire decompressed kernel plus .bss and .brk sections. */ - output = choose_random_location((unsigned long)input_data, input_len, - (unsigned long)output, - max(output_len, kernel_total_size)); + choose_random_location((unsigned long)input_data, input_len, + (unsigned long *)&output, + max(output_len, kernel_total_size), + &virt_addr); /* Validate memory location choices. */ if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) - error("Destination address inappropriately aligned"); + error("Destination physical address inappropriately aligned"); + if (virt_addr & (MIN_KERNEL_ALIGN - 1)) + error("Destination virtual address inappropriately aligned"); #ifdef CONFIG_X86_64 if (heap > 0x3fffffffffffUL) error("Destination address too large"); @@ -382,19 +396,16 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, #endif #ifndef CONFIG_RELOCATABLE if ((unsigned long)output != LOAD_PHYSICAL_ADDR) - error("Wrong destination address"); + error("Destination address does not match LOAD_PHYSICAL_ADDR"); + if ((unsigned long)output != virt_addr) + error("Destination virtual address changed when not relocatable"); #endif debug_putstr("\nDecompressing Linux... "); __decompress(input_data, input_len, NULL, NULL, output, output_len, NULL, error); parse_elf(output); - /* - * 32-bit always performs relocations. 64-bit relocations are only - * needed if kASLR has chosen a different load address. - */ - if (!IS_ENABLED(CONFIG_X86_64) || output != output_orig) - handle_relocations(output, output_len); + handle_relocations(output, output_len, virt_addr); debug_putstr("done.\nBooting the kernel.\n"); return output; } diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 09c4ddd02ac6..1c8355eadbd1 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -67,20 +67,22 @@ int cmdline_find_option_bool(const char *option); #if CONFIG_RANDOMIZE_BASE /* kaslr.c */ -unsigned char *choose_random_location(unsigned long input_ptr, - unsigned long input_size, - unsigned long output_ptr, - unsigned long output_size); +void choose_random_location(unsigned long input, + unsigned long input_size, + unsigned long *output, + unsigned long output_size, + unsigned long *virt_addr); /* cpuflags.c */ bool has_cpuflag(int flag); #else -static inline -unsigned char *choose_random_location(unsigned long input_ptr, - unsigned long input_size, - unsigned long output_ptr, - unsigned long output_size) +static inline void choose_random_location(unsigned long input, + unsigned long input_size, + unsigned long *output, + unsigned long output_size, + unsigned long *virt_addr) { - return (unsigned char *)output_ptr; + /* No change from existing output location. */ + *virt_addr = *output; } #endif -- cgit v1.2.1 From ed9f007ee68478f6a50ec9971ade25a0129a5c0e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 May 2016 15:45:33 -0700 Subject: x86/KASLR: Extend kernel image physical address randomization to addresses larger than 4G We want the physical address to be randomized anywhere between 16MB and the top of physical memory (up to 64TB). This patch exchanges the prior slots[] array for the new slot_areas[] array, and lifts the limitation of KERNEL_IMAGE_SIZE on the physical address offset for 64-bit. As before, process_e820_entry() walks memory and populates slot_areas[], splitting on any detected mem_avoid collisions. Finally, since the slots[] array and its associated functions are not needed any more, so they are removed. Based on earlier patches by Baoquan He. Originally-from: Baoquan He Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yinghai Lu Link: http://lkml.kernel.org/r/1464216334-17200-5-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 115 +++++++++++++++++++++++---------------- 1 file changed, 69 insertions(+), 46 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 5550546916be..36e28112523a 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -132,17 +132,6 @@ enum mem_avoid_index { static struct mem_vector mem_avoid[MEM_AVOID_MAX]; -static bool mem_contains(struct mem_vector *region, struct mem_vector *item) -{ - /* Item at least partially before region. */ - if (item->start < region->start) - return false; - /* Item at least partially after region. */ - if (item->start + item->size > region->start + region->size) - return false; - return true; -} - static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) { /* Item one is entirely before item two. */ @@ -319,8 +308,6 @@ static bool mem_avoid_overlap(struct mem_vector *img, return is_overlapping; } -static unsigned long slots[KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN]; - struct slot_area { unsigned long addr; int num; @@ -351,36 +338,44 @@ static void store_slot_info(struct mem_vector *region, unsigned long image_size) } } -static void slots_append(unsigned long addr) -{ - /* Overflowing the slots list should be impossible. */ - if (slot_max >= KERNEL_IMAGE_SIZE / CONFIG_PHYSICAL_ALIGN) - return; - - slots[slot_max++] = addr; -} - static unsigned long slots_fetch_random(void) { + unsigned long slot; + int i; + /* Handle case of no slots stored. */ if (slot_max == 0) return 0; - return slots[get_random_long("Physical") % slot_max]; + slot = get_random_long("Physical") % slot_max; + + for (i = 0; i < slot_area_index; i++) { + if (slot >= slot_areas[i].num) { + slot -= slot_areas[i].num; + continue; + } + return slot_areas[i].addr + slot * CONFIG_PHYSICAL_ALIGN; + } + + if (i == slot_area_index) + debug_putstr("slots_fetch_random() failed!?\n"); + return 0; } static void process_e820_entry(struct e820entry *entry, unsigned long minimum, unsigned long image_size) { - struct mem_vector region, img, overlap; + struct mem_vector region, overlap; + struct slot_area slot_area; + unsigned long start_orig; /* Skip non-RAM entries. */ if (entry->type != E820_RAM) return; - /* Ignore entries entirely above our maximum. */ - if (entry->addr >= KERNEL_IMAGE_SIZE) + /* On 32-bit, ignore entries entirely above our maximum. */ + if (IS_ENABLED(CONFIG_X86_32) && entry->addr >= KERNEL_IMAGE_SIZE) return; /* Ignore entries entirely below our minimum. */ @@ -390,31 +385,55 @@ static void process_e820_entry(struct e820entry *entry, region.start = entry->addr; region.size = entry->size; - /* Potentially raise address to minimum location. */ - if (region.start < minimum) - region.start = minimum; + /* Give up if slot area array is full. */ + while (slot_area_index < MAX_SLOT_AREA) { + start_orig = region.start; - /* Potentially raise address to meet alignment requirements. */ - region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); + /* Potentially raise address to minimum location. */ + if (region.start < minimum) + region.start = minimum; - /* Did we raise the address above the bounds of this e820 region? */ - if (region.start > entry->addr + entry->size) - return; + /* Potentially raise address to meet alignment needs. */ + region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN); - /* Reduce size by any delta from the original address. */ - region.size -= region.start - entry->addr; + /* Did we raise the address above this e820 region? */ + if (region.start > entry->addr + entry->size) + return; - /* Reduce maximum size to fit end of image within maximum limit. */ - if (region.start + region.size > KERNEL_IMAGE_SIZE) - region.size = KERNEL_IMAGE_SIZE - region.start; + /* Reduce size by any delta from the original address. */ + region.size -= region.start - start_orig; - /* Walk each aligned slot and check for avoided areas. */ - for (img.start = region.start, img.size = image_size ; - mem_contains(®ion, &img) ; - img.start += CONFIG_PHYSICAL_ALIGN) { - if (mem_avoid_overlap(&img, &overlap)) - continue; - slots_append(img.start); + /* On 32-bit, reduce region size to fit within max size. */ + if (IS_ENABLED(CONFIG_X86_32) && + region.start + region.size > KERNEL_IMAGE_SIZE) + region.size = KERNEL_IMAGE_SIZE - region.start; + + /* Return if region can't contain decompressed kernel */ + if (region.size < image_size) + return; + + /* If nothing overlaps, store the region and return. */ + if (!mem_avoid_overlap(®ion, &overlap)) { + store_slot_info(®ion, image_size); + return; + } + + /* Store beginning of region if holds at least image_size. */ + if (overlap.start > region.start + image_size) { + struct mem_vector beginning; + + beginning.start = region.start; + beginning.size = overlap.start - region.start; + store_slot_info(&beginning, image_size); + } + + /* Return if overlap extends to or past end of region. */ + if (overlap.start + overlap.size >= region.start + region.size) + return; + + /* Clip off the overlapping region and start over. */ + region.size -= overlap.start - region.start + overlap.size; + region.start = overlap.start + overlap.size; } } @@ -431,6 +450,10 @@ static unsigned long find_random_phys_addr(unsigned long minimum, for (i = 0; i < boot_params->e820_entries; i++) { process_e820_entry(&boot_params->e820_map[i], minimum, image_size); + if (slot_area_index == MAX_SLOT_AREA) { + debug_putstr("Aborted e820 scan (slot_areas full)!\n"); + break; + } } return slots_fetch_random(); -- cgit v1.2.1 From e066cc47776a89bbdaf4184c0e75f7d389f9ab48 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 25 May 2016 15:45:34 -0700 Subject: x86/KASLR: Allow randomization below the load address Currently the kernel image physical address randomization's lower boundary is the original kernel load address. For bootloaders that load kernels into very high memory (e.g. kexec), this means randomization takes place in a very small window at the top of memory, ignoring the large region of physical memory below the load address. Since mem_avoid[] is already correctly tracking the regions that must be avoided, this patch changes the minimum address to whatever is less: 512M (to conservatively avoid unknown things in lower memory) or the load address. Now, for example, if the kernel is loaded at 8G, [512M, 8G) will be added to the list of possible physical memory positions. Signed-off-by: Yinghai Lu [ Rewrote the changelog, refactored the code to use min(). ] Signed-off-by: Kees Cook Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Baoquan He Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: H.J. Lu Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1464216334-17200-6-git-send-email-keescook@chromium.org [ Edited the changelog some more, plus the code comment as well. ] Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 36e28112523a..749c9e00c674 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -492,7 +492,7 @@ void choose_random_location(unsigned long input, unsigned long output_size, unsigned long *virt_addr) { - unsigned long random_addr; + unsigned long random_addr, min_addr; /* By default, keep output position unchanged. */ *virt_addr = *output; @@ -510,8 +510,15 @@ void choose_random_location(unsigned long input, /* Record the various known unsafe memory ranges. */ mem_avoid_init(input, input_size, *output); + /* + * Low end of the randomization range should be the + * smaller of 512M or the initial kernel image + * location: + */ + min_addr = min(*output, 512UL << 20); + /* Walk e820 and find a random address. */ - random_addr = find_random_phys_addr(*output, output_size); + random_addr = find_random_phys_addr(min_addr, output_size); if (!random_addr) { warn("KASLR disabled: could not find suitable E820 region!"); } else { -- cgit v1.2.1 From 6daa2ec0b3e3808c55329d12de3c157cf38b17b0 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 1 Jul 2016 15:34:40 +0800 Subject: x86/KASLR: Fix boot crash with certain memory configurations Ye Xiaolong reported this boot crash: | | XZ-compressed data is corrupt | | -- System halted | Fix the bug in mem_avoid_overlap() of finding the earliest overlap. Reported-and-tested-by: Ye Xiaolong Signed-off-by: Baoquan He Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 749c9e00c674..010ea16e5f77 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -285,6 +285,7 @@ static bool mem_avoid_overlap(struct mem_vector *img, if (mem_overlaps(img, &mem_avoid[i]) && mem_avoid[i].start < earliest) { *overlap = mem_avoid[i]; + earliest = overlap->start; is_overlapping = true; } } @@ -299,6 +300,7 @@ static bool mem_avoid_overlap(struct mem_vector *img, if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) { *overlap = avoid; + earliest = overlap->start; is_overlapping = true; } -- cgit v1.2.1 From d899a7d146a2ed8a7e6c2f61bcd232908bcbaabc Mon Sep 17 00:00:00 2001 From: Thomas Garnier Date: Tue, 21 Jun 2016 17:46:58 -0700 Subject: x86/mm: Refactor KASLR entropy functions Move the KASLR entropy functions into arch/x86/lib to be used in early kernel boot for KASLR memory randomization. Signed-off-by: Thomas Garnier Signed-off-by: Kees Cook Cc: Alexander Kuleshov Cc: Alexander Popov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Baoquan He Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Christian Borntraeger Cc: Dan Williams Cc: Dave Hansen Cc: Dave Young Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: Jan Beulich Cc: Joerg Roedel Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Lv Zheng Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Toshi Kani Cc: Xiao Guangrong Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/1466556426-32664-2-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/kaslr.c | 76 +++------------------------------------- 1 file changed, 5 insertions(+), 71 deletions(-) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index 010ea16e5f77..a66854d99ee1 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -12,10 +12,6 @@ #include "misc.h" #include "error.h" -#include -#include -#include - #include #include #include @@ -26,26 +22,6 @@ static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION; -#define I8254_PORT_CONTROL 0x43 -#define I8254_PORT_COUNTER0 0x40 -#define I8254_CMD_READBACK 0xC0 -#define I8254_SELECT_COUNTER0 0x02 -#define I8254_STATUS_NOTREADY 0x40 -static inline u16 i8254(void) -{ - u16 status, timer; - - do { - outb(I8254_PORT_CONTROL, - I8254_CMD_READBACK | I8254_SELECT_COUNTER0); - status = inb(I8254_PORT_COUNTER0); - timer = inb(I8254_PORT_COUNTER0); - timer |= inb(I8254_PORT_COUNTER0) << 8; - } while (status & I8254_STATUS_NOTREADY); - - return timer; -} - static unsigned long rotate_xor(unsigned long hash, const void *area, size_t size) { @@ -62,7 +38,7 @@ static unsigned long rotate_xor(unsigned long hash, const void *area, } /* Attempt to create a simple but unpredictable starting entropy. */ -static unsigned long get_random_boot(void) +static unsigned long get_boot_seed(void) { unsigned long hash = 0; @@ -72,50 +48,8 @@ static unsigned long get_random_boot(void) return hash; } -static unsigned long get_random_long(const char *purpose) -{ -#ifdef CONFIG_X86_64 - const unsigned long mix_const = 0x5d6008cbf3848dd3UL; -#else - const unsigned long mix_const = 0x3f39e593UL; -#endif - unsigned long raw, random = get_random_boot(); - bool use_i8254 = true; - - debug_putstr(purpose); - debug_putstr(" KASLR using"); - - if (has_cpuflag(X86_FEATURE_RDRAND)) { - debug_putstr(" RDRAND"); - if (rdrand_long(&raw)) { - random ^= raw; - use_i8254 = false; - } - } - - if (has_cpuflag(X86_FEATURE_TSC)) { - debug_putstr(" RDTSC"); - raw = rdtsc(); - - random ^= raw; - use_i8254 = false; - } - - if (use_i8254) { - debug_putstr(" i8254"); - random ^= i8254(); - } - - /* Circular multiply for better bit diffusion */ - asm("mul %3" - : "=a" (random), "=d" (raw) - : "a" (random), "rm" (mix_const)); - random += raw; - - debug_putstr("...\n"); - - return random; -} +#define KASLR_COMPRESSED_BOOT +#include "../../lib/kaslr.c" struct mem_vector { unsigned long start; @@ -349,7 +283,7 @@ static unsigned long slots_fetch_random(void) if (slot_max == 0) return 0; - slot = get_random_long("Physical") % slot_max; + slot = kaslr_get_random_long("Physical") % slot_max; for (i = 0; i < slot_area_index; i++) { if (slot >= slot_areas[i].num) { @@ -479,7 +413,7 @@ static unsigned long find_random_virt_addr(unsigned long minimum, slots = (KERNEL_IMAGE_SIZE - minimum - image_size) / CONFIG_PHYSICAL_ALIGN + 1; - random_addr = get_random_long("Virtual") % slots; + random_addr = kaslr_get_random_long("Virtual") % slots; return random_addr * CONFIG_PHYSICAL_ALIGN + minimum; } -- cgit v1.2.1 From 021182e52fe01c1f7b126f97fd6ba048dc4234fd Mon Sep 17 00:00:00 2001 From: Thomas Garnier Date: Tue, 21 Jun 2016 17:47:03 -0700 Subject: x86/mm: Enable KASLR for physical mapping memory regions Add the physical mapping in the list of randomized memory regions. The physical memory mapping holds most allocations from boot and heap allocators. Knowing the base address and physical memory size, an attacker can deduce the PDE virtual address for the vDSO memory page. This attack was demonstrated at CanSecWest 2016, in the following presentation: "Getting Physical: Extreme Abuse of Intel Based Paged Systems": https://github.com/n3k/CansecWest2016_Getting_Physical_Extreme_Abuse_of_Intel_Based_Paging_Systems/blob/master/Presentation/CanSec2016_Presentation.pdf (See second part of the presentation). The exploits used against Linux worked successfully against 4.6+ but fail with KASLR memory enabled: https://github.com/n3k/CansecWest2016_Getting_Physical_Extreme_Abuse_of_Intel_Based_Paging_Systems/tree/master/Demos/Linux/exploits Similar research was done at Google leading to this patch proposal. Variants exists to overwrite /proc or /sys objects ACLs leading to elevation of privileges. These variants were tested against 4.6+. The page offset used by the compressed kernel retains the static value since it is not yet randomized during this boot stage. Signed-off-by: Thomas Garnier Signed-off-by: Kees Cook Cc: Alexander Kuleshov Cc: Alexander Popov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Baoquan He Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Christian Borntraeger Cc: Dan Williams Cc: Dave Hansen Cc: Dave Young Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: H. Peter Anvin Cc: Jan Beulich Cc: Joerg Roedel Cc: Jonathan Corbet Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Lv Zheng Cc: Mark Salter Cc: Martin Schwidefsky Cc: Matt Fleming Cc: Peter Zijlstra Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Toshi Kani Cc: Xiao Guangrong Cc: Yinghai Lu Cc: kernel-hardening@lists.openwall.com Cc: linux-doc@vger.kernel.org Link: http://lkml.kernel.org/r/1466556426-32664-7-git-send-email-keescook@chromium.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/pagetable.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/boot') diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c index 6e31a6aac4d3..56589d0a804b 100644 --- a/arch/x86/boot/compressed/pagetable.c +++ b/arch/x86/boot/compressed/pagetable.c @@ -20,6 +20,9 @@ /* These actually do the work of building the kernel identity maps. */ #include #include +/* Use the static base for this part of the boot process */ +#undef __PAGE_OFFSET +#define __PAGE_OFFSET __PAGE_OFFSET_BASE #include "../../mm/ident_map.c" /* Used by pgtable.h asm code to force instruction serialization. */ -- cgit v1.2.1