From 72d7c3b33c980843e756681fb4867dc1efd62a76 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Wed, 25 Aug 2010 13:39:17 -0700
Subject: x86: Use memblock to replace early_res

1. Replace find_e820_area() with memblock_find_in_range().
2. Replace reserve_early() with memblock_x86_reserve_range().
3. Replace free_early() with memblock_x86_free_range().
4. NO_BOOTMEM will switch to use memblock too.
5. Keep the _e820/_early wrappers in this patch; a following patch will
   replace them all.
6. Because memblock_x86_free_range() supports partial free, some special-case
   handling can be removed.
7. memblock_find_in_range() must be called after memblock_x86_fill(), so move
   some callers later in setup.c::setup_arch() -- corruption_check and
   mptable_update.

-v2: Move reserve_brk() early, before fill_memblock_area(), to avoid an
     overlap between brk and memblock_find_in_range() that could happen when
     there are more than 128 RAM entries in the E820 table and
     memblock_x86_fill() uses memblock_find_in_range() to find a new place
     for the memblock.memory.region array.  We also don't need extend_brk()
     after fill_memblock_area(), so moving reserve_brk() earlier is safe.
-v3: Move find_smp_config() early, to make sure memblock_find_in_range() does
     not pick a wrong place if the BIOS doesn't put the mptable in the right
     place.
-v4: Treat RESERVED_KERN as RAM in memblock.memory; those ranges are already
     in memblock.reserved.  Use __NOT_KEEP_MEMBLOCK so that the
     memblock-related code can be freed later.
-v5: The generic __memblock_find_in_range() goes from high to low, and the
     active_region for 32-bit does include high pages, so the limit needs to
     be replaced with memblock.default_alloc_limit, aka get_max_mapped().
-v6: Use current_limit instead.
-v7: Check with MEMBLOCK_ERROR instead of -1ULL or -1L.
-v8: Set memblock_can_resize early to handle EFI with more RAM entries.
-v9: Update after the kmemleak changes in mainline.

Suggested-by: David S. Miller
Suggested-by: Benjamin Herrenschmidt
Suggested-by: Thomas Gleixner
Signed-off-by: Yinghai Lu
Signed-off-by: H. Peter Anvin
---
 arch/x86/kernel/setup.c | 46 +++++++++++++++++++++++++++++++++-------------
 1 file changed, 33 insertions(+), 13 deletions(-)

(limited to 'arch/x86/kernel/setup.c')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b4ae4acbd031..bbe0aaf77494 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -614,7 +615,7 @@ static __init void reserve_ibft_region(void)
 	addr = find_ibft_region(&size);
 
 	if (size)
-		reserve_early_overlap_ok(addr, addr + size, "ibft");
+		memblock_x86_reserve_range(addr, addr + size, "* ibft");
 }
 
 #ifdef CONFIG_X86_RESERVE_LOW_64K
@@ -708,6 +709,15 @@ static void __init trim_bios_range(void)
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 }
 
+static u64 __init get_max_mapped(void)
+{
+	u64 end = max_pfn_mapped;
+
+	end <<= PAGE_SHIFT;
+
+	return end;
+}
+
 /*
  * Determine if we were loaded by an EFI loader.  If so, then we have also been
  * passed the efi memmap, systab, etc., so we should use these data structures
@@ -891,8 +901,6 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	max_pfn = e820_end_of_ram_pfn();
 
-	/* preallocate 4k for mptable mpc */
-	early_reserve_e820_mpc_new();
 
 	/* update e820 for memory not covered by WB MTRRs */
 	mtrr_bp_init();
 	if (mtrr_trim_uncached_memory(max_pfn))
@@ -917,15 +925,6 @@ void __init setup_arch(char **cmdline_p)
 	max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
 #endif
 
-#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
-	setup_bios_corruption_check();
-#endif
-
-	printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n",
-			max_pfn_mapped<<PAGE_SHIFT);

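For readers following the conversion, the pattern that replaces the old
find_e820_area()/reserve_early()/free_early() trio looks roughly like the
sketch below.  This is a minimal illustration, not code from the series: the
wrapper functions and the buffer they reserve are made up, and only
memblock_find_in_range(), MEMBLOCK_ERROR, memblock_x86_reserve_range(),
memblock_x86_free_range() and get_max_mapped() are taken from the patches
above.

  /* Illustrative only: reserve an early, page-aligned scratch buffer below
   * the currently mapped limit, the way the converted call sites do it. */
  static u64 __init reserve_early_scratch(u64 size)
  {
          u64 addr;

          /* Search [PAGE_SIZE, get_max_mapped()), the limit the -v5/-v6 notes describe */
          addr = memblock_find_in_range(PAGE_SIZE, get_max_mapped(),
                                        size, PAGE_SIZE);
          if (addr == MEMBLOCK_ERROR)
                  panic("Cannot find %llu bytes for early scratch buffer\n",
                        (unsigned long long)size);

          /* Ranges are passed as [start, end), matching the call sites above */
          memblock_x86_reserve_range(addr, addr + size, "EARLY SCRATCH");

          return addr;
  }

  /* Partial free is supported, so trimming an over-reservation is just: */
  static void __init trim_early_scratch(u64 addr, u64 keep, u64 size)
  {
          memblock_x86_free_range(addr + keep, addr + size);
  }
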
Date: Wed, 25 Aug 2010 13:39:17 -0700
Subject: x86, memblock: Replace e820_/_early string with memblock_

1. Include linux/memblock.h directly, so the e820.h references can be reduced
   later.
2. This patch was done mainly with sed scripts.

-v2: Use MEMBLOCK_ERROR instead of -1ULL or -1UL.

Signed-off-by: Yinghai Lu
Signed-off-by: H. Peter Anvin
---
 arch/x86/kernel/setup.c | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

(limited to 'arch/x86/kernel/setup.c')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index bbe0aaf77494..a4f01733e879 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -302,7 +302,7 @@ static inline void init_gbpages(void)
 static void __init reserve_brk(void)
 {
 	if (_brk_end > _brk_start)
-		reserve_early(__pa(_brk_start), __pa(_brk_end), "BRK");
+		memblock_x86_reserve_range(__pa(_brk_start), __pa(_brk_end), "BRK");
 
 	/* Mark brk area as locked down and no longer taking any
 	   new allocations */
@@ -324,17 +324,16 @@ static void __init relocate_initrd(void)
 	char *p, *q;
 
 	/* We need to move the initrd down into lowmem */
-	ramdisk_here = find_e820_area(0, end_of_lowmem, area_size,
+	ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size,
 					 PAGE_SIZE);
 
-	if (ramdisk_here == -1ULL)
+	if (ramdisk_here == MEMBLOCK_ERROR)
 		panic("Cannot find place for new RAMDISK of size %lld\n",
 			 ramdisk_size);
 
 	/* Note: this includes all the lowmem currently occupied by
 	   the initrd, we rely on that fact to keep the data intact. */
-	reserve_early(ramdisk_here, ramdisk_here + area_size,
-			 "NEW RAMDISK");
+	memblock_x86_reserve_range(ramdisk_here, ramdisk_here + area_size, "NEW RAMDISK");
 	initrd_start = ramdisk_here + PAGE_OFFSET;
 	initrd_end   = initrd_start + ramdisk_size;
 	printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
@@ -390,7 +389,7 @@ static void __init reserve_initrd(void)
 	initrd_start = 0;
 
 	if (ramdisk_size >= (end_of_lowmem>>1)) {
-		free_early(ramdisk_image, ramdisk_end);
+		memblock_x86_free_range(ramdisk_image, ramdisk_end);
 		printk(KERN_ERR "initrd too large to handle, "
 		       "disabling initrd\n");
 		return;
@@ -413,7 +412,7 @@ static void __init reserve_initrd(void)
 
 	relocate_initrd();
 
-	free_early(ramdisk_image, ramdisk_end);
+	memblock_x86_free_range(ramdisk_image, ramdisk_end);
 }
 #else
 static void __init reserve_initrd(void)
@@ -469,7 +468,7 @@ static void __init e820_reserve_setup_data(void)
 	e820_print_map("reserve setup_data");
 }
 
-static void __init reserve_early_setup_data(void)
+static void __init memblock_x86_reserve_range_setup_data(void)
 {
 	struct setup_data *data;
 	u64 pa_data;
@@ -481,7 +480,7 @@ static void __init reserve_early_setup_data(void)
 	while (pa_data) {
 		data = early_memremap(pa_data, sizeof(*data));
 		sprintf(buf, "setup data %x", data->type);
-		reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
+		memblock_x86_reserve_range(pa_data, pa_data+sizeof(*data)+data->len, buf);
 		pa_data = data->next;
 		early_iounmap(data, sizeof(*data));
 	}
@@ -519,23 +518,23 @@ static void __init reserve_crashkernel(void)
 	if (crash_base <= 0) {
 		const unsigned long long alignment = 16<<20;	/* 16M */
 
-		crash_base = find_e820_area(alignment, ULONG_MAX, crash_size,
+		crash_base = memblock_find_in_range(alignment, ULONG_MAX, crash_size,
 				 alignment);
 
-		if (crash_base == -1ULL) {
+		if (crash_base == MEMBLOCK_ERROR) {
 			pr_info("crashkernel reservation failed - No suitable area found.\n");
 			return;
 		}
 	} else {
 		unsigned long long start;
 
-		start = find_e820_area(crash_base, ULONG_MAX, crash_size,
+		start = memblock_find_in_range(crash_base, ULONG_MAX, crash_size,
 				 1<<20);
 		if (start != crash_base) {
 			pr_info("crashkernel reservation failed - memory is in use.\n");
 			return;
 		}
 	}
-	reserve_early(crash_base, crash_base + crash_size, "CRASH KERNEL");
+	memblock_x86_reserve_range(crash_base, crash_base + crash_size, "CRASH KERNEL");
 
 	printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
 			"for crashkernel (System RAM: %ldMB)\n",
@@ -786,7 +785,7 @@ void __init setup_arch(char **cmdline_p)
 #endif
 	 4)) {
 		efi_enabled = 1;
-		efi_reserve_early();
+		efi_memblock_x86_reserve_range();
 	}
 #endif
@@ -846,7 +845,7 @@ void __init setup_arch(char **cmdline_p)
 	vmi_activate();
 
 	/* after early param, so could get panic from serial */
-	reserve_early_setup_data();
+	memblock_x86_reserve_range_setup_data();
 
 	if (acpi_mps_check()) {
 #ifdef CONFIG_X86_LOCAL_APIC

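The setup_data reservation renamed above is spread over two hunks; pieced
together, the walk looks roughly like the sketch below.  The start of the
chain (boot_params.hdr.setup_data) and the 32-byte name buffer are
assumptions not shown in the hunks; the loop body mirrors the diff.

  /* Sketch of the renamed helper, assembled from the hunks above. */
  static void __init memblock_x86_reserve_range_setup_data(void)
  {
          struct setup_data *data;
          u64 pa_data;
          char buf[32];

          pa_data = boot_params.hdr.setup_data;   /* assumed start of the chain */
          while (pa_data) {
                  /* Map just the header so we can read type, len and next */
                  data = early_memremap(pa_data, sizeof(*data));
                  sprintf(buf, "setup data %x", data->type);
                  /* Reserve the header plus its payload so nothing reuses it */
                  memblock_x86_reserve_range(pa_data,
                                             pa_data + sizeof(*data) + data->len, buf);
                  pa_data = data->next;
                  early_iounmap(data, sizeof(*data));
          }
  }
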
From 6f2a75369e7561e800d86927ecd83c970996b21f Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Wed, 25 Aug 2010 13:39:18 -0700
Subject: x86, memblock: Use memblock_memory_size()/memblock_free_memory_size()
 to get correct dma_reserve

memblock_memory_size() returns the memory size in a given range of
memblock.memory.region, and memblock_free_memory_size() returns the free
memory size in that range, so we can get the exact reserved size in a
specified range.

Set the size right after initmem_init(), because the bootmem API will later
hand out areas above 16M (except for some fallbacks).  Once bootmem is
removed, this could be called just before paging_init().

Signed-off-by: Yinghai Lu
Signed-off-by: H. Peter Anvin
---
 arch/x86/kernel/setup.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86/kernel/setup.c')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index a4f01733e879..924c8f78e98e 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1013,6 +1013,7 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 	initmem_init(0, max_pfn, acpi, k8);
+	memblock_find_dma_reserve();
 
 #ifndef CONFIG_NO_BOOTMEM
 	memblock_x86_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);

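The commit message above names memblock_memory_size() and
memblock_free_memory_size(), but the one-line diff only shows the call site,
so here is a rough sketch of what memblock_find_dma_reserve() is described as
doing.  The 16 MB bound, the helper signatures and the use of
set_dma_reserve() are assumptions based on the message, not code from the
patch.

  /* Sketch, based on the commit message: work out how much of the low 16 MB
   * is already reserved, and tell the page allocator about it. */
  static void __init memblock_find_dma_reserve_sketch(void)
  {
          u64 low_limit = 16ULL << 20;    /* "area above 16M" per the message */
          u64 mem_size, free_size;

          mem_size  = memblock_memory_size(0, low_limit);      /* total RAM below 16M */
          free_size = memblock_free_memory_size(0, low_limit);  /* still unreserved */

          /* Reserved pages below 16M = total pages - free pages */
          set_dma_reserve((mem_size - free_size) >> PAGE_SHIFT);
  }
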
Date: Wed, 25 Aug 2010 13:39:18 -0700
Subject: x86: Remove old bootmem code

Requested by Ingo, Thomas and HPA.

The old bootmem code is no longer necessary, and the transition is complete.
Remove it.

Signed-off-by: Yinghai Lu
Signed-off-by: H. Peter Anvin
---
 arch/x86/kernel/setup.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'arch/x86/kernel/setup.c')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 924c8f78e98e..1d114ff6a078 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1014,10 +1014,6 @@ void __init setup_arch(char **cmdline_p)
 	initmem_init(0, max_pfn, acpi, k8);
 	memblock_find_dma_reserve();
 
-#ifndef CONFIG_NO_BOOTMEM
-	memblock_x86_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);

Date: Tue, 5 Oct 2010 16:05:14 -0700
Subject: x86, memblock: Fix crashkernel allocation

Cai Qian found that crashkernel is broken with the x86 memblock changes.

1. crashkernel=128M@32M always reported that the range is in use, even if
   the first kernel is small and does not use that range.
2. we always got the following report when using "kexec -p":

      Could not find a free area of memory of a000 bytes...
      locate_hole failed

The root cause is that the generic memblock_find_in_range() will try to
allocate from the top of the range, whereas the kexec code was written
assuming that allocation was always near the bottom and that it could
blindly extend memory upward.  Unfortunately the kexec code doesn't have a
system for requesting the range that it really needs, so this is subject to
probabilistic failures.

This patch hacks around the problem by limiting the target range
heuristically to below the traditional bzImage max range.  This number is
arbitrary and not always correct, and a much better result would be obtained
by having kexec communicate this number based on the kernel header
information and any appropriate command line options.

Reported-and-Bisected-by: CAI Qian
Signed-off-by: Yinghai Lu
LKML-Reference: <4CABAF2A.5090501@kernel.org>
Cc: Vivek Goyal
Signed-off-by: H. Peter Anvin
---
 arch/x86/kernel/setup.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'arch/x86/kernel/setup.c')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index bf89e0a59b88..b11a238b2e35 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -502,6 +502,7 @@ static inline unsigned long long get_total_mem(void)
 	return total << PAGE_SHIFT;
 }
 
+#define DEFAULT_BZIMAGE_ADDR_MAX 0x37FFFFFF
 static void __init reserve_crashkernel(void)
 {
 	unsigned long long total_mem;
@@ -519,8 +520,12 @@ static void __init reserve_crashkernel(void)
 	if (crash_base <= 0) {
 		const unsigned long long alignment = 16<<20;	/* 16M */
 
-		crash_base = memblock_find_in_range(alignment, ULONG_MAX, crash_size,
-				 alignment);
+		/*
+		 * kexec wants the bzImage to be below DEFAULT_BZIMAGE_ADDR_MAX
+		 */
+		crash_base = memblock_find_in_range(alignment,
+			       DEFAULT_BZIMAGE_ADDR_MAX, crash_size, alignment);
+
 		if (crash_base == MEMBLOCK_ERROR) {
 			pr_info("crashkernel reservation failed - No suitable area found.\n");
 			return;
@@ -528,8 +533,8 @@ static void __init reserve_crashkernel(void)
 	} else {
 		unsigned long long start;
 
-		start = memblock_find_in_range(crash_base, ULONG_MAX, crash_size,
-				 1<<20);
+		start = memblock_find_in_range(crash_base,
+				 crash_base + crash_size, crash_size, 1<<20);
 		if (start != crash_base) {
 			pr_info("crashkernel reservation failed - memory is in use.\n");
 			return;

From 67e87f0a1c5cbc750f81ebf6a128e8ff6f4376cc Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge
Date: Wed, 13 Oct 2010 16:34:15 -0700
Subject: x86-64: Only set max_pfn_mapped to 512 MiB if we enter via head_64.S

head_64.S maps up to 512 MiB, but that is not necessarily true for other
entry paths, such as Xen.  Thus, co-locate the setting of max_pfn_mapped
with the code that actually sets up the page tables in head_64.S.  The
32-bit code is already co-located this way.

(The Xen code already sets max_pfn_mapped correctly for its own use case.)

-v2: Yinghai fixed the following bug in this patch:
     |
     | max_pfn_mapped is in the .bss section, so we need to set it
     | after the bss gets cleared.  Without that we crash on bootup.
     |
     | That is safe because Xen does not call x86_64_start_kernel().
     |

Signed-off-by: Jeremy Fitzhardinge
Fixed-by: Yinghai Lu
Signed-off-by: H. Peter Anvin
LKML-Reference: <4CB6AB24.9020504@kernel.org>
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/setup.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/x86/kernel/setup.c')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b11a238b2e35..c3cebfe7bfc9 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -932,7 +932,6 @@ void __init setup_arch(char **cmdline_p)
 	max_low_pfn = max_pfn;
 
 	high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
-	max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
 #endif
 
 	/*
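
To make the -v2 note concrete: with the assignment gone from setup_arch(),
the 512 MiB assumption has to be made by the head_64.S entry path itself.
The sketch below illustrates the idea; the placement in the 64-bit C entry
point right after clear_bss() follows the -v2 note, but the function body
shown here is an illustration, not the actual head64.c change.

  /* Sketch: only the head_64.S -> x86_64_start_kernel() path should claim
   * that the first 512 MiB are mapped; other entry points (e.g. Xen) set
   * max_pfn_mapped themselves.  Placement details are assumptions drawn
   * from the -v2 note above. */
  void __init x86_64_start_kernel(char *real_mode_data)
  {
          /* max_pfn_mapped lives in .bss, so the BSS must be cleared first */
          clear_bss();

          /* head_64.S built page tables covering KERNEL_IMAGE_SIZE (512 MiB) */
          max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;

          /* continue the early 64-bit boot as before */
          x86_64_start_reservations(real_mode_data);
  }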