Diffstat (limited to 'arch/i386/mm')

 -rw-r--r--  arch/i386/mm/boot_ioremap.c |   7
 -rw-r--r--  arch/i386/mm/discontig.c    | 105
 -rw-r--r--  arch/i386/mm/extable.c      |   2
 -rw-r--r--  arch/i386/mm/fault.c        |  25
 -rw-r--r--  arch/i386/mm/highmem.c      |   2
 -rw-r--r--  arch/i386/mm/init.c         |  82
 -rw-r--r--  arch/i386/mm/pgtable.c      |  30

 7 files changed, 143 insertions(+), 110 deletions(-)
diff --git a/arch/i386/mm/boot_ioremap.c b/arch/i386/mm/boot_ioremap.c
index 5d44f4f5ff59..4de11f508c3a 100644
--- a/arch/i386/mm/boot_ioremap.c
+++ b/arch/i386/mm/boot_ioremap.c
@@ -29,8 +29,11 @@
  */

 #define BOOT_PTE_PTRS (PTRS_PER_PTE*2)
-#define boot_pte_index(address) \
-        (((address) >> PAGE_SHIFT) & (BOOT_PTE_PTRS - 1))
+
+static unsigned long boot_pte_index(unsigned long vaddr)
+{
+        return __pa(vaddr) >> PAGE_SHIFT;
+}

 static inline boot_pte_t* boot_vaddr_to_pte(void *address)
 {
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index 7c392dc553b8..51e3739dd227 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -117,7 +117,8 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags);

 void *node_remap_end_vaddr[MAX_NUMNODES];
 void *node_remap_alloc_vaddr[MAX_NUMNODES];
-
+static unsigned long kva_start_pfn;
+static unsigned long kva_pages;
 /*
  * FLAT - support for basic PC memory model with discontig enabled, essentially
  *        a single node with all available processors in it with a flat
@@ -156,21 +157,6 @@ static void __init find_max_pfn_node(int nid)
                 BUG();
 }

-/* Find the owning node for a pfn. */
-int early_pfn_to_nid(unsigned long pfn)
-{
-        int nid;
-
-        for_each_node(nid) {
-                if (node_end_pfn[nid] == 0)
-                        break;
-                if (node_start_pfn[nid] <= pfn && node_end_pfn[nid] >= pfn)
-                        return nid;
-        }
-
-        return 0;
-}
-
 /*
  * Allocate memory for the pg_data_t for this node via a crude pre-bootmem
  * method.  For node zero take this from the bottom of memory, for
@@ -226,6 +212,8 @@ static unsigned long calculate_numa_remap_pages(void)
         unsigned long pfn;

         for_each_online_node(nid) {
+                unsigned old_end_pfn = node_end_pfn[nid];
+
                 /*
                  * The acpi/srat node info can show hot-add memroy zones
                  * where memory could be added but not currently present.
@@ -275,6 +263,7 @@ static unsigned long calculate_numa_remap_pages(void)

                 node_end_pfn[nid] -= size;
                 node_remap_start_pfn[nid] = node_end_pfn[nid];
+                shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]);
         }
         printk("Reserving total of %ld pages for numa KVA remap\n",
                         reserve_pages);
@@ -286,7 +275,6 @@ unsigned long __init setup_memory(void)
 {
         int nid;
         unsigned long system_start_pfn, system_max_low_pfn;
-        unsigned long reserve_pages;

         /*
          * When mapping a NUMA machine we allocate the node_mem_map arrays
@@ -298,14 +286,23 @@ unsigned long __init setup_memory(void)
         find_max_pfn();
         get_memcfg_numa();

-        reserve_pages = calculate_numa_remap_pages();
+        kva_pages = calculate_numa_remap_pages();

         /* partially used pages are not usable - thus round upwards */
         system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);

-        system_max_low_pfn = max_low_pfn = find_max_low_pfn() - reserve_pages;
-        printk("reserve_pages = %ld find_max_low_pfn() ~ %ld\n",
-                        reserve_pages, max_low_pfn + reserve_pages);
+        kva_start_pfn = find_max_low_pfn() - kva_pages;
+
+#ifdef CONFIG_BLK_DEV_INITRD
+        /* Numa kva area is below the initrd */
+        if (LOADER_TYPE && INITRD_START)
+                kva_start_pfn = PFN_DOWN(INITRD_START) - kva_pages;
+#endif
+        kva_start_pfn -= kva_start_pfn & (PTRS_PER_PTE-1);
+
+        system_max_low_pfn = max_low_pfn = find_max_low_pfn();
+        printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n",
+                kva_start_pfn, max_low_pfn);
         printk("max_pfn = %ld\n", max_pfn);
 #ifdef CONFIG_HIGHMEM
         highstart_pfn = highend_pfn = max_pfn;
@@ -313,6 +310,11 @@ unsigned long __init setup_memory(void)
         highstart_pfn = system_max_low_pfn;
         printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
                pages_to_mb(highend_pfn - highstart_pfn));
+        num_physpages = highend_pfn;
+        high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
+#else
+        num_physpages = system_max_low_pfn;
+        high_memory = (void *) __va(system_max_low_pfn * PAGE_SIZE - 1) + 1;
 #endif
         printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
                         pages_to_mb(system_max_low_pfn));
@@ -323,7 +325,7 @@ unsigned long __init setup_memory(void)
                         (ulong) pfn_to_kaddr(max_low_pfn));
         for_each_online_node(nid) {
                 node_remap_start_vaddr[nid] = pfn_to_kaddr(
-                        highstart_pfn + node_remap_offset[nid]);
+                        kva_start_pfn + node_remap_offset[nid]);
                 /* Init the node remap allocator */
                 node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
                         (node_remap_size[nid] * PAGE_SIZE);
@@ -338,7 +340,6 @@ unsigned long __init setup_memory(void)
         }
         printk("High memory starts at vaddr %08lx\n",
                         (ulong) pfn_to_kaddr(highstart_pfn));
-        vmalloc_earlyreserve = reserve_pages * PAGE_SIZE;
         for_each_online_node(nid)
                 find_max_pfn_node(nid);

@@ -348,48 +349,30 @@ unsigned long __init setup_memory(void)
         return max_low_pfn;
 }

+void __init numa_kva_reserve(void)
+{
+        reserve_bootmem(PFN_PHYS(kva_start_pfn),PFN_PHYS(kva_pages));
+}
+
 void __init zone_sizes_init(void)
 {
         int nid;
-
-
-        for_each_online_node(nid) {
-                unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
-                unsigned long *zholes_size;
-                unsigned int max_dma;
-
-                unsigned long low = max_low_pfn;
-                unsigned long start = node_start_pfn[nid];
-                unsigned long high = node_end_pfn[nid];
-
-                max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
-
-                if (node_has_online_mem(nid)){
-                        if (start > low) {
-#ifdef CONFIG_HIGHMEM
-                                BUG_ON(start > high);
-                                zones_size[ZONE_HIGHMEM] = high - start;
-#endif
-                        } else {
-                                if (low < max_dma)
-                                        zones_size[ZONE_DMA] = low;
-                                else {
-                                        BUG_ON(max_dma > low);
-                                        BUG_ON(low > high);
-                                        zones_size[ZONE_DMA] = max_dma;
-                                        zones_size[ZONE_NORMAL] = low - max_dma;
-#ifdef CONFIG_HIGHMEM
-                                        zones_size[ZONE_HIGHMEM] = high - low;
-#endif
-                                }
-                        }
+        unsigned long max_zone_pfns[MAX_NR_ZONES] = {
+                virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT,
+                max_low_pfn,
+                highend_pfn
+        };
+
+        /* If SRAT has not registered memory, register it now */
+        if (find_max_pfn_with_active_regions() == 0) {
+                for_each_online_node(nid) {
+                        if (node_has_online_mem(nid))
+                                add_active_range(nid, node_start_pfn[nid],
+                                                node_end_pfn[nid]);
                 }
-
-                zholes_size = get_zholes_size(nid);
-
-                free_area_init_node(nid, NODE_DATA(nid), zones_size, start,
-                                zholes_size);
         }
+
+        free_area_init_nodes(max_zone_pfns);
         return;
 }

@@ -409,7 +392,7 @@ void __init set_highmem_pages_init(int bad_ppro)
                 zone_end_pfn = zone_start_pfn + zone->spanned_pages;

                 printk("Initializing %s for node %d (%08lx:%08lx)\n",
-                                zone->name, zone->zone_pgdat->node_id,
+                                zone->name, zone_to_nid(zone),
                                 zone_start_pfn, zone_end_pfn);

                 for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) {
diff --git a/arch/i386/mm/extable.c b/arch/i386/mm/extable.c
index de03c5430abc..0ce4f22a2635 100644
--- a/arch/i386/mm/extable.c
+++ b/arch/i386/mm/extable.c
@@ -11,7 +11,7 @@ int fixup_exception(struct pt_regs *regs)
         const struct exception_table_entry *fixup;

 #ifdef CONFIG_PNPBIOS
-        if (unlikely((regs->xcs & ~15) == (GDT_ENTRY_PNPBIOS_BASE << 3)))
+        if (unlikely(SEGMENT_IS_PNP_CODE(regs->xcs)))
         {
                 extern u32 pnp_bios_fault_eip, pnp_bios_fault_esp;
                 extern u32 pnp_bios_is_utter_crap;
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index f7279468323a..5e17a3f43b41 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -27,21 +27,24 @@
 #include <asm/uaccess.h>
 #include <asm/desc.h>
 #include <asm/kdebug.h>
+#include <asm/segment.h>

 extern void die(const char *,struct pt_regs *,long);

-#ifdef CONFIG_KPROBES
-ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
+static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
+
 int register_page_fault_notifier(struct notifier_block *nb)
 {
         vmalloc_sync_all();
         return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
 }
+EXPORT_SYMBOL_GPL(register_page_fault_notifier);

 int unregister_page_fault_notifier(struct notifier_block *nb)
 {
         return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
 }
+EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);

 static inline int notify_page_fault(enum die_val val, const char *str,
                         struct pt_regs *regs, long err, int trap, int sig)
@@ -55,14 +58,6 @@ static inline int notify_page_fault(enum die_val val, const char *str,
         };
         return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
 }
-#else
-static inline int notify_page_fault(enum die_val val, const char *str,
-                        struct pt_regs *regs, long err, int trap, int sig)
-{
-        return NOTIFY_DONE;
-}
-#endif
-

 /*
  * Unlock any spinlocks which will prevent us from getting the
@@ -119,10 +114,10 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs,
         }

         /* The standard kernel/user address space limit. */
-        *eip_limit = (seg & 3) ? USER_DS.seg : KERNEL_DS.seg;
+        *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;

         /* By far the most common cases. */
-        if (likely(seg == __USER_CS || seg == __KERNEL_CS))
+        if (likely(SEGMENT_IS_FLAT_CODE(seg)))
                 return eip;

         /* Check the segment exists, is within the current LDT/GDT size,
@@ -436,11 +431,7 @@ good_area:
         write = 0;
         switch (error_code & 3) {
                 default:        /* 3: write, present */
-#ifdef TEST_VERIFY_AREA
-                        if (regs->cs == KERNEL_CS)
-                                printk("WP fault at %08lx\n", regs->eip);
-#endif
-                        /* fall through */
+                        /* fall through */
                 case 2:         /* write, not present */
                         if (!(vma->vm_flags & VM_WRITE))
                                 goto bad_area;
diff --git a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c
index b6eb4dcb8777..ba44000b9069 100644
--- a/arch/i386/mm/highmem.c
+++ b/arch/i386/mm/highmem.c
@@ -54,7 +54,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
         unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
         enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();

-        if (vaddr < FIXADDR_START) { // FIXME
+        if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) {
                 dec_preempt_count();
                 preempt_check_resched();
                 return;
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index 89e8486aac34..4a5a914b3432 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -435,16 +435,22 @@ u64 __supported_pte_mask __read_mostly = ~_PAGE_NX;
  * on      Enable
  * off     Disable
  */
-void __init noexec_setup(const char *str)
+static int __init noexec_setup(char *str)
 {
-        if (!strncmp(str, "on",2) && cpu_has_nx) {
-                __supported_pte_mask |= _PAGE_NX;
-                disable_nx = 0;
-        } else if (!strncmp(str,"off",3)) {
+        if (!str || !strcmp(str, "on")) {
+                if (cpu_has_nx) {
+                        __supported_pte_mask |= _PAGE_NX;
+                        disable_nx = 0;
+                }
+        } else if (!strcmp(str,"off")) {
                 disable_nx = 1;
                 __supported_pte_mask &= ~_PAGE_NX;
-        }
+        } else
+                return -EINVAL;
+
+        return 0;
 }
+early_param("noexec", noexec_setup);

 int nx_enabled = 0;
 #ifdef CONFIG_X86_PAE
@@ -552,18 +558,6 @@ static void __init test_wp_bit(void)
         }
 }

-static void __init set_max_mapnr_init(void)
-{
-#ifdef CONFIG_HIGHMEM
-        num_physpages = highend_pfn;
-#else
-        num_physpages = max_low_pfn;
-#endif
-#ifdef CONFIG_FLATMEM
-        max_mapnr = num_physpages;
-#endif
-}
-
 static struct kcore_list kcore_mem, kcore_vmalloc;

 void __init mem_init(void)
@@ -590,14 +584,6 @@ void __init mem_init(void)
         }
 #endif

-        set_max_mapnr_init();
-
-#ifdef CONFIG_HIGHMEM
-        high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
-#else
-        high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
-#endif
-
         /* this will put all low memory onto the freelists */
         totalram_pages += free_all_bootmem();

@@ -629,6 +615,48 @@ void __init mem_init(void)
                 (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
                );

+#if 1 /* double-sanity-check paranoia */
+        printk("virtual kernel memory layout:\n"
+                "    fixmap  : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+#ifdef CONFIG_HIGHMEM
+                "    pkmap   : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+#endif
+                "    vmalloc : 0x%08lx - 0x%08lx   (%4ld MB)\n"
+                "    lowmem  : 0x%08lx - 0x%08lx   (%4ld MB)\n"
+                "      .init : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+                "      .data : 0x%08lx - 0x%08lx   (%4ld kB)\n"
+                "      .text : 0x%08lx - 0x%08lx   (%4ld kB)\n",
+                FIXADDR_START, FIXADDR_TOP,
+                (FIXADDR_TOP - FIXADDR_START) >> 10,
+
+#ifdef CONFIG_HIGHMEM
+                PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
+                (LAST_PKMAP*PAGE_SIZE) >> 10,
+#endif
+
+                VMALLOC_START, VMALLOC_END,
+                (VMALLOC_END - VMALLOC_START) >> 20,
+
+                (unsigned long)__va(0), (unsigned long)high_memory,
+                ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
+
+                (unsigned long)&__init_begin, (unsigned long)&__init_end,
+                ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10,
+
+                (unsigned long)&_etext, (unsigned long)&_edata,
+                ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
+
+                (unsigned long)&_text, (unsigned long)&_etext,
+                ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
+
+#ifdef CONFIG_HIGHMEM
+        BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
+        BUG_ON(VMALLOC_END > PKMAP_BASE);
+#endif
+        BUG_ON(VMALLOC_START > VMALLOC_END);
+        BUG_ON((unsigned long)high_memory > VMALLOC_START);
+#endif /* double-sanity-check paranoia */
+
 #ifdef CONFIG_X86_PAE
         if (!cpu_has_pae)
                 panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
@@ -657,7 +685,7 @@ void __init mem_init(void)
 int arch_add_memory(int nid, u64 start, u64 size)
 {
         struct pglist_data *pgdata = &contig_page_data;
-        struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
+        struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
         unsigned long start_pfn = start >> PAGE_SHIFT;
         unsigned long nr_pages = size >> PAGE_SHIFT;

diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index bd98768d8764..10126e3f8174 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/pagemap.h>
 #include <linux/spinlock.h>
+#include <linux/module.h>

 #include <asm/system.h>
 #include <asm/pgtable.h>
@@ -60,7 +61,9 @@ void show_mem(void)
         printk(KERN_INFO "%lu pages writeback\n",
                                         global_page_state(NR_WRITEBACK));
         printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
-        printk(KERN_INFO "%lu pages slab\n", global_page_state(NR_SLAB));
+        printk(KERN_INFO "%lu pages slab\n",
+                global_page_state(NR_SLAB_RECLAIMABLE) +
+                global_page_state(NR_SLAB_UNRECLAIMABLE));
         printk(KERN_INFO "%lu pages pagetables\n",
                                         global_page_state(NR_PAGETABLE));
 }
@@ -137,6 +140,12 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
         __flush_tlb_one(vaddr);
 }

+static int fixmaps;
+#ifndef CONFIG_COMPAT_VDSO
+unsigned long __FIXADDR_TOP = 0xfffff000;
+EXPORT_SYMBOL(__FIXADDR_TOP);
+#endif
+
 void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
 {
         unsigned long address = __fix_to_virt(idx);
@@ -146,6 +155,25 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
                 return;
         }
         set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
+        fixmaps++;
+}
+
+/**
+ * reserve_top_address - reserves a hole in the top of kernel address space
+ * @reserve - size of hole to reserve
+ *
+ * Can be used to relocate the fixmap area and poke a hole in the top
+ * of kernel address space to make room for a hypervisor.
+ */
+void reserve_top_address(unsigned long reserve)
+{
+        BUG_ON(fixmaps > 0);
+#ifdef CONFIG_COMPAT_VDSO
+        BUG_ON(reserve != 0);
+#else
+        __FIXADDR_TOP = -reserve - PAGE_SIZE;
+        __VMALLOC_RESERVE += reserve;
+#endif
 }

 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
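
A note on the zone_sizes_init() rewrite in discontig.c: rather than computing per-node zones_size[]/zholes_size[] arrays and calling free_area_init_node() once per node, the architecture now only registers each node's span of present memory with add_active_range() and hands the generic allocator a single array of per-zone limit PFNs. A condensed sketch of that contract, with illustrative PFN values (a real caller derives them from e820 or SRAT, as the patch does):

        /*
         * Sketch of the add_active_range()/free_area_init_nodes() contract
         * used above.  The PFN range here is illustrative; the patch derives
         * its ranges from node_start_pfn[]/node_end_pfn[] (e820/SRAT data).
         */
        static void __init example_register_memory(void)
        {
                /* Upper PFN of each zone; the generic code sizes the zones. */
                unsigned long max_zone_pfns[MAX_NR_ZONES] = {
                        virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT,    /* ZONE_DMA */
                        max_low_pfn,                                            /* ZONE_NORMAL */
                        highend_pfn                                             /* ZONE_HIGHMEM */
                };

                /* One call per contiguous range of present memory on each
                 * node; holes are the gaps between registered ranges. */
                add_active_range(0, 0, max_low_pfn);

                /* Called once, after all ranges have been registered. */
                free_area_init_nodes(max_zone_pfns);
        }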
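The noexec_setup() conversion in init.c follows the standard early_param() pattern: the handler now returns 0 on success or -EINVAL for an unrecognized value, and must tolerate a NULL argument, since a bare "noexec" on the command line arrives with no value and is taken to mean "on". A sketch of the same pattern for a made-up boot option (the "examplefeat" name and handler are hypothetical, not part of this patch):

        #include <linux/init.h>
        #include <linux/string.h>
        #include <linux/errno.h>

        static int example_feature_on = 1;      /* hypothetical default */

        /* Hypothetical early_param() handler mirroring noexec_setup() above:
         * a bare "examplefeat" is passed str == NULL and counts as "on";
         * anything other than on/off is rejected with -EINVAL. */
        static int __init examplefeat_setup(char *str)
        {
                if (!str || !strcmp(str, "on"))
                        example_feature_on = 1;
                else if (!strcmp(str, "off"))
                        example_feature_on = 0;
                else
                        return -EINVAL;

                return 0;
        }
        early_param("examplefeat", examplefeat_setup);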
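Finally, the new reserve_top_address() in pgtable.c is only safe to call before the first __set_fixmap(), which the BUG_ON(fixmaps > 0) guard enforces; once a fixmap entry is live, the fixmap can no longer move. A minimal sketch of an early-boot caller (example_hypervisor_reserve() is hypothetical; the declaration is assumed to live in <asm/fixmap.h> alongside __FIXADDR_TOP):

        #include <asm/fixmap.h>         /* assumed home of reserve_top_address() */

        /* Hypothetical hypervisor probe, run early in boot before any
         * fixmap entry has been established. */
        void __init example_hypervisor_reserve(void)
        {
                /*
                 * Poke a 64 MB hole at the top of the kernel address space.
                 * __FIXADDR_TOP drops below the hole and __VMALLOC_RESERVE
                 * grows by the same amount, so the fixmap and vmalloc areas
                 * both shift down to make room for the hypervisor.
                 */
                reserve_top_address(64 * 1024 * 1024);
        }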