diff options
author | Dave Airlie <airlied@redhat.com> | 2015-04-20 11:32:26 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2015-04-20 13:05:20 +1000 |
commit | 2c33ce009ca2389dbf0535d0672214d09738e35e (patch) | |
tree | 6186a6458c3c160385d794a23eaf07c786a9e61b /arch/x86/mm | |
parent | cec32a47010647e8b0603726ebb75b990a4057a4 (diff) | |
parent | 09d51602cf84a1264946711dd4ea0dddbac599a1 (diff) | |
download | blackbird-op-linux-2c33ce009ca2389dbf0535d0672214d09738e35e.tar.gz blackbird-op-linux-2c33ce009ca2389dbf0535d0672214d09738e35e.zip |
Merge Linus master into drm-next
The merge is clean, but the arm build fails afterwards,
due to API changes in the regulator tree.
I've included the patch into the merge to fix the build.
Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 8 | ||||
-rw-r--r-- | arch/x86/mm/init.c | 69 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 14 | ||||
-rw-r--r-- | arch/x86/mm/ioremap.c | 23 | ||||
-rw-r--r-- | arch/x86/mm/memtest.c | 118 | ||||
-rw-r--r-- | arch/x86/mm/mmap.c | 38 | ||||
-rw-r--r-- | arch/x86/mm/numa.c | 11 | ||||
-rw-r--r-- | arch/x86/mm/pageattr.c | 4 | ||||
-rw-r--r-- | arch/x86/mm/pat.c | 6 | ||||
-rw-r--r-- | arch/x86/mm/pgtable.c | 160 |
11 files changed, 240 insertions, 213 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index c4cc74006c61..a482d105172b 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -32,6 +32,4 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o obj-$(CONFIG_ACPI_NUMA) += srat.o obj-$(CONFIG_NUMA_EMU) += numa_emulation.o -obj-$(CONFIG_MEMTEST) += memtest.o - obj-$(CONFIG_X86_INTEL_MPX) += mpx.o diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index ede025fb46f1..181c53bac3a7 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -59,7 +59,7 @@ static nokprobe_inline int kprobes_fault(struct pt_regs *regs) int ret = 0; /* kprobe_running() needs smp_processor_id() */ - if (kprobes_built_in() && !user_mode_vm(regs)) { + if (kprobes_built_in() && !user_mode(regs)) { preempt_disable(); if (kprobe_running() && kprobe_fault_handler(regs, 14)) ret = 1; @@ -148,7 +148,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) instr = (void *)convert_ip_to_linear(current, regs); max_instr = instr + 15; - if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) + if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE_MAX) return 0; while (instr < max_instr) { @@ -1035,7 +1035,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs) if (error_code & PF_USER) return false; - if (!user_mode_vm(regs) && (regs->flags & X86_EFLAGS_AC)) + if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC)) return false; return true; @@ -1140,7 +1140,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, * User-mode registers count as a user access even for any * potential system fault or CPU buglet: */ - if (user_mode_vm(regs)) { + if (user_mode(regs)) { local_irq_enable(); error_code |= PF_USER; flags |= FAULT_FLAG_USER; diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index a110efca6d06..1d553186c434 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -29,29 +29,33 @@ /* * Tables translating between page_cache_type_t and pte encoding. - * Minimal supported modes are defined statically, modified if more supported - * cache modes are available. - * Index into __cachemode2pte_tbl is the cachemode. - * Index into __pte2cachemode_tbl are the caching attribute bits of the pte - * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2. + * + * Minimal supported modes are defined statically, they are modified + * during bootup if more supported cache modes are available. + * + * Index into __cachemode2pte_tbl[] is the cachemode. + * + * Index into __pte2cachemode_tbl[] are the caching attribute bits of the pte + * (_PAGE_PWT, _PAGE_PCD, _PAGE_PAT) at index bit positions 0, 1, 2. */ uint16_t __cachemode2pte_tbl[_PAGE_CACHE_MODE_NUM] = { - [_PAGE_CACHE_MODE_WB] = 0, - [_PAGE_CACHE_MODE_WC] = _PAGE_PWT, - [_PAGE_CACHE_MODE_UC_MINUS] = _PAGE_PCD, - [_PAGE_CACHE_MODE_UC] = _PAGE_PCD | _PAGE_PWT, - [_PAGE_CACHE_MODE_WT] = _PAGE_PCD, - [_PAGE_CACHE_MODE_WP] = _PAGE_PCD, + [_PAGE_CACHE_MODE_WB ] = 0 | 0 , + [_PAGE_CACHE_MODE_WC ] = _PAGE_PWT | 0 , + [_PAGE_CACHE_MODE_UC_MINUS] = 0 | _PAGE_PCD, + [_PAGE_CACHE_MODE_UC ] = _PAGE_PWT | _PAGE_PCD, + [_PAGE_CACHE_MODE_WT ] = 0 | _PAGE_PCD, + [_PAGE_CACHE_MODE_WP ] = 0 | _PAGE_PCD, }; EXPORT_SYMBOL(__cachemode2pte_tbl); + uint8_t __pte2cachemode_tbl[8] = { - [__pte2cm_idx(0)] = _PAGE_CACHE_MODE_WB, - [__pte2cm_idx(_PAGE_PWT)] = _PAGE_CACHE_MODE_WC, - [__pte2cm_idx(_PAGE_PCD)] = _PAGE_CACHE_MODE_UC_MINUS, - [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD)] = _PAGE_CACHE_MODE_UC, - [__pte2cm_idx(_PAGE_PAT)] = _PAGE_CACHE_MODE_WB, - [__pte2cm_idx(_PAGE_PWT | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC, - [__pte2cm_idx(_PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, + [__pte2cm_idx( 0 | 0 | 0 )] = _PAGE_CACHE_MODE_WB, + [__pte2cm_idx(_PAGE_PWT | 0 | 0 )] = _PAGE_CACHE_MODE_WC, + [__pte2cm_idx( 0 | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC_MINUS, + [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | 0 )] = _PAGE_CACHE_MODE_UC, + [__pte2cm_idx( 0 | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WB, + [__pte2cm_idx(_PAGE_PWT | 0 | _PAGE_PAT)] = _PAGE_CACHE_MODE_WC, + [__pte2cm_idx(0 | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC_MINUS, [__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC, }; EXPORT_SYMBOL(__pte2cachemode_tbl); @@ -131,21 +135,7 @@ void __init early_alloc_pgt_buf(void) int after_bootmem; -int direct_gbpages -#ifdef CONFIG_DIRECT_GBPAGES - = 1 -#endif -; - -static void __init init_gbpages(void) -{ -#ifdef CONFIG_X86_64 - if (direct_gbpages && cpu_has_gbpages) - printk(KERN_INFO "Using GB pages for direct mapping\n"); - else - direct_gbpages = 0; -#endif -} +early_param_on_off("gbpages", "nogbpages", direct_gbpages, CONFIG_X86_DIRECT_GBPAGES); struct map_range { unsigned long start; @@ -157,16 +147,12 @@ static int page_size_mask; static void __init probe_page_size_mask(void) { - init_gbpages(); - #if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK) /* * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. * This will simplify cpa(), which otherwise needs to support splitting * large pages into small in interrupt context, etc. */ - if (direct_gbpages) - page_size_mask |= 1 << PG_LEVEL_1G; if (cpu_has_pse) page_size_mask |= 1 << PG_LEVEL_2M; #endif @@ -179,6 +165,15 @@ static void __init probe_page_size_mask(void) if (cpu_has_pge) { cr4_set_bits_and_update_boot(X86_CR4_PGE); __supported_pte_mask |= _PAGE_GLOBAL; + } else + __supported_pte_mask &= ~_PAGE_GLOBAL; + + /* Enable 1 GB linear kernel mappings if available: */ + if (direct_gbpages && cpu_has_gbpages) { + printk(KERN_INFO "Using GB pages for direct mapping\n"); + page_size_mask |= 1 << PG_LEVEL_1G; + } else { + direct_gbpages = 0; } } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 30eb05ae7061..3fba623e3ba5 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -130,20 +130,6 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, return 0; } -static int __init parse_direct_gbpages_off(char *arg) -{ - direct_gbpages = 0; - return 0; -} -early_param("nogbpages", parse_direct_gbpages_off); - -static int __init parse_direct_gbpages_on(char *arg) -{ - direct_gbpages = 1; - return 0; -} -early_param("gbpages", parse_direct_gbpages_on); - /* * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the * physical space so we can cache the place of the first one and move diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index fdf617c00e2f..5ead4d6cf3a7 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -67,8 +67,13 @@ static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages, /* * Remap an arbitrary physical address space into the kernel virtual - * address space. Needed when the kernel wants to access high addresses - * directly. + * address space. It transparently creates kernel huge I/O mapping when + * the physical address is aligned by a huge page size (1GB or 2MB) and + * the requested size is at least the huge page size. + * + * NOTE: MTRRs can override PAT memory types with a 4KB granularity. + * Therefore, the mapping code falls back to use a smaller page toward 4KB + * when a mapping range is covered by non-WB type of MTRRs. * * NOTE! We need to allow non-page-aligned mappings too: we will obviously * have to convert them into an offset in a page-aligned mapping, but the @@ -326,6 +331,20 @@ void iounmap(volatile void __iomem *addr) } EXPORT_SYMBOL(iounmap); +int arch_ioremap_pud_supported(void) +{ +#ifdef CONFIG_X86_64 + return cpu_has_gbpages; +#else + return 0; +#endif +} + +int arch_ioremap_pmd_supported(void) +{ + return cpu_has_pse; +} + /* * Convert a physical pointer to a virtual kernel pointer for /dev/mem * access diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c deleted file mode 100644 index 1e9da795767a..000000000000 --- a/arch/x86/mm/memtest.c +++ /dev/null @@ -1,118 +0,0 @@ -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/string.h> -#include <linux/types.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/init.h> -#include <linux/pfn.h> -#include <linux/memblock.h> - -static u64 patterns[] __initdata = { - /* The first entry has to be 0 to leave memtest with zeroed memory */ - 0, - 0xffffffffffffffffULL, - 0x5555555555555555ULL, - 0xaaaaaaaaaaaaaaaaULL, - 0x1111111111111111ULL, - 0x2222222222222222ULL, - 0x4444444444444444ULL, - 0x8888888888888888ULL, - 0x3333333333333333ULL, - 0x6666666666666666ULL, - 0x9999999999999999ULL, - 0xccccccccccccccccULL, - 0x7777777777777777ULL, - 0xbbbbbbbbbbbbbbbbULL, - 0xddddddddddddddddULL, - 0xeeeeeeeeeeeeeeeeULL, - 0x7a6c7258554e494cULL, /* yeah ;-) */ -}; - -static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad) -{ - printk(KERN_INFO " %016llx bad mem addr %010llx - %010llx reserved\n", - (unsigned long long) pattern, - (unsigned long long) start_bad, - (unsigned long long) end_bad); - memblock_reserve(start_bad, end_bad - start_bad); -} - -static void __init memtest(u64 pattern, u64 start_phys, u64 size) -{ - u64 *p, *start, *end; - u64 start_bad, last_bad; - u64 start_phys_aligned; - const size_t incr = sizeof(pattern); - - start_phys_aligned = ALIGN(start_phys, incr); - start = __va(start_phys_aligned); - end = start + (size - (start_phys_aligned - start_phys)) / incr; - start_bad = 0; - last_bad = 0; - - for (p = start; p < end; p++) - *p = pattern; - - for (p = start; p < end; p++, start_phys_aligned += incr) { - if (*p == pattern) - continue; - if (start_phys_aligned == last_bad + incr) { - last_bad += incr; - continue; - } - if (start_bad) - reserve_bad_mem(pattern, start_bad, last_bad + incr); - start_bad = last_bad = start_phys_aligned; - } - if (start_bad) - reserve_bad_mem(pattern, start_bad, last_bad + incr); -} - -static void __init do_one_pass(u64 pattern, u64 start, u64 end) -{ - u64 i; - phys_addr_t this_start, this_end; - - for_each_free_mem_range(i, NUMA_NO_NODE, &this_start, &this_end, NULL) { - this_start = clamp_t(phys_addr_t, this_start, start, end); - this_end = clamp_t(phys_addr_t, this_end, start, end); - if (this_start < this_end) { - printk(KERN_INFO " %010llx - %010llx pattern %016llx\n", - (unsigned long long)this_start, - (unsigned long long)this_end, - (unsigned long long)cpu_to_be64(pattern)); - memtest(pattern, this_start, this_end - this_start); - } - } -} - -/* default is disabled */ -static int memtest_pattern __initdata; - -static int __init parse_memtest(char *arg) -{ - if (arg) - memtest_pattern = simple_strtoul(arg, NULL, 0); - else - memtest_pattern = ARRAY_SIZE(patterns); - - return 0; -} - -early_param("memtest", parse_memtest); - -void __init early_memtest(unsigned long start, unsigned long end) -{ - unsigned int i; - unsigned int idx = 0; - - if (!memtest_pattern) - return; - - printk(KERN_INFO "early_memtest: # of tests: %d\n", memtest_pattern); - for (i = memtest_pattern-1; i < UINT_MAX; --i) { - idx = i % ARRAY_SIZE(patterns); - do_one_pass(patterns[idx], start, end); - } -} diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index df4552bd239e..9d518d693b4b 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -65,24 +65,23 @@ static int mmap_is_legacy(void) return sysctl_legacy_va_layout; } -static unsigned long mmap_rnd(void) +unsigned long arch_mmap_rnd(void) { - unsigned long rnd = 0; + unsigned long rnd; /* - * 8 bits of randomness in 32bit mmaps, 20 address space bits - * 28 bits of randomness in 64bit mmaps, 40 address space bits - */ - if (current->flags & PF_RANDOMIZE) { - if (mmap_is_ia32()) - rnd = get_random_int() % (1<<8); - else - rnd = get_random_int() % (1<<28); - } + * 8 bits of randomness in 32bit mmaps, 20 address space bits + * 28 bits of randomness in 64bit mmaps, 40 address space bits + */ + if (mmap_is_ia32()) + rnd = (unsigned long)get_random_int() % (1<<8); + else + rnd = (unsigned long)get_random_int() % (1<<28); + return rnd << PAGE_SHIFT; } -static unsigned long mmap_base(void) +static unsigned long mmap_base(unsigned long rnd) { unsigned long gap = rlimit(RLIMIT_STACK); @@ -91,19 +90,19 @@ static unsigned long mmap_base(void) else if (gap > MAX_GAP) gap = MAX_GAP; - return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd()); + return PAGE_ALIGN(TASK_SIZE - gap - rnd); } /* * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64 * does, but not when emulating X86_32 */ -static unsigned long mmap_legacy_base(void) +static unsigned long mmap_legacy_base(unsigned long rnd) { if (mmap_is_ia32()) return TASK_UNMAPPED_BASE; else - return TASK_UNMAPPED_BASE + mmap_rnd(); + return TASK_UNMAPPED_BASE + rnd; } /* @@ -112,13 +111,18 @@ static unsigned long mmap_legacy_base(void) */ void arch_pick_mmap_layout(struct mm_struct *mm) { - mm->mmap_legacy_base = mmap_legacy_base(); - mm->mmap_base = mmap_base(); + unsigned long random_factor = 0UL; + + if (current->flags & PF_RANDOMIZE) + random_factor = arch_mmap_rnd(); + + mm->mmap_legacy_base = mmap_legacy_base(random_factor); if (mmap_is_legacy()) { mm->mmap_base = mm->mmap_legacy_base; mm->get_unmapped_area = arch_get_unmapped_area; } else { + mm->mmap_base = mmap_base(random_factor); mm->get_unmapped_area = arch_get_unmapped_area_topdown; } } diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index cd4785bbacb9..4053bb58bf92 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -482,9 +482,16 @@ static void __init numa_clear_kernel_node_hotplug(void) &memblock.reserved, mb->nid); } - /* Mark all kernel nodes. */ + /* + * Mark all kernel nodes. + * + * When booting with mem=nn[kMG] or in a kdump kernel, numa_meminfo + * may not include all the memblock.reserved memory ranges because + * trim_snb_memory() reserves specific pages for Sandy Bridge graphics. + */ for_each_memblock(reserved, r) - node_set(r->nid, numa_kernel_nodes); + if (r->nid != MAX_NUMNODES) + node_set(r->nid, numa_kernel_nodes); /* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */ for (i = 0; i < numa_meminfo.nr_blks; i++) { diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 536ea2fb6e33..89af288ec674 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -81,11 +81,9 @@ void arch_report_meminfo(struct seq_file *m) seq_printf(m, "DirectMap4M: %8lu kB\n", direct_pages_count[PG_LEVEL_2M] << 12); #endif -#ifdef CONFIG_X86_64 if (direct_gbpages) seq_printf(m, "DirectMap1G: %8lu kB\n", direct_pages_count[PG_LEVEL_1G] << 20); -#endif } #else static inline void split_page_count(int level) { } @@ -1654,13 +1652,11 @@ int set_memory_ro(unsigned long addr, int numpages) { return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0); } -EXPORT_SYMBOL_GPL(set_memory_ro); int set_memory_rw(unsigned long addr, int numpages) { return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0); } -EXPORT_SYMBOL_GPL(set_memory_rw); int set_memory_np(unsigned long addr, int numpages) { diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 7ac68698406c..35af6771a95a 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -610,7 +610,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, } #ifdef CONFIG_STRICT_DEVMEM -/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM*/ +/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM */ static inline int range_is_allowed(unsigned long pfn, unsigned long size) { return 1; @@ -628,8 +628,8 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) while (cursor < to) { if (!devmem_is_allowed(pfn)) { - printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx]\n", - current->comm, from, to - 1); + printk(KERN_INFO "Program %s tried to access /dev/mem between [mem %#010Lx-%#010Lx], PAT prevents it\n", + current->comm, from, to - 1); return 0; } cursor += PAGE_SIZE; diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 7b22adaad4f1..0b97d2c75df3 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -4,6 +4,7 @@ #include <asm/pgtable.h> #include <asm/tlb.h> #include <asm/fixmap.h> +#include <asm/mtrr.h> #define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO @@ -58,7 +59,7 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) tlb_remove_page(tlb, pte); } -#if PAGETABLE_LEVELS > 2 +#if CONFIG_PGTABLE_LEVELS > 2 void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) { struct page *page = virt_to_page(pmd); @@ -74,14 +75,14 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) tlb_remove_page(tlb, page); } -#if PAGETABLE_LEVELS > 3 +#if CONFIG_PGTABLE_LEVELS > 3 void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) { paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); tlb_remove_page(tlb, virt_to_page(pud)); } -#endif /* PAGETABLE_LEVELS > 3 */ -#endif /* PAGETABLE_LEVELS > 2 */ +#endif /* CONFIG_PGTABLE_LEVELS > 3 */ +#endif /* CONFIG_PGTABLE_LEVELS > 2 */ static inline void pgd_list_add(pgd_t *pgd) { @@ -117,9 +118,9 @@ static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) /* If the pgd points to a shared pagetable level (either the ptes in non-PAE, or shared PMD in PAE), then just copy the references from swapper_pg_dir. */ - if (PAGETABLE_LEVELS == 2 || - (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD) || - PAGETABLE_LEVELS == 4) { + if (CONFIG_PGTABLE_LEVELS == 2 || + (CONFIG_PGTABLE_LEVELS == 3 && SHARED_KERNEL_PMD) || + CONFIG_PGTABLE_LEVELS == 4) { clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY, swapper_pg_dir + KERNEL_PGD_BOUNDARY, KERNEL_PGD_PTRS); @@ -275,12 +276,87 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) } } +/* + * Xen paravirt assumes pgd table should be in one page. 64 bit kernel also + * assumes that pgd should be in one page. + * + * But kernel with PAE paging that is not running as a Xen domain + * only needs to allocate 32 bytes for pgd instead of one page. + */ +#ifdef CONFIG_X86_PAE + +#include <linux/slab.h> + +#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) +#define PGD_ALIGN 32 + +static struct kmem_cache *pgd_cache; + +static int __init pgd_cache_init(void) +{ + /* + * When PAE kernel is running as a Xen domain, it does not use + * shared kernel pmd. And this requires a whole page for pgd. + */ + if (!SHARED_KERNEL_PMD) + return 0; + + /* + * when PAE kernel is not running as a Xen domain, it uses + * shared kernel pmd. Shared kernel pmd does not require a whole + * page for pgd. We are able to just allocate a 32-byte for pgd. + * During boot time, we create a 32-byte slab for pgd table allocation. + */ + pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN, + SLAB_PANIC, NULL); + if (!pgd_cache) + return -ENOMEM; + + return 0; +} +core_initcall(pgd_cache_init); + +static inline pgd_t *_pgd_alloc(void) +{ + /* + * If no SHARED_KERNEL_PMD, PAE kernel is running as a Xen domain. + * We allocate one page for pgd. + */ + if (!SHARED_KERNEL_PMD) + return (pgd_t *)__get_free_page(PGALLOC_GFP); + + /* + * Now PAE kernel is not running as a Xen domain. We can allocate + * a 32-byte slab for pgd to save memory space. + */ + return kmem_cache_alloc(pgd_cache, PGALLOC_GFP); +} + +static inline void _pgd_free(pgd_t *pgd) +{ + if (!SHARED_KERNEL_PMD) + free_page((unsigned long)pgd); + else + kmem_cache_free(pgd_cache, pgd); +} +#else +static inline pgd_t *_pgd_alloc(void) +{ + return (pgd_t *)__get_free_page(PGALLOC_GFP); +} + +static inline void _pgd_free(pgd_t *pgd) +{ + free_page((unsigned long)pgd); +} +#endif /* CONFIG_X86_PAE */ + pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; pmd_t *pmds[PREALLOCATED_PMDS]; - pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); + pgd = _pgd_alloc(); if (pgd == NULL) goto out; @@ -310,7 +386,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) out_free_pmds: free_pmds(mm, pmds); out_free_pgd: - free_page((unsigned long)pgd); + _pgd_free(pgd); out: return NULL; } @@ -320,7 +396,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) pgd_mop_up_pmds(mm, pgd); pgd_dtor(pgd); paravirt_pgd_free(mm, pgd); - free_page((unsigned long)pgd); + _pgd_free(pgd); } /* @@ -485,3 +561,67 @@ void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, { __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags)); } + +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP +int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) +{ + u8 mtrr; + + /* + * Do not use a huge page when the range is covered by non-WB type + * of MTRRs. + */ + mtrr = mtrr_type_lookup(addr, addr + PUD_SIZE); + if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != 0xFF)) + return 0; + + prot = pgprot_4k_2_large(prot); + + set_pte((pte_t *)pud, pfn_pte( + (u64)addr >> PAGE_SHIFT, + __pgprot(pgprot_val(prot) | _PAGE_PSE))); + + return 1; +} + +int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) +{ + u8 mtrr; + + /* + * Do not use a huge page when the range is covered by non-WB type + * of MTRRs. + */ + mtrr = mtrr_type_lookup(addr, addr + PMD_SIZE); + if ((mtrr != MTRR_TYPE_WRBACK) && (mtrr != 0xFF)) + return 0; + + prot = pgprot_4k_2_large(prot); + + set_pte((pte_t *)pmd, pfn_pte( + (u64)addr >> PAGE_SHIFT, + __pgprot(pgprot_val(prot) | _PAGE_PSE))); + + return 1; +} + +int pud_clear_huge(pud_t *pud) +{ + if (pud_large(*pud)) { + pud_clear(pud); + return 1; + } + + return 0; +} + +int pmd_clear_huge(pmd_t *pmd) +{ + if (pmd_large(*pmd)) { + pmd_clear(pmd); + return 1; + } + + return 0; +} +#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ |