diff options
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/Makefile | 1 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c | 3 | ||||
-rw-r--r-- | arch/powerpc/mm/hugetlbpage-book3e.c | 121 | ||||
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c | 379 | ||||
-rw-r--r-- | arch/powerpc/mm/init_32.c | 9 | ||||
-rw-r--r-- | arch/powerpc/mm/mem.c | 5 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_context_nohash.c | 5 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable.c | 3 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb_low_64e.S | 24 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb_nohash.c | 46 |
10 files changed, 536 insertions, 60 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index bdca46e08382..991ee813d2a8 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_PPC_MM_SLICES) += slice.o ifeq ($(CONFIG_HUGETLB_PAGE),y) obj-y += hugetlbpage.o obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o +obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o endif obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 26b2872b3d00..1f8b2a05e3d0 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -105,9 +105,6 @@ int mmu_kernel_ssize = MMU_SEGSIZE_256M; int mmu_highuser_ssize = MMU_SEGSIZE_256M; u16 mmu_slb_size = 64; EXPORT_SYMBOL_GPL(mmu_slb_size); -#ifdef CONFIG_HUGETLB_PAGE -unsigned int HPAGE_SHIFT; -#endif #ifdef CONFIG_PPC_64K_PAGES int mmu_ci_restrictions; #endif diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c new file mode 100644 index 000000000000..1295b7c1cdac --- /dev/null +++ b/arch/powerpc/mm/hugetlbpage-book3e.c @@ -0,0 +1,121 @@ +/* + * PPC Huge TLB Page Support for Book3E MMU + * + * Copyright (C) 2009 David Gibson, IBM Corporation. + * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor + * + */ +#include <linux/mm.h> +#include <linux/hugetlb.h> + +static inline int mmu_get_tsize(int psize) +{ + return mmu_psize_defs[psize].enc; +} + +static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid) +{ + int found = 0; + + mtspr(SPRN_MAS6, pid << 16); + if (mmu_has_feature(MMU_FTR_USE_TLBRSRV)) { + asm volatile( + "li %0,0\n" + "tlbsx. 0,%1\n" + "bne 1f\n" + "li %0,1\n" + "1:\n" + : "=&r"(found) : "r"(ea)); + } else { + asm volatile( + "tlbsx 0,%1\n" + "mfspr %0,0x271\n" + "srwi %0,%0,31\n" + : "=&r"(found) : "r"(ea)); + } + + return found; +} + +void book3e_hugetlb_preload(struct mm_struct *mm, unsigned long ea, pte_t pte) +{ + unsigned long mas1, mas2; + u64 mas7_3; + unsigned long psize, tsize, shift; + unsigned long flags; + +#ifdef CONFIG_PPC_FSL_BOOK3E + int index, lz, ncams; + struct vm_area_struct *vma; +#endif + + if (unlikely(is_kernel_addr(ea))) + return; + +#ifdef CONFIG_MM_SLICES + psize = mmu_get_tsize(get_slice_psize(mm, ea)); + tsize = mmu_get_psize(psize); + shift = mmu_psize_defs[psize].shift; +#else + vma = find_vma(mm, ea); + psize = vma_mmu_pagesize(vma); /* returns actual size in bytes */ + asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (psize)); + shift = 31 - lz; + tsize = 21 - lz; +#endif + + /* + * We can't be interrupted while we're setting up the MAS + * regusters or after we've confirmed that no tlb exists. + */ + local_irq_save(flags); + + if (unlikely(book3e_tlb_exists(ea, mm->context.id))) { + local_irq_restore(flags); + return; + } + +#ifdef CONFIG_PPC_FSL_BOOK3E + ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; + + /* We have to use the CAM(TLB1) on FSL parts for hugepages */ + index = __get_cpu_var(next_tlbcam_idx); + mtspr(SPRN_MAS0, MAS0_ESEL(index) | MAS0_TLBSEL(1)); + + /* Just round-robin the entries and wrap when we hit the end */ + if (unlikely(index == ncams - 1)) + __get_cpu_var(next_tlbcam_idx) = tlbcam_index; + else + __get_cpu_var(next_tlbcam_idx)++; +#endif + mas1 = MAS1_VALID | MAS1_TID(mm->context.id) | MAS1_TSIZE(tsize); + mas2 = ea & ~((1UL << shift) - 1); + mas2 |= (pte_val(pte) >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK; + mas7_3 = (u64)pte_pfn(pte) << PAGE_SHIFT; + mas7_3 |= (pte_val(pte) >> PTE_BAP_SHIFT) & MAS3_BAP_MASK; + if (!pte_dirty(pte)) + mas7_3 &= ~(MAS3_SW|MAS3_UW); + + mtspr(SPRN_MAS1, mas1); + mtspr(SPRN_MAS2, mas2); + + if (mmu_has_feature(MMU_FTR_USE_PAIRED_MAS)) { + mtspr(SPRN_MAS7_MAS3, mas7_3); + } else { + mtspr(SPRN_MAS7, upper_32_bits(mas7_3)); + mtspr(SPRN_MAS3, lower_32_bits(mas7_3)); + } + + asm volatile ("tlbwe"); + + local_irq_restore(flags); +} + +void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr) +{ + struct hstate *hstate = hstate_file(vma->vm_file); + unsigned long tsize = huge_page_shift(hstate) - 10; + + __flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, tsize, 0); + +} diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 0b9a5c1901b9..3a5f59dcbb33 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -1,7 +1,8 @@ /* - * PPC64 (POWER4) Huge TLB Page Support for Kernel. + * PPC Huge TLB Page Support for Kernel. * * Copyright (C) 2003 David Gibson, IBM Corporation. + * Copyright (C) 2011 Becky Bruce, Freescale Semiconductor * * Based on the IA-32 version: * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> @@ -11,24 +12,39 @@ #include <linux/io.h> #include <linux/slab.h> #include <linux/hugetlb.h> +#include <linux/of_fdt.h> +#include <linux/memblock.h> +#include <linux/bootmem.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/tlb.h> +#include <asm/setup.h> #define PAGE_SHIFT_64K 16 #define PAGE_SHIFT_16M 24 #define PAGE_SHIFT_16G 34 -#define MAX_NUMBER_GPAGES 1024 +unsigned int HPAGE_SHIFT; -/* Tracks the 16G pages after the device tree is scanned and before the - * huge_boot_pages list is ready. */ -static unsigned long gpage_freearray[MAX_NUMBER_GPAGES]; +/* + * Tracks gpages after the device tree is scanned and before the + * huge_boot_pages list is ready. On 64-bit implementations, this is + * just used to track 16G pages and so is a single array. 32-bit + * implementations may have more than one gpage size due to limitations + * of the memory allocators, so we need multiple arrays + */ +#ifdef CONFIG_PPC64 +#define MAX_NUMBER_GPAGES 1024 +static u64 gpage_freearray[MAX_NUMBER_GPAGES]; static unsigned nr_gpages; - -/* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad() - * will choke on pointers to hugepte tables, which is handy for - * catching screwups early. */ +#else +#define MAX_NUMBER_GPAGES 128 +struct psize_gpages { + u64 gpage_list[MAX_NUMBER_GPAGES]; + unsigned int nr_gpages; +}; +static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT]; +#endif static inline int shift_to_mmu_psize(unsigned int shift) { @@ -49,25 +65,6 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) #define hugepd_none(hpd) ((hpd).pd == 0) -static inline pte_t *hugepd_page(hugepd_t hpd) -{ - BUG_ON(!hugepd_ok(hpd)); - return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | 0xc000000000000000); -} - -static inline unsigned int hugepd_shift(hugepd_t hpd) -{ - return hpd.pd & HUGEPD_SHIFT_MASK; -} - -static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, unsigned pdshift) -{ - unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp); - pte_t *dir = hugepd_page(*hpdp); - - return dir + idx; -} - pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift) { pgd_t *pg; @@ -93,7 +90,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift if (is_hugepd(pm)) hpdp = (hugepd_t *)pm; else if (!pmd_none(*pm)) { - return pte_offset_map(pm, ea); + return pte_offset_kernel(pm, ea); } } } @@ -114,8 +111,18 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, unsigned long address, unsigned pdshift, unsigned pshift) { - pte_t *new = kmem_cache_zalloc(PGT_CACHE(pdshift - pshift), - GFP_KERNEL|__GFP_REPEAT); + struct kmem_cache *cachep; + pte_t *new; + +#ifdef CONFIG_PPC64 + cachep = PGT_CACHE(pdshift - pshift); +#else + int i; + int num_hugepd = 1 << (pshift - pdshift); + cachep = hugepte_cache; +#endif + + new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT); BUG_ON(pshift > HUGEPD_SHIFT_MASK); BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); @@ -124,10 +131,31 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, return -ENOMEM; spin_lock(&mm->page_table_lock); +#ifdef CONFIG_PPC64 if (!hugepd_none(*hpdp)) - kmem_cache_free(PGT_CACHE(pdshift - pshift), new); + kmem_cache_free(cachep, new); else - hpdp->pd = ((unsigned long)new & ~0x8000000000000000) | pshift; + hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; +#else + /* + * We have multiple higher-level entries that point to the same + * actual pte location. Fill in each as we go and backtrack on error. + * We need all of these so the DTLB pgtable walk code can find the + * right higher-level entry without knowing if it's a hugepage or not. + */ + for (i = 0; i < num_hugepd; i++, hpdp++) { + if (unlikely(!hugepd_none(*hpdp))) + break; + else + hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift; + } + /* If we bailed from the for loop early, an error occurred, clean up */ + if (i < num_hugepd) { + for (i = i - 1 ; i >= 0; i--, hpdp--) + hpdp->pd = 0; + kmem_cache_free(cachep, new); + } +#endif spin_unlock(&mm->page_table_lock); return 0; } @@ -169,11 +197,132 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz return hugepte_offset(hpdp, addr, pdshift); } +#ifdef CONFIG_PPC32 /* Build list of addresses of gigantic pages. This function is used in early * boot before the buddy or bootmem allocator is setup. */ -void add_gpage(unsigned long addr, unsigned long page_size, - unsigned long number_of_pages) +void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) +{ + unsigned int idx = shift_to_mmu_psize(__ffs(page_size)); + int i; + + if (addr == 0) + return; + + gpage_freearray[idx].nr_gpages = number_of_pages; + + for (i = 0; i < number_of_pages; i++) { + gpage_freearray[idx].gpage_list[i] = addr; + addr += page_size; + } +} + +/* + * Moves the gigantic page addresses from the temporary list to the + * huge_boot_pages list. + */ +int alloc_bootmem_huge_page(struct hstate *hstate) +{ + struct huge_bootmem_page *m; + int idx = shift_to_mmu_psize(hstate->order + PAGE_SHIFT); + int nr_gpages = gpage_freearray[idx].nr_gpages; + + if (nr_gpages == 0) + return 0; + +#ifdef CONFIG_HIGHMEM + /* + * If gpages can be in highmem we can't use the trick of storing the + * data structure in the page; allocate space for this + */ + m = alloc_bootmem(sizeof(struct huge_bootmem_page)); + m->phys = gpage_freearray[idx].gpage_list[--nr_gpages]; +#else + m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]); +#endif + + list_add(&m->list, &huge_boot_pages); + gpage_freearray[idx].nr_gpages = nr_gpages; + gpage_freearray[idx].gpage_list[nr_gpages] = 0; + m->hstate = hstate; + + return 1; +} +/* + * Scan the command line hugepagesz= options for gigantic pages; store those in + * a list that we use to allocate the memory once all options are parsed. + */ + +unsigned long gpage_npages[MMU_PAGE_COUNT]; + +static int __init do_gpage_early_setup(char *param, char *val) +{ + static phys_addr_t size; + unsigned long npages; + + /* + * The hugepagesz and hugepages cmdline options are interleaved. We + * use the size variable to keep track of whether or not this was done + * properly and skip over instances where it is incorrect. Other + * command-line parsing code will issue warnings, so we don't need to. + * + */ + if ((strcmp(param, "default_hugepagesz") == 0) || + (strcmp(param, "hugepagesz") == 0)) { + size = memparse(val, NULL); + } else if (strcmp(param, "hugepages") == 0) { + if (size != 0) { + if (sscanf(val, "%lu", &npages) <= 0) + npages = 0; + gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages; + size = 0; + } + } + return 0; +} + + +/* + * This function allocates physical space for pages that are larger than the + * buddy allocator can handle. We want to allocate these in highmem because + * the amount of lowmem is limited. This means that this function MUST be + * called before lowmem_end_addr is set up in MMU_init() in order for the lmb + * allocate to grab highmem. + */ +void __init reserve_hugetlb_gpages(void) +{ + static __initdata char cmdline[COMMAND_LINE_SIZE]; + phys_addr_t size, base; + int i; + + strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE); + parse_args("hugetlb gpages", cmdline, NULL, 0, &do_gpage_early_setup); + + /* + * Walk gpage list in reverse, allocating larger page sizes first. + * Skip over unsupported sizes, or sizes that have 0 gpages allocated. + * When we reach the point in the list where pages are no longer + * considered gpages, we're done. + */ + for (i = MMU_PAGE_COUNT-1; i >= 0; i--) { + if (mmu_psize_defs[i].shift == 0 || gpage_npages[i] == 0) + continue; + else if (mmu_psize_to_shift(i) < (MAX_ORDER + PAGE_SHIFT)) + break; + + size = (phys_addr_t)(1ULL << mmu_psize_to_shift(i)); + base = memblock_alloc_base(size * gpage_npages[i], size, + MEMBLOCK_ALLOC_ANYWHERE); + add_gpage(base, size, gpage_npages[i]); + } +} + +#else /* PPC64 */ + +/* Build list of addresses of gigantic pages. This function is used in early + * boot before the buddy or bootmem allocator is setup. + */ +void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) { if (!addr) return; @@ -199,19 +348,79 @@ int alloc_bootmem_huge_page(struct hstate *hstate) m->hstate = hstate; return 1; } +#endif int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) { return 0; } +#ifdef CONFIG_PPC32 +#define HUGEPD_FREELIST_SIZE \ + ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t)) + +struct hugepd_freelist { + struct rcu_head rcu; + unsigned int index; + void *ptes[0]; +}; + +static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur); + +static void hugepd_free_rcu_callback(struct rcu_head *head) +{ + struct hugepd_freelist *batch = + container_of(head, struct hugepd_freelist, rcu); + unsigned int i; + + for (i = 0; i < batch->index; i++) + kmem_cache_free(hugepte_cache, batch->ptes[i]); + + free_page((unsigned long)batch); +} + +static void hugepd_free(struct mmu_gather *tlb, void *hugepte) +{ + struct hugepd_freelist **batchp; + + batchp = &__get_cpu_var(hugepd_freelist_cur); + + if (atomic_read(&tlb->mm->mm_users) < 2 || + cpumask_equal(mm_cpumask(tlb->mm), + cpumask_of(smp_processor_id()))) { + kmem_cache_free(hugepte_cache, hugepte); + return; + } + + if (*batchp == NULL) { + *batchp = (struct hugepd_freelist *)__get_free_page(GFP_ATOMIC); + (*batchp)->index = 0; + } + + (*batchp)->ptes[(*batchp)->index++] = hugepte; + if ((*batchp)->index == HUGEPD_FREELIST_SIZE) { + call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback); + *batchp = NULL; + } +} +#endif + static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift, unsigned long start, unsigned long end, unsigned long floor, unsigned long ceiling) { pte_t *hugepte = hugepd_page(*hpdp); - unsigned shift = hugepd_shift(*hpdp); + int i; + unsigned long pdmask = ~((1UL << pdshift) - 1); + unsigned int num_hugepd = 1; + +#ifdef CONFIG_PPC64 + unsigned int shift = hugepd_shift(*hpdp); +#else + /* Note: On 32-bit the hpdp may be the first of several */ + num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift)); +#endif start &= pdmask; if (start < floor) @@ -224,9 +433,15 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif if (end - 1 > ceiling - 1) return; - hpdp->pd = 0; + for (i = 0; i < num_hugepd; i++, hpdp++) + hpdp->pd = 0; + tlb->need_flush = 1; +#ifdef CONFIG_PPC64 pgtable_free_tlb(tlb, hugepte, pdshift - shift); +#else + hugepd_free(tlb, hugepte); +#endif } static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, @@ -331,18 +546,27 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, * too. */ - pgd = pgd_offset(tlb->mm, addr); do { next = pgd_addr_end(addr, end); + pgd = pgd_offset(tlb->mm, addr); if (!is_hugepd(pgd)) { if (pgd_none_or_clear_bad(pgd)) continue; hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); } else { +#ifdef CONFIG_PPC32 + /* + * Increment next by the size of the huge mapping since + * on 32-bit there may be more than one entry at the pgd + * level for a single hugepage, but all of them point to + * the same kmem cache that holds the hugepte. + */ + next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd)); +#endif free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT, addr, next, floor, ceiling); } - } while (pgd++, addr = next, addr != end); + } while (addr = next, addr != end); } struct page * @@ -466,17 +690,35 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { +#ifdef CONFIG_MM_SLICES struct hstate *hstate = hstate_file(file); int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); +#else + return get_unmapped_area(file, addr, len, pgoff, flags); +#endif } unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) { +#ifdef CONFIG_MM_SLICES unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start); return 1UL << mmu_psize_to_shift(psize); +#else + if (!is_vm_hugetlb_page(vma)) + return PAGE_SIZE; + + return huge_page_size(hstate_vma(vma)); +#endif +} + +static inline bool is_power_of_4(unsigned long x) +{ + if (is_power_of_2(x)) + return (__ilog2(x) % 2) ? false : true; + return false; } static int __init add_huge_page_size(unsigned long long size) @@ -486,9 +728,14 @@ static int __init add_huge_page_size(unsigned long long size) /* Check that it is a page size supported by the hardware and * that it fits within pagetable and slice limits. */ +#ifdef CONFIG_PPC_FSL_BOOK3E + if ((size < PAGE_SIZE) || !is_power_of_4(size)) + return -EINVAL; +#else if (!is_power_of_2(size) || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT)) return -EINVAL; +#endif if ((mmu_psize = shift_to_mmu_psize(shift)) < 0) return -EINVAL; @@ -525,6 +772,46 @@ static int __init hugepage_setup_sz(char *str) } __setup("hugepagesz=", hugepage_setup_sz); +#ifdef CONFIG_FSL_BOOKE +struct kmem_cache *hugepte_cache; +static int __init hugetlbpage_init(void) +{ + int psize; + + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { + unsigned shift; + + if (!mmu_psize_defs[psize].shift) + continue; + + shift = mmu_psize_to_shift(psize); + + /* Don't treat normal page sizes as huge... */ + if (shift != PAGE_SHIFT) + if (add_huge_page_size(1ULL << shift) < 0) + continue; + } + + /* + * Create a kmem cache for hugeptes. The bottom bits in the pte have + * size information encoded in them, so align them to allow this + */ + hugepte_cache = kmem_cache_create("hugepte-cache", sizeof(pte_t), + HUGEPD_SHIFT_MASK + 1, 0, NULL); + if (hugepte_cache == NULL) + panic("%s: Unable to create kmem cache for hugeptes\n", + __func__); + + /* Default hpage size = 4M */ + if (mmu_psize_defs[MMU_PAGE_4M].shift) + HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift; + else + panic("%s: Unable to set default huge page size\n", __func__); + + + return 0; +} +#else static int __init hugetlbpage_init(void) { int psize; @@ -567,15 +854,23 @@ static int __init hugetlbpage_init(void) return 0; } - +#endif module_init(hugetlbpage_init); void flush_dcache_icache_hugepage(struct page *page) { int i; + void *start; BUG_ON(!PageCompound(page)); - for (i = 0; i < (1UL << compound_order(page)); i++) - __flush_dcache_icache(page_address(page+i)); + for (i = 0; i < (1UL << compound_order(page)); i++) { + if (!PageHighMem(page)) { + __flush_dcache_icache(page_address(page+i)); + } else { + start = kmap_atomic(page+i, KM_PPC_SYNC_ICACHE); + __flush_dcache_icache(start); + kunmap_atomic(start, KM_PPC_SYNC_ICACHE); + } + } } diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index c77fef56dad6..161cefde5c15 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -32,6 +32,8 @@ #include <linux/pagemap.h> #include <linux/memblock.h> #include <linux/gfp.h> +#include <linux/slab.h> +#include <linux/hugetlb.h> #include <asm/pgalloc.h> #include <asm/prom.h> @@ -44,6 +46,7 @@ #include <asm/tlb.h> #include <asm/sections.h> #include <asm/system.h> +#include <asm/hugetlb.h> #include "mmu_decl.h" @@ -123,6 +126,12 @@ void __init MMU_init(void) /* parse args from command line */ MMU_setup(); + /* + * Reserve gigantic pages for hugetlb. This MUST occur before + * lowmem_end_addr is initialized below. + */ + reserve_hugetlb_gpages(); + if (memblock.memory.cnt > 1) { #ifndef CONFIG_WII memblock.memory.cnt = 1; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index c781bbcf7338..ad9cf49dfb89 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -548,4 +548,9 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, return; hash_preload(vma->vm_mm, address, access, trap); #endif /* CONFIG_PPC_STD_MMU */ +#if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \ + && defined(CONFIG_HUGETLB_PAGE) + if (is_vm_hugetlb_page(vma)) + book3e_hugetlb_preload(vma->vm_mm, address, *ptep); +#endif } diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 336807de550e..5b63bd3da4a9 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -292,6 +292,11 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm) mm->context.id = MMU_NO_CONTEXT; mm->context.active = 0; +#ifdef CONFIG_PPC_MM_SLICES + if (slice_mm_new_context(mm)) + slice_set_user_psize(mm, mmu_virtual_psize); +#endif + return 0; } diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index af40c8768a78..214130a4edc6 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -27,6 +27,7 @@ #include <linux/init.h> #include <linux/percpu.h> #include <linux/hardirq.h> +#include <linux/hugetlb.h> #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/tlb.h> @@ -212,7 +213,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, entry = set_access_flags_filter(entry, vma, dirty); changed = !pte_same(*(ptep), entry); if (changed) { - if (!(vma->vm_flags & VM_HUGETLB)) + if (!is_vm_hugetlb_page(vma)) assert_pte_locked(vma->vm_mm, address); __ptep_set_access_flags(ptep, entry); flush_tlb_page_nohash(vma, address); diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index 4ebb34bc01d6..dc4a5f385e41 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -553,24 +553,24 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV) rldicl r11,r16,64-VPTE_PGD_SHIFT,64-PGD_INDEX_SIZE-3 clrrdi r10,r11,3 ldx r15,r10,r15 - cmpldi cr0,r15,0 - beq virt_page_table_tlb_miss_fault + cmpdi cr0,r15,0 + bge virt_page_table_tlb_miss_fault #ifndef CONFIG_PPC_64K_PAGES /* Get to PUD entry */ rldicl r11,r16,64-VPTE_PUD_SHIFT,64-PUD_INDEX_SIZE-3 clrrdi r10,r11,3 ldx r15,r10,r15 - cmpldi cr0,r15,0 - beq virt_page_table_tlb_miss_fault + cmpdi cr0,r15,0 + bge virt_page_table_tlb_miss_fault #endif /* CONFIG_PPC_64K_PAGES */ /* Get to PMD entry */ rldicl r11,r16,64-VPTE_PMD_SHIFT,64-PMD_INDEX_SIZE-3 clrrdi r10,r11,3 ldx r15,r10,r15 - cmpldi cr0,r15,0 - beq virt_page_table_tlb_miss_fault + cmpdi cr0,r15,0 + bge virt_page_table_tlb_miss_fault /* Ok, we're all right, we can now create a kernel translation for * a 4K or 64K page from r16 -> r15. @@ -802,24 +802,24 @@ htw_tlb_miss: rldicl r11,r16,64-(PGDIR_SHIFT-3),64-PGD_INDEX_SIZE-3 clrrdi r10,r11,3 ldx r15,r10,r15 - cmpldi cr0,r15,0 - beq htw_tlb_miss_fault + cmpdi cr0,r15,0 + bge htw_tlb_miss_fault #ifndef CONFIG_PPC_64K_PAGES /* Get to PUD entry */ rldicl r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3 clrrdi r10,r11,3 ldx r15,r10,r15 - cmpldi cr0,r15,0 - beq htw_tlb_miss_fault + cmpdi cr0,r15,0 + bge htw_tlb_miss_fault #endif /* CONFIG_PPC_64K_PAGES */ /* Get to PMD entry */ rldicl r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3 clrrdi r10,r11,3 ldx r15,r10,r15 - cmpldi cr0,r15,0 - beq htw_tlb_miss_fault + cmpdi cr0,r15,0 + bge htw_tlb_miss_fault /* Ok, we're all right, we can now create an indirect entry for * a 1M or 256M page. diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index d32ec643c231..afc95c7304ae 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -36,14 +36,49 @@ #include <linux/spinlock.h> #include <linux/memblock.h> #include <linux/of_fdt.h> +#include <linux/hugetlb.h> #include <asm/tlbflush.h> #include <asm/tlb.h> #include <asm/code-patching.h> +#include <asm/hugetlb.h> #include "mmu_decl.h" -#ifdef CONFIG_PPC_BOOK3E +/* + * This struct lists the sw-supported page sizes. The hardawre MMU may support + * other sizes not listed here. The .ind field is only used on MMUs that have + * indirect page table entries. + */ +#ifdef CONFIG_PPC_BOOK3E_MMU +#ifdef CONFIG_FSL_BOOKE +struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { + [MMU_PAGE_4K] = { + .shift = 12, + .enc = BOOK3E_PAGESZ_4K, + }, + [MMU_PAGE_4M] = { + .shift = 22, + .enc = BOOK3E_PAGESZ_4M, + }, + [MMU_PAGE_16M] = { + .shift = 24, + .enc = BOOK3E_PAGESZ_16M, + }, + [MMU_PAGE_64M] = { + .shift = 26, + .enc = BOOK3E_PAGESZ_64M, + }, + [MMU_PAGE_256M] = { + .shift = 28, + .enc = BOOK3E_PAGESZ_256M, + }, + [MMU_PAGE_1G] = { + .shift = 30, + .enc = BOOK3E_PAGESZ_1GB, + }, +}; +#else struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { [MMU_PAGE_4K] = { .shift = 12, @@ -77,6 +112,8 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { .enc = BOOK3E_PAGESZ_1GB, }, }; +#endif /* CONFIG_FSL_BOOKE */ + static inline int mmu_get_tsize(int psize) { return mmu_psize_defs[psize].enc; @@ -87,7 +124,7 @@ static inline int mmu_get_tsize(int psize) /* This isn't used on !Book3E for now */ return 0; } -#endif +#endif /* CONFIG_PPC_BOOK3E_MMU */ /* The variables below are currently only used on 64-bit Book3E * though this will probably be made common with other nohash @@ -266,6 +303,11 @@ void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { +#ifdef CONFIG_HUGETLB_PAGE + if (is_vm_hugetlb_page(vma)) + flush_hugetlb_page(vma, vmaddr); +#endif + __flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr, mmu_get_tsize(mmu_virtual_psize), 0); } |