diff options
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/44x_mmu.c | 29 | ||||
-rw-r--r-- | arch/powerpc/mm/fault.c | 28 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_low_64.S | 17 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c | 93 | ||||
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c | 359 | ||||
-rw-r--r-- | arch/powerpc/mm/init_32.c | 7 | ||||
-rw-r--r-- | arch/powerpc/mm/init_64.c | 36 | ||||
-rw-r--r-- | arch/powerpc/mm/mem.c | 48 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_decl.h | 6 | ||||
-rw-r--r-- | arch/powerpc/mm/numa.c | 313 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable_32.c | 28 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable_64.c | 16 | ||||
-rw-r--r-- | arch/powerpc/mm/ppc_mmu_32.c | 27 | ||||
-rw-r--r-- | arch/powerpc/mm/slice.c | 177 | ||||
-rw-r--r-- | arch/powerpc/mm/stab.c | 4 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb_64.c | 11 |
16 files changed, 833 insertions, 366 deletions
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c index 953fb919eb06..98052ac96580 100644 --- a/arch/powerpc/mm/44x_mmu.c +++ b/arch/powerpc/mm/44x_mmu.c @@ -27,6 +27,7 @@ #include <asm/mmu.h> #include <asm/system.h> #include <asm/page.h> +#include <asm/cacheflush.h> #include "mmu_decl.h" @@ -37,11 +38,35 @@ unsigned int tlb_44x_index; /* = 0 */ unsigned int tlb_44x_hwater = PPC44x_TLB_SIZE - 1 - PPC44x_EARLY_TLBS; int icache_44x_need_flush; +static void __init ppc44x_update_tlb_hwater(void) +{ + extern unsigned int tlb_44x_patch_hwater_D[]; + extern unsigned int tlb_44x_patch_hwater_I[]; + + /* The TLB miss handlers hard codes the watermark in a cmpli + * instruction to improve performances rather than loading it + * from the global variable. Thus, we patch the instructions + * in the 2 TLB miss handlers when updating the value + */ + tlb_44x_patch_hwater_D[0] = (tlb_44x_patch_hwater_D[0] & 0xffff0000) | + tlb_44x_hwater; + flush_icache_range((unsigned long)&tlb_44x_patch_hwater_D[0], + (unsigned long)&tlb_44x_patch_hwater_D[1]); + tlb_44x_patch_hwater_I[0] = (tlb_44x_patch_hwater_I[0] & 0xffff0000) | + tlb_44x_hwater; + flush_icache_range((unsigned long)&tlb_44x_patch_hwater_I[0], + (unsigned long)&tlb_44x_patch_hwater_I[1]); +} + /* * "Pins" a 256MB TLB entry in AS0 for kernel lowmem */ static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys) { + unsigned int entry = tlb_44x_hwater--; + + ppc44x_update_tlb_hwater(); + __asm__ __volatile__( "tlbwe %2,%3,%4\n" "tlbwe %1,%3,%5\n" @@ -50,7 +75,7 @@ static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys) : "r" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G), "r" (phys), "r" (virt | PPC44x_TLB_VALID | PPC44x_TLB_256M), - "r" (tlb_44x_hwater--), /* slot for this TLB entry */ + "r" (entry), "i" (PPC44x_TLB_PAGEID), "i" (PPC44x_TLB_XLAT), "i" (PPC44x_TLB_ATTRIB)); @@ -58,6 +83,8 @@ static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys) void __init MMU_init_hw(void) { + ppc44x_update_tlb_hwater(); + flush_instruction_cache(); } diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 7b2510799266..565b7a237c84 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -100,31 +100,6 @@ static int store_updates_sp(struct pt_regs *regs) return 0; } -#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) -static void do_dabr(struct pt_regs *regs, unsigned long address, - unsigned long error_code) -{ - siginfo_t info; - - if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, - 11, SIGSEGV) == NOTIFY_STOP) - return; - - if (debugger_dabr_match(regs)) - return; - - /* Clear the DABR */ - set_dabr(0); - - /* Deliver the signal to userspace */ - info.si_signo = SIGTRAP; - info.si_errno = 0; - info.si_code = TRAP_HWBKPT; - info.si_addr = (void __user *)address; - force_sig_info(SIGTRAP, &info, current); -} -#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/ - /* * For 600- and 800-family processors, the error_code parameter is DSISR * for a data fault, SRR1 for an instruction fault. For 400-family processors @@ -306,7 +281,8 @@ good_area: flush_dcache_icache_page(page); set_bit(PG_arch_1, &page->flags); } - pte_update(ptep, 0, _PAGE_HWEXEC); + pte_update(ptep, 0, _PAGE_HWEXEC | + _PAGE_ACCESSED); _tlbie(address, mm->context.id); pte_unmap_unlock(ptep, ptl); up_read(&mm->mmap_sem); diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index 70f4c833fa32..a719f53921a5 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -388,7 +388,7 @@ _GLOBAL(__hash_page_4K) */ rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */ or r30,r30,r31 - ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE + ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED oris r30,r30,_PAGE_COMBO@h /* Write the linux PTE atomically (setting busy) */ stdcx. r30,0,r6 @@ -468,7 +468,7 @@ END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE) * go to out-of-line code to try to modify the HPTE. We look for * the bit at (1 >> (index + 32)) */ - andi. r0,r31,_PAGE_HASHPTE + rldicl. r0,r31,64-12,48 li r26,0 /* Default hidx */ beq htab_insert_pte @@ -726,11 +726,11 @@ BEGIN_FTR_SECTION bne- ht64_bail_ok END_FTR_SECTION_IFCLR(CPU_FTR_CI_LARGE_PAGE) /* Prepare new PTE value (turn access RW into DIRTY, then - * add BUSY,HASHPTE and ACCESSED) + * add BUSY and ACCESSED) */ rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */ or r30,r30,r31 - ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE + ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED /* Write the linux PTE atomically (setting busy) */ stdcx. r30,0,r6 bne- 1b @@ -798,18 +798,21 @@ END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE) /* Check if we may already be in the hashtable, in this case, we * go to out-of-line code to try to modify the HPTE */ - andi. r0,r31,_PAGE_HASHPTE + rldicl. r0,r31,64-12,48 bne ht64_modify_pte ht64_insert_pte: /* Clear hpte bits in new pte (we also clear BUSY btw) and - * add _PAGE_HASHPTE + * add _PAGE_HPTE_SUB0 */ lis r0,_PAGE_HPTEFLAGS@h ori r0,r0,_PAGE_HPTEFLAGS@l andc r30,r30,r0 +#ifdef CONFIG_PPC_64K_PAGES + oris r30,r30,_PAGE_HPTE_SUB0@h +#else ori r30,r30,_PAGE_HASHPTE - +#endif /* Phyical address in r5 */ rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT sldi r5,r5,PAGE_SHIFT diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 0f2d239d94c4..5ce5a4dcd008 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -68,6 +68,7 @@ #define KB (1024) #define MB (1024*KB) +#define GB (1024L*MB) /* * Note: pte --> Linux PTE @@ -102,7 +103,6 @@ int mmu_kernel_ssize = MMU_SEGSIZE_256M; int mmu_highuser_ssize = MMU_SEGSIZE_256M; u16 mmu_slb_size = 64; #ifdef CONFIG_HUGETLB_PAGE -int mmu_huge_psize = MMU_PAGE_16M; unsigned int HPAGE_SHIFT; #endif #ifdef CONFIG_PPC_64K_PAGES @@ -120,7 +120,7 @@ static DEFINE_SPINLOCK(linear_map_hash_lock); /* Pre-POWER4 CPUs (4k pages only) */ -struct mmu_psize_def mmu_psize_defaults_old[] = { +static struct mmu_psize_def mmu_psize_defaults_old[] = { [MMU_PAGE_4K] = { .shift = 12, .sllp = 0, @@ -134,7 +134,7 @@ struct mmu_psize_def mmu_psize_defaults_old[] = { * * Support for 16Mb large pages */ -struct mmu_psize_def mmu_psize_defaults_gp[] = { +static struct mmu_psize_def mmu_psize_defaults_gp[] = { [MMU_PAGE_4K] = { .shift = 12, .sllp = 0, @@ -329,6 +329,44 @@ static int __init htab_dt_scan_page_sizes(unsigned long node, return 0; } +/* Scan for 16G memory blocks that have been set aside for huge pages + * and reserve those blocks for 16G huge pages. + */ +static int __init htab_dt_scan_hugepage_blocks(unsigned long node, + const char *uname, int depth, + void *data) { + char *type = of_get_flat_dt_prop(node, "device_type", NULL); + unsigned long *addr_prop; + u32 *page_count_prop; + unsigned int expected_pages; + long unsigned int phys_addr; + long unsigned int block_size; + + /* We are scanning "memory" nodes only */ + if (type == NULL || strcmp(type, "memory") != 0) + return 0; + + /* This property is the log base 2 of the number of virtual pages that + * will represent this memory block. */ + page_count_prop = of_get_flat_dt_prop(node, "ibm,expected#pages", NULL); + if (page_count_prop == NULL) + return 0; + expected_pages = (1 << page_count_prop[0]); + addr_prop = of_get_flat_dt_prop(node, "reg", NULL); + if (addr_prop == NULL) + return 0; + phys_addr = addr_prop[0]; + block_size = addr_prop[1]; + if (block_size != (16 * GB)) + return 0; + printk(KERN_INFO "Huge page(16GB) memory: " + "addr = 0x%lX size = 0x%lX pages = %d\n", + phys_addr, block_size, expected_pages); + lmb_reserve(phys_addr, block_size * expected_pages); + add_gpage(phys_addr, block_size, expected_pages); + return 0; +} + static void __init htab_init_page_sizes(void) { int rc; @@ -418,15 +456,18 @@ static void __init htab_init_page_sizes(void) ); #ifdef CONFIG_HUGETLB_PAGE - /* Init large page size. Currently, we pick 16M or 1M depending + /* Reserve 16G huge page memory sections for huge pages */ + of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL); + +/* Set default large page size. Currently, we pick 16M or 1M depending * on what is available */ if (mmu_psize_defs[MMU_PAGE_16M].shift) - set_huge_psize(MMU_PAGE_16M); + HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift; /* With 4k/4level pagetables, we can't (for now) cope with a * huge page size < PMD_SIZE */ else if (mmu_psize_defs[MMU_PAGE_1M].shift) - set_huge_psize(MMU_PAGE_1M); + HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift; #endif /* CONFIG_HUGETLB_PAGE */ } @@ -533,8 +574,6 @@ void __init htab_initialize(void) unsigned long base = 0, size = 0, limit; int i; - extern unsigned long tce_alloc_start, tce_alloc_end; - DBG(" -> htab_initialize()\n"); /* Initialize segment sizes */ @@ -697,6 +736,28 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) return pp; } +#ifdef CONFIG_PPC_MM_SLICES +unsigned int get_paca_psize(unsigned long addr) +{ + unsigned long index, slices; + + if (addr < SLICE_LOW_TOP) { + slices = get_paca()->context.low_slices_psize; + index = GET_LOW_SLICE_INDEX(addr); + } else { + slices = get_paca()->context.high_slices_psize; + index = GET_HIGH_SLICE_INDEX(addr); + } + return (slices >> (index * 4)) & 0xF; +} + +#else +unsigned int get_paca_psize(unsigned long addr) +{ + return get_paca()->context.user_psize; +} +#endif + /* * Demote a segment to using 4k pages. * For now this makes the whole process use 4k pages. @@ -704,13 +765,13 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) #ifdef CONFIG_PPC_64K_PAGES void demote_segment_4k(struct mm_struct *mm, unsigned long addr) { - if (mm->context.user_psize == MMU_PAGE_4K) + if (get_slice_psize(mm, addr) == MMU_PAGE_4K) return; - slice_set_user_psize(mm, MMU_PAGE_4K); + slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K); #ifdef CONFIG_SPU_BASE spu_flush_all_slbs(mm); #endif - if (get_paca()->context.user_psize != MMU_PAGE_4K) { + if (get_paca_psize(addr) != MMU_PAGE_4K) { get_paca()->context = mm->context; slb_flush_and_rebolt(); } @@ -794,11 +855,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) DBG_LOW(" user region with no mm !\n"); return 1; } -#ifdef CONFIG_PPC_MM_SLICES psize = get_slice_psize(mm, ea); -#else - psize = mm->context.user_psize; -#endif ssize = user_segment_size(ea); vsid = get_vsid(mm->context.id, ea, ssize); break; @@ -831,7 +888,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) #ifdef CONFIG_HUGETLB_PAGE /* Handle hugepage regions */ - if (HPAGE_SHIFT && psize == mmu_huge_psize) { + if (HPAGE_SHIFT && mmu_huge_psizes[psize]) { DBG_LOW(" -> huge page !\n"); return hash_huge_page(mm, access, ea, vsid, local, trap); } @@ -870,7 +927,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) /* Do actual hashing */ #ifdef CONFIG_PPC_64K_PAGES /* If _PAGE_4K_PFN is set, make sure this is a 4k segment */ - if (pte_val(*ptep) & _PAGE_4K_PFN) { + if ((pte_val(*ptep) & _PAGE_4K_PFN) && psize == MMU_PAGE_64K) { demote_segment_4k(mm, ea); psize = MMU_PAGE_4K; } @@ -899,7 +956,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) } } if (user_region) { - if (psize != get_paca()->context.user_psize) { + if (psize != get_paca_psize(ea)) { get_paca()->context = mm->context; slb_flush_and_rebolt(); } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index a02266dad215..ed0aab0208a6 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -24,21 +24,43 @@ #include <asm/cputable.h> #include <asm/spu.h> -#define HPAGE_SHIFT_64K 16 -#define HPAGE_SHIFT_16M 24 +#define PAGE_SHIFT_64K 16 +#define PAGE_SHIFT_16M 24 +#define PAGE_SHIFT_16G 34 #define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT) #define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT) +#define MAX_NUMBER_GPAGES 1024 -unsigned int hugepte_shift; -#define PTRS_PER_HUGEPTE (1 << hugepte_shift) -#define HUGEPTE_TABLE_SIZE (sizeof(pte_t) << hugepte_shift) +/* Tracks the 16G pages after the device tree is scanned and before the + * huge_boot_pages list is ready. */ +static unsigned long gpage_freearray[MAX_NUMBER_GPAGES]; +static unsigned nr_gpages; -#define HUGEPD_SHIFT (HPAGE_SHIFT + hugepte_shift) -#define HUGEPD_SIZE (1UL << HUGEPD_SHIFT) -#define HUGEPD_MASK (~(HUGEPD_SIZE-1)) +/* Array of valid huge page sizes - non-zero value(hugepte_shift) is + * stored for the huge page sizes that are valid. + */ +unsigned int mmu_huge_psizes[MMU_PAGE_COUNT] = { }; /* initialize all to 0 */ + +#define hugepte_shift mmu_huge_psizes +#define PTRS_PER_HUGEPTE(psize) (1 << hugepte_shift[psize]) +#define HUGEPTE_TABLE_SIZE(psize) (sizeof(pte_t) << hugepte_shift[psize]) + +#define HUGEPD_SHIFT(psize) (mmu_psize_to_shift(psize) \ + + hugepte_shift[psize]) +#define HUGEPD_SIZE(psize) (1UL << HUGEPD_SHIFT(psize)) +#define HUGEPD_MASK(psize) (~(HUGEPD_SIZE(psize)-1)) + +/* Subtract one from array size because we don't need a cache for 4K since + * is not a huge page size */ +#define huge_pgtable_cache(psize) (pgtable_cache[HUGEPTE_CACHE_NUM \ + + psize-1]) +#define HUGEPTE_CACHE_NAME(psize) (huge_pgtable_cache_name[psize]) -#define huge_pgtable_cache (pgtable_cache[HUGEPTE_CACHE_NUM]) +static const char *huge_pgtable_cache_name[MMU_PAGE_COUNT] = { + "unused_4K", "hugepte_cache_64K", "unused_64K_AP", + "hugepte_cache_1M", "hugepte_cache_16M", "hugepte_cache_16G" +}; /* Flag to mark huge PD pointers. This means pmd_bad() and pud_bad() * will choke on pointers to hugepte tables, which is handy for @@ -49,24 +71,49 @@ typedef struct { unsigned long pd; } hugepd_t; #define hugepd_none(hpd) ((hpd).pd == 0) +static inline int shift_to_mmu_psize(unsigned int shift) +{ + switch (shift) { +#ifndef CONFIG_PPC_64K_PAGES + case PAGE_SHIFT_64K: + return MMU_PAGE_64K; +#endif + case PAGE_SHIFT_16M: + return MMU_PAGE_16M; + case PAGE_SHIFT_16G: + return MMU_PAGE_16G; + } + return -1; +} + +static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize) +{ + if (mmu_psize_defs[mmu_psize].shift) + return mmu_psize_defs[mmu_psize].shift; + BUG(); +} + static inline pte_t *hugepd_page(hugepd_t hpd) { BUG_ON(!(hpd.pd & HUGEPD_OK)); return (pte_t *)(hpd.pd & ~HUGEPD_OK); } -static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr) +static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, + struct hstate *hstate) { - unsigned long idx = ((addr >> HPAGE_SHIFT) & (PTRS_PER_HUGEPTE-1)); + unsigned int shift = huge_page_shift(hstate); + int psize = shift_to_mmu_psize(shift); + unsigned long idx = ((addr >> shift) & (PTRS_PER_HUGEPTE(psize)-1)); pte_t *dir = hugepd_page(*hpdp); return dir + idx; } static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, - unsigned long address) + unsigned long address, unsigned int psize) { - pte_t *new = kmem_cache_alloc(huge_pgtable_cache, + pte_t *new = kmem_cache_zalloc(huge_pgtable_cache(psize), GFP_KERNEL|__GFP_REPEAT); if (! new) @@ -74,7 +121,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, spin_lock(&mm->page_table_lock); if (!hugepd_none(*hpdp)) - kmem_cache_free(huge_pgtable_cache, new); + kmem_cache_free(huge_pgtable_cache(psize), new); else hpdp->pd = (unsigned long)new | HUGEPD_OK; spin_unlock(&mm->page_table_lock); @@ -83,27 +130,60 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, /* Base page size affects how we walk hugetlb page tables */ #ifdef CONFIG_PPC_64K_PAGES -#define hpmd_offset(pud, addr) pmd_offset(pud, addr) -#define hpmd_alloc(mm, pud, addr) pmd_alloc(mm, pud, addr) +#define hpmd_offset(pud, addr, h) pmd_offset(pud, addr) +#define hpmd_alloc(mm, pud, addr, h) pmd_alloc(mm, pud, addr) #else static inline -pmd_t *hpmd_offset(pud_t *pud, unsigned long addr) +pmd_t *hpmd_offset(pud_t *pud, unsigned long addr, struct hstate *hstate) { - if (HPAGE_SHIFT == HPAGE_SHIFT_64K) + if (huge_page_shift(hstate) == PAGE_SHIFT_64K) return pmd_offset(pud, addr); else return (pmd_t *) pud; } static inline -pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr) +pmd_t *hpmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long addr, + struct hstate *hstate) { - if (HPAGE_SHIFT == HPAGE_SHIFT_64K) + if (huge_page_shift(hstate) == PAGE_SHIFT_64K) return pmd_alloc(mm, pud, addr); else return (pmd_t *) pud; } #endif +/* Build list of addresses of gigantic pages. This function is used in early + * boot before the buddy or bootmem allocator is setup. + */ +void add_gpage(unsigned long addr, unsigned long page_size, + unsigned long number_of_pages) +{ + if (!addr) + return; + while (number_of_pages > 0) { + gpage_freearray[nr_gpages] = addr; + nr_gpages++; + number_of_pages--; + addr += page_size; + } +} + +/* Moves the gigantic page addresses from the temporary list to the + * huge_boot_pages list. + */ +int alloc_bootmem_huge_page(struct hstate *hstate) +{ + struct huge_bootmem_page *m; + if (nr_gpages == 0) + return 0; + m = phys_to_virt(gpage_freearray[--nr_gpages]); + gpage_freearray[nr_gpages] = 0; + list_add(&m->list, &huge_boot_pages); + m->hstate = hstate; + return 1; +} + + /* Modelled after find_linux_pte() */ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { @@ -111,39 +191,52 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) pud_t *pu; pmd_t *pm; - BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); + unsigned int psize; + unsigned int shift; + unsigned long sz; + struct hstate *hstate; + psize = get_slice_psize(mm, addr); + shift = mmu_psize_to_shift(psize); + sz = ((1UL) << shift); + hstate = size_to_hstate(sz); - addr &= HPAGE_MASK; + addr &= hstate->mask; pg = pgd_offset(mm, addr); if (!pgd_none(*pg)) { pu = pud_offset(pg, addr); if (!pud_none(*pu)) { - pm = hpmd_offset(pu, addr); + pm = hpmd_offset(pu, addr, hstate); if (!pmd_none(*pm)) - return hugepte_offset((hugepd_t *)pm, addr); + return hugepte_offset((hugepd_t *)pm, addr, + hstate); } } return NULL; } -pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) +pte_t *huge_pte_alloc(struct mm_struct *mm, + unsigned long addr, unsigned long sz) { pgd_t *pg; pud_t *pu; pmd_t *pm; hugepd_t *hpdp = NULL; + struct hstate *hstate; + unsigned int psize; + hstate = size_to_hstate(sz); - BUG_ON(get_slice_psize(mm, addr) != mmu_huge_psize); + psize = get_slice_psize(mm, addr); + BUG_ON(!mmu_huge_psizes[psize]); - addr &= HPAGE_MASK; + addr &= hstate->mask; pg = pgd_offset(mm, addr); pu = pud_alloc(mm, pg, addr); if (pu) { - pm = hpmd_alloc(mm, pu, addr); + pm = hpmd_alloc(mm, pu, addr, hstate); if (pm) hpdp = (hugepd_t *)pm; } @@ -151,10 +244,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) if (! hpdp) return NULL; - if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr)) + if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, psize)) return NULL; - return hugepte_offset(hpdp, addr); + return hugepte_offset(hpdp, addr, hstate); } int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) @@ -162,19 +255,22 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) return 0; } -static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp) +static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp, + unsigned int psize) { pte_t *hugepte = hugepd_page(*hpdp); hpdp->pd = 0; tlb->need_flush = 1; - pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, HUGEPTE_CACHE_NUM, + pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, + HUGEPTE_CACHE_NUM+psize-1, PGF_CACHENUM_MASK)); } static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, unsigned long addr, unsigned long end, - unsigned long floor, unsigned long ceiling) + unsigned long floor, unsigned long ceiling, + unsigned int psize) { pmd_t *pmd; unsigned long next; @@ -186,7 +282,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, next = pmd_addr_end(addr, end); if (pmd_none(*pmd)) continue; - free_hugepte_range(tlb, (hugepd_t *)pmd); + free_hugepte_range(tlb, (hugepd_t *)pmd, psize); } while (pmd++, addr = next, addr != end); start &= PUD_MASK; @@ -212,6 +308,9 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, pud_t *pud; unsigned long next; unsigned long start; + unsigned int shift; + unsigned int psize = get_slice_psize(tlb->mm, addr); + shift = mmu_psize_to_shift(psize); start = addr; pud = pud_offset(pgd, addr); @@ -220,16 +319,18 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, #ifdef CONFIG_PPC_64K_PAGES if (pud_none_or_clear_bad(pud)) continue; - hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling); + hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling, + psize); #else - if (HPAGE_SHIFT == HPAGE_SHIFT_64K) { + if (shift == PAGE_SHIFT_64K) { if (pud_none_or_clear_bad(pud)) continue; - hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling); + hugetlb_free_pmd_range(tlb, pud, addr, next, floor, + ceiling, psize); } else { if (pud_none(*pud)) continue; - free_hugepte_range(tlb, (hugepd_t *)pud); + free_hugepte_range(tlb, (hugepd_t *)pud, psize); } #endif } while (pud++, addr = next, addr != end); @@ -255,7 +356,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, * * Must be called with pagetable lock held. */ -void hugetlb_free_pgd_range(struct mmu_gather **tlb, +void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) { @@ -297,31 +398,33 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb, * now has no other vmas using it, so can be freed, we don't * bother to round floor or end up - the tests don't need that. */ + unsigned int psize = get_slice_psize(tlb->mm, addr); - addr &= HUGEPD_MASK; + addr &= HUGEPD_MASK(psize); if (addr < floor) { - addr += HUGEPD_SIZE; + addr += HUGEPD_SIZE(psize); if (!addr) return; } if (ceiling) { - ceiling &= HUGEPD_MASK; + ceiling &= HUGEPD_MASK(psize); if (!ceiling) return; } if (end - 1 > ceiling - 1) - end -= HUGEPD_SIZE; + end -= HUGEPD_SIZE(psize); if (addr > end - 1) return; start = addr; - pgd = pgd_offset((*tlb)->mm, addr); + pgd = pgd_offset(tlb->mm, addr); do { - BUG_ON(get_slice_psize((*tlb)->mm, addr) != mmu_huge_psize); + psize = get_slice_psize(tlb->mm, addr); + BUG_ON(!mmu_huge_psizes[psize]); next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - hugetlb_free_pud_range(*tlb, pgd, addr, next, floor, ceiling); + hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); } while (pgd++, addr = next, addr != end); } @@ -334,7 +437,11 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, * necessary anymore if we make hpte_need_flush() get the * page size from the slices */ - pte_update(mm, addr & HPAGE_MASK, ptep, ~0UL, 1); + unsigned int psize = get_slice_psize(mm, addr); + unsigned int shift = mmu_psize_to_shift(psize); + unsigned long sz = ((1UL) << shift); + struct hstate *hstate = size_to_hstate(sz); + pte_update(mm, addr & hstate->mask, ptep, ~0UL, 1); } *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); } @@ -351,14 +458,19 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) { pte_t *ptep; struct page *page; + unsigned int mmu_psize = get_slice_psize(mm, address); - if (get_slice_psize(mm, address) != mmu_huge_psize) + /* Verify it is a huge page else bail. */ + if (!mmu_huge_psizes[mmu_psize]) return ERR_PTR(-EINVAL); ptep = huge_pte_offset(mm, address); page = pte_page(*ptep); - if (page) - page += (address % HPAGE_SIZE) / PAGE_SIZE; + if (page) { + unsigned int shift = mmu_psize_to_shift(mmu_psize); + unsigned long sz = ((1UL) << shift); + page += (address % sz) / PAGE_SIZE; + } return page; } @@ -368,6 +480,11 @@ int pmd_huge(pmd_t pmd) return 0; } +int pud_huge(pud_t pud) +{ + return 0; +} + struct page * follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write) @@ -381,15 +498,16 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { - return slice_get_unmapped_area(addr, len, flags, - mmu_huge_psize, 1, 0); + struct hstate *hstate = hstate_file(file); + int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate)); + return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0); } /* * Called by asm hashtable.S for doing lazy icache flush */ static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags, - pte_t pte, int trap) + pte_t pte, int trap, unsigned long sz) { struct page *page; int i; @@ -402,7 +520,7 @@ static unsigned int hash_huge_page_do_lazy_icache(unsigned long rflags, /* page is dirty */ if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { if (trap == 0x400) { - for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) + for (i = 0; i < (sz / PAGE_SIZE); i++) __flush_dcache_icache(page_address(page+i)); set_bit(PG_arch_1, &page->flags); } else { @@ -418,11 +536,16 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, { pte_t *ptep; unsigned long old_pte, new_pte; - unsigned long va, rflags, pa; + unsigned long va, rflags, pa, sz; long slot; int err = 1; int ssize = user_segment_size(ea); + unsigned int mmu_psize; + int shift; + mmu_psize = get_slice_psize(mm, ea); + if (!mmu_huge_psizes[mmu_psize]) + goto out; ptep = huge_pte_offset(mm, ea); /* Search the Linux page table for a match with va */ @@ -458,38 +581,39 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, old_pte = pte_val(*ptep); if (old_pte & _PAGE_BUSY) goto out; - new_pte = old_pte | _PAGE_BUSY | - _PAGE_ACCESSED | _PAGE_HASHPTE; + new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED; } while(old_pte != __cmpxchg_u64((unsigned long *)ptep, old_pte, new_pte)); rflags = 0x2 | (!(new_pte & _PAGE_RW)); /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N); + shift = mmu_psize_to_shift(mmu_psize); + sz = ((1UL) << shift); if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) /* No CPU has hugepages but lacks no execute, so we * don't need to worry about that case */ rflags = hash_huge_page_do_lazy_icache(rflags, __pte(old_pte), - trap); + trap, sz); /* Check if pte already has an hpte (case 2) */ if (unlikely(old_pte & _PAGE_HASHPTE)) { /* There MIGHT be an HPTE for this pte */ unsigned long hash, slot; - hash = hpt_hash(va, HPAGE_SHIFT, ssize); + hash = hpt_hash(va, shift, ssize); if (old_pte & _PAGE_F_SECOND) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += (old_pte & _PAGE_F_GIX) >> 12; - if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_huge_psize, + if (ppc_md.hpte_updatepp(slot, rflags, va, mmu_psize, ssize, local) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } if (likely(!(old_pte & _PAGE_HASHPTE))) { - unsigned long hash = hpt_hash(va, HPAGE_SHIFT, ssize); + unsigned long hash = hpt_hash(va, shift, ssize); unsigned long hpte_group; pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; @@ -499,16 +623,18 @@ repeat: HPTES_PER_GROUP) & ~0x7UL; /* clear HPTE slot informations in new PTE */ +#ifdef CONFIG_PPC_64K_PAGES + new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0; +#else new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; - +#endif /* Add in WIMG bits */ - /* XXX We should store these in the pte */ - /* --BenH: I think they are ... */ - rflags |= _PAGE_COHERENT; + rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | + _PAGE_COHERENT | _PAGE_GUARDED)); /* Insert into the hash table, primary slot */ slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0, - mmu_huge_psize, ssize); + mmu_psize, ssize); /* Primary is full, try the secondary */ if (unlikely(slot == -1)) { @@ -516,7 +642,7 @@ repeat: HPTES_PER_GROUP) & ~0x7UL; slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, HPTE_V_SECONDARY, - mmu_huge_psize, ssize); + mmu_psize, ssize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * @@ -548,45 +674,54 @@ void set_huge_psize(int psize) { /* Check that it is a page size supported by the hardware and * that it fits within pagetable limits. */ - if (mmu_psize_defs[psize].shift && mmu_psize_defs[psize].shift < SID_SHIFT && + if (mmu_psize_defs[psize].shift && + mmu_psize_defs[psize].shift < SID_SHIFT_1T && (mmu_psize_defs[psize].shift > MIN_HUGEPTE_SHIFT || - mmu_psize_defs[psize].shift == HPAGE_SHIFT_64K)) { - HPAGE_SHIFT = mmu_psize_defs[psize].shift; - mmu_huge_psize = psize; -#ifdef CONFIG_PPC_64K_PAGES - hugepte_shift = (PMD_SHIFT-HPAGE_SHIFT); -#else - if (HPAGE_SHIFT == HPAGE_SHIFT_64K) - hugepte_shift = (PMD_SHIFT-HPAGE_SHIFT); - else - hugepte_shift = (PUD_SHIFT-HPAGE_SHIFT); -#endif - + mmu_psize_defs[psize].shift == PAGE_SHIFT_64K || + mmu_psize_defs[psize].shift == PAGE_SHIFT_16G)) { + /* Return if huge page size has already been setup or is the + * same as the base page size. */ + if (mmu_huge_psizes[psize] || + mmu_psize_defs[psize].shift == PAGE_SHIFT) + return; + hugetlb_add_hstate(mmu_psize_defs[psize].shift - PAGE_SHIFT); + + switch (mmu_psize_defs[psize].shift) { + case PAGE_SHIFT_64K: + /* We only allow 64k hpages with 4k base page, + * which was checked above, and always put them + * at the PMD */ + hugepte_shift[psize] = PMD_SHIFT; + break; + case PAGE_SHIFT_16M: + /* 16M pages can be at two different levels + * of pagestables based on base page size */ + if (PAGE_SHIFT == PAGE_SHIFT_64K) + hugepte_shift[psize] = PMD_SHIFT; + else /* 4k base page */ + hugepte_shift[psize] = PUD_SHIFT; + break; + case PAGE_SHIFT_16G: + /* 16G pages are always at PGD level */ + hugepte_shift[psize] = PGDIR_SHIFT; + break; + } + hugepte_shift[psize] -= mmu_psize_defs[psize].shift; } else - HPAGE_SHIFT = 0; + hugepte_shift[psize] = 0; } static int __init hugepage_setup_sz(char *str) { unsigned long long size; - int mmu_psize = -1; + int mmu_psize; int shift; size = memparse(str, &str); shift = __ffs(size); - switch (shift) { -#ifndef CONFIG_PPC_64K_PAGES - case HPAGE_SHIFT_64K: - mmu_psize = MMU_PAGE_64K; - break; -#endif - case HPAGE_SHIFT_16M: - mmu_psize = MMU_PAGE_16M; - break; - } - - if (mmu_psize >=0 && mmu_psize_defs[mmu_psize].shift) + mmu_psize = shift_to_mmu_psize(shift); + if (mmu_psize >= 0 && mmu_psize_defs[mmu_psize].shift) set_huge_psize(mmu_psize); else printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size); @@ -595,23 +730,33 @@ static int __init hugepage_setup_sz(char *str) } __setup("hugepagesz=", hugepage_setup_sz); -static void zero_ctor(struct kmem_cache *cache, void *addr) -{ - memset(addr, 0, kmem_cache_size(cache)); -} - static int __init hugetlbpage_init(void) { + unsigned int psize; + if (!cpu_has_feature(CPU_FTR_16M_PAGE)) return -ENODEV; - - huge_pgtable_cache = kmem_cache_create("hugepte_cache", - HUGEPTE_TABLE_SIZE, - HUGEPTE_TABLE_SIZE, - 0, - zero_ctor); - if (! huge_pgtable_cache) - panic("hugetlbpage_init(): could not create hugepte cache\n"); + /* Add supported huge page sizes. Need to change HUGE_MAX_HSTATE + * and adjust PTE_NONCACHE_NUM if the number of supported huge page + * sizes changes. + */ + set_huge_psize(MMU_PAGE_16M); + set_huge_psize(MMU_PAGE_64K); + set_huge_psize(MMU_PAGE_16G); + + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { + if (mmu_huge_psizes[psize]) { + huge_pgtable_cache(psize) = kmem_cache_create( + HUGEPTE_CACHE_NAME(psize), + HUGEPTE_TABLE_SIZE(psize), + HUGEPTE_TABLE_SIZE(psize), + 0, + NULL); + if (!huge_pgtable_cache(psize)) + panic("hugetlbpage_init(): could not create %s"\ + "\n", HUGEPTE_CACHE_NAME(psize)); + } + } return 0; } diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 1952b4d3fa7f..388ceda632f3 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -43,6 +43,7 @@ #include <asm/btext.h> #include <asm/tlb.h> #include <asm/sections.h> +#include <asm/system.h> #include "mmu_decl.h" @@ -56,8 +57,8 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); -unsigned long total_memory; -unsigned long total_lowmem; +phys_addr_t total_memory; +phys_addr_t total_lowmem; phys_addr_t memstart_addr = (phys_addr_t)~0ull; EXPORT_SYMBOL(memstart_addr); @@ -76,8 +77,6 @@ void MMU_init(void); /* XXX should be in current.h -- paulus */ extern struct task_struct *current_set[NR_CPUS]; -extern int init_bootmem_done; - /* * this tells the system to map all of ram with the segregs * (i.e. page tables) instead of the bats. diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 6aa65375abf5..4f7df85129d8 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -136,9 +136,14 @@ static int __init setup_kcore(void) module_init(setup_kcore); #endif -static void zero_ctor(struct kmem_cache *cache, void *addr) +static void pgd_ctor(void *addr) { - memset(addr, 0, kmem_cache_size(cache)); + memset(addr, 0, PGD_TABLE_SIZE); +} + +static void pmd_ctor(void *addr) +{ + memset(addr, 0, PMD_TABLE_SIZE); } static const unsigned int pgtable_cache_size[2] = { @@ -153,29 +158,18 @@ static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { }; #ifdef CONFIG_HUGETLB_PAGE -/* Hugepages need one extra cache, initialized in hugetlbpage.c. We - * can't put into the tables above, because HPAGE_SHIFT is not compile - * time constant. */ -struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+1]; +/* Hugepages need an extra cache per hugepagesize, initialized in + * hugetlbpage.c. We can't put into the tables above, because HPAGE_SHIFT + * is not compile time constant. */ +struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)+MMU_PAGE_COUNT]; #else struct kmem_cache *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; #endif void pgtable_cache_init(void) { - int i; - - for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) { - int size = pgtable_cache_size[i]; - const char *name = pgtable_cache_name[i]; - - pr_debug("Allocating page table cache %s (#%d) " - "for size: %08x...\n", name, i, size); - pgtable_cache[i] = kmem_cache_create(name, - size, size, - SLAB_PANIC, - zero_ctor); - } + pgtable_cache[0] = kmem_cache_create(pgtable_cache_name[0], PGD_TABLE_SIZE, PGD_TABLE_SIZE, SLAB_PANIC, pgd_ctor); + pgtable_cache[1] = kmem_cache_create(pgtable_cache_name[1], PMD_TABLE_SIZE, PMD_TABLE_SIZE, SLAB_PANIC, pmd_ctor); } #ifdef CONFIG_SPARSEMEM_VMEMMAP @@ -185,7 +179,7 @@ void pgtable_cache_init(void) * do this by hand as the proffered address may not be correctly aligned. * Subtraction of non-aligned pointers produces undefined results. */ -unsigned long __meminit vmemmap_section_start(unsigned long page) +static unsigned long __meminit vmemmap_section_start(unsigned long page) { unsigned long offset = page - ((unsigned long)(vmemmap)); @@ -198,7 +192,7 @@ unsigned long __meminit vmemmap_section_start(unsigned long page) * which overlaps this vmemmap page is initialised then this page is * initialised already. */ -int __meminit vmemmap_populated(unsigned long start, int page_size) +static int __meminit vmemmap_populated(unsigned long start, int page_size) { unsigned long end = start + page_size; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 51f82d83bf14..702691cb9e82 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -44,6 +44,7 @@ #include <asm/btext.h> #include <asm/tlb.h> #include <asm/sections.h> +#include <asm/sparsemem.h> #include <asm/vdso.h> #include <asm/fixmap.h> @@ -185,45 +186,6 @@ walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg, } EXPORT_SYMBOL_GPL(walk_memory_resource); -void show_mem(void) -{ - unsigned long total = 0, reserved = 0; - unsigned long shared = 0, cached = 0; - unsigned long highmem = 0; - struct page *page; - pg_data_t *pgdat; - unsigned long i; - - printk("Mem-info:\n"); - show_free_areas(); - for_each_online_pgdat(pgdat) { - unsigned long flags; - pgdat_resize_lock(pgdat, &flags); - for (i = 0; i < pgdat->node_spanned_pages; i++) { - if (!pfn_valid(pgdat->node_start_pfn + i)) - continue; - page = pgdat_page_nr(pgdat, i); - total++; - if (PageHighMem(page)) - highmem++; - if (PageReserved(page)) - reserved++; - else if (PageSwapCache(page)) - cached++; - else if (page_count(page)) - shared += page_count(page) - 1; - } - pgdat_resize_unlock(pgdat, &flags); - } - printk("%ld pages of RAM\n", total); -#ifdef CONFIG_HIGHMEM - printk("%ld pages of HIGHMEM\n", highmem); -#endif - printk("%ld reserved pages\n", reserved); - printk("%ld pages shared\n", shared); - printk("%ld pages swap cached\n", cached); -} - /* * Initialize the bootmem system and give it all the memory we * have available. If we are using highmem, we only put the @@ -329,7 +291,7 @@ static int __init mark_nonram_nosave(void) void __init paging_init(void) { unsigned long total_ram = lmb_phys_mem_size(); - unsigned long top_of_ram = lmb_end_of_DRAM(); + phys_addr_t top_of_ram = lmb_end_of_DRAM(); unsigned long max_zone_pfns[MAX_NR_ZONES]; #ifdef CONFIG_PPC32 @@ -348,10 +310,10 @@ void __init paging_init(void) kmap_prot = PAGE_KERNEL; #endif /* CONFIG_HIGHMEM */ - printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", - top_of_ram, total_ram); + printk(KERN_DEBUG "Top of RAM: 0x%llx, Total RAM: 0x%lx\n", + (u64)top_of_ram, total_ram); printk(KERN_DEBUG "Memory hole size: %ldMB\n", - (top_of_ram - total_ram) >> 20); + (long int)((top_of_ram - total_ram) >> 20)); memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); #ifdef CONFIG_HIGHMEM max_zone_pfns[ZONE_DMA] = lowmem_end_addr >> PAGE_SHIFT; diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 04802252a64f..fab3cfad4099 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -29,7 +29,7 @@ extern void hash_preload(struct mm_struct *mm, unsigned long ea, #ifdef CONFIG_PPC32 extern void mapin_ram(void); extern int map_page(unsigned long va, phys_addr_t pa, int flags); -extern void setbat(int index, unsigned long virt, unsigned long phys, +extern void setbat(int index, unsigned long virt, phys_addr_t phys, unsigned int size, int flags); extern void settlbcam(int index, unsigned long virt, phys_addr_t phys, unsigned int size, int flags, unsigned int pid); @@ -49,8 +49,8 @@ extern unsigned int num_tlbcam_entries; extern unsigned long ioremap_bot; extern unsigned long __max_low_memory; extern phys_addr_t __initial_memory_limit_addr; -extern unsigned long total_memory; -extern unsigned long total_lowmem; +extern phys_addr_t total_memory; +extern phys_addr_t total_lowmem; extern phys_addr_t memstart_addr; extern phys_addr_t lowmem_end_addr; diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index dc704da363eb..d9a181351332 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -39,7 +39,6 @@ EXPORT_SYMBOL(numa_cpu_lookup_table); EXPORT_SYMBOL(numa_cpumask_lookup_table); EXPORT_SYMBOL(node_data); -static bootmem_data_t __initdata plat_node_bdata[MAX_NUMNODES]; static int min_common_depth; static int n_mem_addr_cells, n_mem_size_cells; @@ -268,6 +267,144 @@ static unsigned long __devinit read_n_cells(int n, const unsigned int **buf) return result; } +struct of_drconf_cell { + u64 base_addr; + u32 drc_index; + u32 reserved; + u32 aa_index; + u32 flags; +}; + +#define DRCONF_MEM_ASSIGNED 0x00000008 +#define DRCONF_MEM_AI_INVALID 0x00000040 +#define DRCONF_MEM_RESERVED 0x00000080 + +/* + * Read the next lmb list entry from the ibm,dynamic-memory property + * and return the information in the provided of_drconf_cell structure. + */ +static void read_drconf_cell(struct of_drconf_cell *drmem, const u32 **cellp) +{ + const u32 *cp; + + drmem->base_addr = read_n_cells(n_mem_addr_cells, cellp); + + cp = *cellp; + drmem->drc_index = cp[0]; + drmem->reserved = cp[1]; + drmem->aa_index = cp[2]; + drmem->flags = cp[3]; + + *cellp = cp + 4; +} + +/* + * Retreive and validate the ibm,dynamic-memory property of the device tree. + * + * The layout of the ibm,dynamic-memory property is a number N of lmb + * list entries followed by N lmb list entries. Each lmb list entry + * contains information as layed out in the of_drconf_cell struct above. + */ +static int of_get_drconf_memory(struct device_node *memory, const u32 **dm) +{ + const u32 *prop; + u32 len, entries; + + prop = of_get_property(memory, "ibm,dynamic-memory", &len); + if (!prop || len < sizeof(unsigned int)) + return 0; + + entries = *prop++; + + /* Now that we know the number of entries, revalidate the size + * of the property read in to ensure we have everything + */ + if (len < (entries * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int)) + return 0; + + *dm = prop; + return entries; +} + +/* + * Retreive and validate the ibm,lmb-size property for drconf memory + * from the device tree. + */ +static u64 of_get_lmb_size(struct device_node *memory) +{ + const u32 *prop; + u32 len; + + prop = of_get_property(memory, "ibm,lmb-size", &len); + if (!prop || len < sizeof(unsigned int)) + return 0; + + return read_n_cells(n_mem_size_cells, &prop); +} + +struct assoc_arrays { + u32 n_arrays; + u32 array_sz; + const u32 *arrays; +}; + +/* + * Retreive and validate the list of associativity arrays for drconf + * memory from the ibm,associativity-lookup-arrays property of the + * device tree.. + * + * The layout of the ibm,associativity-lookup-arrays property is a number N + * indicating the number of associativity arrays, followed by a number M + * indicating the size of each associativity array, followed by a list + * of N associativity arrays. + */ +static int of_get_assoc_arrays(struct device_node *memory, + struct assoc_arrays *aa) +{ + const u32 *prop; + u32 len; + + prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len); + if (!prop || len < 2 * sizeof(unsigned int)) + return -1; + + aa->n_arrays = *prop++; + aa->array_sz = *prop++; + + /* Now that we know the number of arrrays and size of each array, + * revalidate the size of the property read in. + */ + if (len < (aa->n_arrays * aa->array_sz + 2) * sizeof(unsigned int)) + return -1; + + aa->arrays = prop; + return 0; +} + +/* + * This is like of_node_to_nid_single() for memory represented in the + * ibm,dynamic-reconfiguration-memory node. + */ +static int of_drconf_to_nid_single(struct of_drconf_cell *drmem, + struct assoc_arrays *aa) +{ + int default_nid = 0; + int nid = default_nid; + int index; + + if (min_common_depth > 0 && min_common_depth <= aa->array_sz && + !(drmem->flags & DRCONF_MEM_AI_INVALID) && + drmem->aa_index < aa->n_arrays) { + index = drmem->aa_index * aa->array_sz + min_common_depth - 1; + nid = aa->arrays[index]; + + if (nid == 0xffff || nid >= MAX_NUMNODES) + nid = default_nid; + } + + return nid; +} + /* * Figure out to which domain a cpu belongs and stick it there. * Return the id of the domain used. @@ -355,57 +492,50 @@ static unsigned long __init numa_enforce_memory_limit(unsigned long start, */ static void __init parse_drconf_memory(struct device_node *memory) { - const unsigned int *lm, *dm, *aa; - unsigned int ls, ld, la; - unsigned int n, aam, aalen; - unsigned long lmb_size, size, start; - int nid, default_nid = 0; - unsigned int ai, flags; - - lm = of_get_property(memory, "ibm,lmb-size", &ls); - dm = of_get_property(memory, "ibm,dynamic-memory", &ld); - aa = of_get_property(memory, "ibm,associativity-lookup-arrays", &la); - if (!lm || !dm || !aa || - ls < sizeof(unsigned int) || ld < sizeof(unsigned int) || - la < 2 * sizeof(unsigned int)) + const u32 *dm; + unsigned int n, rc; + unsigned long lmb_size, size; + int nid; + struct assoc_arrays aa; + + n = of_get_drconf_memory(memory, &dm); + if (!n) + return; + + lmb_size = of_get_lmb_size(memory); + if (!lmb_size) return; - lmb_size = read_n_cells(n_mem_size_cells, &lm); - n = *dm++; /* number of LMBs */ - aam = *aa++; /* number of associativity lists */ - aalen = *aa++; /* length of each associativity list */ - if (ld < (n * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int) || - la < (aam * aalen + 2) * sizeof(unsigned int)) + rc = of_get_assoc_arrays(memory, &aa); + if (rc) return; for (; n != 0; --n) { - start = read_n_cells(n_mem_addr_cells, &dm); - ai = dm[2]; - flags = dm[3]; - dm += 4; - /* 0x80 == reserved, 0x8 = assigned to us */ - if ((flags & 0x80) || !(flags & 0x8)) + struct of_drconf_cell drmem; + + read_drconf_cell(&drmem, &dm); + + /* skip this block if the reserved bit is set in flags (0x80) + or if the block is not assigned to this partition (0x8) */ + if ((drmem.flags & DRCONF_MEM_RESERVED) + || !(drmem.flags & DRCONF_MEM_ASSIGNED)) continue; - nid = default_nid; - /* flags & 0x40 means associativity index is invalid */ - if (min_common_depth > 0 && min_common_depth <= aalen && - (flags & 0x40) == 0 && ai < aam) { - /* this is like of_node_to_nid_single */ - nid = aa[ai * aalen + min_common_depth - 1]; - if (nid == 0xffff || nid >= MAX_NUMNODES) - nid = default_nid; - } - fake_numa_create_new_node(((start + lmb_size) >> PAGE_SHIFT), - &nid); + nid = of_drconf_to_nid_single(&drmem, &aa); + + fake_numa_create_new_node( + ((drmem.base_addr + lmb_size) >> PAGE_SHIFT), + &nid); + node_set_online(nid); - size = numa_enforce_memory_limit(start, lmb_size); + size = numa_enforce_memory_limit(drmem.base_addr, lmb_size); if (!size) continue; - add_active_range(nid, start >> PAGE_SHIFT, - (start >> PAGE_SHIFT) + (size >> PAGE_SHIFT)); + add_active_range(nid, drmem.base_addr >> PAGE_SHIFT, + (drmem.base_addr >> PAGE_SHIFT) + + (size >> PAGE_SHIFT)); } } @@ -685,7 +815,7 @@ void __init do_init_bootmem(void) dbg("node %d\n", nid); dbg("NODE_DATA() = %p\n", NODE_DATA(nid)); - NODE_DATA(nid)->bdata = &plat_node_bdata[nid]; + NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; NODE_DATA(nid)->node_start_pfn = start_pfn; NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; @@ -770,6 +900,79 @@ early_param("numa", early_numa); #ifdef CONFIG_MEMORY_HOTPLUG /* + * Validate the node associated with the memory section we are + * trying to add. + */ +int valid_hot_add_scn(int *nid, unsigned long start, u32 lmb_size, + unsigned long scn_addr) +{ + nodemask_t nodes; + + if (*nid < 0 || !node_online(*nid)) + *nid = any_online_node(NODE_MASK_ALL); + + if ((scn_addr >= start) && (scn_addr < (start + lmb_size))) { + nodes_setall(nodes); + while (NODE_DATA(*nid)->node_spanned_pages == 0) { + node_clear(*nid, nodes); + *nid = any_online_node(nodes); + } + + return 1; + } + + return 0; +} + +/* + * Find the node associated with a hot added memory section represented + * by the ibm,dynamic-reconfiguration-memory node. + */ +static int hot_add_drconf_scn_to_nid(struct device_node *memory, + unsigned long scn_addr) +{ + const u32 *dm; + unsigned int n, rc; + unsigned long lmb_size; + int default_nid = any_online_node(NODE_MASK_ALL); + int nid; + struct assoc_arrays aa; + + n = of_get_drconf_memory(memory, &dm); + if (!n) + return default_nid;; + + lmb_size = of_get_lmb_size(memory); + if (!lmb_size) + return default_nid; + + rc = of_get_assoc_arrays(memory, &aa); + if (rc) + return default_nid; + + for (; n != 0; --n) { + struct of_drconf_cell drmem; + + read_drconf_cell(&drmem, &dm); + + /* skip this block if it is reserved or not assigned to + * this partition */ + if ((drmem.flags & DRCONF_MEM_RESERVED) + || !(drmem.flags & DRCONF_MEM_ASSIGNED)) + continue; + + nid = of_drconf_to_nid_single(&drmem, &aa); + + if (valid_hot_add_scn(&nid, drmem.base_addr, lmb_size, + scn_addr)) + return nid; + } + + BUG(); /* section address should be found above */ + return 0; +} + +/* * Find the node associated with a hot added memory section. Section * corresponds to a SPARSEMEM section, not an LMB. It is assumed that * sections are fully contained within a single LMB. @@ -777,12 +980,17 @@ early_param("numa", early_numa); int hot_add_scn_to_nid(unsigned long scn_addr) { struct device_node *memory = NULL; - nodemask_t nodes; - int default_nid = any_online_node(NODE_MASK_ALL); int nid; if (!numa_enabled || (min_common_depth < 0)) - return default_nid; + return any_online_node(NODE_MASK_ALL); + + memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); + if (memory) { + nid = hot_add_drconf_scn_to_nid(memory, scn_addr); + of_node_put(memory); + return nid; + } while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { unsigned long start, size; @@ -801,13 +1009,9 @@ ha_new_range: size = read_n_cells(n_mem_size_cells, &memcell_buf); nid = of_node_to_nid_single(memory); - /* Domains not present at boot default to 0 */ - if (nid < 0 || !node_online(nid)) - nid = default_nid; - - if ((scn_addr >= start) && (scn_addr < (start + size))) { + if (valid_hot_add_scn(&nid, start, size, scn_addr)) { of_node_put(memory); - goto got_nid; + return nid; } if (--ranges) /* process all ranges in cell */ @@ -815,14 +1019,5 @@ ha_new_range: } BUG(); /* section address should be found above */ return 0; - - /* Temporary code to ensure that returned node is not empty */ -got_nid: - nodes_setall(nodes); - while (NODE_DATA(nid)->node_spanned_pages == 0) { - node_clear(nid, nodes); - nid = any_online_node(nodes); - } - return nid; } #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index e0ff59f21135..2001abdb1912 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -53,9 +53,9 @@ extern void hash_page_sync(void); #endif #ifdef HAVE_BATS -extern unsigned long v_mapped_by_bats(unsigned long va); -extern unsigned long p_mapped_by_bats(unsigned long pa); -void setbat(int index, unsigned long virt, unsigned long phys, +extern phys_addr_t v_mapped_by_bats(unsigned long va); +extern unsigned long p_mapped_by_bats(phys_addr_t pa); +void setbat(int index, unsigned long virt, phys_addr_t phys, unsigned int size, int flags); #else /* !HAVE_BATS */ @@ -145,13 +145,20 @@ void pte_free(struct mm_struct *mm, pgtable_t ptepage) void __iomem * ioremap(phys_addr_t addr, unsigned long size) { - return __ioremap(addr, size, _PAGE_NO_CACHE); + return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED); } EXPORT_SYMBOL(ioremap); void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags) { + /* writeable implies dirty for kernel addresses */ + if (flags & _PAGE_RW) + flags |= _PAGE_DIRTY | _PAGE_HWWRITE; + + /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ + flags &= ~(_PAGE_USER | _PAGE_EXEC | _PAGE_HWEXEC); + return __ioremap(addr, size, flags); } EXPORT_SYMBOL(ioremap_flags); @@ -163,6 +170,14 @@ __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) phys_addr_t p; int err; + /* Make sure we have the base flags */ + if ((flags & _PAGE_PRESENT) == 0) + flags |= _PAGE_KERNEL; + + /* Non-cacheable page cannot be coherent */ + if (flags & _PAGE_NO_CACHE) + flags &= ~_PAGE_COHERENT; + /* * Choose an address to map it to. * Once the vmalloc system is running, we use it. @@ -219,11 +234,6 @@ __ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) v = (ioremap_bot -= size); } - if ((flags & _PAGE_PRESENT) == 0) - flags |= _PAGE_KERNEL; - if (flags & _PAGE_NO_CACHE) - flags |= _PAGE_GUARDED; - /* * Should check if it is a candidate for a BAT mapping */ diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 3ef0ad2f9ca0..365e61ae5dbc 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -107,9 +107,18 @@ void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size, { unsigned long i; + /* Make sure we have the base flags */ if ((flags & _PAGE_PRESENT) == 0) flags |= pgprot_val(PAGE_KERNEL); + /* Non-cacheable page cannot be coherent */ + if (flags & _PAGE_NO_CACHE) + flags &= ~_PAGE_COHERENT; + + /* We don't support the 4K PFN hack with ioremap */ + if (flags & _PAGE_4K_PFN) + return NULL; + WARN_ON(pa & ~PAGE_MASK); WARN_ON(((unsigned long)ea) & ~PAGE_MASK); WARN_ON(size & ~PAGE_MASK); @@ -190,6 +199,13 @@ void __iomem * ioremap(phys_addr_t addr, unsigned long size) void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags) { + /* writeable implies dirty for kernel addresses */ + if (flags & _PAGE_RW) + flags |= _PAGE_DIRTY; + + /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ + flags &= ~(_PAGE_USER | _PAGE_EXEC); + if (ppc_md.ioremap) return ppc_md.ioremap(addr, size, flags); return __ioremap(addr, size, flags); diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index cef9f156874b..c53145f61942 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -38,21 +38,18 @@ struct hash_pte *Hash, *Hash_end; unsigned long Hash_size, Hash_mask; unsigned long _SDR1; -union ubat { /* BAT register values to be loaded */ - struct ppc_bat bat; - u32 word[2]; -} BATS[8][2]; /* 8 pairs of IBAT, DBAT */ +struct ppc_bat BATS[8][2]; /* 8 pairs of IBAT, DBAT */ struct batrange { /* stores address ranges mapped by BATs */ unsigned long start; unsigned long limit; - unsigned long phys; + phys_addr_t phys; } bat_addrs[8]; /* * Return PA for this VA if it is mapped by a BAT, or 0 */ -unsigned long v_mapped_by_bats(unsigned long va) +phys_addr_t v_mapped_by_bats(unsigned long va) { int b; for (b = 0; b < 4; ++b) @@ -64,7 +61,7 @@ unsigned long v_mapped_by_bats(unsigned long va) /* * Return VA for a given PA or 0 if not mapped */ -unsigned long p_mapped_by_bats(unsigned long pa) +unsigned long p_mapped_by_bats(phys_addr_t pa) { int b; for (b = 0; b < 4; ++b) @@ -119,12 +116,12 @@ unsigned long __init mmu_mapin_ram(void) * The parameters are not checked; in particular size must be a power * of 2 between 128k and 256M. */ -void __init setbat(int index, unsigned long virt, unsigned long phys, +void __init setbat(int index, unsigned long virt, phys_addr_t phys, unsigned int size, int flags) { unsigned int bl; int wimgxpp; - union ubat *bat = BATS[index]; + struct ppc_bat *bat = BATS[index]; if (((flags & _PAGE_NO_CACHE) == 0) && cpu_has_feature(CPU_FTR_NEED_COHERENT)) @@ -137,15 +134,15 @@ void __init setbat(int index, unsigned long virt, unsigned long phys, wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT | _PAGE_GUARDED); wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX; - bat[1].word[0] = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ - bat[1].word[1] = phys | wimgxpp; + bat[1].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ + bat[1].batl = BAT_PHYS_ADDR(phys) | wimgxpp; #ifndef CONFIG_KGDB /* want user access for breakpoints */ if (flags & _PAGE_USER) #endif - bat[1].bat.batu.vp = 1; + bat[1].batu |= 1; /* Vp = 1 */ if (flags & _PAGE_GUARDED) { /* G bit must be zero in IBATs */ - bat[0].word[0] = bat[0].word[1] = 0; + bat[0].batu = bat[0].batl = 0; } else { /* make IBAT same as DBAT */ bat[0] = bat[1]; @@ -158,8 +155,8 @@ void __init setbat(int index, unsigned long virt, unsigned long phys, | _PAGE_COHERENT); wimgxpp |= (flags & _PAGE_RW)? ((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX; - bat->word[0] = virt | wimgxpp | 4; /* Ks=0, Ku=1 */ - bat->word[1] = phys | bl | 0x40; /* V=1 */ + bat->batu = virt | wimgxpp | 4; /* Ks=0, Ku=1 */ + bat->batl = phys | bl | 0x40; /* V=1 */ } bat_addrs[index].start = virt; diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index ad928edafb0a..db44e02e045b 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -215,10 +215,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz mm->context.high_slices_psize); spin_unlock_irqrestore(&slice_convert_lock, flags); - mb(); - /* XXX this is sub-optimal but will do for now */ - on_each_cpu(slice_flush_segments, mm, 0, 1); #ifdef CONFIG_SPU_BASE spu_flush_all_slbs(mm); #endif @@ -384,17 +381,34 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len, return slice_find_area_bottomup(mm, len, mask, psize, use_cache); } +#define or_mask(dst, src) do { \ + (dst).low_slices |= (src).low_slices; \ + (dst).high_slices |= (src).high_slices; \ +} while (0) + +#define andnot_mask(dst, src) do { \ + (dst).low_slices &= ~(src).low_slices; \ + (dst).high_slices &= ~(src).high_slices; \ +} while (0) + +#ifdef CONFIG_PPC_64K_PAGES +#define MMU_PAGE_BASE MMU_PAGE_64K +#else +#define MMU_PAGE_BASE MMU_PAGE_4K +#endif + unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, unsigned long flags, unsigned int psize, int topdown, int use_cache) { - struct slice_mask mask; + struct slice_mask mask = {0, 0}; struct slice_mask good_mask; struct slice_mask potential_mask = {0,0} /* silence stupid warning */; - int pmask_set = 0; + struct slice_mask compat_mask = {0, 0}; int fixed = (flags & MAP_FIXED); int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); struct mm_struct *mm = current->mm; + unsigned long newaddr; /* Sanity checks */ BUG_ON(mm->task_size == 0); @@ -416,21 +430,48 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, if (!fixed && addr) { addr = _ALIGN_UP(addr, 1ul << pshift); slice_dbg(" aligned addr=%lx\n", addr); + /* Ignore hint if it's too large or overlaps a VMA */ + if (addr > mm->task_size - len || + !slice_area_is_free(mm, addr, len)) + addr = 0; } - /* First makeup a "good" mask of slices that have the right size + /* First make up a "good" mask of slices that have the right size * already */ good_mask = slice_mask_for_size(mm, psize); slice_print_mask(" good_mask", good_mask); - /* First check hint if it's valid or if we have MAP_FIXED */ - if ((addr != 0 || fixed) && (mm->task_size - len) >= addr) { + /* + * Here "good" means slices that are already the right page size, + * "compat" means slices that have a compatible page size (i.e. + * 4k in a 64k pagesize kernel), and "free" means slices without + * any VMAs. + * + * If MAP_FIXED: + * check if fits in good | compat => OK + * check if fits in good | compat | free => convert free + * else bad + * If have hint: + * check if hint fits in good => OK + * check if hint fits in good | free => convert free + * Otherwise: + * search in good, found => OK + * search in good | free, found => convert free + * search in good | compat | free, found => convert free. + */ - /* Don't bother with hint if it overlaps a VMA */ - if (!fixed && !slice_area_is_free(mm, addr, len)) - goto search; +#ifdef CONFIG_PPC_64K_PAGES + /* If we support combo pages, we can allow 64k pages in 4k slices */ + if (psize == MMU_PAGE_64K) { + compat_mask = slice_mask_for_size(mm, MMU_PAGE_4K); + if (fixed) + or_mask(good_mask, compat_mask); + } +#endif + /* First check hint if it's valid or if we have MAP_FIXED */ + if (addr != 0 || fixed) { /* Build a mask for the requested range */ mask = slice_range_to_mask(addr, len); slice_print_mask(" mask", mask); @@ -442,54 +483,66 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, slice_dbg(" fits good !\n"); return addr; } - - /* We don't fit in the good mask, check what other slices are - * empty and thus can be converted + } else { + /* Now let's see if we can find something in the existing + * slices for that size */ - potential_mask = slice_mask_for_free(mm); - potential_mask.low_slices |= good_mask.low_slices; - potential_mask.high_slices |= good_mask.high_slices; - pmask_set = 1; - slice_print_mask(" potential", potential_mask); - if (slice_check_fit(mask, potential_mask)) { - slice_dbg(" fits potential !\n"); - goto convert; + newaddr = slice_find_area(mm, len, good_mask, psize, topdown, + use_cache); + if (newaddr != -ENOMEM) { + /* Found within the good mask, we don't have to setup, + * we thus return directly + */ + slice_dbg(" found area at 0x%lx\n", newaddr); + return newaddr; } } - /* If we have MAP_FIXED and failed the above step, then error out */ + /* We don't fit in the good mask, check what other slices are + * empty and thus can be converted + */ + potential_mask = slice_mask_for_free(mm); + or_mask(potential_mask, good_mask); + slice_print_mask(" potential", potential_mask); + + if ((addr != 0 || fixed) && slice_check_fit(mask, potential_mask)) { + slice_dbg(" fits potential !\n"); + goto convert; + } + + /* If we have MAP_FIXED and failed the above steps, then error out */ if (fixed) return -EBUSY; - search: slice_dbg(" search...\n"); - /* Now let's see if we can find something in the existing slices - * for that size + /* If we had a hint that didn't work out, see if we can fit + * anywhere in the good area. */ - addr = slice_find_area(mm, len, good_mask, psize, topdown, use_cache); - if (addr != -ENOMEM) { - /* Found within the good mask, we don't have to setup, - * we thus return directly - */ - slice_dbg(" found area at 0x%lx\n", addr); - return addr; - } - - /* Won't fit, check what can be converted */ - if (!pmask_set) { - potential_mask = slice_mask_for_free(mm); - potential_mask.low_slices |= good_mask.low_slices; - potential_mask.high_slices |= good_mask.high_slices; - pmask_set = 1; - slice_print_mask(" potential", potential_mask); + if (addr) { + addr = slice_find_area(mm, len, good_mask, psize, topdown, + use_cache); + if (addr != -ENOMEM) { + slice_dbg(" found area at 0x%lx\n", addr); + return addr; + } } /* Now let's see if we can find something in the existing slices - * for that size + * for that size plus free slices */ addr = slice_find_area(mm, len, potential_mask, psize, topdown, use_cache); + +#ifdef CONFIG_PPC_64K_PAGES + if (addr == -ENOMEM && psize == MMU_PAGE_64K) { + /* retry the search with 4k-page slices included */ + or_mask(potential_mask, compat_mask); + addr = slice_find_area(mm, len, potential_mask, psize, + topdown, use_cache); + } +#endif + if (addr == -ENOMEM) return -ENOMEM; @@ -498,7 +551,13 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, slice_print_mask(" mask", mask); convert: - slice_convert(mm, mask, psize); + andnot_mask(mask, good_mask); + andnot_mask(mask, compat_mask); + if (mask.low_slices || mask.high_slices) { + slice_convert(mm, mask, psize); + if (psize > MMU_PAGE_BASE) + on_each_cpu(slice_flush_segments, mm, 1); + } return addr; } @@ -598,6 +657,36 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize) spin_unlock_irqrestore(&slice_convert_lock, flags); } +void slice_set_psize(struct mm_struct *mm, unsigned long address, + unsigned int psize) +{ + unsigned long i, flags; + u64 *p; + + spin_lock_irqsave(&slice_convert_lock, flags); + if (address < SLICE_LOW_TOP) { + i = GET_LOW_SLICE_INDEX(address); + p = &mm->context.low_slices_psize; + } else { + i = GET_HIGH_SLICE_INDEX(address); + p = &mm->context.high_slices_psize; + } + *p = (*p & ~(0xful << (i * 4))) | ((unsigned long) psize << (i * 4)); + spin_unlock_irqrestore(&slice_convert_lock, flags); + +#ifdef CONFIG_SPU_BASE + spu_flush_all_slbs(mm); +#endif +} + +void slice_set_range_psize(struct mm_struct *mm, unsigned long start, + unsigned long len, unsigned int psize) +{ + struct slice_mask mask = slice_range_to_mask(start, len); + + slice_convert(mm, mask, psize); +} + /* * is_hugepage_only_range() is used by generic code to verify wether * a normal mmap mapping (non hugetlbfs) is valid on a given area. diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index efbbd13d93e5..60e6032a8088 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c @@ -30,8 +30,8 @@ struct stab_entry { }; #define NR_STAB_CACHE_ENTRIES 8 -DEFINE_PER_CPU(long, stab_cache_ptr); -DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]); +static DEFINE_PER_CPU(long, stab_cache_ptr); +static DEFINE_PER_CPU(long, stab_cache[NR_STAB_CACHE_ENTRIES]); /* * Create a segment table entry for the given esid/vsid pair. diff --git a/arch/powerpc/mm/tlb_64.c b/arch/powerpc/mm/tlb_64.c index e2d867ce1c7e..409fcc7b63ce 100644 --- a/arch/powerpc/mm/tlb_64.c +++ b/arch/powerpc/mm/tlb_64.c @@ -37,8 +37,8 @@ DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); * include/asm-powerpc/tlb.h file -- tgall */ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); -DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); -unsigned long pte_freelist_forced_free; +static DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); +static unsigned long pte_freelist_forced_free; struct pte_freelist_batch { @@ -47,9 +47,6 @@ struct pte_freelist_batch pgtable_free_t tables[0]; }; -DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); -unsigned long pte_freelist_forced_free; - #define PTE_FREELIST_SIZE \ ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \ / sizeof(pgtable_free_t)) @@ -66,7 +63,7 @@ static void pgtable_free_now(pgtable_free_t pgf) { pte_freelist_forced_free++; - smp_call_function(pte_free_smp_sync, NULL, 0, 1); + smp_call_function(pte_free_smp_sync, NULL, 1); pgtable_free(pgf); } @@ -150,7 +147,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr, */ if (huge) { #ifdef CONFIG_HUGETLB_PAGE - psize = mmu_huge_psize; + psize = get_slice_psize(mm, addr);; #else BUG(); psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */ |