diff options
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/8xx_mmu.c | 131 | ||||
-rw-r--r-- | arch/powerpc/mm/copro_fault.c | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/fault.c | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/hash64_4k.c | 18 | ||||
-rw-r--r-- | arch/powerpc/mm/hash64_64k.c | 39 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_native_64.c | 50 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c | 189 | ||||
-rw-r--r-- | arch/powerpc/mm/hugepage-hash64.c | 17 | ||||
-rw-r--r-- | arch/powerpc/mm/hugetlbpage-hash64.c | 4 | ||||
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c | 9 | ||||
-rw-r--r-- | arch/powerpc/mm/init_32.c | 5 | ||||
-rw-r--r-- | arch/powerpc/mm/mem.c | 18 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_context_book3s64.c | 7 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_decl.h | 3 | ||||
-rw-r--r-- | arch/powerpc/mm/numa.c | 84 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable-book3s64.c | 5 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable-radix.c | 50 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable_32.c | 4 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable_64.c | 3 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb-radix.c | 157 |
20 files changed, 511 insertions, 286 deletions
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index 949100577db5..6c5025e81236 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -13,62 +13,115 @@ */ #include <linux/memblock.h> +#include <asm/fixmap.h> +#include <asm/code-patching.h> #include "mmu_decl.h" +#define IMMR_SIZE (FIX_IMMR_SIZE << PAGE_SHIFT) + extern int __map_without_ltlbs; + /* - * MMU_init_hw does the chip-specific initialization of the MMU hardware. + * Return PA for this VA if it is in IMMR area, or 0 */ -void __init MMU_init_hw(void) +phys_addr_t v_block_mapped(unsigned long va) { - /* Nothing to do for the time being but keep it similar to other PPC */ + unsigned long p = PHYS_IMMR_BASE; + + if (__map_without_ltlbs) + return 0; + if (va >= VIRT_IMMR_BASE && va < VIRT_IMMR_BASE + IMMR_SIZE) + return p + va - VIRT_IMMR_BASE; + return 0; +} + +/* + * Return VA for a given PA or 0 if not mapped + */ +unsigned long p_block_mapped(phys_addr_t pa) +{ + unsigned long p = PHYS_IMMR_BASE; + + if (__map_without_ltlbs) + return 0; + if (pa >= p && pa < p + IMMR_SIZE) + return VIRT_IMMR_BASE + pa - p; + return 0; } -#define LARGE_PAGE_SIZE_4M (1<<22) #define LARGE_PAGE_SIZE_8M (1<<23) -#define LARGE_PAGE_SIZE_64M (1<<26) -unsigned long __init mmu_mapin_ram(unsigned long top) +/* + * MMU_init_hw does the chip-specific initialization of the MMU hardware. + */ +void __init MMU_init_hw(void) { - unsigned long v, s, mapped; - phys_addr_t p; + /* PIN up to the 3 first 8Mb after IMMR in DTLB table */ +#ifdef CONFIG_PIN_TLB + unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000; + unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY; +#ifdef CONFIG_PIN_TLB_IMMR + int i = 29; +#else + int i = 28; +#endif + unsigned long addr = 0; + unsigned long mem = total_lowmem; + + for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) { + mtspr(SPRN_MD_CTR, ctr | (i << 8)); + mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID); + mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID); + mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT); + addr += LARGE_PAGE_SIZE_8M; + mem -= LARGE_PAGE_SIZE_8M; + } +#endif +} - v = KERNELBASE; - p = 0; - s = top; +static void mmu_mapin_immr(void) +{ + unsigned long p = PHYS_IMMR_BASE; + unsigned long v = VIRT_IMMR_BASE; + unsigned long f = pgprot_val(PAGE_KERNEL_NCG); + int offset; - if (__map_without_ltlbs) - return 0; + for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE) + map_page(v + offset, p + offset, f); +} -#ifdef CONFIG_PPC_4K_PAGES - while (s >= LARGE_PAGE_SIZE_8M) { - pmd_t *pmdp; - unsigned long val = p | MD_PS8MEG; +/* Address of instructions to patch */ +#ifndef CONFIG_PIN_TLB_IMMR +extern unsigned int DTLBMiss_jmp; +#endif +extern unsigned int DTLBMiss_cmp, FixupDAR_cmp; - pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); - *pmdp++ = __pmd(val); - *pmdp++ = __pmd(val + LARGE_PAGE_SIZE_4M); +void mmu_patch_cmp_limit(unsigned int *addr, unsigned long mapped) +{ + unsigned int instr = *addr; - v += LARGE_PAGE_SIZE_8M; - p += LARGE_PAGE_SIZE_8M; - s -= LARGE_PAGE_SIZE_8M; - } -#else /* CONFIG_PPC_16K_PAGES */ - while (s >= LARGE_PAGE_SIZE_64M) { - pmd_t *pmdp; - unsigned long val = p | MD_PS8MEG; + instr &= 0xffff0000; + instr |= (unsigned long)__va(mapped) >> 16; + patch_instruction(addr, instr); +} - pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v); - *pmdp++ = __pmd(val); +unsigned long __init mmu_mapin_ram(unsigned long top) +{ + unsigned long mapped; - v += LARGE_PAGE_SIZE_64M; - p += LARGE_PAGE_SIZE_64M; - s -= LARGE_PAGE_SIZE_64M; - } + if (__map_without_ltlbs) { + mapped = 0; + mmu_mapin_immr(); +#ifndef CONFIG_PIN_TLB_IMMR + patch_instruction(&DTLBMiss_jmp, PPC_INST_NOP); #endif + } else { + mapped = top & ~(LARGE_PAGE_SIZE_8M - 1); + } - mapped = top - s; + mmu_patch_cmp_limit(&DTLBMiss_cmp, mapped); + mmu_patch_cmp_limit(&FixupDAR_cmp, mapped); /* If the size of RAM is not an exact power of two, we may not * have covered RAM in its entirety with 8 MiB @@ -77,7 +130,8 @@ unsigned long __init mmu_mapin_ram(unsigned long top) * coverage with normal-sized pages (or other reasons) do not * attempt to allocate outside the allowed range. */ - memblock_set_current_limit(mapped); + if (mapped) + memblock_set_current_limit(mapped); return mapped; } @@ -90,13 +144,8 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, */ BUG_ON(first_memblock_base != 0); -#ifdef CONFIG_PIN_TLB /* 8xx can only access 24MB at the moment */ memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01800000)); -#else - /* 8xx can only access 8MB at the moment */ - memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000)); -#endif } /* diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index 6527882ce05e..bb0354222b11 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -75,7 +75,7 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, } ret = 0; - *flt = handle_mm_fault(mm, vma, ea, is_write ? FAULT_FLAG_WRITE : 0); + *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0); if (unlikely(*flt & VM_FAULT_ERROR)) { if (*flt & VM_FAULT_OOM) { ret = -ENOMEM; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index a67c6d781c52..a4db22f65021 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -429,7 +429,7 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - fault = handle_mm_fault(mm, vma, address, flags); + fault = handle_mm_fault(vma, address, flags); if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) { if (fault & VM_FAULT_SIGSEGV) goto bad_area; diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c index 6333b273d2d5..42c702b3be1f 100644 --- a/arch/powerpc/mm/hash64_4k.c +++ b/arch/powerpc/mm/hash64_4k.c @@ -70,8 +70,8 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; - if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K, - MMU_PAGE_4K, ssize, flags) == -1) + if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K, + MMU_PAGE_4K, ssize, flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } @@ -84,21 +84,23 @@ repeat: hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; /* Insert into the hash table, primary slot */ - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, - MMU_PAGE_4K, MMU_PAGE_4K, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0, + MMU_PAGE_4K, MMU_PAGE_4K, ssize); /* * Primary is full, try the secondary */ if (unlikely(slot == -1)) { hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, - rflags, HPTE_V_SECONDARY, - MMU_PAGE_4K, MMU_PAGE_4K, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, + rflags, + HPTE_V_SECONDARY, + MMU_PAGE_4K, + MMU_PAGE_4K, ssize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - ppc_md.hpte_remove(hpte_group); + mmu_hash_ops.hpte_remove(hpte_group); /* * FIXME!! Should be try the group from which we removed ? */ diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c index 16644e1f4e6b..3bbbea07378c 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/hash64_64k.c @@ -133,9 +133,9 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; - ret = ppc_md.hpte_updatepp(slot, rflags, vpn, - MMU_PAGE_4K, MMU_PAGE_4K, - ssize, flags); + ret = mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, + MMU_PAGE_4K, MMU_PAGE_4K, + ssize, flags); /* *if we failed because typically the HPTE wasn't really here * we try an insertion. @@ -166,21 +166,22 @@ repeat: hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; /* Insert into the hash table, primary slot */ - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, - MMU_PAGE_4K, MMU_PAGE_4K, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0, + MMU_PAGE_4K, MMU_PAGE_4K, ssize); /* * Primary is full, try the secondary */ if (unlikely(slot == -1)) { hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, - rflags, HPTE_V_SECONDARY, - MMU_PAGE_4K, MMU_PAGE_4K, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, + rflags, HPTE_V_SECONDARY, + MMU_PAGE_4K, MMU_PAGE_4K, + ssize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - ppc_md.hpte_remove(hpte_group); + mmu_hash_ops.hpte_remove(hpte_group); /* * FIXME!! Should be try the group from which we removed ? */ @@ -272,8 +273,9 @@ int __hash_page_64K(unsigned long ea, unsigned long access, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; - if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K, - MMU_PAGE_64K, ssize, flags) == -1) + if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K, + MMU_PAGE_64K, ssize, + flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } @@ -286,21 +288,24 @@ repeat: hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; /* Insert into the hash table, primary slot */ - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, - MMU_PAGE_64K, MMU_PAGE_64K, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0, + MMU_PAGE_64K, MMU_PAGE_64K, + ssize); /* * Primary is full, try the secondary */ if (unlikely(slot == -1)) { hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, - rflags, HPTE_V_SECONDARY, - MMU_PAGE_64K, MMU_PAGE_64K, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, + rflags, + HPTE_V_SECONDARY, + MMU_PAGE_64K, + MMU_PAGE_64K, ssize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - ppc_md.hpte_remove(hpte_group); + mmu_hash_ops.hpte_remove(hpte_group); /* * FIXME!! Should be try the group from which we removed ? */ diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index d873f6507f72..88ce7d212320 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -55,7 +55,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) * We need 14 to 65 bits of va for a tlibe of 4K page * With vpn we ignore the lower VPN_SHIFT bits already. * And top two bits are already ignored because we can - * only accomadate 76 bits in a 64 bit vpn with a VPN_SHIFT + * only accomodate 76 bits in a 64 bit vpn with a VPN_SHIFT * of 12. */ va = vpn << VPN_SHIFT; @@ -64,7 +64,8 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) * Older versions of the architecture (2.02 and earler) require the * masking of the top 16 bits. */ - va &= ~(0xffffULL << 48); + if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA)) + va &= ~(0xffffULL << 48); switch (psize) { case MMU_PAGE_4K: @@ -113,7 +114,8 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) * Older versions of the architecture (2.02 and earler) require the * masking of the top 16 bits. */ - va &= ~(0xffffULL << 48); + if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA)) + va &= ~(0xffffULL << 48); switch (psize) { case MMU_PAGE_4K: @@ -316,8 +318,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, DBG_LOW(" -> hit\n"); /* Update the HPTE */ hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & - ~(HPTE_R_PP | HPTE_R_N)) | - (newpp & (HPTE_R_PP | HPTE_R_N | + ~(HPTE_R_PPP | HPTE_R_N)) | + (newpp & (HPTE_R_PPP | HPTE_R_N | HPTE_R_C))); } native_unlock_hpte(hptep); @@ -385,8 +387,8 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, /* Update the HPTE */ hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & - ~(HPTE_R_PP | HPTE_R_N)) | - (newpp & (HPTE_R_PP | HPTE_R_N))); + ~(HPTE_R_PPP | HPTE_R_N)) | + (newpp & (HPTE_R_PPP | HPTE_R_N))); /* * Ensure it is out of the tlb too. Bolted entries base and * actual page size will be same. @@ -550,7 +552,11 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, } } /* This works for all page sizes, and for 256M and 1T segments */ - *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT; + if (cpu_has_feature(CPU_FTR_ARCH_300)) + *ssize = hpte_r >> HPTE_R_3_0_SSIZE_SHIFT; + else + *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT; + shift = mmu_psize_defs[size].shift; avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm); @@ -601,7 +607,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot, * crashdump and all bets are off anyway. * * TODO: add batching support when enabled. remember, no dynamic memory here, - * athough there is the control page available... + * although there is the control page available... */ static void native_hpte_clear(void) { @@ -719,23 +725,29 @@ static void native_flush_hash_range(unsigned long number, int local) local_irq_restore(flags); } -static int native_update_partition_table(u64 patb1) +static int native_register_proc_table(unsigned long base, unsigned long page_size, + unsigned long table_size) { + unsigned long patb1 = base << 25; /* VSID */ + + patb1 |= (page_size << 5); /* sllp */ + patb1 |= table_size; + partition_tb->patb1 = cpu_to_be64(patb1); return 0; } void __init hpte_init_native(void) { - ppc_md.hpte_invalidate = native_hpte_invalidate; - ppc_md.hpte_updatepp = native_hpte_updatepp; - ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp; - ppc_md.hpte_insert = native_hpte_insert; - ppc_md.hpte_remove = native_hpte_remove; - ppc_md.hpte_clear_all = native_hpte_clear; - ppc_md.flush_hash_range = native_flush_hash_range; - ppc_md.hugepage_invalidate = native_hugepage_invalidate; + mmu_hash_ops.hpte_invalidate = native_hpte_invalidate; + mmu_hash_ops.hpte_updatepp = native_hpte_updatepp; + mmu_hash_ops.hpte_updateboltedpp = native_hpte_updateboltedpp; + mmu_hash_ops.hpte_insert = native_hpte_insert; + mmu_hash_ops.hpte_remove = native_hpte_remove; + mmu_hash_ops.hpte_clear_all = native_hpte_clear; + mmu_hash_ops.flush_hash_range = native_flush_hash_range; + mmu_hash_ops.hugepage_invalidate = native_hugepage_invalidate; if (cpu_has_feature(CPU_FTR_ARCH_300)) - ppc_md.update_partition_table = native_update_partition_table; + ppc_md.register_process_table = native_register_proc_table; } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 59268969a0bc..b78b5d211278 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -34,6 +34,7 @@ #include <linux/signal.h> #include <linux/memblock.h> #include <linux/context_tracking.h> +#include <linux/libfdt.h> #include <asm/processor.h> #include <asm/pgtable.h> @@ -58,6 +59,7 @@ #include <asm/firmware.h> #include <asm/tm.h> #include <asm/trace.h> +#include <asm/ps3.h> #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -87,10 +89,6 @@ * */ -#ifdef CONFIG_U3_DART -extern unsigned long dart_tablebase; -#endif /* CONFIG_U3_DART */ - static unsigned long _SDR1; struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; EXPORT_SYMBOL_GPL(mmu_psize_defs); @@ -120,6 +118,8 @@ static u8 *linear_map_hash_slots; static unsigned long linear_map_hash_count; static DEFINE_SPINLOCK(linear_map_hash_lock); #endif /* CONFIG_DEBUG_PAGEALLOC */ +struct mmu_hash_ops mmu_hash_ops; +EXPORT_SYMBOL(mmu_hash_ops); /* There are definitions of page sizes arrays to be used when none * is provided by the firmware. @@ -159,6 +159,19 @@ static struct mmu_psize_def mmu_psize_defaults_gp[] = { }, }; +/* + * 'R' and 'C' update notes: + * - Under pHyp or KVM, the updatepp path will not set C, thus it *will* + * create writeable HPTEs without C set, because the hcall H_PROTECT + * that we use in that case will not update C + * - The above is however not a problem, because we also don't do that + * fancy "no flush" variant of eviction and we use H_REMOVE which will + * do the right thing and thus we don't have the race I described earlier + * + * - Under bare metal, we do have the race, so we need R and C set + * - We make sure R is always set and never lost + * - C is _PAGE_DIRTY, and *should* always be set for a writeable mapping + */ unsigned long htab_convert_pte_flags(unsigned long pteflags) { unsigned long rflags = 0; @@ -186,19 +199,28 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) rflags |= 0x1; } /* - * Always add "C" bit for perf. Memory coherence is always enabled + * We can't allow hardware to update hpte bits. Hence always + * set 'R' bit and set 'C' if it is a write fault */ - rflags |= HPTE_R_C | HPTE_R_M; + rflags |= HPTE_R_R; + + if (pteflags & _PAGE_DIRTY) + rflags |= HPTE_R_C; /* * Add in WIG bits */ if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_TOLERANT) rflags |= HPTE_R_I; - if ((pteflags & _PAGE_CACHE_CTL ) == _PAGE_NON_IDEMPOTENT) + else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT) rflags |= (HPTE_R_I | HPTE_R_G); - if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO) - rflags |= (HPTE_R_I | HPTE_R_W); + else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO) + rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M); + else + /* + * Add memory coherence if cache inhibited is not set + */ + rflags |= HPTE_R_M; return rflags; } @@ -256,9 +278,10 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, hash = hpt_hash(vpn, shift, ssize); hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); - BUG_ON(!ppc_md.hpte_insert); - ret = ppc_md.hpte_insert(hpteg, vpn, paddr, tprot, - HPTE_V_BOLTED, psize, psize, ssize); + BUG_ON(!mmu_hash_ops.hpte_insert); + ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot, + HPTE_V_BOLTED, psize, psize, + ssize); if (ret < 0) break; @@ -283,11 +306,11 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend, shift = mmu_psize_defs[psize].shift; step = 1 << shift; - if (!ppc_md.hpte_removebolted) + if (!mmu_hash_ops.hpte_removebolted) return -ENODEV; for (vaddr = vstart; vaddr < vend; vaddr += step) { - rc = ppc_md.hpte_removebolted(vaddr, psize, ssize); + rc = mmu_hash_ops.hpte_removebolted(vaddr, psize, ssize); if (rc == -ENOENT) { ret = -ENOENT; continue; @@ -299,6 +322,15 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend, return ret; } +static bool disable_1tb_segments = false; + +static int __init parse_disable_1tb_segments(char *p) +{ + disable_1tb_segments = true; + return 0; +} +early_param("disable_1tb_segments", parse_disable_1tb_segments); + static int __init htab_dt_scan_seg_sizes(unsigned long node, const char *uname, int depth, void *data) @@ -317,6 +349,12 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node, for (; size >= 4; size -= 4, ++prop) { if (be32_to_cpu(prop[0]) == 40) { DBG("1T segment support detected\n"); + + if (disable_1tb_segments) { + DBG("1T segments disabled by command line\n"); + break; + } + cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT; return 1; } @@ -492,7 +530,8 @@ static bool might_have_hea(void) * we will never see an HEA ethernet device. */ #ifdef CONFIG_IBMEBUS - return !cpu_has_feature(CPU_FTR_ARCH_207S); + return !cpu_has_feature(CPU_FTR_ARCH_207S) && + !firmware_has_feature(FW_FEATURE_SPLPAR); #else return false; #endif @@ -558,7 +597,7 @@ found: * would stop us accessing the HEA ethernet. So if we * have the chance of ever seeing one, stay at 4k. */ - if (!might_have_hea() || !machine_is(pseries)) + if (!might_have_hea()) mmu_io_psize = MMU_PAGE_64K; } else mmu_ci_restrictions = 1; @@ -677,10 +716,9 @@ int remove_section_mapping(unsigned long start, unsigned long end) #endif /* CONFIG_MEMORY_HOTPLUG */ static void __init hash_init_partition_table(phys_addr_t hash_table, - unsigned long pteg_count) + unsigned long htab_size) { unsigned long ps_field; - unsigned long htab_size; unsigned long patb_size = 1UL << PATB_SIZE_SHIFT; /* @@ -688,7 +726,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table, * We can ignore that for lpid 0 */ ps_field = 0; - htab_size = __ilog2(pteg_count) - 11; + htab_size = __ilog2(htab_size) - 18; BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large."); partition_tb = __va(memblock_alloc_base(patb_size, patb_size, @@ -702,7 +740,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table, * For now UPRT is 0 for us. */ partition_tb->patb1 = 0; - DBG("Partition table %p\n", partition_tb); + pr_info("Partition table %p\n", partition_tb); /* * update partition table control register, * 64 K size. @@ -716,7 +754,7 @@ static void __init htab_initialize(void) unsigned long table; unsigned long pteg_count; unsigned long prot; - unsigned long base = 0, size = 0, limit; + unsigned long base = 0, size = 0; struct memblock_region *reg; DBG(" -> htab_initialize()\n"); @@ -742,7 +780,8 @@ static void __init htab_initialize(void) htab_hash_mask = pteg_count - 1; - if (firmware_has_feature(FW_FEATURE_LPAR)) { + if (firmware_has_feature(FW_FEATURE_LPAR) || + firmware_has_feature(FW_FEATURE_PS3_LV1)) { /* Using a hypervisor which owns the htab */ htab_address = NULL; _SDR1 = 0; @@ -753,20 +792,26 @@ static void __init htab_initialize(void) * Clear the htab if firmware assisted dump is active so * that we dont end up using old mappings. */ - if (is_fadump_active() && ppc_md.hpte_clear_all) - ppc_md.hpte_clear_all(); + if (is_fadump_active() && mmu_hash_ops.hpte_clear_all) + mmu_hash_ops.hpte_clear_all(); #endif } else { - /* Find storage for the HPT. Must be contiguous in - * the absolute address space. On cell we want it to be - * in the first 2 Gig so we can use it for IOMMU hacks. + unsigned long limit = MEMBLOCK_ALLOC_ANYWHERE; + +#ifdef CONFIG_PPC_CELL + /* + * Cell may require the hash table down low when using the + * Axon IOMMU in order to fit the dynamic region over it, see + * comments in cell/iommu.c */ - if (machine_is(cell)) + if (fdt_subnode_offset(initial_boot_params, 0, "axon") > 0) { limit = 0x80000000; - else - limit = MEMBLOCK_ALLOC_ANYWHERE; + pr_info("Hash table forced below 2G for Axon IOMMU\n"); + } +#endif /* CONFIG_PPC_CELL */ - table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, limit); + table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, + limit); DBG("Hash table allocated at %lx, size: %lx\n", table, htab_size_bytes); @@ -774,7 +819,7 @@ static void __init htab_initialize(void) htab_address = __va(table); /* htab absolute addr + encoded htabsize */ - _SDR1 = table + __ilog2(pteg_count) - 11; + _SDR1 = table + __ilog2(htab_size_bytes) - 18; /* Initialize the HPT with no entries */ memset((void *)table, 0, htab_size_bytes); @@ -783,7 +828,7 @@ static void __init htab_initialize(void) /* Set SDR1 */ mtspr(SPRN_SDR1, _SDR1); else - hash_init_partition_table(table, pteg_count); + hash_init_partition_table(table, htab_size_bytes); } prot = pgprot_val(PAGE_KERNEL); @@ -810,34 +855,6 @@ static void __init htab_initialize(void) DBG("creating mapping for region: %lx..%lx (prot: %lx)\n", base, size, prot); -#ifdef CONFIG_U3_DART - /* Do not map the DART space. Fortunately, it will be aligned - * in such a way that it will not cross two memblock regions and - * will fit within a single 16Mb page. - * The DART space is assumed to be a full 16Mb region even if - * we only use 2Mb of that space. We will use more of it later - * for AGP GART. We have to use a full 16Mb large page. - */ - DBG("DART base: %lx\n", dart_tablebase); - - if (dart_tablebase != 0 && dart_tablebase >= base - && dart_tablebase < (base + size)) { - unsigned long dart_table_end = dart_tablebase + 16 * MB; - if (base != dart_tablebase) - BUG_ON(htab_bolt_mapping(base, dart_tablebase, - __pa(base), prot, - mmu_linear_psize, - mmu_kernel_ssize)); - if ((base + size) > dart_table_end) - BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB, - base + size, - __pa(dart_table_end), - prot, - mmu_linear_psize, - mmu_kernel_ssize)); - continue; - } -#endif /* CONFIG_U3_DART */ BUG_ON(htab_bolt_mapping(base, base + size, __pa(base), prot, mmu_linear_psize, mmu_kernel_ssize)); } @@ -900,12 +917,28 @@ void __init hash__early_init_mmu(void) vmemmap = (struct page *)H_VMEMMAP_BASE; ioremap_bot = IOREMAP_BASE; +#ifdef CONFIG_PCI + pci_io_base = ISA_IO_BASE; +#endif + + /* Select appropriate backend */ + if (firmware_has_feature(FW_FEATURE_PS3_LV1)) + ps3_early_mm_init(); + else if (firmware_has_feature(FW_FEATURE_LPAR)) + hpte_init_pseries(); + else if (IS_ENABLED(CONFIG_PPC_NATIVE)) + hpte_init_native(); + + if (!mmu_hash_ops.hpte_insert) + panic("hash__early_init_mmu: No MMU hash ops defined!\n"); + /* Initialize the MMU Hash table and create the linear mapping * of memory. Has to be done before SLB initialization as this is * currently where the page size encoding is obtained. */ htab_initialize(); + pr_info("Initializing hash mmu with SLB\n"); /* Initialize SLB management */ slb_initialize(); } @@ -1448,7 +1481,8 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, * We use same base page size and actual psize, because we don't * use these functions for hugepage */ - ppc_md.hpte_invalidate(slot, vpn, psize, psize, ssize, local); + mmu_hash_ops.hpte_invalidate(slot, vpn, psize, psize, + ssize, local); } pte_iterate_hashed_end(); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -1489,9 +1523,9 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr, if (!hpte_slot_array) return; - if (ppc_md.hugepage_invalidate) { - ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, - psize, ssize, local); + if (mmu_hash_ops.hugepage_invalidate) { + mmu_hash_ops.hugepage_invalidate(vsid, s_addr, hpte_slot_array, + psize, ssize, local); goto tm_abort; } /* @@ -1518,8 +1552,8 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; - ppc_md.hpte_invalidate(slot, vpn, psize, - MMU_PAGE_16M, ssize, local); + mmu_hash_ops.hpte_invalidate(slot, vpn, psize, + MMU_PAGE_16M, ssize, local); } tm_abort: #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -1543,8 +1577,8 @@ tm_abort: void flush_hash_range(unsigned long number, int local) { - if (ppc_md.flush_hash_range) - ppc_md.flush_hash_range(number, local); + if (mmu_hash_ops.flush_hash_range) + mmu_hash_ops.flush_hash_range(number, local); else { int i; struct ppc64_tlb_batch *batch = @@ -1589,22 +1623,22 @@ repeat: HPTES_PER_GROUP) & ~0x7UL; /* Insert into the hash table, primary slot */ - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, vflags, - psize, psize, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, vflags, + psize, psize, ssize); /* Primary is full, try the secondary */ if (unlikely(slot == -1)) { hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, - vflags | HPTE_V_SECONDARY, - psize, psize, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, + vflags | HPTE_V_SECONDARY, + psize, psize, ssize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP)&~0x7UL; - ppc_md.hpte_remove(hpte_group); + mmu_hash_ops.hpte_remove(hpte_group); goto repeat; } } @@ -1654,8 +1688,9 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; - ppc_md.hpte_invalidate(slot, vpn, mmu_linear_psize, mmu_linear_psize, - mmu_kernel_ssize, 0); + mmu_hash_ops.hpte_invalidate(slot, vpn, mmu_linear_psize, + mmu_linear_psize, + mmu_kernel_ssize, 0); } void __kernel_map_pages(struct page *page, int numpages, int enable) diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index ba3fc229468a..f20d16f849c5 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -103,8 +103,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; - ret = ppc_md.hpte_updatepp(slot, rflags, vpn, - psize, lpsize, ssize, flags); + ret = mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, + psize, lpsize, ssize, flags); /* * We failed to update, try to insert a new entry. */ @@ -131,23 +131,24 @@ repeat: hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; /* Insert into the hash table, primary slot */ - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, - psize, lpsize, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0, + psize, lpsize, ssize); /* * Primary is full, try the secondary */ if (unlikely(slot == -1)) { hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, - rflags, HPTE_V_SECONDARY, - psize, lpsize, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, + rflags, + HPTE_V_SECONDARY, + psize, lpsize, ssize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - ppc_md.hpte_remove(hpte_group); + mmu_hash_ops.hpte_remove(hpte_group); goto repeat; } } diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index 3058560b6121..d5026f3800b6 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -79,8 +79,8 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; - if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize, - mmu_psize, ssize, flags) == -1) + if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, mmu_psize, + mmu_psize, ssize, flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 5aac1a3f86cd..7372ee13eb1e 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -73,7 +73,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, cachep = PGT_CACHE(pdshift - pshift); #endif - new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT); + new = kmem_cache_zalloc(cachep, GFP_KERNEL); BUG_ON(pshift > HUGEPD_SHIFT_MASK); BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK); @@ -81,6 +81,13 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp, if (! new) return -ENOMEM; + /* + * Make sure other cpus find the hugepd set only after a + * properly initialized page table is visible to them. + * For more details look for comment in __pte_alloc(). + */ + smp_wmb(); + spin_lock(&mm->page_table_lock); #ifdef CONFIG_PPC_FSL_BOOK3E /* diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index c899fe340bbd..448685fbf27c 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -64,7 +64,7 @@ EXPORT_SYMBOL(memstart_addr); phys_addr_t kernstart_addr; EXPORT_SYMBOL(kernstart_addr); -#ifdef CONFIG_RELOCATABLE_PPC32 +#ifdef CONFIG_RELOCATABLE /* Used in __va()/__pa() */ long long virt_phys_offset; EXPORT_SYMBOL(virt_phys_offset); @@ -80,9 +80,6 @@ EXPORT_SYMBOL(agp_special_page); void MMU_init(void); -/* XXX should be in current.h -- paulus */ -extern struct task_struct *current_set[NR_CPUS]; - /* * this tells the system to map all of ram with the segregs * (i.e. page tables) instead of the bats. diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 2fd57fa48429..5f844337de21 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -116,6 +116,16 @@ int memory_add_physaddr_to_nid(u64 start) } #endif +int __weak create_section_mapping(unsigned long start, unsigned long end) +{ + return -ENODEV; +} + +int __weak remove_section_mapping(unsigned long start, unsigned long end) +{ + return -ENODEV; +} + int arch_add_memory(int nid, u64 start, u64 size, bool for_device) { struct pglist_data *pgdata; @@ -239,8 +249,14 @@ static int __init mark_nonram_nosave(void) static bool zone_limits_final; +/* + * The memory zones past TOP_ZONE are managed by generic mm code. + * These should be set to zero since that's what every other + * architecture does. + */ static unsigned long max_zone_pfns[MAX_NR_ZONES] = { - [0 ... MAX_NR_ZONES - 1] = ~0UL + [0 ... TOP_ZONE ] = ~0UL, + [TOP_ZONE + 1 ... MAX_NR_ZONES - 1] = 0 }; /* diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 227b2a6c4544..b114f8b93ec9 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -65,7 +65,7 @@ static int radix__init_new_context(struct mm_struct *mm, int index) /* * set the process table entry, */ - rts_field = 3ull << PPC_BITLSHIFT(2); + rts_field = radix__get_tree_size(); process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE); return 0; } @@ -181,7 +181,10 @@ void destroy_context(struct mm_struct *mm) #ifdef CONFIG_PPC_RADIX_MMU void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) { - mtspr(SPRN_PID, next->context.id); asm volatile("isync": : :"memory"); + mtspr(SPRN_PID, next->context.id); + asm volatile("isync \n" + PPC_SLBIA(0x7) + : : :"memory"); } #endif diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 6af65327c993..f988db655e5b 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -154,9 +154,10 @@ struct tlbcam { }; #endif -#if defined(CONFIG_6xx) || defined(CONFIG_FSL_BOOKE) +#if defined(CONFIG_6xx) || defined(CONFIG_FSL_BOOKE) || defined(CONFIG_PPC_8xx) /* 6xx have BATS */ /* FSL_BOOKE have TLBCAM */ +/* 8xx have LTLB */ phys_addr_t v_block_mapped(unsigned long va); unsigned long p_block_mapped(phys_addr_t pa); #else diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 669a15e7fa76..75b9cd6150cc 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -581,30 +581,22 @@ static void verify_cpu_node_mapping(int cpu, int node) } } -static int cpu_numa_callback(struct notifier_block *nfb, unsigned long action, - void *hcpu) +/* Must run before sched domains notifier. */ +static int ppc_numa_cpu_prepare(unsigned int cpu) { - unsigned long lcpu = (unsigned long)hcpu; - int ret = NOTIFY_DONE, nid; + int nid; - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - nid = numa_setup_cpu(lcpu); - verify_cpu_node_mapping((int)lcpu, nid); - ret = NOTIFY_OK; - break; + nid = numa_setup_cpu(cpu); + verify_cpu_node_mapping(cpu, nid); + return 0; +} + +static int ppc_numa_cpu_dead(unsigned int cpu) +{ #ifdef CONFIG_HOTPLUG_CPU - case CPU_DEAD: - case CPU_DEAD_FROZEN: - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - unmap_cpu_from_node(lcpu); - ret = NOTIFY_OK; - break; + unmap_cpu_from_node(cpu); #endif - } - return ret; + return 0; } /* @@ -913,11 +905,6 @@ static void __init dump_numa_memory_topology(void) } } -static struct notifier_block ppc64_numa_nb = { - .notifier_call = cpu_numa_callback, - .priority = 1 /* Must run before sched domains notifier. */ -}; - /* Initialize NODE_DATA for a node on the local memory */ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) { @@ -985,15 +972,18 @@ void __init initmem_init(void) setup_node_to_cpumask_map(); reset_numa_cpu_lookup_table(); - register_cpu_notifier(&ppc64_numa_nb); + /* * We need the numa_cpu_lookup_table to be accurate for all CPUs, * even before we online them, so that we can use cpu_to_{node,mem} * early in boot, cf. smp_prepare_cpus(). + * _nocalls() + manual invocation is used because cpuhp is not yet + * initialized for the boot CPU. */ - for_each_present_cpu(cpu) { - numa_setup_cpu((unsigned long)cpu); - } + cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "POWER_NUMA_PREPARE", + ppc_numa_cpu_prepare, ppc_numa_cpu_dead); + for_each_present_cpu(cpu) + numa_setup_cpu(cpu); } static int __init early_numa(char *p) @@ -1163,18 +1153,34 @@ int hot_add_scn_to_nid(unsigned long scn_addr) static u64 hot_add_drconf_memory_max(void) { - struct device_node *memory = NULL; - unsigned int drconf_cell_cnt = 0; - u64 lmb_size = 0; + struct device_node *memory = NULL; + struct device_node *dn = NULL; + unsigned int drconf_cell_cnt = 0; + u64 lmb_size = 0; const __be32 *dm = NULL; + const __be64 *lrdr = NULL; + struct of_drconf_cell drmem; + + dn = of_find_node_by_path("/rtas"); + if (dn) { + lrdr = of_get_property(dn, "ibm,lrdr-capacity", NULL); + of_node_put(dn); + if (lrdr) + return be64_to_cpup(lrdr); + } - memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); - if (memory) { - drconf_cell_cnt = of_get_drconf_memory(memory, &dm); - lmb_size = of_get_lmb_size(memory); - of_node_put(memory); - } - return lmb_size * drconf_cell_cnt; + memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); + if (memory) { + drconf_cell_cnt = of_get_drconf_memory(memory, &dm); + lmb_size = of_get_lmb_size(memory); + + /* Advance to the last cell, each cell has 6 32 bit integers */ + dm += (drconf_cell_cnt - 1) * 6; + read_drconf_cell(&drmem, &dm); + of_node_put(memory); + return drmem.base_addr + lmb_size; + } + return 0; } /* diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index eb4451144746..670318766545 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -33,10 +33,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, changed = !pmd_same(*(pmdp), entry); if (changed) { __ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry)); - /* - * Since we are not supporting SW TLB systems, we don't - * have any thing similar to flush_tlb_page_nohash() - */ + flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); } return changed; } diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 18b2c11604fa..003ff48a11b6 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -21,8 +21,11 @@ #include <trace/events/thp.h> -static int native_update_partition_table(u64 patb1) +static int native_register_process_table(unsigned long base, unsigned long pg_sz, + unsigned long table_size) { + unsigned long patb1 = base | table_size | PATB_GR; + partition_tb->patb1 = cpu_to_be64(patb1); return 0; } @@ -160,32 +163,30 @@ redo: process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT); /* * Fill in the process table. - * we support 52 bits, hence 52-28 = 24, 11000 */ - rts_field = 3ull << PPC_BITLSHIFT(2); + rts_field = radix__get_tree_size(); process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE); /* * Fill in the partition table. We are suppose to use effective address * of process table here. But our linear mapping also enable us to use * physical address here. */ - ppc_md.update_partition_table(__pa(process_tb) | (PRTB_SIZE_SHIFT - 12) | PATB_GR); + ppc_md.register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12); pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd); } static void __init radix_init_partition_table(void) { unsigned long rts_field; - /* - * we support 52 bits, hence 52-28 = 24, 11000 - */ - rts_field = 3ull << PPC_BITLSHIFT(2); + + rts_field = radix__get_tree_size(); BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large."); partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT); partition_tb->patb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR); - printk("Partition table %p\n", partition_tb); + pr_info("Initializing Radix MMU\n"); + pr_info("Partition table %p\n", partition_tb); memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); /* @@ -197,7 +198,7 @@ static void __init radix_init_partition_table(void) void __init radix_init_native(void) { - ppc_md.update_partition_table = native_update_partition_table; + ppc_md.register_process_table = native_register_process_table; } static int __init get_idx_from_shift(unsigned int shift) @@ -296,11 +297,6 @@ found: void __init radix__early_init_mmu(void) { unsigned long lpcr; - /* - * setup LPCR UPRT based on mmu_features - */ - lpcr = mfspr(SPRN_LPCR); - mtspr(SPRN_LPCR, lpcr | LPCR_UPRT); #ifdef CONFIG_PPC_64K_PAGES /* PAGE_SIZE mappings */ @@ -336,6 +332,11 @@ void __init radix__early_init_mmu(void) __vmalloc_end = RADIX_VMALLOC_END; vmemmap = (struct page *)RADIX_VMEMMAP_BASE; ioremap_bot = IOREMAP_BASE; + +#ifdef CONFIG_PCI + pci_io_base = ISA_IO_BASE; +#endif + /* * For now radix also use the same frag size */ @@ -343,8 +344,12 @@ void __init radix__early_init_mmu(void) __pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT; radix_init_page_sizes(); - if (!firmware_has_feature(FW_FEATURE_LPAR)) + if (!firmware_has_feature(FW_FEATURE_LPAR)) { + radix_init_native(); + lpcr = mfspr(SPRN_LPCR); + mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); radix_init_partition_table(); + } radix_init_pgtable(); } @@ -353,16 +358,15 @@ void radix__early_init_mmu_secondary(void) { unsigned long lpcr; /* - * setup LPCR UPRT based on mmu_features + * update partition table control register and UPRT */ - lpcr = mfspr(SPRN_LPCR); - mtspr(SPRN_LPCR, lpcr | LPCR_UPRT); - /* - * update partition table control register, 64 K size. - */ - if (!firmware_has_feature(FW_FEATURE_LPAR)) + if (!firmware_has_feature(FW_FEATURE_LPAR)) { + lpcr = mfspr(SPRN_LPCR); + mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); + mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); + } } void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base, diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index bf7bf32b54f8..7f922f557936 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -84,7 +84,7 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add pte_t *pte; if (slab_is_available()) { - pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO); } else { pte = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); if (pte) @@ -97,7 +97,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) { struct page *ptepage; - gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO; + gfp_t flags = GFP_KERNEL | __GFP_ZERO; ptepage = alloc_pages(flags, 0); if (!ptepage) diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index e009e0604a8a..f5e8d4edb808 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -350,8 +350,7 @@ static pte_t *get_from_cache(struct mm_struct *mm) static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel) { void *ret = NULL; - struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | - __GFP_REPEAT | __GFP_ZERO); + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); if (!page) return NULL; if (!kernel && !pgtable_page_ctor(page)) { diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 0fdaf93a3e09..e1f22700fb16 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -12,26 +12,30 @@ #include <linux/mm.h> #include <linux/hugetlb.h> #include <linux/memblock.h> +#include <asm/ppc-opcode.h> #include <asm/tlb.h> #include <asm/tlbflush.h> static DEFINE_RAW_SPINLOCK(native_tlbie_lock); -static inline void __tlbiel_pid(unsigned long pid, int set) +#define RIC_FLUSH_TLB 0 +#define RIC_FLUSH_PWC 1 +#define RIC_FLUSH_ALL 2 + +static inline void __tlbiel_pid(unsigned long pid, int set, + unsigned long ric) { - unsigned long rb,rs,ric,prs,r; + unsigned long rb,rs,prs,r; rb = PPC_BIT(53); /* IS = 1 */ rb |= set << PPC_BITLSHIFT(51); rs = ((unsigned long)pid) << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ r = 1; /* raidx format */ - ric = 2; /* invalidate all the caches */ asm volatile("ptesync": : :"memory"); - asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |" - "(%2 << 17) | (%3 << 18) | (%4 << 21)" + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("ptesync": : :"memory"); } @@ -39,67 +43,61 @@ static inline void __tlbiel_pid(unsigned long pid, int set) /* * We use 128 set in radix mode and 256 set in hpt mode. */ -static inline void _tlbiel_pid(unsigned long pid) +static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) { int set; for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) { - __tlbiel_pid(pid, set); + __tlbiel_pid(pid, set, ric); } return; } -static inline void _tlbie_pid(unsigned long pid) +static inline void _tlbie_pid(unsigned long pid, unsigned long ric) { - unsigned long rb,rs,ric,prs,r; + unsigned long rb,rs,prs,r; rb = PPC_BIT(53); /* IS = 1 */ rs = pid << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ r = 1; /* raidx format */ - ric = 2; /* invalidate all the caches */ asm volatile("ptesync": : :"memory"); - asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |" - "(%2 << 17) | (%3 << 18) | (%4 << 21)" + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } static inline void _tlbiel_va(unsigned long va, unsigned long pid, - unsigned long ap) + unsigned long ap, unsigned long ric) { - unsigned long rb,rs,ric,prs,r; + unsigned long rb,rs,prs,r; rb = va & ~(PPC_BITMASK(52, 63)); rb |= ap << PPC_BITLSHIFT(58); rs = pid << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ r = 1; /* raidx format */ - ric = 0; /* no cluster flush yet */ asm volatile("ptesync": : :"memory"); - asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |" - "(%2 << 17) | (%3 << 18) | (%4 << 21)" + asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("ptesync": : :"memory"); } static inline void _tlbie_va(unsigned long va, unsigned long pid, - unsigned long ap) + unsigned long ap, unsigned long ric) { - unsigned long rb,rs,ric,prs,r; + unsigned long rb,rs,prs,r; rb = va & ~(PPC_BITMASK(52, 63)); rb |= ap << PPC_BITLSHIFT(58); rs = pid << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ r = 1; /* raidx format */ - ric = 0; /* no cluster flush yet */ asm volatile("ptesync": : :"memory"); - asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |" - "(%2 << 17) | (%3 << 18) | (%4 << 21)" + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } @@ -117,25 +115,40 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid, */ void radix__local_flush_tlb_mm(struct mm_struct *mm) { - unsigned int pid; + unsigned long pid; preempt_disable(); pid = mm->context.id; if (pid != MMU_NO_CONTEXT) - _tlbiel_pid(pid); + _tlbiel_pid(pid, RIC_FLUSH_ALL); preempt_enable(); } EXPORT_SYMBOL(radix__local_flush_tlb_mm); +void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) +{ + unsigned long pid; + struct mm_struct *mm = tlb->mm; + + preempt_disable(); + + pid = mm->context.id; + if (pid != MMU_NO_CONTEXT) + _tlbiel_pid(pid, RIC_FLUSH_PWC); + + preempt_enable(); +} +EXPORT_SYMBOL(radix__local_flush_tlb_pwc); + void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid) { - unsigned int pid; + unsigned long pid; preempt_disable(); pid = mm ? mm->context.id : 0; if (pid != MMU_NO_CONTEXT) - _tlbiel_va(vmaddr, pid, ap); + _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB); preempt_enable(); } @@ -160,7 +173,7 @@ static int mm_is_core_local(struct mm_struct *mm) void radix__flush_tlb_mm(struct mm_struct *mm) { - unsigned int pid; + unsigned long pid; preempt_disable(); pid = mm->context.id; @@ -172,20 +185,46 @@ void radix__flush_tlb_mm(struct mm_struct *mm) if (lock_tlbie) raw_spin_lock(&native_tlbie_lock); - _tlbie_pid(pid); + _tlbie_pid(pid, RIC_FLUSH_ALL); if (lock_tlbie) raw_spin_unlock(&native_tlbie_lock); } else - _tlbiel_pid(pid); + _tlbiel_pid(pid, RIC_FLUSH_ALL); no_context: preempt_enable(); } EXPORT_SYMBOL(radix__flush_tlb_mm); +void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) +{ + unsigned long pid; + struct mm_struct *mm = tlb->mm; + + preempt_disable(); + + pid = mm->context.id; + if (unlikely(pid == MMU_NO_CONTEXT)) + goto no_context; + + if (!mm_is_core_local(mm)) { + int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); + + if (lock_tlbie) + raw_spin_lock(&native_tlbie_lock); + _tlbie_pid(pid, RIC_FLUSH_PWC); + if (lock_tlbie) + raw_spin_unlock(&native_tlbie_lock); + } else + _tlbiel_pid(pid, RIC_FLUSH_PWC); +no_context: + preempt_enable(); +} +EXPORT_SYMBOL(radix__flush_tlb_pwc); + void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, unsigned long ap, int nid) { - unsigned int pid; + unsigned long pid; preempt_disable(); pid = mm ? mm->context.id : 0; @@ -196,11 +235,11 @@ void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, if (lock_tlbie) raw_spin_lock(&native_tlbie_lock); - _tlbie_va(vmaddr, pid, ap); + _tlbie_va(vmaddr, pid, ap, RIC_FLUSH_TLB); if (lock_tlbie) raw_spin_unlock(&native_tlbie_lock); } else - _tlbiel_va(vmaddr, pid, ap); + _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB); bail: preempt_enable(); } @@ -224,7 +263,7 @@ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) if (lock_tlbie) raw_spin_lock(&native_tlbie_lock); - _tlbie_pid(0); + _tlbie_pid(0, RIC_FLUSH_ALL); if (lock_tlbie) raw_spin_unlock(&native_tlbie_lock); } @@ -243,9 +282,61 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, } EXPORT_SYMBOL(radix__flush_tlb_range); +static int radix_get_mmu_psize(int page_size) +{ + int psize; + + if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift)) + psize = mmu_virtual_psize; + else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift)) + psize = MMU_PAGE_2M; + else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift)) + psize = MMU_PAGE_1G; + else + return -1; + return psize; +} void radix__tlb_flush(struct mmu_gather *tlb) { struct mm_struct *mm = tlb->mm; radix__flush_tlb_mm(mm); } + +void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, + unsigned long page_size) +{ + unsigned long rb,rs,prs,r; + unsigned long ap; + unsigned long ric = RIC_FLUSH_TLB; + + ap = mmu_get_ap(radix_get_mmu_psize(page_size)); + rb = gpa & ~(PPC_BITMASK(52, 63)); + rb |= ap << PPC_BITLSHIFT(58); + rs = lpid & ((1UL << 32) - 1); + prs = 0; /* process scoped */ + r = 1; /* raidx format */ + + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + asm volatile("eieio; tlbsync; ptesync": : :"memory"); +} +EXPORT_SYMBOL(radix__flush_tlb_lpid_va); + +void radix__flush_tlb_lpid(unsigned long lpid) +{ + unsigned long rb,rs,prs,r; + unsigned long ric = RIC_FLUSH_ALL; + + rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */ + rs = lpid & ((1UL << 32) - 1); + prs = 0; /* partition scoped */ + r = 1; /* raidx format */ + + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + asm volatile("eieio; tlbsync; ptesync": : :"memory"); +} +EXPORT_SYMBOL(radix__flush_tlb_lpid); |