diff options
author | Jiri Kosina <jkosina@suse.cz> | 2014-11-20 14:42:02 +0100 |
---|---|---|
committer | Jiri Kosina <jkosina@suse.cz> | 2014-11-20 14:42:02 +0100 |
commit | a02001086bbfb4da35d1228bebc2f1b442db455f (patch) | |
tree | 62ab47936cef06fd08657ca5b6cd1df98c19be57 /arch/powerpc/mm | |
parent | eff264efeeb0898408e8c9df72d8a32621035bed (diff) | |
parent | fc14f9c1272f62c3e8d01300f52467c0d9af50f9 (diff) | |
download | blackbird-op-linux-a02001086bbfb4da35d1228bebc2f1b442db455f.tar.gz blackbird-op-linux-a02001086bbfb4da35d1228bebc2f1b442db455f.zip |
Merge Linus' tree to be be to apply submitted patches to newer code than
current trivial.git base
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/Makefile | 5 | ||||
-rw-r--r-- | arch/powerpc/mm/copro_fault.c | 148 | ||||
-rw-r--r-- | arch/powerpc/mm/dma-noncoherent.c | 1 | ||||
-rw-r--r-- | arch/powerpc/mm/fault.c | 48 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_native_64.c | 46 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c | 186 | ||||
-rw-r--r-- | arch/powerpc/mm/hugepage-hash64.c | 88 | ||||
-rw-r--r-- | arch/powerpc/mm/init_32.c | 6 | ||||
-rw-r--r-- | arch/powerpc/mm/init_64.c | 129 | ||||
-rw-r--r-- | arch/powerpc/mm/mem.c | 71 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_context_hash32.c | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/numa.c | 84 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable.c | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable_32.c | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/pgtable_64.c | 46 | ||||
-rw-r--r-- | arch/powerpc/mm/ppc_mmu_32.c | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/slb.c | 3 | ||||
-rw-r--r-- | arch/powerpc/mm/slice.c | 15 | ||||
-rw-r--r-- | arch/powerpc/mm/stab.c | 286 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb_hash64.c | 6 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb_low_64e.S | 69 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb_nohash.c | 111 |
22 files changed, 780 insertions, 576 deletions
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 51230ee6a407..325e861616a1 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -13,9 +13,7 @@ obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ tlb_nohash_low.o obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(CONFIG_WORD_SIZE)e.o hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o -obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o \ - slb_low.o slb.o stab.o \ - $(hash64-y) +obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o slb_low.o slb.o $(hash64-y) obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ tlb_hash$(CONFIG_WORD_SIZE).o \ @@ -36,3 +34,4 @@ obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hugepage-hash64.o obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o obj-$(CONFIG_HIGHMEM) += highmem.o +obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c new file mode 100644 index 000000000000..5a236f082c78 --- /dev/null +++ b/arch/powerpc/mm/copro_fault.c @@ -0,0 +1,148 @@ +/* + * CoProcessor (SPU/AFU) mm fault handler + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2007 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * Author: Jeremy Kerr <jk@ozlabs.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/export.h> +#include <asm/reg.h> +#include <asm/copro.h> +#include <asm/spu.h> +#include <misc/cxl.h> + +/* + * This ought to be kept in sync with the powerpc specific do_page_fault + * function. Currently, there are a few corner cases that we haven't had + * to handle fortunately. + */ +int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea, + unsigned long dsisr, unsigned *flt) +{ + struct vm_area_struct *vma; + unsigned long is_write; + int ret; + + if (mm == NULL) + return -EFAULT; + + if (mm->pgd == NULL) + return -EFAULT; + + down_read(&mm->mmap_sem); + ret = -EFAULT; + vma = find_vma(mm, ea); + if (!vma) + goto out_unlock; + + if (ea < vma->vm_start) { + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto out_unlock; + if (expand_stack(vma, ea)) + goto out_unlock; + } + + is_write = dsisr & DSISR_ISSTORE; + if (is_write) { + if (!(vma->vm_flags & VM_WRITE)) + goto out_unlock; + } else { + if (dsisr & DSISR_PROTFAULT) + goto out_unlock; + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto out_unlock; + } + + ret = 0; + *flt = handle_mm_fault(mm, vma, ea, is_write ? FAULT_FLAG_WRITE : 0); + if (unlikely(*flt & VM_FAULT_ERROR)) { + if (*flt & VM_FAULT_OOM) { + ret = -ENOMEM; + goto out_unlock; + } else if (*flt & VM_FAULT_SIGBUS) { + ret = -EFAULT; + goto out_unlock; + } + BUG(); + } + + if (*flt & VM_FAULT_MAJOR) + current->maj_flt++; + else + current->min_flt++; + +out_unlock: + up_read(&mm->mmap_sem); + return ret; +} +EXPORT_SYMBOL_GPL(copro_handle_mm_fault); + +int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) +{ + u64 vsid; + int psize, ssize; + + switch (REGION_ID(ea)) { + case USER_REGION_ID: + pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea); + psize = get_slice_psize(mm, ea); + ssize = user_segment_size(ea); + vsid = get_vsid(mm->context.id, ea, ssize); + break; + case VMALLOC_REGION_ID: + pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea); + if (ea < VMALLOC_END) + psize = mmu_vmalloc_psize; + else + psize = mmu_io_psize; + ssize = mmu_kernel_ssize; + vsid = get_kernel_vsid(ea, mmu_kernel_ssize); + break; + case KERNEL_REGION_ID: + pr_devel("%s: 0x%llx -- KERNEL_REGION_ID\n", __func__, ea); + psize = mmu_linear_psize; + ssize = mmu_kernel_ssize; + vsid = get_kernel_vsid(ea, mmu_kernel_ssize); + break; + default: + pr_debug("%s: invalid region access at %016llx\n", __func__, ea); + return 1; + } + + vsid = (vsid << slb_vsid_shift(ssize)) | SLB_VSID_USER; + + vsid |= mmu_psize_defs[psize].sllp | + ((ssize == MMU_SEGSIZE_1T) ? SLB_VSID_B_1T : 0); + + slb->esid = (ea & (ssize == MMU_SEGSIZE_1T ? ESID_MASK_1T : ESID_MASK)) | SLB_ESID_V; + slb->vsid = vsid; + + return 0; +} +EXPORT_SYMBOL_GPL(copro_calculate_slb); + +void copro_flush_all_slbs(struct mm_struct *mm) +{ +#ifdef CONFIG_SPU_BASE + spu_flush_all_slbs(mm); +#endif + cxl_slbia(mm); +} +EXPORT_SYMBOL_GPL(copro_flush_all_slbs); diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index 7b6c10750179..d85e86aac7fb 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -33,6 +33,7 @@ #include <linux/export.h> #include <asm/tlbflush.h> +#include <asm/dma.h> #include "mmu_decl.h" diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 51ab9e7e6c39..08d659a9fcdb 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -30,9 +30,9 @@ #include <linux/kprobes.h> #include <linux/kdebug.h> #include <linux/perf_event.h> -#include <linux/magic.h> #include <linux/ratelimit.h> #include <linux/context_tracking.h> +#include <linux/hugetlb.h> #include <asm/firmware.h> #include <asm/page.h> @@ -114,22 +114,37 @@ static int store_updates_sp(struct pt_regs *regs) #define MM_FAULT_CONTINUE -1 #define MM_FAULT_ERR(sig) (sig) -static int do_sigbus(struct pt_regs *regs, unsigned long address) +static int do_sigbus(struct pt_regs *regs, unsigned long address, + unsigned int fault) { siginfo_t info; + unsigned int lsb = 0; up_read(¤t->mm->mmap_sem); - if (user_mode(regs)) { - current->thread.trap_nr = BUS_ADRERR; - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void __user *)address; - force_sig_info(SIGBUS, &info, current); - return MM_FAULT_RETURN; + if (!user_mode(regs)) + return MM_FAULT_ERR(SIGBUS); + + current->thread.trap_nr = BUS_ADRERR; + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void __user *)address; +#ifdef CONFIG_MEMORY_FAILURE + if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { + pr_err("MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n", + current->comm, current->pid, address); + info.si_code = BUS_MCEERR_AR; } - return MM_FAULT_ERR(SIGBUS); + + if (fault & VM_FAULT_HWPOISON_LARGE) + lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); + if (fault & VM_FAULT_HWPOISON) + lsb = PAGE_SHIFT; +#endif + info.si_addr_lsb = lsb; + force_sig_info(SIGBUS, &info, current); + return MM_FAULT_RETURN; } static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) @@ -170,11 +185,8 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) return MM_FAULT_RETURN; } - /* Bus error. x86 handles HWPOISON here, we'll add this if/when - * we support the feature in HW - */ - if (fault & VM_FAULT_SIGBUS) - return do_sigbus(regs, addr); + if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) + return do_sigbus(regs, addr, fault); /* We don't understand the fault code, this is fatal */ BUG(); @@ -508,7 +520,6 @@ bail: void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) { const struct exception_table_entry *entry; - unsigned long *stackend; /* Are we prepared to handle this fault? */ if ((entry = search_exception_tables(regs->nip)) != NULL) { @@ -537,8 +548,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n", regs->nip); - stackend = end_of_stack(current); - if (current != &init_task && *stackend != STACK_END_MAGIC) + if (task_stack_end_corrupted(current)) printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); die("Kernel access of bad area", regs, sig); diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index cf1d325eae8b..ae4962a06476 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -29,6 +29,8 @@ #include <asm/kexec.h> #include <asm/ppc-opcode.h> +#include <misc/cxl.h> + #ifdef DEBUG_LOW #define DBG_LOW(fmt...) udbg_printf(fmt) #else @@ -149,9 +151,11 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) static inline void tlbie(unsigned long vpn, int psize, int apsize, int ssize, int local) { - unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL); + unsigned int use_local; int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); + use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) && !cxl_ctx_in_use(); + if (use_local) use_local = mmu_psize_defs[psize].tlbiel; if (lock_tlbie && !use_local) @@ -412,18 +416,18 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, local_irq_restore(flags); } -static void native_hugepage_invalidate(struct mm_struct *mm, +static void native_hugepage_invalidate(unsigned long vsid, + unsigned long addr, unsigned char *hpte_slot_array, - unsigned long addr, int psize) + int psize, int ssize) { - int ssize = 0, i; - int lock_tlbie; + int i; struct hash_pte *hptep; int actual_psize = MMU_PAGE_16M; unsigned int max_hpte_count, valid; unsigned long flags, s_addr = addr; unsigned long hpte_v, want_v, shift; - unsigned long hidx, vpn = 0, vsid, hash, slot; + unsigned long hidx, vpn = 0, hash, slot; shift = mmu_psize_defs[psize].shift; max_hpte_count = 1U << (PMD_SHIFT - shift); @@ -437,15 +441,6 @@ static void native_hugepage_invalidate(struct mm_struct *mm, /* get the vpn */ addr = s_addr + (i * (1ul << shift)); - if (!is_kernel_addr(addr)) { - ssize = user_segment_size(addr); - vsid = get_vsid(mm->context.id, addr, ssize); - WARN_ON(vsid == 0); - } else { - vsid = get_kernel_vsid(addr, mmu_kernel_ssize); - ssize = mmu_kernel_ssize; - } - vpn = hpt_vpn(addr, vsid, ssize); hash = hpt_hash(vpn, shift, ssize); if (hidx & _PTEIDX_SECONDARY) @@ -465,22 +460,13 @@ static void native_hugepage_invalidate(struct mm_struct *mm, else /* Invalidate the hpte. NOTE: this also unlocks it */ hptep->v = 0; + /* + * We need to do tlb invalidate for all the address, tlbie + * instruction compares entry_VA in tlb with the VA specified + * here + */ + tlbie(vpn, psize, actual_psize, ssize, 0); } - /* - * Since this is a hugepage, we just need a single tlbie. - * use the last vpn. - */ - lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); - if (lock_tlbie) - raw_spin_lock(&native_tlbie_lock); - - asm volatile("ptesync":::"memory"); - __tlbie(vpn, psize, actual_psize, ssize); - asm volatile("eieio; tlbsync; ptesync":::"memory"); - - if (lock_tlbie) - raw_spin_unlock(&native_tlbie_lock); - local_irq_restore(flags); } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 88fdd9d25077..d5339a3b9945 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -51,7 +51,7 @@ #include <asm/cacheflush.h> #include <asm/cputable.h> #include <asm/sections.h> -#include <asm/spu.h> +#include <asm/copro.h> #include <asm/udbg.h> #include <asm/code-patching.h> #include <asm/fadump.h> @@ -92,12 +92,14 @@ extern unsigned long dart_tablebase; static unsigned long _SDR1; struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; +EXPORT_SYMBOL_GPL(mmu_psize_defs); struct hash_pte *htab_address; unsigned long htab_size_bytes; unsigned long htab_hash_mask; EXPORT_SYMBOL_GPL(htab_hash_mask); int mmu_linear_psize = MMU_PAGE_4K; +EXPORT_SYMBOL_GPL(mmu_linear_psize); int mmu_virtual_psize = MMU_PAGE_4K; int mmu_vmalloc_psize = MMU_PAGE_4K; #ifdef CONFIG_SPARSEMEM_VMEMMAP @@ -105,6 +107,7 @@ int mmu_vmemmap_psize = MMU_PAGE_4K; #endif int mmu_io_psize = MMU_PAGE_4K; int mmu_kernel_ssize = MMU_SEGSIZE_256M; +EXPORT_SYMBOL_GPL(mmu_kernel_ssize); int mmu_highuser_ssize = MMU_SEGSIZE_256M; u16 mmu_slb_size = 64; EXPORT_SYMBOL_GPL(mmu_slb_size); @@ -243,7 +246,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, } #ifdef CONFIG_MEMORY_HOTPLUG -static int htab_remove_mapping(unsigned long vstart, unsigned long vend, +int htab_remove_mapping(unsigned long vstart, unsigned long vend, int psize, int ssize) { unsigned long vaddr; @@ -333,70 +336,69 @@ static int __init htab_dt_scan_page_sizes(unsigned long node, return 0; prop = of_get_flat_dt_prop(node, "ibm,segment-page-sizes", &size); - if (prop != NULL) { - pr_info("Page sizes from device-tree:\n"); - size /= 4; - cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE); - while(size > 0) { - unsigned int base_shift = be32_to_cpu(prop[0]); - unsigned int slbenc = be32_to_cpu(prop[1]); - unsigned int lpnum = be32_to_cpu(prop[2]); - struct mmu_psize_def *def; - int idx, base_idx; - - size -= 3; prop += 3; - base_idx = get_idx_from_shift(base_shift); - if (base_idx < 0) { - /* - * skip the pte encoding also - */ - prop += lpnum * 2; size -= lpnum * 2; + if (!prop) + return 0; + + pr_info("Page sizes from device-tree:\n"); + size /= 4; + cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE); + while(size > 0) { + unsigned int base_shift = be32_to_cpu(prop[0]); + unsigned int slbenc = be32_to_cpu(prop[1]); + unsigned int lpnum = be32_to_cpu(prop[2]); + struct mmu_psize_def *def; + int idx, base_idx; + + size -= 3; prop += 3; + base_idx = get_idx_from_shift(base_shift); + if (base_idx < 0) { + /* skip the pte encoding also */ + prop += lpnum * 2; size -= lpnum * 2; + continue; + } + def = &mmu_psize_defs[base_idx]; + if (base_idx == MMU_PAGE_16M) + cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE; + + def->shift = base_shift; + if (base_shift <= 23) + def->avpnm = 0; + else + def->avpnm = (1 << (base_shift - 23)) - 1; + def->sllp = slbenc; + /* + * We don't know for sure what's up with tlbiel, so + * for now we only set it for 4K and 64K pages + */ + if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K) + def->tlbiel = 1; + else + def->tlbiel = 0; + + while (size > 0 && lpnum) { + unsigned int shift = be32_to_cpu(prop[0]); + int penc = be32_to_cpu(prop[1]); + + prop += 2; size -= 2; + lpnum--; + + idx = get_idx_from_shift(shift); + if (idx < 0) continue; - } - def = &mmu_psize_defs[base_idx]; - if (base_idx == MMU_PAGE_16M) - cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE; - - def->shift = base_shift; - if (base_shift <= 23) - def->avpnm = 0; - else - def->avpnm = (1 << (base_shift - 23)) - 1; - def->sllp = slbenc; - /* - * We don't know for sure what's up with tlbiel, so - * for now we only set it for 4K and 64K pages - */ - if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K) - def->tlbiel = 1; - else - def->tlbiel = 0; - - while (size > 0 && lpnum) { - unsigned int shift = be32_to_cpu(prop[0]); - int penc = be32_to_cpu(prop[1]); - - prop += 2; size -= 2; - lpnum--; - - idx = get_idx_from_shift(shift); - if (idx < 0) - continue; - - if (penc == -1) - pr_err("Invalid penc for base_shift=%d " - "shift=%d\n", base_shift, shift); - - def->penc[idx] = penc; - pr_info("base_shift=%d: shift=%d, sllp=0x%04lx," - " avpnm=0x%08lx, tlbiel=%d, penc=%d\n", - base_shift, shift, def->sllp, - def->avpnm, def->tlbiel, def->penc[idx]); - } + + if (penc == -1) + pr_err("Invalid penc for base_shift=%d " + "shift=%d\n", base_shift, shift); + + def->penc[idx] = penc; + pr_info("base_shift=%d: shift=%d, sllp=0x%04lx," + " avpnm=0x%08lx, tlbiel=%d, penc=%d\n", + base_shift, shift, def->sllp, + def->avpnm, def->tlbiel, def->penc[idx]); } - return 1; } - return 0; + + return 1; } #ifdef CONFIG_HUGETLB_PAGE @@ -821,21 +823,14 @@ static void __init htab_initialize(void) void __init early_init_mmu(void) { - /* Setup initial STAB address in the PACA */ - get_paca()->stab_real = __pa((u64)&initial_stab); - get_paca()->stab_addr = (u64)&initial_stab; - /* Initialize the MMU Hash table and create the linear mapping - * of memory. Has to be done before stab/slb initialization as - * this is currently where the page size encoding is obtained + * of memory. Has to be done before SLB initialization as this is + * currently where the page size encoding is obtained. */ htab_initialize(); - /* Initialize stab / SLB management */ - if (mmu_has_feature(MMU_FTR_SLB)) - slb_initialize(); - else - stab_initialize(get_paca()->stab_real); + /* Initialize SLB management */ + slb_initialize(); } #ifdef CONFIG_SMP @@ -845,13 +840,8 @@ void early_init_mmu_secondary(void) if (!firmware_has_feature(FW_FEATURE_LPAR)) mtspr(SPRN_SDR1, _SDR1); - /* Initialize STAB/SLB. We use a virtual address as it works - * in real mode on pSeries. - */ - if (mmu_has_feature(MMU_FTR_SLB)) - slb_initialize(); - else - stab_initialize(get_paca()->stab_addr); + /* Initialize SLB */ + slb_initialize(); } #endif /* CONFIG_SMP */ @@ -879,7 +869,7 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) } #ifdef CONFIG_PPC_MM_SLICES -unsigned int get_paca_psize(unsigned long addr) +static unsigned int get_paca_psize(unsigned long addr) { u64 lpsizes; unsigned char *hpsizes; @@ -913,10 +903,8 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr) if (get_slice_psize(mm, addr) == MMU_PAGE_4K) return; slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K); -#ifdef CONFIG_SPU_BASE - spu_flush_all_slbs(mm); -#endif - if (get_paca_psize(addr) != MMU_PAGE_4K) { + copro_flush_all_slbs(mm); + if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) { get_paca()->context = mm->context; slb_flush_and_rebolt(); } @@ -1001,12 +989,11 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm, * -1 - critical hash insertion error * -2 - access not permitted by subpage protection mechanism */ -int hash_page(unsigned long ea, unsigned long access, unsigned long trap) +int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap) { enum ctx_state prev_state = exception_enter(); pgd_t *pgdir; unsigned long vsid; - struct mm_struct *mm; pte_t *ptep; unsigned hugeshift; const struct cpumask *tmp; @@ -1020,7 +1007,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) switch (REGION_ID(ea)) { case USER_REGION_ID: user_region = 1; - mm = current->mm; if (! mm) { DBG_LOW(" user region with no mm !\n"); rc = 1; @@ -1031,7 +1017,6 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) vsid = get_vsid(mm->context.id, ea, ssize); break; case VMALLOC_REGION_ID: - mm = &init_mm; vsid = get_kernel_vsid(ea, mmu_kernel_ssize); if (ea < VMALLOC_END) psize = mmu_vmalloc_psize; @@ -1116,7 +1101,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) WARN_ON(1); } #endif - check_paca_psize(ea, mm, psize, user_region); + if (current->mm == mm) + check_paca_psize(ea, mm, psize, user_region); goto bail; } @@ -1153,13 +1139,12 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) "to 4kB pages because of " "non-cacheable mapping\n"); psize = mmu_vmalloc_psize = MMU_PAGE_4K; -#ifdef CONFIG_SPU_BASE - spu_flush_all_slbs(mm); -#endif + copro_flush_all_slbs(mm); } } - check_paca_psize(ea, mm, psize, user_region); + if (current->mm == mm) + check_paca_psize(ea, mm, psize, user_region); #endif /* CONFIG_PPC_64K_PAGES */ #ifdef CONFIG_PPC_HAS_HASH_64K @@ -1194,6 +1179,17 @@ bail: exception_exit(prev_state); return rc; } +EXPORT_SYMBOL_GPL(hash_page_mm); + +int hash_page(unsigned long ea, unsigned long access, unsigned long trap) +{ + struct mm_struct *mm = current->mm; + + if (REGION_ID(ea) == VMALLOC_REGION_ID) + mm = &init_mm; + + return hash_page_mm(mm, ea, access, trap); +} EXPORT_SYMBOL_GPL(hash_page); void hash_preload(struct mm_struct *mm, unsigned long ea, diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 826893fcb3a7..5f5e6328c21c 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -18,6 +18,57 @@ #include <linux/mm.h> #include <asm/machdep.h> +static void invalidate_old_hpte(unsigned long vsid, unsigned long addr, + pmd_t *pmdp, unsigned int psize, int ssize) +{ + int i, max_hpte_count, valid; + unsigned long s_addr; + unsigned char *hpte_slot_array; + unsigned long hidx, shift, vpn, hash, slot; + + s_addr = addr & HPAGE_PMD_MASK; + hpte_slot_array = get_hpte_slot_array(pmdp); + /* + * IF we try to do a HUGE PTE update after a withdraw is done. + * we will find the below NULL. This happens when we do + * split_huge_page_pmd + */ + if (!hpte_slot_array) + return; + + if (ppc_md.hugepage_invalidate) + return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, + psize, ssize); + /* + * No bluk hpte removal support, invalidate each entry + */ + shift = mmu_psize_defs[psize].shift; + max_hpte_count = HPAGE_PMD_SIZE >> shift; + for (i = 0; i < max_hpte_count; i++) { + /* + * 8 bits per each hpte entries + * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit] + */ + valid = hpte_valid(hpte_slot_array, i); + if (!valid) + continue; + hidx = hpte_hash_index(hpte_slot_array, i); + + /* get the vpn */ + addr = s_addr + (i * (1ul << shift)); + vpn = hpt_vpn(addr, vsid, ssize); + hash = hpt_hash(vpn, shift, ssize); + if (hidx & _PTEIDX_SECONDARY) + hash = ~hash; + + slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; + slot += hidx & _PTEIDX_GROUP_IX; + ppc_md.hpte_invalidate(slot, vpn, psize, + MMU_PAGE_16M, ssize, 0); + } +} + + int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, pmd_t *pmdp, unsigned long trap, int local, int ssize, unsigned int psize) @@ -33,7 +84,9 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, * atomically mark the linux large page PMD busy and dirty */ do { - old_pmd = pmd_val(*pmdp); + pmd_t pmd = ACCESS_ONCE(*pmdp); + + old_pmd = pmd_val(pmd); /* If PMD busy, retry the access */ if (unlikely(old_pmd & _PAGE_BUSY)) return 0; @@ -85,6 +138,15 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, vpn = hpt_vpn(ea, vsid, ssize); hash = hpt_hash(vpn, shift, ssize); hpte_slot_array = get_hpte_slot_array(pmdp); + if (psize == MMU_PAGE_4K) { + /* + * invalidate the old hpte entry if we have that mapped via 64K + * base page size. This is because demote_segment won't flush + * hash page table entries. + */ + if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) + invalidate_old_hpte(vsid, ea, pmdp, MMU_PAGE_64K, ssize); + } valid = hpte_valid(hpte_slot_array, index); if (valid) { @@ -107,11 +169,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, * safely update this here. */ valid = 0; - new_pmd &= ~_PAGE_HPTEFLAGS; hpte_slot_array[index] = 0; - } else - /* clear the busy bits and set the hash pte bits */ - new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; + } } if (!valid) { @@ -119,11 +178,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, /* insert new entry */ pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT; -repeat: - hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - - /* clear the busy bits and set the hash pte bits */ - new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; + new_pmd |= _PAGE_HASHPTE; /* Add in WIMG bits */ rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | @@ -132,6 +187,8 @@ repeat: * enable the memory coherence always */ rflags |= HPTE_R_M; +repeat: + hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; /* Insert into the hash table, primary slot */ slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, @@ -172,8 +229,17 @@ repeat: mark_hpte_slot_valid(hpte_slot_array, index, slot); } /* - * No need to use ldarx/stdcx here + * Mark the pte with _PAGE_COMBO, if we are trying to hash it with + * base page size 4k. + */ + if (psize == MMU_PAGE_4K) + new_pmd |= _PAGE_COMBO; + /* + * The hpte valid is stored in the pgtable whose address is in the + * second half of the PMD. Order this against clearing of the busy bit in + * huge pmd. */ + smp_wmb(); *pmdp = __pmd(new_pmd & ~_PAGE_BUSY); return 0; } diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index cff59f1bec23..415a51b028b9 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -103,14 +103,14 @@ unsigned long __max_low_memory = MAX_LOW_MEM; /* * Check for command-line options that affect what MMU_init will do. */ -void MMU_setup(void) +void __init MMU_setup(void) { /* Check for nobats option (used in mapin_ram). */ - if (strstr(cmd_line, "nobats")) { + if (strstr(boot_command_line, "nobats")) { __map_without_bats = 1; } - if (strstr(cmd_line, "noltlbs")) { + if (strstr(boot_command_line, "noltlbs")) { __map_without_ltlbs = 1; } #ifdef CONFIG_DEBUG_PAGEALLOC diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index e3734edffa69..3481556a1880 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -175,9 +175,10 @@ static unsigned long __meminit vmemmap_section_start(unsigned long page) static int __meminit vmemmap_populated(unsigned long start, int page_size) { unsigned long end = start + page_size; + start = (unsigned long)(pfn_to_page(vmemmap_section_start(start))); for (; start < end; start += (PAGES_PER_SECTION * sizeof(struct page))) - if (pfn_valid(vmemmap_section_start(start))) + if (pfn_valid(page_to_pfn((struct page *)start))) return 1; return 0; @@ -212,6 +213,13 @@ static void __meminit vmemmap_create_mapping(unsigned long start, for (i = 0; i < page_size; i += PAGE_SIZE) BUG_ON(map_kernel_page(start + i, phys, flags)); } + +#ifdef CONFIG_MEMORY_HOTPLUG +static void vmemmap_remove_mapping(unsigned long start, + unsigned long page_size) +{ +} +#endif #else /* CONFIG_PPC_BOOK3E */ static void __meminit vmemmap_create_mapping(unsigned long start, unsigned long page_size, @@ -223,17 +231,39 @@ static void __meminit vmemmap_create_mapping(unsigned long start, mmu_kernel_ssize); BUG_ON(mapped < 0); } + +#ifdef CONFIG_MEMORY_HOTPLUG +static void vmemmap_remove_mapping(unsigned long start, + unsigned long page_size) +{ + int mapped = htab_remove_mapping(start, start + page_size, + mmu_vmemmap_psize, + mmu_kernel_ssize); + BUG_ON(mapped < 0); +} +#endif + #endif /* CONFIG_PPC_BOOK3E */ struct vmemmap_backing *vmemmap_list; +static struct vmemmap_backing *next; +static int num_left; +static int num_freed; static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node) { - static struct vmemmap_backing *next; - static int num_left; + struct vmemmap_backing *vmem_back; + /* get from freed entries first */ + if (num_freed) { + num_freed--; + vmem_back = next; + next = next->list; + + return vmem_back; + } /* allocate a page when required and hand out chunks */ - if (!next || !num_left) { + if (!num_left) { next = vmemmap_alloc_block(PAGE_SIZE, node); if (unlikely(!next)) { WARN_ON(1); @@ -296,10 +326,85 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) return 0; } -void vmemmap_free(unsigned long start, unsigned long end) +#ifdef CONFIG_MEMORY_HOTPLUG +static unsigned long vmemmap_list_free(unsigned long start) { + struct vmemmap_backing *vmem_back, *vmem_back_prev; + + vmem_back_prev = vmem_back = vmemmap_list; + + /* look for it with prev pointer recorded */ + for (; vmem_back; vmem_back = vmem_back->list) { + if (vmem_back->virt_addr == start) + break; + vmem_back_prev = vmem_back; + } + + if (unlikely(!vmem_back)) { + WARN_ON(1); + return 0; + } + + /* remove it from vmemmap_list */ + if (vmem_back == vmemmap_list) /* remove head */ + vmemmap_list = vmem_back->list; + else + vmem_back_prev->list = vmem_back->list; + + /* next point to this freed entry */ + vmem_back->list = next; + next = vmem_back; + num_freed++; + + return vmem_back->phys; } +void __ref vmemmap_free(unsigned long start, unsigned long end) +{ + unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; + + start = _ALIGN_DOWN(start, page_size); + + pr_debug("vmemmap_free %lx...%lx\n", start, end); + + for (; start < end; start += page_size) { + unsigned long addr; + + /* + * the section has already be marked as invalid, so + * vmemmap_populated() true means some other sections still + * in this page, so skip it. + */ + if (vmemmap_populated(start, page_size)) + continue; + + addr = vmemmap_list_free(start); + if (addr) { + struct page *page = pfn_to_page(addr >> PAGE_SHIFT); + + if (PageReserved(page)) { + /* allocated from bootmem */ + if (page_size < PAGE_SIZE) { + /* + * this shouldn't happen, but if it is + * the case, leave the memory there + */ + WARN_ON_ONCE(1); + } else { + unsigned int nr_pages = + 1 << get_order(page_size); + while (nr_pages--) + free_reserved_page(page++); + } + } else + free_pages((unsigned long)(__va(addr)), + get_order(page_size)); + + vmemmap_remove_mapping(start, page_size); + } + } +} +#endif void register_page_bootmem_memmap(unsigned long section_nr, struct page *start_page, unsigned long size) { @@ -331,16 +436,16 @@ struct page *realmode_pfn_to_page(unsigned long pfn) if (pg_va < vmem_back->virt_addr) continue; - /* Check that page struct is not split between real pages */ - if ((pg_va + sizeof(struct page)) > - (vmem_back->virt_addr + page_size)) - return NULL; - - page = (struct page *) (vmem_back->phys + pg_va - + /* After vmemmap_list entry free is possible, need check all */ + if ((pg_va + sizeof(struct page)) <= + (vmem_back->virt_addr + page_size)) { + page = (struct page *) (vmem_back->phys + pg_va - vmem_back->virt_addr); - return page; + return page; + } } + /* Probably that page struct is split between real pages */ return NULL; } EXPORT_SYMBOL_GPL(realmode_pfn_to_page); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 2c8e90f5789e..8ebaac75c940 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -128,7 +128,8 @@ int arch_add_memory(int nid, u64 start, u64 size) return -EINVAL; /* this should work for most non-highmem platforms */ - zone = pgdata->node_zones; + zone = pgdata->node_zones + + zone_for_memory(nid, start, size, 0); return __add_pages(nid, zone, start_pfn, nr_pages); } @@ -259,6 +260,60 @@ static int __init mark_nonram_nosave(void) } return 0; } +#else /* CONFIG_NEED_MULTIPLE_NODES */ +static int __init mark_nonram_nosave(void) +{ + return 0; +} +#endif + +static bool zone_limits_final; + +static unsigned long max_zone_pfns[MAX_NR_ZONES] = { + [0 ... MAX_NR_ZONES - 1] = ~0UL +}; + +/* + * Restrict the specified zone and all more restrictive zones + * to be below the specified pfn. May not be called after + * paging_init(). + */ +void __init limit_zone_pfn(enum zone_type zone, unsigned long pfn_limit) +{ + int i; + + if (WARN_ON(zone_limits_final)) + return; + + for (i = zone; i >= 0; i--) { + if (max_zone_pfns[i] > pfn_limit) + max_zone_pfns[i] = pfn_limit; + } +} + +/* + * Find the least restrictive zone that is entirely below the + * specified pfn limit. Returns < 0 if no suitable zone is found. + * + * pfn_limit must be u64 because it can exceed 32 bits even on 32-bit + * systems -- the DMA limit can be higher than any possible real pfn. + */ +int dma_pfn_limit_to_zone(u64 pfn_limit) +{ + enum zone_type top_zone = ZONE_NORMAL; + int i; + +#ifdef CONFIG_HIGHMEM + top_zone = ZONE_HIGHMEM; +#endif + + for (i = top_zone; i >= 0; i--) { + if (max_zone_pfns[i] <= pfn_limit) + return i; + } + + return -EPERM; +} /* * paging_init() sets up the page tables - in fact we've already done this. @@ -267,7 +322,7 @@ void __init paging_init(void) { unsigned long long total_ram = memblock_phys_mem_size(); phys_addr_t top_of_ram = memblock_end_of_DRAM(); - unsigned long max_zone_pfns[MAX_NR_ZONES]; + enum zone_type top_zone; #ifdef CONFIG_PPC32 unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1); @@ -289,18 +344,20 @@ void __init paging_init(void) (unsigned long long)top_of_ram, total_ram); printk(KERN_DEBUG "Memory hole size: %ldMB\n", (long int)((top_of_ram - total_ram) >> 20)); - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); + #ifdef CONFIG_HIGHMEM - max_zone_pfns[ZONE_DMA] = lowmem_end_addr >> PAGE_SHIFT; - max_zone_pfns[ZONE_HIGHMEM] = top_of_ram >> PAGE_SHIFT; + top_zone = ZONE_HIGHMEM; + limit_zone_pfn(ZONE_NORMAL, lowmem_end_addr >> PAGE_SHIFT); #else - max_zone_pfns[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; + top_zone = ZONE_NORMAL; #endif + + limit_zone_pfn(top_zone, top_of_ram >> PAGE_SHIFT); + zone_limits_final = true; free_area_init_nodes(max_zone_pfns); mark_nonram_nosave(); } -#endif /* ! CONFIG_NEED_MULTIPLE_NODES */ static void __init register_page_bootmem_info(void) { diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/mmu_context_hash32.c index 78fef6726e10..aa5a7fd89461 100644 --- a/arch/powerpc/mm/mmu_context_hash32.c +++ b/arch/powerpc/mm/mmu_context_hash32.c @@ -2,7 +2,7 @@ * This file contains the routines for handling the MMU on those * PowerPC implementations where the MMU substantially follows the * architecture specification. This includes the 6xx, 7xx, 7xxx, - * 8260, and POWER3 implementations but excludes the 8xx and 4xx. + * and 8260 implementations but excludes the 8xx and 4xx. * -- paulus * * Derived from arch/ppc/mm/init.c: diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 3b181b22cd46..b9d1dfdbe5bb 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -8,6 +8,8 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ +#define pr_fmt(fmt) "numa: " fmt + #include <linux/threads.h> #include <linux/bootmem.h> #include <linux/init.h> @@ -538,7 +540,7 @@ static int of_drconf_to_nid_single(struct of_drconf_cell *drmem, */ static int numa_setup_cpu(unsigned long lcpu) { - int nid; + int nid = -1; struct device_node *cpu; /* @@ -555,19 +557,21 @@ static int numa_setup_cpu(unsigned long lcpu) if (!cpu) { WARN_ON(1); - nid = 0; - goto out; + if (cpu_present(lcpu)) + goto out_present; + else + goto out; } nid = of_node_to_nid_single(cpu); +out_present: if (nid < 0 || !node_online(nid)) nid = first_online_node; -out: - map_cpu_to_node(lcpu, nid); + map_cpu_to_node(lcpu, nid); of_node_put(cpu); - +out: return nid; } @@ -611,8 +615,8 @@ static int cpu_numa_callback(struct notifier_block *nfb, unsigned long action, case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: unmap_cpu_from_node(lcpu); - break; ret = NOTIFY_OK; + break; #endif } return ret; @@ -1049,7 +1053,7 @@ static void __init mark_reserved_regions_for_nid(int nid) void __init do_init_bootmem(void) { - int nid; + int nid, cpu; min_low_pfn = 0; max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; @@ -1122,16 +1126,14 @@ void __init do_init_bootmem(void) reset_numa_cpu_lookup_table(); register_cpu_notifier(&ppc64_numa_nb); - cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, - (void *)(unsigned long)boot_cpuid); -} - -void __init paging_init(void) -{ - unsigned long max_zone_pfns[MAX_NR_ZONES]; - memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - max_zone_pfns[ZONE_DMA] = memblock_end_of_DRAM() >> PAGE_SHIFT; - free_area_init_nodes(max_zone_pfns); + /* + * We need the numa_cpu_lookup_table to be accurate for all CPUs, + * even before we online them, so that we can use cpu_to_{node,mem} + * early in boot, cf. smp_prepare_cpus(). + */ + for_each_present_cpu(cpu) { + numa_setup_cpu((unsigned long)cpu); + } } static int __init early_numa(char *p) @@ -1153,6 +1155,22 @@ static int __init early_numa(char *p) } early_param("numa", early_numa); +static bool topology_updates_enabled = true; + +static int __init early_topology_updates(char *p) +{ + if (!p) + return 0; + + if (!strcmp(p, "off")) { + pr_info("Disabling topology updates\n"); + topology_updates_enabled = false; + } + + return 0; +} +early_param("topology_updates", early_topology_updates); + #ifdef CONFIG_MEMORY_HOTPLUG /* * Find the node associated with a hot added memory section for @@ -1442,8 +1460,11 @@ static long hcall_vphn(unsigned long cpu, __be32 *associativity) long retbuf[PLPAR_HCALL9_BUFSIZE] = {0}; u64 flags = 1; int hwcpu = get_hard_smp_processor_id(cpu); + int i; rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu); + for (i = 0; i < 6; i++) + retbuf[i] = cpu_to_be64(retbuf[i]); vphn_unpack_associativity(retbuf, associativity); return rc; @@ -1488,11 +1509,14 @@ static int update_cpu_topology(void *data) cpu = smp_processor_id(); for (update = data; update; update = update->next) { + int new_nid = update->new_nid; if (cpu != update->cpu) continue; - unmap_cpu_from_node(update->cpu); - map_cpu_to_node(update->cpu, update->new_nid); + unmap_cpu_from_node(cpu); + map_cpu_to_node(cpu, new_nid); + set_cpu_numa_node(cpu, new_nid); + set_cpu_numa_mem(cpu, local_memory_node(new_nid)); vdso_getcpu_init(); } @@ -1539,6 +1563,9 @@ int arch_update_cpu_topology(void) struct device *dev; int weight, new_nid, i = 0; + if (!prrn_enabled && !vphn_enabled) + return 0; + weight = cpumask_weight(&cpu_associativity_changes_mask); if (!weight) return 0; @@ -1592,6 +1619,15 @@ int arch_update_cpu_topology(void) cpu = cpu_last_thread_sibling(cpu); } + pr_debug("Topology update for the following CPUs:\n"); + if (cpumask_weight(&updated_cpus)) { + for (ud = &updates[0]; ud; ud = ud->next) { + pr_debug("cpu %d moving from node %d " + "to %d\n", ud->cpu, + ud->old_nid, ud->new_nid); + } + } + /* * In cases where we have nothing to update (because the updates list * is too short or because the new topology is same as the old one), @@ -1800,8 +1836,12 @@ static const struct file_operations topology_ops = { static int topology_update_init(void) { - start_topology_update(); - proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops); + /* Do not poll for changes if disabled at boot */ + if (topology_updates_enabled) + start_topology_update(); + + if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops)) + return -ENOMEM; return 0; } diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index c695943a513c..c90e602677c9 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -48,7 +48,7 @@ static inline int pte_looks_normal(pte_t pte) (_PAGE_PRESENT | _PAGE_USER); } -struct page * maybe_pte_to_page(pte_t pte) +static struct page *maybe_pte_to_page(pte_t pte) { unsigned long pfn = pte_pfn(pte); struct page *page; diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 343a87fa78b5..cf11342bf519 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -41,7 +41,7 @@ unsigned long ioremap_base; unsigned long ioremap_bot; EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */ -#if defined(CONFIG_6xx) || defined(CONFIG_POWER3) +#ifdef CONFIG_6xx #define HAVE_BATS 1 #endif diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index f6ce1f111f5b..c8d709ab489d 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -54,6 +54,9 @@ #include "mmu_decl.h" +#define CREATE_TRACE_POINTS +#include <trace/events/thp.h> + /* Some sanity checking */ #if TASK_SIZE_USER64 > PGTABLE_RANGE #error TASK_SIZE_USER64 exceeds pagetable range @@ -68,7 +71,7 @@ unsigned long ioremap_bot = IOREMAP_BASE; #ifdef CONFIG_PPC_MMU_NOHASH -static void *early_alloc_pgtable(unsigned long size) +static __ref void *early_alloc_pgtable(unsigned long size) { void *pt; @@ -537,8 +540,9 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, old = pmd_val(*pmdp); *pmdp = __pmd((old & ~clr) | set); #endif + trace_hugepage_update(addr, old, clr, set); if (old & _PAGE_HASHPTE) - hpte_do_hugepage_flush(mm, addr, pmdp); + hpte_do_hugepage_flush(mm, addr, pmdp, old); return old; } @@ -642,10 +646,11 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, * If we didn't had the splitting flag set, go and flush the * HPTE entries. */ + trace_hugepage_splitting(address, old); if (!(old & _PAGE_SPLITTING)) { /* We need to flush the hpte */ if (old & _PAGE_HASHPTE) - hpte_do_hugepage_flush(vma->vm_mm, address, pmdp); + hpte_do_hugepage_flush(vma->vm_mm, address, pmdp, old); } /* * This ensures that generic code that rely on IRQ disabling @@ -709,6 +714,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, assert_spin_locked(&mm->page_table_lock); WARN_ON(!pmd_trans_huge(pmd)); #endif + trace_hugepage_set_pmd(addr, pmd); return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); } @@ -723,7 +729,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, * neesd to be flushed. */ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp) + pmd_t *pmdp, unsigned long old_pmd) { int ssize, i; unsigned long s_addr; @@ -745,12 +751,29 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, if (!hpte_slot_array) return; - /* get the base page size */ + /* get the base page size,vsid and segment size */ +#ifdef CONFIG_DEBUG_VM psize = get_slice_psize(mm, s_addr); + BUG_ON(psize == MMU_PAGE_16M); +#endif + if (old_pmd & _PAGE_COMBO) + psize = MMU_PAGE_4K; + else + psize = MMU_PAGE_64K; + + if (!is_kernel_addr(s_addr)) { + ssize = user_segment_size(s_addr); + vsid = get_vsid(mm->context.id, s_addr, ssize); + WARN_ON(vsid == 0); + } else { + vsid = get_kernel_vsid(s_addr, mmu_kernel_ssize); + ssize = mmu_kernel_ssize; + } if (ppc_md.hugepage_invalidate) - return ppc_md.hugepage_invalidate(mm, hpte_slot_array, - s_addr, psize); + return ppc_md.hugepage_invalidate(vsid, s_addr, + hpte_slot_array, + psize, ssize); /* * No bluk hpte removal support, invalidate each entry */ @@ -768,15 +791,6 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, /* get the vpn */ addr = s_addr + (i * (1ul << shift)); - if (!is_kernel_addr(addr)) { - ssize = user_segment_size(addr); - vsid = get_vsid(mm->context.id, addr, ssize); - WARN_ON(vsid == 0); - } else { - vsid = get_kernel_vsid(addr, mmu_kernel_ssize); - ssize = mmu_kernel_ssize; - } - vpn = hpt_vpn(addr, vsid, ssize); hash = hpt_hash(vpn, shift, ssize); if (hidx & _PTEIDX_SECONDARY) diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 11571e118831..5029dc19b517 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -2,7 +2,7 @@ * This file contains the routines for handling the MMU on those * PowerPC implementations where the MMU substantially follows the * architecture specification. This includes the 6xx, 7xx, 7xxx, - * 8260, and POWER3 implementations but excludes the 8xx and 4xx. + * and 8260 implementations but excludes the 8xx and 4xx. * -- paulus * * Derived from arch/ppc/mm/init.c: diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 0399a6702958..6e450ca66526 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -46,9 +46,6 @@ static inline unsigned long mk_esid_data(unsigned long ea, int ssize, return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | slot; } -#define slb_vsid_shift(ssize) \ - ((ssize) == MMU_SEGSIZE_256M? SLB_VSID_SHIFT: SLB_VSID_SHIFT_1T) - static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, unsigned long flags) { diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index b0c75cc15efc..ded0ea1afde4 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -30,9 +30,11 @@ #include <linux/err.h> #include <linux/spinlock.h> #include <linux/export.h> +#include <linux/hugetlb.h> #include <asm/mman.h> #include <asm/mmu.h> -#include <asm/spu.h> +#include <asm/copro.h> +#include <asm/hugetlb.h> /* some sanity checks */ #if (PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE @@ -232,9 +234,7 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz spin_unlock_irqrestore(&slice_convert_lock, flags); -#ifdef CONFIG_SPU_BASE - spu_flush_all_slbs(mm); -#endif + copro_flush_all_slbs(mm); } /* @@ -671,9 +671,7 @@ void slice_set_psize(struct mm_struct *mm, unsigned long address, spin_unlock_irqrestore(&slice_convert_lock, flags); -#ifdef CONFIG_SPU_BASE - spu_flush_all_slbs(mm); -#endif + copro_flush_all_slbs(mm); } void slice_set_range_psize(struct mm_struct *mm, unsigned long start, @@ -684,6 +682,7 @@ void slice_set_range_psize(struct mm_struct *mm, unsigned long start, slice_convert(mm, mask, psize); } +#ifdef CONFIG_HUGETLB_PAGE /* * is_hugepage_only_range() is used by generic code to verify whether * a normal mmap mapping (non hugetlbfs) is valid on a given area. @@ -728,4 +727,4 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, #endif return !slice_check_fit(mask, available); } - +#endif diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c deleted file mode 100644 index 3f8efa6f2997..000000000000 --- a/arch/powerpc/mm/stab.c +++ /dev/null @@ -1,286 +0,0 @@ -/* - * PowerPC64 Segment Translation Support. - * - * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com - * Copyright (c) 2001 Dave Engebretsen - * - * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/memblock.h> - -#include <asm/pgtable.h> -#include <asm/mmu.h> -#include <asm/mmu_context.h> -#include <asm/paca.h> -#include <asm/cputable.h> -#include <asm/prom.h> - -struct stab_entry { - unsigned long esid_data; - unsigned long vsid_data; -}; - -#define NR_STAB_CACHE_ENTRIES 8 -static DEFINE_PER_CPU(long, stab_cache_ptr); -static DEFINE_PER_CPU(long [NR_STAB_CACHE_ENTRIES], stab_cache); - -/* - * Create a segment table entry for the given esid/vsid pair. - */ -static int make_ste(unsigned long stab, unsigned long esid, unsigned long vsid) -{ - unsigned long esid_data, vsid_data; - unsigned long entry, group, old_esid, castout_entry, i; - unsigned int global_entry; - struct stab_entry *ste, *castout_ste; - unsigned long kernel_segment = (esid << SID_SHIFT) >= PAGE_OFFSET; - - vsid_data = vsid << STE_VSID_SHIFT; - esid_data = esid << SID_SHIFT | STE_ESID_KP | STE_ESID_V; - if (! kernel_segment) - esid_data |= STE_ESID_KS; - - /* Search the primary group first. */ - global_entry = (esid & 0x1f) << 3; - ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7)); - - /* Find an empty entry, if one exists. */ - for (group = 0; group < 2; group++) { - for (entry = 0; entry < 8; entry++, ste++) { - if (!(ste->esid_data & STE_ESID_V)) { - ste->vsid_data = vsid_data; - eieio(); - ste->esid_data = esid_data; - return (global_entry | entry); - } - } - /* Now search the secondary group. */ - global_entry = ((~esid) & 0x1f) << 3; - ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7)); - } - - /* - * Could not find empty entry, pick one with a round robin selection. - * Search all entries in the two groups. - */ - castout_entry = get_paca()->stab_rr; - for (i = 0; i < 16; i++) { - if (castout_entry < 8) { - global_entry = (esid & 0x1f) << 3; - ste = (struct stab_entry *)(stab | ((esid & 0x1f) << 7)); - castout_ste = ste + castout_entry; - } else { - global_entry = ((~esid) & 0x1f) << 3; - ste = (struct stab_entry *)(stab | (((~esid) & 0x1f) << 7)); - castout_ste = ste + (castout_entry - 8); - } - - /* Dont cast out the first kernel segment */ - if ((castout_ste->esid_data & ESID_MASK) != PAGE_OFFSET) - break; - - castout_entry = (castout_entry + 1) & 0xf; - } - - get_paca()->stab_rr = (castout_entry + 1) & 0xf; - - /* Modify the old entry to the new value. */ - - /* Force previous translations to complete. DRENG */ - asm volatile("isync" : : : "memory"); - - old_esid = castout_ste->esid_data >> SID_SHIFT; - castout_ste->esid_data = 0; /* Invalidate old entry */ - - asm volatile("sync" : : : "memory"); /* Order update */ - - castout_ste->vsid_data = vsid_data; - eieio(); /* Order update */ - castout_ste->esid_data = esid_data; - - asm volatile("slbie %0" : : "r" (old_esid << SID_SHIFT)); - /* Ensure completion of slbie */ - asm volatile("sync" : : : "memory"); - - return (global_entry | (castout_entry & 0x7)); -} - -/* - * Allocate a segment table entry for the given ea and mm - */ -static int __ste_allocate(unsigned long ea, struct mm_struct *mm) -{ - unsigned long vsid; - unsigned char stab_entry; - unsigned long offset; - - /* Kernel or user address? */ - if (is_kernel_addr(ea)) { - vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M); - } else { - if ((ea >= TASK_SIZE_USER64) || (! mm)) - return 1; - - vsid = get_vsid(mm->context.id, ea, MMU_SEGSIZE_256M); - } - - stab_entry = make_ste(get_paca()->stab_addr, GET_ESID(ea), vsid); - - if (!is_kernel_addr(ea)) { - offset = __get_cpu_var(stab_cache_ptr); - if (offset < NR_STAB_CACHE_ENTRIES) - __get_cpu_var(stab_cache[offset++]) = stab_entry; - else - offset = NR_STAB_CACHE_ENTRIES+1; - __get_cpu_var(stab_cache_ptr) = offset; - - /* Order update */ - asm volatile("sync":::"memory"); - } - - return 0; -} - -int ste_allocate(unsigned long ea) -{ - return __ste_allocate(ea, current->mm); -} - -/* - * Do the segment table work for a context switch: flush all user - * entries from the table, then preload some probably useful entries - * for the new task - */ -void switch_stab(struct task_struct *tsk, struct mm_struct *mm) -{ - struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr; - struct stab_entry *ste; - unsigned long offset; - unsigned long pc = KSTK_EIP(tsk); - unsigned long stack = KSTK_ESP(tsk); - unsigned long unmapped_base; - - /* Force previous translations to complete. DRENG */ - asm volatile("isync" : : : "memory"); - - /* - * We need interrupts hard-disabled here, not just soft-disabled, - * so that a PMU interrupt can't occur, which might try to access - * user memory (to get a stack trace) and possible cause an STAB miss - * which would update the stab_cache/stab_cache_ptr per-cpu variables. - */ - hard_irq_disable(); - - offset = __get_cpu_var(stab_cache_ptr); - if (offset <= NR_STAB_CACHE_ENTRIES) { - int i; - - for (i = 0; i < offset; i++) { - ste = stab + __get_cpu_var(stab_cache[i]); - ste->esid_data = 0; /* invalidate entry */ - } - } else { - unsigned long entry; - - /* Invalidate all entries. */ - ste = stab; - - /* Never flush the first entry. */ - ste += 1; - for (entry = 1; - entry < (HW_PAGE_SIZE / sizeof(struct stab_entry)); - entry++, ste++) { - unsigned long ea; - ea = ste->esid_data & ESID_MASK; - if (!is_kernel_addr(ea)) { - ste->esid_data = 0; - } - } - } - - asm volatile("sync; slbia; sync":::"memory"); - - __get_cpu_var(stab_cache_ptr) = 0; - - /* Now preload some entries for the new task */ - if (test_tsk_thread_flag(tsk, TIF_32BIT)) - unmapped_base = TASK_UNMAPPED_BASE_USER32; - else - unmapped_base = TASK_UNMAPPED_BASE_USER64; - - __ste_allocate(pc, mm); - - if (GET_ESID(pc) == GET_ESID(stack)) - return; - - __ste_allocate(stack, mm); - - if ((GET_ESID(pc) == GET_ESID(unmapped_base)) - || (GET_ESID(stack) == GET_ESID(unmapped_base))) - return; - - __ste_allocate(unmapped_base, mm); - - /* Order update */ - asm volatile("sync" : : : "memory"); -} - -/* - * Allocate segment tables for secondary CPUs. These must all go in - * the first (bolted) segment, so that do_stab_bolted won't get a - * recursive segment miss on the segment table itself. - */ -void __init stabs_alloc(void) -{ - int cpu; - - if (mmu_has_feature(MMU_FTR_SLB)) - return; - - for_each_possible_cpu(cpu) { - unsigned long newstab; - - if (cpu == 0) - continue; /* stab for CPU 0 is statically allocated */ - - newstab = memblock_alloc_base(HW_PAGE_SIZE, HW_PAGE_SIZE, - 1<<SID_SHIFT); - newstab = (unsigned long)__va(newstab); - - memset((void *)newstab, 0, HW_PAGE_SIZE); - - paca[cpu].stab_addr = newstab; - paca[cpu].stab_real = __pa(newstab); - printk(KERN_INFO "Segment table for CPU %d at 0x%llx " - "virtual, 0x%llx absolute\n", - cpu, paca[cpu].stab_addr, paca[cpu].stab_real); - } -} - -/* - * Build an entry for the base kernel segment and put it into - * the segment table or SLB. All other segment table or SLB - * entries are faulted in. - */ -void stab_initialize(unsigned long stab) -{ - unsigned long vsid = get_kernel_vsid(PAGE_OFFSET, MMU_SEGSIZE_256M); - unsigned long stabreal; - - asm volatile("isync; slbia; isync":::"memory"); - make_ste(stab, GET_ESID(PAGE_OFFSET), vsid); - - /* Order update */ - asm volatile("sync":::"memory"); - - /* Set ASR */ - stabreal = get_paca()->stab_real | 0x1ul; - - mtspr(SPRN_ASR, stabreal); -} diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index c99f6510a0b2..d2a94b85dbc2 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -30,6 +30,8 @@ #include <asm/tlb.h> #include <asm/bug.h> +#include <trace/events/thp.h> + DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); /* @@ -213,10 +215,12 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, if (ptep == NULL) continue; pte = pte_val(*ptep); + if (hugepage_shift) + trace_hugepage_invalidate(start, pte_val(pte)); if (!(pte & _PAGE_HASHPTE)) continue; if (unlikely(hugepage_shift && pmd_trans_huge(*(pmd_t *)pte))) - hpte_do_hugepage_flush(mm, start, (pmd_t *)pte); + hpte_do_hugepage_flush(mm, start, (pmd_t *)ptep, pte); else hpte_need_flush(mm, start, ptep, pte, 0); } diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index 356e8b41fb09..89bf95bd63b1 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -296,9 +296,12 @@ itlb_miss_fault_bolted: * r14 = page table base * r13 = PACA * r11 = tlb_per_core ptr - * r10 = cpu number + * r10 = crap (free to use) */ tlb_miss_common_e6500: + crmove cr2*4+2,cr0*4+2 /* cr2.eq != 0 if kernel address */ + +BEGIN_FTR_SECTION /* CPU_FTR_SMT */ /* * Search if we already have an indirect entry for that virtual * address, and if we do, bail out. @@ -309,6 +312,7 @@ tlb_miss_common_e6500: lhz r10,PACAPACAINDEX(r13) cmpdi r15,0 cmpdi cr1,r15,1 /* set cr1.eq = 0 for non-recursive */ + addi r10,r10,1 bne 2f stbcx. r10,0,r11 bne 1b @@ -322,18 +326,62 @@ tlb_miss_common_e6500: b 1b .previous + /* + * Erratum A-008139 says that we can't use tlbwe to change + * an indirect entry in any way (including replacing or + * invalidating) if the other thread could be in the process + * of a lookup. The workaround is to invalidate the entry + * with tlbilx before overwriting. + */ + + lbz r15,TCD_ESEL_NEXT(r11) + rlwinm r10,r15,16,0xff0000 + oris r10,r10,MAS0_TLBSEL(1)@h + mtspr SPRN_MAS0,r10 + isync + tlbre + mfspr r15,SPRN_MAS1 + andis. r15,r15,MAS1_VALID@h + beq 5f + +BEGIN_FTR_SECTION_NESTED(532) + mfspr r10,SPRN_MAS8 + rlwinm r10,r10,0,0x80000fff /* tgs,tlpid -> sgs,slpid */ + mtspr SPRN_MAS5,r10 +END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532) + + mfspr r10,SPRN_MAS1 + rlwinm r15,r10,0,0x3fff0000 /* tid -> spid */ + rlwimi r15,r10,20,0x00000003 /* ind,ts -> sind,sas */ + mfspr r10,SPRN_MAS6 + mtspr SPRN_MAS6,r15 + mfspr r15,SPRN_MAS2 + isync + tlbilxva 0,r15 + isync + + mtspr SPRN_MAS6,r10 + +5: +BEGIN_FTR_SECTION_NESTED(532) + li r10,0 + mtspr SPRN_MAS8,r10 + mtspr SPRN_MAS5,r10 +END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532) tlbsx 0,r16 mfspr r10,SPRN_MAS1 - andis. r10,r10,MAS1_VALID@h + andis. r15,r10,MAS1_VALID@h bne tlb_miss_done_e6500 - - /* Undo MAS-damage from the tlbsx */ +FTR_SECTION_ELSE mfspr r10,SPRN_MAS1 +ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT) + oris r10,r10,MAS1_VALID@h - mtspr SPRN_MAS1,r10 - mtspr SPRN_MAS2,r15 + beq cr2,4f + rlwinm r10,r10,0,16,1 /* Clear TID */ +4: mtspr SPRN_MAS1,r10 /* Now, we need to walk the page tables. First check if we are in * range. @@ -394,11 +442,13 @@ tlb_miss_common_e6500: tlb_miss_done_e6500: .macro tlb_unlock_e6500 +BEGIN_FTR_SECTION beq cr1,1f /* no unlock if lock was recursively grabbed */ li r15,0 isync stb r15,0(r11) 1: +END_FTR_SECTION_IFSET(CPU_FTR_SMT) .endm tlb_unlock_e6500 @@ -407,12 +457,9 @@ tlb_miss_done_e6500: rfi tlb_miss_kernel_e6500: - mfspr r10,SPRN_MAS1 ld r14,PACA_KERNELPGD(r13) - cmpldi cr0,r15,8 /* Check for vmalloc region */ - rlwinm r10,r10,0,16,1 /* Clear TID */ - mtspr SPRN_MAS1,r10 - beq+ tlb_miss_common_e6500 + cmpldi cr1,r15,8 /* Check for vmalloc region */ + beq+ cr1,tlb_miss_common_e6500 tlb_miss_fault_e6500: tlb_unlock_e6500 diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 92cb18d52ea8..f38ea4df6a85 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -581,42 +581,10 @@ static void setup_mmu_htw(void) /* * Early initialization of the MMU TLB code */ -static void __early_init_mmu(int boot_cpu) +static void early_init_this_mmu(void) { unsigned int mas4; - /* XXX This will have to be decided at runtime, but right - * now our boot and TLB miss code hard wires it. Ideally - * we should find out a suitable page size and patch the - * TLB miss code (either that or use the PACA to store - * the value we want) - */ - mmu_linear_psize = MMU_PAGE_1G; - - /* XXX This should be decided at runtime based on supported - * page sizes in the TLB, but for now let's assume 16M is - * always there and a good fit (which it probably is) - * - * Freescale booke only supports 4K pages in TLB0, so use that. - */ - if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) - mmu_vmemmap_psize = MMU_PAGE_4K; - else - mmu_vmemmap_psize = MMU_PAGE_16M; - - /* XXX This code only checks for TLB 0 capabilities and doesn't - * check what page size combos are supported by the HW. It - * also doesn't handle the case where a separate array holds - * the IND entries from the array loaded by the PT. - */ - if (boot_cpu) { - /* Look for supported page sizes */ - setup_page_sizes(); - - /* Look for HW tablewalk support */ - setup_mmu_htw(); - } - /* Set MAS4 based on page table setting */ mas4 = 0x4 << MAS4_WIMGED_SHIFT; @@ -650,11 +618,6 @@ static void __early_init_mmu(int boot_cpu) } mtspr(SPRN_MAS4, mas4); - /* Set the global containing the top of the linear mapping - * for use by the TLB miss code - */ - linear_map_top = memblock_end_of_DRAM(); - #ifdef CONFIG_PPC_FSL_BOOK3E if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { unsigned int num_cams; @@ -662,10 +625,49 @@ static void __early_init_mmu(int boot_cpu) /* use a quarter of the TLBCAM for bolted linear map */ num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4; linear_map_top = map_mem_in_cams(linear_map_top, num_cams); + } +#endif - /* limit memory so we dont have linear faults */ - memblock_enforce_memory_limit(linear_map_top); + /* A sync won't hurt us after mucking around with + * the MMU configuration + */ + mb(); +} +static void __init early_init_mmu_global(void) +{ + /* XXX This will have to be decided at runtime, but right + * now our boot and TLB miss code hard wires it. Ideally + * we should find out a suitable page size and patch the + * TLB miss code (either that or use the PACA to store + * the value we want) + */ + mmu_linear_psize = MMU_PAGE_1G; + + /* XXX This should be decided at runtime based on supported + * page sizes in the TLB, but for now let's assume 16M is + * always there and a good fit (which it probably is) + * + * Freescale booke only supports 4K pages in TLB0, so use that. + */ + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) + mmu_vmemmap_psize = MMU_PAGE_4K; + else + mmu_vmemmap_psize = MMU_PAGE_16M; + + /* XXX This code only checks for TLB 0 capabilities and doesn't + * check what page size combos are supported by the HW. It + * also doesn't handle the case where a separate array holds + * the IND entries from the array loaded by the PT. + */ + /* Look for supported page sizes */ + setup_page_sizes(); + + /* Look for HW tablewalk support */ + setup_mmu_htw(); + +#ifdef CONFIG_PPC_FSL_BOOK3E + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { if (book3e_htw_mode == PPC_HTW_NONE) { extlb_level_exc = EX_TLB_SIZE; patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e); @@ -675,22 +677,41 @@ static void __early_init_mmu(int boot_cpu) } #endif - /* A sync won't hurt us after mucking around with - * the MMU configuration + /* Set the global containing the top of the linear mapping + * for use by the TLB miss code */ - mb(); + linear_map_top = memblock_end_of_DRAM(); +} + +static void __init early_mmu_set_memory_limit(void) +{ +#ifdef CONFIG_PPC_FSL_BOOK3E + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { + /* + * Limit memory so we dont have linear faults. + * Unlike memblock_set_current_limit, which limits + * memory available during early boot, this permanently + * reduces the memory available to Linux. We need to + * do this because highmem is not supported on 64-bit. + */ + memblock_enforce_memory_limit(linear_map_top); + } +#endif memblock_set_current_limit(linear_map_top); } +/* boot cpu only */ void __init early_init_mmu(void) { - __early_init_mmu(1); + early_init_mmu_global(); + early_init_this_mmu(); + early_mmu_set_memory_limit(); } void early_init_mmu_secondary(void) { - __early_init_mmu(0); + early_init_this_mmu(); } void setup_initial_memory_limit(phys_addr_t first_memblock_base, |