summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--arch/powerpc/mm/8xx_mmu.c131
-rw-r--r--arch/powerpc/mm/Makefile7
-rw-r--r--arch/powerpc/mm/copro_fault.c2
-rw-r--r--arch/powerpc/mm/fault.c8
-rw-r--r--arch/powerpc/mm/hash64_4k.c18
-rw-r--r--arch/powerpc/mm/hash64_64k.c39
-rw-r--r--arch/powerpc/mm/hash_low_32.S3
-rw-r--r--arch/powerpc/mm/hash_native_64.c98
-rw-r--r--arch/powerpc/mm/hash_utils_64.c378
-rw-r--r--arch/powerpc/mm/hugepage-hash64.c17
-rw-r--r--arch/powerpc/mm/hugetlbpage-hash64.c4
-rw-r--r--arch/powerpc/mm/hugetlbpage-radix.c39
-rw-r--r--arch/powerpc/mm/hugetlbpage.c16
-rw-r--r--arch/powerpc/mm/init_32.c7
-rw-r--r--arch/powerpc/mm/init_64.c22
-rw-r--r--arch/powerpc/mm/mem.c18
-rw-r--r--arch/powerpc/mm/mmu_context_book3s64.c7
-rw-r--r--arch/powerpc/mm/mmu_context_iommu.c81
-rw-r--r--arch/powerpc/mm/mmu_context_nohash.c56
-rw-r--r--arch/powerpc/mm/mmu_decl.h3
-rw-r--r--arch/powerpc/mm/numa.c84
-rw-r--r--arch/powerpc/mm/pgtable-book3s64.c21
-rw-r--r--arch/powerpc/mm/pgtable-radix.c93
-rw-r--r--arch/powerpc/mm/pgtable.c4
-rw-r--r--arch/powerpc/mm/pgtable_32.c6
-rw-r--r--arch/powerpc/mm/pgtable_64.c3
-rw-r--r--arch/powerpc/mm/slb_low.S15
-rw-r--r--arch/powerpc/mm/tlb-radix.c269
-rw-r--r--arch/powerpc/mm/tlb_hash32.c11
-rw-r--r--arch/powerpc/mm/tlb_nohash.c6
30 files changed, 969 insertions, 497 deletions
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index 949100577db5..6c5025e81236 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -13,62 +13,115 @@
*/
#include <linux/memblock.h>
+#include <asm/fixmap.h>
+#include <asm/code-patching.h>
#include "mmu_decl.h"
+#define IMMR_SIZE (FIX_IMMR_SIZE << PAGE_SHIFT)
+
extern int __map_without_ltlbs;
+
/*
- * MMU_init_hw does the chip-specific initialization of the MMU hardware.
+ * Return PA for this VA if it is in IMMR area, or 0
*/
-void __init MMU_init_hw(void)
+phys_addr_t v_block_mapped(unsigned long va)
{
- /* Nothing to do for the time being but keep it similar to other PPC */
+ unsigned long p = PHYS_IMMR_BASE;
+
+ if (__map_without_ltlbs)
+ return 0;
+ if (va >= VIRT_IMMR_BASE && va < VIRT_IMMR_BASE + IMMR_SIZE)
+ return p + va - VIRT_IMMR_BASE;
+ return 0;
+}
+
+/*
+ * Return VA for a given PA or 0 if not mapped
+ */
+unsigned long p_block_mapped(phys_addr_t pa)
+{
+ unsigned long p = PHYS_IMMR_BASE;
+
+ if (__map_without_ltlbs)
+ return 0;
+ if (pa >= p && pa < p + IMMR_SIZE)
+ return VIRT_IMMR_BASE + pa - p;
+ return 0;
}
-#define LARGE_PAGE_SIZE_4M (1<<22)
#define LARGE_PAGE_SIZE_8M (1<<23)
-#define LARGE_PAGE_SIZE_64M (1<<26)
-unsigned long __init mmu_mapin_ram(unsigned long top)
+/*
+ * MMU_init_hw does the chip-specific initialization of the MMU hardware.
+ */
+void __init MMU_init_hw(void)
{
- unsigned long v, s, mapped;
- phys_addr_t p;
+ /* PIN up to the 3 first 8Mb after IMMR in DTLB table */
+#ifdef CONFIG_PIN_TLB
+ unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000;
+ unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY;
+#ifdef CONFIG_PIN_TLB_IMMR
+ int i = 29;
+#else
+ int i = 28;
+#endif
+ unsigned long addr = 0;
+ unsigned long mem = total_lowmem;
+
+ for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) {
+ mtspr(SPRN_MD_CTR, ctr | (i << 8));
+ mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID);
+ mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID);
+ mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT);
+ addr += LARGE_PAGE_SIZE_8M;
+ mem -= LARGE_PAGE_SIZE_8M;
+ }
+#endif
+}
- v = KERNELBASE;
- p = 0;
- s = top;
+static void mmu_mapin_immr(void)
+{
+ unsigned long p = PHYS_IMMR_BASE;
+ unsigned long v = VIRT_IMMR_BASE;
+ unsigned long f = pgprot_val(PAGE_KERNEL_NCG);
+ int offset;
- if (__map_without_ltlbs)
- return 0;
+ for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE)
+ map_page(v + offset, p + offset, f);
+}
-#ifdef CONFIG_PPC_4K_PAGES
- while (s >= LARGE_PAGE_SIZE_8M) {
- pmd_t *pmdp;
- unsigned long val = p | MD_PS8MEG;
+/* Address of instructions to patch */
+#ifndef CONFIG_PIN_TLB_IMMR
+extern unsigned int DTLBMiss_jmp;
+#endif
+extern unsigned int DTLBMiss_cmp, FixupDAR_cmp;
- pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v);
- *pmdp++ = __pmd(val);
- *pmdp++ = __pmd(val + LARGE_PAGE_SIZE_4M);
+void mmu_patch_cmp_limit(unsigned int *addr, unsigned long mapped)
+{
+ unsigned int instr = *addr;
- v += LARGE_PAGE_SIZE_8M;
- p += LARGE_PAGE_SIZE_8M;
- s -= LARGE_PAGE_SIZE_8M;
- }
-#else /* CONFIG_PPC_16K_PAGES */
- while (s >= LARGE_PAGE_SIZE_64M) {
- pmd_t *pmdp;
- unsigned long val = p | MD_PS8MEG;
+ instr &= 0xffff0000;
+ instr |= (unsigned long)__va(mapped) >> 16;
+ patch_instruction(addr, instr);
+}
- pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v);
- *pmdp++ = __pmd(val);
+unsigned long __init mmu_mapin_ram(unsigned long top)
+{
+ unsigned long mapped;
- v += LARGE_PAGE_SIZE_64M;
- p += LARGE_PAGE_SIZE_64M;
- s -= LARGE_PAGE_SIZE_64M;
- }
+ if (__map_without_ltlbs) {
+ mapped = 0;
+ mmu_mapin_immr();
+#ifndef CONFIG_PIN_TLB_IMMR
+ patch_instruction(&DTLBMiss_jmp, PPC_INST_NOP);
#endif
+ } else {
+ mapped = top & ~(LARGE_PAGE_SIZE_8M - 1);
+ }
- mapped = top - s;
+ mmu_patch_cmp_limit(&DTLBMiss_cmp, mapped);
+ mmu_patch_cmp_limit(&FixupDAR_cmp, mapped);
/* If the size of RAM is not an exact power of two, we may not
* have covered RAM in its entirety with 8 MiB
@@ -77,7 +130,8 @@ unsigned long __init mmu_mapin_ram(unsigned long top)
* coverage with normal-sized pages (or other reasons) do not
* attempt to allocate outside the allowed range.
*/
- memblock_set_current_limit(mapped);
+ if (mapped)
+ memblock_set_current_limit(mapped);
return mapped;
}
@@ -90,13 +144,8 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
*/
BUG_ON(first_memblock_base != 0);
-#ifdef CONFIG_PIN_TLB
/* 8xx can only access 24MB at the moment */
memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01800000));
-#else
- /* 8xx can only access 8MB at the moment */
- memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000));
-#endif
}
/*
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index f2cea6d5e764..1a4e570f7894 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -7,17 +7,16 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
obj-y := fault.o mem.o pgtable.o mmap.o \
- init_$(CONFIG_WORD_SIZE).o \
- pgtable_$(CONFIG_WORD_SIZE).o
+ init_$(BITS).o pgtable_$(BITS).o
obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
tlb_nohash_low.o
-obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(CONFIG_WORD_SIZE)e.o
+obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o
hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o
obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o
obj-$(CONFIG_PPC_STD_MMU_64) += pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o
obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o
obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
-obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(CONFIG_WORD_SIZE).o
+obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(BITS).o
ifeq ($(CONFIG_PPC_STD_MMU_64),y)
obj-$(CONFIG_PPC_4K_PAGES) += hash64_4k.o
obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index 6527882ce05e..bb0354222b11 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -75,7 +75,7 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
}
ret = 0;
- *flt = handle_mm_fault(mm, vma, ea, is_write ? FAULT_FLAG_WRITE : 0);
+ *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0);
if (unlikely(*flt & VM_FAULT_ERROR)) {
if (*flt & VM_FAULT_OOM) {
ret = -ENOMEM;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index a67c6d781c52..d0b137d96df1 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -26,7 +26,7 @@
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/highmem.h>
-#include <linux/module.h>
+#include <linux/extable.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/perf_event.h>
@@ -205,7 +205,7 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
* The return value is 0 if the fault was handled, or the signal
* number if this is a kernel fault that can't be handled here.
*/
-int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
+int do_page_fault(struct pt_regs *regs, unsigned long address,
unsigned long error_code)
{
enum ctx_state prev_state = exception_enter();
@@ -429,7 +429,7 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(mm, vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags);
if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
if (fault & VM_FAULT_SIGSEGV)
goto bad_area;
@@ -498,8 +498,8 @@ bad_area_nosemaphore:
bail:
exception_exit(prev_state);
return rc;
-
}
+NOKPROBE_SYMBOL(do_page_fault);
/*
* bad_page_fault is called when we have a bad access from the kernel.
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index 6333b273d2d5..42c702b3be1f 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -70,8 +70,8 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
- if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K,
- MMU_PAGE_4K, ssize, flags) == -1)
+ if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K,
+ MMU_PAGE_4K, ssize, flags) == -1)
old_pte &= ~_PAGE_HPTEFLAGS;
}
@@ -84,21 +84,23 @@ repeat:
hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
/* Insert into the hash table, primary slot */
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
- MMU_PAGE_4K, MMU_PAGE_4K, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+ MMU_PAGE_4K, MMU_PAGE_4K, ssize);
/*
* Primary is full, try the secondary
*/
if (unlikely(slot == -1)) {
hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa,
- rflags, HPTE_V_SECONDARY,
- MMU_PAGE_4K, MMU_PAGE_4K, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
+ rflags,
+ HPTE_V_SECONDARY,
+ MMU_PAGE_4K,
+ MMU_PAGE_4K, ssize);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- ppc_md.hpte_remove(hpte_group);
+ mmu_hash_ops.hpte_remove(hpte_group);
/*
* FIXME!! Should be try the group from which we removed ?
*/
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 16644e1f4e6b..3bbbea07378c 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -133,9 +133,9 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += hidx & _PTEIDX_GROUP_IX;
- ret = ppc_md.hpte_updatepp(slot, rflags, vpn,
- MMU_PAGE_4K, MMU_PAGE_4K,
- ssize, flags);
+ ret = mmu_hash_ops.hpte_updatepp(slot, rflags, vpn,
+ MMU_PAGE_4K, MMU_PAGE_4K,
+ ssize, flags);
/*
*if we failed because typically the HPTE wasn't really here
* we try an insertion.
@@ -166,21 +166,22 @@ repeat:
hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
/* Insert into the hash table, primary slot */
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
- MMU_PAGE_4K, MMU_PAGE_4K, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+ MMU_PAGE_4K, MMU_PAGE_4K, ssize);
/*
* Primary is full, try the secondary
*/
if (unlikely(slot == -1)) {
hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa,
- rflags, HPTE_V_SECONDARY,
- MMU_PAGE_4K, MMU_PAGE_4K, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
+ rflags, HPTE_V_SECONDARY,
+ MMU_PAGE_4K, MMU_PAGE_4K,
+ ssize);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- ppc_md.hpte_remove(hpte_group);
+ mmu_hash_ops.hpte_remove(hpte_group);
/*
* FIXME!! Should be try the group from which we removed ?
*/
@@ -272,8 +273,9 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
- if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K,
- MMU_PAGE_64K, ssize, flags) == -1)
+ if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K,
+ MMU_PAGE_64K, ssize,
+ flags) == -1)
old_pte &= ~_PAGE_HPTEFLAGS;
}
@@ -286,21 +288,24 @@ repeat:
hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
/* Insert into the hash table, primary slot */
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
- MMU_PAGE_64K, MMU_PAGE_64K, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+ MMU_PAGE_64K, MMU_PAGE_64K,
+ ssize);
/*
* Primary is full, try the secondary
*/
if (unlikely(slot == -1)) {
hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa,
- rflags, HPTE_V_SECONDARY,
- MMU_PAGE_64K, MMU_PAGE_64K, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
+ rflags,
+ HPTE_V_SECONDARY,
+ MMU_PAGE_64K,
+ MMU_PAGE_64K, ssize);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- ppc_md.hpte_remove(hpte_group);
+ mmu_hash_ops.hpte_remove(hpte_group);
/*
* FIXME!! Should be try the group from which we removed ?
*/
diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S
index 115347f74ce5..09cc50c8dace 100644
--- a/arch/powerpc/mm/hash_low_32.S
+++ b/arch/powerpc/mm/hash_low_32.S
@@ -26,6 +26,7 @@
#include <asm/ppc_asm.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
+#include <asm/export.h>
#ifdef CONFIG_SMP
.section .bss
@@ -33,6 +34,7 @@
.globl mmu_hash_lock
mmu_hash_lock:
.space 4
+EXPORT_SYMBOL(mmu_hash_lock)
#endif /* CONFIG_SMP */
/*
@@ -575,6 +577,7 @@ _GLOBAL(flush_hash_pages)
rlwinm r8,r8,0,31,29 /* clear HASHPTE bit */
stwcx. r8,0,r5 /* update the pte */
bne- 33b
+EXPORT_SYMBOL(flush_hash_pages)
/* Get the address of the primary PTE group in the hash table (r3) */
_GLOBAL(flush_hash_patch_A)
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index d873f6507f72..83ddc0e171b0 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -55,7 +55,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
* We need 14 to 65 bits of va for a tlibe of 4K page
* With vpn we ignore the lower VPN_SHIFT bits already.
* And top two bits are already ignored because we can
- * only accomadate 76 bits in a 64 bit vpn with a VPN_SHIFT
+ * only accomodate 76 bits in a 64 bit vpn with a VPN_SHIFT
* of 12.
*/
va = vpn << VPN_SHIFT;
@@ -64,15 +64,15 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
* Older versions of the architecture (2.02 and earler) require the
* masking of the top 16 bits.
*/
- va &= ~(0xffffULL << 48);
+ if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA))
+ va &= ~(0xffffULL << 48);
switch (psize) {
case MMU_PAGE_4K:
/* clear out bits after (52) [0....52.....63] */
va &= ~((1ul << (64 - 52)) - 1);
va |= ssize << 8;
- sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
- ((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
+ sllp = get_sllp_encoding(apsize);
va |= sllp << 5;
asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
: : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
@@ -113,15 +113,15 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
* Older versions of the architecture (2.02 and earler) require the
* masking of the top 16 bits.
*/
- va &= ~(0xffffULL << 48);
+ if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA))
+ va &= ~(0xffffULL << 48);
switch (psize) {
case MMU_PAGE_4K:
/* clear out bits after(52) [0....52.....63] */
va &= ~((1ul << (64 - 52)) - 1);
va |= ssize << 8;
- sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
- ((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
+ sllp = get_sllp_encoding(apsize);
va |= sllp << 5;
asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
: : "r"(va) : "memory");
@@ -316,8 +316,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
DBG_LOW(" -> hit\n");
/* Update the HPTE */
hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
- ~(HPTE_R_PP | HPTE_R_N)) |
- (newpp & (HPTE_R_PP | HPTE_R_N |
+ ~(HPTE_R_PPP | HPTE_R_N)) |
+ (newpp & (HPTE_R_PPP | HPTE_R_N |
HPTE_R_C)));
}
native_unlock_hpte(hptep);
@@ -385,8 +385,8 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
/* Update the HPTE */
hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
- ~(HPTE_R_PP | HPTE_R_N)) |
- (newpp & (HPTE_R_PP | HPTE_R_N)));
+ ~(HPTE_R_PPP | HPTE_R_N)) |
+ (newpp & (HPTE_R_PPP | HPTE_R_N)));
/*
* Ensure it is out of the tlb too. Bolted entries base and
* actual page size will be same.
@@ -493,36 +493,6 @@ static void native_hugepage_invalidate(unsigned long vsid,
}
#endif
-static inline int __hpte_actual_psize(unsigned int lp, int psize)
-{
- int i, shift;
- unsigned int mask;
-
- /* start from 1 ignoring MMU_PAGE_4K */
- for (i = 1; i < MMU_PAGE_COUNT; i++) {
-
- /* invalid penc */
- if (mmu_psize_defs[psize].penc[i] == -1)
- continue;
- /*
- * encoding bits per actual page size
- * PTE LP actual page size
- * rrrr rrrz >=8KB
- * rrrr rrzz >=16KB
- * rrrr rzzz >=32KB
- * rrrr zzzz >=64KB
- * .......
- */
- shift = mmu_psize_defs[i].shift - LP_SHIFT;
- if (shift > LP_BITS)
- shift = LP_BITS;
- mask = (1 << shift) - 1;
- if ((lp & mask) == mmu_psize_defs[psize].penc[i])
- return i;
- }
- return -1;
-}
-
static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
int *psize, int *apsize, int *ssize, unsigned long *vpn)
{
@@ -538,19 +508,15 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
size = MMU_PAGE_4K;
a_size = MMU_PAGE_4K;
} else {
- for (size = 0; size < MMU_PAGE_COUNT; size++) {
-
- /* valid entries have a shift value */
- if (!mmu_psize_defs[size].shift)
- continue;
-
- a_size = __hpte_actual_psize(lp, size);
- if (a_size != -1)
- break;
- }
+ size = hpte_page_sizes[lp] & 0xf;
+ a_size = hpte_page_sizes[lp] >> 4;
}
/* This works for all page sizes, and for 256M and 1T segments */
- *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ *ssize = hpte_r >> HPTE_R_3_0_SSIZE_SHIFT;
+ else
+ *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
+
shift = mmu_psize_defs[size].shift;
avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
@@ -601,7 +567,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
* crashdump and all bets are off anyway.
*
* TODO: add batching support when enabled. remember, no dynamic memory here,
- * athough there is the control page available...
+ * although there is the control page available...
*/
static void native_hpte_clear(void)
{
@@ -719,23 +685,29 @@ static void native_flush_hash_range(unsigned long number, int local)
local_irq_restore(flags);
}
-static int native_update_partition_table(u64 patb1)
+static int native_register_proc_table(unsigned long base, unsigned long page_size,
+ unsigned long table_size)
{
+ unsigned long patb1 = base << 25; /* VSID */
+
+ patb1 |= (page_size << 5); /* sllp */
+ patb1 |= table_size;
+
partition_tb->patb1 = cpu_to_be64(patb1);
return 0;
}
void __init hpte_init_native(void)
{
- ppc_md.hpte_invalidate = native_hpte_invalidate;
- ppc_md.hpte_updatepp = native_hpte_updatepp;
- ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp;
- ppc_md.hpte_insert = native_hpte_insert;
- ppc_md.hpte_remove = native_hpte_remove;
- ppc_md.hpte_clear_all = native_hpte_clear;
- ppc_md.flush_hash_range = native_flush_hash_range;
- ppc_md.hugepage_invalidate = native_hugepage_invalidate;
+ mmu_hash_ops.hpte_invalidate = native_hpte_invalidate;
+ mmu_hash_ops.hpte_updatepp = native_hpte_updatepp;
+ mmu_hash_ops.hpte_updateboltedpp = native_hpte_updateboltedpp;
+ mmu_hash_ops.hpte_insert = native_hpte_insert;
+ mmu_hash_ops.hpte_remove = native_hpte_remove;
+ mmu_hash_ops.hpte_clear_all = native_hpte_clear;
+ mmu_hash_ops.flush_hash_range = native_flush_hash_range;
+ mmu_hash_ops.hugepage_invalidate = native_hugepage_invalidate;
if (cpu_has_feature(CPU_FTR_ARCH_300))
- ppc_md.update_partition_table = native_update_partition_table;
+ register_process_table = native_register_proc_table;
}
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 59268969a0bc..44d3c3a38e3e 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -34,6 +34,7 @@
#include <linux/signal.h>
#include <linux/memblock.h>
#include <linux/context_tracking.h>
+#include <linux/libfdt.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
@@ -58,6 +59,7 @@
#include <asm/firmware.h>
#include <asm/tm.h>
#include <asm/trace.h>
+#include <asm/ps3.h>
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
@@ -87,14 +89,13 @@
*
*/
-#ifdef CONFIG_U3_DART
-extern unsigned long dart_tablebase;
-#endif /* CONFIG_U3_DART */
-
static unsigned long _SDR1;
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
EXPORT_SYMBOL_GPL(mmu_psize_defs);
+u8 hpte_page_sizes[1 << LP_BITS];
+EXPORT_SYMBOL_GPL(hpte_page_sizes);
+
struct hash_pte *htab_address;
unsigned long htab_size_bytes;
unsigned long htab_hash_mask;
@@ -120,6 +121,8 @@ static u8 *linear_map_hash_slots;
static unsigned long linear_map_hash_count;
static DEFINE_SPINLOCK(linear_map_hash_lock);
#endif /* CONFIG_DEBUG_PAGEALLOC */
+struct mmu_hash_ops mmu_hash_ops;
+EXPORT_SYMBOL(mmu_hash_ops);
/* There are definitions of page sizes arrays to be used when none
* is provided by the firmware.
@@ -159,6 +162,19 @@ static struct mmu_psize_def mmu_psize_defaults_gp[] = {
},
};
+/*
+ * 'R' and 'C' update notes:
+ * - Under pHyp or KVM, the updatepp path will not set C, thus it *will*
+ * create writeable HPTEs without C set, because the hcall H_PROTECT
+ * that we use in that case will not update C
+ * - The above is however not a problem, because we also don't do that
+ * fancy "no flush" variant of eviction and we use H_REMOVE which will
+ * do the right thing and thus we don't have the race I described earlier
+ *
+ * - Under bare metal, we do have the race, so we need R and C set
+ * - We make sure R is always set and never lost
+ * - C is _PAGE_DIRTY, and *should* always be set for a writeable mapping
+ */
unsigned long htab_convert_pte_flags(unsigned long pteflags)
{
unsigned long rflags = 0;
@@ -186,19 +202,28 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
rflags |= 0x1;
}
/*
- * Always add "C" bit for perf. Memory coherence is always enabled
+ * We can't allow hardware to update hpte bits. Hence always
+ * set 'R' bit and set 'C' if it is a write fault
*/
- rflags |= HPTE_R_C | HPTE_R_M;
+ rflags |= HPTE_R_R;
+
+ if (pteflags & _PAGE_DIRTY)
+ rflags |= HPTE_R_C;
/*
* Add in WIG bits
*/
if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_TOLERANT)
rflags |= HPTE_R_I;
- if ((pteflags & _PAGE_CACHE_CTL ) == _PAGE_NON_IDEMPOTENT)
+ else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT)
rflags |= (HPTE_R_I | HPTE_R_G);
- if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO)
- rflags |= (HPTE_R_I | HPTE_R_W);
+ else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO)
+ rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M);
+ else
+ /*
+ * Add memory coherence if cache inhibited is not set
+ */
+ rflags |= HPTE_R_M;
return rflags;
}
@@ -256,9 +281,10 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
hash = hpt_hash(vpn, shift, ssize);
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
- BUG_ON(!ppc_md.hpte_insert);
- ret = ppc_md.hpte_insert(hpteg, vpn, paddr, tprot,
- HPTE_V_BOLTED, psize, psize, ssize);
+ BUG_ON(!mmu_hash_ops.hpte_insert);
+ ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot,
+ HPTE_V_BOLTED, psize, psize,
+ ssize);
if (ret < 0)
break;
@@ -283,11 +309,11 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
shift = mmu_psize_defs[psize].shift;
step = 1 << shift;
- if (!ppc_md.hpte_removebolted)
+ if (!mmu_hash_ops.hpte_removebolted)
return -ENODEV;
for (vaddr = vstart; vaddr < vend; vaddr += step) {
- rc = ppc_md.hpte_removebolted(vaddr, psize, ssize);
+ rc = mmu_hash_ops.hpte_removebolted(vaddr, psize, ssize);
if (rc == -ENOENT) {
ret = -ENOENT;
continue;
@@ -299,6 +325,15 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
return ret;
}
+static bool disable_1tb_segments = false;
+
+static int __init parse_disable_1tb_segments(char *p)
+{
+ disable_1tb_segments = true;
+ return 0;
+}
+early_param("disable_1tb_segments", parse_disable_1tb_segments);
+
static int __init htab_dt_scan_seg_sizes(unsigned long node,
const char *uname, int depth,
void *data)
@@ -317,6 +352,12 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node,
for (; size >= 4; size -= 4, ++prop) {
if (be32_to_cpu(prop[0]) == 40) {
DBG("1T segment support detected\n");
+
+ if (disable_1tb_segments) {
+ DBG("1T segments disabled by command line\n");
+ break;
+ }
+
cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT;
return 1;
}
@@ -325,11 +366,6 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node,
return 0;
}
-static void __init htab_init_seg_sizes(void)
-{
- of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL);
-}
-
static int __init get_idx_from_shift(unsigned int shift)
{
int idx = -1;
@@ -492,7 +528,8 @@ static bool might_have_hea(void)
* we will never see an HEA ethernet device.
*/
#ifdef CONFIG_IBMEBUS
- return !cpu_has_feature(CPU_FTR_ARCH_207S);
+ return !cpu_has_feature(CPU_FTR_ARCH_207S) &&
+ firmware_has_feature(FW_FEATURE_SPLPAR);
#else
return false;
#endif
@@ -500,7 +537,7 @@ static bool might_have_hea(void)
#endif /* #ifdef CONFIG_PPC_64K_PAGES */
-static void __init htab_init_page_sizes(void)
+static void __init htab_scan_page_sizes(void)
{
int rc;
@@ -515,17 +552,75 @@ static void __init htab_init_page_sizes(void)
* Try to find the available page sizes in the device-tree
*/
rc = of_scan_flat_dt(htab_dt_scan_page_sizes, NULL);
- if (rc != 0) /* Found */
- goto found;
-
- /*
- * Not in the device-tree, let's fallback on known size
- * list for 16M capable GP & GR
- */
- if (mmu_has_feature(MMU_FTR_16M_PAGE))
+ if (rc == 0 && early_mmu_has_feature(MMU_FTR_16M_PAGE)) {
+ /*
+ * Nothing in the device-tree, but the CPU supports 16M pages,
+ * so let's fallback on a known size list for 16M capable CPUs.
+ */
memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
sizeof(mmu_psize_defaults_gp));
-found:
+ }
+
+#ifdef CONFIG_HUGETLB_PAGE
+ /* Reserve 16G huge page memory sections for huge pages */
+ of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
+#endif /* CONFIG_HUGETLB_PAGE */
+}
+
+/*
+ * Fill in the hpte_page_sizes[] array.
+ * We go through the mmu_psize_defs[] array looking for all the
+ * supported base/actual page size combinations. Each combination
+ * has a unique pagesize encoding (penc) value in the low bits of
+ * the LP field of the HPTE. For actual page sizes less than 1MB,
+ * some of the upper LP bits are used for RPN bits, meaning that
+ * we need to fill in several entries in hpte_page_sizes[].
+ *
+ * In diagrammatic form, with r = RPN bits and z = page size bits:
+ * PTE LP actual page size
+ * rrrr rrrz >=8KB
+ * rrrr rrzz >=16KB
+ * rrrr rzzz >=32KB
+ * rrrr zzzz >=64KB
+ * ...
+ *
+ * The zzzz bits are implementation-specific but are chosen so that
+ * no encoding for a larger page size uses the same value in its
+ * low-order N bits as the encoding for the 2^(12+N) byte page size
+ * (if it exists).
+ */
+static void init_hpte_page_sizes(void)
+{
+ long int ap, bp;
+ long int shift, penc;
+
+ for (bp = 0; bp < MMU_PAGE_COUNT; ++bp) {
+ if (!mmu_psize_defs[bp].shift)
+ continue; /* not a supported page size */
+ for (ap = bp; ap < MMU_PAGE_COUNT; ++ap) {
+ penc = mmu_psize_defs[bp].penc[ap];
+ if (penc == -1)
+ continue;
+ shift = mmu_psize_defs[ap].shift - LP_SHIFT;
+ if (shift <= 0)
+ continue; /* should never happen */
+ /*
+ * For page sizes less than 1MB, this loop
+ * replicates the entry for all possible values
+ * of the rrrr bits.
+ */
+ while (penc < (1 << LP_BITS)) {
+ hpte_page_sizes[penc] = (ap << 4) | bp;
+ penc += 1 << shift;
+ }
+ }
+ }
+}
+
+static void __init htab_init_page_sizes(void)
+{
+ init_hpte_page_sizes();
+
if (!debug_pagealloc_enabled()) {
/*
* Pick a size for the linear mapping. Currently, we only
@@ -558,7 +653,7 @@ found:
* would stop us accessing the HEA ethernet. So if we
* have the chance of ever seeing one, stay at 4k.
*/
- if (!might_have_hea() || !machine_is(pseries))
+ if (!might_have_hea())
mmu_io_psize = MMU_PAGE_64K;
} else
mmu_ci_restrictions = 1;
@@ -591,11 +686,6 @@ found:
,mmu_psize_defs[mmu_vmemmap_psize].shift
#endif
);
-
-#ifdef CONFIG_HUGETLB_PAGE
- /* Reserve 16G huge page memory sections for huge pages */
- of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
-#endif /* CONFIG_HUGETLB_PAGE */
}
static int __init htab_dt_scan_pftsize(unsigned long node,
@@ -676,11 +766,33 @@ int remove_section_mapping(unsigned long start, unsigned long end)
}
#endif /* CONFIG_MEMORY_HOTPLUG */
+static void update_hid_for_hash(void)
+{
+ unsigned long hid0;
+ unsigned long rb = 3UL << PPC_BITLSHIFT(53); /* IS = 3 */
+
+ asm volatile("ptesync": : :"memory");
+ /* prs = 0, ric = 2, rs = 0, r = 1 is = 3 */
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(0), "i"(0), "i"(2), "r"(0) : "memory");
+ asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory");
+ /*
+ * now switch the HID
+ */
+ hid0 = mfspr(SPRN_HID0);
+ hid0 &= ~HID0_POWER9_RADIX;
+ mtspr(SPRN_HID0, hid0);
+ asm volatile("isync": : :"memory");
+
+ /* Wait for it to happen */
+ while ((mfspr(SPRN_HID0) & HID0_POWER9_RADIX))
+ cpu_relax();
+}
+
static void __init hash_init_partition_table(phys_addr_t hash_table,
- unsigned long pteg_count)
+ unsigned long htab_size)
{
unsigned long ps_field;
- unsigned long htab_size;
unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
/*
@@ -688,7 +800,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
* We can ignore that for lpid 0
*/
ps_field = 0;
- htab_size = __ilog2(pteg_count) - 11;
+ htab_size = __ilog2(htab_size) - 18;
BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large.");
partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
@@ -702,7 +814,9 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
* For now UPRT is 0 for us.
*/
partition_tb->patb1 = 0;
- DBG("Partition table %p\n", partition_tb);
+ pr_info("Partition table %p\n", partition_tb);
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ update_hid_for_hash();
/*
* update partition table control register,
* 64 K size.
@@ -716,17 +830,11 @@ static void __init htab_initialize(void)
unsigned long table;
unsigned long pteg_count;
unsigned long prot;
- unsigned long base = 0, size = 0, limit;
+ unsigned long base = 0, size = 0;
struct memblock_region *reg;
DBG(" -> htab_initialize()\n");
- /* Initialize segment sizes */
- htab_init_seg_sizes();
-
- /* Initialize page sizes */
- htab_init_page_sizes();
-
if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
mmu_kernel_ssize = MMU_SEGSIZE_1T;
mmu_highuser_ssize = MMU_SEGSIZE_1T;
@@ -742,7 +850,8 @@ static void __init htab_initialize(void)
htab_hash_mask = pteg_count - 1;
- if (firmware_has_feature(FW_FEATURE_LPAR)) {
+ if (firmware_has_feature(FW_FEATURE_LPAR) ||
+ firmware_has_feature(FW_FEATURE_PS3_LV1)) {
/* Using a hypervisor which owns the htab */
htab_address = NULL;
_SDR1 = 0;
@@ -753,20 +862,26 @@ static void __init htab_initialize(void)
* Clear the htab if firmware assisted dump is active so
* that we dont end up using old mappings.
*/
- if (is_fadump_active() && ppc_md.hpte_clear_all)
- ppc_md.hpte_clear_all();
+ if (is_fadump_active() && mmu_hash_ops.hpte_clear_all)
+ mmu_hash_ops.hpte_clear_all();
#endif
} else {
- /* Find storage for the HPT. Must be contiguous in
- * the absolute address space. On cell we want it to be
- * in the first 2 Gig so we can use it for IOMMU hacks.
+ unsigned long limit = MEMBLOCK_ALLOC_ANYWHERE;
+
+#ifdef CONFIG_PPC_CELL
+ /*
+ * Cell may require the hash table down low when using the
+ * Axon IOMMU in order to fit the dynamic region over it, see
+ * comments in cell/iommu.c
*/
- if (machine_is(cell))
+ if (fdt_subnode_offset(initial_boot_params, 0, "axon") > 0) {
limit = 0x80000000;
- else
- limit = MEMBLOCK_ALLOC_ANYWHERE;
+ pr_info("Hash table forced below 2G for Axon IOMMU\n");
+ }
+#endif /* CONFIG_PPC_CELL */
- table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, limit);
+ table = memblock_alloc_base(htab_size_bytes, htab_size_bytes,
+ limit);
DBG("Hash table allocated at %lx, size: %lx\n", table,
htab_size_bytes);
@@ -774,7 +889,7 @@ static void __init htab_initialize(void)
htab_address = __va(table);
/* htab absolute addr + encoded htabsize */
- _SDR1 = table + __ilog2(pteg_count) - 11;
+ _SDR1 = table + __ilog2(htab_size_bytes) - 18;
/* Initialize the HPT with no entries */
memset((void *)table, 0, htab_size_bytes);
@@ -783,7 +898,7 @@ static void __init htab_initialize(void)
/* Set SDR1 */
mtspr(SPRN_SDR1, _SDR1);
else
- hash_init_partition_table(table, pteg_count);
+ hash_init_partition_table(table, htab_size_bytes);
}
prot = pgprot_val(PAGE_KERNEL);
@@ -810,34 +925,6 @@ static void __init htab_initialize(void)
DBG("creating mapping for region: %lx..%lx (prot: %lx)\n",
base, size, prot);
-#ifdef CONFIG_U3_DART
- /* Do not map the DART space. Fortunately, it will be aligned
- * in such a way that it will not cross two memblock regions and
- * will fit within a single 16Mb page.
- * The DART space is assumed to be a full 16Mb region even if
- * we only use 2Mb of that space. We will use more of it later
- * for AGP GART. We have to use a full 16Mb large page.
- */
- DBG("DART base: %lx\n", dart_tablebase);
-
- if (dart_tablebase != 0 && dart_tablebase >= base
- && dart_tablebase < (base + size)) {
- unsigned long dart_table_end = dart_tablebase + 16 * MB;
- if (base != dart_tablebase)
- BUG_ON(htab_bolt_mapping(base, dart_tablebase,
- __pa(base), prot,
- mmu_linear_psize,
- mmu_kernel_ssize));
- if ((base + size) > dart_table_end)
- BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB,
- base + size,
- __pa(dart_table_end),
- prot,
- mmu_linear_psize,
- mmu_kernel_ssize));
- continue;
- }
-#endif /* CONFIG_U3_DART */
BUG_ON(htab_bolt_mapping(base, base + size, __pa(base),
prot, mmu_linear_psize, mmu_kernel_ssize));
}
@@ -868,8 +955,19 @@ static void __init htab_initialize(void)
#undef KB
#undef MB
+void __init hash__early_init_devtree(void)
+{
+ /* Initialize segment sizes */
+ of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL);
+
+ /* Initialize page sizes */
+ htab_scan_page_sizes();
+}
+
void __init hash__early_init_mmu(void)
{
+ htab_init_page_sizes();
+
/*
* initialize page table size
*/
@@ -900,12 +998,28 @@ void __init hash__early_init_mmu(void)
vmemmap = (struct page *)H_VMEMMAP_BASE;
ioremap_bot = IOREMAP_BASE;
+#ifdef CONFIG_PCI
+ pci_io_base = ISA_IO_BASE;
+#endif
+
+ /* Select appropriate backend */
+ if (firmware_has_feature(FW_FEATURE_PS3_LV1))
+ ps3_early_mm_init();
+ else if (firmware_has_feature(FW_FEATURE_LPAR))
+ hpte_init_pseries();
+ else if (IS_ENABLED(CONFIG_PPC_NATIVE))
+ hpte_init_native();
+
+ if (!mmu_hash_ops.hpte_insert)
+ panic("hash__early_init_mmu: No MMU hash ops defined!\n");
+
/* Initialize the MMU Hash table and create the linear mapping
* of memory. Has to be done before SLB initialization as this is
* currently where the page size encoding is obtained.
*/
htab_initialize();
+ pr_info("Initializing hash mmu with SLB\n");
/* Initialize SLB management */
slb_initialize();
}
@@ -1426,6 +1540,29 @@ out_exit:
local_irq_restore(flags);
}
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static inline void tm_flush_hash_page(int local)
+{
+ /*
+ * Transactions are not aborted by tlbiel, only tlbie. Without, syncing a
+ * page back to a block device w/PIO could pick up transactional data
+ * (bad!) so we force an abort here. Before the sync the page will be
+ * made read-only, which will flush_hash_page. BIG ISSUE here: if the
+ * kernel uses a page from userspace without unmapping it first, it may
+ * see the speculated version.
+ */
+ if (local && cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
+ MSR_TM_ACTIVE(current->thread.regs->msr)) {
+ tm_enable();
+ tm_abort(TM_CAUSE_TLBI);
+ }
+}
+#else
+static inline void tm_flush_hash_page(int local)
+{
+}
+#endif
+
/* WARNING: This is called from hash_low_64.S, if you change this prototype,
* do not forget to update the assembly call site !
*/
@@ -1448,24 +1585,11 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
* We use same base page size and actual psize, because we don't
* use these functions for hugepage
*/
- ppc_md.hpte_invalidate(slot, vpn, psize, psize, ssize, local);
+ mmu_hash_ops.hpte_invalidate(slot, vpn, psize, psize,
+ ssize, local);
} pte_iterate_hashed_end();
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- /* Transactions are not aborted by tlbiel, only tlbie.
- * Without, syncing a page back to a block device w/ PIO could pick up
- * transactional data (bad!) so we force an abort here. Before the
- * sync the page will be made read-only, which will flush_hash_page.
- * BIG ISSUE here: if the kernel uses a page from userspace without
- * unmapping it first, it may see the speculated version.
- */
- if (local && cpu_has_feature(CPU_FTR_TM) &&
- current->thread.regs &&
- MSR_TM_ACTIVE(current->thread.regs->msr)) {
- tm_enable();
- tm_abort(TM_CAUSE_TLBI);
- }
-#endif
+ tm_flush_hash_page(local);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -1489,9 +1613,9 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
if (!hpte_slot_array)
return;
- if (ppc_md.hugepage_invalidate) {
- ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array,
- psize, ssize, local);
+ if (mmu_hash_ops.hugepage_invalidate) {
+ mmu_hash_ops.hugepage_invalidate(vsid, s_addr, hpte_slot_array,
+ psize, ssize, local);
goto tm_abort;
}
/*
@@ -1518,33 +1642,18 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += hidx & _PTEIDX_GROUP_IX;
- ppc_md.hpte_invalidate(slot, vpn, psize,
- MMU_PAGE_16M, ssize, local);
+ mmu_hash_ops.hpte_invalidate(slot, vpn, psize,
+ MMU_PAGE_16M, ssize, local);
}
tm_abort:
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- /* Transactions are not aborted by tlbiel, only tlbie.
- * Without, syncing a page back to a block device w/ PIO could pick up
- * transactional data (bad!) so we force an abort here. Before the
- * sync the page will be made read-only, which will flush_hash_page.
- * BIG ISSUE here: if the kernel uses a page from userspace without
- * unmapping it first, it may see the speculated version.
- */
- if (local && cpu_has_feature(CPU_FTR_TM) &&
- current->thread.regs &&
- MSR_TM_ACTIVE(current->thread.regs->msr)) {
- tm_enable();
- tm_abort(TM_CAUSE_TLBI);
- }
-#endif
- return;
+ tm_flush_hash_page(local);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
void flush_hash_range(unsigned long number, int local)
{
- if (ppc_md.flush_hash_range)
- ppc_md.flush_hash_range(number, local);
+ if (mmu_hash_ops.flush_hash_range)
+ mmu_hash_ops.flush_hash_range(number, local);
else {
int i;
struct ppc64_tlb_batch *batch =
@@ -1589,22 +1698,22 @@ repeat:
HPTES_PER_GROUP) & ~0x7UL;
/* Insert into the hash table, primary slot */
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, vflags,
- psize, psize, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, vflags,
+ psize, psize, ssize);
/* Primary is full, try the secondary */
if (unlikely(slot == -1)) {
hpte_group = ((~hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags,
- vflags | HPTE_V_SECONDARY,
- psize, psize, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags,
+ vflags | HPTE_V_SECONDARY,
+ psize, psize, ssize);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP)&~0x7UL;
- ppc_md.hpte_remove(hpte_group);
+ mmu_hash_ops.hpte_remove(hpte_group);
goto repeat;
}
}
@@ -1654,8 +1763,9 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += hidx & _PTEIDX_GROUP_IX;
- ppc_md.hpte_invalidate(slot, vpn, mmu_linear_psize, mmu_linear_psize,
- mmu_kernel_ssize, 0);
+ mmu_hash_ops.hpte_invalidate(slot, vpn, mmu_linear_psize,
+ mmu_linear_psize,
+ mmu_kernel_ssize, 0);
}
void __kernel_map_pages(struct page *page, int numpages, int enable)
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index ba3fc229468a..f20d16f849c5 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -103,8 +103,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += hidx & _PTEIDX_GROUP_IX;
- ret = ppc_md.hpte_updatepp(slot, rflags, vpn,
- psize, lpsize, ssize, flags);
+ ret = mmu_hash_ops.hpte_updatepp(slot, rflags, vpn,
+ psize, lpsize, ssize, flags);
/*
* We failed to update, try to insert a new entry.
*/
@@ -131,23 +131,24 @@ repeat:
hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
/* Insert into the hash table, primary slot */
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
- psize, lpsize, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+ psize, lpsize, ssize);
/*
* Primary is full, try the secondary
*/
if (unlikely(slot == -1)) {
hpte_group = ((~hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa,
- rflags, HPTE_V_SECONDARY,
- psize, lpsize, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
+ rflags,
+ HPTE_V_SECONDARY,
+ psize, lpsize, ssize);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- ppc_md.hpte_remove(hpte_group);
+ mmu_hash_ops.hpte_remove(hpte_group);
goto repeat;
}
}
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 3058560b6121..d5026f3800b6 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -79,8 +79,8 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
- if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize,
- mmu_psize, ssize, flags) == -1)
+ if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, mmu_psize,
+ mmu_psize, ssize, flags) == -1)
old_pte &= ~_PAGE_HPTEFLAGS;
}
diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/hugetlbpage-radix.c
index 1e11559e1aac..35254a678456 100644
--- a/arch/powerpc/mm/hugetlbpage-radix.c
+++ b/arch/powerpc/mm/hugetlbpage-radix.c
@@ -5,39 +5,34 @@
#include <asm/cacheflush.h>
#include <asm/machdep.h>
#include <asm/mman.h>
+#include <asm/tlb.h>
void radix__flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
- unsigned long ap, shift;
+ int psize;
struct hstate *hstate = hstate_file(vma->vm_file);
- shift = huge_page_shift(hstate);
- if (shift == mmu_psize_defs[MMU_PAGE_2M].shift)
- ap = mmu_get_ap(MMU_PAGE_2M);
- else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift)
- ap = mmu_get_ap(MMU_PAGE_1G);
- else {
- WARN(1, "Wrong huge page shift\n");
- return ;
- }
- radix___flush_tlb_page(vma->vm_mm, vmaddr, ap, 0);
+ psize = hstate_get_psize(hstate);
+ radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, psize);
}
void radix__local_flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
- unsigned long ap, shift;
+ int psize;
struct hstate *hstate = hstate_file(vma->vm_file);
- shift = huge_page_shift(hstate);
- if (shift == mmu_psize_defs[MMU_PAGE_2M].shift)
- ap = mmu_get_ap(MMU_PAGE_2M);
- else if (shift == mmu_psize_defs[MMU_PAGE_1G].shift)
- ap = mmu_get_ap(MMU_PAGE_1G);
- else {
- WARN(1, "Wrong huge page shift\n");
- return ;
- }
- radix___local_flush_tlb_page(vma->vm_mm, vmaddr, ap, 0);
+ psize = hstate_get_psize(hstate);
+ radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, psize);
+}
+
+void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+{
+ int psize;
+ struct hstate *hstate = hstate_file(vma->vm_file);
+
+ psize = hstate_get_psize(hstate);
+ radix__flush_tlb_range_psize(vma->vm_mm, start, end, psize);
}
/*
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 5aac1a3f86cd..a5d3ecdabc44 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -73,7 +73,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
cachep = PGT_CACHE(pdshift - pshift);
#endif
- new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT);
+ new = kmem_cache_zalloc(cachep, GFP_KERNEL);
BUG_ON(pshift > HUGEPD_SHIFT_MASK);
BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
@@ -81,6 +81,13 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
if (! new)
return -ENOMEM;
+ /*
+ * Make sure other cpus find the hugepd set only after a
+ * properly initialized page table is visible to them.
+ * For more details look for comment in __pte_alloc().
+ */
+ smp_wmb();
+
spin_lock(&mm->page_table_lock);
#ifdef CONFIG_PPC_FSL_BOOK3E
/*
@@ -1012,8 +1019,15 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
pte = READ_ONCE(*ptep);
mask = _PAGE_PRESENT | _PAGE_READ;
+
+ /*
+ * On some CPUs like the 8xx, _PAGE_RW hence _PAGE_WRITE is defined
+ * as 0 and _PAGE_RO has to be set when a page is not writable
+ */
if (write)
mask |= _PAGE_WRITE;
+ else
+ mask |= _PAGE_RO;
if ((pte_val(pte) & mask) != mask)
return 0;
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index c899fe340bbd..8a7c38b8d335 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -64,7 +64,7 @@ EXPORT_SYMBOL(memstart_addr);
phys_addr_t kernstart_addr;
EXPORT_SYMBOL(kernstart_addr);
-#ifdef CONFIG_RELOCATABLE_PPC32
+#ifdef CONFIG_RELOCATABLE
/* Used in __va()/__pa() */
long long virt_phys_offset;
EXPORT_SYMBOL(virt_phys_offset);
@@ -80,9 +80,6 @@ EXPORT_SYMBOL(agp_special_page);
void MMU_init(void);
-/* XXX should be in current.h -- paulus */
-extern struct task_struct *current_set[NR_CPUS];
-
/*
* this tells the system to map all of ram with the segregs
* (i.e. page tables) instead of the bats.
@@ -140,7 +137,7 @@ void __init MMU_init(void)
if (memblock.memory.cnt > 1) {
#ifndef CONFIG_WII
memblock_enforce_memory_limit(memblock.memory.regions[0].size);
- printk(KERN_WARNING "Only using first contiguous memory region");
+ pr_warn("Only using first contiguous memory region\n");
#else
wii_memory_fixups();
#endif
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 33709bdb0419..16ada1eb7e26 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -411,3 +411,25 @@ struct page *realmode_pfn_to_page(unsigned long pfn)
EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
#endif /* CONFIG_SPARSEMEM_VMEMMAP/CONFIG_FLATMEM */
+
+#ifdef CONFIG_PPC_STD_MMU_64
+static bool disable_radix;
+static int __init parse_disable_radix(char *p)
+{
+ disable_radix = true;
+ return 0;
+}
+early_param("disable_radix", parse_disable_radix);
+
+void __init mmu_early_init_devtree(void)
+{
+ /* Disable radix mode based on kernel command line. */
+ if (disable_radix)
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+
+ if (early_radix_enabled())
+ radix__early_init_devtree();
+ else
+ hash__early_init_devtree();
+}
+#endif /* CONFIG_PPC_STD_MMU_64 */
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 2fd57fa48429..5f844337de21 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -116,6 +116,16 @@ int memory_add_physaddr_to_nid(u64 start)
}
#endif
+int __weak create_section_mapping(unsigned long start, unsigned long end)
+{
+ return -ENODEV;
+}
+
+int __weak remove_section_mapping(unsigned long start, unsigned long end)
+{
+ return -ENODEV;
+}
+
int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
{
struct pglist_data *pgdata;
@@ -239,8 +249,14 @@ static int __init mark_nonram_nosave(void)
static bool zone_limits_final;
+/*
+ * The memory zones past TOP_ZONE are managed by generic mm code.
+ * These should be set to zero since that's what every other
+ * architecture does.
+ */
static unsigned long max_zone_pfns[MAX_NR_ZONES] = {
- [0 ... MAX_NR_ZONES - 1] = ~0UL
+ [0 ... TOP_ZONE ] = ~0UL,
+ [TOP_ZONE + 1 ... MAX_NR_ZONES - 1] = 0
};
/*
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 227b2a6c4544..b114f8b93ec9 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -65,7 +65,7 @@ static int radix__init_new_context(struct mm_struct *mm, int index)
/*
* set the process table entry,
*/
- rts_field = 3ull << PPC_BITLSHIFT(2);
+ rts_field = radix__get_tree_size();
process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE);
return 0;
}
@@ -181,7 +181,10 @@ void destroy_context(struct mm_struct *mm)
#ifdef CONFIG_PPC_RADIX_MMU
void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
{
- mtspr(SPRN_PID, next->context.id);
asm volatile("isync": : :"memory");
+ mtspr(SPRN_PID, next->context.id);
+ asm volatile("isync \n"
+ PPC_SLBIA(0x7)
+ : : :"memory");
}
#endif
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index da6a2168ae9e..e0f1c33601dd 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -15,6 +15,9 @@
#include <linux/rculist.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
+#include <linux/migrate.h>
+#include <linux/hugetlb.h>
+#include <linux/swap.h>
#include <asm/mmu_context.h>
static DEFINE_MUTEX(mem_list_mutex);
@@ -72,6 +75,55 @@ bool mm_iommu_preregistered(void)
}
EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
+/*
+ * Taken from alloc_migrate_target with changes to remove CMA allocations
+ */
+struct page *new_iommu_non_cma_page(struct page *page, unsigned long private,
+ int **resultp)
+{
+ gfp_t gfp_mask = GFP_USER;
+ struct page *new_page;
+
+ if (PageHuge(page) || PageTransHuge(page) || PageCompound(page))
+ return NULL;
+
+ if (PageHighMem(page))
+ gfp_mask |= __GFP_HIGHMEM;
+
+ /*
+ * We don't want the allocation to force an OOM if possibe
+ */
+ new_page = alloc_page(gfp_mask | __GFP_NORETRY | __GFP_NOWARN);
+ return new_page;
+}
+
+static int mm_iommu_move_page_from_cma(struct page *page)
+{
+ int ret = 0;
+ LIST_HEAD(cma_migrate_pages);
+
+ /* Ignore huge pages for now */
+ if (PageHuge(page) || PageTransHuge(page) || PageCompound(page))
+ return -EBUSY;
+
+ lru_add_drain();
+ ret = isolate_lru_page(page);
+ if (ret)
+ return ret;
+
+ list_add(&page->lru, &cma_migrate_pages);
+ put_page(page); /* Drop the gup reference */
+
+ ret = migrate_pages(&cma_migrate_pages, new_iommu_non_cma_page,
+ NULL, 0, MIGRATE_SYNC, MR_CMA);
+ if (ret) {
+ if (!list_empty(&cma_migrate_pages))
+ putback_movable_pages(&cma_migrate_pages);
+ }
+
+ return 0;
+}
+
long mm_iommu_get(unsigned long ua, unsigned long entries,
struct mm_iommu_table_group_mem_t **pmem)
{
@@ -124,15 +176,36 @@ long mm_iommu_get(unsigned long ua, unsigned long entries,
for (i = 0; i < entries; ++i) {
if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT),
1/* pages */, 1/* iswrite */, &page)) {
+ ret = -EFAULT;
for (j = 0; j < i; ++j)
- put_page(pfn_to_page(
- mem->hpas[j] >> PAGE_SHIFT));
+ put_page(pfn_to_page(mem->hpas[j] >>
+ PAGE_SHIFT));
vfree(mem->hpas);
kfree(mem);
- ret = -EFAULT;
goto unlock_exit;
}
-
+ /*
+ * If we get a page from the CMA zone, since we are going to
+ * be pinning these entries, we might as well move them out
+ * of the CMA zone if possible. NOTE: faulting in + migration
+ * can be expensive. Batching can be considered later
+ */
+ if (get_pageblock_migratetype(page) == MIGRATE_CMA) {
+ if (mm_iommu_move_page_from_cma(page))
+ goto populate;
+ if (1 != get_user_pages_fast(ua + (i << PAGE_SHIFT),
+ 1/* pages */, 1/* iswrite */,
+ &page)) {
+ ret = -EFAULT;
+ for (j = 0; j < i; ++j)
+ put_page(pfn_to_page(mem->hpas[j] >>
+ PAGE_SHIFT));
+ vfree(mem->hpas);
+ kfree(mem);
+ goto unlock_exit;
+ }
+ }
+populate:
mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
}
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 7d95bc402dba..c491f2c8f2b9 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -369,44 +369,34 @@ void destroy_context(struct mm_struct *mm)
}
#ifdef CONFIG_SMP
-
-static int mmu_context_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int mmu_ctx_cpu_prepare(unsigned int cpu)
{
- unsigned int cpu = (unsigned int)(long)hcpu;
-
/* We don't touch CPU 0 map, it's allocated at aboot and kept
* around forever
*/
if (cpu == boot_cpuid)
- return NOTIFY_OK;
-
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
- stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
- break;
-#ifdef CONFIG_HOTPLUG_CPU
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
- kfree(stale_map[cpu]);
- stale_map[cpu] = NULL;
-
- /* We also clear the cpu_vm_mask bits of CPUs going away */
- clear_tasks_mm_cpumask(cpu);
- break;
-#endif /* CONFIG_HOTPLUG_CPU */
- }
- return NOTIFY_OK;
+ return 0;
+
+ pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
+ stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
+ return 0;
}
-static struct notifier_block mmu_context_cpu_nb = {
- .notifier_call = mmu_context_cpu_notify,
-};
+static int mmu_ctx_cpu_dead(unsigned int cpu)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+ if (cpu == boot_cpuid)
+ return 0;
+
+ pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
+ kfree(stale_map[cpu]);
+ stale_map[cpu] = NULL;
+
+ /* We also clear the cpu_vm_mask bits of CPUs going away */
+ clear_tasks_mm_cpumask(cpu);
+#endif
+ return 0;
+}
#endif /* CONFIG_SMP */
@@ -469,7 +459,9 @@ void __init mmu_context_init(void)
#else
stale_map[boot_cpuid] = memblock_virt_alloc(CTX_MAP_SIZE, 0);
- register_cpu_notifier(&mmu_context_cpu_nb);
+ cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
+ "powerpc/mmu/ctx:prepare",
+ mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead);
#endif
printk(KERN_INFO
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 6af65327c993..f988db655e5b 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -154,9 +154,10 @@ struct tlbcam {
};
#endif
-#if defined(CONFIG_6xx) || defined(CONFIG_FSL_BOOKE)
+#if defined(CONFIG_6xx) || defined(CONFIG_FSL_BOOKE) || defined(CONFIG_PPC_8xx)
/* 6xx have BATS */
/* FSL_BOOKE have TLBCAM */
+/* 8xx have LTLB */
phys_addr_t v_block_mapped(unsigned long va);
unsigned long p_block_mapped(phys_addr_t pa);
#else
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 669a15e7fa76..75b9cd6150cc 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -581,30 +581,22 @@ static void verify_cpu_node_mapping(int cpu, int node)
}
}
-static int cpu_numa_callback(struct notifier_block *nfb, unsigned long action,
- void *hcpu)
+/* Must run before sched domains notifier. */
+static int ppc_numa_cpu_prepare(unsigned int cpu)
{
- unsigned long lcpu = (unsigned long)hcpu;
- int ret = NOTIFY_DONE, nid;
+ int nid;
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- nid = numa_setup_cpu(lcpu);
- verify_cpu_node_mapping((int)lcpu, nid);
- ret = NOTIFY_OK;
- break;
+ nid = numa_setup_cpu(cpu);
+ verify_cpu_node_mapping(cpu, nid);
+ return 0;
+}
+
+static int ppc_numa_cpu_dead(unsigned int cpu)
+{
#ifdef CONFIG_HOTPLUG_CPU
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- unmap_cpu_from_node(lcpu);
- ret = NOTIFY_OK;
- break;
+ unmap_cpu_from_node(cpu);
#endif
- }
- return ret;
+ return 0;
}
/*
@@ -913,11 +905,6 @@ static void __init dump_numa_memory_topology(void)
}
}
-static struct notifier_block ppc64_numa_nb = {
- .notifier_call = cpu_numa_callback,
- .priority = 1 /* Must run before sched domains notifier. */
-};
-
/* Initialize NODE_DATA for a node on the local memory */
static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
{
@@ -985,15 +972,18 @@ void __init initmem_init(void)
setup_node_to_cpumask_map();
reset_numa_cpu_lookup_table();
- register_cpu_notifier(&ppc64_numa_nb);
+
/*
* We need the numa_cpu_lookup_table to be accurate for all CPUs,
* even before we online them, so that we can use cpu_to_{node,mem}
* early in boot, cf. smp_prepare_cpus().
+ * _nocalls() + manual invocation is used because cpuhp is not yet
+ * initialized for the boot CPU.
*/
- for_each_present_cpu(cpu) {
- numa_setup_cpu((unsigned long)cpu);
- }
+ cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "POWER_NUMA_PREPARE",
+ ppc_numa_cpu_prepare, ppc_numa_cpu_dead);
+ for_each_present_cpu(cpu)
+ numa_setup_cpu(cpu);
}
static int __init early_numa(char *p)
@@ -1163,18 +1153,34 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
static u64 hot_add_drconf_memory_max(void)
{
- struct device_node *memory = NULL;
- unsigned int drconf_cell_cnt = 0;
- u64 lmb_size = 0;
+ struct device_node *memory = NULL;
+ struct device_node *dn = NULL;
+ unsigned int drconf_cell_cnt = 0;
+ u64 lmb_size = 0;
const __be32 *dm = NULL;
+ const __be64 *lrdr = NULL;
+ struct of_drconf_cell drmem;
+
+ dn = of_find_node_by_path("/rtas");
+ if (dn) {
+ lrdr = of_get_property(dn, "ibm,lrdr-capacity", NULL);
+ of_node_put(dn);
+ if (lrdr)
+ return be64_to_cpup(lrdr);
+ }
- memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
- if (memory) {
- drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
- lmb_size = of_get_lmb_size(memory);
- of_node_put(memory);
- }
- return lmb_size * drconf_cell_cnt;
+ memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (memory) {
+ drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
+ lmb_size = of_get_lmb_size(memory);
+
+ /* Advance to the last cell, each cell has 6 32 bit integers */
+ dm += (drconf_cell_cnt - 1) * 6;
+ read_drconf_cell(&drmem, &dm);
+ of_node_put(memory);
+ return drmem.base_addr + lmb_size;
+ }
+ return 0;
}
/*
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index eb4451144746..f4f437cbabf1 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -14,6 +14,9 @@
#include "mmu_decl.h"
#include <trace/events/thp.h>
+int (*register_process_table)(unsigned long base, unsigned long page_size,
+ unsigned long tbl_size);
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* This is called when relaxing access to a hugepage. It's also called in the page
@@ -32,11 +35,8 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
#endif
changed = !pmd_same(*(pmdp), entry);
if (changed) {
- __ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry));
- /*
- * Since we are not supporting SW TLB systems, we don't
- * have any thing similar to flush_tlb_page_nohash()
- */
+ __ptep_set_access_flags(vma->vm_mm, pmdp_ptep(pmdp), pmd_pte(entry));
+ flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
}
return changed;
}
@@ -69,7 +69,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
{
pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
- flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+ flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
/*
* This ensures that generic code that rely on IRQ disabling
* to prevent a parallel THP split work as expected.
@@ -116,3 +116,12 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
return;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+/* For use by kexec */
+void mmu_cleanup_all(void)
+{
+ if (radix_enabled())
+ radix__mmu_cleanup_all();
+ else if (mmu_hash_ops.hpte_clear_all)
+ mmu_hash_ops.hpte_clear_all();
+}
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 18b2c11604fa..ed7bddc456b7 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -21,8 +21,11 @@
#include <trace/events/thp.h>
-static int native_update_partition_table(u64 patb1)
+static int native_register_process_table(unsigned long base, unsigned long pg_sz,
+ unsigned long table_size)
{
+ unsigned long patb1 = base | table_size | PATB_GR;
+
partition_tb->patb1 = cpu_to_be64(patb1);
return 0;
}
@@ -160,32 +163,30 @@ redo:
process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
/*
* Fill in the process table.
- * we support 52 bits, hence 52-28 = 24, 11000
*/
- rts_field = 3ull << PPC_BITLSHIFT(2);
+ rts_field = radix__get_tree_size();
process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE);
/*
* Fill in the partition table. We are suppose to use effective address
* of process table here. But our linear mapping also enable us to use
* physical address here.
*/
- ppc_md.update_partition_table(__pa(process_tb) | (PRTB_SIZE_SHIFT - 12) | PATB_GR);
+ register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12);
pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd);
}
static void __init radix_init_partition_table(void)
{
unsigned long rts_field;
- /*
- * we support 52 bits, hence 52-28 = 24, 11000
- */
- rts_field = 3ull << PPC_BITLSHIFT(2);
+
+ rts_field = radix__get_tree_size();
BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large.");
partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT);
partition_tb->patb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) |
RADIX_PGD_INDEX_SIZE | PATB_HR);
- printk("Partition table %p\n", partition_tb);
+ pr_info("Initializing Radix MMU\n");
+ pr_info("Partition table %p\n", partition_tb);
memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
/*
@@ -197,7 +198,7 @@ static void __init radix_init_partition_table(void)
void __init radix_init_native(void)
{
- ppc_md.update_partition_table = native_update_partition_table;
+ register_process_table = native_register_process_table;
}
static int __init get_idx_from_shift(unsigned int shift)
@@ -263,7 +264,7 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
return 1;
}
-static void __init radix_init_page_sizes(void)
+void __init radix__early_init_devtree(void)
{
int rc;
@@ -293,14 +294,35 @@ found:
return;
}
-void __init radix__early_init_mmu(void)
+static void update_hid_for_radix(void)
{
- unsigned long lpcr;
+ unsigned long hid0;
+ unsigned long rb = 3UL << PPC_BITLSHIFT(53); /* IS = 3 */
+
+ asm volatile("ptesync": : :"memory");
+ /* prs = 0, ric = 2, rs = 0, r = 1 is = 3 */
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(1), "i"(0), "i"(2), "r"(0) : "memory");
+ /* prs = 1, ric = 2, rs = 0, r = 1 is = 3 */
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(1), "i"(1), "i"(2), "r"(0) : "memory");
+ asm volatile("eieio; tlbsync; ptesync; isync; slbia": : :"memory");
/*
- * setup LPCR UPRT based on mmu_features
+ * now switch the HID
*/
- lpcr = mfspr(SPRN_LPCR);
- mtspr(SPRN_LPCR, lpcr | LPCR_UPRT);
+ hid0 = mfspr(SPRN_HID0);
+ hid0 |= HID0_POWER9_RADIX;
+ mtspr(SPRN_HID0, hid0);
+ asm volatile("isync": : :"memory");
+
+ /* Wait for it to happen */
+ while (!(mfspr(SPRN_HID0) & HID0_POWER9_RADIX))
+ cpu_relax();
+}
+
+void __init radix__early_init_mmu(void)
+{
+ unsigned long lpcr;
#ifdef CONFIG_PPC_64K_PAGES
/* PAGE_SIZE mappings */
@@ -336,15 +358,25 @@ void __init radix__early_init_mmu(void)
__vmalloc_end = RADIX_VMALLOC_END;
vmemmap = (struct page *)RADIX_VMEMMAP_BASE;
ioremap_bot = IOREMAP_BASE;
+
+#ifdef CONFIG_PCI
+ pci_io_base = ISA_IO_BASE;
+#endif
+
/*
* For now radix also use the same frag size
*/
__pte_frag_nr = H_PTE_FRAG_NR;
__pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT;
- radix_init_page_sizes();
- if (!firmware_has_feature(FW_FEATURE_LPAR))
+ if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+ radix_init_native();
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ update_hid_for_radix();
+ lpcr = mfspr(SPRN_LPCR);
+ mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
radix_init_partition_table();
+ }
radix_init_pgtable();
}
@@ -353,16 +385,27 @@ void radix__early_init_mmu_secondary(void)
{
unsigned long lpcr;
/*
- * setup LPCR UPRT based on mmu_features
- */
- lpcr = mfspr(SPRN_LPCR);
- mtspr(SPRN_LPCR, lpcr | LPCR_UPRT);
- /*
- * update partition table control register, 64 K size.
+ * update partition table control register and UPRT
*/
- if (!firmware_has_feature(FW_FEATURE_LPAR))
+ if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+ lpcr = mfspr(SPRN_LPCR);
+ mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
+
mtspr(SPRN_PTCR,
__pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+ }
+}
+
+void radix__mmu_cleanup_all(void)
+{
+ unsigned long lpcr;
+
+ if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+ lpcr = mfspr(SPRN_LPCR);
+ mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
+ mtspr(SPRN_PTCR, 0);
+ radix__flush_tlb_all();
+ }
}
void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 88a307504b5a..911fdfb63ec1 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -224,8 +224,8 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
if (changed) {
if (!is_vm_hugetlb_page(vma))
assert_pte_locked(vma->vm_mm, address);
- __ptep_set_access_flags(ptep, entry);
- flush_tlb_page_nohash(vma, address);
+ __ptep_set_access_flags(vma->vm_mm, ptep, entry);
+ flush_tlb_page(vma, address);
}
return changed;
}
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index bf7bf32b54f8..0ae0572bc239 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -79,12 +79,12 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
#endif
}
-__init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
pte_t *pte;
if (slab_is_available()) {
- pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+ pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
} else {
pte = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
if (pte)
@@ -97,7 +97,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *ptepage;
- gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO;
+ gfp_t flags = GFP_KERNEL | __GFP_ZERO;
ptepage = alloc_pages(flags, 0);
if (!ptepage)
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index e009e0604a8a..f5e8d4edb808 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -350,8 +350,7 @@ static pte_t *get_from_cache(struct mm_struct *mm)
static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
{
void *ret = NULL;
- struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
- __GFP_REPEAT | __GFP_ZERO);
+ struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
if (!page)
return NULL;
if (!kernel && !pgtable_page_ctor(page)) {
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index dfdb90cb4403..e2974fcd20f1 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -113,7 +113,12 @@ BEGIN_FTR_SECTION
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
b slb_finish_load_1T
-0:
+0: /*
+ * For userspace addresses, make sure this is region 0.
+ */
+ cmpdi r9, 0
+ bne 8f
+
/* when using slices, we extract the psize off the slice bitmaps
* and then we need to get the sllp encoding off the mmu_psize_defs
* array.
@@ -173,11 +178,9 @@ BEGIN_FTR_SECTION
END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
b slb_finish_load
-8: /* invalid EA */
- li r10,0 /* BAD_VSID */
- li r9,0 /* BAD_VSID */
- li r11,SLB_VSID_USER /* flags don't much matter */
- b slb_finish_load
+8: /* invalid EA - return an error indication */
+ crset 4*cr0+eq /* indicate failure */
+ blr
/*
* Finish loading of an SLB entry and return
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 0fdaf93a3e09..0e49ec541ab5 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -12,26 +12,30 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
+#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
static DEFINE_RAW_SPINLOCK(native_tlbie_lock);
-static inline void __tlbiel_pid(unsigned long pid, int set)
+#define RIC_FLUSH_TLB 0
+#define RIC_FLUSH_PWC 1
+#define RIC_FLUSH_ALL 2
+
+static inline void __tlbiel_pid(unsigned long pid, int set,
+ unsigned long ric)
{
- unsigned long rb,rs,ric,prs,r;
+ unsigned long rb,rs,prs,r;
rb = PPC_BIT(53); /* IS = 1 */
rb |= set << PPC_BITLSHIFT(51);
rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
prs = 1; /* process scoped */
r = 1; /* raidx format */
- ric = 2; /* invalidate all the caches */
asm volatile("ptesync": : :"memory");
- asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |"
- "(%2 << 17) | (%3 << 18) | (%4 << 21)"
+ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("ptesync": : :"memory");
}
@@ -39,67 +43,61 @@ static inline void __tlbiel_pid(unsigned long pid, int set)
/*
* We use 128 set in radix mode and 256 set in hpt mode.
*/
-static inline void _tlbiel_pid(unsigned long pid)
+static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
int set;
for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) {
- __tlbiel_pid(pid, set);
+ __tlbiel_pid(pid, set, ric);
}
return;
}
-static inline void _tlbie_pid(unsigned long pid)
+static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
- unsigned long rb,rs,ric,prs,r;
+ unsigned long rb,rs,prs,r;
rb = PPC_BIT(53); /* IS = 1 */
rs = pid << PPC_BITLSHIFT(31);
prs = 1; /* process scoped */
r = 1; /* raidx format */
- ric = 2; /* invalidate all the caches */
asm volatile("ptesync": : :"memory");
- asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |"
- "(%2 << 17) | (%3 << 18) | (%4 << 21)"
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
static inline void _tlbiel_va(unsigned long va, unsigned long pid,
- unsigned long ap)
+ unsigned long ap, unsigned long ric)
{
- unsigned long rb,rs,ric,prs,r;
+ unsigned long rb,rs,prs,r;
rb = va & ~(PPC_BITMASK(52, 63));
rb |= ap << PPC_BITLSHIFT(58);
rs = pid << PPC_BITLSHIFT(31);
prs = 1; /* process scoped */
r = 1; /* raidx format */
- ric = 0; /* no cluster flush yet */
asm volatile("ptesync": : :"memory");
- asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |"
- "(%2 << 17) | (%3 << 18) | (%4 << 21)"
+ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("ptesync": : :"memory");
}
static inline void _tlbie_va(unsigned long va, unsigned long pid,
- unsigned long ap)
+ unsigned long ap, unsigned long ric)
{
- unsigned long rb,rs,ric,prs,r;
+ unsigned long rb,rs,prs,r;
rb = va & ~(PPC_BITMASK(52, 63));
rb |= ap << PPC_BITLSHIFT(58);
rs = pid << PPC_BITLSHIFT(31);
prs = 1; /* process scoped */
r = 1; /* raidx format */
- ric = 0; /* no cluster flush yet */
asm volatile("ptesync": : :"memory");
- asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |"
- "(%2 << 17) | (%3 << 18) | (%4 << 21)"
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
@@ -117,25 +115,41 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,
*/
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
- unsigned int pid;
+ unsigned long pid;
preempt_disable();
pid = mm->context.id;
if (pid != MMU_NO_CONTEXT)
- _tlbiel_pid(pid);
+ _tlbiel_pid(pid, RIC_FLUSH_ALL);
preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);
-void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
- unsigned long ap, int nid)
+void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
{
- unsigned int pid;
+ unsigned long pid;
+ struct mm_struct *mm = tlb->mm;
+
+ preempt_disable();
+
+ pid = mm->context.id;
+ if (pid != MMU_NO_CONTEXT)
+ _tlbiel_pid(pid, RIC_FLUSH_PWC);
+
+ preempt_enable();
+}
+EXPORT_SYMBOL(radix__local_flush_tlb_pwc);
+
+void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
+ int psize)
+{
+ unsigned long pid;
+ unsigned long ap = mmu_get_ap(psize);
preempt_disable();
pid = mm ? mm->context.id : 0;
if (pid != MMU_NO_CONTEXT)
- _tlbiel_va(vmaddr, pid, ap);
+ _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
preempt_enable();
}
@@ -146,23 +160,43 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd
if (vma && is_vm_hugetlb_page(vma))
return __local_flush_hugetlb_page(vma, vmaddr);
#endif
- radix___local_flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
- mmu_get_ap(mmu_virtual_psize), 0);
+ radix__local_flush_tlb_page_psize(vma ? vma->vm_mm : NULL, vmaddr,
+ mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__local_flush_tlb_page);
#ifdef CONFIG_SMP
-static int mm_is_core_local(struct mm_struct *mm)
+void radix__flush_tlb_mm(struct mm_struct *mm)
{
- return cpumask_subset(mm_cpumask(mm),
- topology_sibling_cpumask(smp_processor_id()));
+ unsigned long pid;
+
+ preempt_disable();
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+ goto no_context;
+
+ if (!mm_is_core_local(mm)) {
+ int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+
+ if (lock_tlbie)
+ raw_spin_lock(&native_tlbie_lock);
+ _tlbie_pid(pid, RIC_FLUSH_ALL);
+ if (lock_tlbie)
+ raw_spin_unlock(&native_tlbie_lock);
+ } else
+ _tlbiel_pid(pid, RIC_FLUSH_ALL);
+no_context:
+ preempt_enable();
}
+EXPORT_SYMBOL(radix__flush_tlb_mm);
-void radix__flush_tlb_mm(struct mm_struct *mm)
+void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
{
- unsigned int pid;
+ unsigned long pid;
+ struct mm_struct *mm = tlb->mm;
preempt_disable();
+
pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
goto no_context;
@@ -172,20 +206,21 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
if (lock_tlbie)
raw_spin_lock(&native_tlbie_lock);
- _tlbie_pid(pid);
+ _tlbie_pid(pid, RIC_FLUSH_PWC);
if (lock_tlbie)
raw_spin_unlock(&native_tlbie_lock);
} else
- _tlbiel_pid(pid);
+ _tlbiel_pid(pid, RIC_FLUSH_PWC);
no_context:
preempt_enable();
}
-EXPORT_SYMBOL(radix__flush_tlb_mm);
+EXPORT_SYMBOL(radix__flush_tlb_pwc);
-void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
- unsigned long ap, int nid)
+void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
+ int psize)
{
- unsigned int pid;
+ unsigned long pid;
+ unsigned long ap = mmu_get_ap(psize);
preempt_disable();
pid = mm ? mm->context.id : 0;
@@ -196,11 +231,11 @@ void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
if (lock_tlbie)
raw_spin_lock(&native_tlbie_lock);
- _tlbie_va(vmaddr, pid, ap);
+ _tlbie_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
if (lock_tlbie)
raw_spin_unlock(&native_tlbie_lock);
} else
- _tlbiel_va(vmaddr, pid, ap);
+ _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
bail:
preempt_enable();
}
@@ -211,8 +246,8 @@ void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
if (vma && is_vm_hugetlb_page(vma))
return flush_hugetlb_page(vma, vmaddr);
#endif
- radix___flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
- mmu_get_ap(mmu_virtual_psize), 0);
+ radix__flush_tlb_page_psize(vma ? vma->vm_mm : NULL, vmaddr,
+ mmu_virtual_psize);
}
EXPORT_SYMBOL(radix__flush_tlb_page);
@@ -224,7 +259,7 @@ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
if (lock_tlbie)
raw_spin_lock(&native_tlbie_lock);
- _tlbie_pid(0);
+ _tlbie_pid(0, RIC_FLUSH_ALL);
if (lock_tlbie)
raw_spin_unlock(&native_tlbie_lock);
}
@@ -243,9 +278,149 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
}
EXPORT_SYMBOL(radix__flush_tlb_range);
+static int radix_get_mmu_psize(int page_size)
+{
+ int psize;
+
+ if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
+ psize = mmu_virtual_psize;
+ else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
+ psize = MMU_PAGE_2M;
+ else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
+ psize = MMU_PAGE_1G;
+ else
+ return -1;
+ return psize;
+}
void radix__tlb_flush(struct mmu_gather *tlb)
{
+ int psize = 0;
struct mm_struct *mm = tlb->mm;
- radix__flush_tlb_mm(mm);
+ int page_size = tlb->page_size;
+
+ psize = radix_get_mmu_psize(page_size);
+ /*
+ * if page size is not something we understand, do a full mm flush
+ */
+ if (psize != -1 && !tlb->fullmm && !tlb->need_flush_all)
+ radix__flush_tlb_range_psize(mm, tlb->start, tlb->end, psize);
+ else
+ radix__flush_tlb_mm(mm);
+}
+
+#define TLB_FLUSH_ALL -1UL
+/*
+ * Number of pages above which we will do a bcast tlbie. Just a
+ * number at this point copied from x86
+ */
+static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
+
+void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize)
+{
+ unsigned long pid;
+ unsigned long addr;
+ int local = mm_is_core_local(mm);
+ unsigned long ap = mmu_get_ap(psize);
+ int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+ unsigned long page_size = 1UL << mmu_psize_defs[psize].shift;
+
+
+ preempt_disable();
+ pid = mm ? mm->context.id : 0;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+ goto err_out;
+
+ if (end == TLB_FLUSH_ALL ||
+ (end - start) > tlb_single_page_flush_ceiling * page_size) {
+ if (local)
+ _tlbiel_pid(pid, RIC_FLUSH_TLB);
+ else
+ _tlbie_pid(pid, RIC_FLUSH_TLB);
+ goto err_out;
+ }
+ for (addr = start; addr < end; addr += page_size) {
+
+ if (local)
+ _tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
+ else {
+ if (lock_tlbie)
+ raw_spin_lock(&native_tlbie_lock);
+ _tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+ if (lock_tlbie)
+ raw_spin_unlock(&native_tlbie_lock);
+ }
+ }
+err_out:
+ preempt_enable();
+}
+
+void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa,
+ unsigned long page_size)
+{
+ unsigned long rb,rs,prs,r;
+ unsigned long ap;
+ unsigned long ric = RIC_FLUSH_TLB;
+
+ ap = mmu_get_ap(radix_get_mmu_psize(page_size));
+ rb = gpa & ~(PPC_BITMASK(52, 63));
+ rb |= ap << PPC_BITLSHIFT(58);
+ rs = lpid & ((1UL << 32) - 1);
+ prs = 0; /* process scoped */
+ r = 1; /* raidx format */
+
+ asm volatile("ptesync": : :"memory");
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+EXPORT_SYMBOL(radix__flush_tlb_lpid_va);
+
+void radix__flush_tlb_lpid(unsigned long lpid)
+{
+ unsigned long rb,rs,prs,r;
+ unsigned long ric = RIC_FLUSH_ALL;
+
+ rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */
+ rs = lpid & ((1UL << 32) - 1);
+ prs = 0; /* partition scoped */
+ r = 1; /* raidx format */
+
+ asm volatile("ptesync": : :"memory");
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+EXPORT_SYMBOL(radix__flush_tlb_lpid);
+
+void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
+}
+EXPORT_SYMBOL(radix__flush_pmd_tlb_range);
+
+void radix__flush_tlb_all(void)
+{
+ unsigned long rb,prs,r,rs;
+ unsigned long ric = RIC_FLUSH_ALL;
+
+ rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
+ prs = 0; /* partition scoped */
+ r = 1; /* raidx format */
+ rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */
+
+ asm volatile("ptesync": : :"memory");
+ /*
+ * now flush guest entries by passing PRS = 1 and LPID != 0
+ */
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
+ /*
+ * now flush host entires by passing PRS = 0 and LPID == 0
+ */
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c
index 558e30cce33e..702d7689d714 100644
--- a/arch/powerpc/mm/tlb_hash32.c
+++ b/arch/powerpc/mm/tlb_hash32.c
@@ -49,17 +49,6 @@ void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr)
EXPORT_SYMBOL(flush_hash_entry);
/*
- * Called by ptep_set_access_flags, must flush on CPUs for which the
- * DSI handler can't just "fixup" the TLB on a write fault
- */
-void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr)
-{
- if (Hash != 0)
- return;
- _tlbie(addr);
-}
-
-/*
* Called at the end of a mmu_gather operation to make sure the
* TLB flush is completely done.
*/
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index f4668488512c..050badc0ebd3 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -215,12 +215,6 @@ EXPORT_SYMBOL(local_flush_tlb_page);
static DEFINE_RAW_SPINLOCK(tlbivax_lock);
-static int mm_is_core_local(struct mm_struct *mm)
-{
- return cpumask_subset(mm_cpumask(mm),
- topology_sibling_cpumask(smp_processor_id()));
-}
-
struct tlb_flush_param {
unsigned long addr;
unsigned int pid;
OpenPOWER on IntegriCloud