Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r--  arch/powerpc/mm/8xx_mmu.c               | 131
-rw-r--r--  arch/powerpc/mm/copro_fault.c           |   2
-rw-r--r--  arch/powerpc/mm/fault.c                 |   2
-rw-r--r--  arch/powerpc/mm/hash64_4k.c             |  18
-rw-r--r--  arch/powerpc/mm/hash64_64k.c            |  39
-rw-r--r--  arch/powerpc/mm/hash_native_64.c        |  50
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c         | 189
-rw-r--r--  arch/powerpc/mm/hugepage-hash64.c       |  17
-rw-r--r--  arch/powerpc/mm/hugetlbpage-hash64.c    |   4
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c           |   9
-rw-r--r--  arch/powerpc/mm/init_32.c               |   5
-rw-r--r--  arch/powerpc/mm/mem.c                   |  18
-rw-r--r--  arch/powerpc/mm/mmu_context_book3s64.c  |   7
-rw-r--r--  arch/powerpc/mm/mmu_decl.h              |   3
-rw-r--r--  arch/powerpc/mm/numa.c                  |  84
-rw-r--r--  arch/powerpc/mm/pgtable-book3s64.c      |   5
-rw-r--r--  arch/powerpc/mm/pgtable-radix.c         |  50
-rw-r--r--  arch/powerpc/mm/pgtable_32.c            |   4
-rw-r--r--  arch/powerpc/mm/pgtable_64.c            |   3
-rw-r--r--  arch/powerpc/mm/tlb-radix.c             | 157
20 files changed, 511 insertions(+), 286 deletions(-)
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c
index 949100577db5..6c5025e81236 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/8xx_mmu.c
@@ -13,62 +13,115 @@
*/
#include <linux/memblock.h>
+#include <asm/fixmap.h>
+#include <asm/code-patching.h>
#include "mmu_decl.h"
+#define IMMR_SIZE (FIX_IMMR_SIZE << PAGE_SHIFT)
+
extern int __map_without_ltlbs;
+
/*
- * MMU_init_hw does the chip-specific initialization of the MMU hardware.
+ * Return PA for this VA if it is in IMMR area, or 0
*/
-void __init MMU_init_hw(void)
+phys_addr_t v_block_mapped(unsigned long va)
{
- /* Nothing to do for the time being but keep it similar to other PPC */
+ unsigned long p = PHYS_IMMR_BASE;
+
+ if (__map_without_ltlbs)
+ return 0;
+ if (va >= VIRT_IMMR_BASE && va < VIRT_IMMR_BASE + IMMR_SIZE)
+ return p + va - VIRT_IMMR_BASE;
+ return 0;
+}
+
+/*
+ * Return VA for a given PA or 0 if not mapped
+ */
+unsigned long p_block_mapped(phys_addr_t pa)
+{
+ unsigned long p = PHYS_IMMR_BASE;
+
+ if (__map_without_ltlbs)
+ return 0;
+ if (pa >= p && pa < p + IMMR_SIZE)
+ return VIRT_IMMR_BASE + pa - p;
+ return 0;
}
-#define LARGE_PAGE_SIZE_4M (1<<22)
#define LARGE_PAGE_SIZE_8M (1<<23)
-#define LARGE_PAGE_SIZE_64M (1<<26)
-unsigned long __init mmu_mapin_ram(unsigned long top)
+/*
+ * MMU_init_hw does the chip-specific initialization of the MMU hardware.
+ */
+void __init MMU_init_hw(void)
{
- unsigned long v, s, mapped;
- phys_addr_t p;
+ /* PIN up to the 3 first 8Mb after IMMR in DTLB table */
+#ifdef CONFIG_PIN_TLB
+ unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000;
+ unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY;
+#ifdef CONFIG_PIN_TLB_IMMR
+ int i = 29;
+#else
+ int i = 28;
+#endif
+ unsigned long addr = 0;
+ unsigned long mem = total_lowmem;
+
+ for (; i < 32 && mem >= LARGE_PAGE_SIZE_8M; i++) {
+ mtspr(SPRN_MD_CTR, ctr | (i << 8));
+ mtspr(SPRN_MD_EPN, (unsigned long)__va(addr) | MD_EVALID);
+ mtspr(SPRN_MD_TWC, MD_PS8MEG | MD_SVALID);
+ mtspr(SPRN_MD_RPN, addr | flags | _PAGE_PRESENT);
+ addr += LARGE_PAGE_SIZE_8M;
+ mem -= LARGE_PAGE_SIZE_8M;
+ }
+#endif
+}
- v = KERNELBASE;
- p = 0;
- s = top;
+static void mmu_mapin_immr(void)
+{
+ unsigned long p = PHYS_IMMR_BASE;
+ unsigned long v = VIRT_IMMR_BASE;
+ unsigned long f = pgprot_val(PAGE_KERNEL_NCG);
+ int offset;
- if (__map_without_ltlbs)
- return 0;
+ for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE)
+ map_page(v + offset, p + offset, f);
+}
-#ifdef CONFIG_PPC_4K_PAGES
- while (s >= LARGE_PAGE_SIZE_8M) {
- pmd_t *pmdp;
- unsigned long val = p | MD_PS8MEG;
+/* Address of instructions to patch */
+#ifndef CONFIG_PIN_TLB_IMMR
+extern unsigned int DTLBMiss_jmp;
+#endif
+extern unsigned int DTLBMiss_cmp, FixupDAR_cmp;
- pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v);
- *pmdp++ = __pmd(val);
- *pmdp++ = __pmd(val + LARGE_PAGE_SIZE_4M);
+void mmu_patch_cmp_limit(unsigned int *addr, unsigned long mapped)
+{
+ unsigned int instr = *addr;
- v += LARGE_PAGE_SIZE_8M;
- p += LARGE_PAGE_SIZE_8M;
- s -= LARGE_PAGE_SIZE_8M;
- }
-#else /* CONFIG_PPC_16K_PAGES */
- while (s >= LARGE_PAGE_SIZE_64M) {
- pmd_t *pmdp;
- unsigned long val = p | MD_PS8MEG;
+ instr &= 0xffff0000;
+ instr |= (unsigned long)__va(mapped) >> 16;
+ patch_instruction(addr, instr);
+}
- pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v);
- *pmdp++ = __pmd(val);
+unsigned long __init mmu_mapin_ram(unsigned long top)
+{
+ unsigned long mapped;
- v += LARGE_PAGE_SIZE_64M;
- p += LARGE_PAGE_SIZE_64M;
- s -= LARGE_PAGE_SIZE_64M;
- }
+ if (__map_without_ltlbs) {
+ mapped = 0;
+ mmu_mapin_immr();
+#ifndef CONFIG_PIN_TLB_IMMR
+ patch_instruction(&DTLBMiss_jmp, PPC_INST_NOP);
#endif
+ } else {
+ mapped = top & ~(LARGE_PAGE_SIZE_8M - 1);
+ }
- mapped = top - s;
+ mmu_patch_cmp_limit(&DTLBMiss_cmp, mapped);
+ mmu_patch_cmp_limit(&FixupDAR_cmp, mapped);
/* If the size of RAM is not an exact power of two, we may not
* have covered RAM in its entirety with 8 MiB
@@ -77,7 +130,8 @@ unsigned long __init mmu_mapin_ram(unsigned long top)
* coverage with normal-sized pages (or other reasons) do not
* attempt to allocate outside the allowed range.
*/
- memblock_set_current_limit(mapped);
+ if (mapped)
+ memblock_set_current_limit(mapped);
return mapped;
}
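
For reference, a small worked example of the 8 MB rounding done in mmu_mapin_ram() above (values are illustrative, not taken from the patch):

/* Illustrative only: how mmu_mapin_ram() derives the block-mapped limit.
 * With top = 30 MB and LARGE_PAGE_SIZE_8M = (1 << 23):
 *   mapped = top & ~(LARGE_PAGE_SIZE_8M - 1)
 *          = 0x01e00000 & ~0x007fffff
 *          = 0x01800000            -> 24 MB, i.e. three pinned 8 MB entries
 * The remaining 6 MB above 'mapped' must be covered with normal pages,
 * which is why memblock_set_current_limit(mapped) caps early allocations.
 */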
@@ -90,13 +144,8 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
*/
BUG_ON(first_memblock_base != 0);
-#ifdef CONFIG_PIN_TLB
/* 8xx can only access 24MB at the moment */
memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01800000));
-#else
- /* 8xx can only access 8MB at the moment */
- memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000));
-#endif
}
/*
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index 6527882ce05e..bb0354222b11 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -75,7 +75,7 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
}
ret = 0;
- *flt = handle_mm_fault(mm, vma, ea, is_write ? FAULT_FLAG_WRITE : 0);
+ *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0);
if (unlikely(*flt & VM_FAULT_ERROR)) {
if (*flt & VM_FAULT_OOM) {
ret = -ENOMEM;
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index a67c6d781c52..a4db22f65021 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -429,7 +429,7 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(mm, vma, address, flags);
+ fault = handle_mm_fault(vma, address, flags);
if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
if (fault & VM_FAULT_SIGSEGV)
goto bad_area;
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index 6333b273d2d5..42c702b3be1f 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -70,8 +70,8 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
- if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K,
- MMU_PAGE_4K, ssize, flags) == -1)
+ if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K,
+ MMU_PAGE_4K, ssize, flags) == -1)
old_pte &= ~_PAGE_HPTEFLAGS;
}
@@ -84,21 +84,23 @@ repeat:
hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
/* Insert into the hash table, primary slot */
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
- MMU_PAGE_4K, MMU_PAGE_4K, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+ MMU_PAGE_4K, MMU_PAGE_4K, ssize);
/*
* Primary is full, try the secondary
*/
if (unlikely(slot == -1)) {
hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa,
- rflags, HPTE_V_SECONDARY,
- MMU_PAGE_4K, MMU_PAGE_4K, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
+ rflags,
+ HPTE_V_SECONDARY,
+ MMU_PAGE_4K,
+ MMU_PAGE_4K, ssize);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- ppc_md.hpte_remove(hpte_group);
+ mmu_hash_ops.hpte_remove(hpte_group);
/*
* FIXME!! Should be try the group from which we removed ?
*/
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 16644e1f4e6b..3bbbea07378c 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -133,9 +133,9 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += hidx & _PTEIDX_GROUP_IX;
- ret = ppc_md.hpte_updatepp(slot, rflags, vpn,
- MMU_PAGE_4K, MMU_PAGE_4K,
- ssize, flags);
+ ret = mmu_hash_ops.hpte_updatepp(slot, rflags, vpn,
+ MMU_PAGE_4K, MMU_PAGE_4K,
+ ssize, flags);
/*
*if we failed because typically the HPTE wasn't really here
* we try an insertion.
@@ -166,21 +166,22 @@ repeat:
hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
/* Insert into the hash table, primary slot */
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
- MMU_PAGE_4K, MMU_PAGE_4K, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+ MMU_PAGE_4K, MMU_PAGE_4K, ssize);
/*
* Primary is full, try the secondary
*/
if (unlikely(slot == -1)) {
hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa,
- rflags, HPTE_V_SECONDARY,
- MMU_PAGE_4K, MMU_PAGE_4K, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
+ rflags, HPTE_V_SECONDARY,
+ MMU_PAGE_4K, MMU_PAGE_4K,
+ ssize);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- ppc_md.hpte_remove(hpte_group);
+ mmu_hash_ops.hpte_remove(hpte_group);
/*
* FIXME!! Should be try the group from which we removed ?
*/
@@ -272,8 +273,9 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
- if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K,
- MMU_PAGE_64K, ssize, flags) == -1)
+ if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K,
+ MMU_PAGE_64K, ssize,
+ flags) == -1)
old_pte &= ~_PAGE_HPTEFLAGS;
}
@@ -286,21 +288,24 @@ repeat:
hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
/* Insert into the hash table, primary slot */
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
- MMU_PAGE_64K, MMU_PAGE_64K, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+ MMU_PAGE_64K, MMU_PAGE_64K,
+ ssize);
/*
* Primary is full, try the secondary
*/
if (unlikely(slot == -1)) {
hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa,
- rflags, HPTE_V_SECONDARY,
- MMU_PAGE_64K, MMU_PAGE_64K, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
+ rflags,
+ HPTE_V_SECONDARY,
+ MMU_PAGE_64K,
+ MMU_PAGE_64K, ssize);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- ppc_md.hpte_remove(hpte_group);
+ mmu_hash_ops.hpte_remove(hpte_group);
/*
* FIXME!! Should be try the group from which we removed ?
*/
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index d873f6507f72..88ce7d212320 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -55,7 +55,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
* We need 14 to 65 bits of va for a tlibe of 4K page
* With vpn we ignore the lower VPN_SHIFT bits already.
* And top two bits are already ignored because we can
- * only accomadate 76 bits in a 64 bit vpn with a VPN_SHIFT
+ * only accomodate 76 bits in a 64 bit vpn with a VPN_SHIFT
* of 12.
*/
va = vpn << VPN_SHIFT;
@@ -64,7 +64,8 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
* Older versions of the architecture (2.02 and earler) require the
* masking of the top 16 bits.
*/
- va &= ~(0xffffULL << 48);
+ if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA))
+ va &= ~(0xffffULL << 48);
switch (psize) {
case MMU_PAGE_4K:
@@ -113,7 +114,8 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
* Older versions of the architecture (2.02 and earler) require the
* masking of the top 16 bits.
*/
- va &= ~(0xffffULL << 48);
+ if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA))
+ va &= ~(0xffffULL << 48);
switch (psize) {
case MMU_PAGE_4K:
@@ -316,8 +318,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
DBG_LOW(" -> hit\n");
/* Update the HPTE */
hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
- ~(HPTE_R_PP | HPTE_R_N)) |
- (newpp & (HPTE_R_PP | HPTE_R_N |
+ ~(HPTE_R_PPP | HPTE_R_N)) |
+ (newpp & (HPTE_R_PPP | HPTE_R_N |
HPTE_R_C)));
}
native_unlock_hpte(hptep);
@@ -385,8 +387,8 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
/* Update the HPTE */
hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
- ~(HPTE_R_PP | HPTE_R_N)) |
- (newpp & (HPTE_R_PP | HPTE_R_N)));
+ ~(HPTE_R_PPP | HPTE_R_N)) |
+ (newpp & (HPTE_R_PPP | HPTE_R_N)));
/*
* Ensure it is out of the tlb too. Bolted entries base and
* actual page size will be same.
@@ -550,7 +552,11 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
}
}
/* This works for all page sizes, and for 256M and 1T segments */
- *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ *ssize = hpte_r >> HPTE_R_3_0_SSIZE_SHIFT;
+ else
+ *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
+
shift = mmu_psize_defs[size].shift;
avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
@@ -601,7 +607,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
* crashdump and all bets are off anyway.
*
* TODO: add batching support when enabled. remember, no dynamic memory here,
- * athough there is the control page available...
+ * although there is the control page available...
*/
static void native_hpte_clear(void)
{
@@ -719,23 +725,29 @@ static void native_flush_hash_range(unsigned long number, int local)
local_irq_restore(flags);
}
-static int native_update_partition_table(u64 patb1)
+static int native_register_proc_table(unsigned long base, unsigned long page_size,
+ unsigned long table_size)
{
+ unsigned long patb1 = base << 25; /* VSID */
+
+ patb1 |= (page_size << 5); /* sllp */
+ patb1 |= table_size;
+
partition_tb->patb1 = cpu_to_be64(patb1);
return 0;
}
void __init hpte_init_native(void)
{
- ppc_md.hpte_invalidate = native_hpte_invalidate;
- ppc_md.hpte_updatepp = native_hpte_updatepp;
- ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp;
- ppc_md.hpte_insert = native_hpte_insert;
- ppc_md.hpte_remove = native_hpte_remove;
- ppc_md.hpte_clear_all = native_hpte_clear;
- ppc_md.flush_hash_range = native_flush_hash_range;
- ppc_md.hugepage_invalidate = native_hugepage_invalidate;
+ mmu_hash_ops.hpte_invalidate = native_hpte_invalidate;
+ mmu_hash_ops.hpte_updatepp = native_hpte_updatepp;
+ mmu_hash_ops.hpte_updateboltedpp = native_hpte_updateboltedpp;
+ mmu_hash_ops.hpte_insert = native_hpte_insert;
+ mmu_hash_ops.hpte_remove = native_hpte_remove;
+ mmu_hash_ops.hpte_clear_all = native_hpte_clear;
+ mmu_hash_ops.flush_hash_range = native_flush_hash_range;
+ mmu_hash_ops.hugepage_invalidate = native_hugepage_invalidate;
if (cpu_has_feature(CPU_FTR_ARCH_300))
- ppc_md.update_partition_table = native_update_partition_table;
+ ppc_md.register_process_table = native_register_proc_table;
}
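
The hunk above replaces the per-machine ppc_md hash hooks with a dedicated mmu_hash_ops table. A reduced sketch of the pattern, with only two hooks and simplified prototypes (the real struct added by this series has more members):

/* Reduced, illustrative sketch of the ops-table pattern used above. */
struct example_hash_ops {
	long (*hpte_insert)(unsigned long hpte_group, unsigned long vpn,
			    unsigned long pa, unsigned long rflags,
			    unsigned long vflags, int psize, int apsize,
			    int ssize);
	long (*hpte_remove)(unsigned long hpte_group);
};

static struct example_hash_ops example_hash_ops;	/* zero-initialised */

static void example_init_native(void)
{
	/* Each backend (native, pseries, ps3) installs its own callbacks;
	 * callers check the pointer before use, cf. the
	 * "No MMU hash ops defined" panic in hash_utils_64.c below.
	 */
	example_hash_ops.hpte_insert = native_hpte_insert;
	example_hash_ops.hpte_remove = native_hpte_remove;
}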
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 59268969a0bc..b78b5d211278 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -34,6 +34,7 @@
#include <linux/signal.h>
#include <linux/memblock.h>
#include <linux/context_tracking.h>
+#include <linux/libfdt.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
@@ -58,6 +59,7 @@
#include <asm/firmware.h>
#include <asm/tm.h>
#include <asm/trace.h>
+#include <asm/ps3.h>
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
@@ -87,10 +89,6 @@
*
*/
-#ifdef CONFIG_U3_DART
-extern unsigned long dart_tablebase;
-#endif /* CONFIG_U3_DART */
-
static unsigned long _SDR1;
struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
EXPORT_SYMBOL_GPL(mmu_psize_defs);
@@ -120,6 +118,8 @@ static u8 *linear_map_hash_slots;
static unsigned long linear_map_hash_count;
static DEFINE_SPINLOCK(linear_map_hash_lock);
#endif /* CONFIG_DEBUG_PAGEALLOC */
+struct mmu_hash_ops mmu_hash_ops;
+EXPORT_SYMBOL(mmu_hash_ops);
/* There are definitions of page sizes arrays to be used when none
* is provided by the firmware.
@@ -159,6 +159,19 @@ static struct mmu_psize_def mmu_psize_defaults_gp[] = {
},
};
+/*
+ * 'R' and 'C' update notes:
+ * - Under pHyp or KVM, the updatepp path will not set C, thus it *will*
+ * create writeable HPTEs without C set, because the hcall H_PROTECT
+ * that we use in that case will not update C
+ * - The above is however not a problem, because we also don't do that
+ * fancy "no flush" variant of eviction and we use H_REMOVE which will
+ * do the right thing and thus we don't have the race I described earlier
+ *
+ * - Under bare metal, we do have the race, so we need R and C set
+ * - We make sure R is always set and never lost
+ * - C is _PAGE_DIRTY, and *should* always be set for a writeable mapping
+ */
unsigned long htab_convert_pte_flags(unsigned long pteflags)
{
unsigned long rflags = 0;
@@ -186,19 +199,28 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
rflags |= 0x1;
}
/*
- * Always add "C" bit for perf. Memory coherence is always enabled
+ * We can't allow hardware to update hpte bits. Hence always
+ * set 'R' bit and set 'C' if it is a write fault
*/
- rflags |= HPTE_R_C | HPTE_R_M;
+ rflags |= HPTE_R_R;
+
+ if (pteflags & _PAGE_DIRTY)
+ rflags |= HPTE_R_C;
/*
* Add in WIG bits
*/
if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_TOLERANT)
rflags |= HPTE_R_I;
- if ((pteflags & _PAGE_CACHE_CTL ) == _PAGE_NON_IDEMPOTENT)
+ else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT)
rflags |= (HPTE_R_I | HPTE_R_G);
- if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO)
- rflags |= (HPTE_R_I | HPTE_R_W);
+ else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO)
+ rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M);
+ else
+ /*
+ * Add memory coherence if cache inhibited is not set
+ */
+ rflags |= HPTE_R_M;
return rflags;
}
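
To make the new R/C handling concrete, an illustrative summary of the bits htab_convert_pte_flags() now sets (see the comment block at the top of this hunk for the rationale):

/* Illustrative only: reference/change bits after the change above.
 *
 *   clean writeable PTE (_PAGE_DIRTY clear) -> HPTE_R_R set, HPTE_R_C clear;
 *       the first store then faults and software marks the page dirty,
 *       because hardware is no longer allowed to update HPTE bits.
 *   dirty PTE (_PAGE_DIRTY set)             -> HPTE_R_R | HPTE_R_C set.
 *
 * HPTE_R_M (memory coherence) is now added only when the mapping is not
 * cache-inhibited, instead of unconditionally as before.
 */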
@@ -256,9 +278,10 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
hash = hpt_hash(vpn, shift, ssize);
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
- BUG_ON(!ppc_md.hpte_insert);
- ret = ppc_md.hpte_insert(hpteg, vpn, paddr, tprot,
- HPTE_V_BOLTED, psize, psize, ssize);
+ BUG_ON(!mmu_hash_ops.hpte_insert);
+ ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot,
+ HPTE_V_BOLTED, psize, psize,
+ ssize);
if (ret < 0)
break;
@@ -283,11 +306,11 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
shift = mmu_psize_defs[psize].shift;
step = 1 << shift;
- if (!ppc_md.hpte_removebolted)
+ if (!mmu_hash_ops.hpte_removebolted)
return -ENODEV;
for (vaddr = vstart; vaddr < vend; vaddr += step) {
- rc = ppc_md.hpte_removebolted(vaddr, psize, ssize);
+ rc = mmu_hash_ops.hpte_removebolted(vaddr, psize, ssize);
if (rc == -ENOENT) {
ret = -ENOENT;
continue;
@@ -299,6 +322,15 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
return ret;
}
+static bool disable_1tb_segments = false;
+
+static int __init parse_disable_1tb_segments(char *p)
+{
+ disable_1tb_segments = true;
+ return 0;
+}
+early_param("disable_1tb_segments", parse_disable_1tb_segments);
+
static int __init htab_dt_scan_seg_sizes(unsigned long node,
const char *uname, int depth,
void *data)
@@ -317,6 +349,12 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node,
for (; size >= 4; size -= 4, ++prop) {
if (be32_to_cpu(prop[0]) == 40) {
DBG("1T segment support detected\n");
+
+ if (disable_1tb_segments) {
+ DBG("1T segments disabled by command line\n");
+ break;
+ }
+
cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT;
return 1;
}
@@ -492,7 +530,8 @@ static bool might_have_hea(void)
* we will never see an HEA ethernet device.
*/
#ifdef CONFIG_IBMEBUS
- return !cpu_has_feature(CPU_FTR_ARCH_207S);
+ return !cpu_has_feature(CPU_FTR_ARCH_207S) &&
+ !firmware_has_feature(FW_FEATURE_SPLPAR);
#else
return false;
#endif
@@ -558,7 +597,7 @@ found:
* would stop us accessing the HEA ethernet. So if we
* have the chance of ever seeing one, stay at 4k.
*/
- if (!might_have_hea() || !machine_is(pseries))
+ if (!might_have_hea())
mmu_io_psize = MMU_PAGE_64K;
} else
mmu_ci_restrictions = 1;
@@ -677,10 +716,9 @@ int remove_section_mapping(unsigned long start, unsigned long end)
#endif /* CONFIG_MEMORY_HOTPLUG */
static void __init hash_init_partition_table(phys_addr_t hash_table,
- unsigned long pteg_count)
+ unsigned long htab_size)
{
unsigned long ps_field;
- unsigned long htab_size;
unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
/*
@@ -688,7 +726,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
* We can ignore that for lpid 0
*/
ps_field = 0;
- htab_size = __ilog2(pteg_count) - 11;
+ htab_size = __ilog2(htab_size) - 18;
BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large.");
partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
@@ -702,7 +740,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
* For now UPRT is 0 for us.
*/
partition_tb->patb1 = 0;
- DBG("Partition table %p\n", partition_tb);
+ pr_info("Partition table %p\n", partition_tb);
/*
* update partition table control register,
* 64 K size.
@@ -716,7 +754,7 @@ static void __init htab_initialize(void)
unsigned long table;
unsigned long pteg_count;
unsigned long prot;
- unsigned long base = 0, size = 0, limit;
+ unsigned long base = 0, size = 0;
struct memblock_region *reg;
DBG(" -> htab_initialize()\n");
@@ -742,7 +780,8 @@ static void __init htab_initialize(void)
htab_hash_mask = pteg_count - 1;
- if (firmware_has_feature(FW_FEATURE_LPAR)) {
+ if (firmware_has_feature(FW_FEATURE_LPAR) ||
+ firmware_has_feature(FW_FEATURE_PS3_LV1)) {
/* Using a hypervisor which owns the htab */
htab_address = NULL;
_SDR1 = 0;
@@ -753,20 +792,26 @@ static void __init htab_initialize(void)
* Clear the htab if firmware assisted dump is active so
* that we dont end up using old mappings.
*/
- if (is_fadump_active() && ppc_md.hpte_clear_all)
- ppc_md.hpte_clear_all();
+ if (is_fadump_active() && mmu_hash_ops.hpte_clear_all)
+ mmu_hash_ops.hpte_clear_all();
#endif
} else {
- /* Find storage for the HPT. Must be contiguous in
- * the absolute address space. On cell we want it to be
- * in the first 2 Gig so we can use it for IOMMU hacks.
+ unsigned long limit = MEMBLOCK_ALLOC_ANYWHERE;
+
+#ifdef CONFIG_PPC_CELL
+ /*
+ * Cell may require the hash table down low when using the
+ * Axon IOMMU in order to fit the dynamic region over it, see
+ * comments in cell/iommu.c
*/
- if (machine_is(cell))
+ if (fdt_subnode_offset(initial_boot_params, 0, "axon") > 0) {
limit = 0x80000000;
- else
- limit = MEMBLOCK_ALLOC_ANYWHERE;
+ pr_info("Hash table forced below 2G for Axon IOMMU\n");
+ }
+#endif /* CONFIG_PPC_CELL */
- table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, limit);
+ table = memblock_alloc_base(htab_size_bytes, htab_size_bytes,
+ limit);
DBG("Hash table allocated at %lx, size: %lx\n", table,
htab_size_bytes);
@@ -774,7 +819,7 @@ static void __init htab_initialize(void)
htab_address = __va(table);
/* htab absolute addr + encoded htabsize */
- _SDR1 = table + __ilog2(pteg_count) - 11;
+ _SDR1 = table + __ilog2(htab_size_bytes) - 18;
/* Initialize the HPT with no entries */
memset((void *)table, 0, htab_size_bytes);
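
The hash table size encoding switches from a PTEG-count base to a byte-size base; the two are equivalent because each PTEG is 128 bytes (2^7). A short worked check with an illustrative table size:

/* Illustrative only: why __ilog2(htab_size_bytes) - 18 matches the old
 * __ilog2(pteg_count) - 11.  Since htab_size_bytes = pteg_count << 7,
 * the two logarithms differ by exactly 7.
 *
 * Example: 256 MB hash table (2^28 bytes, 2^21 PTEGs):
 *   old: __ilog2(1UL << 21) - 11 = 21 - 11 = 10
 *   new: __ilog2(1UL << 28) - 18 = 28 - 18 = 10
 */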
@@ -783,7 +828,7 @@ static void __init htab_initialize(void)
/* Set SDR1 */
mtspr(SPRN_SDR1, _SDR1);
else
- hash_init_partition_table(table, pteg_count);
+ hash_init_partition_table(table, htab_size_bytes);
}
prot = pgprot_val(PAGE_KERNEL);
@@ -810,34 +855,6 @@ static void __init htab_initialize(void)
DBG("creating mapping for region: %lx..%lx (prot: %lx)\n",
base, size, prot);
-#ifdef CONFIG_U3_DART
- /* Do not map the DART space. Fortunately, it will be aligned
- * in such a way that it will not cross two memblock regions and
- * will fit within a single 16Mb page.
- * The DART space is assumed to be a full 16Mb region even if
- * we only use 2Mb of that space. We will use more of it later
- * for AGP GART. We have to use a full 16Mb large page.
- */
- DBG("DART base: %lx\n", dart_tablebase);
-
- if (dart_tablebase != 0 && dart_tablebase >= base
- && dart_tablebase < (base + size)) {
- unsigned long dart_table_end = dart_tablebase + 16 * MB;
- if (base != dart_tablebase)
- BUG_ON(htab_bolt_mapping(base, dart_tablebase,
- __pa(base), prot,
- mmu_linear_psize,
- mmu_kernel_ssize));
- if ((base + size) > dart_table_end)
- BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB,
- base + size,
- __pa(dart_table_end),
- prot,
- mmu_linear_psize,
- mmu_kernel_ssize));
- continue;
- }
-#endif /* CONFIG_U3_DART */
BUG_ON(htab_bolt_mapping(base, base + size, __pa(base),
prot, mmu_linear_psize, mmu_kernel_ssize));
}
@@ -900,12 +917,28 @@ void __init hash__early_init_mmu(void)
vmemmap = (struct page *)H_VMEMMAP_BASE;
ioremap_bot = IOREMAP_BASE;
+#ifdef CONFIG_PCI
+ pci_io_base = ISA_IO_BASE;
+#endif
+
+ /* Select appropriate backend */
+ if (firmware_has_feature(FW_FEATURE_PS3_LV1))
+ ps3_early_mm_init();
+ else if (firmware_has_feature(FW_FEATURE_LPAR))
+ hpte_init_pseries();
+ else if (IS_ENABLED(CONFIG_PPC_NATIVE))
+ hpte_init_native();
+
+ if (!mmu_hash_ops.hpte_insert)
+ panic("hash__early_init_mmu: No MMU hash ops defined!\n");
+
/* Initialize the MMU Hash table and create the linear mapping
* of memory. Has to be done before SLB initialization as this is
* currently where the page size encoding is obtained.
*/
htab_initialize();
+ pr_info("Initializing hash mmu with SLB\n");
/* Initialize SLB management */
slb_initialize();
}
@@ -1448,7 +1481,8 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
* We use same base page size and actual psize, because we don't
* use these functions for hugepage
*/
- ppc_md.hpte_invalidate(slot, vpn, psize, psize, ssize, local);
+ mmu_hash_ops.hpte_invalidate(slot, vpn, psize, psize,
+ ssize, local);
} pte_iterate_hashed_end();
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -1489,9 +1523,9 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
if (!hpte_slot_array)
return;
- if (ppc_md.hugepage_invalidate) {
- ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array,
- psize, ssize, local);
+ if (mmu_hash_ops.hugepage_invalidate) {
+ mmu_hash_ops.hugepage_invalidate(vsid, s_addr, hpte_slot_array,
+ psize, ssize, local);
goto tm_abort;
}
/*
@@ -1518,8 +1552,8 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += hidx & _PTEIDX_GROUP_IX;
- ppc_md.hpte_invalidate(slot, vpn, psize,
- MMU_PAGE_16M, ssize, local);
+ mmu_hash_ops.hpte_invalidate(slot, vpn, psize,
+ MMU_PAGE_16M, ssize, local);
}
tm_abort:
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -1543,8 +1577,8 @@ tm_abort:
void flush_hash_range(unsigned long number, int local)
{
- if (ppc_md.flush_hash_range)
- ppc_md.flush_hash_range(number, local);
+ if (mmu_hash_ops.flush_hash_range)
+ mmu_hash_ops.flush_hash_range(number, local);
else {
int i;
struct ppc64_tlb_batch *batch =
@@ -1589,22 +1623,22 @@ repeat:
HPTES_PER_GROUP) & ~0x7UL;
/* Insert into the hash table, primary slot */
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, vflags,
- psize, psize, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, vflags,
+ psize, psize, ssize);
/* Primary is full, try the secondary */
if (unlikely(slot == -1)) {
hpte_group = ((~hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags,
- vflags | HPTE_V_SECONDARY,
- psize, psize, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags,
+ vflags | HPTE_V_SECONDARY,
+ psize, psize, ssize);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP)&~0x7UL;
- ppc_md.hpte_remove(hpte_group);
+ mmu_hash_ops.hpte_remove(hpte_group);
goto repeat;
}
}
@@ -1654,8 +1688,9 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
hash = ~hash;
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += hidx & _PTEIDX_GROUP_IX;
- ppc_md.hpte_invalidate(slot, vpn, mmu_linear_psize, mmu_linear_psize,
- mmu_kernel_ssize, 0);
+ mmu_hash_ops.hpte_invalidate(slot, vpn, mmu_linear_psize,
+ mmu_linear_psize,
+ mmu_kernel_ssize, 0);
}
void __kernel_map_pages(struct page *page, int numpages, int enable)
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index ba3fc229468a..f20d16f849c5 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -103,8 +103,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += hidx & _PTEIDX_GROUP_IX;
- ret = ppc_md.hpte_updatepp(slot, rflags, vpn,
- psize, lpsize, ssize, flags);
+ ret = mmu_hash_ops.hpte_updatepp(slot, rflags, vpn,
+ psize, lpsize, ssize, flags);
/*
* We failed to update, try to insert a new entry.
*/
@@ -131,23 +131,24 @@ repeat:
hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
/* Insert into the hash table, primary slot */
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
- psize, lpsize, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+ psize, lpsize, ssize);
/*
* Primary is full, try the secondary
*/
if (unlikely(slot == -1)) {
hpte_group = ((~hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa,
- rflags, HPTE_V_SECONDARY,
- psize, lpsize, ssize);
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
+ rflags,
+ HPTE_V_SECONDARY,
+ psize, lpsize, ssize);
if (slot == -1) {
if (mftb() & 0x1)
hpte_group = ((hash & htab_hash_mask) *
HPTES_PER_GROUP) & ~0x7UL;
- ppc_md.hpte_remove(hpte_group);
+ mmu_hash_ops.hpte_remove(hpte_group);
goto repeat;
}
}
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 3058560b6121..d5026f3800b6 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -79,8 +79,8 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;
- if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize,
- mmu_psize, ssize, flags) == -1)
+ if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, mmu_psize,
+ mmu_psize, ssize, flags) == -1)
old_pte &= ~_PAGE_HPTEFLAGS;
}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 5aac1a3f86cd..7372ee13eb1e 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -73,7 +73,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
cachep = PGT_CACHE(pdshift - pshift);
#endif
- new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT);
+ new = kmem_cache_zalloc(cachep, GFP_KERNEL);
BUG_ON(pshift > HUGEPD_SHIFT_MASK);
BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
@@ -81,6 +81,13 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
if (! new)
return -ENOMEM;
+ /*
+ * Make sure other cpus find the hugepd set only after a
+ * properly initialized page table is visible to them.
+ * For more details look for comment in __pte_alloc().
+ */
+ smp_wmb();
+
spin_lock(&mm->page_table_lock);
#ifdef CONFIG_PPC_FSL_BOOK3E
/*
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index c899fe340bbd..448685fbf27c 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -64,7 +64,7 @@ EXPORT_SYMBOL(memstart_addr);
phys_addr_t kernstart_addr;
EXPORT_SYMBOL(kernstart_addr);
-#ifdef CONFIG_RELOCATABLE_PPC32
+#ifdef CONFIG_RELOCATABLE
/* Used in __va()/__pa() */
long long virt_phys_offset;
EXPORT_SYMBOL(virt_phys_offset);
@@ -80,9 +80,6 @@ EXPORT_SYMBOL(agp_special_page);
void MMU_init(void);
-/* XXX should be in current.h -- paulus */
-extern struct task_struct *current_set[NR_CPUS];
-
/*
* this tells the system to map all of ram with the segregs
* (i.e. page tables) instead of the bats.
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 2fd57fa48429..5f844337de21 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -116,6 +116,16 @@ int memory_add_physaddr_to_nid(u64 start)
}
#endif
+int __weak create_section_mapping(unsigned long start, unsigned long end)
+{
+ return -ENODEV;
+}
+
+int __weak remove_section_mapping(unsigned long start, unsigned long end)
+{
+ return -ENODEV;
+}
+
int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
{
struct pglist_data *pgdata;
@@ -239,8 +249,14 @@ static int __init mark_nonram_nosave(void)
static bool zone_limits_final;
+/*
+ * The memory zones past TOP_ZONE are managed by generic mm code.
+ * These should be set to zero since that's what every other
+ * architecture does.
+ */
static unsigned long max_zone_pfns[MAX_NR_ZONES] = {
- [0 ... MAX_NR_ZONES - 1] = ~0UL
+ [0 ... TOP_ZONE ] = ~0UL,
+ [TOP_ZONE + 1 ... MAX_NR_ZONES - 1] = 0
};
/*
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c
index 227b2a6c4544..b114f8b93ec9 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/mmu_context_book3s64.c
@@ -65,7 +65,7 @@ static int radix__init_new_context(struct mm_struct *mm, int index)
/*
* set the process table entry,
*/
- rts_field = 3ull << PPC_BITLSHIFT(2);
+ rts_field = radix__get_tree_size();
process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE);
return 0;
}
@@ -181,7 +181,10 @@ void destroy_context(struct mm_struct *mm)
#ifdef CONFIG_PPC_RADIX_MMU
void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
{
- mtspr(SPRN_PID, next->context.id);
asm volatile("isync": : :"memory");
+ mtspr(SPRN_PID, next->context.id);
+ asm volatile("isync \n"
+ PPC_SLBIA(0x7)
+ : : :"memory");
}
#endif
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 6af65327c993..f988db655e5b 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -154,9 +154,10 @@ struct tlbcam {
};
#endif
-#if defined(CONFIG_6xx) || defined(CONFIG_FSL_BOOKE)
+#if defined(CONFIG_6xx) || defined(CONFIG_FSL_BOOKE) || defined(CONFIG_PPC_8xx)
/* 6xx have BATS */
/* FSL_BOOKE have TLBCAM */
+/* 8xx have LTLB */
phys_addr_t v_block_mapped(unsigned long va);
unsigned long p_block_mapped(phys_addr_t pa);
#else
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 669a15e7fa76..75b9cd6150cc 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -581,30 +581,22 @@ static void verify_cpu_node_mapping(int cpu, int node)
}
}
-static int cpu_numa_callback(struct notifier_block *nfb, unsigned long action,
- void *hcpu)
+/* Must run before sched domains notifier. */
+static int ppc_numa_cpu_prepare(unsigned int cpu)
{
- unsigned long lcpu = (unsigned long)hcpu;
- int ret = NOTIFY_DONE, nid;
+ int nid;
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- nid = numa_setup_cpu(lcpu);
- verify_cpu_node_mapping((int)lcpu, nid);
- ret = NOTIFY_OK;
- break;
+ nid = numa_setup_cpu(cpu);
+ verify_cpu_node_mapping(cpu, nid);
+ return 0;
+}
+
+static int ppc_numa_cpu_dead(unsigned int cpu)
+{
#ifdef CONFIG_HOTPLUG_CPU
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- unmap_cpu_from_node(lcpu);
- ret = NOTIFY_OK;
- break;
+ unmap_cpu_from_node(cpu);
#endif
- }
- return ret;
+ return 0;
}
/*
@@ -913,11 +905,6 @@ static void __init dump_numa_memory_topology(void)
}
}
-static struct notifier_block ppc64_numa_nb = {
- .notifier_call = cpu_numa_callback,
- .priority = 1 /* Must run before sched domains notifier. */
-};
-
/* Initialize NODE_DATA for a node on the local memory */
static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
{
@@ -985,15 +972,18 @@ void __init initmem_init(void)
setup_node_to_cpumask_map();
reset_numa_cpu_lookup_table();
- register_cpu_notifier(&ppc64_numa_nb);
+
/*
* We need the numa_cpu_lookup_table to be accurate for all CPUs,
* even before we online them, so that we can use cpu_to_{node,mem}
* early in boot, cf. smp_prepare_cpus().
+ * _nocalls() + manual invocation is used because cpuhp is not yet
+ * initialized for the boot CPU.
*/
- for_each_present_cpu(cpu) {
- numa_setup_cpu((unsigned long)cpu);
- }
+ cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "POWER_NUMA_PREPARE",
+ ppc_numa_cpu_prepare, ppc_numa_cpu_dead);
+ for_each_present_cpu(cpu)
+ numa_setup_cpu(cpu);
}
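
The numa.c conversion above moves from a CPU notifier block to the hotplug state machine; a minimal sketch of the registration as used in this hunk (the state constant is introduced by this series):

/* Illustrative only: cpuhp registration pattern used above. */
static int ppc_numa_cpu_prepare(unsigned int cpu);	/* replaces CPU_UP_PREPARE */
static int ppc_numa_cpu_dead(unsigned int cpu);		/* replaces CPU_DEAD et al. */

static void example_register(void)
{
	/* _nocalls(): register callbacks without invoking them for CPUs that
	 * are already online; the boot CPU is covered by the explicit
	 * numa_setup_cpu() loop because cpuhp is not yet initialised.
	 */
	cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "POWER_NUMA_PREPARE",
				  ppc_numa_cpu_prepare, ppc_numa_cpu_dead);
}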
static int __init early_numa(char *p)
@@ -1163,18 +1153,34 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
static u64 hot_add_drconf_memory_max(void)
{
- struct device_node *memory = NULL;
- unsigned int drconf_cell_cnt = 0;
- u64 lmb_size = 0;
+ struct device_node *memory = NULL;
+ struct device_node *dn = NULL;
+ unsigned int drconf_cell_cnt = 0;
+ u64 lmb_size = 0;
const __be32 *dm = NULL;
+ const __be64 *lrdr = NULL;
+ struct of_drconf_cell drmem;
+
+ dn = of_find_node_by_path("/rtas");
+ if (dn) {
+ lrdr = of_get_property(dn, "ibm,lrdr-capacity", NULL);
+ of_node_put(dn);
+ if (lrdr)
+ return be64_to_cpup(lrdr);
+ }
- memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
- if (memory) {
- drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
- lmb_size = of_get_lmb_size(memory);
- of_node_put(memory);
- }
- return lmb_size * drconf_cell_cnt;
+ memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (memory) {
+ drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
+ lmb_size = of_get_lmb_size(memory);
+
+ /* Advance to the last cell, each cell has 6 32 bit integers */
+ dm += (drconf_cell_cnt - 1) * 6;
+ read_drconf_cell(&drmem, &dm);
+ of_node_put(memory);
+ return drmem.base_addr + lmb_size;
+ }
+ return 0;
}
/*
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index eb4451144746..670318766545 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -33,10 +33,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
changed = !pmd_same(*(pmdp), entry);
if (changed) {
__ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry));
- /*
- * Since we are not supporting SW TLB systems, we don't
- * have any thing similar to flush_tlb_page_nohash()
- */
+ flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
}
return changed;
}
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index 18b2c11604fa..003ff48a11b6 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -21,8 +21,11 @@
#include <trace/events/thp.h>
-static int native_update_partition_table(u64 patb1)
+static int native_register_process_table(unsigned long base, unsigned long pg_sz,
+ unsigned long table_size)
{
+ unsigned long patb1 = base | table_size | PATB_GR;
+
partition_tb->patb1 = cpu_to_be64(patb1);
return 0;
}
@@ -160,32 +163,30 @@ redo:
process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
/*
* Fill in the process table.
- * we support 52 bits, hence 52-28 = 24, 11000
*/
- rts_field = 3ull << PPC_BITLSHIFT(2);
+ rts_field = radix__get_tree_size();
process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE);
/*
* Fill in the partition table. We are suppose to use effective address
* of process table here. But our linear mapping also enable us to use
* physical address here.
*/
- ppc_md.update_partition_table(__pa(process_tb) | (PRTB_SIZE_SHIFT - 12) | PATB_GR);
+ ppc_md.register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12);
pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd);
}
static void __init radix_init_partition_table(void)
{
unsigned long rts_field;
- /*
- * we support 52 bits, hence 52-28 = 24, 11000
- */
- rts_field = 3ull << PPC_BITLSHIFT(2);
+
+ rts_field = radix__get_tree_size();
BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large.");
partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT);
partition_tb->patb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) |
RADIX_PGD_INDEX_SIZE | PATB_HR);
- printk("Partition table %p\n", partition_tb);
+ pr_info("Initializing Radix MMU\n");
+ pr_info("Partition table %p\n", partition_tb);
memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
/*
@@ -197,7 +198,7 @@ static void __init radix_init_partition_table(void)
void __init radix_init_native(void)
{
- ppc_md.update_partition_table = native_update_partition_table;
+ ppc_md.register_process_table = native_register_process_table;
}
static int __init get_idx_from_shift(unsigned int shift)
@@ -296,11 +297,6 @@ found:
void __init radix__early_init_mmu(void)
{
unsigned long lpcr;
- /*
- * setup LPCR UPRT based on mmu_features
- */
- lpcr = mfspr(SPRN_LPCR);
- mtspr(SPRN_LPCR, lpcr | LPCR_UPRT);
#ifdef CONFIG_PPC_64K_PAGES
/* PAGE_SIZE mappings */
@@ -336,6 +332,11 @@ void __init radix__early_init_mmu(void)
__vmalloc_end = RADIX_VMALLOC_END;
vmemmap = (struct page *)RADIX_VMEMMAP_BASE;
ioremap_bot = IOREMAP_BASE;
+
+#ifdef CONFIG_PCI
+ pci_io_base = ISA_IO_BASE;
+#endif
+
/*
* For now radix also use the same frag size
*/
@@ -343,8 +344,12 @@ void __init radix__early_init_mmu(void)
__pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT;
radix_init_page_sizes();
- if (!firmware_has_feature(FW_FEATURE_LPAR))
+ if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+ radix_init_native();
+ lpcr = mfspr(SPRN_LPCR);
+ mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
radix_init_partition_table();
+ }
radix_init_pgtable();
}
@@ -353,16 +358,15 @@ void radix__early_init_mmu_secondary(void)
{
unsigned long lpcr;
/*
- * setup LPCR UPRT based on mmu_features
+ * update partition table control register and UPRT
*/
- lpcr = mfspr(SPRN_LPCR);
- mtspr(SPRN_LPCR, lpcr | LPCR_UPRT);
- /*
- * update partition table control register, 64 K size.
- */
- if (!firmware_has_feature(FW_FEATURE_LPAR))
+ if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+ lpcr = mfspr(SPRN_LPCR);
+ mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
+
mtspr(SPRN_PTCR,
__pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+ }
}
void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
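
For comparison, the two register_process_table backends in this series encode the second partition-table doubleword differently; a hedged summary of the layouts as they appear in the hunks above:

/* Illustrative only: patb1 as built by the two backends in this patch.
 *
 *   hash  (native_register_proc_table, hash_native_64.c):
 *       patb1 = (base << 25) | (page_size << 5) | table_size;
 *   radix (native_register_process_table, pgtable-radix.c):
 *       patb1 = base | table_size | PATB_GR;
 *
 * The radix init code above calls it as
 *   register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12);
 */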
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index bf7bf32b54f8..7f922f557936 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -84,7 +84,7 @@ __init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long add
pte_t *pte;
if (slab_is_available()) {
- pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+ pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
} else {
pte = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
if (pte)
@@ -97,7 +97,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *ptepage;
- gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO;
+ gfp_t flags = GFP_KERNEL | __GFP_ZERO;
ptepage = alloc_pages(flags, 0);
if (!ptepage)
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index e009e0604a8a..f5e8d4edb808 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -350,8 +350,7 @@ static pte_t *get_from_cache(struct mm_struct *mm)
static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
{
void *ret = NULL;
- struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
- __GFP_REPEAT | __GFP_ZERO);
+ struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
if (!page)
return NULL;
if (!kernel && !pgtable_page_ctor(page)) {
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 0fdaf93a3e09..e1f22700fb16 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -12,26 +12,30 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/memblock.h>
+#include <asm/ppc-opcode.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
static DEFINE_RAW_SPINLOCK(native_tlbie_lock);
-static inline void __tlbiel_pid(unsigned long pid, int set)
+#define RIC_FLUSH_TLB 0
+#define RIC_FLUSH_PWC 1
+#define RIC_FLUSH_ALL 2
+
+static inline void __tlbiel_pid(unsigned long pid, int set,
+ unsigned long ric)
{
- unsigned long rb,rs,ric,prs,r;
+ unsigned long rb,rs,prs,r;
rb = PPC_BIT(53); /* IS = 1 */
rb |= set << PPC_BITLSHIFT(51);
rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
prs = 1; /* process scoped */
r = 1; /* raidx format */
- ric = 2; /* invalidate all the caches */
asm volatile("ptesync": : :"memory");
- asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |"
- "(%2 << 17) | (%3 << 18) | (%4 << 21)"
+ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("ptesync": : :"memory");
}
@@ -39,67 +43,61 @@ static inline void __tlbiel_pid(unsigned long pid, int set)
/*
* We use 128 set in radix mode and 256 set in hpt mode.
*/
-static inline void _tlbiel_pid(unsigned long pid)
+static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
{
int set;
for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) {
- __tlbiel_pid(pid, set);
+ __tlbiel_pid(pid, set, ric);
}
return;
}
-static inline void _tlbie_pid(unsigned long pid)
+static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
{
- unsigned long rb,rs,ric,prs,r;
+ unsigned long rb,rs,prs,r;
rb = PPC_BIT(53); /* IS = 1 */
rs = pid << PPC_BITLSHIFT(31);
prs = 1; /* process scoped */
r = 1; /* raidx format */
- ric = 2; /* invalidate all the caches */
asm volatile("ptesync": : :"memory");
- asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |"
- "(%2 << 17) | (%3 << 18) | (%4 << 21)"
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
static inline void _tlbiel_va(unsigned long va, unsigned long pid,
- unsigned long ap)
+ unsigned long ap, unsigned long ric)
{
- unsigned long rb,rs,ric,prs,r;
+ unsigned long rb,rs,prs,r;
rb = va & ~(PPC_BITMASK(52, 63));
rb |= ap << PPC_BITLSHIFT(58);
rs = pid << PPC_BITLSHIFT(31);
prs = 1; /* process scoped */
r = 1; /* raidx format */
- ric = 0; /* no cluster flush yet */
asm volatile("ptesync": : :"memory");
- asm volatile(".long 0x7c000224 | (%0 << 11) | (%1 << 16) |"
- "(%2 << 17) | (%3 << 18) | (%4 << 21)"
+ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("ptesync": : :"memory");
}
static inline void _tlbie_va(unsigned long va, unsigned long pid,
- unsigned long ap)
+ unsigned long ap, unsigned long ric)
{
- unsigned long rb,rs,ric,prs,r;
+ unsigned long rb,rs,prs,r;
rb = va & ~(PPC_BITMASK(52, 63));
rb |= ap << PPC_BITLSHIFT(58);
rs = pid << PPC_BITLSHIFT(31);
prs = 1; /* process scoped */
r = 1; /* raidx format */
- ric = 0; /* no cluster flush yet */
asm volatile("ptesync": : :"memory");
- asm volatile(".long 0x7c000264 | (%0 << 11) | (%1 << 16) |"
- "(%2 << 17) | (%3 << 18) | (%4 << 21)"
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
@@ -117,25 +115,40 @@ static inline void _tlbie_va(unsigned long va, unsigned long pid,
*/
void radix__local_flush_tlb_mm(struct mm_struct *mm)
{
- unsigned int pid;
+ unsigned long pid;
preempt_disable();
pid = mm->context.id;
if (pid != MMU_NO_CONTEXT)
- _tlbiel_pid(pid);
+ _tlbiel_pid(pid, RIC_FLUSH_ALL);
preempt_enable();
}
EXPORT_SYMBOL(radix__local_flush_tlb_mm);
+void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
+{
+ unsigned long pid;
+ struct mm_struct *mm = tlb->mm;
+
+ preempt_disable();
+
+ pid = mm->context.id;
+ if (pid != MMU_NO_CONTEXT)
+ _tlbiel_pid(pid, RIC_FLUSH_PWC);
+
+ preempt_enable();
+}
+EXPORT_SYMBOL(radix__local_flush_tlb_pwc);
+
void radix___local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
unsigned long ap, int nid)
{
- unsigned int pid;
+ unsigned long pid;
preempt_disable();
pid = mm ? mm->context.id : 0;
if (pid != MMU_NO_CONTEXT)
- _tlbiel_va(vmaddr, pid, ap);
+ _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
preempt_enable();
}
@@ -160,7 +173,7 @@ static int mm_is_core_local(struct mm_struct *mm)
void radix__flush_tlb_mm(struct mm_struct *mm)
{
- unsigned int pid;
+ unsigned long pid;
preempt_disable();
pid = mm->context.id;
@@ -172,20 +185,46 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
if (lock_tlbie)
raw_spin_lock(&native_tlbie_lock);
- _tlbie_pid(pid);
+ _tlbie_pid(pid, RIC_FLUSH_ALL);
if (lock_tlbie)
raw_spin_unlock(&native_tlbie_lock);
} else
- _tlbiel_pid(pid);
+ _tlbiel_pid(pid, RIC_FLUSH_ALL);
no_context:
preempt_enable();
}
EXPORT_SYMBOL(radix__flush_tlb_mm);
+void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
+{
+ unsigned long pid;
+ struct mm_struct *mm = tlb->mm;
+
+ preempt_disable();
+
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+ goto no_context;
+
+ if (!mm_is_core_local(mm)) {
+ int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+
+ if (lock_tlbie)
+ raw_spin_lock(&native_tlbie_lock);
+ _tlbie_pid(pid, RIC_FLUSH_PWC);
+ if (lock_tlbie)
+ raw_spin_unlock(&native_tlbie_lock);
+ } else
+ _tlbiel_pid(pid, RIC_FLUSH_PWC);
+no_context:
+ preempt_enable();
+}
+EXPORT_SYMBOL(radix__flush_tlb_pwc);
+
void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
unsigned long ap, int nid)
{
- unsigned int pid;
+ unsigned long pid;
preempt_disable();
pid = mm ? mm->context.id : 0;
@@ -196,11 +235,11 @@ void radix___flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
if (lock_tlbie)
raw_spin_lock(&native_tlbie_lock);
- _tlbie_va(vmaddr, pid, ap);
+ _tlbie_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
if (lock_tlbie)
raw_spin_unlock(&native_tlbie_lock);
} else
- _tlbiel_va(vmaddr, pid, ap);
+ _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB);
bail:
preempt_enable();
}
@@ -224,7 +263,7 @@ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
if (lock_tlbie)
raw_spin_lock(&native_tlbie_lock);
- _tlbie_pid(0);
+ _tlbie_pid(0, RIC_FLUSH_ALL);
if (lock_tlbie)
raw_spin_unlock(&native_tlbie_lock);
}
@@ -243,9 +282,61 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
}
EXPORT_SYMBOL(radix__flush_tlb_range);
+static int radix_get_mmu_psize(int page_size)
+{
+ int psize;
+
+ if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
+ psize = mmu_virtual_psize;
+ else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
+ psize = MMU_PAGE_2M;
+ else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
+ psize = MMU_PAGE_1G;
+ else
+ return -1;
+ return psize;
+}
void radix__tlb_flush(struct mmu_gather *tlb)
{
struct mm_struct *mm = tlb->mm;
radix__flush_tlb_mm(mm);
}
+
+void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa,
+ unsigned long page_size)
+{
+ unsigned long rb,rs,prs,r;
+ unsigned long ap;
+ unsigned long ric = RIC_FLUSH_TLB;
+
+ ap = mmu_get_ap(radix_get_mmu_psize(page_size));
+ rb = gpa & ~(PPC_BITMASK(52, 63));
+ rb |= ap << PPC_BITLSHIFT(58);
+ rs = lpid & ((1UL << 32) - 1);
+ prs = 0; /* process scoped */
+ r = 1; /* raidx format */
+
+ asm volatile("ptesync": : :"memory");
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+EXPORT_SYMBOL(radix__flush_tlb_lpid_va);
+
+void radix__flush_tlb_lpid(unsigned long lpid)
+{
+ unsigned long rb,rs,prs,r;
+ unsigned long ric = RIC_FLUSH_ALL;
+
+ rb = 0x2 << PPC_BITLSHIFT(53); /* IS = 2 */
+ rs = lpid & ((1UL << 32) - 1);
+ prs = 0; /* partition scoped */
+ r = 1; /* raidx format */
+
+ asm volatile("ptesync": : :"memory");
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+EXPORT_SYMBOL(radix__flush_tlb_lpid);
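
The ric argument threaded through the tlbie/tlbiel helpers selects what the instruction invalidates. A short summary of the values introduced above (per the ISA 3.0 radix invalidation control field):

/* Illustrative only: meaning of the RIC values defined at the top of
 * tlb-radix.c above.
 *
 *   RIC_FLUSH_TLB (0) - invalidate TLB entries only
 *   RIC_FLUSH_PWC (1) - invalidate the page-walk cache only
 *   RIC_FLUSH_ALL (2) - invalidate TLB, PWC and other cached translations
 *
 * radix__flush_tlb_mm() keeps RIC_FLUSH_ALL, the new *_flush_tlb_pwc()
 * helpers pass RIC_FLUSH_PWC, and per-page flushes pass RIC_FLUSH_TLB.
 */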