From c7d54842deb1fa357cff75b988275a1c9f259140 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Apr 2016 23:25:30 +1000 Subject: powerpc/mm: Use _PAGE_READ to indicate Read access This splits the _PAGE_RW bit into _PAGE_READ and _PAGE_WRITE. It also removes the dependency on _PAGE_USER for implying read only. A few things to note here: read is implied by write and execute permission, hence we should always find _PAGE_READ set on a hash pte fault. We still can't switch PROT_NONE to !(_PAGE_RWX). Auto NUMA depends on marking a prot-none pte _PAGE_WRITE. (For more details look at b191f9b106ea "mm: numa: preserve PTE write permissions across a NUMA hinting fault") Cc: Arnd Bergmann Cc: Jeremy Kerr Cc: Frederic Barrat Acked-by: Ian Munsie Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 7635b1c6b5da..ed190e42bbc5 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -175,8 +175,9 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) * or PP=0x3 for read-only (including writeable but clean pages). */ if (pteflags & _PAGE_USER) { - rflags |= 0x2; - if (!((pteflags & _PAGE_RW) && (pteflags & _PAGE_DIRTY))) + if (pteflags & _PAGE_RWX) + rflags |= 0x2; + if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY))) rflags |= 0x1; } /* @@ -1209,7 +1210,7 @@ EXPORT_SYMBOL_GPL(hash_page); int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap, unsigned long dsisr) { - unsigned long access = _PAGE_PRESENT; + unsigned long access = _PAGE_PRESENT | _PAGE_READ; unsigned long flags = 0; struct mm_struct *mm = current->mm; @@ -1220,7 +1221,7 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap, flags |= HPTE_NOHPTE_UPDATE; if (dsisr & DSISR_ISSTORE) - access |= _PAGE_RW; + access |= _PAGE_WRITE; /* * We need to set the _PAGE_USER bit if MSR_PR is set or if we are * accessing a userspace segment (even from the kernel). We assume -- cgit v1.2.3 From 73a1441a9b2ae05c15eb7cc90d4de379b44249bf Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Apr 2016 23:25:31 +1000 Subject: powerpc/mm/subpage: Clear RWX bit to indicate no access Subpage protection used to depend on the _PAGE_USER bit to implement no-access mode. This patch switches that to use _PAGE_RWX. We clear Read, Write and Execute access from the pte instead of clearing _PAGE_USER now. This was done so that we can switch to _PAGE_PRIVILEGED in a later patch. subpage_protection() returns pte bits that need to be cleared. Instead of updating the interface to handle no-access in a separate way, it appears simpler to clear RWX access to indicate no access. We still don't insert hash ptes for no access implied by !_PAGE_RWX. Hence we should not get PROT_FAULT with this change. 
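For illustration only (this sketch is not part of the commit; demo_apply_subpage_prot() is a hypothetical helper), the new convention can be read as: subpage_protection() returns the access bits a caller must clear from the pte flags.

/*
 * Sketch: interpreting the value returned by subpage_protection().
 *   0           -> full permission (nothing cleared)
 *   _PAGE_WRITE -> read-only (write permission cleared)
 *   _PAGE_RWX   -> no access (read, write and execute all cleared)
 */
static unsigned long demo_apply_subpage_prot(unsigned long pteflags,
					     unsigned long spp)
{
	return pteflags & ~spp;	/* drop whatever the subpage forbids */
}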
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index ed190e42bbc5..36e00371ba5a 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -921,7 +921,7 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr) * Userspace sets the subpage permissions using the subpage_prot system call. * * Result is 0: full permissions, _PAGE_RW: read-only, - * _PAGE_USER or _PAGE_USER|_PAGE_RW: no access. + * _PAGE_RWX: no access. */ static int subpage_protection(struct mm_struct *mm, unsigned long ea) { @@ -947,8 +947,13 @@ static int subpage_protection(struct mm_struct *mm, unsigned long ea) /* extract 2-bit bitfield for this 4k subpage */ spp >>= 30 - 2 * ((ea >> 12) & 0xf); - /* turn 0,1,2,3 into combination of _PAGE_USER and _PAGE_RW */ - spp = ((spp & 2) ? _PAGE_USER : 0) | ((spp & 1) ? _PAGE_RW : 0); + /* + * 0 -> full permission + * 1 -> Read only + * 2 -> no access. + * We return the flags that need to be cleared. + */ + spp = ((spp & 2) ? _PAGE_RWX : 0) | ((spp & 1) ? _PAGE_WRITE : 0); return spp; } -- cgit v1.2.3 From ac29c64089b74d107edb90879e63a2f7a03cd66b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Apr 2016 23:25:34 +1000 Subject: powerpc/mm: Replace _PAGE_USER with _PAGE_PRIVILEGED _PAGE_PRIVILEGED means the page can be accessed only by the kernel. This is done to keep pte bits similar to the PowerISA 3.0 radix PTE format. User pages are now marked by clearing the _PAGE_PRIVILEGED bit. Previously we allowed the kernel to have a privileged page in the lower address range (USER_REGION). With this patch such access is denied. We also prevent kernel access to a non-privileged page in the higher address range (i.e., REGION_ID != 0). Both of the above access scenarios should never happen. 
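As a rough illustration of the inverted polarity (a sketch only; demo_access_ok() is a hypothetical condensation of the check_pte_access() helper introduced in the hunks below):

/* A user pte is now one where _PAGE_PRIVILEGED is clear. */
static bool demo_access_ok(unsigned long access, unsigned long ptev)
{
	/* every requested permission bit must be present in the pte */
	if (access & ~ptev)
		return false;
	/* privilege must match: kernel<->kernel or user<->user only */
	return (access & _PAGE_PRIVILEGED) == (ptev & _PAGE_PRIVILEGED);
}

This is why __hash_page() now starts from access |= _PAGE_PRIVILEGED and clears the bit for MSR_PR or user-region accesses, rather than setting _PAGE_USER.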
Cc: Arnd Bergmann Cc: Jeremy Kerr Cc: Frederic Barrat Acked-by: Ian Munsie Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash.h | 34 ++++++++++++++-------------- arch/powerpc/include/asm/book3s/64/pgtable.h | 18 ++++++++++++++- arch/powerpc/mm/hash64_4k.c | 2 +- arch/powerpc/mm/hash64_64k.c | 4 ++-- arch/powerpc/mm/hash_utils_64.c | 16 ++++++++----- arch/powerpc/mm/hugepage-hash64.c | 2 +- arch/powerpc/mm/hugetlbpage-hash64.c | 3 ++- arch/powerpc/mm/hugetlbpage.c | 2 +- arch/powerpc/mm/pgtable.c | 15 ++++++++++-- arch/powerpc/mm/pgtable_64.c | 15 +++++++++--- arch/powerpc/platforms/cell/spufs/fault.c | 2 +- drivers/misc/cxl/fault.c | 4 +++- 12 files changed, 80 insertions(+), 37 deletions(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index fb76f9cf49c9..5e7e74d30eb9 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -20,7 +20,7 @@ #define _PAGE_READ 0x00004 /* read access allowed */ #define _PAGE_RW (_PAGE_READ | _PAGE_WRITE) #define _PAGE_RWX (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC) -#define _PAGE_USER 0x00008 /* page may be accessed by userspace */ +#define _PAGE_PRIVILEGED 0x00008 /* kernel access only */ #define _PAGE_GUARDED 0x00010 /* G: guarded (side-effect) page */ /* M (memory coherence) is always set in the HPTE, so we don't need it here */ #define _PAGE_COHERENT 0x0 @@ -114,10 +114,13 @@ #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN #endif /* CONFIG_PPC_MM_SLICES */ -/* No separate kernel read-only */ -#define _PAGE_KERNEL_RW (_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */ +/* + * No separate kernel read-only, user access blocked by key + */ +#define _PAGE_KERNEL_RW (_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY) #define _PAGE_KERNEL_RO _PAGE_KERNEL_RW -#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC) +#define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | \ + _PAGE_RW | _PAGE_EXEC) /* Strong Access Ordering */ #define _PAGE_SAO (_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT) @@ -147,7 +150,7 @@ */ #define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \ _PAGE_WRITETHRU | _PAGE_4K_PFN | \ - _PAGE_USER | _PAGE_ACCESSED | _PAGE_READ |\ + _PAGE_PRIVILEGED | _PAGE_ACCESSED | _PAGE_READ |\ _PAGE_WRITE | _PAGE_DIRTY | _PAGE_EXEC | \ _PAGE_SOFT_DIRTY) /* @@ -169,16 +172,13 @@ * * Note due to the way vm flags are laid out, the bits are XWR */ -#define PAGE_NONE __pgprot(_PAGE_BASE) -#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW) -#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \ - _PAGE_EXEC) -#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ) -#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \ - _PAGE_EXEC) -#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ) -#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_READ| \ - _PAGE_EXEC) +#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_PRIVILEGED) +#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_RW) +#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_RW | _PAGE_EXEC) +#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_READ) +#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC) +#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_READ) +#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC) #define __P000 PAGE_NONE #define __P001 PAGE_READONLY @@ -419,8 +419,8 @@ static 
inline pte_t pte_clear_soft_dirty(pte_t pte) */ static inline int pte_protnone(pte_t pte) { - return (pte_val(pte) & - (_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT; + return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PRIVILEGED)) == + (_PAGE_PRESENT | _PAGE_PRIVILEGED); } #endif /* CONFIG_NUMA_BALANCING */ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 60e84260a07d..b609729e0d76 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -187,7 +187,7 @@ extern struct page *pgd_page(pgd_t pgd); static inline bool pte_user(pte_t pte) { - return !!(pte_val(pte) & _PAGE_USER); + return !(pte_val(pte) & _PAGE_PRIVILEGED); } #ifdef CONFIG_MEM_SOFT_DIRTY @@ -211,6 +211,22 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) } #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ +static inline bool check_pte_access(unsigned long access, unsigned long ptev) +{ + /* + * This check for _PAGE_RWX and _PAGE_PRESENT bits + */ + if (access & ~ptev) + return false; + /* + * This check for access to privilege space + */ + if ((access & _PAGE_PRIVILEGED) != (ptev & _PAGE_PRIVILEGED)) + return false; + + return true; +} + void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); void pgtable_cache_init(void); diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c index 491b7d137cd8..529e49204f6b 100644 --- a/arch/powerpc/mm/hash64_4k.c +++ b/arch/powerpc/mm/hash64_4k.c @@ -37,7 +37,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, if (unlikely(old_pte & _PAGE_BUSY)) return 0; /* If PTE permissions don't match, take page fault */ - if (unlikely(access & ~old_pte)) + if (unlikely(!check_pte_access(access, old_pte))) return 1; /* * Try to lock the PTE, add ACCESSED and DIRTY if it was diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c index 2d3472173d79..e7782862362b 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/hash64_64k.c @@ -69,7 +69,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, if (unlikely(old_pte & _PAGE_BUSY)) return 0; /* If PTE permissions don't match, take page fault */ - if (unlikely(access & ~old_pte)) + if (unlikely(!check_pte_access(access, old_pte))) return 1; /* * Try to lock the PTE, add ACCESSED and DIRTY if it was @@ -237,7 +237,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, if (unlikely(old_pte & _PAGE_BUSY)) return 0; /* If PTE permissions don't match, take page fault */ - if (unlikely(access & ~old_pte)) + if (unlikely(!check_pte_access(access, old_pte))) return 1; /* * Check if PTE has the cache-inhibit bit set diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 36e00371ba5a..dc0f6a00ccbd 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -174,7 +174,7 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) * User area is mapped with PP=0x2 for read/write * or PP=0x3 for read-only (including writeable but clean pages). 
*/ - if (pteflags & _PAGE_USER) { + if (!(pteflags & _PAGE_PRIVILEGED)) { if (pteflags & _PAGE_RWX) rflags |= 0x2; if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY))) @@ -1090,7 +1090,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, /* Pre-check access permissions (will be re-checked atomically * in __hash_page_XX but this pre-check is a fast path */ - if (access & ~pte_val(*ptep)) { + if (!check_pte_access(access, pte_val(*ptep))) { DBG_LOW(" no access !\n"); rc = 1; goto bail; @@ -1228,12 +1228,16 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap, if (dsisr & DSISR_ISSTORE) access |= _PAGE_WRITE; /* - * We need to set the _PAGE_USER bit if MSR_PR is set or if we are - * accessing a userspace segment (even from the kernel). We assume - * kernel addresses always have the high bit set. + * We set _PAGE_PRIVILEGED only when + * kernel mode access kernel space. + * + * _PAGE_PRIVILEGED is NOT set + * 1) when kernel mode access user space + * 2) user space access kernel space. */ + access |= _PAGE_PRIVILEGED; if ((msr & MSR_PR) || (REGION_ID(ea) == USER_REGION_ID)) - access |= _PAGE_USER; + access &= ~_PAGE_PRIVILEGED; if (trap == 0x400) access |= _PAGE_EXEC; diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index b4b6668d1b24..6cb6bdd254bb 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -40,7 +40,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, if (unlikely(old_pmd & _PAGE_BUSY)) return 0; /* If PMD permissions don't match, take page fault */ - if (unlikely(access & ~old_pmd)) + if (unlikely(!check_pte_access(access, old_pmd))) return 1; /* * Try to lock the PTE, add ACCESSED and DIRTY if it was diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index cdca743cdaf1..bf9078440256 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -50,8 +50,9 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, if (unlikely(old_pte & _PAGE_BUSY)) return 0; /* If PTE permissions don't match, take page fault */ - if (unlikely(access & ~old_pte)) + if (unlikely(!check_pte_access(access, old_pte))) return 1; + /* Try to lock the PTE, add ACCESSED and DIRTY if it was * a write access */ new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED; diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 0bf269b00de9..6d910960217e 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -1003,7 +1003,7 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, end = pte_end; pte = READ_ONCE(*ptep); - mask = _PAGE_PRESENT | _PAGE_USER | _PAGE_READ; + mask = _PAGE_PRESENT | _PAGE_READ; if (write) mask |= _PAGE_WRITE; diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index ef7b922c655c..125fb4b54445 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -43,9 +43,20 @@ static inline int is_exec_fault(void) */ static inline int pte_looks_normal(pte_t pte) { + +#if defined(CONFIG_PPC_BOOK3S_64) + if ((pte_val(pte) & + (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE)) == + _PAGE_PRESENT) { + if (pte_user(pte)) + return 1; + } + return 0; +#else return (pte_val(pte) & - (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) == - (_PAGE_PRESENT | _PAGE_USER); + (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) == + (_PAGE_PRESENT | _PAGE_USER); +#endif } 
static struct page *maybe_pte_to_page(pte_t pte) diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 16bc751f10df..603db71ff21d 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -280,8 +280,17 @@ void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size, if (flags & _PAGE_WRITE) flags |= _PAGE_DIRTY; - /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */ - flags &= ~(_PAGE_USER | _PAGE_EXEC); + /* we don't want to let _PAGE_EXEC leak out */ + flags &= ~_PAGE_EXEC; + /* + * Force kernel mapping. + */ +#if defined(CONFIG_PPC_BOOK3S_64) + flags |= _PAGE_PRIVILEGED; +#else + flags &= ~_PAGE_USER; +#endif + #ifdef _PAGE_BAP_SR /* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format @@ -664,7 +673,7 @@ void pmdp_huge_split_prepare(struct vm_area_struct *vma, * the translation is still valid, because we will withdraw * pgtable_t after this. */ - pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_USER, 0); + pmd_hugepage_update(vma->vm_mm, address, pmdp, 0, _PAGE_PRIVILEGED); } diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index c3a3bf1745b7..e29e4d5afa2d 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -141,7 +141,7 @@ int spufs_handle_class1(struct spu_context *ctx) /* we must not hold the lock when entering copro_handle_mm_fault */ spu_release(ctx); - access = (_PAGE_PRESENT | _PAGE_READ | _PAGE_USER); + access = (_PAGE_PRESENT | _PAGE_READ); access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_WRITE : 0UL; local_irq_save(flags); ret = hash_page(ea, access, 0x300, dsisr); diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c index 0feeacedcef1..377e650a2a1d 100644 --- a/drivers/misc/cxl/fault.c +++ b/drivers/misc/cxl/fault.c @@ -152,8 +152,10 @@ static void cxl_handle_page_fault(struct cxl_context *ctx, access = _PAGE_PRESENT | _PAGE_READ; if (dsisr & CXL_PSL_DSISR_An_S) access |= _PAGE_WRITE; + + access |= _PAGE_PRIVILEGED; if ((!ctx->kernel) || (REGION_ID(dar) == USER_REGION_ID)) - access |= _PAGE_USER; + access &= ~_PAGE_PRIVILEGED; if (dsisr & DSISR_NOHPTE) inv_flags |= HPTE_NOHPTE_UPDATE; -- cgit v1.2.3 From e58e87adc8bf92e9c6abb1dc1f4af2500aa07ff3 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Apr 2016 23:25:36 +1000 Subject: powerpc/mm: Update _PAGE_KERNEL_RO PS3 had used a PPP bit hack to implement a read only mapping in the kernel area. Since we are bolting the ioremap area, it used the pte flags _PAGE_PRESENT | _PAGE_USER to get a PPP value of 0x3, thereby resulting in a read only mapping. This means the area can be accessed by user space, but the kernel will never return such an address to user space. But we can do better by implementing a read only kernel mapping using PPP bits 0b110. This also allows us to do read only kernel mapping for radix in later patches. 
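For reference, a sketch of the resulting PPP selection, mirroring the htab_convert_pte_flags() hunk that follows (demo_ppp() itself is hypothetical and not in the patch):

/*
 * PPP values, with slb key 0 = kernel and 1 = user:
 *   kernel read/write : PPP = 0b000
 *   kernel read-only  : PPP = 0b110  (HPTE_R_PP0 | 0x2, new here)
 *   user read/write   : PPP = 0b010
 *   user read-only    : PPP = 0b011
 */
static unsigned long demo_ppp(unsigned long pteflags)
{
	unsigned long rflags = 0;

	if (pteflags & _PAGE_PRIVILEGED) {
		if (!(pteflags & _PAGE_WRITE))
			rflags |= (HPTE_R_PP0 | 0x2);	/* kernel RO, 0b110 */
	} else {
		if (pteflags & _PAGE_RWX)
			rflags |= 0x2;
		if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY)))
			rflags |= 0x1;	/* writeable-but-clean stays RO */
	}
	return rflags;
}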
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash.h | 4 ++-- arch/powerpc/mm/hash_utils_64.c | 17 +++++++++++------ arch/powerpc/platforms/ps3/spu.c | 2 +- 3 files changed, 14 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index e1abe39e9dc2..eb224792095f 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -119,10 +119,10 @@ #endif /* CONFIG_PPC_MM_SLICES */ /* - * No separate kernel read-only, user access blocked by key + * user access blocked by key */ #define _PAGE_KERNEL_RW (_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY) -#define _PAGE_KERNEL_RO _PAGE_KERNEL_RW +#define _PAGE_KERNEL_RO (_PAGE_PRIVILEGED | _PAGE_READ) #define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | \ _PAGE_RW | _PAGE_EXEC) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index dc0f6a00ccbd..38ed869c119e 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -167,14 +167,19 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) if ((pteflags & _PAGE_EXEC) == 0) rflags |= HPTE_R_N; /* - * PP bits: + * PPP bits: * Linux uses slb key 0 for kernel and 1 for user. - * kernel areas are mapped with PP=00 - * and there is no kernel RO (_PAGE_KERNEL_RO). - * User area is mapped with PP=0x2 for read/write - * or PP=0x3 for read-only (including writeable but clean pages). + * kernel RW areas are mapped with PPP=0b000 + * User area is mapped with PPP=0b010 for read/write + * or PPP=0b011 for read-only (including writeable but clean pages). */ - if (!(pteflags & _PAGE_PRIVILEGED)) { + if (pteflags & _PAGE_PRIVILEGED) { + /* + * Kernel read only mapped with ppp bits 0b110 + */ + if (!(pteflags & _PAGE_WRITE)) + rflags |= (HPTE_R_PP0 | 0x2); + } else { if (pteflags & _PAGE_RWX) rflags |= 0x2; if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY))) diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c index a0bca05e26b0..5e8a40f9739f 100644 --- a/arch/powerpc/platforms/ps3/spu.c +++ b/arch/powerpc/platforms/ps3/spu.c @@ -205,7 +205,7 @@ static void spu_unmap(struct spu *spu) static int __init setup_areas(struct spu *spu) { struct table {char* name; unsigned long addr; unsigned long size;}; - static const unsigned long shadow_flags = _PAGE_NO_CACHE | 3; + unsigned long shadow_flags = pgprot_val(pgprot_noncached_wc(PAGE_KERNEL_RO)); spu_pdata(spu)->shadow = __ioremap(spu_pdata(spu)->shadow_addr, sizeof(struct spe_shadow), -- cgit v1.2.3 From 30bda41aba4efb2370c97e2cbe7385de93ccc372 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Apr 2016 23:25:38 +1000 Subject: powerpc/mm: Drop WIMG in favour of new constants PowerISA 3.0 introduces two pte bits with the below meaning for radix: 00 -> Normal Memory 01 -> Strong Access Order (SAO) 10 -> Non idempotent I/O (Cache inhibited and guarded) 11 -> Tolerant I/O (Cache inhibited) We drop the existing WIMG bits in the Linux page table in favour of the above constants. We lose _PAGE_WRITETHRU with this conversion. We only use writethru via pgprot_cached_wthru(), which is used by fbdev/controlfb.c (the Apple "control" display driver) and by PPC32. With respect to _PAGE_COHERENT, we have been marking hptes always coherent for some time now. htab_convert_pte_flags() always added HPTE_R_M. NOTE: KVM changes need closer review. 
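As an illustration of the new encoding (a sketch mirroring the htab_convert_pte_flags() hunk further down; demo_wimg() is a hypothetical helper), the two cache-control bits translate to HPTE WIMG bits like this:

static unsigned long demo_wimg(unsigned long pteflags)
{
	unsigned long rflags = 0;

	/* _PAGE_CACHE_CTL == _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT */
	if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_TOLERANT)
		rflags |= HPTE_R_I;			/* tolerant I/O */
	else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT)
		rflags |= (HPTE_R_I | HPTE_R_G);	/* non-idempotent I/O */
	else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO)
		rflags |= (HPTE_R_I | HPTE_R_W);	/* strong access order */
	/* normal memory gets neither I nor G; M is always added elsewhere */
	return rflags;
}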
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash.h | 70 ++++++++++++++----------------- arch/powerpc/include/asm/kvm_book3s_64.h | 27 +++++------- arch/powerpc/kvm/book3s_64_mmu_hv.c | 11 +++-- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 12 +++--- arch/powerpc/mm/hash64_64k.c | 2 +- arch/powerpc/mm/hash_utils_64.c | 18 ++++---- arch/powerpc/mm/pgtable.c | 8 ++-- arch/powerpc/mm/pgtable_64.c | 4 -- arch/powerpc/platforms/pseries/lpar.c | 4 -- 9 files changed, 66 insertions(+), 90 deletions(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index eb224792095f..eee951fd6a33 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -21,11 +21,9 @@ #define _PAGE_RW (_PAGE_READ | _PAGE_WRITE) #define _PAGE_RWX (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC) #define _PAGE_PRIVILEGED 0x00008 /* kernel access only */ -#define _PAGE_GUARDED 0x00010 /* G: guarded (side-effect) page */ -/* M (memory coherence) is always set in the HPTE, so we don't need it here */ -#define _PAGE_COHERENT 0x0 -#define _PAGE_NO_CACHE 0x00020 /* I: cache inhibit */ -#define _PAGE_WRITETHRU 0x00040 /* W: cache write-through */ +#define _PAGE_SAO 0x00010 /* Strong access order */ +#define _PAGE_NON_IDEMPOTENT 0x00020 /* non idempotent memory */ +#define _PAGE_TOLERANT 0x00030 /* tolerant memory, cache inhibited */ #define _PAGE_DIRTY 0x00080 /* C: page changed */ #define _PAGE_ACCESSED 0x00100 /* R: page referenced */ #define _PAGE_SPECIAL 0x00400 /* software: special page */ @@ -43,7 +41,12 @@ #define _PAGE_HASHPTE (1ul << 61) /* PTE has associated HPTE */ #define _PAGE_PTE (1ul << 62) /* distinguishes PTEs from pointers */ #define _PAGE_PRESENT (1ul << 63) /* pte contains a translation */ - +/* + * Drivers request for cache inhibited pte mapping using _PAGE_NO_CACHE + * Instead of fixing all of them, add an alternate define which + * maps CI pte mapping. + */ +#define _PAGE_NO_CACHE _PAGE_TOLERANT /* * We need to differentiate between explicit huge page and THP huge * page, since THP huge page also need to track real subpage details @@ -126,9 +129,6 @@ #define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | \ _PAGE_RW | _PAGE_EXEC) -/* Strong Access Ordering */ -#define _PAGE_SAO (_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT) - /* No page size encoding in the linux PTE */ #define _PAGE_PSIZE 0 @@ -147,10 +147,9 @@ /* * Mask of bits returned by pte_pgprot() */ -#define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \ - _PAGE_WRITETHRU | _PAGE_4K_PFN | \ - _PAGE_PRIVILEGED | _PAGE_ACCESSED | _PAGE_READ |\ - _PAGE_WRITE | _PAGE_DIRTY | _PAGE_EXEC | \ +#define PAGE_PROT_BITS (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \ + _PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \ + _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_EXEC | \ _PAGE_SOFT_DIRTY) /* * We define 2 sets of base prot bits, one for basic pages (ie, @@ -159,7 +158,7 @@ * the processor might need it for DMA coherency. 
*/ #define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE) -#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT) +#define _PAGE_BASE (_PAGE_BASE_NC) /* Permission masks used to generate the __P and __S table, * @@ -200,9 +199,9 @@ /* Permission masks used for kernel mappings */ #define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW) #define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \ - _PAGE_NO_CACHE) + _PAGE_TOLERANT) #define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \ - _PAGE_NO_CACHE | _PAGE_GUARDED) + _PAGE_NON_IDEMPOTENT) #define PAGE_KERNEL_X __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX) #define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO) #define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX) @@ -509,45 +508,26 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, *ptep = pte; } -/* - * Macro to mark a page protection value as "uncacheable". - */ - -#define _PAGE_CACHE_CTL (_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \ - _PAGE_WRITETHRU) +#define _PAGE_CACHE_CTL (_PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT) #define pgprot_noncached pgprot_noncached static inline pgprot_t pgprot_noncached(pgprot_t prot) { return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | - _PAGE_NO_CACHE | _PAGE_GUARDED); + _PAGE_NON_IDEMPOTENT); } #define pgprot_noncached_wc pgprot_noncached_wc static inline pgprot_t pgprot_noncached_wc(pgprot_t prot) { return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | - _PAGE_NO_CACHE); + _PAGE_TOLERANT); } #define pgprot_cached pgprot_cached static inline pgprot_t pgprot_cached(pgprot_t prot) { - return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | - _PAGE_COHERENT); -} - -#define pgprot_cached_wthru pgprot_cached_wthru -static inline pgprot_t pgprot_cached_wthru(pgprot_t prot) -{ - return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | - _PAGE_COHERENT | _PAGE_WRITETHRU); -} - -#define pgprot_cached_noncoherent pgprot_cached_noncoherent -static inline pgprot_t pgprot_cached_noncoherent(pgprot_t prot) -{ - return __pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL); + return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL)); } #define pgprot_writecombine pgprot_writecombine @@ -555,6 +535,18 @@ static inline pgprot_t pgprot_writecombine(pgprot_t prot) { return pgprot_noncached_wc(prot); } +/* + * check a pte mapping have cache inhibited property + */ +static inline bool pte_ci(pte_t pte) +{ + unsigned long pte_v = pte_val(pte); + + if (((pte_v & _PAGE_CACHE_CTL) == _PAGE_TOLERANT) || + ((pte_v & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT)) + return true; + return false; +} #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 42da513fec2d..3c380c247d4e 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -276,19 +276,24 @@ static inline unsigned long hpte_make_readonly(unsigned long ptel) return ptel; } -static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type) +static inline bool hpte_cache_flags_ok(unsigned long hptel, bool is_ci) { - unsigned int wimg = ptel & HPTE_R_WIMG; + unsigned int wimg = hptel & HPTE_R_WIMG; /* Handle SAO */ if (wimg == (HPTE_R_W | HPTE_R_I | HPTE_R_M) && cpu_has_feature(CPU_FTR_ARCH_206)) wimg = HPTE_R_M; - if (!io_type) + if (!is_ci) return wimg == HPTE_R_M; - - return (wimg & (HPTE_R_W | HPTE_R_I)) == io_type; + /* + * if host is 
mapped cache inhibited, make sure hptel also have + * cache inhibited. + */ + if (wimg & HPTE_R_W) /* FIXME!! is this ok for all guest. ? */ + return false; + return !!(wimg & HPTE_R_I); } /* @@ -325,18 +330,6 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing) return new_pte; } - -/* Return HPTE cache control bits corresponding to Linux pte bits */ -static inline unsigned long hpte_cache_bits(unsigned long pte_val) -{ -#if _PAGE_NO_CACHE == HPTE_R_I && _PAGE_WRITETHRU == HPTE_R_W - return pte_val & (HPTE_R_W | HPTE_R_I); -#else - return ((pte_val & _PAGE_NO_CACHE) ? HPTE_R_I : 0) + - ((pte_val & _PAGE_WRITETHRU) ? HPTE_R_W : 0); -#endif -} - static inline bool hpte_read_permission(unsigned long pp, unsigned long key) { if (key) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index c7b78d8336b2..05f09ae82587 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -447,7 +447,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, struct revmap_entry *rev; struct page *page, *pages[1]; long index, ret, npages; - unsigned long is_io; + bool is_ci; unsigned int writing, write_ok; struct vm_area_struct *vma; unsigned long rcbits; @@ -503,7 +503,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, smp_rmb(); ret = -EFAULT; - is_io = 0; + is_ci = false; pfn = 0; page = NULL; pte_size = PAGE_SIZE; @@ -521,7 +521,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, pfn = vma->vm_pgoff + ((hva - vma->vm_start) >> PAGE_SHIFT); pte_size = psize; - is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot)); + is_ci = pte_ci(__pte((pgprot_val(vma->vm_page_prot)))); write_ok = vma->vm_flags & VM_WRITE; } up_read(¤t->mm->mmap_sem); @@ -558,10 +558,9 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, goto out_put; /* Check WIMG vs. the actual page we're accessing */ - if (!hpte_cache_flags_ok(r, is_io)) { - if (is_io) + if (!hpte_cache_flags_ok(r, is_ci)) { + if (is_ci) goto out_put; - /* * Allow guest to map emulated device memory as * uncacheable, but actually make it cacheable. 
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 4cb8db05f3e5..99b4e9d5dd23 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -175,7 +175,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, unsigned long g_ptel; struct kvm_memory_slot *memslot; unsigned hpage_shift; - unsigned long is_io; + bool is_ci; unsigned long *rmap; pte_t *ptep; unsigned int writing; @@ -199,7 +199,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, gfn = gpa >> PAGE_SHIFT; memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); pa = 0; - is_io = ~0ul; + is_ci = false; rmap = NULL; if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) { /* Emulated MMIO - mark this with key=31 */ @@ -250,7 +250,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, if (writing && !pte_write(pte)) /* make the actual HPTE be read-only */ ptel = hpte_make_readonly(ptel); - is_io = hpte_cache_bits(pte_val(pte)); + is_ci = pte_ci(pte); pa = pte_pfn(pte) << PAGE_SHIFT; pa |= hva & (host_pte_size - 1); pa |= gpa & ~PAGE_MASK; @@ -267,9 +267,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, else pteh |= HPTE_V_ABSENT; - /* Check WIMG */ - if (is_io != ~0ul && !hpte_cache_flags_ok(ptel, is_io)) { - if (is_io) + /*If we had host pte mapping then Check WIMG */ + if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) { + if (is_ci) return H_PARAMETER; /* * Allow guest to map emulated device memory as diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c index e7782862362b..675331083728 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/hash64_64k.c @@ -244,7 +244,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, * If so, bail out and refault as a 4k page */ if (!mmu_has_feature(MMU_FTR_CI_LARGE_PAGE) && - unlikely(old_pte & _PAGE_NO_CACHE)) + unlikely(pte_ci(pte))) return 0; /* * Try to lock the PTE, add ACCESSED and DIRTY if it was diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 38ed869c119e..eb928d86ce01 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -192,12 +192,13 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) /* * Add in WIG bits */ - if (pteflags & _PAGE_WRITETHRU) - rflags |= HPTE_R_W; - if (pteflags & _PAGE_NO_CACHE) + + if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_TOLERANT) rflags |= HPTE_R_I; - if (pteflags & _PAGE_GUARDED) - rflags |= HPTE_R_G; + if ((pteflags & _PAGE_CACHE_CTL ) == _PAGE_NON_IDEMPOTENT) + rflags |= (HPTE_R_I | HPTE_R_G); + if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO) + rflags |= (HPTE_R_I | HPTE_R_W); return rflags; } @@ -1142,8 +1143,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, /* If this PTE is non-cacheable and we have restrictions on * using non cacheable large pages, then we switch to 4k */ - if (mmu_ci_restrictions && psize == MMU_PAGE_64K && - (pte_val(*ptep) & _PAGE_NO_CACHE)) { + if (mmu_ci_restrictions && psize == MMU_PAGE_64K && pte_ci(*ptep)) { if (user_region) { demote_segment_4k(mm, ea); psize = MMU_PAGE_4K; @@ -1297,13 +1297,13 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, WARN_ON(hugepage_shift); #ifdef CONFIG_PPC_64K_PAGES - /* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on + /* If either _PAGE_4K_PFN or cache inhibited is set (and we are on * a 64K kernel), then we don't preload, hash_page() will take * care of it once we actually try to access the page. 
* That way we don't have to duplicate all of the logic for segment * page size demotion here */ - if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE)) + if ((pte_val(*ptep) & _PAGE_4K_PFN) || pte_ci(*ptep)) goto out_exit; #endif /* CONFIG_PPC_64K_PAGES */ diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 125fb4b54445..db277b6d8e8b 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -38,16 +38,16 @@ static inline int is_exec_fault(void) /* We only try to do i/d cache coherency on stuff that looks like * reasonably "normal" PTEs. We currently require a PTE to be present - * and we avoid _PAGE_SPECIAL and _PAGE_NO_CACHE. We also only do that + * and we avoid _PAGE_SPECIAL and cache inhibited pte. We also only do that * on userspace PTEs */ static inline int pte_looks_normal(pte_t pte) { #if defined(CONFIG_PPC_BOOK3S_64) - if ((pte_val(pte) & (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE)) == _PAGE_PRESENT) { + if ((pte_val(pte) & (_PAGE_PRESENT | _PAGE_SPECIAL)) == _PAGE_PRESENT) { + if (pte_ci(pte)) + return 0; if (pte_user(pte)) return 1; } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index ea60a24c17a6..b3040b4535da 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -167,10 +167,6 @@ void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size, if ((flags & _PAGE_PRESENT) == 0) flags |= pgprot_val(PAGE_KERNEL); - /* Non-cacheable page cannot be coherent */ - if (flags & _PAGE_NO_CACHE) - flags &= ~_PAGE_COHERENT; - /* We don't support the 4K PFN hack with ioremap */ if (flags & _PAGE_4K_PFN) return NULL; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 2415a0d31f8f..0d4608990702 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -152,10 +152,6 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, /* Exact = 0 */ flags = 0; - /* Make pHyp happy */ - if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU)) - hpte_r &= ~HPTE_R_M; - if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N)) flags |= H_COALESCE_CAND; -- cgit v1.2.3 From 50de596de8be6de75401a2190b90a822e8a1bab2 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Apr 2016 23:25:43 +1000 Subject: powerpc/mm/hash: Add support for Power9 Hash PowerISA 3.0 adds a partition table indexed by LPID. The partition table allows us to specify the MMU model that will be used for guest and host translation. This patch adds support for the SLB-based hash model (UPRT = 0). What is required with this model is to support the new hash page table entry format and also to set up the partition table such that we use the hash table for address translation. We don't have segment table support yet. In order to make sure we don't load the KVM module on Power9 (since we don't have KVM support yet) this patch also disables KVM on Power9. 
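The shape of the new setup can be sketched as follows (an editor's summary drawn from the hash_init_partition_table() hunk below, not an authoritative register description):

/*
 * For LPID 0 the patch programs, roughly:
 *
 *   patb0 = hash table base | (__ilog2(pteg_count) - 11)  // HTABORG|HTABSIZE
 *   patb1 = 0                            // UPRT = 0, no process table yet
 *   PTCR  = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)   // table base|size
 *
 * Pre-ISA 3.0 CPUs keep using SDR1 instead.
 */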
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 13 +++++++-- arch/powerpc/kvm/book3s_hv.c | 6 ++++ arch/powerpc/kvm/book3s_pr.c | 6 +++- arch/powerpc/mm/hash_native_64.c | 11 ++++++- arch/powerpc/mm/hash_utils_64.c | 42 +++++++++++++++++++++++++-- arch/powerpc/mm/pgtable_64.c | 7 +++++ arch/powerpc/platforms/ps3/htab.c | 2 +- arch/powerpc/platforms/pseries/lpar.c | 2 +- 8 files changed, 81 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index ce73736b42db..843b5d839904 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -78,6 +78,10 @@ #define HPTE_V_SECONDARY ASM_CONST(0x0000000000000002) #define HPTE_V_VALID ASM_CONST(0x0000000000000001) +/* + * ISA 3.0 has a different HPTE format. + */ +#define HPTE_R_3_0_SSIZE_SHIFT 58 #define HPTE_R_PP0 ASM_CONST(0x8000000000000000) #define HPTE_R_TS ASM_CONST(0x4000000000000000) #define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000) @@ -224,7 +228,8 @@ static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize, */ v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm); v <<= HPTE_V_AVPN_SHIFT; - v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT; return v; } @@ -248,8 +253,12 @@ static inline unsigned long hpte_encode_v(unsigned long vpn, int base_psize, * aligned for the requested page size */ static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize, - int actual_psize) + int actual_psize, int ssize) { + + if (cpu_has_feature(CPU_FTR_ARCH_300)) + pa |= ((unsigned long) ssize) << HPTE_R_3_0_SSIZE_SHIFT; + /* A 4K page needs no special encoding */ if (actual_psize == MMU_PAGE_4K) return pa & HPTE_R_RPN; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 84fb4fcfaa41..4cd37b498fbd 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3271,6 +3271,12 @@ static int kvmppc_core_check_processor_compat_hv(void) if (!cpu_has_feature(CPU_FTR_HVMODE) || !cpu_has_feature(CPU_FTR_ARCH_206)) return -EIO; + /* + * Disable KVM for Power9, until the required bits are merged. + */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + return -EIO; + return 0; } diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 95bceca8f40e..ffbaf40b7f31 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1683,7 +1683,11 @@ static void kvmppc_core_destroy_vm_pr(struct kvm *kvm) static int kvmppc_core_check_processor_compat_pr(void) { - /* we are always compatible */ + /* + * Disable KVM for Power9 until the required bits are merged. 
+ */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + return -EIO; return 0; } diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 8eaac81347fd..d873f6507f72 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -221,7 +221,7 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn, return -1; hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; - hpte_r = hpte_encode_r(pa, psize, apsize) | rflags; + hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags; if (!(vflags & HPTE_V_BOLTED)) { DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n", @@ -719,6 +719,12 @@ static void native_flush_hash_range(unsigned long number, int local) local_irq_restore(flags); } +static int native_update_partition_table(u64 patb1) +{ + partition_tb->patb1 = cpu_to_be64(patb1); + return 0; +} + void __init hpte_init_native(void) { ppc_md.hpte_invalidate = native_hpte_invalidate; @@ -729,4 +735,7 @@ void __init hpte_init_native(void) ppc_md.hpte_clear_all = native_hpte_clear; ppc_md.flush_hash_range = native_flush_hash_range; ppc_md.hugepage_invalidate = native_hugepage_invalidate; + + if (cpu_has_feature(CPU_FTR_ARCH_300)) + ppc_md.update_partition_table = native_update_partition_table; } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index eb928d86ce01..f76a033d1e16 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -676,6 +676,41 @@ int remove_section_mapping(unsigned long start, unsigned long end) } #endif /* CONFIG_MEMORY_HOTPLUG */ +static void __init hash_init_partition_table(phys_addr_t hash_table, + unsigned long pteg_count) +{ + unsigned long ps_field; + unsigned long htab_size; + unsigned long patb_size = 1UL << PATB_SIZE_SHIFT; + + /* + * slb llp encoding for the page size used in VPM real mode. + * We can ignore that for lpid 0 + */ + ps_field = 0; + htab_size = __ilog2(pteg_count) - 11; + + BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large."); + partition_tb = __va(memblock_alloc_base(patb_size, patb_size, + MEMBLOCK_ALLOC_ANYWHERE)); + + /* Initialize the Partition Table with no entries */ + memset((void *)partition_tb, 0, patb_size); + partition_tb->patb0 = cpu_to_be64(ps_field | hash_table | htab_size); + /* + * FIXME!! This should be done via update_partition table + * For now UPRT is 0 for us. + */ + partition_tb->patb1 = 0; + DBG("Partition table %p\n", partition_tb); + /* + * update partition table control register, + * 64 K size. 
*/ + mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); + +} + static void __init htab_initialize(void) { unsigned long table; @@ -744,8 +779,11 @@ static void __init htab_initialize(void) /* Initialize the HPT with no entries */ memset((void *)table, 0, htab_size_bytes); - /* Set SDR1 */ - mtspr(SPRN_SDR1, _SDR1); + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + /* Set SDR1 */ + mtspr(SPRN_SDR1, _SDR1); + else + hash_init_partition_table(table, pteg_count); } prot = pgprot_val(PAGE_KERNEL); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 177e4893438c..8f3b2942fba8 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -69,6 +69,13 @@ #endif #endif +#ifdef CONFIG_PPC_BOOK3S_64 +/* + * partition table and process table for ISA 3.0 + */ +struct prtb_entry *process_tb; +struct patb_entry *partition_tb; +#endif unsigned long ioremap_bot = IOREMAP_BASE; #ifdef CONFIG_PPC_MMU_NOHASH diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index 2f95d33cf34a..c9a3e677192a 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c @@ -63,7 +63,7 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn, vflags &= ~HPTE_V_SECONDARY; hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; - hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize) | rflags; + hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize, ssize) | rflags; spin_lock_irqsave(&ps3_htab_lock, flags); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 0d4608990702..e4ceba2b1551 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -139,7 +139,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, hpte_group, vpn, pa, rflags, vflags, psize); hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID; - hpte_r = hpte_encode_r(pa, psize, apsize) | rflags; + hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags; if (!(vflags & HPTE_V_BOLTED)) pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); -- cgit v1.2.3 From 945537df7a107e0dad7b0d3d14df46cfba46f4c5 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Apr 2016 23:25:45 +1000 Subject: powerpc/mm/book3s: Rename hash specific PTE bits to carry H_ prefix This makes it easier to follow which pte bits are hash-specific. We have kept _PAGE_CHG_MASK, _HPAGE_CHG_MASK and _PAGE_PROT_BITS as they are in this patch even though they use hash-specific bits. Using them as-is for radix should be ok, because with radix we expect those bit positions to be zero. Only renames in this patch, no change in functionality. 
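For orientation, a summary of the mechanical renames compiled from the hunks below (this table is an editor's aid, not part of the commit):

/*
 *   _PAGE_BUSY          -> H_PAGE_BUSY
 *   _PAGE_HASHPTE       -> H_PAGE_HASHPTE
 *   _PAGE_F_GIX(_SHIFT) -> H_PAGE_F_GIX(_SHIFT)
 *   _PAGE_F_SECOND      -> H_PAGE_F_SECOND
 *   _PAGE_COMBO(_VALID) -> H_PAGE_COMBO(_VALID)
 *   _PAGE_4K_PFN        -> H_PAGE_4K_PFN
 *   _PAGE_THP_HUGE      -> H_PAGE_THP_HUGE
 */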
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 12 ++++-- arch/powerpc/include/asm/book3s/64/hash-64k.h | 38 +++++++++++-------- arch/powerpc/include/asm/book3s/64/hash.h | 38 +++++++++---------- arch/powerpc/include/asm/book3s/64/pgtable.h | 4 +- arch/powerpc/include/asm/kvm_book3s_64.h | 4 +- arch/powerpc/include/asm/pte-common.h | 4 ++ arch/powerpc/mm/hash64_4k.c | 21 ++++++----- arch/powerpc/mm/hash64_64k.c | 53 ++++++++++++++------------- arch/powerpc/mm/hash_utils_64.c | 8 ++-- arch/powerpc/mm/hugepage-hash64.c | 14 +++---- arch/powerpc/mm/hugetlbpage-hash64.c | 20 +++++----- arch/powerpc/mm/pgtable_64.c | 8 ++-- arch/powerpc/mm/tlb_hash64.c | 4 +- 13 files changed, 121 insertions(+), 107 deletions(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 772850e517f3..2f818cbd8aa6 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -48,10 +48,14 @@ #define PGD_MASKED_BITS 0 /* PTE flags to conserve for HPTE identification */ -#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | \ - _PAGE_F_SECOND | _PAGE_F_GIX) - -#define _PAGE_4K_PFN 0 +#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | \ + H_PAGE_F_SECOND | H_PAGE_F_GIX) +/* + * Not supported by 4k linux page size + */ +#define H_PAGE_4K_PFN 0x0 +#define H_PAGE_THP_HUGE 0x0 +#define H_PAGE_COMBO 0x0 #ifndef __ASSEMBLY__ /* * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index ecc7ce0f4baf..607cf3219d88 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -29,17 +29,23 @@ #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#define _PAGE_COMBO 0x00001000 /* this is a combo 4k page */ -#define _PAGE_4K_PFN 0x00002000 /* PFN is for a single 4k page */ +#define H_PAGE_COMBO 0x00001000 /* this is a combo 4k page */ +#define H_PAGE_4K_PFN 0x00002000 /* PFN is for a single 4k page */ /* - * Used to track subpage group valid if _PAGE_COMBO is set - * This overloads _PAGE_F_GIX and _PAGE_F_SECOND + * We need to differentiate between explicit huge page and THP huge + * page, since THP huge page also need to track real subpage details */ -#define _PAGE_COMBO_VALID (_PAGE_F_GIX | _PAGE_F_SECOND) +#define H_PAGE_THP_HUGE H_PAGE_4K_PFN + +/* + * Used to track subpage group valid if H_PAGE_COMBO is set + * This overloads H_PAGE_F_GIX and H_PAGE_F_SECOND + */ +#define H_PAGE_COMBO_VALID (H_PAGE_F_GIX | H_PAGE_F_SECOND) /* PTE flags to conserve for HPTE identification */ -#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_F_SECOND | \ - _PAGE_F_GIX | _PAGE_HASHPTE | _PAGE_COMBO) +#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_F_SECOND | \ + H_PAGE_F_GIX | H_PAGE_HASHPTE | H_PAGE_COMBO) /* * we support 16 fragments per PTE page of 64K size. */ @@ -75,9 +81,9 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) rpte.pte = pte; rpte.hidx = 0; - if (pte_val(pte) & _PAGE_COMBO) { + if (pte_val(pte) & H_PAGE_COMBO) { /* - * Make sure we order the hidx load against the _PAGE_COMBO + * Make sure we order the hidx load against the H_PAGE_COMBO * check. 
The store side ordering is done in __hash_page_4K */ smp_rmb(); @@ -89,9 +95,9 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep) static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) { - if ((pte_val(rpte.pte) & _PAGE_COMBO)) + if ((pte_val(rpte.pte) & H_PAGE_COMBO)) return (rpte.hidx >> (index<<2)) & 0xf; - return (pte_val(rpte.pte) >> _PAGE_F_GIX_SHIFT) & 0xf; + return (pte_val(rpte.pte) >> H_PAGE_F_GIX_SHIFT) & 0xf; } #define __rpte_to_pte(r) ((r).pte) @@ -114,7 +120,7 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index); #define pte_iterate_hashed_end() } while(0); } } while(0) #define pte_pagesize_index(mm, addr, pte) \ - (((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K) + (((pte) & H_PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K) extern int remap_pfn_range(struct vm_area_struct *, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t); @@ -126,7 +132,7 @@ static inline int remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr, return -EINVAL; } return remap_pfn_range(vma, addr, pfn, PAGE_SIZE, - __pgprot(pgprot_val(prot) | _PAGE_4K_PFN)); + __pgprot(pgprot_val(prot) | H_PAGE_4K_PFN)); } #define PTE_TABLE_SIZE PTE_FRAG_SIZE @@ -255,8 +261,8 @@ static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array, */ static inline int pmd_trans_huge(pmd_t pmd) { - return !!((pmd_val(pmd) & (_PAGE_PTE | _PAGE_THP_HUGE)) == - (_PAGE_PTE | _PAGE_THP_HUGE)); + return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE)) == + (_PAGE_PTE | H_PAGE_THP_HUGE)); } static inline int pmd_large(pmd_t pmd) @@ -280,7 +286,7 @@ static inline int __pmdp_test_and_clear_young(struct mm_struct *mm, { unsigned long old; - if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0) + if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0) return 0; old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0); return ((old & _PAGE_ACCESSED) != 0); diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 4fffc963ac14..556670212781 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -26,19 +26,22 @@ #define _PAGE_TOLERANT 0x00030 /* tolerant memory, cache inhibited */ #define _PAGE_DIRTY 0x00080 /* C: page changed */ #define _PAGE_ACCESSED 0x00100 /* R: page referenced */ -#define _PAGE_SPECIAL 0x00400 /* software: special page */ -#define _PAGE_BUSY 0x00800 /* software: PTE & hash are busy */ - +/* + * Software bits + */ #ifdef CONFIG_MEM_SOFT_DIRTY -#define _PAGE_SOFT_DIRTY 0x200 /* software: software dirty tracking */ +#define _PAGE_SOFT_DIRTY 0x00200 /* software: software dirty tracking */ #else -#define _PAGE_SOFT_DIRTY 0x000 +#define _PAGE_SOFT_DIRTY 0x00000 #endif +#define _PAGE_SPECIAL 0x00400 /* software: special page */ +#define H_PAGE_BUSY 0x00800 /* software: PTE & hash are busy */ + -#define _PAGE_F_GIX_SHIFT 57 -#define _PAGE_F_GIX (7ul << 57) /* HPTE index within HPTEG */ -#define _PAGE_F_SECOND (1ul << 60) /* HPTE is in 2ndary HPTEG */ -#define _PAGE_HASHPTE (1ul << 61) /* PTE has associated HPTE */ +#define H_PAGE_F_GIX_SHIFT 57 +#define H_PAGE_F_GIX (7ul << 57) /* HPTE index within HPTEG */ +#define H_PAGE_F_SECOND (1ul << 60) /* HPTE is in 2ndary HPTEG */ +#define H_PAGE_HASHPTE (1ul << 61) /* PTE has associated HPTE */ #define _PAGE_PTE (1ul << 62) /* distinguishes PTEs from pointers */ #define _PAGE_PRESENT (1ul << 63) /* pte contains a translation */ /* @@ -47,11 +50,6 @@ * maps CI pte mapping. 
*/ #define _PAGE_NO_CACHE _PAGE_TOLERANT -/* - * We need to differentiate between explicit huge page and THP huge - * page, since THP huge page also need to track real subpage details - */ -#define _PAGE_THP_HUGE _PAGE_4K_PFN /* * We support 57 bit real address in pte. Clear everything above 57, and * every thing below PAGE_SHIFT; @@ -61,7 +59,7 @@ * set of bits not changed in pmd_modify. */ #define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \ - _PAGE_ACCESSED | _PAGE_THP_HUGE | _PAGE_PTE | \ + _PAGE_ACCESSED | H_PAGE_THP_HUGE | _PAGE_PTE | \ _PAGE_SOFT_DIRTY) @@ -148,7 +146,7 @@ * Mask of bits returned by pte_pgprot() */ #define PAGE_PROT_BITS (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \ - _PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \ + H_PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \ _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_EXEC | \ _PAGE_SOFT_DIRTY) /* @@ -262,14 +260,14 @@ static inline unsigned long pte_update(struct mm_struct *mm, bne- 1b" : "=&r" (old_be), "=&r" (tmp_be), "=m" (*ptep) : "r" (ptep), "r" (cpu_to_be64(clr)), "m" (*ptep), - "r" (cpu_to_be64(_PAGE_BUSY)), "r" (cpu_to_be64(set)) + "r" (cpu_to_be64(H_PAGE_BUSY)), "r" (cpu_to_be64(set)) : "cc" ); /* huge pages use the old page table lock */ if (!huge) assert_pte_locked(mm, addr); old = be64_to_cpu(old_be); - if (old & _PAGE_HASHPTE) + if (old & H_PAGE_HASHPTE) hpte_need_flush(mm, addr, ptep, old, huge); return old; @@ -287,7 +285,7 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm, { unsigned long old; - if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0) + if ((pte_val(*ptep) & (_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0) return 0; old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0); return (old & _PAGE_ACCESSED) != 0; @@ -355,7 +353,7 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) stdcx. 
%0,0,%4\n\ bne- 1b" :"=&r" (old), "=&r" (tmp), "=m" (*ptep) - :"r" (val), "r" (ptep), "m" (*ptep), "r" (cpu_to_be64(_PAGE_BUSY)) + :"r" (val), "r" (ptep), "m" (*ptep), "r" (cpu_to_be64(H_PAGE_BUSY)) :"cc"); } diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 4745d314acfb..65eb819609b1 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -45,7 +45,7 @@ #define __real_pte(e,p) ((real_pte_t){(e)}) #define __rpte_to_pte(r) ((r).pte) -#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >>_PAGE_F_GIX_SHIFT) +#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT) #define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \ do { \ @@ -287,7 +287,7 @@ static inline int pmd_protnone(pmd_t pmd) static inline pmd_t pmd_mkhuge(pmd_t pmd) { - return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_THP_HUGE)); + return __pmd(pmd_val(pmd) | (_PAGE_PTE | H_PAGE_THP_HUGE)); } #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 3c380c247d4e..1f4497fb5b83 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -310,9 +310,9 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing) */ old_pte = READ_ONCE(*ptep); /* - * wait until _PAGE_BUSY is clear then set it atomically + * wait until H_PAGE_BUSY is clear then set it atomically */ - if (unlikely(pte_val(old_pte) & _PAGE_BUSY)) { + if (unlikely(pte_val(old_pte) & H_PAGE_BUSY)) { cpu_relax(); continue; } diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h index 5f457535573b..2eeaf80d41b7 100644 --- a/arch/powerpc/include/asm/pte-common.h +++ b/arch/powerpc/include/asm/pte-common.h @@ -206,3 +206,7 @@ static inline bool pte_user(pte_t pte) #define _PAGE_READ 0 #define _PAGE_WRITE _PAGE_RW #endif + +#ifndef H_PAGE_4K_PFN +#define H_PAGE_4K_PFN 0 +#endif diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c index 529e49204f6b..6333b273d2d5 100644 --- a/arch/powerpc/mm/hash64_4k.c +++ b/arch/powerpc/mm/hash64_4k.c @@ -34,7 +34,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, old_pte = pte_val(pte); /* If PTE busy, retry the access */ - if (unlikely(old_pte & _PAGE_BUSY)) + if (unlikely(old_pte & H_PAGE_BUSY)) return 0; /* If PTE permissions don't match, take page fault */ if (unlikely(!check_pte_access(access, old_pte))) @@ -42,9 +42,9 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, /* * Try to lock the PTE, add ACCESSED and DIRTY if it was * a write access. 
Since this is 4K insert of 64K page size - * also add _PAGE_COMBO + * also add H_PAGE_COMBO */ - new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED; + new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED; if (access & _PAGE_WRITE) new_pte |= _PAGE_DIRTY; } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); @@ -60,22 +60,22 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap); vpn = hpt_vpn(ea, vsid, ssize); - if (unlikely(old_pte & _PAGE_HASHPTE)) { + if (unlikely(old_pte & H_PAGE_HASHPTE)) { /* * There MIGHT be an HPTE for this pte */ hash = hpt_hash(vpn, shift, ssize); - if (old_pte & _PAGE_F_SECOND) + if (old_pte & H_PAGE_F_SECOND) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += (old_pte & _PAGE_F_GIX) >> _PAGE_F_GIX_SHIFT; + slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K, MMU_PAGE_4K, ssize, flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } - if (likely(!(old_pte & _PAGE_HASHPTE))) { + if (likely(!(old_pte & H_PAGE_HASHPTE))) { pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; hash = hpt_hash(vpn, shift, ssize); @@ -115,9 +115,10 @@ repeat: MMU_PAGE_4K, MMU_PAGE_4K, old_pte); return -1; } - new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; - new_pte |= (slot << _PAGE_F_GIX_SHIFT) & (_PAGE_F_SECOND | _PAGE_F_GIX); + new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; + new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & + (H_PAGE_F_SECOND | H_PAGE_F_GIX); } - *ptep = __pte(new_pte & ~_PAGE_BUSY); + *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; } diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c index 675331083728..16644e1f4e6b 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/hash64_64k.c @@ -23,7 +23,7 @@ bool __rpte_sub_valid(real_pte_t rpte, unsigned long index) unsigned long g_idx; unsigned long ptev = pte_val(rpte.pte); - g_idx = (ptev & _PAGE_COMBO_VALID) >> _PAGE_F_GIX_SHIFT; + g_idx = (ptev & H_PAGE_COMBO_VALID) >> H_PAGE_F_GIX_SHIFT; index = index >> 2; if (g_idx & (0x1 << index)) return true; @@ -37,12 +37,12 @@ static unsigned long mark_subptegroup_valid(unsigned long ptev, unsigned long in { unsigned long g_idx; - if (!(ptev & _PAGE_COMBO)) + if (!(ptev & H_PAGE_COMBO)) return ptev; index = index >> 2; g_idx = 0x1 << index; - return ptev | (g_idx << _PAGE_F_GIX_SHIFT); + return ptev | (g_idx << H_PAGE_F_GIX_SHIFT); } int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, @@ -66,7 +66,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, old_pte = pte_val(pte); /* If PTE busy, retry the access */ - if (unlikely(old_pte & _PAGE_BUSY)) + if (unlikely(old_pte & H_PAGE_BUSY)) return 0; /* If PTE permissions don't match, take page fault */ if (unlikely(!check_pte_access(access, old_pte))) @@ -74,9 +74,9 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, /* * Try to lock the PTE, add ACCESSED and DIRTY if it was * a write access. 
Since this is 4K insert of 64K page size - * also add _PAGE_COMBO + * also add H_PAGE_COMBO */ - new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED | _PAGE_COMBO; + new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED | H_PAGE_COMBO; if (access & _PAGE_WRITE) new_pte |= _PAGE_DIRTY; } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); @@ -103,21 +103,21 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, /* *None of the sub 4k page is hashed */ - if (!(old_pte & _PAGE_HASHPTE)) + if (!(old_pte & H_PAGE_HASHPTE)) goto htab_insert_hpte; /* * Check if the pte was already inserted into the hash table * as a 64k HW page, and invalidate the 64k HPTE if so. */ - if (!(old_pte & _PAGE_COMBO)) { + if (!(old_pte & H_PAGE_COMBO)) { flush_hash_page(vpn, rpte, MMU_PAGE_64K, ssize, flags); /* * clear the old slot details from the old and new pte. * On hash insert failure we use old pte value and we don't * want slot information there if we have a insert failure. */ - old_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND); - new_pte &= ~(_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND); + old_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND); + new_pte &= ~(H_PAGE_HASHPTE | H_PAGE_F_GIX | H_PAGE_F_SECOND); goto htab_insert_hpte; } /* @@ -143,15 +143,15 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, if (ret == -1) goto htab_insert_hpte; - *ptep = __pte(new_pte & ~_PAGE_BUSY); + *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; } htab_insert_hpte: /* - * handle _PAGE_4K_PFN case + * handle H_PAGE_4K_PFN case */ - if (old_pte & _PAGE_4K_PFN) { + if (old_pte & H_PAGE_4K_PFN) { /* * All the sub 4k page have the same * physical address. @@ -199,20 +199,20 @@ repeat: } /* * Insert slot number & secondary bit in PTE second half, - * clear _PAGE_BUSY and set appropriate HPTE slot bit - * Since we have _PAGE_BUSY set on ptep, we can be sure + * clear H_PAGE_BUSY and set appropriate HPTE slot bit + * Since we have H_PAGE_BUSY set on ptep, we can be sure * nobody is undating hidx. */ hidxp = (unsigned long *)(ptep + PTRS_PER_PTE); rpte.hidx &= ~(0xfUL << (subpg_index << 2)); *hidxp = rpte.hidx | (slot << (subpg_index << 2)); new_pte = mark_subptegroup_valid(new_pte, subpg_index); - new_pte |= _PAGE_HASHPTE; + new_pte |= H_PAGE_HASHPTE; /* * check __real_pte for details on matching smp_rmb() */ smp_wmb(); - *ptep = __pte(new_pte & ~_PAGE_BUSY); + *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; } @@ -234,7 +234,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, old_pte = pte_val(pte); /* If PTE busy, retry the access */ - if (unlikely(old_pte & _PAGE_BUSY)) + if (unlikely(old_pte & H_PAGE_BUSY)) return 0; /* If PTE permissions don't match, take page fault */ if (unlikely(!check_pte_access(access, old_pte))) @@ -250,7 +250,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access, * Try to lock the PTE, add ACCESSED and DIRTY if it was * a write access. 
*/ - new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED; + new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED; if (access & _PAGE_WRITE) new_pte |= _PAGE_DIRTY; } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); @@ -262,22 +262,22 @@ int __hash_page_64K(unsigned long ea, unsigned long access, rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap); vpn = hpt_vpn(ea, vsid, ssize); - if (unlikely(old_pte & _PAGE_HASHPTE)) { + if (unlikely(old_pte & H_PAGE_HASHPTE)) { /* * There MIGHT be an HPTE for this pte */ hash = hpt_hash(vpn, shift, ssize); - if (old_pte & _PAGE_F_SECOND) + if (old_pte & H_PAGE_F_SECOND) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += (old_pte & _PAGE_F_GIX) >> _PAGE_F_GIX_SHIFT; + slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K, MMU_PAGE_64K, ssize, flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } - if (likely(!(old_pte & _PAGE_HASHPTE))) { + if (likely(!(old_pte & H_PAGE_HASHPTE))) { pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; hash = hpt_hash(vpn, shift, ssize); @@ -317,9 +317,10 @@ repeat: MMU_PAGE_64K, MMU_PAGE_64K, old_pte); return -1; } - new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; - new_pte |= (slot << _PAGE_F_GIX_SHIFT) & (_PAGE_F_SECOND | _PAGE_F_GIX); + new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; + new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & + (H_PAGE_F_SECOND | H_PAGE_F_GIX); } - *ptep = __pte(new_pte & ~_PAGE_BUSY); + *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index f76a033d1e16..2a193f28b6c7 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1172,8 +1172,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, #endif /* Do actual hashing */ #ifdef CONFIG_PPC_64K_PAGES - /* If _PAGE_4K_PFN is set, make sure this is a 4k segment */ - if ((pte_val(*ptep) & _PAGE_4K_PFN) && psize == MMU_PAGE_64K) { + /* If H_PAGE_4K_PFN is set, make sure this is a 4k segment */ + if ((pte_val(*ptep) & H_PAGE_4K_PFN) && psize == MMU_PAGE_64K) { demote_segment_4k(mm, ea); psize = MMU_PAGE_4K; } @@ -1335,13 +1335,13 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, WARN_ON(hugepage_shift); #ifdef CONFIG_PPC_64K_PAGES - /* If either _PAGE_4K_PFN or cache inhibited is set (and we are on + /* If either H_PAGE_4K_PFN or cache inhibited is set (and we are on * a 64K kernel), then we don't preload, hash_page() will take * care of it once we actually try to access the page. 
* That way we don't have to duplicate all of the logic for segment * page size demotion here */ - if ((pte_val(*ptep) & _PAGE_4K_PFN) || pte_ci(*ptep)) + if ((pte_val(*ptep) & H_PAGE_4K_PFN) || pte_ci(*ptep)) goto out_exit; #endif /* CONFIG_PPC_64K_PAGES */ diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 6cb6bdd254bb..ba3fc229468a 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -37,7 +37,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, old_pmd = pmd_val(pmd); /* If PMD busy, retry the access */ - if (unlikely(old_pmd & _PAGE_BUSY)) + if (unlikely(old_pmd & H_PAGE_BUSY)) return 0; /* If PMD permissions don't match, take page fault */ if (unlikely(!check_pte_access(access, old_pmd))) @@ -46,7 +46,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, * Try to lock the PTE, add ACCESSED and DIRTY if it was * a write access */ - new_pmd = old_pmd | _PAGE_BUSY | _PAGE_ACCESSED; + new_pmd = old_pmd | H_PAGE_BUSY | _PAGE_ACCESSED; if (access & _PAGE_WRITE) new_pmd |= _PAGE_DIRTY; } while (!pmd_xchg(pmdp, __pmd(old_pmd), __pmd(new_pmd))); @@ -78,7 +78,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, * base page size. This is because demote_segment won't flush * hash page table entries. */ - if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) { + if ((old_pmd & H_PAGE_HASHPTE) && !(old_pmd & H_PAGE_COMBO)) { flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K, ssize, flags); /* @@ -125,7 +125,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, hash = hpt_hash(vpn, shift, ssize); /* insert new entry */ pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT; - new_pmd |= _PAGE_HASHPTE; + new_pmd |= H_PAGE_HASHPTE; repeat: hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; @@ -169,17 +169,17 @@ repeat: mark_hpte_slot_valid(hpte_slot_array, index, slot); } /* - * Mark the pte with _PAGE_COMBO, if we are trying to hash it with + * Mark the pte with H_PAGE_COMBO, if we are trying to hash it with * base page size 4k. */ if (psize == MMU_PAGE_4K) - new_pmd |= _PAGE_COMBO; + new_pmd |= H_PAGE_COMBO; /* * The hpte valid is stored in the pgtable whose address is in the * second half of the PMD. Order this against clearing of the busy bit in * huge pmd. 
*/ smp_wmb(); - *pmdp = __pmd(new_pmd & ~_PAGE_BUSY); + *pmdp = __pmd(new_pmd & ~H_PAGE_BUSY); return 0; } diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index bf9078440256..3058560b6121 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -47,7 +47,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, do { old_pte = pte_val(*ptep); /* If PTE busy, retry the access */ - if (unlikely(old_pte & _PAGE_BUSY)) + if (unlikely(old_pte & H_PAGE_BUSY)) return 0; /* If PTE permissions don't match, take page fault */ if (unlikely(!check_pte_access(access, old_pte))) @@ -55,7 +55,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, /* Try to lock the PTE, add ACCESSED and DIRTY if it was * a write access */ - new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED; + new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED; if (access & _PAGE_WRITE) new_pte |= _PAGE_DIRTY; } while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte))); @@ -69,28 +69,28 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap); /* Check if pte already has an hpte (case 2) */ - if (unlikely(old_pte & _PAGE_HASHPTE)) { + if (unlikely(old_pte & H_PAGE_HASHPTE)) { /* There MIGHT be an HPTE for this pte */ unsigned long hash, slot; hash = hpt_hash(vpn, shift, ssize); - if (old_pte & _PAGE_F_SECOND) + if (old_pte & H_PAGE_F_SECOND) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; - slot += (old_pte & _PAGE_F_GIX) >> _PAGE_F_GIX_SHIFT; + slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize, mmu_psize, ssize, flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } - if (likely(!(old_pte & _PAGE_HASHPTE))) { + if (likely(!(old_pte & H_PAGE_HASHPTE))) { unsigned long hash = hpt_hash(vpn, shift, ssize); pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT; /* clear HPTE slot informations in new PTE */ - new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE; + new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE; slot = hpte_insert_repeating(hash, vpn, pa, rflags, 0, mmu_psize, ssize); @@ -106,14 +106,14 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, return -1; } - new_pte |= (slot << _PAGE_F_GIX_SHIFT) & - (_PAGE_F_SECOND | _PAGE_F_GIX); + new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & + (H_PAGE_F_SECOND | H_PAGE_F_GIX); } /* * No need to use ldarx/stdcx here */ - *ptep = __pte(new_pte & ~_PAGE_BUSY); + *ptep = __pte(new_pte & ~H_PAGE_BUSY); return 0; } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 1bfb112e1453..1fe356cad7f6 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -92,7 +92,7 @@ void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size, flags |= pgprot_val(PAGE_KERNEL); /* We don't support the 4K PFN hack with ioremap */ - if (flags & _PAGE_4K_PFN) + if (flags & H_PAGE_4K_PFN) return NULL; WARN_ON(pa & ~PAGE_MASK); @@ -462,13 +462,13 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, bne- 1b" : "=&r" (old_be), "=&r" (tmp), "=m" (*pmdp) : "r" (pmdp), "r" (cpu_to_be64(clr)), "m" (*pmdp), - "r" (cpu_to_be64(_PAGE_BUSY)), "r" (cpu_to_be64(set)) + "r" (cpu_to_be64(H_PAGE_BUSY)), "r" (cpu_to_be64(set)) : "cc" ); old = be64_to_cpu(old_be); trace_hugepage_update(addr, old, clr, set); - if (old & _PAGE_HASHPTE) + if (old & 
H_PAGE_HASHPTE) hpte_do_hugepage_flush(mm, addr, pmdp, old); return old; } @@ -640,7 +640,7 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, psize = get_slice_psize(mm, addr); BUG_ON(psize == MMU_PAGE_16M); #endif - if (old_pmd & _PAGE_COMBO) + if (old_pmd & H_PAGE_COMBO) psize = MMU_PAGE_4K; else psize = MMU_PAGE_64K; diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c index f7b80391bee7..38497cf5e31b 100644 --- a/arch/powerpc/mm/tlb_hash64.c +++ b/arch/powerpc/mm/tlb_hash64.c @@ -218,7 +218,7 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start, pte = pte_val(*ptep); if (is_thp) trace_hugepage_invalidate(start, pte); - if (!(pte & _PAGE_HASHPTE)) + if (!(pte & H_PAGE_HASHPTE)) continue; if (unlikely(is_thp)) hpte_do_hugepage_flush(mm, start, (pmd_t *)ptep, pte); @@ -248,7 +248,7 @@ void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr) start_pte = pte_offset_map(pmd, addr); for (pte = start_pte; pte < start_pte + PTRS_PER_PTE; pte++) { unsigned long pteval = pte_val(*pte); - if (pteval & _PAGE_HASHPTE) + if (pteval & H_PAGE_HASHPTE) hpte_need_flush(mm, addr, pte, pteval, 0); addr += PAGE_SIZE; } -- cgit v1.2.3 From dd1842a2a448bb66d74aa02a550df6be8c25f20b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Apr 2016 23:25:49 +1000 Subject: powerpc/mm: Make page table size a variable Radix and hash MMU models support different page table sizes. Make the #defines variables so that existing code can work with variable sizes. Slice related code is only used by hash, so use hash constants there. We will replicate some of the boundary conditions with respect to TASK_SIZE using radix values too. Right now we do the boundary condition checks using hash constants. Swapper pgdir size is initialized in asm code. We select the max pgd size to keep it simple. For now we select the hash pgdir. When adding radix we will switch that to the radix pgdir, which is 64K. The BUILD_BUG_ON check which is removed is already done in hugepage_init() using MAYBE_BUILD_BUG_ON(). Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 45 ++++---------------- arch/powerpc/include/asm/book3s/64/hash-64k.h | 46 +++++--------------- arch/powerpc/include/asm/book3s/64/hash.h | 14 ++++--- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 4 +- arch/powerpc/include/asm/book3s/64/pgtable.h | 60 +++++++++++++++++++++++++++ arch/powerpc/include/asm/page_64.h | 2 +- arch/powerpc/kernel/asm-offsets.c | 4 ++ arch/powerpc/mm/hash_utils_64.c | 12 ++++++ arch/powerpc/mm/init_64.c | 4 +- arch/powerpc/mm/pgtable-book3e.c | 1 + arch/powerpc/mm/pgtable-hash64.c | 1 + arch/powerpc/mm/pgtable_64.c | 33 ++++++++++----- arch/powerpc/mm/slb_low.S | 2 +- arch/powerpc/mm/slice.c | 4 +- 14 files changed, 135 insertions(+), 97 deletions(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index 2f818cbd8aa6..dcb9d6e94a0c 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -5,48 +5,20 @@ * for each page table entry. The PMD and PGD level use a 32b record for * each entry by assuming that each entry is page aligned.
*/ -#define PTE_INDEX_SIZE 9 -#define PMD_INDEX_SIZE 7 -#define PUD_INDEX_SIZE 9 -#define PGD_INDEX_SIZE 9 +#define H_PTE_INDEX_SIZE 9 +#define H_PMD_INDEX_SIZE 7 +#define H_PUD_INDEX_SIZE 9 +#define H_PGD_INDEX_SIZE 9 #ifndef __ASSEMBLY__ -#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) -#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) -#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) -#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) -#endif /* __ASSEMBLY__ */ - -#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) -#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) -#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE) -#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) - -/* PMD_SHIFT determines what a second-level page table entry can map */ -#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) -#define PMD_SIZE (1UL << PMD_SHIFT) -#define PMD_MASK (~(PMD_SIZE-1)) +#define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE) +#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << H_PMD_INDEX_SIZE) +#define H_PUD_TABLE_SIZE (sizeof(pud_t) << H_PUD_INDEX_SIZE) +#define H_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE) /* With 4k base page size, hugepage PTEs go at the PMD level */ #define MIN_HUGEPTE_SHIFT PMD_SHIFT -/* PUD_SHIFT determines what a third-level page table entry can map */ -#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) -#define PUD_SIZE (1UL << PUD_SHIFT) -#define PUD_MASK (~(PUD_SIZE-1)) - -/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ -#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) -#define PGDIR_SIZE (1UL << PGDIR_SHIFT) -#define PGDIR_MASK (~(PGDIR_SIZE-1)) - -/* Bits to mask out from a PMD to get to the PTE page */ -#define PMD_MASKED_BITS 0 -/* Bits to mask out from a PUD to get to the PMD page */ -#define PUD_MASKED_BITS 0 -/* Bits to mask out from a PGD to get to the PUD page */ -#define PGD_MASKED_BITS 0 - /* PTE flags to conserve for HPTE identification */ #define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | \ H_PAGE_F_SECOND | H_PAGE_F_GIX) @@ -56,7 +28,6 @@ #define H_PAGE_4K_PFN 0x0 #define H_PAGE_THP_HUGE 0x0 #define H_PAGE_COMBO 0x0 -#ifndef __ASSEMBLY__ /* * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */ diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index 607cf3219d88..7d2a9b6a55ca 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -1,34 +1,14 @@ #ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H #define _ASM_POWERPC_BOOK3S_64_HASH_64K_H -#define PTE_INDEX_SIZE 8 -#define PMD_INDEX_SIZE 5 -#define PUD_INDEX_SIZE 5 -#define PGD_INDEX_SIZE 12 - -#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) -#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) -#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE) -#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) +#define H_PTE_INDEX_SIZE 8 +#define H_PMD_INDEX_SIZE 5 +#define H_PUD_INDEX_SIZE 5 +#define H_PGD_INDEX_SIZE 12 /* With 4k base page size, hugepage PTEs go at the PMD level */ #define MIN_HUGEPTE_SHIFT PAGE_SHIFT -/* PMD_SHIFT determines what a second-level page table entry can map */ -#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) -#define PMD_SIZE (1UL << PMD_SHIFT) -#define PMD_MASK (~(PMD_SIZE-1)) - -/* PUD_SHIFT determines what a third-level page table entry can map */ -#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) -#define PUD_SIZE (1UL << PUD_SHIFT) -#define PUD_MASK (~(PUD_SIZE-1)) - -/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ -#define PGDIR_SHIFT (PUD_SHIFT + 
PUD_INDEX_SIZE) -#define PGDIR_SIZE (1UL << PGDIR_SHIFT) -#define PGDIR_MASK (~(PGDIR_SIZE-1)) - #define H_PAGE_COMBO 0x00001000 /* this is a combo 4k page */ #define H_PAGE_4K_PFN 0x00002000 /* PFN is for a single 4k page */ /* @@ -57,13 +37,6 @@ #define PTE_FRAG_SIZE_SHIFT 12 #define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT) -/* Bits to mask out from a PMD to get to the PTE page */ -#define PMD_MASKED_BITS 0xc0000000000000ffUL -/* Bits to mask out from a PUD to get to the PMD page */ -#define PUD_MASKED_BITS 0xc0000000000000ffUL -/* Bits to mask out from a PGD to get to the PUD page */ -#define PGD_MASKED_BITS 0xc0000000000000ffUL - #ifndef __ASSEMBLY__ #include @@ -135,14 +108,15 @@ static inline int remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr, __pgprot(pgprot_val(prot) | H_PAGE_4K_PFN)); } -#define PTE_TABLE_SIZE PTE_FRAG_SIZE +#define H_PTE_TABLE_SIZE PTE_FRAG_SIZE #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + (sizeof(unsigned long) << PMD_INDEX_SIZE)) +#define H_PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + \ + (sizeof(unsigned long) << PMD_INDEX_SIZE)) #else -#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) +#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) #endif -#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) -#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) +#define H_PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) +#define H_PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) #ifdef CONFIG_HUGETLB_PAGE /* diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 04c004d62f49..03044af6f1f5 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -29,14 +29,18 @@ /* * Size of EA range mapped by our pagetables. */ -#define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \ - PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT) -#define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE) +#define H_PGTABLE_EADDR_SIZE (H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + \ + H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT) +#define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE) #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define PMD_CACHE_INDEX (PMD_INDEX_SIZE + 1) +/* + * only with hash we need to use the second half of pmd page table + * to store pointer to deposited pgtable_t + */ +#define H_PMD_CACHE_INDEX (H_PMD_INDEX_SIZE + 1) #else -#define PMD_CACHE_INDEX PMD_INDEX_SIZE +#define H_PMD_CACHE_INDEX H_PMD_INDEX_SIZE #endif /* * Define the address range of the kernel non-linear virtual area diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 843b5d839904..7da61b85406b 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -462,7 +462,7 @@ extern void slb_set_size(u16 size); add rt,rt,rx /* 4 bits per slice and we have one slice per 1TB */ -#define SLICE_ARRAY_SIZE (PGTABLE_RANGE >> 41) +#define SLICE_ARRAY_SIZE (H_PGTABLE_RANGE >> 41) #ifndef __ASSEMBLY__ @@ -533,7 +533,7 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea, /* * Bad address. 
We return VSID 0 for that */ - if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) + if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE) return 0; if (ssize == MMU_SEGSIZE_256M) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 7ec176674b78..f30f40443343 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -144,6 +144,66 @@ #define PAGE_KERNEL_EXEC PAGE_KERNEL_X #define PAGE_AGP (PAGE_KERNEL_NC) +#ifndef __ASSEMBLY__ +/* + * page table defines + */ +extern unsigned long __pte_index_size; +extern unsigned long __pmd_index_size; +extern unsigned long __pud_index_size; +extern unsigned long __pgd_index_size; +extern unsigned long __pmd_cache_index; +#define PTE_INDEX_SIZE __pte_index_size +#define PMD_INDEX_SIZE __pmd_index_size +#define PUD_INDEX_SIZE __pud_index_size +#define PGD_INDEX_SIZE __pgd_index_size +#define PMD_CACHE_INDEX __pmd_cache_index +/* + * Because of use of pte fragments and THP, size of page table + * are not always derived out of index size above. + */ +extern unsigned long __pte_table_size; +extern unsigned long __pmd_table_size; +extern unsigned long __pud_table_size; +extern unsigned long __pgd_table_size; +#define PTE_TABLE_SIZE __pte_table_size +#define PMD_TABLE_SIZE __pmd_table_size +#define PUD_TABLE_SIZE __pud_table_size +#define PGD_TABLE_SIZE __pgd_table_size +/* + * Pgtable size used by swapper, init in asm code + * We will switch this later to radix PGD + */ +#define MAX_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE) + +#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) +#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) +#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE) +#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) + +/* PMD_SHIFT determines what a second-level page table entry can map */ +#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) + +/* PUD_SHIFT determines what a third-level page table entry can map */ +#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) + +/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ +#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +/* Bits to mask out from a PMD to get to the PTE page */ +#define PMD_MASKED_BITS 0xc0000000000000ffUL +/* Bits to mask out from a PUD to get to the PMD page */ +#define PUD_MASKED_BITS 0xc0000000000000ffUL +/* Bits to mask out from a PGD to get to the PUD page */ +#define PGD_MASKED_BITS 0xc0000000000000ffUL +#endif /* __ASSEMBLY__ */ + #include #include diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index d908a46d05c0..77488857c26d 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -93,7 +93,7 @@ extern u64 ppc64_pft_size; #define SLICE_LOW_TOP (0x100000000ul) #define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT) -#define SLICE_NUM_HIGH (PGTABLE_RANGE >> SLICE_HIGH_SHIFT) +#define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT) #define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT) #define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c9370d4e36bd..9ea09551a2cd 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -438,7 +438,11 @@ int main(void) 
DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); #endif +#ifdef MAX_PGD_TABLE_SIZE + DEFINE(PGD_TABLE_SIZE, MAX_PGD_TABLE_SIZE); +#else DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE); +#endif DEFINE(PTE_SIZE, sizeof(pte_t)); #ifdef CONFIG_KVM diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 2a193f28b6c7..aafc613656fc 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -870,6 +870,18 @@ static void __init htab_initialize(void) void __init early_init_mmu(void) { + /* + * initialize page table size + */ + __pte_index_size = H_PTE_INDEX_SIZE; + __pmd_index_size = H_PMD_INDEX_SIZE; + __pud_index_size = H_PUD_INDEX_SIZE; + __pgd_index_size = H_PGD_INDEX_SIZE; + __pmd_cache_index = H_PMD_CACHE_INDEX; + __pte_table_size = H_PTE_TABLE_SIZE; + __pmd_table_size = H_PMD_TABLE_SIZE; + __pud_table_size = H_PUD_TABLE_SIZE; + __pgd_table_size = H_PGD_TABLE_SIZE; /* Initialize the MMU Hash table and create the linear mapping * of memory. Has to be done before SLB initialization as this is * currently where the page size encoding is obtained. diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 8d1daf7d9785..09ca65e55b58 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -66,11 +66,11 @@ #include "mmu_decl.h" #ifdef CONFIG_PPC_STD_MMU_64 -#if PGTABLE_RANGE > USER_VSID_RANGE +#if H_PGTABLE_RANGE > USER_VSID_RANGE #warning Limited user VSID range means pagetable space is wasted #endif -#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) +#if (TASK_SIZE_USER64 < H_PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) #warning TASK_SIZE is smaller than it needs to be. #endif #endif /* CONFIG_PPC_STD_MMU_64 */ diff --git a/arch/powerpc/mm/pgtable-book3e.c b/arch/powerpc/mm/pgtable-book3e.c index b323735a8360..a2298930f990 100644 --- a/arch/powerpc/mm/pgtable-book3e.c +++ b/arch/powerpc/mm/pgtable-book3e.c @@ -77,6 +77,7 @@ int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags) pmd_t *pmdp; pte_t *ptep; + BUILD_BUG_ON(TASK_SIZE_USER64 > PGTABLE_RANGE); if (slab_is_available()) { pgdp = pgd_offset_k(ea); pudp = pud_alloc(&init_mm, pgdp, ea); diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c index f405a67c807a..6f5fa68c784e 100644 --- a/arch/powerpc/mm/pgtable-hash64.c +++ b/arch/powerpc/mm/pgtable-hash64.c @@ -61,6 +61,7 @@ int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags) pmd_t *pmdp; pte_t *ptep; + BUILD_BUG_ON(TASK_SIZE_USER64 > H_PGTABLE_RANGE); if (slab_is_available()) { pgdp = pgd_offset_k(ea); pudp = pud_alloc(&init_mm, pgdp, ea); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 1fe356cad7f6..dafcd9ff78df 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -58,11 +58,6 @@ #define CREATE_TRACE_POINTS #include -/* Some sanity checking */ -#if TASK_SIZE_USER64 > PGTABLE_RANGE -#error TASK_SIZE_USER64 exceeds pagetable range -#endif - #ifdef CONFIG_PPC_STD_MMU_64 #if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT)) #error TASK_SIZE_USER64 exceeds user VSID range @@ -75,6 +70,28 @@ */ struct prtb_entry *process_tb; struct patb_entry *partition_tb; +/* + * page table size + */ +unsigned long __pte_index_size; +EXPORT_SYMBOL(__pte_index_size); +unsigned long __pmd_index_size; +EXPORT_SYMBOL(__pmd_index_size); +unsigned long __pud_index_size; +EXPORT_SYMBOL(__pud_index_size); +unsigned long __pgd_index_size; +EXPORT_SYMBOL(__pgd_index_size); 
+unsigned long __pmd_cache_index; +EXPORT_SYMBOL(__pmd_cache_index); +unsigned long __pte_table_size; +EXPORT_SYMBOL(__pte_table_size); +unsigned long __pmd_table_size; +EXPORT_SYMBOL(__pmd_table_size); +unsigned long __pud_table_size; +EXPORT_SYMBOL(__pud_table_size); +unsigned long __pgd_table_size; +EXPORT_SYMBOL(__pgd_table_size); + #endif unsigned long ioremap_bot = IOREMAP_BASE; @@ -739,12 +756,6 @@ pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, int has_transparent_hugepage(void) { - BUILD_BUG_ON_MSG((PMD_SHIFT - PAGE_SHIFT) >= MAX_ORDER, - "hugepages can't be allocated by the buddy allocator"); - - BUILD_BUG_ON_MSG((PMD_SHIFT - PAGE_SHIFT) < 2, - "We need more than 2 pages to do deferred thp split"); - if (!mmu_has_feature(MMU_FTR_16M_PAGE)) return 0; /* diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index d3374004d20d..15b8f712b50b 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -35,7 +35,7 @@ _GLOBAL(slb_allocate_realmode) * check for bad kernel/user address * (ea & ~REGION_MASK) >= PGTABLE_RANGE */ - rldicr. r9,r3,4,(63 - PGTABLE_EADDR_SIZE - 4) + rldicr. r9,r3,4,(63 - H_PGTABLE_EADDR_SIZE - 4) bne- 8f srdi r9,r3,60 /* get region */ diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 42954f0b47ac..ee21b8699cee 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -37,8 +37,8 @@ #include /* some sanity checks */ -#if (PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE -#error PGTABLE_RANGE exceeds slice_mask high_slices size +#if (H_PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE +#error H_PGTABLE_RANGE exceeds slice_mask high_slices size #endif static DEFINE_SPINLOCK(slice_convert_lock); -- cgit v1.2.3 From 756d08d1ba169ed9eae4d48e9a0af014e86593fc Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Apr 2016 23:25:57 +1000 Subject: powerpc/mm: Abstract early MMU init in preparation for radix Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu.h | 20 ++++++++++++++++++++ arch/powerpc/include/asm/mmu.h | 14 +++++++------- arch/powerpc/mm/hash_utils_64.c | 6 +++--- 3 files changed, 30 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 593d9e3ce8e7..37943f7af60f 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -97,5 +97,25 @@ extern int mmu_vmalloc_psize; extern int mmu_vmemmap_psize; extern int mmu_io_psize; +/* MMU initialization */ +extern void hash__early_init_mmu(void); +static inline void early_init_mmu(void) +{ + return hash__early_init_mmu(); +} +extern void hash__early_init_mmu_secondary(void); +static inline void early_init_mmu_secondary(void) +{ + return hash__early_init_mmu_secondary(); +} + +extern void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size); +static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + return hash__setup_initial_memory_limit(first_memblock_base, + first_memblock_size); +} #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */ diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index decde4c4870d..34070e982f1d 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -122,13 +122,6 @@ static inline void mmu_clear_feature(unsigned long feature) extern unsigned int 
__start___mmu_ftr_fixup, __stop___mmu_ftr_fixup; -/* MMU initialization */ -extern void early_init_mmu(void); -extern void early_init_mmu_secondary(void); - -extern void setup_initial_memory_limit(phys_addr_t first_memblock_base, - phys_addr_t first_memblock_size); - #ifdef CONFIG_PPC64 /* This is our real memory area size on ppc64 server, on embedded, we * make it match the size our of bolted TLB area @@ -185,6 +178,13 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr) #include #else /* CONFIG_PPC_BOOK3S_64 */ +#ifndef __ASSEMBLY__ +/* MMU initialization */ +extern void early_init_mmu(void); +extern void early_init_mmu_secondary(void); +extern void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size); +#endif /* __ASSEMBLY__ */ #endif #if defined(CONFIG_PPC_STD_MMU_32) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index aafc613656fc..e6b53cde676e 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -868,7 +868,7 @@ static void __init htab_initialize(void) #undef KB #undef MB -void __init early_init_mmu(void) +void __init hash__early_init_mmu(void) { /* * initialize page table size @@ -893,7 +893,7 @@ void __init early_init_mmu(void) } #ifdef CONFIG_SMP -void early_init_mmu_secondary(void) +void hash__early_init_mmu_secondary(void) { /* Initialize hash table for that CPU */ if (!firmware_has_feature(FW_FEATURE_LPAR)) @@ -1635,7 +1635,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable) } #endif /* CONFIG_DEBUG_PAGEALLOC */ -void setup_initial_memory_limit(phys_addr_t first_memblock_base, +void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base, phys_addr_t first_memblock_size) { /* We don't currently support the first MEMBLOCK not mapping 0 -- cgit v1.2.3 From b5dcc6096971b7c11b443b79ae8b69ccb01ec04e Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Apr 2016 23:26:12 +1000 Subject: powerpc/mm/radix: Update PTCR on secondary CPUs Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index e6b53cde676e..faaadeff7c1e 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -896,9 +896,13 @@ void __init hash__early_init_mmu(void) void hash__early_init_mmu_secondary(void) { /* Initialize hash table for that CPU */ - if (!firmware_has_feature(FW_FEATURE_LPAR)) - mtspr(SPRN_SDR1, _SDR1); - + if (!firmware_has_feature(FW_FEATURE_LPAR)) { + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + mtspr(SPRN_SDR1, _SDR1); + else + mtspr(SPRN_PTCR, + __pa(partition_tb) | (PATB_SIZE_SHIFT - 12)); + } /* Initialize SLB */ slb_initialize(); } -- cgit v1.2.3 From a2f41eb99208adf4732529c4df1fd12b39f4c333 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Apr 2016 23:26:19 +1000 Subject: powerpc/mm: Add radix pgalloc details Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgalloc.h | 34 ++++++++++++++++++++++++---- arch/powerpc/include/asm/book3s/64/pgtable.h | 10 ++++++-- arch/powerpc/mm/hash_utils_64.c | 7 ++++++ arch/powerpc/mm/pgtable-radix.c | 5 +++- arch/powerpc/mm/pgtable_64.c | 6 +++++ 5 files changed, 55 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git 
a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index faad1319ba26..488279edb1f0 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -50,19 +50,45 @@ extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift); extern void __tlb_remove_table(void *_table); #endif +static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm) +{ +#ifdef CONFIG_PPC_64K_PAGES + return (pgd_t *)__get_free_page(PGALLOC_GFP); +#else + struct page *page; + page = alloc_pages(PGALLOC_GFP, 4); + if (!page) + return NULL; + return (pgd_t *) page_address(page); +#endif +} + +static inline void radix__pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ +#ifdef CONFIG_PPC_64K_PAGES + free_page((unsigned long)pgd); +#else + free_pages((unsigned long)pgd, 4); +#endif +} + static inline pgd_t *pgd_alloc(struct mm_struct *mm) { + if (radix_enabled()) + return radix__pgd_alloc(mm); return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL); } static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { + if (radix_enabled()) + return radix__pgd_free(mm, pgd); kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd); } static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) { - pgd_set(pgd, __pgtable_ptr_val(pud)); + pgd_set(pgd, __pgtable_ptr_val(pud) | PGD_VAL_BITS); } static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) @@ -78,7 +104,7 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { - pud_set(pud, __pgtable_ptr_val(pmd)); + pud_set(pud, __pgtable_ptr_val(pmd) | PUD_VAL_BITS); } static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, @@ -107,13 +133,13 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) { - pmd_set(pmd, __pgtable_ptr_val(pte)); + pmd_set(pmd, __pgtable_ptr_val(pte) | PMD_VAL_BITS); } static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte_page) { - pmd_set(pmd, __pgtable_ptr_val(pte_page)); + pmd_set(pmd, __pgtable_ptr_val(pte_page) | PMD_VAL_BITS); } static inline pgtable_t pmd_pgtable(pmd_t pmd) diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 9bc2471a8207..32a975694e53 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -170,11 +170,17 @@ extern unsigned long __pgd_table_size; #define PMD_TABLE_SIZE __pmd_table_size #define PUD_TABLE_SIZE __pud_table_size #define PGD_TABLE_SIZE __pgd_table_size + +extern unsigned long __pmd_val_bits; +extern unsigned long __pud_val_bits; +extern unsigned long __pgd_val_bits; +#define PMD_VAL_BITS __pmd_val_bits +#define PUD_VAL_BITS __pud_val_bits +#define PGD_VAL_BITS __pgd_val_bits /* * Pgtable size used by swapper, init in asm code - * We will switch this later to radix PGD */ -#define MAX_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE) +#define MAX_PGD_TABLE_SIZE (sizeof(pgd_t) << RADIX_PGD_INDEX_SIZE) #define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) #define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index faaadeff7c1e..64165a730160 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -882,6 +882,13 @@ void __init hash__early_init_mmu(void) 
__pmd_table_size = H_PMD_TABLE_SIZE; __pud_table_size = H_PUD_TABLE_SIZE; __pgd_table_size = H_PGD_TABLE_SIZE; + /* + * 4k use hugepd format, so for hash set then to + * zero + */ + __pmd_val_bits = 0; + __pud_val_bits = 0; + __pgd_val_bits = 0; /* Initialize the MMU Hash table and create the linear mapping * of memory. Has to be done before SLB initialization as this is * currently where the page size encoding is obtained. diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 04ec9760380d..6182b6c3c9c3 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -324,8 +324,11 @@ void __init radix__early_init_mmu(void) __pud_table_size = RADIX_PUD_TABLE_SIZE; __pgd_table_size = RADIX_PGD_TABLE_SIZE; - radix_init_page_sizes(); + __pmd_val_bits = RADIX_PMD_VAL_BITS; + __pud_val_bits = RADIX_PUD_VAL_BITS; + __pgd_val_bits = RADIX_PGD_VAL_BITS; + radix_init_page_sizes(); if (!firmware_has_feature(FW_FEATURE_LPAR)) radix_init_partition_table(); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 326c3d43edcd..b8c75e65157c 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -91,6 +91,12 @@ unsigned long __pud_table_size; EXPORT_SYMBOL(__pud_table_size); unsigned long __pgd_table_size; EXPORT_SYMBOL(__pgd_table_size); +unsigned long __pmd_val_bits; +EXPORT_SYMBOL(__pmd_val_bits); +unsigned long __pud_val_bits; +EXPORT_SYMBOL(__pud_val_bits); +unsigned long __pgd_val_bits; +EXPORT_SYMBOL(__pgd_val_bits); #endif unsigned long ioremap_bot = IOREMAP_BASE; -- cgit v1.2.3 From d6a9996e84ac4beb7713e9485f4563e100a9b03e Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Apr 2016 23:26:21 +1000 Subject: powerpc/mm: vmalloc abstraction in preparation for radix The vmalloc range differs between hash and radix config. Hence make VMALLOC_START and related constants variables which will be runtime initialized depending on whether hash or radix mode is active.
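To make the pattern concrete, here is a minimal sketch (not part of the patch) of the indirection this series keeps using: the old macro names survive, but they now resolve to variables that whichever MMU comes up fills in at early init. The declarations mirror the diff below; the two one-line init functions are illustrative stand-ins for the real hash__early_init_mmu()/radix__early_init_mmu() bodies.

/* book3s/64/pgtable.h: existing users of VMALLOC_START compile unchanged */
extern unsigned long __vmalloc_start;
#define VMALLOC_START __vmalloc_start

/* pgtable_64.c: a single definition, exported for modules */
unsigned long __vmalloc_start;
EXPORT_SYMBOL(__vmalloc_start);

/* filled in by the MMU-specific early init (stand-ins for the real bodies) */
void __init hash__early_init_mmu(void) { __vmalloc_start = H_VMALLOC_START; }
void __init radix__early_init_mmu(void) { __vmalloc_start = RADIX_VMALLOC_START; }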
Signed-off-by: Aneesh Kumar K.V [mpe: Fix missing init of ioremap_bot in pgtable_64.c for ppc64e] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash.h | 14 +++--- arch/powerpc/include/asm/book3s/64/pgtable.h | 15 ++++-- arch/powerpc/include/asm/book3s/64/radix.h | 68 ++++++++++++++++++++++++++++ arch/powerpc/kernel/pci_64.c | 3 +- arch/powerpc/mm/hash_utils_64.c | 8 ++++ arch/powerpc/mm/pgtable-radix.c | 7 +++ arch/powerpc/mm/pgtable_64.c | 15 +++++- arch/powerpc/mm/slb_low.S | 2 +- 8 files changed, 118 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index cd3e91583c81..f61cad3de4e6 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -45,17 +45,17 @@ /* * Define the address range of the kernel non-linear virtual area */ -#define KERN_VIRT_START ASM_CONST(0xD000000000000000) -#define KERN_VIRT_SIZE ASM_CONST(0x0000100000000000) +#define H_KERN_VIRT_START ASM_CONST(0xD000000000000000) +#define H_KERN_VIRT_SIZE ASM_CONST(0x0000100000000000) /* * The vmalloc space starts at the beginning of that region, and * occupies half of it on hash CPUs and a quarter of it on Book3E * (we keep a quarter for the virtual memmap) */ -#define VMALLOC_START KERN_VIRT_START -#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) -#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) +#define H_VMALLOC_START H_KERN_VIRT_START +#define H_VMALLOC_SIZE (H_KERN_VIRT_SIZE >> 1) +#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE) /* * Region IDs @@ -64,7 +64,7 @@ #define REGION_MASK (0xfUL << REGION_SHIFT) #define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT) -#define VMALLOC_REGION_ID (REGION_ID(VMALLOC_START)) +#define VMALLOC_REGION_ID (REGION_ID(H_VMALLOC_START)) #define KERNEL_REGION_ID (REGION_ID(PAGE_OFFSET)) #define VMEMMAP_REGION_ID (0xfUL) /* Server only */ #define USER_REGION_ID (0UL) @@ -73,7 +73,7 @@ * Defines the address of the vmemap area, in its own region on * hash table CPUs. 
*/ -#define VMEMMAP_BASE (VMEMMAP_REGION_ID << REGION_SHIFT) +#define H_VMEMMAP_BASE (VMEMMAP_REGION_ID << REGION_SHIFT) #ifdef CONFIG_PPC_MM_SLICES #define HAVE_ARCH_UNMAPPED_AREA diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 32a975694e53..f5628f96134b 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -208,6 +208,18 @@ extern unsigned long __pgd_val_bits; #define PUD_MASKED_BITS 0xc0000000000000ffUL /* Bits to mask out from a PGD to get to the PUD page */ #define PGD_MASKED_BITS 0xc0000000000000ffUL + +extern unsigned long __vmalloc_start; +extern unsigned long __vmalloc_end; +#define VMALLOC_START __vmalloc_start +#define VMALLOC_END __vmalloc_end + +extern unsigned long __kernel_virt_start; +extern unsigned long __kernel_virt_size; +#define KERN_VIRT_START __kernel_virt_start +#define KERN_VIRT_SIZE __kernel_virt_size +extern struct page *vmemmap; +extern unsigned long ioremap_bot; #endif /* __ASSEMBLY__ */ #include @@ -220,7 +232,6 @@ extern unsigned long __pgd_val_bits; #endif #include - /* * The second half of the kernel virtual space is used for IO mappings, * it's itself carved into the PIO region (ISA and PHB IO space) and @@ -239,8 +250,6 @@ extern unsigned long __pgd_val_bits; #define IOREMAP_BASE (PHB_IO_END) #define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE) -#define vmemmap ((struct page *)VMEMMAP_BASE) - /* Advertise special mapping type for AGP */ #define HAVE_PAGE_AGP diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 63eb629a8b64..f4709024b466 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -31,6 +31,74 @@ RADIX_PUD_INDEX_SIZE + RADIX_PGD_INDEX_SIZE + PAGE_SHIFT) #define RADIX_PGTABLE_RANGE (ASM_CONST(1) << RADIX_PGTABLE_EADDR_SIZE) +/* + * We support 52 bit address space, Use top bit for kernel + * virtual mapping. Also make sure kernel fit in the top + * quadrant. + * + * +------------------+ + * +------------------+ Kernel virtual map (0xc008000000000000) + * | | + * | | + * | | + * 0b11......+------------------+ Kernel linear map (0xc....) + * | | + * | 2 quadrant | + * | | + * 0b10......+------------------+ + * | | + * | 1 quadrant | + * | | + * 0b01......+------------------+ + * | | + * | 0 quadrant | + * | | + * 0b00......+------------------+ + * + * + * 3rd quadrant expanded: + * +------------------------------+ + * | | + * | | + * | | + * +------------------------------+ Kernel IO map end (0xc010000000000000) + * | | + * | | + * | 1/2 of virtual map | + * | | + * | | + * +------------------------------+ Kernel IO map start + * | | + * | 1/4 of virtual map | + * | | + * +------------------------------+ Kernel vmemap start + * | | + * | 1/4 of virtual map | + * | | + * +------------------------------+ Kernel virt start (0xc008000000000000) + * | | + * | | + * | | + * +------------------------------+ Kernel linear (0xc.....) + */ + +#define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000) +#define RADIX_KERN_VIRT_SIZE ASM_CONST(0x0008000000000000) + +/* + * The vmalloc space starts at the beginning of that region, and + * occupies a quarter of it on radix config. 
+ * (we keep a quarter for the virtual memmap) + */ +#define RADIX_VMALLOC_START RADIX_KERN_VIRT_START +#define RADIX_VMALLOC_SIZE (RADIX_KERN_VIRT_SIZE >> 2) +#define RADIX_VMALLOC_END (RADIX_VMALLOC_START + RADIX_VMALLOC_SIZE) +/* + * Defines the address of the vmemap area, in its own region on + * hash table CPUs. + */ +#define RADIX_VMEMMAP_BASE (RADIX_VMALLOC_END) + #ifndef __ASSEMBLY__ #define RADIX_PTE_TABLE_SIZE (sizeof(pte_t) << RADIX_PTE_INDEX_SIZE) #define RADIX_PMD_TABLE_SIZE (sizeof(pmd_t) << RADIX_PMD_INDEX_SIZE) diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 41503d7d53a1..3759df52bd67 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -38,7 +38,7 @@ * ISA drivers use hard coded offsets. If no ISA bus exists nothing * is mapped on the first 64K of IO space */ -unsigned long pci_io_base = ISA_IO_BASE; +unsigned long pci_io_base; EXPORT_SYMBOL(pci_io_base); static int __init pcibios_init(void) @@ -47,6 +47,7 @@ static int __init pcibios_init(void) printk(KERN_INFO "PCI: Probing PCI hardware\n"); + pci_io_base = ISA_IO_BASE; /* For now, override phys_mem_access_prot. If we need it,g * later, we may move that initialization to each ppc_md */ diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 64165a730160..68aee435ea97 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -889,6 +889,14 @@ void __init hash__early_init_mmu(void) __pmd_val_bits = 0; __pud_val_bits = 0; __pgd_val_bits = 0; + + __kernel_virt_start = H_KERN_VIRT_START; + __kernel_virt_size = H_KERN_VIRT_SIZE; + __vmalloc_start = H_VMALLOC_START; + __vmalloc_end = H_VMALLOC_END; + vmemmap = (struct page *)H_VMEMMAP_BASE; + ioremap_bot = IOREMAP_BASE; + /* Initialize the MMU Hash table and create the linear mapping * of memory. Has to be done before SLB initialization as this is * currently where the page size encoding is obtained. 
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 6182b6c3c9c3..13afacdfa96e 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -328,6 +328,13 @@ void __init radix__early_init_mmu(void) __pud_val_bits = RADIX_PUD_VAL_BITS; __pgd_val_bits = RADIX_PGD_VAL_BITS; + __kernel_virt_start = RADIX_KERN_VIRT_START; + __kernel_virt_size = RADIX_KERN_VIRT_SIZE; + __vmalloc_start = RADIX_VMALLOC_START; + __vmalloc_end = RADIX_VMALLOC_END; + vmemmap = (struct page *)RADIX_VMEMMAP_BASE; + ioremap_bot = IOREMAP_BASE; + radix_init_page_sizes(); if (!firmware_has_feature(FW_FEATURE_LPAR)) radix_init_partition_table(); diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index b8c75e65157c..216e2bda8f2c 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -97,9 +97,20 @@ unsigned long __pud_val_bits; EXPORT_SYMBOL(__pud_val_bits); unsigned long __pgd_val_bits; EXPORT_SYMBOL(__pgd_val_bits); - -#endif +unsigned long __kernel_virt_start; +EXPORT_SYMBOL(__kernel_virt_start); +unsigned long __kernel_virt_size; +EXPORT_SYMBOL(__kernel_virt_size); +unsigned long __vmalloc_start; +EXPORT_SYMBOL(__vmalloc_start); +unsigned long __vmalloc_end; +EXPORT_SYMBOL(__vmalloc_end); +struct page *vmemmap; +EXPORT_SYMBOL(vmemmap); +unsigned long ioremap_bot; +#else /* !CONFIG_PPC_BOOK3S_64 */ unsigned long ioremap_bot = IOREMAP_BASE; +#endif /** * __ioremap_at - Low level function to establish the page tables diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index 15b8f712b50b..dfdb90cb4403 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -91,7 +91,7 @@ slb_miss_kernel_load_vmemmap: * can be demoted from 64K -> 4K dynamically on some machines */ clrldi r11,r10,48 - cmpldi r11,(VMALLOC_SIZE >> 28) - 1 + cmpldi r11,(H_VMALLOC_SIZE >> 28) - 1 bgt 5f lhz r11,PACAVMALLOCSLLP(r13) b 6f -- cgit v1.2.3 From 5ed7ecd08a0807d6d616c3d958402f9c723bb048 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 29 Apr 2016 23:26:23 +1000 Subject: powerpc/mm: pte_frag abstraction In this patch we make the number of pte fragments per level 4 page table page a variable. Radix level 4 table size is 256 bytes and hence we can have 256 fragments per level 4 page. We don't update the fragment count in this patch. We need to do performance measurements to find the right value for fragment count. 
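As a quick sanity check on those counts (illustrative only, not part of the patch): with a 64K page, the hash layout spends 2K on PTEs plus 2K on the real_pte_t hash index per fragment, so each fragment is 4K and 64K / 4K = 16 fragments; a 256-byte radix level 4 table would allow 64K / 256 = 256 fragments, the value mentioned above.

/* sketch: how the fragment counts fall out of the table sizes */
#define PAGE_SIZE_64K       (64 * 1024)
#define H_PTE_FRAG_SIZE     (1UL << 12) /* 2K of PTEs + 2K of real_pte_t hash index */
#define RADIX_L4_TABLE_SIZE 256         /* 256-byte radix level 4 table */
/* 65536 / 4096 = 16  -> the H_PTE_FRAG_NR this patch keeps      */
/* 65536 / 256  = 256 -> the possible radix fragment count above */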
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/hash-4k.h | 2 ++ arch/powerpc/include/asm/book3s/64/hash-64k.h | 4 ++-- arch/powerpc/include/asm/book3s/64/pgtable.h | 6 ++++++ arch/powerpc/mm/hash_utils_64.c | 3 +++ arch/powerpc/mm/pgtable-radix.c | 5 +++++ arch/powerpc/mm/pgtable_64.c | 4 ++++ 6 files changed, 22 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index dcb9d6e94a0c..99f360e0ecb0 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -28,6 +28,8 @@ #define H_PAGE_4K_PFN 0x0 #define H_PAGE_THP_HUGE 0x0 #define H_PAGE_COMBO 0x0 +#define H_PTE_FRAG_NR 0 +#define H_PTE_FRAG_SIZE_SHIFT 0 /* * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */ diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h index c00e39fa3eba..80d2abe25280 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-64k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h @@ -29,12 +29,12 @@ /* * we support 16 fragments per PTE page of 64K size. */ -#define PTE_FRAG_NR 16 +#define H_PTE_FRAG_NR 16 /* * We use a 2K PTE page fragment and another 2K for storing * real_pte_t hash index */ -#define PTE_FRAG_SIZE_SHIFT 12 +#define H_PTE_FRAG_SIZE_SHIFT 12 #define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT) #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index f5628f96134b..9175a688e70f 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -177,6 +177,12 @@ extern unsigned long __pgd_val_bits; #define PMD_VAL_BITS __pmd_val_bits #define PUD_VAL_BITS __pud_val_bits #define PGD_VAL_BITS __pgd_val_bits + +extern unsigned long __pte_frag_nr; +#define PTE_FRAG_NR __pte_frag_nr +extern unsigned long __pte_frag_size_shift; +#define PTE_FRAG_SIZE_SHIFT __pte_frag_size_shift +#define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT) /* * Pgtable size used by swapper, init in asm code */ diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 68aee435ea97..262082e51db1 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -873,6 +873,9 @@ void __init hash__early_init_mmu(void) /* * initialize page table size */ + __pte_frag_nr = H_PTE_FRAG_NR; + __pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT; + __pte_index_size = H_PTE_INDEX_SIZE; __pmd_index_size = H_PMD_INDEX_SIZE; __pud_index_size = H_PUD_INDEX_SIZE; diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 13afacdfa96e..a5a5253c3ea1 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -334,6 +334,11 @@ void __init radix__early_init_mmu(void) __vmalloc_end = RADIX_VMALLOC_END; vmemmap = (struct page *)RADIX_VMEMMAP_BASE; ioremap_bot = IOREMAP_BASE; + /* + * For now radix also use the same frag size + */ + __pte_frag_nr = H_PTE_FRAG_NR; + __pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT; radix_init_page_sizes(); if (!firmware_has_feature(FW_FEATURE_LPAR)) diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 216e2bda8f2c..95df08e73559 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -107,6 +107,10 @@ unsigned long __vmalloc_end; EXPORT_SYMBOL(__vmalloc_end); struct page 
*vmemmap; EXPORT_SYMBOL(vmemmap); +unsigned long __pte_frag_nr; +EXPORT_SYMBOL(__pte_frag_nr); +unsigned long __pte_frag_size_shift; +EXPORT_SYMBOL(__pte_frag_size_shift); unsigned long ioremap_bot; #else /* !CONFIG_PPC_BOOK3S_64 */ unsigned long ioremap_bot = IOREMAP_BASE; -- cgit v1.2.3 From 8bbc9b7b001eaab8abf7e9e24edf1bb285c8d825 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 6 May 2016 16:46:00 +1000 Subject: powerpc/mm/hash64: Factor out hash preload psize check Currently we have a check in hash_preload() against the psize, which is only included when CONFIG_PPC_MM_SLICES is enabled. We want to expand this check in a subsequent patch, so factor it out to allow that. As a bonus it removes the #ifdef in the C code. Unfortunately we can't put this in the existing CONFIG_PPC_MM_SLICES block because it would require a forward declaration. Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 262082e51db1..fbe747ed8c2d 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1322,6 +1322,22 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap, return hash_page_mm(mm, ea, access, trap, flags); } +#ifdef CONFIG_PPC_MM_SLICES +static bool should_hash_preload(struct mm_struct *mm, unsigned long ea) +{ + /* We only prefault standard pages for now */ + if (unlikely(get_slice_psize(mm, ea) != mm->context.user_psize)) + return false; + + return true; +} +#else +static bool should_hash_preload(struct mm_struct *mm, unsigned long ea) +{ + return true; +} +#endif + void hash_preload(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap) { @@ -1334,11 +1350,8 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, BUG_ON(REGION_ID(ea) != USER_REGION_ID); -#ifdef CONFIG_PPC_MM_SLICES - /* We only prefault standard pages for now */ - if (unlikely(get_slice_psize(mm, ea) != mm->context.user_psize)) + if (!should_hash_preload(mm, ea)) return; -#endif DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx," " trap=%lx\n", mm, mm->pgd, ea, access, trap); -- cgit v1.2.3 From aac55d7573c5d46ed9a62818d5d3e69dd2060105 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 6 May 2016 16:47:12 +1000 Subject: powerpc/mm/hash64: Fix subpage protection with 4K HPTE config With Linux page size of 64K and hardware only supporting 4K HPTE, if we use subpage protection, we always fail for the subpage 0 as shown below (using the selftest subpage_prot test): 520175565: (4520111850): Failed at 0x3fffad4b0000 (p=13,sp=0,w=0), want=fault, got=pass ! 4520890210: (4520826495): Failed at 0x3fffad5b0000 (p=29,sp=0,w=0), want=fault, got=pass ! 4521574251: (4521510536): Failed at 0x3fffad6b0000 (p=45,sp=0,w=0), want=fault, got=pass ! 4522258324: (4522194609): Failed at 0x3fffad7b0000 (p=61,sp=0,w=0), want=fault, got=pass ! This is because hash preload wrongly inserts the HPTE entry for subpage 0 without looking at the subpage protection information. Fix it by teaching should_hash_preload() not to preload if we have subpage protection configured for that range. It appears this has been broken since it was introduced in 2008. 
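Putting the helper factored out in the previous patch together with this fix, the CONFIG_PPC_MM_SLICES variant of the check ends up reading roughly as below (a sketch assembled from the two hunks; see the diff that follows for the actual change):

static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
{
	int psize = get_slice_psize(mm, ea);

	/* We only prefault standard pages for now */
	if (unlikely(psize != mm->context.user_psize))
		return false;

	/* Don't prefault if subpage protection is enabled for the EA */
	if (unlikely((psize == MMU_PAGE_4K) && subpage_protection(mm, ea)))
		return false;

	return true;
}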
Fixes: fa28237cfcc5 ("[POWERPC] Provide a way to protect 4k subpages when using 64k pages") Signed-off-by: Aneesh Kumar K.V [mpe: Rework into should_hash_preload() to avoid build fails w/SLICES=n] Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index fbe747ed8c2d..59268969a0bc 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -1325,8 +1325,16 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap, #ifdef CONFIG_PPC_MM_SLICES static bool should_hash_preload(struct mm_struct *mm, unsigned long ea) { + int psize = get_slice_psize(mm, ea); + /* We only prefault standard pages for now */ - if (unlikely(get_slice_psize(mm, ea) != mm->context.user_psize)) + if (unlikely(psize != mm->context.user_psize)) + return false; + + /* + * Don't prefault if subpage protection is enabled for the EA. + */ + if (unlikely((psize == MMU_PAGE_4K) && subpage_protection(mm, ea))) return false; return true; -- cgit v1.2.3 From dc47c0c1f8099fccb2c1e2f3775855066a9e4484 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 31 May 2016 11:56:30 +0530 Subject: powerpc/mm/hash: Fix the reference bit update when handling hash fault When we converted the asm routines to C functions, we missed updating HPTE_R_R based on _PAGE_ACCESSED. ASM code used to copy over the lower bits from pte via: andi. r3,r30,0x1fe /* Get basic set of flags */ We also update the code such that we won't update the Change bit ('C' bit) always. This was added by commit c5cf0e30bf3d8 ("powerpc: Fix buglet with MMU hash management"). With hash64, we need to make sure that hardware doesn't do a pte update directly. This is because we do end up with entries in TLB with no hash page table entry. This happens because when we find a hash bucket full, we "evict" a more/less random entry from it. When we do that we don't invalidate the TLB (hpte_remove) because we assume the old translation is still technically "valid". For more info look at commit 0608d692463 ("powerpc/mm: Always invalidate tlb on hpte invalidate and update"). Thus it's critical that valid hash PTEs always have reference bit set and writeable ones have change bit set. We do this by hashing a non-dirty linux PTE as read-only and always setting _PAGE_ACCESSED (and thus R) when hashing anything else in. Any attempt by Linux at clearing those bits also removes the corresponding hash entry. Commit c5cf0e30bf3d8 did that for 'C' bit by enabling 'C' bit always. We don't really need to do that because we never map a RW pte entry without setting 'C' bit. On READ fault on a RW pte entry, we still map it READ only, hence a store update in the page will still cause a hash pte fault. This patch reverts the part of commit c5cf0e30bf3d8 ("[PATCH] powerpc: Fix buglet with MMU hash management") and retains the updatepp part. - If we hit the updatepp path on native, the old code without that commit would fail to set C because native_hpte_updatepp() was implemented to filter the same bits as H_PROTECT and not let C through, so we would "upgrade" a RO HPTE to RW without setting C, thus causing the bug.
So the real fix in that commit was the change to native_hpte_updatepp(). Fixes: 89ff725051d1 ("powerpc/mm: Convert __hash_page_64K to C") Cc: stable@vger.kernel.org # v4.5+ Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 59268969a0bc..b2740c67e172 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -159,6 +159,19 @@ static struct mmu_psize_def mmu_psize_defaults_gp[] = { }, }; +/* + * 'R' and 'C' update notes: + * - Under pHyp or KVM, the updatepp path will not set C, thus it *will* + * create writeable HPTEs without C set, because the hcall H_PROTECT + * that we use in that case will not update C + * - The above is however not a problem, because we also don't do that + * fancy "no flush" variant of eviction and we use H_REMOVE which will + * do the right thing and thus we don't have the race I described earlier + * + * - Under bare metal, we do have the race, so we need R and C set + * - We make sure R is always set and never lost + * - C is _PAGE_DIRTY, and *should* always be set for a writeable mapping + */ unsigned long htab_convert_pte_flags(unsigned long pteflags) { unsigned long rflags = 0; @@ -186,9 +199,14 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) rflags |= 0x1; } /* - * Always add "C" bit for perf. Memory coherence is always enabled + * We can't allow hardware to update hpte bits. Hence always + * set 'R' bit and set 'C' if it is a write fault + * Memory coherence is always enabled */ - rflags |= HPTE_R_C | HPTE_R_M; + rflags |= HPTE_R_R | HPTE_R_M; + + if (pteflags & _PAGE_DIRTY) + rflags |= HPTE_R_C; /* * Add in WIG bits */ -- cgit v1.2.3 From e568006b9d828403397668864d9797dc4bfefd28 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Fri, 17 Jun 2016 11:32:00 +0530 Subject: powerpc/mm/hash: Don't add memory coherence if cache inhibited is set H_ENTER hcall handling in qemu had assumptions that a cache inhibited hpte entry won't have memory coherence set. Also, older kernels mentioned that some versions of pHyp required this (the code removed by the below commit says: /* Make pHyp happy */ if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU)) hpte_r &= ~HPTE_R_M;). But with older kernels we had some inconsistent memory coherence mapping. We always enabled memory coherence in the page fault path and removed memory coherence if _PAGE_NO_CACHE was set when we mapped the page via htab_bolt_mapping. The commit mentioned below tried to consolidate that by always enabling memory coherence. But as mentioned above that breaks Qemu H_ENTER handling. This patch updates this such that we enable memory coherence only if cache inhibited is not set, and brings fault handling, lpar and bolt mapping into sync. Fixes: 30bda41aba4e ("powerpc/mm: Drop WIMG in favour of new constant") Reported-by: Darrick J.
Wong Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index b2740c67e172..5b22ba0b58bc 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -201,9 +201,8 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) /* * We can't allow hardware to update hpte bits. Hence always * set 'R' bit and set 'C' if it is a write fault - * Memory coherence is always enabled */ - rflags |= HPTE_R_R | HPTE_R_M; + rflags |= HPTE_R_R; if (pteflags & _PAGE_DIRTY) rflags |= HPTE_R_C; @@ -213,10 +212,15 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags) if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_TOLERANT) rflags |= HPTE_R_I; - if ((pteflags & _PAGE_CACHE_CTL ) == _PAGE_NON_IDEMPOTENT) + else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT) rflags |= (HPTE_R_I | HPTE_R_G); - if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO) - rflags |= (HPTE_R_I | HPTE_R_W); + else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO) + rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M); + else + /* + * Add memory coherence if cache inhibited is not set + */ + rflags |= HPTE_R_M; return rflags; } -- cgit v1.2.3 From bfa37087aa04e45f56c41142dfceecb79b8e6ef9 Mon Sep 17 00:00:00 2001 From: Darren Stevens Date: Wed, 29 Jun 2016 21:06:28 +0100 Subject: powerpc: Initialise pci_io_base as early as possible Commit d6a9996e84ac ("powerpc/mm: vmalloc abstraction in preparation for radix") turned kernel memory and IO addresses from #defined constants to variables initialised at runtime. On PA6T (pasemi) systems the setup_arch() machine call initialises the onboard PCI-e root-ports, and uses pci_io_base to do this, which is now before its value has been set, resulting in a panic early in boot before console IO is initialised. Move the pci_io_base initialisation to the same place as vmalloc ranges are set (hash__early_init_mmu()/radix__early_init_mmu()) - this is the earliest possible place we can initialise it. Fixes: d6a9996e84ac ("powerpc/mm: vmalloc abstraction in preparation for radix") Reported-by: Christian Zigotzky Signed-off-by: Darren Stevens Reviewed-by: Aneesh Kumar K.V [mpe: Add #ifdef CONFIG_PCI, massage change log slightly] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/pgtable.h | 1 + arch/powerpc/kernel/pci_64.c | 1 - arch/powerpc/mm/hash_utils_64.c | 4 ++++ arch/powerpc/mm/pgtable-radix.c | 5 +++++ 4 files changed, 10 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 88a5ecaa157b..ab84c89c9e98 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -230,6 +230,7 @@ extern unsigned long __kernel_virt_size; #define KERN_VIRT_SIZE __kernel_virt_size extern struct page *vmemmap; extern unsigned long ioremap_bot; +extern unsigned long pci_io_base; #endif /* __ASSEMBLY__ */ #include diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 3759df52bd67..a5ae49a2dcc4 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -47,7 +47,6 @@ static int __init pcibios_init(void) printk(KERN_INFO "PCI: Probing PCI hardware\n"); - pci_io_base = ISA_IO_BASE; /* For now, override phys_mem_access_prot. 
If we need it,g * later, we may move that initialization to each ppc_md */ diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 5b22ba0b58bc..2971ea18c768 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -922,6 +922,10 @@ void __init hash__early_init_mmu(void) vmemmap = (struct page *)H_VMEMMAP_BASE; ioremap_bot = IOREMAP_BASE; +#ifdef CONFIG_PCI + pci_io_base = ISA_IO_BASE; +#endif + /* Initialize the MMU Hash table and create the linear mapping * of memory. Has to be done before SLB initialization as this is * currently where the page size encoding is obtained. diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index e58707deef5c..7931e1496f0d 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -328,6 +328,11 @@ void __init radix__early_init_mmu(void) __vmalloc_end = RADIX_VMALLOC_END; vmemmap = (struct page *)RADIX_VMEMMAP_BASE; ioremap_bot = IOREMAP_BASE; + +#ifdef CONFIG_PCI + pci_io_base = ISA_IO_BASE; +#endif + /* * For now radix also use the same frag size */ -- cgit v1.2.3 From faf7882962e78a4c8ebb846f4520c858ee184dca Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Tue, 5 Jul 2016 11:43:21 +1000 Subject: powerpc/mm: Add a parameter to disable 1TB segs This patch adds the kernel command line parameter "disable_1tb_segments" which forces the kernel to use 256MB rather than 1TB segments. Forcing the use of 256MB segments makes it considerably easier to test code that depends on an SLB miss occurring. Suggested-by: Michael Neuling Suggested-by: Michael Ellerman Signed-off-by: Oliver O'Halloran Signed-off-by: Michael Ellerman --- Documentation/kernel-parameters.txt | 6 ++++++ arch/powerpc/mm/hash_utils_64.c | 15 +++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 82b42c958d1c..738bae4a5958 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -920,6 +920,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. dhash_entries= [KNL] Set number of hash buckets for dentry cache. + disable_1tb_segments [PPC] + Disables the use of 1TB hash page table segments. This + causes the kernel to fall back to 256MB segments which + can be useful when debugging issues that require an SLB + miss to occur. + disable= [IPV6] See Documentation/networking/ipv6.txt.
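For testing, the new switch is then passed like any other boot parameter; an illustrative command line (not from the patch) would be: root=/dev/sda2 disable_1tb_segments. Note from the implementation below that the early_param() handler ignores its argument, so the bare word is the entire interface.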
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index b2740c67e172..fab46dbe68b3 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -317,6 +317,15 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend, return ret; } +static bool disable_1tb_segments = false; + +static int __init parse_disable_1tb_segments(char *p) +{ + disable_1tb_segments = true; + return 0; +} +early_param("disable_1tb_segments", parse_disable_1tb_segments); + static int __init htab_dt_scan_seg_sizes(unsigned long node, const char *uname, int depth, void *data) @@ -335,6 +344,12 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node, for (; size >= 4; size -= 4, ++prop) { if (be32_to_cpu(prop[0]) == 40) { DBG("1T segment support detected\n"); + + if (disable_1tb_segments) { + DBG("1T segments disabled by command line\n"); + break; + } + cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT; return 1; } -- cgit v1.2.3 From 56547411a07b0aabf55ce8b841dfdb7daced1250 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 13 Jul 2016 15:05:25 +0530 Subject: powerpc/mm: Print information regarding the MMU mode This helps in easily identifying the MMU mode with which the kernel is operating. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 3 ++- arch/powerpc/mm/pgtable-radix.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 1e9117e58a78..4f0fd470ded4 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -739,7 +739,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table, * For now UPRT is 0 for us. */ partition_tb->patb1 = 0; - DBG("Partition table %p\n", partition_tb); + pr_info("Partition table %p\n", partition_tb); /* * update partition table control register, * 64 K size. */ @@ -947,6 +947,7 @@ void __init hash__early_init_mmu(void) */ htab_initialize(); + pr_info("Initializing hash mmu with SLB\n"); /* Initialize SLB management */ slb_initialize(); } diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 4732fa3fe591..8636bf120985 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -182,7 +182,8 @@ static void __init radix_init_partition_table(void) partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT); partition_tb->patb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR); - printk("Partition table %p\n", partition_tb); + pr_info("Initializing Radix MMU\n"); + pr_info("Partition table %p\n", partition_tb); memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); /* -- cgit v1.2.3 From 4b7a350480506bf292193e9c1db6fc19d57321ec Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 13 Jul 2016 15:05:26 +0530 Subject: powerpc/mm/hash: Update SDR1 size encoding as documented in ISA 3.0 ISA 3.0 documents the hash table size in bytes as 2^(HTABSIZE + 18). No functionality change by this patch.
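As a quick sanity check of the new encoding (a worked example added here for illustration): a 256MB hash table has htab_size_bytes = 2^28, so HTABSIZE = ilog2(2^28) - 18 = 10. The old formula gives the same value, since a PTEG is 128 bytes (8 HPTEs of 16 bytes each): pteg_count = 2^28 / 2^7 = 2^21, and ilog2(2^21) - 11 = 10. That is why there is no functional change.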
Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 4f0fd470ded4..7d0955e04f08 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -714,10 +714,9 @@ int remove_section_mapping(unsigned long start, unsigned long end) #endif /* CONFIG_MEMORY_HOTPLUG */ static void __init hash_init_partition_table(phys_addr_t hash_table, - unsigned long pteg_count) + unsigned long htab_size) { unsigned long ps_field; - unsigned long htab_size; unsigned long patb_size = 1UL << PATB_SIZE_SHIFT; /* @@ -725,7 +724,7 @@ static void __init hash_init_partition_table(phys_addr_t hash_table, * We can ignore that for lpid 0 */ ps_field = 0; - htab_size = __ilog2(pteg_count) - 11; + htab_size = __ilog2(htab_size) - 18; BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large."); partition_tb = __va(memblock_alloc_base(patb_size, patb_size, @@ -811,7 +810,7 @@ static void __init htab_initialize(void) htab_address = __va(table); /* htab absolute addr + encoded htabsize */ - _SDR1 = table + __ilog2(pteg_count) - 11; + _SDR1 = table + __ilog2(htab_size_bytes) - 18; /* Initialize the HPT with no entries */ memset((void *)table, 0, htab_size_bytes); @@ -820,7 +819,7 @@ static void __init htab_initialize(void) /* Set SDR1 */ mtspr(SPRN_SDR1, _SDR1); else - hash_init_partition_table(table, pteg_count); + hash_init_partition_table(table, htab_size_bytes); } prot = pgprot_val(PAGE_KERNEL); -- cgit v1.2.3 From c40785ad305b32e9b0b5fbc888f1f5d57f29bf44 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:47 +1000 Subject: powerpc/dart: Use a cachable DART Instead of punching a hole in the linear mapping, just use normal cachable memory, and apply the flush sequence documented in the CPC925 (aka U3) user manual. This allows us to remove quite a bit of code related to the early allocation of the DART and the hole in the linear mapping. We can also get rid of the copy of the DART for suspend/resume as the original memory can just be saved/restored now, as long as we properly sync the caches.
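The general shape of the trick is worth spelling out. Here is a sketch using generic powerpc kernel helpers; the real dart_cache_sync() in the diff below additionally performs the CPC925-specific dcbf plus read-back dance on the line just past the range, so treat this as a simplified illustration rather than the actual sequence:

#include <asm/cacheflush.h>
#include <asm/barrier.h>

/* Publish a CPU-built table to a device that does not snoop the cache. */
static void publish_table(void *table, unsigned long len)
{
	/* Write dirty lines back to memory so the device engine sees them */
	flush_dcache_range((unsigned long)table, (unsigned long)table + len);
	/* Order the flush before the MMIO write that (re)arms the device */
	mb();
}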
Signed-off-by: Benjamin Herrenschmidt [mpe: Integrate dart_init() fix to return ENODEV when DART disabled] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/iommu.h | 1 - arch/powerpc/mm/hash_utils_64.c | 32 ------ arch/powerpc/platforms/maple/setup.c | 7 -- arch/powerpc/platforms/powermac/setup.c | 8 -- arch/powerpc/sysdev/dart_iommu.c | 184 +++++++++++++++----------------- 5 files changed, 88 insertions(+), 144 deletions(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 7b87bab09564..f49a72a9062d 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -273,7 +273,6 @@ extern void iommu_init_early_pSeries(void); extern void iommu_init_early_dart(struct pci_controller_ops *controller_ops); extern void iommu_init_early_pasemi(void); -extern void alloc_dart_table(void); #if defined(CONFIG_PPC64) && defined(CONFIG_PM) static inline void iommu_save(void) { diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 7d0955e04f08..859ecaa928e9 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -87,10 +87,6 @@ * */ -#ifdef CONFIG_U3_DART -extern unsigned long dart_tablebase; -#endif /* CONFIG_U3_DART */ - static unsigned long _SDR1; struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT]; EXPORT_SYMBOL_GPL(mmu_psize_defs); @@ -846,34 +842,6 @@ static void __init htab_initialize(void) DBG("creating mapping for region: %lx..%lx (prot: %lx)\n", base, size, prot); -#ifdef CONFIG_U3_DART - /* Do not map the DART space. Fortunately, it will be aligned - * in such a way that it will not cross two memblock regions and - * will fit within a single 16Mb page. - * The DART space is assumed to be a full 16Mb region even if - * we only use 2Mb of that space. We will use more of it later - * for AGP GART. We have to use a full 16Mb large page. 
- */ - DBG("DART base: %lx\n", dart_tablebase); - - if (dart_tablebase != 0 && dart_tablebase >= base - && dart_tablebase < (base + size)) { - unsigned long dart_table_end = dart_tablebase + 16 * MB; - if (base != dart_tablebase) - BUG_ON(htab_bolt_mapping(base, dart_tablebase, - __pa(base), prot, - mmu_linear_psize, - mmu_kernel_ssize)); - if ((base + size) > dart_table_end) - BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB, - base + size, - __pa(dart_table_end), - prot, - mmu_linear_psize, - mmu_kernel_ssize)); - continue; - } -#endif /* CONFIG_U3_DART */ BUG_ON(htab_bolt_mapping(base, base + size, __pa(base), prot, mmu_linear_psize, mmu_kernel_ssize)); } diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index 5f8f6f966608..99b9b96ab059 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -305,13 +305,6 @@ static int __init maple_probe(void) if (!of_flat_dt_is_compatible(root, "Momentum,Maple") && !of_flat_dt_is_compatible(root, "Momentum,Apache")) return 0; - /* - * On U3, the DART (iommu) must be allocated now since it - * has an impact on htab_initialize (due to the large page it - * occupies having to be broken up so the DART itself is not - * part of the cacheable linar mapping - */ - alloc_dart_table(); hpte_init_native(); pm_power_off = maple_power_off; diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index bd83b52c9830..fc0b69f6e3d4 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -607,14 +607,6 @@ static int __init pmac_probe(void) return 0; #ifdef CONFIG_PPC64 - /* - * On U3, the DART (iommu) must be allocated now since it - * has an impact on htab_initialize (due to the large page it - * occupies having to be broken up so the DART itself is not - * part of the cacheable linar mapping - */ - alloc_dart_table(); - hpte_init_native(); #endif diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index b7348637eae0..26904f4879ec 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -48,16 +48,10 @@ #include "dart.h" -/* Physical base address and size of the DART table */ -unsigned long dart_tablebase; /* exported to htab_initialize */ +/* DART table address and size */ +static u32 *dart_tablebase; static unsigned long dart_tablesize; -/* Virtual base address of the DART table */ -static u32 *dart_vbase; -#ifdef CONFIG_PM -static u32 *dart_copy; -#endif - /* Mapped base address for the dart */ static unsigned int __iomem *dart; @@ -151,6 +145,34 @@ wait_more: spin_unlock_irqrestore(&invalidate_lock, flags); } +static void dart_cache_sync(unsigned int *base, unsigned int count) +{ + /* + * We add 1 to the number of entries to flush, following a + * comment in Darwin indicating that the memory controller + * can prefetch unmapped memory under some circumstances. + */ + unsigned long start = (unsigned long)base; + unsigned long end = start + (count + 1) * sizeof(unsigned int); + unsigned int tmp; + + /* Perform a standard cache flush */ + flush_inval_dcache_range(start, end); + + /* + * Perform the sequence described in the CPC925 manual to + * ensure all the data gets to a point the cache incoherent + * DART hardware will see. 
+ */ + asm volatile(" sync;" + " isync;" + " dcbf 0,%1;" + " sync;" + " isync;" + " lwz %0,0(%1);" + " isync" : "=r" (tmp) : "r" (end) : "memory"); +} + static void dart_flush(struct iommu_table *tbl) { mb(); @@ -165,13 +187,13 @@ static int dart_build(struct iommu_table *tbl, long index, enum dma_data_direction direction, struct dma_attrs *attrs) { - unsigned int *dp; + unsigned int *dp, *orig_dp; unsigned int rpn; long l; DBG("dart: build at: %lx, %lx, addr: %x\n", index, npages, uaddr); - dp = ((unsigned int*)tbl->it_base) + index; + orig_dp = dp = ((unsigned int*)tbl->it_base) + index; /* On U3, all memory is contiguous, so we can move this * out of the loop. @@ -184,11 +206,7 @@ static int dart_build(struct iommu_table *tbl, long index, uaddr += DART_PAGE_SIZE; } - - /* make sure all updates have reached memory */ - mb(); - in_be32((unsigned __iomem *)dp); - mb(); + dart_cache_sync(orig_dp, npages); if (dart_is_u4) { rpn = index; @@ -203,7 +221,8 @@ static int dart_build(struct iommu_table *tbl, long index, static void dart_free(struct iommu_table *tbl, long index, long npages) { - unsigned int *dp; + unsigned int *dp, *orig_dp; + long orig_npages = npages; /* We don't worry about flushing the TLB cache. The only drawback of * not doing it is that we won't catch buggy device drivers doing @@ -212,34 +231,30 @@ static void dart_free(struct iommu_table *tbl, long index, long npages) DBG("dart: free at: %lx, %lx\n", index, npages); - dp = ((unsigned int *)tbl->it_base) + index; + orig_dp = dp = ((unsigned int *)tbl->it_base) + index; while (npages--) *(dp++) = dart_emptyval; -} + dart_cache_sync(orig_dp, orig_npages); +} -static int __init dart_init(struct device_node *dart_node) +static void allocate_dart(void) { - unsigned int i; - unsigned long tmp, base, size; - struct resource r; - - if (dart_tablebase == 0 || dart_tablesize == 0) { - printk(KERN_INFO "DART: table not allocated, using " - "direct DMA\n"); - return -ENODEV; - } + unsigned long tmp; - if (of_address_to_resource(dart_node, 0, &r)) - panic("DART: can't get register base ! "); + /* 512 pages (2MB) is max DART tablesize. */ + dart_tablesize = 1UL << 21; - /* Make sure nothing from the DART range remains in the CPU cache - * from a previous mapping that existed before the kernel took - * over + /* + * 16MB (1 << 24) alignment. We allocate a full 16Mb chuck since we + * will blow up an entire large page anyway in the kernel mapping. */ - flush_dcache_phys_range(dart_tablebase, - dart_tablebase + dart_tablesize); + dart_tablebase = __va(memblock_alloc_base(1UL<<24, + 1UL<<24, 0x80000000L)); + + /* There is no point scanning the DART space for leaks*/ + kmemleak_no_scan((void *)dart_tablebase); /* Allocate a spare page to map all invalid DART pages. We need to do * that to work around what looks like a problem with the HT bridge @@ -249,20 +264,51 @@ static int __init dart_init(struct device_node *dart_node) dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) & DARTMAP_RPNMASK); + printk(KERN_INFO "DART table allocated at: %p\n", dart_tablebase); +} + +static int __init dart_init(struct device_node *dart_node) +{ + unsigned int i; + unsigned long base, size; + struct resource r; + + /* IOMMU disabled by the user ? bail out */ + if (iommu_is_off) + return -ENODEV; + + /* + * Only use the DART if the machine has more than 1GB of RAM + * or if requested with iommu=on on cmdline. + * + * 1GB of RAM is picked as limit because some default devices + * (i.e. Airport Extreme) have 30 bit address range limits. 
+ */ + + if (!iommu_force_on && memblock_end_of_DRAM() <= 0x40000000ull) + return -ENODEV; + + /* Get DART registers */ + if (of_address_to_resource(dart_node, 0, &r)) + panic("DART: can't get register base ! "); + /* Map in DART registers */ dart = ioremap(r.start, resource_size(&r)); if (dart == NULL) panic("DART: Cannot map registers!"); - /* Map in DART table */ - dart_vbase = ioremap(__pa(dart_tablebase), dart_tablesize); + /* Allocate the DART and dummy page */ + allocate_dart(); /* Fill initial table */ for (i = 0; i < dart_tablesize/4; i++) - dart_vbase[i] = dart_emptyval; + dart_tablebase[i] = dart_emptyval; + + /* Push to memory */ + dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32)); /* Initialize DART with table base and enable it. */ - base = dart_tablebase >> DART_PAGE_SHIFT; + base = ((unsigned long)dart_tablebase) >> DART_PAGE_SHIFT; size = dart_tablesize >> DART_PAGE_SHIFT; if (dart_is_u4) { size &= DART_SIZE_U4_SIZE_MASK; @@ -301,7 +347,7 @@ static void iommu_table_dart_setup(void) iommu_table_dart.it_page_shift = IOMMU_PAGE_SHIFT_4K; /* Initialize the common IOMMU code */ - iommu_table_dart.it_base = (unsigned long)dart_vbase; + iommu_table_dart.it_base = (unsigned long)dart_tablebase; iommu_table_dart.it_index = 0; iommu_table_dart.it_blocksize = 1; iommu_table_dart.it_ops = &iommu_dart_ops; @@ -404,75 +450,21 @@ void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops) } #ifdef CONFIG_PM -static void iommu_dart_save(void) -{ - memcpy(dart_copy, dart_vbase, 2*1024*1024); -} - static void iommu_dart_restore(void) { - memcpy(dart_vbase, dart_copy, 2*1024*1024); + dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32)); dart_tlb_invalidate_all(); } static int __init iommu_init_late_dart(void) { - unsigned long tbasepfn; - struct page *p; - - /* if no dart table exists then we won't need to save it - * and the area has also not been reserved */ if (!dart_tablebase) return 0; - tbasepfn = __pa(dart_tablebase) >> PAGE_SHIFT; - register_nosave_region_late(tbasepfn, - tbasepfn + ((1<<24) >> PAGE_SHIFT)); - - /* For suspend we need to copy the dart contents because - * it is not part of the regular mapping (see above) and - * thus not saved automatically. The memory for this copy - * must be allocated early because we need 2 MB. */ - p = alloc_pages(GFP_KERNEL, 21 - PAGE_SHIFT); - BUG_ON(!p); - dart_copy = page_address(p); - - ppc_md.iommu_save = iommu_dart_save; ppc_md.iommu_restore = iommu_dart_restore; return 0; } late_initcall(iommu_init_late_dart); -#endif - -void __init alloc_dart_table(void) -{ - /* Only reserve DART space if machine has more than 1GB of RAM - * or if requested with iommu=on on cmdline. - * - * 1GB of RAM is picked as limit because some default devices - * (i.e. Airport Extreme) have 30 bit address range limits. - */ - - if (iommu_is_off) - return; - - if (!iommu_force_on && memblock_end_of_DRAM() <= 0x40000000ull) - return; - - /* 512 pages (2MB) is max DART tablesize. */ - dart_tablesize = 1UL << 21; - /* 16MB (1 << 24) alignment. We allocate a full 16Mb chuck since we - * will blow up an entire large page anyway in the kernel mapping - */ - dart_tablebase = (unsigned long) - __va(memblock_alloc_base(1UL<<24, 1UL<<24, 0x80000000L)); - /* - * The DART space is later unmapped from the kernel linear mapping and - * accessing dart_tablebase during kmemleak scanning will fault. 
- */ - kmemleak_no_scan((void *)dart_tablebase); - - printk(KERN_INFO "DART table allocated at: %lx\n", dart_tablebase); -} +#endif /* CONFIG_PM */ -- cgit v1.2.3 From 166dd7d3fbf2df183926f0e4b4855f6cbd8da945 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:51 +1000 Subject: powerpc/64: Move MMU backend selection out of platform code We move it into early_mmu_init() based on firmware features. For PS3, we have to move the setting of these into early_init_devtree(). Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ps3.h | 2 ++ arch/powerpc/kernel/prom.c | 6 ++++++ arch/powerpc/mm/hash_utils_64.c | 14 ++++++++++++++ arch/powerpc/mm/pgtable-radix.c | 1 + arch/powerpc/platforms/cell/setup.c | 1 - arch/powerpc/platforms/maple/setup.c | 1 - arch/powerpc/platforms/pasemi/setup.c | 2 -- arch/powerpc/platforms/powermac/setup.c | 4 ---- arch/powerpc/platforms/powernv/setup.c | 5 ----- arch/powerpc/platforms/ps3/setup.c | 15 +++++++++------ arch/powerpc/platforms/pseries/setup.c | 7 ------- 11 files changed, 32 insertions(+), 26 deletions(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h index a1bc7e758422..a19f831a4cc9 100644 --- a/arch/powerpc/include/asm/ps3.h +++ b/arch/powerpc/include/asm/ps3.h @@ -526,4 +526,6 @@ void ps3_sync_irq(int node); u32 ps3_get_hw_thread_id(int cpu); u64 ps3_get_spe_id(void *arg); +void ps3_early_mm_init(void); + #endif diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 2bd1784e65b3..bae3db791150 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -759,6 +759,12 @@ void __init early_init_devtree(void *params) /* Now try to figure out if we are running on LPAR and so on */ pseries_probe_fw_features(); +#ifdef CONFIG_PPC_PS3 + /* Identify PS3 firmware */ + if (of_flat_dt_is_compatible(of_get_flat_dt_root(), "sony,ps3")) + powerpc_firmware_features |= FW_FEATURE_PS3_POSSIBLE; +#endif + DBG(" <- early_init_devtree()\n"); } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 859ecaa928e9..336fad6524df 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -58,6 +58,7 @@ #include #include #include +#include #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -872,6 +873,11 @@ static void __init htab_initialize(void) #undef KB #undef MB +void __init __weak hpte_init_lpar(void) +{ + panic("FW_FEATURE_LPAR set but no LPAR support compiled\n"); +} + void __init hash__early_init_mmu(void) { /* @@ -908,6 +914,14 @@ void __init hash__early_init_mmu(void) pci_io_base = ISA_IO_BASE; #endif + /* Select appropriate backend */ + if (firmware_has_feature(FW_FEATURE_PS3_LV1)) + ps3_early_mm_init(); + else if (firmware_has_feature(FW_FEATURE_LPAR)) + hpte_init_lpar(); + else + hpte_init_native(); + /* Initialize the MMU Hash table and create the linear mapping * of memory. Has to be done before SLB initialization as this is * currently where the page size encoding is obtained. 
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 4ea1094c9fc4..003ff48a11b6 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -345,6 +345,7 @@ void __init radix__early_init_mmu(void) radix_init_page_sizes(); if (!firmware_has_feature(FW_FEATURE_LPAR)) { + radix_init_native(); lpcr = mfspr(SPRN_LPCR); mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR); radix_init_partition_table(); diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index 36cff28d0293..e342f7833db5 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -261,7 +261,6 @@ static int __init cell_probe(void) !of_flat_dt_is_compatible(root, "IBM,CPBW-1.0")) return 0; - hpte_init_native(); pm_power_off = rtas_power_off; return 1; diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index 99b9b96ab059..b1ecd991a0fb 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -306,7 +306,6 @@ static int __init maple_probe(void) !of_flat_dt_is_compatible(root, "Momentum,Apache")) return 0; - hpte_init_native(); pm_power_off = maple_power_off; return 1; diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index 7349644c9828..924d01d9b375 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -426,8 +426,6 @@ static int __init pas_probe(void) !of_flat_dt_is_compatible(root, "pasemi,pwrficient")) return 0; - hpte_init_native(); - alloc_iobmap_l2(); return 1; diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 4ad61687c670..795bf8cfddd9 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -599,10 +599,6 @@ static int __init pmac_probe(void) !of_flat_dt_is_compatible(root, "MacRISC")) return 0; -#ifdef CONFIG_PPC64 - hpte_init_native(); -#endif - #ifdef CONFIG_PPC32 /* isa_io_base gets set in pmac_pci_init */ ISA_DMA_THRESHOLD = ~0L; diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 8492bbbcfc08..f70ea83502b2 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -273,11 +273,6 @@ static int __init pnv_probe(void) if (!of_flat_dt_is_compatible(root, "ibm,powernv")) return 0; - if (IS_ENABLED(CONFIG_PPC_RADIX_MMU) && radix_enabled()) - radix_init_native(); - else if (IS_ENABLED(CONFIG_PPC_STD_MMU_64)) - hpte_init_native(); - if (firmware_has_feature(FW_FEATURE_OPAL)) pnv_setup_machdep_opal(); diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 486ecd017535..b7fdf88474e8 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -226,9 +226,17 @@ static void __init ps3_progress(char *s, unsigned short hex) printk("*** %04x : %s\n", hex, s ? 
s : ""); } -static int __init ps3_probe(void) +void __init ps3_early_mm_init(void) { unsigned long htab_size; + + ps3_mm_init(); + ps3_mm_vas_create(&htab_size); + ps3_hpte_init(htab_size); +} + +static int __init ps3_probe(void) +{ unsigned long dt_root; DBG(" -> %s:%d\n", __func__, __LINE__); @@ -237,12 +245,7 @@ static int __init ps3_probe(void) if (!of_flat_dt_is_compatible(dt_root, "sony,ps3")) return 0; - powerpc_firmware_features |= FW_FEATURE_PS3_POSSIBLE; - ps3_os_area_save_params(); - ps3_mm_init(); - ps3_mm_vas_create(&htab_size); - ps3_hpte_init(htab_size); pm_power_off = ps3_power_off; DBG(" <- %s:%d\n", __func__, __LINE__); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index ba7dc126b5e5..240721348ebc 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -687,13 +687,6 @@ static int __init pSeries_probe(void) of_flat_dt_is_compatible(root, "IBM,CBEA")) return 0; - pr_debug("pSeries detected, looking for LPAR capability...\n"); - - if (firmware_has_feature(FW_FEATURE_LPAR)) - hpte_init_lpar(); - else - hpte_init_native(); - pm_power_off = pseries_power_off; pr_debug("Machine is%s LPAR !\n", -- cgit v1.2.3 From 5556ecf5e9fa1c7bcc50d0aa74aec28f30d9590a Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:53 +1000 Subject: powerpc/mm/hash: Don't use machine_is() early during boot Use the device-tree instead as we'll be moving probe_machine() out of early_setup Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 336fad6524df..a9472ea269b8 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -749,7 +750,7 @@ static void __init htab_initialize(void) unsigned long table; unsigned long pteg_count; unsigned long prot; - unsigned long base = 0, size = 0, limit; + unsigned long base = 0, size = 0; struct memblock_region *reg; DBG(" -> htab_initialize()\n"); @@ -775,7 +776,8 @@ static void __init htab_initialize(void) htab_hash_mask = pteg_count - 1; - if (firmware_has_feature(FW_FEATURE_LPAR)) { + if (firmware_has_feature(FW_FEATURE_LPAR) || + firmware_has_feature(FW_FEATURE_PS3_LV1)) { /* Using a hypervisor which owns the htab */ htab_address = NULL; _SDR1 = 0; @@ -790,16 +792,22 @@ static void __init htab_initialize(void) ppc_md.hpte_clear_all(); #endif } else { - /* Find storage for the HPT. Must be contiguous in - * the absolute address space. On cell we want it to be - * in the first 2 Gig so we can use it for IOMMU hacks. 
+ unsigned long limit = MEMBLOCK_ALLOC_ANYWHERE; + +#ifdef CONFIG_PPC_CELL + /* + * Cell may require the hash table down low when using the + * Axon IOMMU in order to fit the dynamic region over it, see + * comments in cell/iommu.c */ - if (machine_is(cell)) + if (fdt_subnode_offset(initial_boot_params, 0, "axon") > 0) { limit = 0x80000000; - else - limit = MEMBLOCK_ALLOC_ANYWHERE; + pr_info("Hash table forced below 2G for Axon IOMMU\n"); + } +#endif /* CONFIG_PPC_CELL */ - table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, limit); + table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, + limit); DBG("Hash table allocated at %lx, size: %lx\n", table, htab_size_bytes); -- cgit v1.2.3 From 2b4e3ad8f5790cae6e0356c5fc200588bd2c915c Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:56 +1000 Subject: powerpc/mm/hash64: Don't test for machine type to detect HEA special case Instead, check for FW_FEATURE_SPLPAR. This should be roughly equivalent, as all pseries machines that can have an HEA also support SPLPAR and no other machine type does. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/mm/hash_utils_64.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index a9472ea269b8..eab3074a6a37 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -527,7 +527,8 @@ static bool might_have_hea(void) * we will never see an HEA ethernet device. */ #ifdef CONFIG_IBMEBUS - return !cpu_has_feature(CPU_FTR_ARCH_207S); + return !cpu_has_feature(CPU_FTR_ARCH_207S) && + !firmware_has_feature(FW_FEATURE_SPLPAR); #else return false; #endif @@ -593,7 +594,7 @@ found: * would stop us accessing the HEA ethernet. So if we * have the chance of ever seeing one, stay at 4k. */ - if (!might_have_hea() || !machine_is(pseries)) + if (!might_have_hea()) mmu_io_psize = MMU_PAGE_64K; } else mmu_ci_restrictions = 1; -- cgit v1.2.3 From 7025776ed1ebdfa1959932e7a4662c2f88607df0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 5 Jul 2016 15:03:58 +1000 Subject: powerpc/mm: Move hash table ops to a separate structure Moving probe_machine() to after mmu init will cause the ppc_md fields relative to the hash table management to be overwritten. Since we have essentially disconnected the machine type from the hash backend ops, finish the job by moving them to a different structure. The only callback that didn't quite fit is update_partition_table since this is not specific to hash, so I moved it to a standalone variable for now. We can revisit later if needed.
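The registration side of the new structure is shown by hpte_init_native() in the diff below; for contrast, a hypothetical backend (every "foo" name here is invented for illustration) would now plug in like this instead of scribbling on ppc_md:

/* Hooks not provided (e.g. flush_hash_range) stay NULL; callers in
 * hash_utils_64.c check for NULL and fall back to a generic path. */
extern long foo_hpte_insert(unsigned long hpte_group, unsigned long vpn,
			    unsigned long prpn, unsigned long rflags,
			    unsigned long vflags, int psize, int apsize,
			    int ssize);
extern long foo_hpte_remove(unsigned long hpte_group);

void __init hpte_init_foo(void)
{
	mmu_hash_ops.hpte_insert = foo_hpte_insert;
	mmu_hash_ops.hpte_remove = foo_hpte_remove;
	/* ...and likewise for the remaining hooks the backend supports */
}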
Signed-off-by: Benjamin Herrenschmidt [mpe: Fix ppc64e build failure in kexec] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 39 ++++++++++++++++++++ arch/powerpc/include/asm/machdep.h | 36 ------------------- arch/powerpc/kernel/machine_kexec_64.c | 9 +++-- arch/powerpc/kernel/misc_64.S | 2 +- arch/powerpc/kvm/book3s_64_mmu_host.c | 18 +++++----- arch/powerpc/mm/hash64_4k.c | 18 +++++----- arch/powerpc/mm/hash64_64k.c | 39 +++++++++++--------- arch/powerpc/mm/hash_native_64.c | 16 ++++----- arch/powerpc/mm/hash_utils_64.c | 51 +++++++++++++++------------ arch/powerpc/mm/hugepage-hash64.c | 17 ++++----- arch/powerpc/mm/hugetlbpage-hash64.c | 4 +-- arch/powerpc/platforms/ps3/htab.c | 12 +++---- arch/powerpc/platforms/pseries/lpar.c | 18 +++++----- 13 files changed, 151 insertions(+), 128 deletions(-) (limited to 'arch/powerpc/mm/hash_utils_64.c') diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 2c2d55580ae2..b0f4dffe12ae 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -124,6 +124,45 @@ #ifndef __ASSEMBLY__ +struct mmu_hash_ops { + void (*hpte_invalidate)(unsigned long slot, + unsigned long vpn, + int bpsize, int apsize, + int ssize, int local); + long (*hpte_updatepp)(unsigned long slot, + unsigned long newpp, + unsigned long vpn, + int bpsize, int apsize, + int ssize, unsigned long flags); + void (*hpte_updateboltedpp)(unsigned long newpp, + unsigned long ea, + int psize, int ssize); + long (*hpte_insert)(unsigned long hpte_group, + unsigned long vpn, + unsigned long prpn, + unsigned long rflags, + unsigned long vflags, + int psize, int apsize, + int ssize); + long (*hpte_remove)(unsigned long hpte_group); + int (*hpte_removebolted)(unsigned long ea, + int psize, int ssize); + void (*flush_hash_range)(unsigned long number, int local); + void (*hugepage_invalidate)(unsigned long vsid, + unsigned long addr, + unsigned char *hpte_slot_array, + int psize, int ssize, int local); + /* + * Special for kexec. + * To be called in real mode with interrupts disabled. No locks are + * taken as such, concurrent access on pre POWER5 hardware could result + * in a deadlock. + * The linear mapping is destroyed as well. 
+ */ + void (*hpte_clear_all)(void); +}; +extern struct mmu_hash_ops mmu_hash_ops; + struct hash_pte { __be64 v; __be64 r; diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index e62e7d33bda0..5b2edf5ba114 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -34,42 +34,6 @@ struct pci_host_bridge; struct machdep_calls { char *name; #ifdef CONFIG_PPC64 - void (*hpte_invalidate)(unsigned long slot, - unsigned long vpn, - int bpsize, int apsize, - int ssize, int local); - long (*hpte_updatepp)(unsigned long slot, - unsigned long newpp, - unsigned long vpn, - int bpsize, int apsize, - int ssize, unsigned long flags); - void (*hpte_updateboltedpp)(unsigned long newpp, - unsigned long ea, - int psize, int ssize); - long (*hpte_insert)(unsigned long hpte_group, - unsigned long vpn, - unsigned long prpn, - unsigned long rflags, - unsigned long vflags, - int psize, int apsize, - int ssize); - long (*hpte_remove)(unsigned long hpte_group); - int (*hpte_removebolted)(unsigned long ea, - int psize, int ssize); - void (*flush_hash_range)(unsigned long number, int local); - void (*hugepage_invalidate)(unsigned long vsid, - unsigned long addr, - unsigned char *hpte_slot_array, - int psize, int ssize, int local); - /* - * Special for kexec. - * To be called in real mode with interrupts disabled. No locks are - * taken as such, concurrent access on pre POWER5 hardware could result - * in a deadlock. - * The linear mapping is destroyed as well. - */ - void (*hpte_clear_all)(void); - void __iomem * (*ioremap)(phys_addr_t addr, unsigned long size, unsigned long flags, void *caller); void (*iounmap)(volatile void __iomem *token); diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 50bf55135ef8..4c780a342282 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -55,7 +55,7 @@ int default_machine_kexec_prepare(struct kimage *image) const unsigned long *basep; const unsigned int *sizep; - if (!ppc_md.hpte_clear_all) + if (!mmu_hash_ops.hpte_clear_all) return -ENOENT; /* @@ -380,7 +380,12 @@ void default_machine_kexec(struct kimage *image) */ kexec_sequence(&kexec_stack, image->start, image, page_address(image->control_code_page), - ppc_md.hpte_clear_all); +#ifdef CONFIG_PPC_STD_MMU + mmu_hash_ops.hpte_clear_all +#else + NULL +#endif + ); /* NOTREACHED */ } diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 7a8519052b14..cb195157b318 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -667,7 +667,7 @@ _GLOBAL(kexec_sequence) mr r12,r27 #endif mtctr r12 - bctrl /* ppc_md.hpte_clear_all(void); */ + bctrl /* mmu_hash_ops.hpte_clear_all(void); */ #endif /* !CONFIG_PPC_BOOK3E */ /* diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 114edace6cdd..a587e8f4fd26 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -34,9 +34,9 @@ void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) { - ppc_md.hpte_invalidate(pte->slot, pte->host_vpn, - pte->pagesize, pte->pagesize, MMU_SEGSIZE_256M, - false); + mmu_hash_ops.hpte_invalidate(pte->slot, pte->host_vpn, + pte->pagesize, pte->pagesize, + MMU_SEGSIZE_256M, false); } /* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using @@ -169,13 +169,13 @@ map_again: /* In case we tried normal mapping already, let's nuke old entries */ 
if (attempt > 1) - if (ppc_md.hpte_remove(hpteg) < 0) { + if (mmu_hash_ops.hpte_remove(hpteg) < 0) { r = -1; goto out_unlock; } - ret = ppc_md.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags, - hpsize, hpsize, MMU_SEGSIZE_256M); + ret = mmu_hash_ops.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags, + hpsize, hpsize, MMU_SEGSIZE_256M); if (ret < 0) { /* If we couldn't map a primary PTE, try a secondary */ @@ -187,8 +187,10 @@ map_again: trace_kvm_book3s_64_mmu_map(rflags, hpteg, vpn, hpaddr, orig_pte); - /* The ppc_md code may give us a secondary entry even though we - asked for a primary. Fix up. */ + /* + * The mmu_hash_ops code may give us a secondary entry even + * though we asked for a primary. Fix up. + */ if ((ret & _PTEIDX_SECONDARY) && !(vflags & HPTE_V_SECONDARY)) { hash = ~hash; hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c index 6333b273d2d5..42c702b3be1f 100644 --- a/arch/powerpc/mm/hash64_4k.c +++ b/arch/powerpc/mm/hash64_4k.c @@ -70,8 +70,8 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; - if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K, - MMU_PAGE_4K, ssize, flags) == -1) + if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K, + MMU_PAGE_4K, ssize, flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } @@ -84,21 +84,23 @@ repeat: hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; /* Insert into the hash table, primary slot */ - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, - MMU_PAGE_4K, MMU_PAGE_4K, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0, + MMU_PAGE_4K, MMU_PAGE_4K, ssize); /* * Primary is full, try the secondary */ if (unlikely(slot == -1)) { hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, - rflags, HPTE_V_SECONDARY, - MMU_PAGE_4K, MMU_PAGE_4K, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, + rflags, + HPTE_V_SECONDARY, + MMU_PAGE_4K, + MMU_PAGE_4K, ssize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - ppc_md.hpte_remove(hpte_group); + mmu_hash_ops.hpte_remove(hpte_group); /* * FIXME!! Should be try the group from which we removed ? */ diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c index 16644e1f4e6b..3bbbea07378c 100644 --- a/arch/powerpc/mm/hash64_64k.c +++ b/arch/powerpc/mm/hash64_64k.c @@ -133,9 +133,9 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; - ret = ppc_md.hpte_updatepp(slot, rflags, vpn, - MMU_PAGE_4K, MMU_PAGE_4K, - ssize, flags); + ret = mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, + MMU_PAGE_4K, MMU_PAGE_4K, + ssize, flags); /* *if we failed because typically the HPTE wasn't really here * we try an insertion. 
@@ -166,21 +166,22 @@ repeat: hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; /* Insert into the hash table, primary slot */ - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, - MMU_PAGE_4K, MMU_PAGE_4K, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0, + MMU_PAGE_4K, MMU_PAGE_4K, ssize); /* * Primary is full, try the secondary */ if (unlikely(slot == -1)) { hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, - rflags, HPTE_V_SECONDARY, - MMU_PAGE_4K, MMU_PAGE_4K, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, + rflags, HPTE_V_SECONDARY, + MMU_PAGE_4K, MMU_PAGE_4K, + ssize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - ppc_md.hpte_remove(hpte_group); + mmu_hash_ops.hpte_remove(hpte_group); /* * FIXME!! Should be try the group from which we removed ? */ @@ -272,8 +273,9 @@ int __hash_page_64K(unsigned long ea, unsigned long access, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT; - if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K, - MMU_PAGE_64K, ssize, flags) == -1) + if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K, + MMU_PAGE_64K, ssize, + flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } @@ -286,21 +288,24 @@ repeat: hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; /* Insert into the hash table, primary slot */ - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, - MMU_PAGE_64K, MMU_PAGE_64K, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0, + MMU_PAGE_64K, MMU_PAGE_64K, + ssize); /* * Primary is full, try the secondary */ if (unlikely(slot == -1)) { hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, - rflags, HPTE_V_SECONDARY, - MMU_PAGE_64K, MMU_PAGE_64K, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, + rflags, + HPTE_V_SECONDARY, + MMU_PAGE_64K, + MMU_PAGE_64K, ssize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - ppc_md.hpte_remove(hpte_group); + mmu_hash_ops.hpte_remove(hpte_group); /* * FIXME!! Should be try the group from which we removed ? 
*/ diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index cb3b4c98b637..88ce7d212320 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -739,14 +739,14 @@ static int native_register_proc_table(unsigned long base, unsigned long page_siz void __init hpte_init_native(void) { - ppc_md.hpte_invalidate = native_hpte_invalidate; - ppc_md.hpte_updatepp = native_hpte_updatepp; - ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp; - ppc_md.hpte_insert = native_hpte_insert; - ppc_md.hpte_remove = native_hpte_remove; - ppc_md.hpte_clear_all = native_hpte_clear; - ppc_md.flush_hash_range = native_flush_hash_range; - ppc_md.hugepage_invalidate = native_hugepage_invalidate; + mmu_hash_ops.hpte_invalidate = native_hpte_invalidate; + mmu_hash_ops.hpte_updatepp = native_hpte_updatepp; + mmu_hash_ops.hpte_updateboltedpp = native_hpte_updateboltedpp; + mmu_hash_ops.hpte_insert = native_hpte_insert; + mmu_hash_ops.hpte_remove = native_hpte_remove; + mmu_hash_ops.hpte_clear_all = native_hpte_clear; + mmu_hash_ops.flush_hash_range = native_flush_hash_range; + mmu_hash_ops.hugepage_invalidate = native_hugepage_invalidate; if (cpu_has_feature(CPU_FTR_ARCH_300)) ppc_md.register_process_table = native_register_proc_table; diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index eab3074a6a37..341632471b9d 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -118,6 +118,8 @@ static u8 *linear_map_hash_slots; static unsigned long linear_map_hash_count; static DEFINE_SPINLOCK(linear_map_hash_lock); #endif /* CONFIG_DEBUG_PAGEALLOC */ +struct mmu_hash_ops mmu_hash_ops; +EXPORT_SYMBOL(mmu_hash_ops); /* There are definitions of page sizes arrays to be used when none * is provided by the firmware. @@ -276,9 +278,10 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, hash = hpt_hash(vpn, shift, ssize); hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); - BUG_ON(!ppc_md.hpte_insert); - ret = ppc_md.hpte_insert(hpteg, vpn, paddr, tprot, - HPTE_V_BOLTED, psize, psize, ssize); + BUG_ON(!mmu_hash_ops.hpte_insert); + ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot, + HPTE_V_BOLTED, psize, psize, + ssize); if (ret < 0) break; @@ -303,11 +306,11 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend, shift = mmu_psize_defs[psize].shift; step = 1 << shift; - if (!ppc_md.hpte_removebolted) + if (!mmu_hash_ops.hpte_removebolted) return -ENODEV; for (vaddr = vstart; vaddr < vend; vaddr += step) { - rc = ppc_md.hpte_removebolted(vaddr, psize, ssize); + rc = mmu_hash_ops.hpte_removebolted(vaddr, psize, ssize); if (rc == -ENOENT) { ret = -ENOENT; continue; @@ -789,8 +792,8 @@ static void __init htab_initialize(void) * Clear the htab if firmware assisted dump is active so * that we dont end up using old mappings. 
*/ - if (is_fadump_active() && ppc_md.hpte_clear_all) - ppc_md.hpte_clear_all(); + if (is_fadump_active() && mmu_hash_ops.hpte_clear_all) + mmu_hash_ops.hpte_clear_all(); #endif } else { unsigned long limit = MEMBLOCK_ALLOC_ANYWHERE; @@ -1480,7 +1483,8 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, * We use same base page size and actual psize, because we don't * use these functions for hugepage */ - ppc_md.hpte_invalidate(slot, vpn, psize, psize, ssize, local); + mmu_hash_ops.hpte_invalidate(slot, vpn, psize, psize, + ssize, local); } pte_iterate_hashed_end(); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -1521,9 +1525,9 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr, if (!hpte_slot_array) return; - if (ppc_md.hugepage_invalidate) { - ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array, - psize, ssize, local); + if (mmu_hash_ops.hugepage_invalidate) { + mmu_hash_ops.hugepage_invalidate(vsid, s_addr, hpte_slot_array, + psize, ssize, local); goto tm_abort; } /* @@ -1550,8 +1554,8 @@ void flush_hash_hugepage(unsigned long vsid, unsigned long addr, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; - ppc_md.hpte_invalidate(slot, vpn, psize, - MMU_PAGE_16M, ssize, local); + mmu_hash_ops.hpte_invalidate(slot, vpn, psize, + MMU_PAGE_16M, ssize, local); } tm_abort: #ifdef CONFIG_PPC_TRANSACTIONAL_MEM @@ -1575,8 +1579,8 @@ tm_abort: void flush_hash_range(unsigned long number, int local) { - if (ppc_md.flush_hash_range) - ppc_md.flush_hash_range(number, local); + if (mmu_hash_ops.flush_hash_range) + mmu_hash_ops.flush_hash_range(number, local); else { int i; struct ppc64_tlb_batch *batch = @@ -1621,22 +1625,22 @@ repeat: HPTES_PER_GROUP) & ~0x7UL; /* Insert into the hash table, primary slot */ - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, vflags, - psize, psize, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, vflags, + psize, psize, ssize); /* Primary is full, try the secondary */ if (unlikely(slot == -1)) { hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; - slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, - vflags | HPTE_V_SECONDARY, - psize, psize, ssize); + slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, + vflags | HPTE_V_SECONDARY, + psize, psize, ssize); if (slot == -1) { if (mftb() & 0x1) hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP)&~0x7UL; - ppc_md.hpte_remove(hpte_group); + mmu_hash_ops.hpte_remove(hpte_group); goto repeat; } } @@ -1686,8 +1690,9 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi) hash = ~hash; slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; - ppc_md.hpte_invalidate(slot, vpn, mmu_linear_psize, mmu_linear_psize, - mmu_kernel_ssize, 0); + mmu_hash_ops.hpte_invalidate(slot, vpn, mmu_linear_psize, + mmu_linear_psize, + mmu_kernel_ssize, 0); } void __kernel_map_pages(struct page *page, int numpages, int enable) diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index ba3fc229468a..f20d16f849c5 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -103,8 +103,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; slot += hidx & _PTEIDX_GROUP_IX; - ret = ppc_md.hpte_updatepp(slot, rflags, vpn, - psize, lpsize, ssize, flags); + ret = mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, + psize, lpsize, ssize, 
+						 flags);
 		/*
 		 * We failed to update, try to insert a new entry.
 		 */
@@ -131,23 +131,24 @@ repeat:
 		hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;

 		/* Insert into the hash table, primary slot */
-		slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
-					  psize, lpsize, ssize);
+		slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+						psize, lpsize, ssize);
 		/*
 		 * Primary is full, try the secondary
 		 */
 		if (unlikely(slot == -1)) {
 			hpte_group = ((~hash & htab_hash_mask) *
 				      HPTES_PER_GROUP) & ~0x7UL;
-			slot = ppc_md.hpte_insert(hpte_group, vpn, pa,
-						  rflags, HPTE_V_SECONDARY,
-						  psize, lpsize, ssize);
+			slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
+							rflags,
+							HPTE_V_SECONDARY,
+							psize, lpsize, ssize);
 			if (slot == -1) {
 				if (mftb() & 0x1)
 					hpte_group = ((hash & htab_hash_mask) *
 						      HPTES_PER_GROUP) & ~0x7UL;
-				ppc_md.hpte_remove(hpte_group);
+				mmu_hash_ops.hpte_remove(hpte_group);
 				goto repeat;
 			}
 		}
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 3058560b6121..d5026f3800b6 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -79,8 +79,8 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 		slot += (old_pte & H_PAGE_F_GIX) >> H_PAGE_F_GIX_SHIFT;

-		if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize,
-					 mmu_psize, ssize, flags) == -1)
+		if (mmu_hash_ops.hpte_updatepp(slot, rflags, vpn, mmu_psize,
+					       mmu_psize, ssize, flags) == -1)
 			old_pte &= ~_PAGE_HPTEFLAGS;
 	}

diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c
index c9a3e677192a..cb3c50328de8 100644
--- a/arch/powerpc/platforms/ps3/htab.c
+++ b/arch/powerpc/platforms/ps3/htab.c
@@ -195,12 +195,12 @@ static void ps3_hpte_clear(void)

 void __init ps3_hpte_init(unsigned long htab_size)
 {
-	ppc_md.hpte_invalidate = ps3_hpte_invalidate;
-	ppc_md.hpte_updatepp = ps3_hpte_updatepp;
-	ppc_md.hpte_updateboltedpp = ps3_hpte_updateboltedpp;
-	ppc_md.hpte_insert = ps3_hpte_insert;
-	ppc_md.hpte_remove = ps3_hpte_remove;
-	ppc_md.hpte_clear_all = ps3_hpte_clear;
+	mmu_hash_ops.hpte_invalidate = ps3_hpte_invalidate;
+	mmu_hash_ops.hpte_updatepp = ps3_hpte_updatepp;
+	mmu_hash_ops.hpte_updateboltedpp = ps3_hpte_updateboltedpp;
+	mmu_hash_ops.hpte_insert = ps3_hpte_insert;
+	mmu_hash_ops.hpte_remove = ps3_hpte_remove;
+	mmu_hash_ops.hpte_clear_all = ps3_hpte_clear;

 	ppc64_pft_size = __ilog2(htab_size);
 }
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 03c732afef34..0e91388d0af9 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -591,15 +591,15 @@ __setup("bulk_remove=", disable_bulk_remove);

 void __init hpte_init_lpar(void)
 {
-	ppc_md.hpte_invalidate = pSeries_lpar_hpte_invalidate;
-	ppc_md.hpte_updatepp = pSeries_lpar_hpte_updatepp;
-	ppc_md.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp;
-	ppc_md.hpte_insert = pSeries_lpar_hpte_insert;
-	ppc_md.hpte_remove = pSeries_lpar_hpte_remove;
-	ppc_md.hpte_removebolted = pSeries_lpar_hpte_removebolted;
-	ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range;
-	ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear;
-	ppc_md.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
+	mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate;
+	mmu_hash_ops.hpte_updatepp = pSeries_lpar_hpte_updatepp;
+	mmu_hash_ops.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp;
+	mmu_hash_ops.hpte_insert = pSeries_lpar_hpte_insert;
+	mmu_hash_ops.hpte_remove = pSeries_lpar_hpte_remove;
+	mmu_hash_ops.hpte_removebolted = pSeries_lpar_hpte_removebolted;
+	mmu_hash_ops.flush_hash_range = pSeries_lpar_flush_hash_range;
+	mmu_hash_ops.hpte_clear_all = pSeries_lpar_hptab_clear;
+	mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
 }

 #ifdef CONFIG_PPC_SMLPAR
-- cgit v1.2.3
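The conversion above moves every hash-MMU backend callback from ppc_md into a dedicated mmu_hash_ops structure: each backend fills the table once at early init, and common code dispatches through it, NULL-checking the hooks that are optional. A minimal user-space sketch of that ops-table pattern (the names are illustrative stand-ins, not the kernel's definitions):

    #include <stdio.h>

    /* A cut-down stand-in for struct mmu_hash_ops: one mandatory hook,
     * one optional hook that callers must NULL-check before use. */
    struct hash_ops {
        long (*hpte_insert)(unsigned long group, unsigned long vpn);
        void (*flush_hash_range)(unsigned long number, int local); /* optional */
    };

    static struct hash_ops hash_ops; /* zero-initialised, like mmu_hash_ops */

    static long native_insert(unsigned long group, unsigned long vpn)
    {
        printf("native insert: group=%lu vpn=%lu\n", group, vpn);
        return 0;
    }

    /* Each backend populates the table once, at early init. */
    static void init_native(void)
    {
        hash_ops.hpte_insert = native_insert;
        /* flush_hash_range left NULL: common code falls back to a loop. */
    }

    int main(void)
    {
        init_native();

        hash_ops.hpte_insert(1, 42);    /* mandatory hook, assumed set */

        if (hash_ops.flush_hash_range)  /* optional hook, NULL-checked */
            hash_ops.flush_hash_range(8, 1);
        else
            printf("no batch flush; flushing one page at a time\n");
        return 0;
    }

Keeping the table zero-initialised means an unimplemented hook reads as NULL, which is what makes both the optional-hook checks above and the sanity check added in the next commit possible.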
From 7353644fa9df875aee778a802e3d28f1e3578512 Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Mon, 25 Jul 2016 11:54:41 +1000
Subject: powerpc/mm: Fix build break when PPC_NATIVE=n

The recent commit to rework the hash MMU setup broke the build when
CONFIG_PPC_NATIVE=n. Fix it by adding an IS_ENABLED() check before calling
hpte_init_native().

Removing the else clause opens the possibility that we don't set any ops,
which would probably lead to a strange crash later. So add a check that we
correctly initialised at least one member of the struct.

Fixes: 166dd7d3fbf2 ("powerpc/64: Move MMU backend selection out of platform code")
Reported-by: Stephen Rothwell
Acked-by: Stephen Rothwell
Signed-off-by: Michael Ellerman
---
 arch/powerpc/mm/hash_utils_64.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc/mm/hash_utils_64.c')

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 341632471b9d..381b5894cc99 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -931,9 +931,12 @@ void __init hash__early_init_mmu(void)
 		ps3_early_mm_init();
 	else if (firmware_has_feature(FW_FEATURE_LPAR))
 		hpte_init_lpar();
-	else
+	else if IS_ENABLED(CONFIG_PPC_NATIVE)
 		hpte_init_native();

+	if (!mmu_hash_ops.hpte_insert)
+		panic("hash__early_init_mmu: No MMU hash ops defined!\n");
+
 	/* Initialize the MMU Hash table and create the linear mapping
 	 * of memory. Has to be done before SLB initialization as this is
 	 * currently where the page size encoding is obtained.
-- cgit v1.2.3
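The added panic() is a fail-fast guard: with the else clause gone, a kernel built with PPC_NATIVE=n but booted on bare metal would leave every hook NULL and crash obscurely on the first NULL function-pointer call. A hedged user-space sketch of the same defensive idiom, with exit() standing in for panic():

    #include <stdio.h>
    #include <stdlib.h>

    struct ops {
        int (*insert)(int slot);
    };

    static struct ops ops; /* may stay empty if no backend is built in */

    static void init_backends(int have_native)
    {
        if (have_native) {
            /* the native backend would set ops.insert here */
        }
    }

    int main(void)
    {
        init_backends(0);  /* simulate a kernel with no backend selected */

        /* Fail fast with a clear message instead of crashing later on a
         * NULL function-pointer call deep inside fault handling. */
        if (!ops.insert) {
            fprintf(stderr, "early init: no ops defined!\n");
            exit(1);
        }
        return 0;
    }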
From 6364e84e855ae9a0558ac873e3ff50ecb75bb40a Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Tue, 26 Jul 2016 10:33:03 +1000
Subject: powerpc/mm: Rename hpte_init_lpar() and move the fallback to a header

hpte_init_lpar() is part of the pseries platform, so name it as such.

Move the fallback implementation for when PSERIES=n into the header,
dropping the weak implementation. The panic() is now handled by the
calling code.

Signed-off-by: Michael Ellerman
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h | 7 ++++++-
 arch/powerpc/mm/hash_utils_64.c               | 7 +------
 arch/powerpc/platforms/pseries/lpar.c         | 2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'arch/powerpc/mm/hash_utils_64.c')

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index b0f4dffe12ae..450b017fdc19 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -391,8 +391,13 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
 extern void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages);
 extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr);

+#ifdef CONFIG_PPC_PSERIES
+void hpte_init_pseries(void);
+#else
+static inline void hpte_init_pseries(void) { }
+#endif
+
 extern void hpte_init_native(void);
-extern void hpte_init_lpar(void);
 extern void hpte_init_beat(void);
 extern void hpte_init_beat_v3(void);

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 381b5894cc99..1ff11c1bb182 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -885,11 +885,6 @@ static void __init htab_initialize(void)
 #undef KB
 #undef MB

-void __init __weak hpte_init_lpar(void)
-{
-	panic("FW_FEATURE_LPAR set but no LPAR support compiled\n");
-}
-
 void __init hash__early_init_mmu(void)
 {
 	/*
@@ -930,7 +925,7 @@ void __init hash__early_init_mmu(void)
 	if (firmware_has_feature(FW_FEATURE_PS3_LV1))
 		ps3_early_mm_init();
 	else if (firmware_has_feature(FW_FEATURE_LPAR))
-		hpte_init_lpar();
+		hpte_init_pseries();
 	else if IS_ENABLED(CONFIG_PPC_NATIVE)
 		hpte_init_native();

diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 0e91388d0af9..86707e67843f 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -589,7 +589,7 @@ static int __init disable_bulk_remove(char *str)

 __setup("bulk_remove=", disable_bulk_remove);

-void __init hpte_init_lpar(void)
+void __init hpte_init_pseries(void)
 {
 	mmu_hash_ops.hpte_invalidate = pSeries_lpar_hpte_invalidate;
 	mmu_hash_ops.hpte_updatepp = pSeries_lpar_hpte_updatepp;
-- cgit v1.2.3
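Replacing the __weak fallback with a static inline stub in the header is a common kernel idiom for optional subsystems: callers need no #ifdefs of their own, and when the option is off the empty inline vanishes at the call site instead of lingering as an out-of-line weak symbol. A generic sketch of the idiom (CONFIG_FOO and the foo_* names are hypothetical):

    /* foo.h -- generic shape of the pattern used for hpte_init_pseries() */
    #ifdef CONFIG_FOO
    void foo_init(void);                    /* real version lives in foo.c */
    #else
    static inline void foo_init(void) { }  /* compiles away when FOO=n */
    #endif

    /* Callers need no #ifdefs of their own: */
    void common_init(void)
    {
        foo_init(); /* no-op or real init, decided at compile time */
    }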
From fbef66f0adcddf4475e19f3d09df22fb34e633f6 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell
Date: Thu, 28 Jul 2016 12:35:02 +1000
Subject: powerpc/mm: Parenthesise IS_ENABLED() in if condition

Currently IS_ENABLED() produces an expression surrounded by parentheses,
which allows this code to compile, generating e.g.:

	else if (1 || 0)
		hpte_init_native();

However a change to the macro in the kbuild tree will break this in future
by removing the parentheses.

Fixes: 7353644fa9df ("powerpc/mm: Fix build break when PPC_NATIVE=n")
Signed-off-by: Stephen Rothwell
Signed-off-by: Michael Ellerman
---
 arch/powerpc/mm/hash_utils_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/powerpc/mm/hash_utils_64.c')

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 1ff11c1bb182..b78b5d211278 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -926,7 +926,7 @@ void __init hash__early_init_mmu(void)
 		ps3_early_mm_init();
 	else if (firmware_has_feature(FW_FEATURE_LPAR))
 		hpte_init_pseries();
-	else if IS_ENABLED(CONFIG_PPC_NATIVE)
+	else if (IS_ENABLED(CONFIG_PPC_NATIVE))
 		hpte_init_native();

 	if (!mmu_hash_ops.hpte_insert)
-- cgit v1.2.3
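The unparenthesised form only compiled because the macro's expansion happened to supply the parentheses the if statement itself lacked. A self-contained illustration with a toy macro (deliberately simplified; not the real include/linux/kconfig.h implementation):

    #include <stdio.h>

    /* Toy stand-in: like the 2016-era IS_ENABLED(), it expands to a
     * parenthesised expression, so "if TOY_ENABLED(x)" happens to parse. */
    #define CONFIG_TOY 1
    #define TOY_ENABLED(opt) ((opt) || 0)

    int main(void)
    {
        if TOY_ENABLED(CONFIG_TOY)      /* expands to: if ((1) || 0) ... */
            puts("works, but only by accident of macro internals");

        if (TOY_ENABLED(CONFIG_TOY))    /* robust however the macro expands */
            puts("always parses, as the fix writes it");
        return 0;
    }

Writing the parentheses at the call site keeps the code correct no matter how the macro's internals change, which is exactly what the kbuild change would otherwise have broken.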
From bacf9cf88303c0df5794ca45dd9f297740a00913 Mon Sep 17 00:00:00 2001
From: Michael Ellerman
Date: Tue, 26 Jul 2016 21:31:59 +1000
Subject: powerpc/mm: Do hash device tree scanning earlier

Currently MMU initialisation (early_init_mmu()) consists of a mixture of
scanning the device tree, setting MMU feature bits, and then also doing
actual initialisation of MMU data structures.

We'd like to decouple the setting of the MMU features from the actual setup.
So split out the device tree scanning, and associated code, and call it from
mmu_init_early_devtree().

Signed-off-by: Michael Ellerman
---
 arch/powerpc/include/asm/book3s/64/mmu.h |  1 +
 arch/powerpc/mm/hash_utils_64.c          | 53 ++++++++++++++++----------------
 arch/powerpc/mm/init_64.c                |  3 ++
 3 files changed, 31 insertions(+), 26 deletions(-)

(limited to 'arch/powerpc/mm/hash_utils_64.c')

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 4eb4bd019716..358f1410dc0d 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -108,6 +108,7 @@ extern int mmu_io_psize;

 /* MMU initialization */
 void mmu_early_init_devtree(void);
+void hash__early_init_devtree(void);
 extern void radix_init_native(void);
 extern void hash__early_init_mmu(void);
 extern void radix__early_init_mmu(void);
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index b78b5d211278..1a96b284b1a6 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -363,11 +363,6 @@ static int __init htab_dt_scan_seg_sizes(unsigned long node,
 	return 0;
 }

-static void __init htab_init_seg_sizes(void)
-{
-	of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL);
-}
-
 static int __init get_idx_from_shift(unsigned int shift)
 {
 	int idx = -1;
@@ -539,7 +534,7 @@ static bool might_have_hea(void)

 #endif /* #ifdef CONFIG_PPC_64K_PAGES */

-static void __init htab_init_page_sizes(void)
+static void __init htab_scan_page_sizes(void)
 {
 	int rc;

@@ -554,17 +549,23 @@ static void __init htab_init_page_sizes(void)
 	 * Try to find the available page sizes in the device-tree
 	 */
 	rc = of_scan_flat_dt(htab_dt_scan_page_sizes, NULL);
-	if (rc != 0)  /* Found */
-		goto found;
-
-	/*
-	 * Not in the device-tree, let's fallback on known size
-	 * list for 16M capable GP & GR
-	 */
-	if (mmu_has_feature(MMU_FTR_16M_PAGE))
+	if (rc == 0 && mmu_has_feature(MMU_FTR_16M_PAGE)) {
+		/*
+		 * Nothing in the device-tree, but the CPU supports 16M pages,
+		 * so let's fallback on a known size list for 16M capable CPUs.
+		 */
 		memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
 		       sizeof(mmu_psize_defaults_gp));
-found:
+	}
+
+#ifdef CONFIG_HUGETLB_PAGE
+	/* Reserve 16G huge page memory sections for huge pages */
+	of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
+#endif /* CONFIG_HUGETLB_PAGE */
+}
+
+static void __init htab_init_page_sizes(void)
+{
 	if (!debug_pagealloc_enabled()) {
 		/*
 		 * Pick a size for the linear mapping. Currently, we only
@@ -630,11 +631,6 @@ found:
 		,mmu_psize_defs[mmu_vmemmap_psize].shift
 #endif
 		);
-
-#ifdef CONFIG_HUGETLB_PAGE
-	/* Reserve 16G huge page memory sections for huge pages */
-	of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
-#endif /* CONFIG_HUGETLB_PAGE */
 }

 static int __init htab_dt_scan_pftsize(unsigned long node,
@@ -759,12 +755,6 @@ static void __init htab_initialize(void)

 	DBG(" -> htab_initialize()\n");

-	/* Initialize segment sizes */
-	htab_init_seg_sizes();
-
-	/* Initialize page sizes */
-	htab_init_page_sizes();
-
 	if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
 		mmu_kernel_ssize = MMU_SEGSIZE_1T;
 		mmu_highuser_ssize = MMU_SEGSIZE_1T;
@@ -885,8 +875,19 @@ static void __init htab_initialize(void)
 #undef KB
 #undef MB

+void __init hash__early_init_devtree(void)
+{
+	/* Initialize segment sizes */
+	of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL);
+
+	/* Initialize page sizes */
+	htab_scan_page_sizes();
+}
+
 void __init hash__early_init_mmu(void)
 {
+	htab_init_page_sizes();
+
 	/*
 	 * initialize page table size
 	 */
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 0d51e6e25db5..d023333c6c9a 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -426,5 +426,8 @@ void __init mmu_early_init_devtree(void)
 	/* Disable radix mode based on kernel command line. */
 	if (disable_radix)
 		cur_cpu_spec->mmu_features &= ~MMU_FTR_RADIX;
+
+	if (!radix_enabled())
+		hash__early_init_devtree();
 }
 #endif /* CONFIG_PPC_STD_MMU_64 */
-- cgit v1.2.3
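The result is a two-phase bring-up: hash__early_init_devtree() only scans the flattened device tree and records what the hardware supports, and hash__early_init_mmu() later acts on that record. A small sketch of the shape of this split (hypothetical names, standing in for the of_scan_flat_dt() callbacks and the psize setup):

    #include <stdbool.h>
    #include <stdio.h>

    /* Phase 1 fills this in; phase 2 (and any feature checks in between)
     * may rely on it being final. */
    struct mmu_config {
        bool has_16m_pages;
        int linear_psize_shift;
    };

    static struct mmu_config mmu_config;

    /* Phase 1: discovery only -- no MMU state is touched yet. */
    static void early_init_devtree(void)
    {
        mmu_config.has_16m_pages = true; /* stands in for a devtree scan */
    }

    /* Phase 2: setup, free to consume the finalised configuration. */
    static void early_init_mmu(void)
    {
        mmu_config.linear_psize_shift = mmu_config.has_16m_pages ? 24 : 12;
        printf("linear mapping uses %d-bit pages\n",
               mmu_config.linear_psize_shift);
    }

    int main(void)
    {
        early_init_devtree(); /* first, like mmu_early_init_devtree() */
        early_init_mmu();
        return 0;
    }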
From b8f1b4f8606b40b478072fb551f79e2984d44fad Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V"
Date: Sat, 23 Jul 2016 14:42:35 +0530
Subject: powerpc/mm: Convert early cpu/mmu feature check to use the new helpers

This switches early feature checks to use the non-static-key variant of
the function. In later patches we will be switching cpu_has_feature() and
mmu_has_feature() to use static keys, and those can be used only after the
static key/jump label infrastructure is initialized. Any check for a
feature before jump label init should be done using these new helpers.

Signed-off-by: Aneesh Kumar K.V
Signed-off-by: Michael Ellerman
---
 arch/powerpc/include/asm/book3s/64/mmu.h | 2 +-
 arch/powerpc/kernel/paca.c               | 2 +-
 arch/powerpc/kernel/setup_64.c           | 4 ++--
 arch/powerpc/mm/hash_utils_64.c          | 2 +-
 arch/powerpc/mm/init_64.c                | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch/powerpc/mm/hash_utils_64.c')

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 70c995870297..0cbde6a6750b 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -128,7 +128,7 @@ extern void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
 static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 					      phys_addr_t first_memblock_size)
 {
-	if (radix_enabled())
+	if (early_radix_enabled())
 		return radix__setup_initial_memory_limit(first_memblock_base,
 						   first_memblock_size);
 	return hash__setup_initial_memory_limit(first_memblock_base,
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 93dae296b6be..fa20060ff7a5 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -184,7 +184,7 @@ void setup_paca(struct paca_struct *new_paca)
 	 * if we do a GET_PACA() before the feature fixups have been
 	 * applied
 	 */
-	if (cpu_has_feature(CPU_FTR_HVMODE))
+	if (early_cpu_has_feature(CPU_FTR_HVMODE))
 		mtspr(SPRN_SPRG_HPACA, local_paca);
 #endif
 	mtspr(SPRN_SPRG_PACA, local_paca);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 984696136f96..eafb9a79e011 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -227,8 +227,8 @@ static void __init configure_exceptions(void)
 		opal_configure_cores();

 	/* Enable AIL if supported, and we are in hypervisor mode */
-	if (cpu_has_feature(CPU_FTR_HVMODE) &&
-	    cpu_has_feature(CPU_FTR_ARCH_207S)) {
+	if (early_cpu_has_feature(CPU_FTR_HVMODE) &&
+	    early_cpu_has_feature(CPU_FTR_ARCH_207S)) {
 		unsigned long lpcr = mfspr(SPRN_LPCR);
 		mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
 	}
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 1a96b284b1a6..0821556e16f4 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -549,7 +549,7 @@ static void __init htab_scan_page_sizes(void)
 	 * Try to find the available page sizes in the device-tree
 	 */
 	rc = of_scan_flat_dt(htab_dt_scan_page_sizes, NULL);
-	if (rc == 0 && mmu_has_feature(MMU_FTR_16M_PAGE)) {
+	if (rc == 0 && early_mmu_has_feature(MMU_FTR_16M_PAGE)) {
 		/*
 		 * Nothing in the device-tree, but the CPU supports 16M pages,
 		 * so let's fallback on a known size list for 16M capable CPUs.
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 6259f5db525b..16ada1eb7e26 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -427,7 +427,7 @@ void __init mmu_early_init_devtree(void)
 	if (disable_radix)
 		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;

-	if (radix_enabled())
+	if (early_radix_enabled())
 		radix__early_init_devtree();
 	else
 		hash__early_init_devtree();
-- cgit v1.2.3
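Why two variants are needed: the static-key form of a feature check compiles to a branch that only gives correct answers after jump-label init has patched it, so anything running earlier has to read the feature mask directly. The sketch below is only illustrative of that split; the kernel's real helpers live under arch/powerpc/include/asm/ and actually use jump labels rather than a runtime flag:

    #include <stdbool.h>
    #include <stdio.h>

    #define FTR_HVMODE    0x1ul
    #define FTR_ARCH_207S 0x2ul

    static unsigned long cpu_features = FTR_HVMODE | FTR_ARCH_207S;
    static bool jump_labels_ready; /* flipped once static keys are patched */

    /* Early variant: a plain load-and-mask, safe from the earliest boot code. */
    static bool early_cpu_has(unsigned long feature)
    {
        return cpu_features & feature;
    }

    /* Stand-in for the static-key variant: in the kernel this is a patched
     * branch, so using it before jump-label init is a bug; we model that
     * by warning loudly instead. */
    static bool cpu_has(unsigned long feature)
    {
        if (!jump_labels_ready)
            fprintf(stderr, "cpu_has() used before jump_label_init()!\n");
        return cpu_features & feature;
    }

    int main(void)
    {
        if (early_cpu_has(FTR_HVMODE)) /* legal in early boot */
            puts("early boot: hypervisor mode");

        jump_labels_ready = true; /* after jump_label_init() */

        if (cpu_has(FTR_ARCH_207S)) /* now the fast path is safe */
            puts("late boot: static-key check");
        return 0;
    }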