diff options
Diffstat (limited to 'arch/powerpc/mm/init_64.c')
-rw-r--r-- | arch/powerpc/mm/init_64.c | 138 |
1 files changed, 60 insertions, 78 deletions
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 16ada1eb7e26..c22f207aa656 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -42,6 +42,8 @@ #include <linux/memblock.h> #include <linux/hugetlb.h> #include <linux/slab.h> +#include <linux/of_fdt.h> +#include <linux/libfdt.h> #include <asm/pgalloc.h> #include <asm/page.h> @@ -51,7 +53,7 @@ #include <asm/mmu_context.h> #include <asm/pgtable.h> #include <asm/mmu.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/smp.h> #include <asm/machdep.h> #include <asm/tlb.h> @@ -80,83 +82,6 @@ EXPORT_SYMBOL_GPL(memstart_addr); phys_addr_t kernstart_addr; EXPORT_SYMBOL_GPL(kernstart_addr); -static void pgd_ctor(void *addr) -{ - memset(addr, 0, PGD_TABLE_SIZE); -} - -static void pud_ctor(void *addr) -{ - memset(addr, 0, PUD_TABLE_SIZE); -} - -static void pmd_ctor(void *addr) -{ - memset(addr, 0, PMD_TABLE_SIZE); -} - -struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE]; - -/* - * Create a kmem_cache() for pagetables. This is not used for PTE - * pages - they're linked to struct page, come from the normal free - * pages pool and have a different entry size (see real_pte_t) to - * everything else. Caches created by this function are used for all - * the higher level pagetables, and for hugepage pagetables. - */ -void pgtable_cache_add(unsigned shift, void (*ctor)(void *)) -{ - char *name; - unsigned long table_size = sizeof(void *) << shift; - unsigned long align = table_size; - - /* When batching pgtable pointers for RCU freeing, we store - * the index size in the low bits. Table alignment must be - * big enough to fit it. - * - * Likewise, hugeapge pagetable pointers contain a (different) - * shift value in the low bits. All tables must be aligned so - * as to leave enough 0 bits in the address to contain it. */ - unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1, - HUGEPD_SHIFT_MASK + 1); - struct kmem_cache *new; - - /* It would be nice if this was a BUILD_BUG_ON(), but at the - * moment, gcc doesn't seem to recognize is_power_of_2 as a - * constant expression, so so much for that. */ - BUG_ON(!is_power_of_2(minalign)); - BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE)); - - if (PGT_CACHE(shift)) - return; /* Already have a cache of this size */ - - align = max_t(unsigned long, align, minalign); - name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift); - new = kmem_cache_create(name, table_size, align, 0, ctor); - kfree(name); - pgtable_cache[shift - 1] = new; - pr_debug("Allocated pgtable cache for order %d\n", shift); -} - - -void pgtable_cache_init(void) -{ - pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor); - pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor); - /* - * In all current configs, when the PUD index exists it's the - * same size as either the pgd or pmd index except with THP enabled - * on book3s 64 - */ - if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) - pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor); - - if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX)) - panic("Couldn't allocate pgtable caches"); - if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE)) - panic("Couldn't allocate pud pgtable caches"); -} - #ifdef CONFIG_SPARSEMEM_VMEMMAP /* * Given an address within the vmemmap, determine the pfn of the page that @@ -421,12 +346,69 @@ static int __init parse_disable_radix(char *p) } early_param("disable_radix", parse_disable_radix); +/* + * If we're running under a hypervisor, we need to check the contents of + * /chosen/ibm,architecture-vec-5 to see if the hypervisor is willing to do + * radix. If not, we clear the radix feature bit so we fall back to hash. + */ +static void early_check_vec5(void) +{ + unsigned long root, chosen; + int size; + const u8 *vec5; + u8 mmu_supported; + + root = of_get_flat_dt_root(); + chosen = of_get_flat_dt_subnode_by_name(root, "chosen"); + if (chosen == -FDT_ERR_NOTFOUND) { + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + return; + } + vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size); + if (!vec5) { + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + return; + } + if (size <= OV5_INDX(OV5_MMU_SUPPORT)) { + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + return; + } + + /* Check for supported configuration */ + mmu_supported = vec5[OV5_INDX(OV5_MMU_SUPPORT)] & + OV5_FEAT(OV5_MMU_SUPPORT); + if (mmu_supported == OV5_FEAT(OV5_MMU_RADIX)) { + /* Hypervisor only supports radix - check enabled && GTSE */ + if (!early_radix_enabled()) { + pr_warn("WARNING: Ignoring cmdline option disable_radix\n"); + } + if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] & + OV5_FEAT(OV5_RADIX_GTSE))) { + pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n"); + } + /* Do radix anyway - the hypervisor said we had to */ + cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX; + } else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) { + /* Hypervisor only supports hash - disable radix */ + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + } +} + void __init mmu_early_init_devtree(void) { /* Disable radix mode based on kernel command line. */ if (disable_radix) cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + /* + * Check /chosen/ibm,architecture-vec-5 if running as a guest. + * When running bare-metal, we can use radix if we like + * even though the ibm,architecture-vec-5 property created by + * skiboot doesn't have the necessary bits set. + */ + if (!(mfmsr() & MSR_HV)) + early_check_vec5(); + if (early_radix_enabled()) radix__early_init_devtree(); else |