diff options
Diffstat (limited to 'arch/arm/mm')
-rw-r--r-- | arch/arm/mm/Kconfig | 59 | ||||
-rw-r--r-- | arch/arm/mm/Makefile | 3 | ||||
-rw-r--r-- | arch/arm/mm/alignment.c | 19 | ||||
-rw-r--r-- | arch/arm/mm/cache-feroceon-l2.c | 1 | ||||
-rw-r--r-- | arch/arm/mm/cache-l2x0.c | 1498 | ||||
-rw-r--r-- | arch/arm/mm/dma-mapping.c | 2 | ||||
-rw-r--r-- | arch/arm/mm/init.c | 9 | ||||
-rw-r--r-- | arch/arm/mm/l2c-common.c | 20 | ||||
-rw-r--r-- | arch/arm/mm/l2c-l2x0-resume.S | 58 | ||||
-rw-r--r-- | arch/arm/mm/mm.h | 4 | ||||
-rw-r--r-- | arch/arm/mm/mmu.c | 115 | ||||
-rw-r--r-- | arch/arm/mm/proc-v7m.S | 8 |
12 files changed, 1232 insertions, 564 deletions
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index f5ad9ee70426..eda0dd0ab97b 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -420,29 +420,29 @@ config CPU_32v3 bool select CPU_USE_DOMAINS if MMU select NEEDS_SYSCALL_FOR_CMPXCHG if SMP - select TLS_REG_EMUL if SMP || !MMU select NEED_KUSER_HELPERS + select TLS_REG_EMUL if SMP || !MMU config CPU_32v4 bool select CPU_USE_DOMAINS if MMU select NEEDS_SYSCALL_FOR_CMPXCHG if SMP - select TLS_REG_EMUL if SMP || !MMU select NEED_KUSER_HELPERS + select TLS_REG_EMUL if SMP || !MMU config CPU_32v4T bool select CPU_USE_DOMAINS if MMU select NEEDS_SYSCALL_FOR_CMPXCHG if SMP - select TLS_REG_EMUL if SMP || !MMU select NEED_KUSER_HELPERS + select TLS_REG_EMUL if SMP || !MMU config CPU_32v5 bool select CPU_USE_DOMAINS if MMU select NEEDS_SYSCALL_FOR_CMPXCHG if SMP - select TLS_REG_EMUL if SMP || !MMU select NEED_KUSER_HELPERS + select TLS_REG_EMUL if SMP || !MMU config CPU_32v6 bool @@ -897,6 +897,57 @@ config CACHE_PL310 This option enables optimisations for the PL310 cache controller. +config PL310_ERRATA_588369 + bool "PL310 errata: Clean & Invalidate maintenance operations do not invalidate clean lines" + depends on CACHE_L2X0 + help + The PL310 L2 cache controller implements three types of Clean & + Invalidate maintenance operations: by Physical Address + (offset 0x7F0), by Index/Way (0x7F8) and by Way (0x7FC). + They are architecturally defined to behave as the execution of a + clean operation followed immediately by an invalidate operation, + both performing to the same memory location. This functionality + is not correctly implemented in PL310 as clean lines are not + invalidated as a result of these operations. + +config PL310_ERRATA_727915 + bool "PL310 errata: Background Clean & Invalidate by Way operation can cause data corruption" + depends on CACHE_L2X0 + help + PL310 implements the Clean & Invalidate by Way L2 cache maintenance + operation (offset 0x7FC). 
This operation runs in background so that + PL310 can handle normal accesses while it is in progress. Under very + rare circumstances, due to this erratum, write data can be lost when + PL310 treats a cacheable write transaction during a Clean & + Invalidate by Way operation. + +config PL310_ERRATA_753970 + bool "PL310 errata: cache sync operation may be faulty" + depends on CACHE_PL310 + help + This option enables the workaround for the 753970 PL310 (r3p0) erratum. + + Under some conditions, the effect of the cache sync operation on + the store buffer still remains when the operation completes. + This means that the store buffer is always asked to drain and + this prevents it from merging any further writes. The workaround + is to replace the normal offset of the cache sync operation (0x730) + by another offset targeting an unmapped PL310 register 0x740. + This has the same effect as the cache sync operation: store buffer + drain and waiting for all buffers to be empty. + +config PL310_ERRATA_769419 + bool "PL310 errata: no automatic Store Buffer drain" + depends on CACHE_L2X0 + help + On revisions of the PL310 prior to r3p2, the Store Buffer does + not automatically drain. This can cause normal, non-cacheable + writes to be retained when the memory system is idle, leading + to suboptimal I/O performance for drivers using coherent DMA. + This option adds a write barrier to the cpu_idle loop so that, + on systems with an outer cache, the store buffer is drained + explicitly. 
+ config CACHE_TAUROS2 bool "Enable the Tauros2 L2 cache controller" depends on (ARCH_DOVE || ARCH_MMP || CPU_PJ4) diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 7f39ce2f841f..91da64de440f 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -95,7 +95,8 @@ obj-$(CONFIG_CPU_V7M) += proc-v7m.o AFLAGS_proc-v6.o :=-Wa,-march=armv6 AFLAGS_proc-v7.o :=-Wa,-march=armv7-a +obj-$(CONFIG_OUTER_CACHE) += l2c-common.o obj-$(CONFIG_CACHE_FEROCEON_L2) += cache-feroceon-l2.o -obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o +obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o l2c-l2x0-resume.o obj-$(CONFIG_CACHE_XSC3L2) += cache-xsc3l2.o obj-$(CONFIG_CACHE_TAUROS2) += cache-tauros2.o diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 924036473b16..b8cb1a2688a0 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -28,6 +28,7 @@ #include <asm/opcodes.h> #include "fault.h" +#include "mm.h" /* * 32-bit misaligned trap handler (c) 1998 San Mehat (CCC) -July 1998 @@ -81,6 +82,7 @@ static unsigned long ai_word; static unsigned long ai_dword; static unsigned long ai_multi; static int ai_usermode; +static unsigned long cr_no_alignment; core_param(alignment, ai_usermode, int, 0600); @@ -91,7 +93,7 @@ core_param(alignment, ai_usermode, int, 0600); /* Return true if and only if the ARMv6 unaligned access model is in use. */ static bool cpu_is_v6_unaligned(void) { - return cpu_architecture() >= CPU_ARCH_ARMv6 && (cr_alignment & CR_U); + return cpu_architecture() >= CPU_ARCH_ARMv6 && get_cr() & CR_U; } static int safe_usermode(int new_usermode, bool warn) @@ -949,6 +951,13 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) return 0; } +static int __init noalign_setup(char *__unused) +{ + set_cr(__clear_cr(CR_A)); + return 1; +} +__setup("noalign", noalign_setup); + /* * This needs to be done after sysctl_init, otherwise sys/ will be * overwritten. 
Actually, this shouldn't be in sys/ at all since @@ -966,14 +975,12 @@ static int __init alignment_init(void) return -ENOMEM; #endif -#ifdef CONFIG_CPU_CP15 if (cpu_is_v6_unaligned()) { - cr_alignment &= ~CR_A; - cr_no_alignment &= ~CR_A; - set_cr(cr_alignment); + set_cr(__clear_cr(CR_A)); ai_usermode = safe_usermode(ai_usermode, false); } -#endif + + cr_no_alignment = get_cr() & ~CR_A; hook_fault_code(FAULT_CODE_ALIGNMENT, do_alignment, SIGBUS, BUS_ADRALN, "alignment exception"); diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c index dc814a548056..e028a7f2ebcc 100644 --- a/arch/arm/mm/cache-feroceon-l2.c +++ b/arch/arm/mm/cache-feroceon-l2.c @@ -350,7 +350,6 @@ void __init feroceon_l2_init(int __l2_wt_override) outer_cache.inv_range = feroceon_l2_inv_range; outer_cache.clean_range = feroceon_l2_clean_range; outer_cache.flush_range = feroceon_l2_flush_range; - outer_cache.inv_all = l2_inv_all; enable_l2(); diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 7abde2ce8973..efc5cabf70e0 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -16,18 +16,33 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/cpu.h> #include <linux/err.h> #include <linux/init.h> +#include <linux/smp.h> #include <linux/spinlock.h> #include <linux/io.h> #include <linux/of.h> #include <linux/of_address.h> #include <asm/cacheflush.h> +#include <asm/cp15.h> +#include <asm/cputype.h> #include <asm/hardware/cache-l2x0.h> #include "cache-tauros3.h" #include "cache-aurora-l2.h" +struct l2c_init_data { + const char *type; + unsigned way_size_0; + unsigned num_lock; + void (*of_parse)(const struct device_node *, u32 *, u32 *); + void (*enable)(void __iomem *, u32, unsigned); + void (*fixup)(void __iomem *, u32, struct outer_cache_fns *); + void (*save)(void __iomem *); + struct outer_cache_fns outer_cache; +}; + #define 
CACHE_LINE_SIZE 32 static void __iomem *l2x0_base; @@ -36,96 +51,116 @@ static u32 l2x0_way_mask; /* Bitmask of active ways */ static u32 l2x0_size; static unsigned long sync_reg_offset = L2X0_CACHE_SYNC; -/* Aurora don't have the cache ID register available, so we have to - * pass it though the device tree */ -static u32 cache_id_part_number_from_dt; - struct l2x0_regs l2x0_saved_regs; -struct l2x0_of_data { - void (*setup)(const struct device_node *, u32 *, u32 *); - void (*save)(void); - struct outer_cache_fns outer_cache; -}; - -static bool of_init = false; - -static inline void cache_wait_way(void __iomem *reg, unsigned long mask) +/* + * Common code for all cache controllers. + */ +static inline void l2c_wait_mask(void __iomem *reg, unsigned long mask) { /* wait for cache operation by line or way to complete */ while (readl_relaxed(reg) & mask) cpu_relax(); } -#ifdef CONFIG_CACHE_PL310 -static inline void cache_wait(void __iomem *reg, unsigned long mask) +/* + * By default, we write directly to secure registers. Platforms must + * override this if they are running non-secure. + */ +static void l2c_write_sec(unsigned long val, void __iomem *base, unsigned reg) { - /* cache operations by line are atomic on PL310 */ + if (val == readl_relaxed(base + reg)) + return; + if (outer_cache.write_sec) + outer_cache.write_sec(val, reg); + else + writel_relaxed(val, base + reg); } -#else -#define cache_wait cache_wait_way -#endif -static inline void cache_sync(void) +/* + * This should only be called when we have a requirement that the + * register be written due to a work-around, as platforms running + * in non-secure mode may not be able to access this register. 
+ */ +static inline void l2c_set_debug(void __iomem *base, unsigned long val) { - void __iomem *base = l2x0_base; - - writel_relaxed(0, base + sync_reg_offset); - cache_wait(base + L2X0_CACHE_SYNC, 1); + l2c_write_sec(val, base, L2X0_DEBUG_CTRL); } -static inline void l2x0_clean_line(unsigned long addr) +static void __l2c_op_way(void __iomem *reg) { - void __iomem *base = l2x0_base; - cache_wait(base + L2X0_CLEAN_LINE_PA, 1); - writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA); + writel_relaxed(l2x0_way_mask, reg); + l2c_wait_mask(reg, l2x0_way_mask); } -static inline void l2x0_inv_line(unsigned long addr) +static inline void l2c_unlock(void __iomem *base, unsigned num) { - void __iomem *base = l2x0_base; - cache_wait(base + L2X0_INV_LINE_PA, 1); - writel_relaxed(addr, base + L2X0_INV_LINE_PA); + unsigned i; + + for (i = 0; i < num; i++) { + writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_D_BASE + + i * L2X0_LOCKDOWN_STRIDE); + writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_I_BASE + + i * L2X0_LOCKDOWN_STRIDE); + } } -#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915) -static inline void debug_writel(unsigned long val) +/* + * Enable the L2 cache controller. This function must only be + * called when the cache controller is known to be disabled. 
+ */ +static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock) { - if (outer_cache.set_debug) - outer_cache.set_debug(val); + unsigned long flags; + + l2c_write_sec(aux, base, L2X0_AUX_CTRL); + + l2c_unlock(base, num_lock); + + local_irq_save(flags); + __l2c_op_way(base + L2X0_INV_WAY); + writel_relaxed(0, base + sync_reg_offset); + l2c_wait_mask(base + sync_reg_offset, 1); + local_irq_restore(flags); + + l2c_write_sec(L2X0_CTRL_EN, base, L2X0_CTRL); } -static void pl310_set_debug(unsigned long val) +static void l2c_disable(void) { - writel_relaxed(val, l2x0_base + L2X0_DEBUG_CTRL); + void __iomem *base = l2x0_base; + + outer_cache.flush_all(); + l2c_write_sec(0, base, L2X0_CTRL); + dsb(st); } -#else -/* Optimised out for non-errata case */ -static inline void debug_writel(unsigned long val) + +#ifdef CONFIG_CACHE_PL310 +static inline void cache_wait(void __iomem *reg, unsigned long mask) { + /* cache operations by line are atomic on PL310 */ } - -#define pl310_set_debug NULL +#else +#define cache_wait l2c_wait_mask #endif -#ifdef CONFIG_PL310_ERRATA_588369 -static inline void l2x0_flush_line(unsigned long addr) +static inline void cache_sync(void) { void __iomem *base = l2x0_base; - /* Clean by PA followed by Invalidate by PA */ - cache_wait(base + L2X0_CLEAN_LINE_PA, 1); - writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA); - cache_wait(base + L2X0_INV_LINE_PA, 1); - writel_relaxed(addr, base + L2X0_INV_LINE_PA); + writel_relaxed(0, base + sync_reg_offset); + cache_wait(base + L2X0_CACHE_SYNC, 1); } -#else -static inline void l2x0_flush_line(unsigned long addr) +#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915) +static inline void debug_writel(unsigned long val) +{ + l2c_set_debug(l2x0_base, val); +} +#else +/* Optimised out for non-errata case */ +static inline void debug_writel(unsigned long val) { - void __iomem *base = l2x0_base; - cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1); - writel_relaxed(addr, base + 
L2X0_CLEAN_INV_LINE_PA); } #endif @@ -141,8 +176,7 @@ static void l2x0_cache_sync(void) static void __l2x0_flush_all(void) { debug_writel(0x03); - writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_INV_WAY); - cache_wait_way(l2x0_base + L2X0_CLEAN_INV_WAY, l2x0_way_mask); + __l2c_op_way(l2x0_base + L2X0_CLEAN_INV_WAY); cache_sync(); debug_writel(0x00); } @@ -157,275 +191,883 @@ static void l2x0_flush_all(void) raw_spin_unlock_irqrestore(&l2x0_lock, flags); } -static void l2x0_clean_all(void) +static void l2x0_disable(void) { unsigned long flags; - /* clean all ways */ raw_spin_lock_irqsave(&l2x0_lock, flags); - writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_WAY); - cache_wait_way(l2x0_base + L2X0_CLEAN_WAY, l2x0_way_mask); - cache_sync(); + __l2x0_flush_all(); + l2c_write_sec(0, l2x0_base, L2X0_CTRL); + dsb(st); raw_spin_unlock_irqrestore(&l2x0_lock, flags); } -static void l2x0_inv_all(void) +static void l2c_save(void __iomem *base) { - unsigned long flags; + l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); +} - /* invalidate all ways */ - raw_spin_lock_irqsave(&l2x0_lock, flags); - /* Invalidating when L2 is enabled is a nono */ - BUG_ON(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN); - writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_INV_WAY); - cache_wait_way(l2x0_base + L2X0_INV_WAY, l2x0_way_mask); - cache_sync(); - raw_spin_unlock_irqrestore(&l2x0_lock, flags); +/* + * L2C-210 specific code. + * + * The L2C-2x0 PA, set/way and sync operations are atomic, but we must + * ensure that no background operation is running. The way operations + * are all background tasks. + * + * While a background operation is in progress, any new operation is + * ignored (unspecified whether this causes an error.) Thankfully, not + * used on SMP. + * + * Never has a different sync register other than L2X0_CACHE_SYNC, but + * we use sync_reg_offset here so we can share some of this with L2C-310. 
+ */ +static void __l2c210_cache_sync(void __iomem *base) +{ + writel_relaxed(0, base + sync_reg_offset); } -static void l2x0_inv_range(unsigned long start, unsigned long end) +static void __l2c210_op_pa_range(void __iomem *reg, unsigned long start, + unsigned long end) +{ + while (start < end) { + writel_relaxed(start, reg); + start += CACHE_LINE_SIZE; + } +} + +static void l2c210_inv_range(unsigned long start, unsigned long end) { void __iomem *base = l2x0_base; - unsigned long flags; - raw_spin_lock_irqsave(&l2x0_lock, flags); if (start & (CACHE_LINE_SIZE - 1)) { start &= ~(CACHE_LINE_SIZE - 1); - debug_writel(0x03); - l2x0_flush_line(start); - debug_writel(0x00); + writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA); start += CACHE_LINE_SIZE; } if (end & (CACHE_LINE_SIZE - 1)) { end &= ~(CACHE_LINE_SIZE - 1); - debug_writel(0x03); - l2x0_flush_line(end); - debug_writel(0x00); + writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA); } + __l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end); + __l2c210_cache_sync(base); +} + +static void l2c210_clean_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + + start &= ~(CACHE_LINE_SIZE - 1); + __l2c210_op_pa_range(base + L2X0_CLEAN_LINE_PA, start, end); + __l2c210_cache_sync(base); +} + +static void l2c210_flush_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + + start &= ~(CACHE_LINE_SIZE - 1); + __l2c210_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA, start, end); + __l2c210_cache_sync(base); +} + +static void l2c210_flush_all(void) +{ + void __iomem *base = l2x0_base; + + BUG_ON(!irqs_disabled()); + + __l2c_op_way(base + L2X0_CLEAN_INV_WAY); + __l2c210_cache_sync(base); +} + +static void l2c210_sync(void) +{ + __l2c210_cache_sync(l2x0_base); +} + +static void l2c210_resume(void) +{ + void __iomem *base = l2x0_base; + + if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) + l2c_enable(base, l2x0_saved_regs.aux_ctrl, 1); +} + +static const struct 
l2c_init_data l2c210_data __initconst = { + .type = "L2C-210", + .way_size_0 = SZ_8K, + .num_lock = 1, + .enable = l2c_enable, + .save = l2c_save, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c_disable, + .sync = l2c210_sync, + .resume = l2c210_resume, + }, +}; + +/* + * L2C-220 specific code. + * + * All operations are background operations: they have to be waited for. + * Conflicting requests generate a slave error (which will cause an + * imprecise abort.) Never uses sync_reg_offset, so we hard-code the + * sync register here. + * + * However, we can re-use the l2c210_resume call. + */ +static inline void __l2c220_cache_sync(void __iomem *base) +{ + writel_relaxed(0, base + L2X0_CACHE_SYNC); + l2c_wait_mask(base + L2X0_CACHE_SYNC, 1); +} + +static void l2c220_op_way(void __iomem *base, unsigned reg) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + __l2c_op_way(base + reg); + __l2c220_cache_sync(base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static unsigned long l2c220_op_pa_range(void __iomem *reg, unsigned long start, + unsigned long end, unsigned long flags) +{ + raw_spinlock_t *lock = &l2x0_lock; + while (start < end) { unsigned long blk_end = start + min(end - start, 4096UL); while (start < blk_end) { - l2x0_inv_line(start); + l2c_wait_mask(reg, 1); + writel_relaxed(start, reg); start += CACHE_LINE_SIZE; } if (blk_end < end) { - raw_spin_unlock_irqrestore(&l2x0_lock, flags); - raw_spin_lock_irqsave(&l2x0_lock, flags); + raw_spin_unlock_irqrestore(lock, flags); + raw_spin_lock_irqsave(lock, flags); } } - cache_wait(base + L2X0_INV_LINE_PA, 1); - cache_sync(); - raw_spin_unlock_irqrestore(&l2x0_lock, flags); + + return flags; } -static void l2x0_clean_range(unsigned long start, unsigned long end) +static void l2c220_inv_range(unsigned long start, unsigned long end) { void __iomem *base = 
l2x0_base; unsigned long flags; - if ((end - start) >= l2x0_size) { - l2x0_clean_all(); - return; - } - raw_spin_lock_irqsave(&l2x0_lock, flags); - start &= ~(CACHE_LINE_SIZE - 1); - while (start < end) { - unsigned long blk_end = start + min(end - start, 4096UL); - - while (start < blk_end) { - l2x0_clean_line(start); + if ((start | end) & (CACHE_LINE_SIZE - 1)) { + if (start & (CACHE_LINE_SIZE - 1)) { + start &= ~(CACHE_LINE_SIZE - 1); + writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA); start += CACHE_LINE_SIZE; } - if (blk_end < end) { - raw_spin_unlock_irqrestore(&l2x0_lock, flags); - raw_spin_lock_irqsave(&l2x0_lock, flags); + if (end & (CACHE_LINE_SIZE - 1)) { + end &= ~(CACHE_LINE_SIZE - 1); + l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1); + writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA); } } - cache_wait(base + L2X0_CLEAN_LINE_PA, 1); - cache_sync(); + + flags = l2c220_op_pa_range(base + L2X0_INV_LINE_PA, + start, end, flags); + l2c_wait_mask(base + L2X0_INV_LINE_PA, 1); + __l2c220_cache_sync(base); raw_spin_unlock_irqrestore(&l2x0_lock, flags); } -static void l2x0_flush_range(unsigned long start, unsigned long end) +static void l2c220_clean_range(unsigned long start, unsigned long end) { void __iomem *base = l2x0_base; unsigned long flags; + start &= ~(CACHE_LINE_SIZE - 1); if ((end - start) >= l2x0_size) { - l2x0_flush_all(); + l2c220_op_way(base, L2X0_CLEAN_WAY); return; } raw_spin_lock_irqsave(&l2x0_lock, flags); + flags = l2c220_op_pa_range(base + L2X0_CLEAN_LINE_PA, + start, end, flags); + l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1); + __l2c220_cache_sync(base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c220_flush_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + unsigned long flags; + start &= ~(CACHE_LINE_SIZE - 1); + if ((end - start) >= l2x0_size) { + l2c220_op_way(base, L2X0_CLEAN_INV_WAY); + return; + } + + raw_spin_lock_irqsave(&l2x0_lock, flags); + flags = 
l2c220_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA, + start, end, flags); + l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1); + __l2c220_cache_sync(base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c220_flush_all(void) +{ + l2c220_op_way(l2x0_base, L2X0_CLEAN_INV_WAY); +} + +static void l2c220_sync(void) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + __l2c220_cache_sync(l2x0_base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c220_enable(void __iomem *base, u32 aux, unsigned num_lock) +{ + /* + * Always enable non-secure access to the lockdown registers - + * we write to them as part of the L2C enable sequence so they + * need to be accessible. + */ + aux |= L220_AUX_CTRL_NS_LOCKDOWN; + + l2c_enable(base, aux, num_lock); +} + +static const struct l2c_init_data l2c220_data = { + .type = "L2C-220", + .way_size_0 = SZ_8K, + .num_lock = 1, + .enable = l2c220_enable, + .save = l2c_save, + .outer_cache = { + .inv_range = l2c220_inv_range, + .clean_range = l2c220_clean_range, + .flush_range = l2c220_flush_range, + .flush_all = l2c220_flush_all, + .disable = l2c_disable, + .sync = l2c220_sync, + .resume = l2c210_resume, + }, +}; + +/* + * L2C-310 specific code. + * + * Very similar to L2C-210, the PA, set/way and sync operations are atomic, + * and the way operations are all background tasks. However, issuing an + * operation while a background operation is in progress results in a + * SLVERR response. We can reuse: + * + * __l2c210_cache_sync (using sync_reg_offset) + * l2c210_sync + * l2c210_inv_range (if 588369 is not applicable) + * l2c210_clean_range + * l2c210_flush_range (if 588369 is not applicable) + * l2c210_flush_all (if 727915 is not applicable) + * + * Errata: + * 588369: PL310 R0P0->R1P0, fixed R2P0. + * Affects: all clean+invalidate operations + * clean and invalidate skips the invalidate step, so we need to issue + * separate operations. 
We also require the above debug workaround + * enclosing this code fragment on affected parts. On unaffected parts, + * we must not use this workaround without the debug register writes + * to avoid exposing a problem similar to 727915. + * + * 727915: PL310 R2P0->R3P0, fixed R3P1. + * Affects: clean+invalidate by way + * clean and invalidate by way runs in the background, and a store can + * hit the line between the clean operation and invalidate operation, + * resulting in the store being lost. + * + * 752271: PL310 R3P0->R3P1-50REL0, fixed R3P2. + * Affects: 8x64-bit (double fill) line fetches + * double fill line fetches can fail to cause dirty data to be evicted + * from the cache before the new data overwrites the second line. + * + * 753970: PL310 R3P0, fixed R3P1. + * Affects: sync + * prevents merging writes after the sync operation, until another L2C + * operation is performed (or a number of other conditions.) + * + * 769419: PL310 R0P0->R3P1, fixed R3P2. + * Affects: store buffer + * store buffer is not automatically drained. 
+ */ +static void l2c310_inv_range_erratum(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + + if ((start | end) & (CACHE_LINE_SIZE - 1)) { + unsigned long flags; + + /* Erratum 588369 for both clean+invalidate operations */ + raw_spin_lock_irqsave(&l2x0_lock, flags); + l2c_set_debug(base, 0x03); + + if (start & (CACHE_LINE_SIZE - 1)) { + start &= ~(CACHE_LINE_SIZE - 1); + writel_relaxed(start, base + L2X0_CLEAN_LINE_PA); + writel_relaxed(start, base + L2X0_INV_LINE_PA); + start += CACHE_LINE_SIZE; + } + + if (end & (CACHE_LINE_SIZE - 1)) { + end &= ~(CACHE_LINE_SIZE - 1); + writel_relaxed(end, base + L2X0_CLEAN_LINE_PA); + writel_relaxed(end, base + L2X0_INV_LINE_PA); + } + + l2c_set_debug(base, 0x00); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); + } + + __l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end); + __l2c210_cache_sync(base); +} + +static void l2c310_flush_range_erratum(unsigned long start, unsigned long end) +{ + raw_spinlock_t *lock = &l2x0_lock; + unsigned long flags; + void __iomem *base = l2x0_base; + + raw_spin_lock_irqsave(lock, flags); while (start < end) { unsigned long blk_end = start + min(end - start, 4096UL); - debug_writel(0x03); + l2c_set_debug(base, 0x03); while (start < blk_end) { - l2x0_flush_line(start); + writel_relaxed(start, base + L2X0_CLEAN_LINE_PA); + writel_relaxed(start, base + L2X0_INV_LINE_PA); start += CACHE_LINE_SIZE; } - debug_writel(0x00); + l2c_set_debug(base, 0x00); if (blk_end < end) { - raw_spin_unlock_irqrestore(&l2x0_lock, flags); - raw_spin_lock_irqsave(&l2x0_lock, flags); + raw_spin_unlock_irqrestore(lock, flags); + raw_spin_lock_irqsave(lock, flags); } } - cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1); - cache_sync(); - raw_spin_unlock_irqrestore(&l2x0_lock, flags); + raw_spin_unlock_irqrestore(lock, flags); + __l2c210_cache_sync(base); } -static void l2x0_disable(void) +static void l2c310_flush_all_erratum(void) { + void __iomem *base = l2x0_base; unsigned long flags; 
raw_spin_lock_irqsave(&l2x0_lock, flags); - __l2x0_flush_all(); - writel_relaxed(0, l2x0_base + L2X0_CTRL); - dsb(st); + l2c_set_debug(base, 0x03); + __l2c_op_way(base + L2X0_CLEAN_INV_WAY); + l2c_set_debug(base, 0x00); + __l2c210_cache_sync(base); raw_spin_unlock_irqrestore(&l2x0_lock, flags); } -static void l2x0_unlock(u32 cache_id) +static void __init l2c310_save(void __iomem *base) { - int lockregs; - int i; + unsigned revision; - switch (cache_id & L2X0_CACHE_ID_PART_MASK) { - case L2X0_CACHE_ID_PART_L310: - lockregs = 8; - break; - case AURORA_CACHE_ID: - lockregs = 4; + l2c_save(base); + + l2x0_saved_regs.tag_latency = readl_relaxed(base + + L310_TAG_LATENCY_CTRL); + l2x0_saved_regs.data_latency = readl_relaxed(base + + L310_DATA_LATENCY_CTRL); + l2x0_saved_regs.filter_end = readl_relaxed(base + + L310_ADDR_FILTER_END); + l2x0_saved_regs.filter_start = readl_relaxed(base + + L310_ADDR_FILTER_START); + + revision = readl_relaxed(base + L2X0_CACHE_ID) & + L2X0_CACHE_ID_RTL_MASK; + + /* From r2p0, there is Prefetch offset/control register */ + if (revision >= L310_CACHE_ID_RTL_R2P0) + l2x0_saved_regs.prefetch_ctrl = readl_relaxed(base + + L310_PREFETCH_CTRL); + + /* From r3p0, there is Power control register */ + if (revision >= L310_CACHE_ID_RTL_R3P0) + l2x0_saved_regs.pwr_ctrl = readl_relaxed(base + + L310_POWER_CTRL); +} + +static void l2c310_resume(void) +{ + void __iomem *base = l2x0_base; + + if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) { + unsigned revision; + + /* restore pl310 setup */ + writel_relaxed(l2x0_saved_regs.tag_latency, + base + L310_TAG_LATENCY_CTRL); + writel_relaxed(l2x0_saved_regs.data_latency, + base + L310_DATA_LATENCY_CTRL); + writel_relaxed(l2x0_saved_regs.filter_end, + base + L310_ADDR_FILTER_END); + writel_relaxed(l2x0_saved_regs.filter_start, + base + L310_ADDR_FILTER_START); + + revision = readl_relaxed(base + L2X0_CACHE_ID) & + L2X0_CACHE_ID_RTL_MASK; + + if (revision >= L310_CACHE_ID_RTL_R2P0) + 
l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base, + L310_PREFETCH_CTRL); + if (revision >= L310_CACHE_ID_RTL_R3P0) + l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base, + L310_POWER_CTRL); + + l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8); + + /* Re-enable full-line-of-zeros for Cortex-A9 */ + if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO) + set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); + } +} + +static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, void *data) +{ + switch (act & ~CPU_TASKS_FROZEN) { + case CPU_STARTING: + set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); break; - default: - /* L210 and unknown types */ - lockregs = 1; + case CPU_DYING: + set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1))); break; } + return NOTIFY_OK; +} - for (i = 0; i < lockregs; i++) { - writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_D_BASE + - i * L2X0_LOCKDOWN_STRIDE); - writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_I_BASE + - i * L2X0_LOCKDOWN_STRIDE); +static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock) +{ + unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_PART_MASK; + bool cortex_a9 = read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9; + + if (rev >= L310_CACHE_ID_RTL_R2P0) { + if (cortex_a9) { + aux |= L310_AUX_CTRL_EARLY_BRESP; + pr_info("L2C-310 enabling early BRESP for Cortex-A9\n"); + } else if (aux & L310_AUX_CTRL_EARLY_BRESP) { + pr_warn("L2C-310 early BRESP only supported with Cortex-A9\n"); + aux &= ~L310_AUX_CTRL_EARLY_BRESP; + } + } + + if (cortex_a9) { + u32 aux_cur = readl_relaxed(base + L2X0_AUX_CTRL); + u32 acr = get_auxcr(); + + pr_debug("Cortex-A9 ACR=0x%08x\n", acr); + + if (acr & BIT(3) && !(aux_cur & L310_AUX_CTRL_FULL_LINE_ZERO)) + pr_err("L2C-310: full line of zeros enabled in Cortex-A9 but not L2C-310 - invalid\n"); + + if (aux & L310_AUX_CTRL_FULL_LINE_ZERO && !(acr & BIT(3))) + pr_err("L2C-310: enabling full line of zeros but not enabled in Cortex-A9\n"); 
+ + if (!(aux & L310_AUX_CTRL_FULL_LINE_ZERO) && !outer_cache.write_sec) { + aux |= L310_AUX_CTRL_FULL_LINE_ZERO; + pr_info("L2C-310 full line of zeros enabled for Cortex-A9\n"); + } + } else if (aux & (L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP)) { + pr_err("L2C-310: disabling Cortex-A9 specific feature bits\n"); + aux &= ~(L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP); + } + + if (aux & (L310_AUX_CTRL_DATA_PREFETCH | L310_AUX_CTRL_INSTR_PREFETCH)) { + u32 prefetch = readl_relaxed(base + L310_PREFETCH_CTRL); + + pr_info("L2C-310 %s%s prefetch enabled, offset %u lines\n", + aux & L310_AUX_CTRL_INSTR_PREFETCH ? "I" : "", + aux & L310_AUX_CTRL_DATA_PREFETCH ? "D" : "", + 1 + (prefetch & L310_PREFETCH_CTRL_OFFSET_MASK)); + } + + /* r3p0 or later has power control register */ + if (rev >= L310_CACHE_ID_RTL_R3P0) { + u32 power_ctrl; + + l2c_write_sec(L310_DYNAMIC_CLK_GATING_EN | L310_STNDBY_MODE_EN, + base, L310_POWER_CTRL); + power_ctrl = readl_relaxed(base + L310_POWER_CTRL); + pr_info("L2C-310 dynamic clock gating %sabled, standby mode %sabled\n", + power_ctrl & L310_DYNAMIC_CLK_GATING_EN ? "en" : "dis", + power_ctrl & L310_STNDBY_MODE_EN ? "en" : "dis"); + } + + /* + * Always enable non-secure access to the lockdown registers - + * we write to them as part of the L2C enable sequence so they + * need to be accessible. 
+ */ + aux |= L310_AUX_CTRL_NS_LOCKDOWN; + + l2c_enable(base, aux, num_lock); + + if (aux & L310_AUX_CTRL_FULL_LINE_ZERO) { + set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); + cpu_notifier(l2c310_cpu_enable_flz, 0); } } -void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) +static void __init l2c310_fixup(void __iomem *base, u32 cache_id, + struct outer_cache_fns *fns) { - u32 aux; - u32 cache_id; - u32 way_size = 0; - int ways; - int way_size_shift = L2X0_WAY_SIZE_SHIFT; - const char *type; + unsigned revision = cache_id & L2X0_CACHE_ID_RTL_MASK; + const char *errata[8]; + unsigned n = 0; + + if (IS_ENABLED(CONFIG_PL310_ERRATA_588369) && + revision < L310_CACHE_ID_RTL_R2P0 && + /* For bcm compatibility */ + fns->inv_range == l2c210_inv_range) { + fns->inv_range = l2c310_inv_range_erratum; + fns->flush_range = l2c310_flush_range_erratum; + errata[n++] = "588369"; + } - l2x0_base = base; - if (cache_id_part_number_from_dt) - cache_id = cache_id_part_number_from_dt; - else - cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID); - aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); + if (IS_ENABLED(CONFIG_PL310_ERRATA_727915) && + revision >= L310_CACHE_ID_RTL_R2P0 && + revision < L310_CACHE_ID_RTL_R3P1) { + fns->flush_all = l2c310_flush_all_erratum; + errata[n++] = "727915"; + } + + if (revision >= L310_CACHE_ID_RTL_R3P0 && + revision < L310_CACHE_ID_RTL_R3P2) { + u32 val = readl_relaxed(base + L310_PREFETCH_CTRL); + /* I don't think bit23 is required here... but iMX6 does so */ + if (val & (BIT(30) | BIT(23))) { + val &= ~(BIT(30) | BIT(23)); + l2c_write_sec(val, base, L310_PREFETCH_CTRL); + errata[n++] = "752271"; + } + } + + if (IS_ENABLED(CONFIG_PL310_ERRATA_753970) && + revision == L310_CACHE_ID_RTL_R3P0) { + sync_reg_offset = L2X0_DUMMY_REG; + errata[n++] = "753970"; + } + + if (IS_ENABLED(CONFIG_PL310_ERRATA_769419)) + errata[n++] = "769419"; + + if (n) { + unsigned i; + pr_info("L2C-310 errat%s", n > 1 ? 
"a" : "um"); + for (i = 0; i < n; i++) + pr_cont(" %s", errata[i]); + pr_cont(" enabled\n"); + } +} + +static void l2c310_disable(void) +{ + /* + * If full-line-of-zeros is enabled, we must first disable it in the + * Cortex-A9 auxiliary control register before disabling the L2 cache. + */ + if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO) + set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1))); + + l2c_disable(); +} + +static const struct l2c_init_data l2c310_init_fns __initconst = { + .type = "L2C-310", + .way_size_0 = SZ_8K, + .num_lock = 8, + .enable = l2c310_enable, + .fixup = l2c310_fixup, + .save = l2c310_save, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c310_disable, + .sync = l2c210_sync, + .resume = l2c310_resume, + }, +}; + +static void __init __l2c_init(const struct l2c_init_data *data, + u32 aux_val, u32 aux_mask, u32 cache_id) +{ + struct outer_cache_fns fns; + unsigned way_size_bits, ways; + u32 aux, old_aux; + + /* + * Sanity check the aux values. aux_mask is the bits we preserve + * from reading the hardware register, and aux_val is the bits we + * set. + */ + if (aux_val & aux_mask) + pr_alert("L2C: platform provided aux values permit register corruption.\n"); + + old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); aux &= aux_mask; aux |= aux_val; + if (old_aux != aux) + pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n", + old_aux, aux); + /* Determine the number of ways */ switch (cache_id & L2X0_CACHE_ID_PART_MASK) { case L2X0_CACHE_ID_PART_L310: + if ((aux_val | ~aux_mask) & (L2C_AUX_CTRL_WAY_SIZE_MASK | L310_AUX_CTRL_ASSOCIATIVITY_16)) + pr_warn("L2C: DT/platform tries to modify or specify cache size\n"); if (aux & (1 << 16)) ways = 16; else ways = 8; - type = "L310"; -#ifdef CONFIG_PL310_ERRATA_753970 - /* Unmapped register. 
*/ - sync_reg_offset = L2X0_DUMMY_REG; -#endif - if ((cache_id & L2X0_CACHE_ID_RTL_MASK) <= L2X0_CACHE_ID_RTL_R3P0) - outer_cache.set_debug = pl310_set_debug; break; + case L2X0_CACHE_ID_PART_L210: + case L2X0_CACHE_ID_PART_L220: ways = (aux >> 13) & 0xf; - type = "L210"; break; case AURORA_CACHE_ID: - sync_reg_offset = AURORA_SYNC_REG; ways = (aux >> 13) & 0xf; ways = 2 << ((ways + 1) >> 2); - way_size_shift = AURORA_WAY_SIZE_SHIFT; - type = "Aurora"; break; + default: /* Assume unknown chips have 8 ways */ ways = 8; - type = "L2x0 series"; break; } l2x0_way_mask = (1 << ways) - 1; /* - * L2 cache Size = Way size * Number of ways + * way_size_0 is the size that a way_size value of zero would be + * given the calculation: way_size = way_size_0 << way_size_bits. + * So, if way_size_bits=0 is reserved, but way_size_bits=1 is 16k, + * then way_size_0 would be 8k. + * + * L2 cache size = number of ways * way size. */ - way_size = (aux & L2X0_AUX_CTRL_WAY_SIZE_MASK) >> 17; - way_size = 1 << (way_size + way_size_shift); + way_size_bits = (aux & L2C_AUX_CTRL_WAY_SIZE_MASK) >> + L2C_AUX_CTRL_WAY_SIZE_SHIFT; + l2x0_size = ways * (data->way_size_0 << way_size_bits); - l2x0_size = ways * way_size * SZ_1K; + fns = data->outer_cache; + fns.write_sec = outer_cache.write_sec; + if (data->fixup) + data->fixup(l2x0_base, cache_id, &fns); /* - * Check if l2x0 controller is already enabled. - * If you are booting from non-secure mode - * accessing the below registers will fault. + * Check if l2x0 controller is already enabled. If we are booting + * in non-secure mode accessing the below registers will fault. 
*/ - if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { - /* Make sure that I&D is not locked down when starting */ - l2x0_unlock(cache_id); + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) + data->enable(l2x0_base, aux, data->num_lock); - /* l2x0 controller is disabled */ - writel_relaxed(aux, l2x0_base + L2X0_AUX_CTRL); + outer_cache = fns; - l2x0_inv_all(); - - /* enable L2X0 */ - writel_relaxed(L2X0_CTRL_EN, l2x0_base + L2X0_CTRL); - } + /* + * It is strange to save the register state before initialisation, + * but hey, this is what the DT implementations decided to do. + */ + if (data->save) + data->save(l2x0_base); /* Re-read it in case some bits are reserved. */ aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); - /* Save the value for resuming. */ - l2x0_saved_regs.aux_ctrl = aux; + pr_info("%s cache controller enabled, %d ways, %d kB\n", + data->type, ways, l2x0_size >> 10); + pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n", + data->type, cache_id, aux); +} + +void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) +{ + const struct l2c_init_data *data; + u32 cache_id; + + l2x0_base = base; + + cache_id = readl_relaxed(base + L2X0_CACHE_ID); + + switch (cache_id & L2X0_CACHE_ID_PART_MASK) { + default: + case L2X0_CACHE_ID_PART_L210: + data = &l2c210_data; + break; - if (!of_init) { - outer_cache.inv_range = l2x0_inv_range; - outer_cache.clean_range = l2x0_clean_range; - outer_cache.flush_range = l2x0_flush_range; - outer_cache.sync = l2x0_cache_sync; - outer_cache.flush_all = l2x0_flush_all; - outer_cache.inv_all = l2x0_inv_all; - outer_cache.disable = l2x0_disable; + case L2X0_CACHE_ID_PART_L220: + data = &l2c220_data; + break; + + case L2X0_CACHE_ID_PART_L310: + data = &l2c310_init_fns; + break; } - pr_info("%s cache controller enabled\n", type); - pr_info("l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d kB\n", - ways, cache_id, aux, l2x0_size >> 10); + __l2c_init(data, aux_val, aux_mask, cache_id); } 
#ifdef CONFIG_OF static int l2_wt_override; +/* Aurora don't have the cache ID register available, so we have to + * pass it though the device tree */ +static u32 cache_id_part_number_from_dt; + +static void __init l2x0_of_parse(const struct device_node *np, + u32 *aux_val, u32 *aux_mask) +{ + u32 data[2] = { 0, 0 }; + u32 tag = 0; + u32 dirty = 0; + u32 val = 0, mask = 0; + + of_property_read_u32(np, "arm,tag-latency", &tag); + if (tag) { + mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK; + val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT; + } + + of_property_read_u32_array(np, "arm,data-latency", + data, ARRAY_SIZE(data)); + if (data[0] && data[1]) { + mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK | + L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK; + val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) | + ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT); + } + + of_property_read_u32(np, "arm,dirty-latency", &dirty); + if (dirty) { + mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK; + val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT; + } + + *aux_val &= ~mask; + *aux_val |= val; + *aux_mask &= ~mask; +} + +static const struct l2c_init_data of_l2c210_data __initconst = { + .type = "L2C-210", + .way_size_0 = SZ_8K, + .num_lock = 1, + .of_parse = l2x0_of_parse, + .enable = l2c_enable, + .save = l2c_save, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c_disable, + .sync = l2c210_sync, + .resume = l2c210_resume, + }, +}; + +static const struct l2c_init_data of_l2c220_data __initconst = { + .type = "L2C-220", + .way_size_0 = SZ_8K, + .num_lock = 1, + .of_parse = l2x0_of_parse, + .enable = l2c220_enable, + .save = l2c_save, + .outer_cache = { + .inv_range = l2c220_inv_range, + .clean_range = l2c220_clean_range, + .flush_range = l2c220_flush_range, + .flush_all = l2c220_flush_all, + .disable = l2c_disable, + .sync = l2c220_sync, + .resume = 
l2c210_resume, + }, +}; + +static void __init l2c310_of_parse(const struct device_node *np, + u32 *aux_val, u32 *aux_mask) +{ + u32 data[3] = { 0, 0, 0 }; + u32 tag[3] = { 0, 0, 0 }; + u32 filter[2] = { 0, 0 }; + + of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag)); + if (tag[0] && tag[1] && tag[2]) + writel_relaxed( + L310_LATENCY_CTRL_RD(tag[0] - 1) | + L310_LATENCY_CTRL_WR(tag[1] - 1) | + L310_LATENCY_CTRL_SETUP(tag[2] - 1), + l2x0_base + L310_TAG_LATENCY_CTRL); + + of_property_read_u32_array(np, "arm,data-latency", + data, ARRAY_SIZE(data)); + if (data[0] && data[1] && data[2]) + writel_relaxed( + L310_LATENCY_CTRL_RD(data[0] - 1) | + L310_LATENCY_CTRL_WR(data[1] - 1) | + L310_LATENCY_CTRL_SETUP(data[2] - 1), + l2x0_base + L310_DATA_LATENCY_CTRL); + + of_property_read_u32_array(np, "arm,filter-ranges", + filter, ARRAY_SIZE(filter)); + if (filter[1]) { + writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M), + l2x0_base + L310_ADDR_FILTER_END); + writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L310_ADDR_FILTER_EN, + l2x0_base + L310_ADDR_FILTER_START); + } +} + +static const struct l2c_init_data of_l2c310_data __initconst = { + .type = "L2C-310", + .way_size_0 = SZ_8K, + .num_lock = 8, + .of_parse = l2c310_of_parse, + .enable = l2c310_enable, + .fixup = l2c310_fixup, + .save = l2c310_save, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c310_disable, + .sync = l2c210_sync, + .resume = l2c310_resume, + }, +}; + /* * Note that the end addresses passed to Linux primitives are * noninclusive, while the hardware cache range operations use @@ -524,6 +1166,100 @@ static void aurora_flush_range(unsigned long start, unsigned long end) } } +static void aurora_save(void __iomem *base) +{ + l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL); + l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL); +} + +static void 
aurora_resume(void) +{ + void __iomem *base = l2x0_base; + + if (!(readl(base + L2X0_CTRL) & L2X0_CTRL_EN)) { + writel_relaxed(l2x0_saved_regs.aux_ctrl, base + L2X0_AUX_CTRL); + writel_relaxed(l2x0_saved_regs.ctrl, base + L2X0_CTRL); + } +} + +/* + * For Aurora cache in no outer mode, enable via the CP15 coprocessor + * broadcasting of cache commands to L2. + */ +static void __init aurora_enable_no_outer(void __iomem *base, u32 aux, + unsigned num_lock) +{ + u32 u; + + asm volatile("mrc p15, 1, %0, c15, c2, 0" : "=r" (u)); + u |= AURORA_CTRL_FW; /* Set the FW bit */ + asm volatile("mcr p15, 1, %0, c15, c2, 0" : : "r" (u)); + + isb(); + + l2c_enable(base, aux, num_lock); +} + +static void __init aurora_fixup(void __iomem *base, u32 cache_id, + struct outer_cache_fns *fns) +{ + sync_reg_offset = AURORA_SYNC_REG; +} + +static void __init aurora_of_parse(const struct device_node *np, + u32 *aux_val, u32 *aux_mask) +{ + u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU; + u32 mask = AURORA_ACR_REPLACEMENT_MASK; + + of_property_read_u32(np, "cache-id-part", + &cache_id_part_number_from_dt); + + /* Determine and save the write policy */ + l2_wt_override = of_property_read_bool(np, "wt-override"); + + if (l2_wt_override) { + val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY; + mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK; + } + + *aux_val &= ~mask; + *aux_val |= val; + *aux_mask &= ~mask; +} + +static const struct l2c_init_data of_aurora_with_outer_data __initconst = { + .type = "Aurora", + .way_size_0 = SZ_4K, + .num_lock = 4, + .of_parse = aurora_of_parse, + .enable = l2c_enable, + .fixup = aurora_fixup, + .save = aurora_save, + .outer_cache = { + .inv_range = aurora_inv_range, + .clean_range = aurora_clean_range, + .flush_range = aurora_flush_range, + .flush_all = l2x0_flush_all, + .disable = l2x0_disable, + .sync = l2x0_cache_sync, + .resume = aurora_resume, + }, +}; + +static const struct l2c_init_data of_aurora_no_outer_data __initconst = { + .type = "Aurora", + .way_size_0 = 
SZ_4K, + .num_lock = 4, + .of_parse = aurora_of_parse, + .enable = aurora_enable_no_outer, + .fixup = aurora_fixup, + .save = aurora_save, + .outer_cache = { + .resume = aurora_resume, + }, +}; + /* * For certain Broadcom SoCs, depending on the address range, different offsets * need to be added to the address before passing it to L2 for @@ -588,16 +1324,16 @@ static void bcm_inv_range(unsigned long start, unsigned long end) /* normal case, no cross section between start and end */ if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { - l2x0_inv_range(new_start, new_end); + l2c210_inv_range(new_start, new_end); return; } /* They cross sections, so it can only be a cross from section * 2 to section 3 */ - l2x0_inv_range(new_start, + l2c210_inv_range(new_start, bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); - l2x0_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + l2c210_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), new_end); } @@ -610,26 +1346,21 @@ static void bcm_clean_range(unsigned long start, unsigned long end) if (unlikely(end <= start)) return; - if ((end - start) >= l2x0_size) { - l2x0_clean_all(); - return; - } - new_start = bcm_l2_phys_addr(start); new_end = bcm_l2_phys_addr(end); /* normal case, no cross section between start and end */ if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { - l2x0_clean_range(new_start, new_end); + l2c210_clean_range(new_start, new_end); return; } /* They cross sections, so it can only be a cross from section * 2 to section 3 */ - l2x0_clean_range(new_start, + l2c210_clean_range(new_start, bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); - l2x0_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + l2c210_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), new_end); } @@ -643,7 +1374,7 @@ static void bcm_flush_range(unsigned long start, unsigned long end) return; if ((end - start) >= l2x0_size) { - l2x0_flush_all(); + outer_cache.flush_all(); return; } @@ 
-652,283 +1383,67 @@ static void bcm_flush_range(unsigned long start, unsigned long end) /* normal case, no cross section between start and end */ if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { - l2x0_flush_range(new_start, new_end); + l2c210_flush_range(new_start, new_end); return; } /* They cross sections, so it can only be a cross from section * 2 to section 3 */ - l2x0_flush_range(new_start, + l2c210_flush_range(new_start, bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); - l2x0_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + l2c210_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), new_end); } -static void __init l2x0_of_setup(const struct device_node *np, - u32 *aux_val, u32 *aux_mask) -{ - u32 data[2] = { 0, 0 }; - u32 tag = 0; - u32 dirty = 0; - u32 val = 0, mask = 0; - - of_property_read_u32(np, "arm,tag-latency", &tag); - if (tag) { - mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK; - val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT; - } - - of_property_read_u32_array(np, "arm,data-latency", - data, ARRAY_SIZE(data)); - if (data[0] && data[1]) { - mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK | - L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK; - val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) | - ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT); - } - - of_property_read_u32(np, "arm,dirty-latency", &dirty); - if (dirty) { - mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK; - val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT; - } - - *aux_val &= ~mask; - *aux_val |= val; - *aux_mask &= ~mask; -} - -static void __init pl310_of_setup(const struct device_node *np, - u32 *aux_val, u32 *aux_mask) -{ - u32 data[3] = { 0, 0, 0 }; - u32 tag[3] = { 0, 0, 0 }; - u32 filter[2] = { 0, 0 }; - - of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag)); - if (tag[0] && tag[1] && tag[2]) - writel_relaxed( - ((tag[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) | - ((tag[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) | - ((tag[2] - 1) 
<< L2X0_LATENCY_CTRL_SETUP_SHIFT), - l2x0_base + L2X0_TAG_LATENCY_CTRL); - - of_property_read_u32_array(np, "arm,data-latency", - data, ARRAY_SIZE(data)); - if (data[0] && data[1] && data[2]) - writel_relaxed( - ((data[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) | - ((data[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) | - ((data[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT), - l2x0_base + L2X0_DATA_LATENCY_CTRL); - - of_property_read_u32_array(np, "arm,filter-ranges", - filter, ARRAY_SIZE(filter)); - if (filter[1]) { - writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M), - l2x0_base + L2X0_ADDR_FILTER_END); - writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L2X0_ADDR_FILTER_EN, - l2x0_base + L2X0_ADDR_FILTER_START); - } -} - -static void __init pl310_save(void) -{ - u32 l2x0_revision = readl_relaxed(l2x0_base + L2X0_CACHE_ID) & - L2X0_CACHE_ID_RTL_MASK; - - l2x0_saved_regs.tag_latency = readl_relaxed(l2x0_base + - L2X0_TAG_LATENCY_CTRL); - l2x0_saved_regs.data_latency = readl_relaxed(l2x0_base + - L2X0_DATA_LATENCY_CTRL); - l2x0_saved_regs.filter_end = readl_relaxed(l2x0_base + - L2X0_ADDR_FILTER_END); - l2x0_saved_regs.filter_start = readl_relaxed(l2x0_base + - L2X0_ADDR_FILTER_START); - - if (l2x0_revision >= L2X0_CACHE_ID_RTL_R2P0) { - /* - * From r2p0, there is Prefetch offset/control register - */ - l2x0_saved_regs.prefetch_ctrl = readl_relaxed(l2x0_base + - L2X0_PREFETCH_CTRL); - /* - * From r3p0, there is Power control register - */ - if (l2x0_revision >= L2X0_CACHE_ID_RTL_R3P0) - l2x0_saved_regs.pwr_ctrl = readl_relaxed(l2x0_base + - L2X0_POWER_CTRL); - } -} +/* Broadcom L2C-310 start from ARMs R3P2 or later, and require no fixups */ +static const struct l2c_init_data of_bcm_l2x0_data __initconst = { + .type = "BCM-L2C-310", + .way_size_0 = SZ_8K, + .num_lock = 8, + .of_parse = l2c310_of_parse, + .enable = l2c310_enable, + .save = l2c310_save, + .outer_cache = { + .inv_range = bcm_inv_range, + .clean_range = bcm_clean_range, + .flush_range = bcm_flush_range, + .flush_all = 
l2c210_flush_all, + .disable = l2c310_disable, + .sync = l2c210_sync, + .resume = l2c310_resume, + }, +}; -static void aurora_save(void) +static void __init tauros3_save(void __iomem *base) { - l2x0_saved_regs.ctrl = readl_relaxed(l2x0_base + L2X0_CTRL); - l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); -} + l2c_save(base); -static void __init tauros3_save(void) -{ l2x0_saved_regs.aux2_ctrl = - readl_relaxed(l2x0_base + TAUROS3_AUX2_CTRL); + readl_relaxed(base + TAUROS3_AUX2_CTRL); l2x0_saved_regs.prefetch_ctrl = - readl_relaxed(l2x0_base + L2X0_PREFETCH_CTRL); -} - -static void l2x0_resume(void) -{ - if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { - /* restore aux ctrl and enable l2 */ - l2x0_unlock(readl_relaxed(l2x0_base + L2X0_CACHE_ID)); - - writel_relaxed(l2x0_saved_regs.aux_ctrl, l2x0_base + - L2X0_AUX_CTRL); - - l2x0_inv_all(); - - writel_relaxed(L2X0_CTRL_EN, l2x0_base + L2X0_CTRL); - } -} - -static void pl310_resume(void) -{ - u32 l2x0_revision; - - if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { - /* restore pl310 setup */ - writel_relaxed(l2x0_saved_regs.tag_latency, - l2x0_base + L2X0_TAG_LATENCY_CTRL); - writel_relaxed(l2x0_saved_regs.data_latency, - l2x0_base + L2X0_DATA_LATENCY_CTRL); - writel_relaxed(l2x0_saved_regs.filter_end, - l2x0_base + L2X0_ADDR_FILTER_END); - writel_relaxed(l2x0_saved_regs.filter_start, - l2x0_base + L2X0_ADDR_FILTER_START); - - l2x0_revision = readl_relaxed(l2x0_base + L2X0_CACHE_ID) & - L2X0_CACHE_ID_RTL_MASK; - - if (l2x0_revision >= L2X0_CACHE_ID_RTL_R2P0) { - writel_relaxed(l2x0_saved_regs.prefetch_ctrl, - l2x0_base + L2X0_PREFETCH_CTRL); - if (l2x0_revision >= L2X0_CACHE_ID_RTL_R3P0) - writel_relaxed(l2x0_saved_regs.pwr_ctrl, - l2x0_base + L2X0_POWER_CTRL); - } - } - - l2x0_resume(); -} - -static void aurora_resume(void) -{ - if (!(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { - writel_relaxed(l2x0_saved_regs.aux_ctrl, - l2x0_base + L2X0_AUX_CTRL); - 
writel_relaxed(l2x0_saved_regs.ctrl, l2x0_base + L2X0_CTRL); - } + readl_relaxed(base + L310_PREFETCH_CTRL); } static void tauros3_resume(void) { - if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { + void __iomem *base = l2x0_base; + + if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) { writel_relaxed(l2x0_saved_regs.aux2_ctrl, - l2x0_base + TAUROS3_AUX2_CTRL); + base + TAUROS3_AUX2_CTRL); writel_relaxed(l2x0_saved_regs.prefetch_ctrl, - l2x0_base + L2X0_PREFETCH_CTRL); - } + base + L310_PREFETCH_CTRL); - l2x0_resume(); -} - -static void __init aurora_broadcast_l2_commands(void) -{ - __u32 u; - /* Enable Broadcasting of cache commands to L2*/ - __asm__ __volatile__("mrc p15, 1, %0, c15, c2, 0" : "=r"(u)); - u |= AURORA_CTRL_FW; /* Set the FW bit */ - __asm__ __volatile__("mcr p15, 1, %0, c15, c2, 0\n" : : "r"(u)); - isb(); -} - -static void __init aurora_of_setup(const struct device_node *np, - u32 *aux_val, u32 *aux_mask) -{ - u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU; - u32 mask = AURORA_ACR_REPLACEMENT_MASK; - - of_property_read_u32(np, "cache-id-part", - &cache_id_part_number_from_dt); - - /* Determine and save the write policy */ - l2_wt_override = of_property_read_bool(np, "wt-override"); - - if (l2_wt_override) { - val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY; - mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK; + l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8); } - - *aux_val &= ~mask; - *aux_val |= val; - *aux_mask &= ~mask; } -static const struct l2x0_of_data pl310_data = { - .setup = pl310_of_setup, - .save = pl310_save, - .outer_cache = { - .resume = pl310_resume, - .inv_range = l2x0_inv_range, - .clean_range = l2x0_clean_range, - .flush_range = l2x0_flush_range, - .sync = l2x0_cache_sync, - .flush_all = l2x0_flush_all, - .inv_all = l2x0_inv_all, - .disable = l2x0_disable, - }, -}; - -static const struct l2x0_of_data l2x0_data = { - .setup = l2x0_of_setup, - .save = NULL, - .outer_cache = { - .resume = l2x0_resume, - .inv_range = 
l2x0_inv_range, - .clean_range = l2x0_clean_range, - .flush_range = l2x0_flush_range, - .sync = l2x0_cache_sync, - .flush_all = l2x0_flush_all, - .inv_all = l2x0_inv_all, - .disable = l2x0_disable, - }, -}; - -static const struct l2x0_of_data aurora_with_outer_data = { - .setup = aurora_of_setup, - .save = aurora_save, - .outer_cache = { - .resume = aurora_resume, - .inv_range = aurora_inv_range, - .clean_range = aurora_clean_range, - .flush_range = aurora_flush_range, - .sync = l2x0_cache_sync, - .flush_all = l2x0_flush_all, - .inv_all = l2x0_inv_all, - .disable = l2x0_disable, - }, -}; - -static const struct l2x0_of_data aurora_no_outer_data = { - .setup = aurora_of_setup, - .save = aurora_save, - .outer_cache = { - .resume = aurora_resume, - }, -}; - -static const struct l2x0_of_data tauros3_data = { - .setup = NULL, +static const struct l2c_init_data of_tauros3_data __initconst = { + .type = "Tauros3", + .way_size_0 = SZ_8K, + .num_lock = 8, + .enable = l2c_enable, .save = tauros3_save, /* Tauros3 broadcasts L1 cache operations to L2 */ .outer_cache = { @@ -936,43 +1451,26 @@ static const struct l2x0_of_data tauros3_data = { }, }; -static const struct l2x0_of_data bcm_l2x0_data = { - .setup = pl310_of_setup, - .save = pl310_save, - .outer_cache = { - .resume = pl310_resume, - .inv_range = bcm_inv_range, - .clean_range = bcm_clean_range, - .flush_range = bcm_flush_range, - .sync = l2x0_cache_sync, - .flush_all = l2x0_flush_all, - .inv_all = l2x0_inv_all, - .disable = l2x0_disable, - }, -}; - +#define L2C_ID(name, fns) { .compatible = name, .data = (void *)&fns } static const struct of_device_id l2x0_ids[] __initconst = { - { .compatible = "arm,l210-cache", .data = (void *)&l2x0_data }, - { .compatible = "arm,l220-cache", .data = (void *)&l2x0_data }, - { .compatible = "arm,pl310-cache", .data = (void *)&pl310_data }, - { .compatible = "bcm,bcm11351-a2-pl310-cache", /* deprecated name */ - .data = (void *)&bcm_l2x0_data}, - { .compatible = 
"brcm,bcm11351-a2-pl310-cache", - .data = (void *)&bcm_l2x0_data}, - { .compatible = "marvell,aurora-outer-cache", - .data = (void *)&aurora_with_outer_data}, - { .compatible = "marvell,aurora-system-cache", - .data = (void *)&aurora_no_outer_data}, - { .compatible = "marvell,tauros3-cache", - .data = (void *)&tauros3_data }, + L2C_ID("arm,l210-cache", of_l2c210_data), + L2C_ID("arm,l220-cache", of_l2c220_data), + L2C_ID("arm,pl310-cache", of_l2c310_data), + L2C_ID("brcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data), + L2C_ID("marvell,aurora-outer-cache", of_aurora_with_outer_data), + L2C_ID("marvell,aurora-system-cache", of_aurora_no_outer_data), + L2C_ID("marvell,tauros3-cache", of_tauros3_data), + /* Deprecated IDs */ + L2C_ID("bcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data), {} }; int __init l2x0_of_init(u32 aux_val, u32 aux_mask) { + const struct l2c_init_data *data; struct device_node *np; - const struct l2x0_of_data *data; struct resource res; + u32 cache_id, old_aux; np = of_find_matching_node(NULL, l2x0_ids); if (!np) @@ -989,23 +1487,29 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask) data = of_match_node(l2x0_ids, np)->data; - /* L2 configuration can only be changed if the cache is disabled */ - if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { - if (data->setup) - data->setup(np, &aux_val, &aux_mask); - - /* For aurora cache in no outer mode select the - * correct mode using the coprocessor*/ - if (data == &aurora_no_outer_data) - aurora_broadcast_l2_commands(); + old_aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); + if (old_aux != ((old_aux & aux_mask) | aux_val)) { + pr_warn("L2C: platform modifies aux control register: 0x%08x -> 0x%08x\n", + old_aux, (old_aux & aux_mask) | aux_val); + } else if (aux_mask != ~0U && aux_val != 0) { + pr_alert("L2C: platform provided aux values match the hardware, so have no effect. 
Please remove them.\n"); } - if (data->save) - data->save(); + /* All L2 caches are unified, so this property should be specified */ + if (!of_property_read_bool(np, "cache-unified")) + pr_err("L2C: device tree omits to specify unified cache\n"); + + /* L2 configuration can only be changed if the cache is disabled */ + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) + if (data->of_parse) + data->of_parse(np, &aux_val, &aux_mask); + + if (cache_id_part_number_from_dt) + cache_id = cache_id_part_number_from_dt; + else + cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID); - of_init = true; - memcpy(&outer_cache, &data->outer_cache, sizeof(outer_cache)); - l2x0_init(l2x0_base, aux_val, aux_mask); + __l2c_init(data, aux_val, aux_mask, cache_id); return 0; } diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 137463bcbeac..b05e08c4734c 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1964,8 +1964,8 @@ arm_iommu_create_mapping(struct bus_type *bus, dma_addr_t base, size_t size) mapping->nr_bitmaps = 1; mapping->extensions = extensions; mapping->base = base; - mapping->size = bitmap_size << PAGE_SHIFT; mapping->bits = BITS_PER_BYTE * bitmap_size; + mapping->size = mapping->bits << PAGE_SHIFT; spin_lock_init(&mapping->lock); diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index c8ab21dc2178..5958ac05181e 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -23,6 +23,7 @@ #include <linux/dma-contiguous.h> #include <linux/sizes.h> +#include <asm/cp15.h> #include <asm/mach-types.h> #include <asm/memblock.h> #include <asm/prom.h> @@ -36,6 +37,14 @@ #include "mm.h" +#ifdef CONFIG_CPU_CP15_MMU +unsigned long __init __clear_cr(unsigned long mask) +{ + cr_alignment = cr_alignment & ~mask; + return cr_alignment; +} +#endif + static phys_addr_t phys_initrd_start __initdata = 0; static unsigned long phys_initrd_size __initdata = 0; diff --git a/arch/arm/mm/l2c-common.c b/arch/arm/mm/l2c-common.c new file mode 
100644 index 000000000000..10a3cf28c362 --- /dev/null +++ b/arch/arm/mm/l2c-common.c @@ -0,0 +1,20 @@ +/* + * Copyright (C) 2010 ARM Ltd. + * Written by Catalin Marinas <catalin.marinas@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/bug.h> +#include <linux/smp.h> +#include <asm/outercache.h> + +void outer_disable(void) +{ + WARN_ON(!irqs_disabled()); + WARN_ON(num_online_cpus() > 1); + + if (outer_cache.disable) + outer_cache.disable(); +} diff --git a/arch/arm/mm/l2c-l2x0-resume.S b/arch/arm/mm/l2c-l2x0-resume.S new file mode 100644 index 000000000000..99b05f21a59a --- /dev/null +++ b/arch/arm/mm/l2c-l2x0-resume.S @@ -0,0 +1,58 @@ +/* + * L2C-310 early resume code. This can be used by platforms to restore + * the settings of their L2 cache controller before restoring the + * processor state. + * + * This code can only be used to if you are running in the secure world. 
+ */ +#include <linux/linkage.h> +#include <asm/hardware/cache-l2x0.h> + + .text + +ENTRY(l2c310_early_resume) + adr r0, 1f + ldr r2, [r0] + add r0, r2, r0 + + ldmia r0, {r1, r2, r3, r4, r5, r6, r7, r8} + @ r1 = phys address of L2C-310 controller + @ r2 = aux_ctrl + @ r3 = tag_latency + @ r4 = data_latency + @ r5 = filter_start + @ r6 = filter_end + @ r7 = prefetch_ctrl + @ r8 = pwr_ctrl + + @ Check that the address has been initialised + teq r1, #0 + moveq pc, lr + + @ The prefetch and power control registers are revision dependent + @ and can be written whether or not the L2 cache is enabled + ldr r0, [r1, #L2X0_CACHE_ID] + and r0, r0, #L2X0_CACHE_ID_RTL_MASK + cmp r0, #L310_CACHE_ID_RTL_R2P0 + strcs r7, [r1, #L310_PREFETCH_CTRL] + cmp r0, #L310_CACHE_ID_RTL_R3P0 + strcs r8, [r1, #L310_POWER_CTRL] + + @ Don't setup the L2 cache if it is already enabled + ldr r0, [r1, #L2X0_CTRL] + tst r0, #L2X0_CTRL_EN + movne pc, lr + + str r3, [r1, #L310_TAG_LATENCY_CTRL] + str r4, [r1, #L310_DATA_LATENCY_CTRL] + str r6, [r1, #L310_ADDR_FILTER_END] + str r5, [r1, #L310_ADDR_FILTER_START] + + str r2, [r1, #L2X0_AUX_CTRL] + mov r9, #L2X0_CTRL_EN + str r9, [r1, #L2X0_CTRL] + mov pc, lr +ENDPROC(l2c310_early_resume) + + .align +1: .long l2x0_saved_regs - . 
diff --git a/arch/arm/mm/mm.h b/arch/arm/mm/mm.h index 7ea641b7aa7d..ce727d47275c 100644 --- a/arch/arm/mm/mm.h +++ b/arch/arm/mm/mm.h @@ -2,6 +2,8 @@ #include <linux/list.h> #include <linux/vmalloc.h> +#include <asm/pgtable.h> + /* the upper-most page table pointer */ extern pmd_t *top_pmd; @@ -93,3 +95,5 @@ extern phys_addr_t arm_lowmem_limit; void __init bootmem_init(void); void arm_mm_memblock_reserve(void); void dma_contiguous_remap(void); + +unsigned long __clear_cr(unsigned long mask); diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 82ea2b3fb9b5..ab14b79b03f0 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -118,28 +118,54 @@ static struct cachepolicy cache_policies[] __initdata = { }; #ifdef CONFIG_CPU_CP15 +static unsigned long initial_pmd_value __initdata = 0; + /* - * These are useful for identifying cache coherency - * problems by allowing the cache or the cache and - * writebuffer to be turned off. (Note: the write - * buffer should not be on and the cache off). + * Initialise the cache_policy variable with the initial state specified + * via the "pmd" value. This is used to ensure that on ARMv6 and later, + * the C code sets the page tables up with the same policy as the head + * assembly code, which avoids an illegal state where the TLBs can get + * confused. See comments in early_cachepolicy() for more information. */ -static int __init early_cachepolicy(char *p) +void __init init_default_cache_policy(unsigned long pmd) { int i; + initial_pmd_value = pmd; + + pmd &= PMD_SECT_TEX(1) | PMD_SECT_BUFFERABLE | PMD_SECT_CACHEABLE; + + for (i = 0; i < ARRAY_SIZE(cache_policies); i++) + if (cache_policies[i].pmd == pmd) { + cachepolicy = i; + break; + } + + if (i == ARRAY_SIZE(cache_policies)) + pr_err("ERROR: could not find cache policy\n"); +} + +/* + * These are useful for identifying cache coherency problems by allowing + * the cache or the cache and writebuffer to be turned off. 
(Note: the + * write buffer should not be on and the cache off). + */ +static int __init early_cachepolicy(char *p) +{ + int i, selected = -1; + for (i = 0; i < ARRAY_SIZE(cache_policies); i++) { int len = strlen(cache_policies[i].policy); if (memcmp(p, cache_policies[i].policy, len) == 0) { - cachepolicy = i; - cr_alignment &= ~cache_policies[i].cr_mask; - cr_no_alignment &= ~cache_policies[i].cr_mask; + selected = i; break; } } - if (i == ARRAY_SIZE(cache_policies)) - printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n"); + + if (selected == -1) + pr_err("ERROR: unknown or unsupported cache policy\n"); + /* * This restriction is partly to do with the way we boot; it is * unpredictable to have memory mapped using two different sets of @@ -147,12 +173,18 @@ static int __init early_cachepolicy(char *p) * change these attributes once the initial assembly has setup the * page tables. */ - if (cpu_architecture() >= CPU_ARCH_ARMv6) { - printk(KERN_WARNING "Only cachepolicy=writeback supported on ARMv6 and later\n"); - cachepolicy = CPOLICY_WRITEBACK; + if (cpu_architecture() >= CPU_ARCH_ARMv6 && selected != cachepolicy) { + pr_warn("Only cachepolicy=%s supported on ARMv6 and later\n", + cache_policies[cachepolicy].policy); + return 0; + } + + if (selected != cachepolicy) { + unsigned long cr = __clear_cr(cache_policies[selected].cr_mask); + cachepolicy = selected; + flush_cache_all(); + set_cr(cr); } - flush_cache_all(); - set_cr(cr_alignment); return 0; } early_param("cachepolicy", early_cachepolicy); @@ -187,35 +219,6 @@ static int __init early_ecc(char *p) early_param("ecc", early_ecc); #endif -static int __init noalign_setup(char *__unused) -{ - cr_alignment &= ~CR_A; - cr_no_alignment &= ~CR_A; - set_cr(cr_alignment); - return 1; -} -__setup("noalign", noalign_setup); - -#ifndef CONFIG_SMP -void adjust_cr(unsigned long mask, unsigned long set) -{ - unsigned long flags; - - mask &= ~CR_A; - - set &= mask; - - local_irq_save(flags); - - cr_no_alignment = 
(cr_no_alignment & ~mask) | set; - cr_alignment = (cr_alignment & ~mask) | set; - - set_cr((get_cr() & ~mask) | set); - - local_irq_restore(flags); -} -#endif - #else /* ifdef CONFIG_CPU_CP15 */ static int __init early_cachepolicy(char *p) @@ -415,8 +418,17 @@ static void __init build_mem_type_table(void) cachepolicy = CPOLICY_WRITEBACK; ecc_mask = 0; } - if (is_smp()) - cachepolicy = CPOLICY_WRITEALLOC; + + if (is_smp()) { + if (cachepolicy != CPOLICY_WRITEALLOC) { + pr_warn("Forcing write-allocate cache policy for SMP\n"); + cachepolicy = CPOLICY_WRITEALLOC; + } + if (!(initial_pmd_value & PMD_SECT_S)) { + pr_warn("Forcing shared mappings for SMP\n"); + initial_pmd_value |= PMD_SECT_S; + } + } /* * Strip out features not present on earlier architectures. @@ -540,11 +552,12 @@ static void __init build_mem_type_table(void) mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; #endif - if (is_smp()) { - /* - * Mark memory with the "shared" attribute - * for SMP systems - */ + /* + * If the initial page tables were created with the S bit + * set, then we need to do the same here for the same + * reasons given in early_cachepolicy(). + */ + if (initial_pmd_value & PMD_SECT_S) { user_pgprot |= L_PTE_SHARED; kern_pgprot |= L_PTE_SHARED; vecs_pgprot |= L_PTE_SHARED; diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index 0c93588fcb91..1ca37c72f12f 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -123,6 +123,11 @@ __v7m_setup: mov pc, lr ENDPROC(__v7m_setup) + .align 2 +__v7m_setup_stack: + .space 4 * 8 @ 8 registers +__v7m_setup_stack_top: + define_processor_functions v7m, dabort=nommu_early_abort, pabort=legacy_pabort, nommu=1 .section ".rodata" @@ -152,6 +157,3 @@ __v7m_proc_info: .long nop_cache_fns @ proc_info_list.cache .size __v7m_proc_info, . - __v7m_proc_info -__v7m_setup_stack: - .space 4 * 8 @ 8 registers -__v7m_setup_stack_top: |