diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-05 15:57:04 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-06-05 15:57:04 -0700 |
commit | eb3d3ec567e868c8a3bfbfdfc9465ffd52983d11 (patch) | |
tree | 75acf38b8d73cd281e5ce4dcc941faf48e244b98 /arch/arm/mm/cache-l2x0.c | |
parent | c3c55a07203947f72afa50a3218460b27307c47d (diff) | |
parent | bd63ce27d9d62bc40a962b991cbbbe4f0dc913d2 (diff) | |
download | talos-op-linux-eb3d3ec567e868c8a3bfbfdfc9465ffd52983d11.tar.gz talos-op-linux-eb3d3ec567e868c8a3bfbfdfc9465ffd52983d11.zip |
Merge branch 'for-linus' of git://ftp.arm.linux.org.uk/~rmk/linux-arm into next
Pull ARM updates from Russell King:
- Major clean-up of the L2 cache support code. The existing mess was
becoming rather unmaintainable through all the additions that others
have done over time. This turns it into a much nicer structure, and
implements a few performance improvements as well.
- Clean up some of the CP15 control register tweaks for alignment
support, moving some code and data into alignment.c
- DMA properties for ARM, from Santosh and reviewed by DT people. This
adds DT properties to specify bus translations we can't discover
automatically, and to indicate whether devices are coherent.
- Hibernation support for ARM
- Make ftrace work with read-only text in modules
- add suspend support for PJ4B CPUs
- rework interrupt masking for undefined instruction handling, which
allows us to enable interrupts earlier in the handling of these
exceptions.
- support for big endian page tables
- fix stacktrace support to exclude stacktrace functions from the
trace, and add save_stack_trace_regs() implementation so that kprobes
can record stack traces.
- Add support for the Cortex-A17 CPU.
- Remove last vestiges of ARM710 support.
- Removal of ARM "meminfo" structure, finally converting us solely to
memblock to handle the early memory initialisation.
* 'for-linus' of git://ftp.arm.linux.org.uk/~rmk/linux-arm: (142 commits)
ARM: ensure C page table setup code follows assembly code (part II)
ARM: ensure C page table setup code follows assembly code
ARM: consolidate last remaining open-coded alignment trap enable
ARM: remove global cr_no_alignment
ARM: remove CPU_CP15 conditional from alignment.c
ARM: remove unused adjust_cr() function
ARM: move "noalign" command line option to alignment.c
ARM: provide common method to clear bits in CPU control register
ARM: 8025/1: Get rid of meminfo
ARM: 8060/1: mm: allow sub-architectures to override PCI I/O memory type
ARM: 8066/1: correction for ARM patch 8031/2
ARM: 8049/1: ftrace/add save_stack_trace_regs() implementation
ARM: 8065/1: remove last use of CONFIG_CPU_ARM710
ARM: 8062/1: Modify ldrt fixup handler to re-execute the userspace instruction
ARM: 8047/1: rwsem: use asm-generic rwsem implementation
ARM: l2c: trial at enabling some Cortex-A9 optimisations
ARM: l2c: add warnings for stuff modifying aux_ctrl register values
ARM: l2c: print a warning with L2C-310 caches if the cache size is modified
ARM: l2c: remove old .set_debug method
ARM: l2c: kill L2X0_AUX_CTRL_MASK before anyone else makes use of this
...
Diffstat (limited to 'arch/arm/mm/cache-l2x0.c')
-rw-r--r-- | arch/arm/mm/cache-l2x0.c | 1498 |
1 files changed, 1001 insertions, 497 deletions
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 7abde2ce8973..efc5cabf70e0 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -16,18 +16,33 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/cpu.h> #include <linux/err.h> #include <linux/init.h> +#include <linux/smp.h> #include <linux/spinlock.h> #include <linux/io.h> #include <linux/of.h> #include <linux/of_address.h> #include <asm/cacheflush.h> +#include <asm/cp15.h> +#include <asm/cputype.h> #include <asm/hardware/cache-l2x0.h> #include "cache-tauros3.h" #include "cache-aurora-l2.h" +struct l2c_init_data { + const char *type; + unsigned way_size_0; + unsigned num_lock; + void (*of_parse)(const struct device_node *, u32 *, u32 *); + void (*enable)(void __iomem *, u32, unsigned); + void (*fixup)(void __iomem *, u32, struct outer_cache_fns *); + void (*save)(void __iomem *); + struct outer_cache_fns outer_cache; +}; + #define CACHE_LINE_SIZE 32 static void __iomem *l2x0_base; @@ -36,96 +51,116 @@ static u32 l2x0_way_mask; /* Bitmask of active ways */ static u32 l2x0_size; static unsigned long sync_reg_offset = L2X0_CACHE_SYNC; -/* Aurora don't have the cache ID register available, so we have to - * pass it though the device tree */ -static u32 cache_id_part_number_from_dt; - struct l2x0_regs l2x0_saved_regs; -struct l2x0_of_data { - void (*setup)(const struct device_node *, u32 *, u32 *); - void (*save)(void); - struct outer_cache_fns outer_cache; -}; - -static bool of_init = false; - -static inline void cache_wait_way(void __iomem *reg, unsigned long mask) +/* + * Common code for all cache controllers. + */ +static inline void l2c_wait_mask(void __iomem *reg, unsigned long mask) { /* wait for cache operation by line or way to complete */ while (readl_relaxed(reg) & mask) cpu_relax(); } -#ifdef CONFIG_CACHE_PL310 -static inline void cache_wait(void __iomem *reg, unsigned long mask) +/* + * By default, we write directly to secure registers. Platforms must + * override this if they are running non-secure. + */ +static void l2c_write_sec(unsigned long val, void __iomem *base, unsigned reg) { - /* cache operations by line are atomic on PL310 */ + if (val == readl_relaxed(base + reg)) + return; + if (outer_cache.write_sec) + outer_cache.write_sec(val, reg); + else + writel_relaxed(val, base + reg); } -#else -#define cache_wait cache_wait_way -#endif -static inline void cache_sync(void) +/* + * This should only be called when we have a requirement that the + * register be written due to a work-around, as platforms running + * in non-secure mode may not be able to access this register. + */ +static inline void l2c_set_debug(void __iomem *base, unsigned long val) { - void __iomem *base = l2x0_base; - - writel_relaxed(0, base + sync_reg_offset); - cache_wait(base + L2X0_CACHE_SYNC, 1); + l2c_write_sec(val, base, L2X0_DEBUG_CTRL); } -static inline void l2x0_clean_line(unsigned long addr) +static void __l2c_op_way(void __iomem *reg) { - void __iomem *base = l2x0_base; - cache_wait(base + L2X0_CLEAN_LINE_PA, 1); - writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA); + writel_relaxed(l2x0_way_mask, reg); + l2c_wait_mask(reg, l2x0_way_mask); } -static inline void l2x0_inv_line(unsigned long addr) +static inline void l2c_unlock(void __iomem *base, unsigned num) { - void __iomem *base = l2x0_base; - cache_wait(base + L2X0_INV_LINE_PA, 1); - writel_relaxed(addr, base + L2X0_INV_LINE_PA); + unsigned i; + + for (i = 0; i < num; i++) { + writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_D_BASE + + i * L2X0_LOCKDOWN_STRIDE); + writel_relaxed(0, base + L2X0_LOCKDOWN_WAY_I_BASE + + i * L2X0_LOCKDOWN_STRIDE); + } } -#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915) -static inline void debug_writel(unsigned long val) +/* + * Enable the L2 cache controller. This function must only be + * called when the cache controller is known to be disabled. + */ +static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock) { - if (outer_cache.set_debug) - outer_cache.set_debug(val); + unsigned long flags; + + l2c_write_sec(aux, base, L2X0_AUX_CTRL); + + l2c_unlock(base, num_lock); + + local_irq_save(flags); + __l2c_op_way(base + L2X0_INV_WAY); + writel_relaxed(0, base + sync_reg_offset); + l2c_wait_mask(base + sync_reg_offset, 1); + local_irq_restore(flags); + + l2c_write_sec(L2X0_CTRL_EN, base, L2X0_CTRL); } -static void pl310_set_debug(unsigned long val) +static void l2c_disable(void) { - writel_relaxed(val, l2x0_base + L2X0_DEBUG_CTRL); + void __iomem *base = l2x0_base; + + outer_cache.flush_all(); + l2c_write_sec(0, base, L2X0_CTRL); + dsb(st); } -#else -/* Optimised out for non-errata case */ -static inline void debug_writel(unsigned long val) + +#ifdef CONFIG_CACHE_PL310 +static inline void cache_wait(void __iomem *reg, unsigned long mask) { + /* cache operations by line are atomic on PL310 */ } - -#define pl310_set_debug NULL +#else +#define cache_wait l2c_wait_mask #endif -#ifdef CONFIG_PL310_ERRATA_588369 -static inline void l2x0_flush_line(unsigned long addr) +static inline void cache_sync(void) { void __iomem *base = l2x0_base; - /* Clean by PA followed by Invalidate by PA */ - cache_wait(base + L2X0_CLEAN_LINE_PA, 1); - writel_relaxed(addr, base + L2X0_CLEAN_LINE_PA); - cache_wait(base + L2X0_INV_LINE_PA, 1); - writel_relaxed(addr, base + L2X0_INV_LINE_PA); + writel_relaxed(0, base + sync_reg_offset); + cache_wait(base + L2X0_CACHE_SYNC, 1); } -#else -static inline void l2x0_flush_line(unsigned long addr) +#if defined(CONFIG_PL310_ERRATA_588369) || defined(CONFIG_PL310_ERRATA_727915) +static inline void debug_writel(unsigned long val) +{ + l2c_set_debug(l2x0_base, val); +} +#else +/* Optimised out for non-errata case */ +static inline void debug_writel(unsigned long val) { - void __iomem *base = l2x0_base; - cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1); - writel_relaxed(addr, base + L2X0_CLEAN_INV_LINE_PA); } #endif @@ -141,8 +176,7 @@ static void l2x0_cache_sync(void) static void __l2x0_flush_all(void) { debug_writel(0x03); - writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_INV_WAY); - cache_wait_way(l2x0_base + L2X0_CLEAN_INV_WAY, l2x0_way_mask); + __l2c_op_way(l2x0_base + L2X0_CLEAN_INV_WAY); cache_sync(); debug_writel(0x00); } @@ -157,275 +191,883 @@ static void l2x0_flush_all(void) raw_spin_unlock_irqrestore(&l2x0_lock, flags); } -static void l2x0_clean_all(void) +static void l2x0_disable(void) { unsigned long flags; - /* clean all ways */ raw_spin_lock_irqsave(&l2x0_lock, flags); - writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_CLEAN_WAY); - cache_wait_way(l2x0_base + L2X0_CLEAN_WAY, l2x0_way_mask); - cache_sync(); + __l2x0_flush_all(); + l2c_write_sec(0, l2x0_base, L2X0_CTRL); + dsb(st); raw_spin_unlock_irqrestore(&l2x0_lock, flags); } -static void l2x0_inv_all(void) +static void l2c_save(void __iomem *base) { - unsigned long flags; + l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); +} - /* invalidate all ways */ - raw_spin_lock_irqsave(&l2x0_lock, flags); - /* Invalidating when L2 is enabled is a nono */ - BUG_ON(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN); - writel_relaxed(l2x0_way_mask, l2x0_base + L2X0_INV_WAY); - cache_wait_way(l2x0_base + L2X0_INV_WAY, l2x0_way_mask); - cache_sync(); - raw_spin_unlock_irqrestore(&l2x0_lock, flags); +/* + * L2C-210 specific code. + * + * The L2C-2x0 PA, set/way and sync operations are atomic, but we must + * ensure that no background operation is running. The way operations + * are all background tasks. + * + * While a background operation is in progress, any new operation is + * ignored (unspecified whether this causes an error.) Thankfully, not + * used on SMP. + * + * Never has a different sync register other than L2X0_CACHE_SYNC, but + * we use sync_reg_offset here so we can share some of this with L2C-310. + */ +static void __l2c210_cache_sync(void __iomem *base) +{ + writel_relaxed(0, base + sync_reg_offset); } -static void l2x0_inv_range(unsigned long start, unsigned long end) +static void __l2c210_op_pa_range(void __iomem *reg, unsigned long start, + unsigned long end) +{ + while (start < end) { + writel_relaxed(start, reg); + start += CACHE_LINE_SIZE; + } +} + +static void l2c210_inv_range(unsigned long start, unsigned long end) { void __iomem *base = l2x0_base; - unsigned long flags; - raw_spin_lock_irqsave(&l2x0_lock, flags); if (start & (CACHE_LINE_SIZE - 1)) { start &= ~(CACHE_LINE_SIZE - 1); - debug_writel(0x03); - l2x0_flush_line(start); - debug_writel(0x00); + writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA); start += CACHE_LINE_SIZE; } if (end & (CACHE_LINE_SIZE - 1)) { end &= ~(CACHE_LINE_SIZE - 1); - debug_writel(0x03); - l2x0_flush_line(end); - debug_writel(0x00); + writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA); } + __l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end); + __l2c210_cache_sync(base); +} + +static void l2c210_clean_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + + start &= ~(CACHE_LINE_SIZE - 1); + __l2c210_op_pa_range(base + L2X0_CLEAN_LINE_PA, start, end); + __l2c210_cache_sync(base); +} + +static void l2c210_flush_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + + start &= ~(CACHE_LINE_SIZE - 1); + __l2c210_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA, start, end); + __l2c210_cache_sync(base); +} + +static void l2c210_flush_all(void) +{ + void __iomem *base = l2x0_base; + + BUG_ON(!irqs_disabled()); + + __l2c_op_way(base + L2X0_CLEAN_INV_WAY); + __l2c210_cache_sync(base); +} + +static void l2c210_sync(void) +{ + __l2c210_cache_sync(l2x0_base); +} + +static void l2c210_resume(void) +{ + void __iomem *base = l2x0_base; + + if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) + l2c_enable(base, l2x0_saved_regs.aux_ctrl, 1); +} + +static const struct l2c_init_data l2c210_data __initconst = { + .type = "L2C-210", + .way_size_0 = SZ_8K, + .num_lock = 1, + .enable = l2c_enable, + .save = l2c_save, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c_disable, + .sync = l2c210_sync, + .resume = l2c210_resume, + }, +}; + +/* + * L2C-220 specific code. + * + * All operations are background operations: they have to be waited for. + * Conflicting requests generate a slave error (which will cause an + * imprecise abort.) Never uses sync_reg_offset, so we hard-code the + * sync register here. + * + * However, we can re-use the l2c210_resume call. + */ +static inline void __l2c220_cache_sync(void __iomem *base) +{ + writel_relaxed(0, base + L2X0_CACHE_SYNC); + l2c_wait_mask(base + L2X0_CACHE_SYNC, 1); +} + +static void l2c220_op_way(void __iomem *base, unsigned reg) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + __l2c_op_way(base + reg); + __l2c220_cache_sync(base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static unsigned long l2c220_op_pa_range(void __iomem *reg, unsigned long start, + unsigned long end, unsigned long flags) +{ + raw_spinlock_t *lock = &l2x0_lock; + while (start < end) { unsigned long blk_end = start + min(end - start, 4096UL); while (start < blk_end) { - l2x0_inv_line(start); + l2c_wait_mask(reg, 1); + writel_relaxed(start, reg); start += CACHE_LINE_SIZE; } if (blk_end < end) { - raw_spin_unlock_irqrestore(&l2x0_lock, flags); - raw_spin_lock_irqsave(&l2x0_lock, flags); + raw_spin_unlock_irqrestore(lock, flags); + raw_spin_lock_irqsave(lock, flags); } } - cache_wait(base + L2X0_INV_LINE_PA, 1); - cache_sync(); - raw_spin_unlock_irqrestore(&l2x0_lock, flags); + + return flags; } -static void l2x0_clean_range(unsigned long start, unsigned long end) +static void l2c220_inv_range(unsigned long start, unsigned long end) { void __iomem *base = l2x0_base; unsigned long flags; - if ((end - start) >= l2x0_size) { - l2x0_clean_all(); - return; - } - raw_spin_lock_irqsave(&l2x0_lock, flags); - start &= ~(CACHE_LINE_SIZE - 1); - while (start < end) { - unsigned long blk_end = start + min(end - start, 4096UL); - - while (start < blk_end) { - l2x0_clean_line(start); + if ((start | end) & (CACHE_LINE_SIZE - 1)) { + if (start & (CACHE_LINE_SIZE - 1)) { + start &= ~(CACHE_LINE_SIZE - 1); + writel_relaxed(start, base + L2X0_CLEAN_INV_LINE_PA); start += CACHE_LINE_SIZE; } - if (blk_end < end) { - raw_spin_unlock_irqrestore(&l2x0_lock, flags); - raw_spin_lock_irqsave(&l2x0_lock, flags); + if (end & (CACHE_LINE_SIZE - 1)) { + end &= ~(CACHE_LINE_SIZE - 1); + l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1); + writel_relaxed(end, base + L2X0_CLEAN_INV_LINE_PA); } } - cache_wait(base + L2X0_CLEAN_LINE_PA, 1); - cache_sync(); + + flags = l2c220_op_pa_range(base + L2X0_INV_LINE_PA, + start, end, flags); + l2c_wait_mask(base + L2X0_INV_LINE_PA, 1); + __l2c220_cache_sync(base); raw_spin_unlock_irqrestore(&l2x0_lock, flags); } -static void l2x0_flush_range(unsigned long start, unsigned long end) +static void l2c220_clean_range(unsigned long start, unsigned long end) { void __iomem *base = l2x0_base; unsigned long flags; + start &= ~(CACHE_LINE_SIZE - 1); if ((end - start) >= l2x0_size) { - l2x0_flush_all(); + l2c220_op_way(base, L2X0_CLEAN_WAY); return; } raw_spin_lock_irqsave(&l2x0_lock, flags); + flags = l2c220_op_pa_range(base + L2X0_CLEAN_LINE_PA, + start, end, flags); + l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1); + __l2c220_cache_sync(base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c220_flush_range(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + unsigned long flags; + start &= ~(CACHE_LINE_SIZE - 1); + if ((end - start) >= l2x0_size) { + l2c220_op_way(base, L2X0_CLEAN_INV_WAY); + return; + } + + raw_spin_lock_irqsave(&l2x0_lock, flags); + flags = l2c220_op_pa_range(base + L2X0_CLEAN_INV_LINE_PA, + start, end, flags); + l2c_wait_mask(base + L2X0_CLEAN_INV_LINE_PA, 1); + __l2c220_cache_sync(base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c220_flush_all(void) +{ + l2c220_op_way(l2x0_base, L2X0_CLEAN_INV_WAY); +} + +static void l2c220_sync(void) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&l2x0_lock, flags); + __l2c220_cache_sync(l2x0_base); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); +} + +static void l2c220_enable(void __iomem *base, u32 aux, unsigned num_lock) +{ + /* + * Always enable non-secure access to the lockdown registers - + * we write to them as part of the L2C enable sequence so they + * need to be accessible. + */ + aux |= L220_AUX_CTRL_NS_LOCKDOWN; + + l2c_enable(base, aux, num_lock); +} + +static const struct l2c_init_data l2c220_data = { + .type = "L2C-220", + .way_size_0 = SZ_8K, + .num_lock = 1, + .enable = l2c220_enable, + .save = l2c_save, + .outer_cache = { + .inv_range = l2c220_inv_range, + .clean_range = l2c220_clean_range, + .flush_range = l2c220_flush_range, + .flush_all = l2c220_flush_all, + .disable = l2c_disable, + .sync = l2c220_sync, + .resume = l2c210_resume, + }, +}; + +/* + * L2C-310 specific code. + * + * Very similar to L2C-210, the PA, set/way and sync operations are atomic, + * and the way operations are all background tasks. However, issuing an + * operation while a background operation is in progress results in a + * SLVERR response. We can reuse: + * + * __l2c210_cache_sync (using sync_reg_offset) + * l2c210_sync + * l2c210_inv_range (if 588369 is not applicable) + * l2c210_clean_range + * l2c210_flush_range (if 588369 is not applicable) + * l2c210_flush_all (if 727915 is not applicable) + * + * Errata: + * 588369: PL310 R0P0->R1P0, fixed R2P0. + * Affects: all clean+invalidate operations + * clean and invalidate skips the invalidate step, so we need to issue + * separate operations. We also require the above debug workaround + * enclosing this code fragment on affected parts. On unaffected parts, + * we must not use this workaround without the debug register writes + * to avoid exposing a problem similar to 727915. + * + * 727915: PL310 R2P0->R3P0, fixed R3P1. + * Affects: clean+invalidate by way + * clean and invalidate by way runs in the background, and a store can + * hit the line between the clean operation and invalidate operation, + * resulting in the store being lost. + * + * 752271: PL310 R3P0->R3P1-50REL0, fixed R3P2. + * Affects: 8x64-bit (double fill) line fetches + * double fill line fetches can fail to cause dirty data to be evicted + * from the cache before the new data overwrites the second line. + * + * 753970: PL310 R3P0, fixed R3P1. + * Affects: sync + * prevents merging writes after the sync operation, until another L2C + * operation is performed (or a number of other conditions.) + * + * 769419: PL310 R0P0->R3P1, fixed R3P2. + * Affects: store buffer + * store buffer is not automatically drained. + */ +static void l2c310_inv_range_erratum(unsigned long start, unsigned long end) +{ + void __iomem *base = l2x0_base; + + if ((start | end) & (CACHE_LINE_SIZE - 1)) { + unsigned long flags; + + /* Erratum 588369 for both clean+invalidate operations */ + raw_spin_lock_irqsave(&l2x0_lock, flags); + l2c_set_debug(base, 0x03); + + if (start & (CACHE_LINE_SIZE - 1)) { + start &= ~(CACHE_LINE_SIZE - 1); + writel_relaxed(start, base + L2X0_CLEAN_LINE_PA); + writel_relaxed(start, base + L2X0_INV_LINE_PA); + start += CACHE_LINE_SIZE; + } + + if (end & (CACHE_LINE_SIZE - 1)) { + end &= ~(CACHE_LINE_SIZE - 1); + writel_relaxed(end, base + L2X0_CLEAN_LINE_PA); + writel_relaxed(end, base + L2X0_INV_LINE_PA); + } + + l2c_set_debug(base, 0x00); + raw_spin_unlock_irqrestore(&l2x0_lock, flags); + } + + __l2c210_op_pa_range(base + L2X0_INV_LINE_PA, start, end); + __l2c210_cache_sync(base); +} + +static void l2c310_flush_range_erratum(unsigned long start, unsigned long end) +{ + raw_spinlock_t *lock = &l2x0_lock; + unsigned long flags; + void __iomem *base = l2x0_base; + + raw_spin_lock_irqsave(lock, flags); while (start < end) { unsigned long blk_end = start + min(end - start, 4096UL); - debug_writel(0x03); + l2c_set_debug(base, 0x03); while (start < blk_end) { - l2x0_flush_line(start); + writel_relaxed(start, base + L2X0_CLEAN_LINE_PA); + writel_relaxed(start, base + L2X0_INV_LINE_PA); start += CACHE_LINE_SIZE; } - debug_writel(0x00); + l2c_set_debug(base, 0x00); if (blk_end < end) { - raw_spin_unlock_irqrestore(&l2x0_lock, flags); - raw_spin_lock_irqsave(&l2x0_lock, flags); + raw_spin_unlock_irqrestore(lock, flags); + raw_spin_lock_irqsave(lock, flags); } } - cache_wait(base + L2X0_CLEAN_INV_LINE_PA, 1); - cache_sync(); - raw_spin_unlock_irqrestore(&l2x0_lock, flags); + raw_spin_unlock_irqrestore(lock, flags); + __l2c210_cache_sync(base); } -static void l2x0_disable(void) +static void l2c310_flush_all_erratum(void) { + void __iomem *base = l2x0_base; unsigned long flags; raw_spin_lock_irqsave(&l2x0_lock, flags); - __l2x0_flush_all(); - writel_relaxed(0, l2x0_base + L2X0_CTRL); - dsb(st); + l2c_set_debug(base, 0x03); + __l2c_op_way(base + L2X0_CLEAN_INV_WAY); + l2c_set_debug(base, 0x00); + __l2c210_cache_sync(base); raw_spin_unlock_irqrestore(&l2x0_lock, flags); } -static void l2x0_unlock(u32 cache_id) +static void __init l2c310_save(void __iomem *base) { - int lockregs; - int i; + unsigned revision; - switch (cache_id & L2X0_CACHE_ID_PART_MASK) { - case L2X0_CACHE_ID_PART_L310: - lockregs = 8; - break; - case AURORA_CACHE_ID: - lockregs = 4; + l2c_save(base); + + l2x0_saved_regs.tag_latency = readl_relaxed(base + + L310_TAG_LATENCY_CTRL); + l2x0_saved_regs.data_latency = readl_relaxed(base + + L310_DATA_LATENCY_CTRL); + l2x0_saved_regs.filter_end = readl_relaxed(base + + L310_ADDR_FILTER_END); + l2x0_saved_regs.filter_start = readl_relaxed(base + + L310_ADDR_FILTER_START); + + revision = readl_relaxed(base + L2X0_CACHE_ID) & + L2X0_CACHE_ID_RTL_MASK; + + /* From r2p0, there is Prefetch offset/control register */ + if (revision >= L310_CACHE_ID_RTL_R2P0) + l2x0_saved_regs.prefetch_ctrl = readl_relaxed(base + + L310_PREFETCH_CTRL); + + /* From r3p0, there is Power control register */ + if (revision >= L310_CACHE_ID_RTL_R3P0) + l2x0_saved_regs.pwr_ctrl = readl_relaxed(base + + L310_POWER_CTRL); +} + +static void l2c310_resume(void) +{ + void __iomem *base = l2x0_base; + + if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) { + unsigned revision; + + /* restore pl310 setup */ + writel_relaxed(l2x0_saved_regs.tag_latency, + base + L310_TAG_LATENCY_CTRL); + writel_relaxed(l2x0_saved_regs.data_latency, + base + L310_DATA_LATENCY_CTRL); + writel_relaxed(l2x0_saved_regs.filter_end, + base + L310_ADDR_FILTER_END); + writel_relaxed(l2x0_saved_regs.filter_start, + base + L310_ADDR_FILTER_START); + + revision = readl_relaxed(base + L2X0_CACHE_ID) & + L2X0_CACHE_ID_RTL_MASK; + + if (revision >= L310_CACHE_ID_RTL_R2P0) + l2c_write_sec(l2x0_saved_regs.prefetch_ctrl, base, + L310_PREFETCH_CTRL); + if (revision >= L310_CACHE_ID_RTL_R3P0) + l2c_write_sec(l2x0_saved_regs.pwr_ctrl, base, + L310_POWER_CTRL); + + l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8); + + /* Re-enable full-line-of-zeros for Cortex-A9 */ + if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO) + set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); + } +} + +static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, void *data) +{ + switch (act & ~CPU_TASKS_FROZEN) { + case CPU_STARTING: + set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); break; - default: - /* L210 and unknown types */ - lockregs = 1; + case CPU_DYING: + set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1))); break; } + return NOTIFY_OK; +} - for (i = 0; i < lockregs; i++) { - writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_D_BASE + - i * L2X0_LOCKDOWN_STRIDE); - writel_relaxed(0x0, l2x0_base + L2X0_LOCKDOWN_WAY_I_BASE + - i * L2X0_LOCKDOWN_STRIDE); +static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock) +{ + unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_PART_MASK; + bool cortex_a9 = read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9; + + if (rev >= L310_CACHE_ID_RTL_R2P0) { + if (cortex_a9) { + aux |= L310_AUX_CTRL_EARLY_BRESP; + pr_info("L2C-310 enabling early BRESP for Cortex-A9\n"); + } else if (aux & L310_AUX_CTRL_EARLY_BRESP) { + pr_warn("L2C-310 early BRESP only supported with Cortex-A9\n"); + aux &= ~L310_AUX_CTRL_EARLY_BRESP; + } + } + + if (cortex_a9) { + u32 aux_cur = readl_relaxed(base + L2X0_AUX_CTRL); + u32 acr = get_auxcr(); + + pr_debug("Cortex-A9 ACR=0x%08x\n", acr); + + if (acr & BIT(3) && !(aux_cur & L310_AUX_CTRL_FULL_LINE_ZERO)) + pr_err("L2C-310: full line of zeros enabled in Cortex-A9 but not L2C-310 - invalid\n"); + + if (aux & L310_AUX_CTRL_FULL_LINE_ZERO && !(acr & BIT(3))) + pr_err("L2C-310: enabling full line of zeros but not enabled in Cortex-A9\n"); + + if (!(aux & L310_AUX_CTRL_FULL_LINE_ZERO) && !outer_cache.write_sec) { + aux |= L310_AUX_CTRL_FULL_LINE_ZERO; + pr_info("L2C-310 full line of zeros enabled for Cortex-A9\n"); + } + } else if (aux & (L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP)) { + pr_err("L2C-310: disabling Cortex-A9 specific feature bits\n"); + aux &= ~(L310_AUX_CTRL_FULL_LINE_ZERO | L310_AUX_CTRL_EARLY_BRESP); + } + + if (aux & (L310_AUX_CTRL_DATA_PREFETCH | L310_AUX_CTRL_INSTR_PREFETCH)) { + u32 prefetch = readl_relaxed(base + L310_PREFETCH_CTRL); + + pr_info("L2C-310 %s%s prefetch enabled, offset %u lines\n", + aux & L310_AUX_CTRL_INSTR_PREFETCH ? "I" : "", + aux & L310_AUX_CTRL_DATA_PREFETCH ? "D" : "", + 1 + (prefetch & L310_PREFETCH_CTRL_OFFSET_MASK)); + } + + /* r3p0 or later has power control register */ + if (rev >= L310_CACHE_ID_RTL_R3P0) { + u32 power_ctrl; + + l2c_write_sec(L310_DYNAMIC_CLK_GATING_EN | L310_STNDBY_MODE_EN, + base, L310_POWER_CTRL); + power_ctrl = readl_relaxed(base + L310_POWER_CTRL); + pr_info("L2C-310 dynamic clock gating %sabled, standby mode %sabled\n", + power_ctrl & L310_DYNAMIC_CLK_GATING_EN ? "en" : "dis", + power_ctrl & L310_STNDBY_MODE_EN ? "en" : "dis"); + } + + /* + * Always enable non-secure access to the lockdown registers - + * we write to them as part of the L2C enable sequence so they + * need to be accessible. + */ + aux |= L310_AUX_CTRL_NS_LOCKDOWN; + + l2c_enable(base, aux, num_lock); + + if (aux & L310_AUX_CTRL_FULL_LINE_ZERO) { + set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); + cpu_notifier(l2c310_cpu_enable_flz, 0); } } -void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) +static void __init l2c310_fixup(void __iomem *base, u32 cache_id, + struct outer_cache_fns *fns) { - u32 aux; - u32 cache_id; - u32 way_size = 0; - int ways; - int way_size_shift = L2X0_WAY_SIZE_SHIFT; - const char *type; + unsigned revision = cache_id & L2X0_CACHE_ID_RTL_MASK; + const char *errata[8]; + unsigned n = 0; + + if (IS_ENABLED(CONFIG_PL310_ERRATA_588369) && + revision < L310_CACHE_ID_RTL_R2P0 && + /* For bcm compatibility */ + fns->inv_range == l2c210_inv_range) { + fns->inv_range = l2c310_inv_range_erratum; + fns->flush_range = l2c310_flush_range_erratum; + errata[n++] = "588369"; + } - l2x0_base = base; - if (cache_id_part_number_from_dt) - cache_id = cache_id_part_number_from_dt; - else - cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID); - aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); + if (IS_ENABLED(CONFIG_PL310_ERRATA_727915) && + revision >= L310_CACHE_ID_RTL_R2P0 && + revision < L310_CACHE_ID_RTL_R3P1) { + fns->flush_all = l2c310_flush_all_erratum; + errata[n++] = "727915"; + } + + if (revision >= L310_CACHE_ID_RTL_R3P0 && + revision < L310_CACHE_ID_RTL_R3P2) { + u32 val = readl_relaxed(base + L310_PREFETCH_CTRL); + /* I don't think bit23 is required here... but iMX6 does so */ + if (val & (BIT(30) | BIT(23))) { + val &= ~(BIT(30) | BIT(23)); + l2c_write_sec(val, base, L310_PREFETCH_CTRL); + errata[n++] = "752271"; + } + } + + if (IS_ENABLED(CONFIG_PL310_ERRATA_753970) && + revision == L310_CACHE_ID_RTL_R3P0) { + sync_reg_offset = L2X0_DUMMY_REG; + errata[n++] = "753970"; + } + + if (IS_ENABLED(CONFIG_PL310_ERRATA_769419)) + errata[n++] = "769419"; + + if (n) { + unsigned i; + pr_info("L2C-310 errat%s", n > 1 ? "a" : "um"); + for (i = 0; i < n; i++) + pr_cont(" %s", errata[i]); + pr_cont(" enabled\n"); + } +} + +static void l2c310_disable(void) +{ + /* + * If full-line-of-zeros is enabled, we must first disable it in the + * Cortex-A9 auxiliary control register before disabling the L2 cache. + */ + if (l2x0_saved_regs.aux_ctrl & L310_AUX_CTRL_FULL_LINE_ZERO) + set_auxcr(get_auxcr() & ~(BIT(3) | BIT(2) | BIT(1))); + + l2c_disable(); +} + +static const struct l2c_init_data l2c310_init_fns __initconst = { + .type = "L2C-310", + .way_size_0 = SZ_8K, + .num_lock = 8, + .enable = l2c310_enable, + .fixup = l2c310_fixup, + .save = l2c310_save, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c310_disable, + .sync = l2c210_sync, + .resume = l2c310_resume, + }, +}; + +static void __init __l2c_init(const struct l2c_init_data *data, + u32 aux_val, u32 aux_mask, u32 cache_id) +{ + struct outer_cache_fns fns; + unsigned way_size_bits, ways; + u32 aux, old_aux; + + /* + * Sanity check the aux values. aux_mask is the bits we preserve + * from reading the hardware register, and aux_val is the bits we + * set. + */ + if (aux_val & aux_mask) + pr_alert("L2C: platform provided aux values permit register corruption.\n"); + + old_aux = aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); aux &= aux_mask; aux |= aux_val; + if (old_aux != aux) + pr_warn("L2C: DT/platform modifies aux control register: 0x%08x -> 0x%08x\n", + old_aux, aux); + /* Determine the number of ways */ switch (cache_id & L2X0_CACHE_ID_PART_MASK) { case L2X0_CACHE_ID_PART_L310: + if ((aux_val | ~aux_mask) & (L2C_AUX_CTRL_WAY_SIZE_MASK | L310_AUX_CTRL_ASSOCIATIVITY_16)) + pr_warn("L2C: DT/platform tries to modify or specify cache size\n"); if (aux & (1 << 16)) ways = 16; else ways = 8; - type = "L310"; -#ifdef CONFIG_PL310_ERRATA_753970 - /* Unmapped register. */ - sync_reg_offset = L2X0_DUMMY_REG; -#endif - if ((cache_id & L2X0_CACHE_ID_RTL_MASK) <= L2X0_CACHE_ID_RTL_R3P0) - outer_cache.set_debug = pl310_set_debug; break; + case L2X0_CACHE_ID_PART_L210: + case L2X0_CACHE_ID_PART_L220: ways = (aux >> 13) & 0xf; - type = "L210"; break; case AURORA_CACHE_ID: - sync_reg_offset = AURORA_SYNC_REG; ways = (aux >> 13) & 0xf; ways = 2 << ((ways + 1) >> 2); - way_size_shift = AURORA_WAY_SIZE_SHIFT; - type = "Aurora"; break; + default: /* Assume unknown chips have 8 ways */ ways = 8; - type = "L2x0 series"; break; } l2x0_way_mask = (1 << ways) - 1; /* - * L2 cache Size = Way size * Number of ways + * way_size_0 is the size that a way_size value of zero would be + * given the calculation: way_size = way_size_0 << way_size_bits. + * So, if way_size_bits=0 is reserved, but way_size_bits=1 is 16k, + * then way_size_0 would be 8k. + * + * L2 cache size = number of ways * way size. */ - way_size = (aux & L2X0_AUX_CTRL_WAY_SIZE_MASK) >> 17; - way_size = 1 << (way_size + way_size_shift); + way_size_bits = (aux & L2C_AUX_CTRL_WAY_SIZE_MASK) >> + L2C_AUX_CTRL_WAY_SIZE_SHIFT; + l2x0_size = ways * (data->way_size_0 << way_size_bits); - l2x0_size = ways * way_size * SZ_1K; + fns = data->outer_cache; + fns.write_sec = outer_cache.write_sec; + if (data->fixup) + data->fixup(l2x0_base, cache_id, &fns); /* - * Check if l2x0 controller is already enabled. - * If you are booting from non-secure mode - * accessing the below registers will fault. + * Check if l2x0 controller is already enabled. If we are booting + * in non-secure mode accessing the below registers will fault. */ - if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { - /* Make sure that I&D is not locked down when starting */ - l2x0_unlock(cache_id); + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) + data->enable(l2x0_base, aux, data->num_lock); - /* l2x0 controller is disabled */ - writel_relaxed(aux, l2x0_base + L2X0_AUX_CTRL); + outer_cache = fns; - l2x0_inv_all(); - - /* enable L2X0 */ - writel_relaxed(L2X0_CTRL_EN, l2x0_base + L2X0_CTRL); - } + /* + * It is strange to save the register state before initialisation, + * but hey, this is what the DT implementations decided to do. + */ + if (data->save) + data->save(l2x0_base); /* Re-read it in case some bits are reserved. */ aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); - /* Save the value for resuming. */ - l2x0_saved_regs.aux_ctrl = aux; + pr_info("%s cache controller enabled, %d ways, %d kB\n", + data->type, ways, l2x0_size >> 10); + pr_info("%s: CACHE_ID 0x%08x, AUX_CTRL 0x%08x\n", + data->type, cache_id, aux); +} + +void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) +{ + const struct l2c_init_data *data; + u32 cache_id; + + l2x0_base = base; + + cache_id = readl_relaxed(base + L2X0_CACHE_ID); + + switch (cache_id & L2X0_CACHE_ID_PART_MASK) { + default: + case L2X0_CACHE_ID_PART_L210: + data = &l2c210_data; + break; - if (!of_init) { - outer_cache.inv_range = l2x0_inv_range; - outer_cache.clean_range = l2x0_clean_range; - outer_cache.flush_range = l2x0_flush_range; - outer_cache.sync = l2x0_cache_sync; - outer_cache.flush_all = l2x0_flush_all; - outer_cache.inv_all = l2x0_inv_all; - outer_cache.disable = l2x0_disable; + case L2X0_CACHE_ID_PART_L220: + data = &l2c220_data; + break; + + case L2X0_CACHE_ID_PART_L310: + data = &l2c310_init_fns; + break; } - pr_info("%s cache controller enabled\n", type); - pr_info("l2x0: %d ways, CACHE_ID 0x%08x, AUX_CTRL 0x%08x, Cache size: %d kB\n", - ways, cache_id, aux, l2x0_size >> 10); + __l2c_init(data, aux_val, aux_mask, cache_id); } #ifdef CONFIG_OF static int l2_wt_override; +/* Aurora don't have the cache ID register available, so we have to + * pass it though the device tree */ +static u32 cache_id_part_number_from_dt; + +static void __init l2x0_of_parse(const struct device_node *np, + u32 *aux_val, u32 *aux_mask) +{ + u32 data[2] = { 0, 0 }; + u32 tag = 0; + u32 dirty = 0; + u32 val = 0, mask = 0; + + of_property_read_u32(np, "arm,tag-latency", &tag); + if (tag) { + mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK; + val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT; + } + + of_property_read_u32_array(np, "arm,data-latency", + data, ARRAY_SIZE(data)); + if (data[0] && data[1]) { + mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK | + L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK; + val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) | + ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT); + } + + of_property_read_u32(np, "arm,dirty-latency", &dirty); + if (dirty) { + mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK; + val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT; + } + + *aux_val &= ~mask; + *aux_val |= val; + *aux_mask &= ~mask; +} + +static const struct l2c_init_data of_l2c210_data __initconst = { + .type = "L2C-210", + .way_size_0 = SZ_8K, + .num_lock = 1, + .of_parse = l2x0_of_parse, + .enable = l2c_enable, + .save = l2c_save, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c_disable, + .sync = l2c210_sync, + .resume = l2c210_resume, + }, +}; + +static const struct l2c_init_data of_l2c220_data __initconst = { + .type = "L2C-220", + .way_size_0 = SZ_8K, + .num_lock = 1, + .of_parse = l2x0_of_parse, + .enable = l2c220_enable, + .save = l2c_save, + .outer_cache = { + .inv_range = l2c220_inv_range, + .clean_range = l2c220_clean_range, + .flush_range = l2c220_flush_range, + .flush_all = l2c220_flush_all, + .disable = l2c_disable, + .sync = l2c220_sync, + .resume = l2c210_resume, + }, +}; + +static void __init l2c310_of_parse(const struct device_node *np, + u32 *aux_val, u32 *aux_mask) +{ + u32 data[3] = { 0, 0, 0 }; + u32 tag[3] = { 0, 0, 0 }; + u32 filter[2] = { 0, 0 }; + + of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag)); + if (tag[0] && tag[1] && tag[2]) + writel_relaxed( + L310_LATENCY_CTRL_RD(tag[0] - 1) | + L310_LATENCY_CTRL_WR(tag[1] - 1) | + L310_LATENCY_CTRL_SETUP(tag[2] - 1), + l2x0_base + L310_TAG_LATENCY_CTRL); + + of_property_read_u32_array(np, "arm,data-latency", + data, ARRAY_SIZE(data)); + if (data[0] && data[1] && data[2]) + writel_relaxed( + L310_LATENCY_CTRL_RD(data[0] - 1) | + L310_LATENCY_CTRL_WR(data[1] - 1) | + L310_LATENCY_CTRL_SETUP(data[2] - 1), + l2x0_base + L310_DATA_LATENCY_CTRL); + + of_property_read_u32_array(np, "arm,filter-ranges", + filter, ARRAY_SIZE(filter)); + if (filter[1]) { + writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M), + l2x0_base + L310_ADDR_FILTER_END); + writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L310_ADDR_FILTER_EN, + l2x0_base + L310_ADDR_FILTER_START); + } +} + +static const struct l2c_init_data of_l2c310_data __initconst = { + .type = "L2C-310", + .way_size_0 = SZ_8K, + .num_lock = 8, + .of_parse = l2c310_of_parse, + .enable = l2c310_enable, + .fixup = l2c310_fixup, + .save = l2c310_save, + .outer_cache = { + .inv_range = l2c210_inv_range, + .clean_range = l2c210_clean_range, + .flush_range = l2c210_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c310_disable, + .sync = l2c210_sync, + .resume = l2c310_resume, + }, +}; + /* * Note that the end addresses passed to Linux primitives are * noninclusive, while the hardware cache range operations use @@ -524,6 +1166,100 @@ static void aurora_flush_range(unsigned long start, unsigned long end) } } +static void aurora_save(void __iomem *base) +{ + l2x0_saved_regs.ctrl = readl_relaxed(base + L2X0_CTRL); + l2x0_saved_regs.aux_ctrl = readl_relaxed(base + L2X0_AUX_CTRL); +} + +static void aurora_resume(void) +{ + void __iomem *base = l2x0_base; + + if (!(readl(base + L2X0_CTRL) & L2X0_CTRL_EN)) { + writel_relaxed(l2x0_saved_regs.aux_ctrl, base + L2X0_AUX_CTRL); + writel_relaxed(l2x0_saved_regs.ctrl, base + L2X0_CTRL); + } +} + +/* + * For Aurora cache in no outer mode, enable via the CP15 coprocessor + * broadcasting of cache commands to L2. + */ +static void __init aurora_enable_no_outer(void __iomem *base, u32 aux, + unsigned num_lock) +{ + u32 u; + + asm volatile("mrc p15, 1, %0, c15, c2, 0" : "=r" (u)); + u |= AURORA_CTRL_FW; /* Set the FW bit */ + asm volatile("mcr p15, 1, %0, c15, c2, 0" : : "r" (u)); + + isb(); + + l2c_enable(base, aux, num_lock); +} + +static void __init aurora_fixup(void __iomem *base, u32 cache_id, + struct outer_cache_fns *fns) +{ + sync_reg_offset = AURORA_SYNC_REG; +} + +static void __init aurora_of_parse(const struct device_node *np, + u32 *aux_val, u32 *aux_mask) +{ + u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU; + u32 mask = AURORA_ACR_REPLACEMENT_MASK; + + of_property_read_u32(np, "cache-id-part", + &cache_id_part_number_from_dt); + + /* Determine and save the write policy */ + l2_wt_override = of_property_read_bool(np, "wt-override"); + + if (l2_wt_override) { + val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY; + mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK; + } + + *aux_val &= ~mask; + *aux_val |= val; + *aux_mask &= ~mask; +} + +static const struct l2c_init_data of_aurora_with_outer_data __initconst = { + .type = "Aurora", + .way_size_0 = SZ_4K, + .num_lock = 4, + .of_parse = aurora_of_parse, + .enable = l2c_enable, + .fixup = aurora_fixup, + .save = aurora_save, + .outer_cache = { + .inv_range = aurora_inv_range, + .clean_range = aurora_clean_range, + .flush_range = aurora_flush_range, + .flush_all = l2x0_flush_all, + .disable = l2x0_disable, + .sync = l2x0_cache_sync, + .resume = aurora_resume, + }, +}; + +static const struct l2c_init_data of_aurora_no_outer_data __initconst = { + .type = "Aurora", + .way_size_0 = SZ_4K, + .num_lock = 4, + .of_parse = aurora_of_parse, + .enable = aurora_enable_no_outer, + .fixup = aurora_fixup, + .save = aurora_save, + .outer_cache = { + .resume = aurora_resume, + }, +}; + /* * For certain Broadcom SoCs, depending on the address range, different offsets * need to be added to the address before passing it to L2 for @@ -588,16 +1324,16 @@ static void bcm_inv_range(unsigned long start, unsigned long end) /* normal case, no cross section between start and end */ if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { - l2x0_inv_range(new_start, new_end); + l2c210_inv_range(new_start, new_end); return; } /* They cross sections, so it can only be a cross from section * 2 to section 3 */ - l2x0_inv_range(new_start, + l2c210_inv_range(new_start, bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); - l2x0_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + l2c210_inv_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), new_end); } @@ -610,26 +1346,21 @@ static void bcm_clean_range(unsigned long start, unsigned long end) if (unlikely(end <= start)) return; - if ((end - start) >= l2x0_size) { - l2x0_clean_all(); - return; - } - new_start = bcm_l2_phys_addr(start); new_end = bcm_l2_phys_addr(end); /* normal case, no cross section between start and end */ if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { - l2x0_clean_range(new_start, new_end); + l2c210_clean_range(new_start, new_end); return; } /* They cross sections, so it can only be a cross from section * 2 to section 3 */ - l2x0_clean_range(new_start, + l2c210_clean_range(new_start, bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); - l2x0_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + l2c210_clean_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), new_end); } @@ -643,7 +1374,7 @@ static void bcm_flush_range(unsigned long start, unsigned long end) return; if ((end - start) >= l2x0_size) { - l2x0_flush_all(); + outer_cache.flush_all(); return; } @@ -652,283 +1383,67 @@ static void bcm_flush_range(unsigned long start, unsigned long end) /* normal case, no cross section between start and end */ if (likely(bcm_addr_is_sys_emi(end) || !bcm_addr_is_sys_emi(start))) { - l2x0_flush_range(new_start, new_end); + l2c210_flush_range(new_start, new_end); return; } /* They cross sections, so it can only be a cross from section * 2 to section 3 */ - l2x0_flush_range(new_start, + l2c210_flush_range(new_start, bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR-1)); - l2x0_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), + l2c210_flush_range(bcm_l2_phys_addr(BCM_VC_EMI_SEC3_START_ADDR), new_end); } -static void __init l2x0_of_setup(const struct device_node *np, - u32 *aux_val, u32 *aux_mask) -{ - u32 data[2] = { 0, 0 }; - u32 tag = 0; - u32 dirty = 0; - u32 val = 0, mask = 0; - - of_property_read_u32(np, "arm,tag-latency", &tag); - if (tag) { - mask |= L2X0_AUX_CTRL_TAG_LATENCY_MASK; - val |= (tag - 1) << L2X0_AUX_CTRL_TAG_LATENCY_SHIFT; - } - - of_property_read_u32_array(np, "arm,data-latency", - data, ARRAY_SIZE(data)); - if (data[0] && data[1]) { - mask |= L2X0_AUX_CTRL_DATA_RD_LATENCY_MASK | - L2X0_AUX_CTRL_DATA_WR_LATENCY_MASK; - val |= ((data[0] - 1) << L2X0_AUX_CTRL_DATA_RD_LATENCY_SHIFT) | - ((data[1] - 1) << L2X0_AUX_CTRL_DATA_WR_LATENCY_SHIFT); - } - - of_property_read_u32(np, "arm,dirty-latency", &dirty); - if (dirty) { - mask |= L2X0_AUX_CTRL_DIRTY_LATENCY_MASK; - val |= (dirty - 1) << L2X0_AUX_CTRL_DIRTY_LATENCY_SHIFT; - } - - *aux_val &= ~mask; - *aux_val |= val; - *aux_mask &= ~mask; -} - -static void __init pl310_of_setup(const struct device_node *np, - u32 *aux_val, u32 *aux_mask) -{ - u32 data[3] = { 0, 0, 0 }; - u32 tag[3] = { 0, 0, 0 }; - u32 filter[2] = { 0, 0 }; - - of_property_read_u32_array(np, "arm,tag-latency", tag, ARRAY_SIZE(tag)); - if (tag[0] && tag[1] && tag[2]) - writel_relaxed( - ((tag[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) | - ((tag[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) | - ((tag[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT), - l2x0_base + L2X0_TAG_LATENCY_CTRL); - - of_property_read_u32_array(np, "arm,data-latency", - data, ARRAY_SIZE(data)); - if (data[0] && data[1] && data[2]) - writel_relaxed( - ((data[0] - 1) << L2X0_LATENCY_CTRL_RD_SHIFT) | - ((data[1] - 1) << L2X0_LATENCY_CTRL_WR_SHIFT) | - ((data[2] - 1) << L2X0_LATENCY_CTRL_SETUP_SHIFT), - l2x0_base + L2X0_DATA_LATENCY_CTRL); - - of_property_read_u32_array(np, "arm,filter-ranges", - filter, ARRAY_SIZE(filter)); - if (filter[1]) { - writel_relaxed(ALIGN(filter[0] + filter[1], SZ_1M), - l2x0_base + L2X0_ADDR_FILTER_END); - writel_relaxed((filter[0] & ~(SZ_1M - 1)) | L2X0_ADDR_FILTER_EN, - l2x0_base + L2X0_ADDR_FILTER_START); - } -} - -static void __init pl310_save(void) -{ - u32 l2x0_revision = readl_relaxed(l2x0_base + L2X0_CACHE_ID) & - L2X0_CACHE_ID_RTL_MASK; - - l2x0_saved_regs.tag_latency = readl_relaxed(l2x0_base + - L2X0_TAG_LATENCY_CTRL); - l2x0_saved_regs.data_latency = readl_relaxed(l2x0_base + - L2X0_DATA_LATENCY_CTRL); - l2x0_saved_regs.filter_end = readl_relaxed(l2x0_base + - L2X0_ADDR_FILTER_END); - l2x0_saved_regs.filter_start = readl_relaxed(l2x0_base + - L2X0_ADDR_FILTER_START); - - if (l2x0_revision >= L2X0_CACHE_ID_RTL_R2P0) { - /* - * From r2p0, there is Prefetch offset/control register - */ - l2x0_saved_regs.prefetch_ctrl = readl_relaxed(l2x0_base + - L2X0_PREFETCH_CTRL); - /* - * From r3p0, there is Power control register - */ - if (l2x0_revision >= L2X0_CACHE_ID_RTL_R3P0) - l2x0_saved_regs.pwr_ctrl = readl_relaxed(l2x0_base + - L2X0_POWER_CTRL); - } -} +/* Broadcom L2C-310 start from ARMs R3P2 or later, and require no fixups */ +static const struct l2c_init_data of_bcm_l2x0_data __initconst = { + .type = "BCM-L2C-310", + .way_size_0 = SZ_8K, + .num_lock = 8, + .of_parse = l2c310_of_parse, + .enable = l2c310_enable, + .save = l2c310_save, + .outer_cache = { + .inv_range = bcm_inv_range, + .clean_range = bcm_clean_range, + .flush_range = bcm_flush_range, + .flush_all = l2c210_flush_all, + .disable = l2c310_disable, + .sync = l2c210_sync, + .resume = l2c310_resume, + }, +}; -static void aurora_save(void) +static void __init tauros3_save(void __iomem *base) { - l2x0_saved_regs.ctrl = readl_relaxed(l2x0_base + L2X0_CTRL); - l2x0_saved_regs.aux_ctrl = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); -} + l2c_save(base); -static void __init tauros3_save(void) -{ l2x0_saved_regs.aux2_ctrl = - readl_relaxed(l2x0_base + TAUROS3_AUX2_CTRL); + readl_relaxed(base + TAUROS3_AUX2_CTRL); l2x0_saved_regs.prefetch_ctrl = - readl_relaxed(l2x0_base + L2X0_PREFETCH_CTRL); -} - -static void l2x0_resume(void) -{ - if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { - /* restore aux ctrl and enable l2 */ - l2x0_unlock(readl_relaxed(l2x0_base + L2X0_CACHE_ID)); - - writel_relaxed(l2x0_saved_regs.aux_ctrl, l2x0_base + - L2X0_AUX_CTRL); - - l2x0_inv_all(); - - writel_relaxed(L2X0_CTRL_EN, l2x0_base + L2X0_CTRL); - } -} - -static void pl310_resume(void) -{ - u32 l2x0_revision; - - if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { - /* restore pl310 setup */ - writel_relaxed(l2x0_saved_regs.tag_latency, - l2x0_base + L2X0_TAG_LATENCY_CTRL); - writel_relaxed(l2x0_saved_regs.data_latency, - l2x0_base + L2X0_DATA_LATENCY_CTRL); - writel_relaxed(l2x0_saved_regs.filter_end, - l2x0_base + L2X0_ADDR_FILTER_END); - writel_relaxed(l2x0_saved_regs.filter_start, - l2x0_base + L2X0_ADDR_FILTER_START); - - l2x0_revision = readl_relaxed(l2x0_base + L2X0_CACHE_ID) & - L2X0_CACHE_ID_RTL_MASK; - - if (l2x0_revision >= L2X0_CACHE_ID_RTL_R2P0) { - writel_relaxed(l2x0_saved_regs.prefetch_ctrl, - l2x0_base + L2X0_PREFETCH_CTRL); - if (l2x0_revision >= L2X0_CACHE_ID_RTL_R3P0) - writel_relaxed(l2x0_saved_regs.pwr_ctrl, - l2x0_base + L2X0_POWER_CTRL); - } - } - - l2x0_resume(); -} - -static void aurora_resume(void) -{ - if (!(readl(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { - writel_relaxed(l2x0_saved_regs.aux_ctrl, - l2x0_base + L2X0_AUX_CTRL); - writel_relaxed(l2x0_saved_regs.ctrl, l2x0_base + L2X0_CTRL); - } + readl_relaxed(base + L310_PREFETCH_CTRL); } static void tauros3_resume(void) { - if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { + void __iomem *base = l2x0_base; + + if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) { writel_relaxed(l2x0_saved_regs.aux2_ctrl, - l2x0_base + TAUROS3_AUX2_CTRL); + base + TAUROS3_AUX2_CTRL); writel_relaxed(l2x0_saved_regs.prefetch_ctrl, - l2x0_base + L2X0_PREFETCH_CTRL); - } + base + L310_PREFETCH_CTRL); - l2x0_resume(); -} - -static void __init aurora_broadcast_l2_commands(void) -{ - __u32 u; - /* Enable Broadcasting of cache commands to L2*/ - __asm__ __volatile__("mrc p15, 1, %0, c15, c2, 0" : "=r"(u)); - u |= AURORA_CTRL_FW; /* Set the FW bit */ - __asm__ __volatile__("mcr p15, 1, %0, c15, c2, 0\n" : : "r"(u)); - isb(); -} - -static void __init aurora_of_setup(const struct device_node *np, - u32 *aux_val, u32 *aux_mask) -{ - u32 val = AURORA_ACR_REPLACEMENT_TYPE_SEMIPLRU; - u32 mask = AURORA_ACR_REPLACEMENT_MASK; - - of_property_read_u32(np, "cache-id-part", - &cache_id_part_number_from_dt); - - /* Determine and save the write policy */ - l2_wt_override = of_property_read_bool(np, "wt-override"); - - if (l2_wt_override) { - val |= AURORA_ACR_FORCE_WRITE_THRO_POLICY; - mask |= AURORA_ACR_FORCE_WRITE_POLICY_MASK; + l2c_enable(base, l2x0_saved_regs.aux_ctrl, 8); } - - *aux_val &= ~mask; - *aux_val |= val; - *aux_mask &= ~mask; } -static const struct l2x0_of_data pl310_data = { - .setup = pl310_of_setup, - .save = pl310_save, - .outer_cache = { - .resume = pl310_resume, - .inv_range = l2x0_inv_range, - .clean_range = l2x0_clean_range, - .flush_range = l2x0_flush_range, - .sync = l2x0_cache_sync, - .flush_all = l2x0_flush_all, - .inv_all = l2x0_inv_all, - .disable = l2x0_disable, - }, -}; - -static const struct l2x0_of_data l2x0_data = { - .setup = l2x0_of_setup, - .save = NULL, - .outer_cache = { - .resume = l2x0_resume, - .inv_range = l2x0_inv_range, - .clean_range = l2x0_clean_range, - .flush_range = l2x0_flush_range, - .sync = l2x0_cache_sync, - .flush_all = l2x0_flush_all, - .inv_all = l2x0_inv_all, - .disable = l2x0_disable, - }, -}; - -static const struct l2x0_of_data aurora_with_outer_data = { - .setup = aurora_of_setup, - .save = aurora_save, - .outer_cache = { - .resume = aurora_resume, - .inv_range = aurora_inv_range, - .clean_range = aurora_clean_range, - .flush_range = aurora_flush_range, - .sync = l2x0_cache_sync, - .flush_all = l2x0_flush_all, - .inv_all = l2x0_inv_all, - .disable = l2x0_disable, - }, -}; - -static const struct l2x0_of_data aurora_no_outer_data = { - .setup = aurora_of_setup, - .save = aurora_save, - .outer_cache = { - .resume = aurora_resume, - }, -}; - -static const struct l2x0_of_data tauros3_data = { - .setup = NULL, +static const struct l2c_init_data of_tauros3_data __initconst = { + .type = "Tauros3", + .way_size_0 = SZ_8K, + .num_lock = 8, + .enable = l2c_enable, .save = tauros3_save, /* Tauros3 broadcasts L1 cache operations to L2 */ .outer_cache = { @@ -936,43 +1451,26 @@ static const struct l2x0_of_data tauros3_data = { }, }; -static const struct l2x0_of_data bcm_l2x0_data = { - .setup = pl310_of_setup, - .save = pl310_save, - .outer_cache = { - .resume = pl310_resume, - .inv_range = bcm_inv_range, - .clean_range = bcm_clean_range, - .flush_range = bcm_flush_range, - .sync = l2x0_cache_sync, - .flush_all = l2x0_flush_all, - .inv_all = l2x0_inv_all, - .disable = l2x0_disable, - }, -}; - +#define L2C_ID(name, fns) { .compatible = name, .data = (void *)&fns } static const struct of_device_id l2x0_ids[] __initconst = { - { .compatible = "arm,l210-cache", .data = (void *)&l2x0_data }, - { .compatible = "arm,l220-cache", .data = (void *)&l2x0_data }, - { .compatible = "arm,pl310-cache", .data = (void *)&pl310_data }, - { .compatible = "bcm,bcm11351-a2-pl310-cache", /* deprecated name */ - .data = (void *)&bcm_l2x0_data}, - { .compatible = "brcm,bcm11351-a2-pl310-cache", - .data = (void *)&bcm_l2x0_data}, - { .compatible = "marvell,aurora-outer-cache", - .data = (void *)&aurora_with_outer_data}, - { .compatible = "marvell,aurora-system-cache", - .data = (void *)&aurora_no_outer_data}, - { .compatible = "marvell,tauros3-cache", - .data = (void *)&tauros3_data }, + L2C_ID("arm,l210-cache", of_l2c210_data), + L2C_ID("arm,l220-cache", of_l2c220_data), + L2C_ID("arm,pl310-cache", of_l2c310_data), + L2C_ID("brcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data), + L2C_ID("marvell,aurora-outer-cache", of_aurora_with_outer_data), + L2C_ID("marvell,aurora-system-cache", of_aurora_no_outer_data), + L2C_ID("marvell,tauros3-cache", of_tauros3_data), + /* Deprecated IDs */ + L2C_ID("bcm,bcm11351-a2-pl310-cache", of_bcm_l2x0_data), {} }; int __init l2x0_of_init(u32 aux_val, u32 aux_mask) { + const struct l2c_init_data *data; struct device_node *np; - const struct l2x0_of_data *data; struct resource res; + u32 cache_id, old_aux; np = of_find_matching_node(NULL, l2x0_ids); if (!np) @@ -989,23 +1487,29 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask) data = of_match_node(l2x0_ids, np)->data; - /* L2 configuration can only be changed if the cache is disabled */ - if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { - if (data->setup) - data->setup(np, &aux_val, &aux_mask); - - /* For aurora cache in no outer mode select the - * correct mode using the coprocessor*/ - if (data == &aurora_no_outer_data) - aurora_broadcast_l2_commands(); + old_aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); + if (old_aux != ((old_aux & aux_mask) | aux_val)) { + pr_warn("L2C: platform modifies aux control register: 0x%08x -> 0x%08x\n", + old_aux, (old_aux & aux_mask) | aux_val); + } else if (aux_mask != ~0U && aux_val != 0) { + pr_alert("L2C: platform provided aux values match the hardware, so have no effect. Please remove them.\n"); } - if (data->save) - data->save(); + /* All L2 caches are unified, so this property should be specified */ + if (!of_property_read_bool(np, "cache-unified")) + pr_err("L2C: device tree omits to specify unified cache\n"); + + /* L2 configuration can only be changed if the cache is disabled */ + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) + if (data->of_parse) + data->of_parse(np, &aux_val, &aux_mask); + + if (cache_id_part_number_from_dt) + cache_id = cache_id_part_number_from_dt; + else + cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID); - of_init = true; - memcpy(&outer_cache, &data->outer_cache, sizeof(outer_cache)); - l2x0_init(l2x0_base, aux_val, aux_mask); + __l2c_init(data, aux_val, aux_mask, cache_id); return 0; } |