Diffstat (limited to 'arch/arc/mm')
-rw-r--r--  arch/arc/mm/Makefile        |   2
-rw-r--r--  arch/arc/mm/cache_arc700.c  | 470
-rw-r--r--  arch/arc/mm/extable.c       |   4
-rw-r--r--  arch/arc/mm/fault.c         |   1
-rw-r--r--  arch/arc/mm/init.c          |   3
-rw-r--r--  arch/arc/mm/ioremap.c       |   2
-rw-r--r--  arch/arc/mm/mmap.c          |  78
-rw-r--r--  arch/arc/mm/tlb.c           |  47
8 files changed, 345 insertions(+), 262 deletions(-)
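
For quick reference, the way-size arithmetic that the cache_arc700.c comments below rely on (a VIPT cache aliases only when a single way spans more than one page) can be checked with a small standalone program. This is illustrative only and not part of the patch; the geometries are the ones quoted in the patch comments.

/*
 * Illustrative sketch (not part of the patch): the aliasing arithmetic
 * used throughout the cache_arc700.c comments below.
 */
#include <stdio.h>

static int num_aliases(unsigned cache_sz, unsigned ways, unsigned page_sz)
{
	unsigned way_sz = cache_sz / ways;	/* bytes indexed within one way */

	/* no aliasing if a whole way fits inside one page */
	return (way_sz <= page_sz) ? 1 : way_sz / page_sz;
}

int main(void)
{
	/* geometries quoted in the patch comments, 8K MMU page size */
	printf("16K 2-way: %d alias(es)\n", num_aliases(16 << 10, 2, 8 << 10));
	printf("32K 2-way: %d alias(es)\n", num_aliases(32 << 10, 2, 8 << 10));
	printf("64K 2-way: %d alias(es)\n", num_aliases(64 << 10, 2, 8 << 10));
	return 0;
}

The pre-patch code used this ratio to pick one of several speculative I-cache flush routines; the patch instead passes both vaddr and paddr to the cache ops so only the one真 line needs killing.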
diff --git a/arch/arc/mm/Makefile b/arch/arc/mm/Makefile index 168dc146a8f6..ac95cc239c1e 100644 --- a/arch/arc/mm/Makefile +++ b/arch/arc/mm/Makefile @@ -7,4 +7,4 @@ # obj-y := extable.o ioremap.o dma.o fault.o init.o -obj-y += tlb.o tlbex.o cache_arc700.o +obj-y += tlb.o tlbex.o cache_arc700.o mmap.o diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c index 88d617d84234..2f12bca8aef3 100644 --- a/arch/arc/mm/cache_arc700.c +++ b/arch/arc/mm/cache_arc700.c @@ -68,20 +68,11 @@ #include <linux/mmu_context.h> #include <linux/syscalls.h> #include <linux/uaccess.h> +#include <linux/pagemap.h> #include <asm/cacheflush.h> #include <asm/cachectl.h> #include <asm/setup.h> - -#ifdef CONFIG_ARC_HAS_ICACHE -static void __ic_line_inv_no_alias(unsigned long, int); -static void __ic_line_inv_2_alias(unsigned long, int); -static void __ic_line_inv_4_alias(unsigned long, int); - -/* Holds the ptr to flush routine, dependign on size due to aliasing issues */ -static void (*___flush_icache_rtn) (unsigned long, int); -#endif - char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len) { int n = 0; @@ -109,7 +100,7 @@ char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len) * the cpuinfo structure for later use. * No Validation done here, simply read/convert the BCRs */ -void __init read_decode_cache_bcr(void) +void __cpuinit read_decode_cache_bcr(void) { struct bcr_cache ibcr, dbcr; struct cpuinfo_arc_cache *p_ic, *p_dc; @@ -141,13 +132,14 @@ void __init read_decode_cache_bcr(void) * 3. Enable the Caches, setup default flush mode for D-Cache * 3. Calculate the SHMLBA used by user space */ -void __init arc_cache_init(void) +void __cpuinit arc_cache_init(void) { unsigned int temp; unsigned int cpu = smp_processor_id(); struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache; struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache; int way_pg_ratio = way_pg_ratio; + int dcache_does_alias; char str[256]; printk(arc_cache_mumbojumbo(0, str, sizeof(str))); @@ -171,30 +163,6 @@ void __init arc_cache_init(void) } #endif - - /* - * if Cache way size is <= page size then no aliasing exhibited - * otherwise ratio determines num of aliases. - * e.g. 32K I$, 2 way set assoc, 8k pg size - * way-sz = 32k/2 = 16k - * way-pg-ratio = 16k/8k = 2, so 2 aliases possible - * (meaning 1 line could be in 2 possible locations). 
- */ - way_pg_ratio = ic->sz / ARC_ICACHE_WAYS / PAGE_SIZE; - switch (way_pg_ratio) { - case 0: - case 1: - ___flush_icache_rtn = __ic_line_inv_no_alias; - break; - case 2: - ___flush_icache_rtn = __ic_line_inv_2_alias; - break; - case 4: - ___flush_icache_rtn = __ic_line_inv_4_alias; - break; - default: - panic("Unsupported I-Cache Sz\n"); - } #endif /* Enable/disable I-Cache */ @@ -218,9 +186,13 @@ chk_dc: panic("Cache H/W doesn't match kernel Config"); } + dcache_does_alias = (dc->sz / ARC_DCACHE_WAYS) > PAGE_SIZE; + /* check for D-Cache aliasing */ - if ((dc->sz / ARC_DCACHE_WAYS) > PAGE_SIZE) - panic("D$ aliasing not handled right now\n"); + if (dcache_does_alias && !cache_is_vipt_aliasing()) + panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + else if (!dcache_does_alias && cache_is_vipt_aliasing()) + panic("Don't need CONFIG_ARC_CACHE_VIPT_ALIASING\n"); #endif /* Set the default Invalidate Mode to "simpy discard dirty lines" @@ -303,47 +275,57 @@ static inline void __dc_entire_op(const int cacheop) * Per Line Operation on D-Cache * Doesn't deal with type-of-op/IRQ-disabling/waiting-for-flush-to-complete * It's sole purpose is to help gcc generate ZOL + * (aliasing VIPT dcache flushing needs both vaddr and paddr) */ -static inline void __dc_line_loop(unsigned long start, unsigned long sz, - int aux_reg) +static inline void __dc_line_loop(unsigned long paddr, unsigned long vaddr, + unsigned long sz, const int aux_reg) { - int num_lines, slack; + int num_lines; /* Ensure we properly floor/ceil the non-line aligned/sized requests - * and have @start - aligned to cache line and integral @num_lines. + * and have @paddr - aligned to cache line and integral @num_lines. * This however can be avoided for page sized since: - * -@start will be cache-line aligned already (being page aligned) + * -@paddr will be cache-line aligned already (being page aligned) * -@sz will be integral multiple of line size (being page sized). */ if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) { - slack = start & ~DCACHE_LINE_MASK; - sz += slack; - start -= slack; + sz += paddr & ~DCACHE_LINE_MASK; + paddr &= DCACHE_LINE_MASK; + vaddr &= DCACHE_LINE_MASK; } num_lines = DIV_ROUND_UP(sz, ARC_DCACHE_LINE_LEN); +#if (CONFIG_ARC_MMU_VER <= 2) + paddr |= (vaddr >> PAGE_SHIFT) & 0x1F; +#endif + while (num_lines-- > 0) { #if (CONFIG_ARC_MMU_VER > 2) /* * Just as for I$, in MMU v3, D$ ops also require * "tag" bits in DC_PTAG, "index" bits in FLDL,IVDL ops - * But we pass phy addr for both. This works since Linux - * doesn't support aliasing configs for D$, yet. - * Thus paddr is enough to provide both tag and index. 
*/ - write_aux_reg(ARC_REG_DC_PTAG, start); + write_aux_reg(ARC_REG_DC_PTAG, paddr); + + write_aux_reg(aux_reg, vaddr); + vaddr += ARC_DCACHE_LINE_LEN; +#else + /* paddr contains stuffed vaddrs bits */ + write_aux_reg(aux_reg, paddr); #endif - write_aux_reg(aux_reg, start); - start += ARC_DCACHE_LINE_LEN; + paddr += ARC_DCACHE_LINE_LEN; } } +/* For kernel mappings cache operation: index is same as paddr */ +#define __dc_line_op_k(p, sz, op) __dc_line_op(p, p, sz, op) + /* * D-Cache : Per Line INV (discard or wback+discard) or FLUSH (wback) */ -static inline void __dc_line_op(unsigned long start, unsigned long sz, - const int cacheop) +static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr, + unsigned long sz, const int cacheop) { unsigned long flags, tmp = tmp; int aux; @@ -366,7 +348,7 @@ static inline void __dc_line_op(unsigned long start, unsigned long sz, else aux = ARC_REG_DC_FLDL; - __dc_line_loop(start, sz, aux); + __dc_line_loop(paddr, vaddr, sz, aux); if (cacheop & OP_FLUSH) /* flush / flush-n-inv both wait */ wait_for_flush(); @@ -381,7 +363,8 @@ static inline void __dc_line_op(unsigned long start, unsigned long sz, #else #define __dc_entire_op(cacheop) -#define __dc_line_op(start, sz, cacheop) +#define __dc_line_op(paddr, vaddr, sz, cacheop) +#define __dc_line_op_k(paddr, sz, cacheop) #endif /* CONFIG_ARC_HAS_DCACHE */ @@ -391,75 +374,38 @@ static inline void __dc_line_op(unsigned long start, unsigned long sz, /* * I-Cache Aliasing in ARC700 VIPT caches * - * For fetching code from I$, ARC700 uses vaddr (embedded in program code) - * to "index" into SET of cache-line and paddr from MMU to match the TAG - * in the WAYS of SET. - * - * However the CDU iterface (to flush/inv) lines from software, only takes - * paddr (to have simpler hardware interface). For simpler cases, using paddr - * alone suffices. - * e.g. 2-way-set-assoc, 16K I$ (8k MMU pg sz, 32b cache line size): - * way_sz = cache_sz / num_ways = 16k/2 = 8k - * num_sets = way_sz / line_sz = 8k/32 = 256 => 8 bits - * Ignoring the bottom 5 bits corresp to the off within a 32b cacheline, - * bits req for calc set-index = bits 12:5 (0 based). Since this range fits - * inside the bottom 13 bits of paddr, which are same for vaddr and paddr - * (with 8k pg sz), paddr alone can be safely used by CDU to unambigously - * locate a cache-line. + * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag. + * The orig Cache Management Module "CDU" only required paddr to invalidate a + * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry. + * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching + * the exact same line. * - * However for a difft sized cache, say 32k I$, above math yields need - * for 14 bits of vaddr to locate a cache line, which can't be provided by - * paddr, since the bit 13 (0 based) might differ between the two. - * - * This lack of extra bits needed for correct line addressing, defines the - * classical problem of Cache aliasing with VIPT architectures - * num_aliases = 1 << extra_bits - * e.g. 2-way-set-assoc, 32K I$ with 8k MMU pg sz => 2 aliases - * 2-way-set-assoc, 64K I$ with 8k MMU pg sz => 4 aliases - * 2-way-set-assoc, 16K I$ with 8k MMU pg sz => NO aliases + * However for larger Caches (way-size > page-size) - i.e. in Aliasing config, + * paddr alone could not be used to correctly index the cache. 
* * ------------------ * MMU v1/v2 (Fixed Page Size 8k) * ------------------ * The solution was to provide CDU with these additonal vaddr bits. These - * would be bits [x:13], x would depend on cache-geom. + * would be bits [x:13], x would depend on cache-geometry, 13 comes from + * standard page size of 8k. * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the * orig 5 bits of paddr were anyways ignored by CDU line ops, as they * represent the offset within cache-line. The adv of using this "clumsy" - * interface for additional info was no new reg was needed in CDU. + * interface for additional info was no new reg was needed in CDU programming + * model. * * 17:13 represented the max num of bits passable, actual bits needed were * fewer, based on the num-of-aliases possible. * -for 2 alias possibility, only bit 13 needed (32K cache) * -for 4 alias possibility, bits 14:13 needed (64K cache) * - * Since vaddr was not available for all instances of I$ flush req by core - * kernel, the only safe way (non-optimal though) was to kill all possible - * lines which could represent an alias (even if they didnt represent one - * in execution). - * e.g. for 64K I$, 4 aliases possible, so we did - * flush start - * flush start | 0x01 - * flush start | 0x2 - * flush start | 0x3 - * - * The penalty was invoking the operation itself, since tag match is anyways - * paddr based, a line which didn't represent an alias would not match the - * paddr, hence wont be killed - * - * Note that aliasing concerns are independent of line-sz for a given cache - * geometry (size + set_assoc) because the extra bits required by line-sz are - * reduced from the set calc. - * e.g. 2-way-set-assoc, 32K I$ with 8k MMU pg sz and using math above - * 32b line-sz: 9 bits set-index-calc, 5 bits offset-in-line => 1 extra bit - * 64b line-sz: 8 bits set-index-calc, 6 bits offset-in-line => 1 extra bit - * * ------------------ * MMU v3 * ------------------ - * This ver of MMU supports var page sizes (1k-16k) - Linux will support - * 8k (default), 16k and 4k. + * This ver of MMU supports variable page sizes (1k-16k): although Linux will + * only support 8k (default), 16k and 4k. * However from hardware perspective, smaller page sizes aggrevate aliasing * meaning more vaddr bits needed to disambiguate the cache-line-op ; * the existing scheme of piggybacking won't work for certain configurations. @@ -468,144 +414,53 @@ static inline void __dc_line_op(unsigned long start, unsigned long sz, */ /*********************************************************** - * Machine specific helpers for per line I-Cache invalidate. - * 3 routines to accpunt for 1, 2, 4 aliases possible + * Machine specific helper for per line I-Cache invalidate. */ - -static void __ic_line_inv_no_alias(unsigned long start, int num_lines) -{ - while (num_lines-- > 0) { -#if (CONFIG_ARC_MMU_VER > 2) - write_aux_reg(ARC_REG_IC_PTAG, start); -#endif - write_aux_reg(ARC_REG_IC_IVIL, start); - start += ARC_ICACHE_LINE_LEN; - } -} - -static void __ic_line_inv_2_alias(unsigned long start, int num_lines) -{ - while (num_lines-- > 0) { - -#if (CONFIG_ARC_MMU_VER > 2) - /* - * MMU v3, CDU prog model (for line ops) now uses a new IC_PTAG - * reg to pass the "tag" bits and existing IVIL reg only looks - * at bits relevant for "index" (details above) - * Programming Notes: - * -when writing tag to PTAG reg, bit chopping can be avoided, - * CDU ignores non-tag bits. 
- * -Ideally "index" must be computed from vaddr, but it is not - * avail in these rtns. So to be safe, we kill the lines in all - * possible indexes corresp to num of aliases possible for - * given cache config. - */ - write_aux_reg(ARC_REG_IC_PTAG, start); - write_aux_reg(ARC_REG_IC_IVIL, - start & ~(0x1 << PAGE_SHIFT)); - write_aux_reg(ARC_REG_IC_IVIL, start | (0x1 << PAGE_SHIFT)); -#else - write_aux_reg(ARC_REG_IC_IVIL, start); - write_aux_reg(ARC_REG_IC_IVIL, start | 0x01); -#endif - start += ARC_ICACHE_LINE_LEN; - } -} - -static void __ic_line_inv_4_alias(unsigned long start, int num_lines) -{ - while (num_lines-- > 0) { - -#if (CONFIG_ARC_MMU_VER > 2) - write_aux_reg(ARC_REG_IC_PTAG, start); - - write_aux_reg(ARC_REG_IC_IVIL, - start & ~(0x3 << PAGE_SHIFT)); - write_aux_reg(ARC_REG_IC_IVIL, - start & ~(0x2 << PAGE_SHIFT)); - write_aux_reg(ARC_REG_IC_IVIL, - start & ~(0x1 << PAGE_SHIFT)); - write_aux_reg(ARC_REG_IC_IVIL, start | (0x3 << PAGE_SHIFT)); -#else - write_aux_reg(ARC_REG_IC_IVIL, start); - write_aux_reg(ARC_REG_IC_IVIL, start | 0x01); - write_aux_reg(ARC_REG_IC_IVIL, start | 0x02); - write_aux_reg(ARC_REG_IC_IVIL, start | 0x03); -#endif - start += ARC_ICACHE_LINE_LEN; - } -} - -static void __ic_line_inv(unsigned long start, unsigned long sz) +static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, + unsigned long sz) { unsigned long flags; - int num_lines, slack; + int num_lines; /* - * Ensure we properly floor/ceil the non-line aligned/sized requests - * and have @start - aligned to cache line, and integral @num_lines + * Ensure we properly floor/ceil the non-line aligned/sized requests: * However page sized flushes can be compile time optimised. - * -@start will be cache-line aligned already (being page aligned) + * -@paddr will be cache-line aligned already (being page aligned) * -@sz will be integral multiple of line size (being page sized). 
*/ if (!(__builtin_constant_p(sz) && sz == PAGE_SIZE)) { - slack = start & ~ICACHE_LINE_MASK; - sz += slack; - start -= slack; + sz += paddr & ~ICACHE_LINE_MASK; + paddr &= ICACHE_LINE_MASK; + vaddr &= ICACHE_LINE_MASK; } num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN); - local_irq_save(flags); - (*___flush_icache_rtn) (start, num_lines); - local_irq_restore(flags); -} - -/* Unlike routines above, having vaddr for flush op (along with paddr), - * prevents the need to speculatively kill the lines in multiple sets - * based on ratio of way_sz : pg_sz - */ -static void __ic_line_inv_vaddr(unsigned long phy_start, - unsigned long vaddr, unsigned long sz) -{ - unsigned long flags; - int num_lines, slack; - unsigned int addr; - - slack = phy_start & ~ICACHE_LINE_MASK; - sz += slack; - phy_start -= slack; - num_lines = DIV_ROUND_UP(sz, ARC_ICACHE_LINE_LEN); - -#if (CONFIG_ARC_MMU_VER > 2) - vaddr &= ~ICACHE_LINE_MASK; - addr = phy_start; -#else +#if (CONFIG_ARC_MMU_VER <= 2) /* bits 17:13 of vaddr go as bits 4:0 of paddr */ - addr = phy_start | ((vaddr >> 13) & 0x1F); + paddr |= (vaddr >> PAGE_SHIFT) & 0x1F; #endif local_irq_save(flags); while (num_lines-- > 0) { #if (CONFIG_ARC_MMU_VER > 2) /* tag comes from phy addr */ - write_aux_reg(ARC_REG_IC_PTAG, addr); + write_aux_reg(ARC_REG_IC_PTAG, paddr); /* index bits come from vaddr */ write_aux_reg(ARC_REG_IC_IVIL, vaddr); vaddr += ARC_ICACHE_LINE_LEN; #else - /* this paddr contains vaddrs bits as needed */ - write_aux_reg(ARC_REG_IC_IVIL, addr); + /* paddr contains stuffed vaddrs bits */ + write_aux_reg(ARC_REG_IC_IVIL, paddr); #endif - addr += ARC_ICACHE_LINE_LEN; + paddr += ARC_ICACHE_LINE_LEN; } local_irq_restore(flags); } #else -#define __ic_line_inv(start, sz) #define __ic_line_inv_vaddr(pstart, vstart, sz) #endif /* CONFIG_ARC_HAS_ICACHE */ @@ -615,35 +470,72 @@ static void __ic_line_inv_vaddr(unsigned long phy_start, * Exported APIs */ -/* TBD: use pg_arch_1 to optimize this */ +/* + * Handle cache congruency of kernel and userspace mappings of page when kernel + * writes-to/reads-from + * + * The idea is to defer flushing of kernel mapping after a WRITE, possible if: + * -dcache is NOT aliasing, hence any U/K-mappings of page are congruent + * -U-mapping doesn't exist yet for page (finalised in update_mmu_cache) + * -In SMP, if hardware caches are coherent + * + * There's a corollary case, where kernel READs from a userspace mapped page. + * If the U-mapping is not congruent to to K-mapping, former needs flushing. 
+ */ void flush_dcache_page(struct page *page) { - __dc_line_op((unsigned long)page_address(page), PAGE_SIZE, OP_FLUSH); + struct address_space *mapping; + + if (!cache_is_vipt_aliasing()) { + set_bit(PG_arch_1, &page->flags); + return; + } + + /* don't handle anon pages here */ + mapping = page_mapping(page); + if (!mapping) + return; + + /* + * pagecache page, file not yet mapped to userspace + * Make a note that K-mapping is dirty + */ + if (!mapping_mapped(mapping)) { + set_bit(PG_arch_1, &page->flags); + } else if (page_mapped(page)) { + + /* kernel reading from page with U-mapping */ + void *paddr = page_address(page); + unsigned long vaddr = page->index << PAGE_CACHE_SHIFT; + + if (addr_not_cache_congruent(paddr, vaddr)) + __flush_dcache_page(paddr, vaddr); + } } EXPORT_SYMBOL(flush_dcache_page); void dma_cache_wback_inv(unsigned long start, unsigned long sz) { - __dc_line_op(start, sz, OP_FLUSH_N_INV); + __dc_line_op_k(start, sz, OP_FLUSH_N_INV); } EXPORT_SYMBOL(dma_cache_wback_inv); void dma_cache_inv(unsigned long start, unsigned long sz) { - __dc_line_op(start, sz, OP_INV); + __dc_line_op_k(start, sz, OP_INV); } EXPORT_SYMBOL(dma_cache_inv); void dma_cache_wback(unsigned long start, unsigned long sz) { - __dc_line_op(start, sz, OP_FLUSH); + __dc_line_op_k(start, sz, OP_FLUSH); } EXPORT_SYMBOL(dma_cache_wback); /* - * This is API for making I/D Caches consistent when modifying code - * (loadable modules, kprobes, etc) + * This is API for making I/D Caches consistent when modifying + * kernel code (loadable modules, kprobes, kgdb...) * This is called on insmod, with kernel virtual address for CODE of * the module. ARC cache maintenance ops require PHY address thus we * need to convert vmalloc addr to PHY addr @@ -652,7 +544,6 @@ void flush_icache_range(unsigned long kstart, unsigned long kend) { unsigned int tot_sz, off, sz; unsigned long phy, pfn; - unsigned long flags; /* printk("Kernel Cache Cohenercy: %lx to %lx\n",kstart, kend); */ @@ -673,8 +564,13 @@ void flush_icache_range(unsigned long kstart, unsigned long kend) /* Case: Kernel Phy addr (0x8000_0000 onwards) */ if (likely(kstart > PAGE_OFFSET)) { - __ic_line_inv(kstart, kend - kstart); - __dc_line_op(kstart, kend - kstart, OP_FLUSH); + /* + * The 2nd arg despite being paddr will be used to index icache + * This is OK since no alternate virtual mappings will exist + * given the callers for this case: kprobe/kgdb in built-in + * kernel code only. + */ + __sync_icache_dcache(kstart, kstart, kend - kstart); return; } @@ -692,42 +588,45 @@ void flush_icache_range(unsigned long kstart, unsigned long kend) pfn = vmalloc_to_pfn((void *)kstart); phy = (pfn << PAGE_SHIFT) + off; sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off); - local_irq_save(flags); - __dc_line_op(phy, sz, OP_FLUSH); - __ic_line_inv(phy, sz); - local_irq_restore(flags); + __sync_icache_dcache(phy, kstart, sz); kstart += sz; tot_sz -= sz; } } /* - * Optimised ver of flush_icache_range() with spec callers: ptrace/signals - * where vaddr is also available. This allows passing both vaddr and paddr - * bits to CDU for cache flush, short-circuting the current pessimistic algo - * which kills all possible aliases. - * An added adv of knowing that vaddr is user-vaddr avoids various checks - * and handling for k-vaddr, k-paddr as done in orig ver above + * General purpose helper to make I and D cache lines consistent. 
+ * @paddr is phy addr of region + * @vaddr is typically user or kernel vaddr (vmalloc) + * Howver in one instance, flush_icache_range() by kprobe (for a breakpt in + * builtin kernel code) @vaddr will be paddr only, meaning CDU operation will + * use a paddr to index the cache (despite VIPT). This is fine since since a + * built-in kernel page will not have any virtual mappings (not even kernel) + * kprobe on loadable module is different as it will have kvaddr. */ -void flush_icache_range_vaddr(unsigned long paddr, unsigned long u_vaddr, - int len) +void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len) { - __ic_line_inv_vaddr(paddr, u_vaddr, len); - __dc_line_op(paddr, len, OP_FLUSH); + unsigned long flags; + + local_irq_save(flags); + __ic_line_inv_vaddr(paddr, vaddr, len); + __dc_line_op(paddr, vaddr, len, OP_FLUSH); + local_irq_restore(flags); +} + +/* wrapper to compile time eliminate alignment checks in flush loop */ +void __inv_icache_page(unsigned long paddr, unsigned long vaddr) +{ + __ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE); } /* - * XXX: This also needs to be optim using pg_arch_1 - * This is called when a page-cache page is about to be mapped into a - * user process' address space. It offers an opportunity for a - * port to ensure d-cache/i-cache coherency if necessary. + * wrapper to clearout kernel or userspace mappings of a page + * For kernel mappings @vaddr == @paddr */ -void flush_icache_page(struct vm_area_struct *vma, struct page *page) +void ___flush_dcache_page(unsigned long paddr, unsigned long vaddr) { - if (!(vma->vm_flags & VM_EXEC)) - return; - - __ic_line_inv((unsigned long)page_address(page), PAGE_SIZE); + __dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV); } void flush_icache_all(void) @@ -756,6 +655,87 @@ noinline void flush_cache_all(void) } +#ifdef CONFIG_ARC_CACHE_VIPT_ALIASING + +void flush_cache_mm(struct mm_struct *mm) +{ + flush_cache_all(); +} + +void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr, + unsigned long pfn) +{ + unsigned int paddr = pfn << PAGE_SHIFT; + + __sync_icache_dcache(paddr, u_vaddr, PAGE_SIZE); +} + +void flush_cache_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + flush_cache_all(); +} + +void copy_user_highpage(struct page *to, struct page *from, + unsigned long u_vaddr, struct vm_area_struct *vma) +{ + void *kfrom = page_address(from); + void *kto = page_address(to); + int clean_src_k_mappings = 0; + + /* + * If SRC page was already mapped in userspace AND it's U-mapping is + * not congruent with K-mapping, sync former to physical page so that + * K-mapping in memcpy below, sees the right data + * + * Note that while @u_vaddr refers to DST page's userspace vaddr, it is + * equally valid for SRC page as well + */ + if (page_mapped(from) && addr_not_cache_congruent(kfrom, u_vaddr)) { + __flush_dcache_page(kfrom, u_vaddr); + clean_src_k_mappings = 1; + } + + copy_page(kto, kfrom); + + /* + * Mark DST page K-mapping as dirty for a later finalization by + * update_mmu_cache(). Although the finalization could have been done + * here as well (given that both vaddr/paddr are available). + * But update_mmu_cache() already has code to do that for other + * non copied user pages (e.g. read faults which wire in pagecache page + * directly). 
+ */ + set_bit(PG_arch_1, &to->flags); + + /* + * if SRC was already usermapped and non-congruent to kernel mapping + * sync the kernel mapping back to physical page + */ + if (clean_src_k_mappings) { + __flush_dcache_page(kfrom, kfrom); + } else { + set_bit(PG_arch_1, &from->flags); + } +} + +void clear_user_page(void *to, unsigned long u_vaddr, struct page *page) +{ + clear_page(to); + set_bit(PG_arch_1, &page->flags); +} + +void flush_anon_page(struct vm_area_struct *vma, struct page *page, + unsigned long u_vaddr) +{ + /* TBD: do we really need to clear the kernel mapping */ + __flush_dcache_page(page_address(page), u_vaddr); + __flush_dcache_page(page_address(page), page_address(page)); + +} + +#endif + /********************************************************************** * Explicit Cache flush request from user space via syscall * Needed for JITs which generate code on the fly diff --git a/arch/arc/mm/extable.c b/arch/arc/mm/extable.c index 014172ba8432..aa652e281324 100644 --- a/arch/arc/mm/extable.c +++ b/arch/arc/mm/extable.c @@ -27,7 +27,7 @@ int fixup_exception(struct pt_regs *regs) #ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE -long arc_copy_from_user_noinline(void *to, const void __user * from, +long arc_copy_from_user_noinline(void *to, const void __user *from, unsigned long n) { return __arc_copy_from_user(to, from, n); @@ -48,7 +48,7 @@ unsigned long arc_clear_user_noinline(void __user *to, } EXPORT_SYMBOL(arc_clear_user_noinline); -long arc_strncpy_from_user_noinline (char *dst, const char __user *src, +long arc_strncpy_from_user_noinline(char *dst, const char __user *src, long count) { return __arc_strncpy_from_user(dst, src, count); diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index af55aab803d2..689ffd86d5e9 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -12,7 +12,6 @@ #include <linux/sched.h> #include <linux/errno.h> #include <linux/ptrace.h> -#include <linux/version.h> #include <linux/uaccess.h> #include <linux/kdebug.h> #include <asm/pgalloc.h> diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 727d4794ea0f..4a177365b2c4 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -10,9 +10,6 @@ #include <linux/mm.h> #include <linux/bootmem.h> #include <linux/memblock.h> -#ifdef CONFIG_BLOCK_DEV_RAM -#include <linux/blk.h> -#endif #include <linux/swap.h> #include <linux/module.h> #include <asm/page.h> diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c index 3e5c92c79936..739e65f355de 100644 --- a/arch/arc/mm/ioremap.c +++ b/arch/arc/mm/ioremap.c @@ -12,7 +12,7 @@ #include <linux/io.h> #include <linux/mm.h> #include <linux/slab.h> -#include <asm/cache.h> +#include <linux/cache.h> void __iomem *ioremap(unsigned long paddr, unsigned long size) { diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c new file mode 100644 index 000000000000..2e06d56e987b --- /dev/null +++ b/arch/arc/mm/mmap.c @@ -0,0 +1,78 @@ +/* + * ARC700 mmap + * + * (started from arm version - for VIPT alias handling) + * + * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/sched.h> +#include <asm/cacheflush.h> + +#define COLOUR_ALIGN(addr, pgoff) \ + ((((addr) + SHMLBA - 1) & ~(SHMLBA - 1)) + \ + (((pgoff) << PAGE_SHIFT) & (SHMLBA - 1))) + +/* + * Ensure that shared mappings are correctly aligned to + * avoid aliasing issues with VIPT caches. + * We need to ensure that + * a specific page of an object is always mapped at a multiple of + * SHMLBA bytes. + */ +unsigned long +arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int do_align = 0; + int aliasing = cache_is_vipt_aliasing(); + struct vm_unmapped_area_info info; + + /* + * We only need to do colour alignment if D cache aliases. + */ + if (aliasing) + do_align = filp || (flags & MAP_SHARED); + + /* + * We enforce the MAP_FIXED case. + */ + if (flags & MAP_FIXED) { + if (aliasing && flags & MAP_SHARED && + (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)) + return -EINVAL; + return addr; + } + + if (len > TASK_SIZE) + return -ENOMEM; + + if (addr) { + if (do_align) + addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + + info.flags = 0; + info.length = len; + info.low_limit = mm->mmap_base; + info.high_limit = TASK_SIZE; + info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + return vm_unmapped_area(&info); +} diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 9b9ce23f4ec3..066145b5f348 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -418,23 +418,52 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) local_irq_restore(flags); } -/* arch hook called by core VM at the end of handle_mm_fault( ), - * when a new PTE is entered in Page Tables or an existing one - * is modified. We aggresively pre-install a TLB entry +/* + * Called at the end of pagefault, for a userspace mapped page + * -pre-install the corresponding TLB entry into MMU + * -Finalize the delayed D-cache flush of kernel mapping of page due to + * flush_dcache_page(), copy_user_page() + * + * Note that flush (when done) involves both WBACK - so physical page is + * in sync as well as INV - so any non-congruent aliases don't remain */ - -void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddress, +void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, pte_t *ptep) { + unsigned long vaddr = vaddr_unaligned & PAGE_MASK; + unsigned long paddr = pte_val(*ptep) & PAGE_MASK; + + create_tlb(vma, vaddr, ptep); + + /* + * Exec page : Independent of aliasing/page-color considerations, + * since icache doesn't snoop dcache on ARC, any dirty + * K-mapping of a code page needs to be wback+inv so that + * icache fetch by userspace sees code correctly. + * !EXEC page: If K-mapping is NOT congruent to U-mapping, flush it + * so userspace sees the right data. 
+ * (Avoids the flush for Non-exec + congruent mapping case) + */ + if (vma->vm_flags & VM_EXEC || addr_not_cache_congruent(paddr, vaddr)) { + struct page *page = pfn_to_page(pte_pfn(*ptep)); + + int dirty = test_and_clear_bit(PG_arch_1, &page->flags); + if (dirty) { + /* wback + inv dcache lines */ + __flush_dcache_page(paddr, paddr); - create_tlb(vma, vaddress, ptep); + /* invalidate any existing icache lines */ + if (vma->vm_flags & VM_EXEC) + __inv_icache_page(paddr, vaddr); + } + } } /* Read the Cache Build Confuration Registers, Decode them and save into * the cpuinfo structure for later use. * No Validation is done here, simply read/convert the BCRs */ -void __init read_decode_mmu_bcr(void) +void __cpuinit read_decode_mmu_bcr(void) { unsigned int tmp; struct bcr_mmu_1_2 *mmu2; /* encoded MMU2 attr */ @@ -466,7 +495,7 @@ void __init read_decode_mmu_bcr(void) char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) { int n = 0; - struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu; n += scnprintf(buf + n, len - n, "ARC700 MMU [v%x]\t: %dk PAGE, ", p_mmu->ver, TO_KB(p_mmu->pg_sz)); @@ -480,7 +509,7 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) return buf; } -void __init arc_mmu_init(void) +void __cpuinit arc_mmu_init(void) { char str[256]; struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; |
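
The new arch_get_unmapped_area() in mmap.c above uses COLOUR_ALIGN() so that every shared mapping of a given file page lands on the same cache colour, i.e. (addr % SHMLBA) equals ((pgoff << PAGE_SHIFT) % SHMLBA). A minimal user-space sketch of that rule follows; the SHMLBA and PAGE_SHIFT values are assumed purely for illustration.

/*
 * Illustrative sketch of the COLOUR_ALIGN() rule from arch/arc/mm/mmap.c.
 * SHMLBA and PAGE_SHIFT here are example values, not the kernel's.
 */
#include <stdio.h>

#define PAGE_SHIFT	13			/* 8K pages, as in the patch comments */
#define SHMLBA		(4UL << PAGE_SHIFT)	/* assumed 4-page colour period */

#define COLOUR_ALIGN(addr, pgoff) \
	((((addr) + SHMLBA - 1) & ~(SHMLBA - 1)) + \
	 (((pgoff) << PAGE_SHIFT) & (SHMLBA - 1)))

int main(void)
{
	unsigned long pgoff = 3;	/* mapping starts at file page 3 */
	unsigned long hint1 = 0x20001000, hint2 = 0x5000e000;

	unsigned long a1 = COLOUR_ALIGN(hint1, pgoff);
	unsigned long a2 = COLOUR_ALIGN(hint2, pgoff);

	/* both placements share the colour dictated by the file offset */
	printf("a1=%#lx colour=%#lx\n", a1, a1 & (SHMLBA - 1));
	printf("a2=%#lx colour=%#lx\n", a2, a2 & (SHMLBA - 1));
	return 0;
}

Either hint rounds up to an SHMLBA boundary and then gets the file offset's colour added back, so distinct processes mapping the same page shared will hit the same D-cache set despite VIPT indexing.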
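For MMU v1/v2 the patch keeps the existing CDU trick described in the cache_arc700.c comments: the line ops take only a paddr, so vaddr bits 17:13 (the possible alias bits) are folded into paddr bits 4:0, which the CDU ignores as the in-line offset. A small sketch of that expression, with the addresses chosen only as examples:

/*
 * Illustrative sketch of the MMU v1/v2 "stuffing" used by the patch
 * (the CONFIG_ARC_MMU_VER <= 2 path). Addresses are example values.
 */
#include <stdio.h>

#define PAGE_SHIFT	13	/* fixed 8K page on MMU v1/v2 */

static unsigned long stuff_vaddr_bits(unsigned long paddr, unsigned long vaddr)
{
	/* same expression the patch uses: bits 17:13 of vaddr -> bits 4:0 of paddr */
	return paddr | ((vaddr >> PAGE_SHIFT) & 0x1F);
}

int main(void)
{
	unsigned long paddr = 0x80124000;	/* line-aligned physical address */
	unsigned long vaddr = 0x2000e000;	/* user virtual address of the same page */

	printf("CDU operand: %#lx\n", stuff_vaddr_bits(paddr, vaddr));
	return 0;
}

On MMU v3 this stuffing is unnecessary since the separate PTAG register carries the tag bits while the IVIL/FLDL operand carries the vaddr-derived index, which is exactly what the reworked __dc_line_loop()/__ic_line_inv_vaddr() pass.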