From 9d35dc3006a9865eb5b55cc79df49933601131f8 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Tue, 18 Jun 2019 17:20:10 +0800 Subject: csky: Revert mmu ASID mechanism The current C-SKY ASID mechanism is from mips and it doesn't work well with multi-cores. A per-core ASID mechanism is not suitable for C-SKY SMP tlb maintenance operations, e.g. tlbi.vas needs to share the same asid on all processors and it'll invalidate the tlb entry on all cores with the same asid. This patch prepares for the new ASID mechanism. Signed-off-by: Guo Ren Cc: Arnd Bergmann --- arch/csky/include/asm/mmu.h | 1 - arch/csky/include/asm/mmu_context.h | 112 +++--------------------------------- arch/csky/include/asm/pgtable.h | 2 - 3 files changed, 7 insertions(+), 108 deletions(-) (limited to 'arch/csky/include/asm') diff --git a/arch/csky/include/asm/mmu.h b/arch/csky/include/asm/mmu.h index cb344675ccc4..06f509ae09b0 100644 --- a/arch/csky/include/asm/mmu.h +++ b/arch/csky/include/asm/mmu.h @@ -5,7 +5,6 @@ #define __ASM_CSKY_MMU_H typedef struct { - unsigned long asid[NR_CPUS]; void *vdso; } mm_context_t; diff --git a/arch/csky/include/asm/mmu_context.h b/arch/csky/include/asm/mmu_context.h index 734db3a122e1..86dde481df76 100644 --- a/arch/csky/include/asm/mmu_context.h +++ b/arch/csky/include/asm/mmu_context.h @@ -16,122 +16,24 @@ #define TLBMISS_HANDLER_SETUP_PGD(pgd) \ setup_pgd(__pa(pgd), false) + #define TLBMISS_HANDLER_SETUP_PGD_KERNEL(pgd) \ setup_pgd(__pa(pgd), true) -#define cpu_context(cpu, mm) ((mm)->context.asid[cpu]) -#define cpu_asid(cpu, mm) (cpu_context((cpu), (mm)) & ASID_MASK) -#define asid_cache(cpu) (cpu_data[cpu].asid_cache) - -#define ASID_FIRST_VERSION (1 << CONFIG_CPU_ASID_BITS) -#define ASID_INC 0x1 -#define ASID_MASK (ASID_FIRST_VERSION - 1) -#define ASID_VERSION_MASK ~ASID_MASK +#define init_new_context(tsk,mm) 0 +#define activate_mm(prev,next) switch_mm(prev, next, current) #define destroy_context(mm) do {} while (0) #define enter_lazy_tlb(mm, tsk) do {} while (0) #define 
deactivate_mm(tsk, mm) do {} while (0) -/* - * All unused by hardware upper bits will be considered - * as a software asid extension. - */ -static inline void -get_new_mmu_context(struct mm_struct *mm, unsigned long cpu) -{ - unsigned long asid = asid_cache(cpu); - - asid += ASID_INC; - if (!(asid & ASID_MASK)) { - flush_tlb_all(); /* start new asid cycle */ - if (!asid) /* fix version if needed */ - asid = ASID_FIRST_VERSION; - } - cpu_context(cpu, mm) = asid_cache(cpu) = asid; -} - -/* - * Initialize the context related info for a new mm_struct - * instance. - */ -static inline int -init_new_context(struct task_struct *tsk, struct mm_struct *mm) -{ - int i; - - for_each_online_cpu(i) - cpu_context(i, mm) = 0; - return 0; -} - -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, - struct task_struct *tsk) -{ - unsigned int cpu = smp_processor_id(); - unsigned long flags; - - local_irq_save(flags); - /* Check if our ASID is of an older version and thus invalid */ - if ((cpu_context(cpu, next) ^ asid_cache(cpu)) & ASID_VERSION_MASK) - get_new_mmu_context(next, cpu); - write_mmu_entryhi(cpu_asid(cpu, next)); - TLBMISS_HANDLER_SETUP_PGD(next->pgd); - - /* - * Mark current->active_mm as not "active" anymore. - * We don't want to mislead possible IPI tlb flush routines. - */ - cpumask_clear_cpu(cpu, mm_cpumask(prev)); - cpumask_set_cpu(cpu, mm_cpumask(next)); - - local_irq_restore(flags); -} - -/* - * After we have set current->mm to a new value, this activates - * the context for the new mm so we see the new mappings. - */ static inline void -activate_mm(struct mm_struct *prev, struct mm_struct *next) +switch_mm(struct mm_struct *prev, struct mm_struct *next, + struct task_struct *tsk) { - unsigned long flags; - int cpu = smp_processor_id(); - - local_irq_save(flags); + if (prev != next) + tlb_invalid_all(); - /* Unconditionally get a new ASID. 
*/ - get_new_mmu_context(next, cpu); - - write_mmu_entryhi(cpu_asid(cpu, next)); TLBMISS_HANDLER_SETUP_PGD(next->pgd); - - /* mark mmu ownership change */ - cpumask_clear_cpu(cpu, mm_cpumask(prev)); - cpumask_set_cpu(cpu, mm_cpumask(next)); - - local_irq_restore(flags); } - -/* - * If mm is currently active_mm, we can't really drop it. Instead, - * we will get a new one for it. - */ -static inline void -drop_mmu_context(struct mm_struct *mm, unsigned int cpu) -{ - unsigned long flags; - - local_irq_save(flags); - - if (cpumask_test_cpu(cpu, mm_cpumask(mm))) { - get_new_mmu_context(mm, cpu); - write_mmu_entryhi(cpu_asid(cpu, mm)); - } else { - /* will get a new context next time */ - cpu_context(cpu, mm) = 0; - } - - local_irq_restore(flags); -} - #endif /* __ASM_CSKY_MMU_CONTEXT_H */ diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h index dcea277c09ae..c429a6f347de 100644 --- a/arch/csky/include/asm/pgtable.h +++ b/arch/csky/include/asm/pgtable.h @@ -290,8 +290,6 @@ static inline pte_t *pte_offset(pmd_t *dir, unsigned long address) extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern void paging_init(void); -extern void show_jtlb_table(void); - void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *pte); -- cgit v1.2.1 From a231b8839cd4259de1d37a78165739a4d5d08e72 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Tue, 18 Jun 2019 20:06:52 +0800 Subject: csky: Add new asid lib code from arm This patch only contains the asid helper code from arm, for the next patch to use. The asid allocator uses a five-level check to reduce the cost of switch_mm. 1. Check if the asid version is the same (the common case) 2. Check reserved_asid, which is set in the rollover flush_context(); the key point is to keep the same bit position with the current asid version instead of the input version. 3. Check if the position in the bitmap is free; if so, it can be set & used directly. 4. find_next_zero_bit() (a little performance cost) 5. 
flush_context() (this is the highest cost, and it increments the current asid version) The checks are done level by level, and each level costs more than the previous one. The reserved_asid and bitmap mechanisms prevent unnecessary find_next_zero_bit() calls. The atomic 64-bit asid is also suitable for 32-bit systems and it won't cost a lot in the 1st, 2nd and 3rd level checks. The set/clear mm_cpumask operation was removed in arm64 compared to arm32. It seems to have no side effect on current arm64 systems, but semantically it's wrong. Although csky doesn't need it either, we add it back for csky. The asid_per_ctxt is of no use for csky; it reserves the lowest bits for other uses, maybe TrustZone? OK, just keep it in the csky copy. It seems this could also be used by other archs, and it's worth moving the asid code to generic code in the future. Signed-off-by: Guo Ren Cc: Arnd Bergmann Cc: Julien Grall --- arch/csky/include/asm/asid.h | 78 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 arch/csky/include/asm/asid.h (limited to 'arch/csky/include/asm') diff --git a/arch/csky/include/asm/asid.h b/arch/csky/include/asm/asid.h new file mode 100644 index 000000000000..ac08b0ffbe1f --- /dev/null +++ b/arch/csky/include/asm/asid.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_ASM_ASID_H +#define __ASM_ASM_ASID_H + +#include +#include +#include +#include +#include + +struct asid_info +{ + atomic64_t generation; + unsigned long *map; + atomic64_t __percpu *active; + u64 __percpu *reserved; + u32 bits; + /* Lock protecting the structure */ + raw_spinlock_t lock; + /* Which CPU requires context flush on next call */ + cpumask_t flush_pending; + /* Number of ASID allocated by context (shift value) */ + unsigned int ctxt_shift; + /* Callback to locally flush the context. 
*/ + void (*flush_cpu_ctxt_cb)(void); +}; + +#define NUM_ASIDS(info) (1UL << ((info)->bits)) +#define NUM_CTXT_ASIDS(info) (NUM_ASIDS(info) >> (info)->ctxt_shift) + +#define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu) + +void asid_new_context(struct asid_info *info, atomic64_t *pasid, + unsigned int cpu, struct mm_struct *mm); + +/* + * Check the ASID is still valid for the context. If not, generate a new ASID. + * + * @pasid: Pointer to the current ASID batch + * @cpu: current CPU ID. Must have been acquired through get_cpu() + */ +static inline void asid_check_context(struct asid_info *info, + atomic64_t *pasid, unsigned int cpu, + struct mm_struct *mm) +{ + u64 asid, old_active_asid; + + asid = atomic64_read(pasid); + + /* + * The memory ordering here is subtle. + * If our active_asid is non-zero and the ASID matches the current + * generation, then we update the active_asid entry with a relaxed + * cmpxchg. Racing with a concurrent rollover means that either: + * + * - We get a zero back from the cmpxchg and end up waiting on the + * lock. Taking the lock synchronises with the rollover and so + * we are forced to see the updated generation. + * + * - We get a valid ASID back from the cmpxchg, which means the + * relaxed xchg in flush_context will treat us as reserved + * because atomic RmWs are totally ordered for a given location. 
+ */ + old_active_asid = atomic64_read(&active_asid(info, cpu)); + if (old_active_asid && + !((asid ^ atomic64_read(&info->generation)) >> info->bits) && + atomic64_cmpxchg_relaxed(&active_asid(info, cpu), + old_active_asid, asid)) + return; + + asid_new_context(info, pasid, cpu, mm); +} + +int asid_allocator_init(struct asid_info *info, + u32 bits, unsigned int asid_per_ctxt, + void (*flush_cpu_ctxt_cb)(void)); + +#endif -- cgit v1.2.1 From 22d55f02b8922a097cd4be1e2f131dfa7ef65901 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Tue, 18 Jun 2019 20:33:32 +0800 Subject: csky: Use generic asid algorithm to implement switch_mm Use linux generic asid/vmid algorithm to implement csky switch_mm function. The algorithm is from arm and it could work with SMP system. It'll help reduce tlb flush for switch_mm in task/vm switch. Signed-off-by: Guo Ren Cc: Arnd Bergmann --- arch/csky/include/asm/mmu.h | 1 + arch/csky/include/asm/mmu_context.h | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'arch/csky/include/asm') diff --git a/arch/csky/include/asm/mmu.h b/arch/csky/include/asm/mmu.h index 06f509ae09b0..b382a14ea4ec 100644 --- a/arch/csky/include/asm/mmu.h +++ b/arch/csky/include/asm/mmu.h @@ -5,6 +5,7 @@ #define __ASM_CSKY_MMU_H typedef struct { + atomic64_t asid; void *vdso; } mm_context_t; diff --git a/arch/csky/include/asm/mmu_context.h b/arch/csky/include/asm/mmu_context.h index 86dde481df76..0285b0ad18b6 100644 --- a/arch/csky/include/asm/mmu_context.h +++ b/arch/csky/include/asm/mmu_context.h @@ -20,20 +20,28 @@ #define TLBMISS_HANDLER_SETUP_PGD_KERNEL(pgd) \ setup_pgd(__pa(pgd), true) -#define init_new_context(tsk,mm) 0 +#define ASID_MASK ((1 << CONFIG_CPU_ASID_BITS) - 1) +#define cpu_asid(mm) (atomic64_read(&mm->context.asid) & ASID_MASK) + +#define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.asid, 0); 0; }) #define activate_mm(prev,next) switch_mm(prev, next, current) #define destroy_context(mm) do {} while (0) #define 
enter_lazy_tlb(mm, tsk) do {} while (0) #define deactivate_mm(tsk, mm) do {} while (0) +void check_and_switch_context(struct mm_struct *mm, unsigned int cpu); + static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { + unsigned int cpu = smp_processor_id(); + if (prev != next) - tlb_invalid_all(); + check_and_switch_context(next, cpu); TLBMISS_HANDLER_SETUP_PGD(next->pgd); + write_mmu_entryhi(next->context.asid.counter); } #endif /* __ASM_CSKY_MMU_CONTEXT_H */ -- cgit v1.2.1