diff options
Diffstat (limited to 'arch/nds32/kernel')
-rw-r--r-- | arch/nds32/kernel/Makefile | 7 | ||||
-rw-r--r-- | arch/nds32/kernel/ex-entry.S | 24 | ||||
-rw-r--r-- | arch/nds32/kernel/ex-exit.S | 13 | ||||
-rw-r--r-- | arch/nds32/kernel/ex-scall.S | 8 | ||||
-rw-r--r-- | arch/nds32/kernel/fpu.c | 269 | ||||
-rw-r--r-- | arch/nds32/kernel/head.S | 13 | ||||
-rw-r--r-- | arch/nds32/kernel/perf_event_cpu.c | 1522 | ||||
-rw-r--r-- | arch/nds32/kernel/pm.c | 78 | ||||
-rw-r--r-- | arch/nds32/kernel/process.c | 64 | ||||
-rw-r--r-- | arch/nds32/kernel/setup.c | 22 | ||||
-rw-r--r-- | arch/nds32/kernel/signal.c | 62 | ||||
-rw-r--r-- | arch/nds32/kernel/sleep.S | 131 | ||||
-rw-r--r-- | arch/nds32/kernel/sys_nds32.c | 32 | ||||
-rw-r--r-- | arch/nds32/kernel/traps.c | 16 |
14 files changed, 2232 insertions, 29 deletions
diff --git a/arch/nds32/kernel/Makefile b/arch/nds32/kernel/Makefile index 27cded39fa66..a1a1d61509e5 100644 --- a/arch/nds32/kernel/Makefile +++ b/arch/nds32/kernel/Makefile @@ -4,7 +4,6 @@ CPPFLAGS_vmlinux.lds := -DTEXTADDR=$(TEXTADDR) AFLAGS_head.o := -DTEXTADDR=$(TEXTADDR) - # Object file lists. obj-y := ex-entry.o ex-exit.o ex-scall.o irq.o \ @@ -14,11 +13,15 @@ obj-y := ex-entry.o ex-exit.o ex-scall.o irq.o \ obj-$(CONFIG_MODULES) += nds32_ksyms.o module.o obj-$(CONFIG_STACKTRACE) += stacktrace.o +obj-$(CONFIG_FPU) += fpu.o obj-$(CONFIG_OF) += devtree.o obj-$(CONFIG_CACHE_L2) += atl2c.o - +obj-$(CONFIG_PERF_EVENTS) += perf_event_cpu.o +obj-$(CONFIG_PM) += pm.o sleep.o extra-y := head.o vmlinux.lds +CFLAGS_fpu.o += -mext-fpu-sp -mext-fpu-dp + obj-y += vdso/ diff --git a/arch/nds32/kernel/ex-entry.S b/arch/nds32/kernel/ex-entry.S index 21a144071566..107d98a1d1b8 100644 --- a/arch/nds32/kernel/ex-entry.S +++ b/arch/nds32/kernel/ex-entry.S @@ -7,6 +7,7 @@ #include <asm/errno.h> #include <asm/asm-offsets.h> #include <asm/page.h> +#include <asm/fpu.h> #ifdef CONFIG_HWZOL .macro push_zol @@ -15,12 +16,31 @@ mfusr $r16, $LC .endm #endif + .macro skip_save_fucop_ctl +#if defined(CONFIG_FPU) +skip_fucop_ctl: + smw.adm $p0, [$sp], $p0, #0x1 + j fucop_ctl_done +#endif + .endm .macro save_user_regs - +#if defined(CONFIG_FPU) + sethi $p0, hi20(has_fpu) + lbsi $p0, [$p0+lo12(has_fpu)] + beqz $p0, skip_fucop_ctl + mfsr $p0, $FUCOP_CTL + smw.adm $p0, [$sp], $p0, #0x1 + bclr $p0, $p0, #FUCOP_CTL_offCP0EN + mtsr $p0, $FUCOP_CTL +fucop_ctl_done: + /* move $SP to the bottom of pt_regs */ + addi $sp, $sp, -FUCOP_CTL_OFFSET +#else smw.adm $sp, [$sp], $sp, #0x1 /* move $SP to the bottom of pt_regs */ addi $sp, $sp, -OSP_OFFSET +#endif /* push $r0 ~ $r25 */ smw.bim $r0, [$sp], $r25 @@ -79,6 +99,7 @@ exception_handlers: .long eh_syscall !Syscall .long asm_do_IRQ !IRQ + skip_save_fucop_ctl common_exception_handler: save_user_regs mfsr $p0, $ITYPE @@ -103,7 +124,6 @@ common_exception_handler: mtsr $r21, $PSW dsb jr $p1 - /* syscall */ 1: addi $p1, $p0, #-NDS32_VECTOR_offEXCEPTION diff --git a/arch/nds32/kernel/ex-exit.S b/arch/nds32/kernel/ex-exit.S index f00af92f7e22..97ba15cd4180 100644 --- a/arch/nds32/kernel/ex-exit.S +++ b/arch/nds32/kernel/ex-exit.S @@ -8,6 +8,7 @@ #include <asm/asm-offsets.h> #include <asm/thread_info.h> #include <asm/current.h> +#include <asm/fpu.h> @@ -22,10 +23,18 @@ .macro restore_user_regs_first setgie.d isb - +#if defined(CONFIG_FPU) + addi $sp, $sp, OSP_OFFSET + lmw.adm $r12, [$sp], $r25, #0x0 + sethi $p0, hi20(has_fpu) + lbsi $p0, [$p0+lo12(has_fpu)] + beqz $p0, 2f + mtsr $r25, $FUCOP_CTL +2: +#else addi $sp, $sp, FUCOP_CTL_OFFSET - lmw.adm $r12, [$sp], $r24, #0x0 +#endif mtsr $r12, $SP_USR mtsr $r13, $IPC #ifdef CONFIG_HWZOL diff --git a/arch/nds32/kernel/ex-scall.S b/arch/nds32/kernel/ex-scall.S index 36aa87ecdabd..270050f1b7b1 100644 --- a/arch/nds32/kernel/ex-scall.S +++ b/arch/nds32/kernel/ex-scall.S @@ -19,11 +19,13 @@ ENTRY(__switch_to) la $p0, __entry_task sw $r1, [$p0] - move $p1, $r0 - addi $p1, $p1, #THREAD_CPU_CONTEXT + addi $p1, $r0, #THREAD_CPU_CONTEXT smw.bi $r6, [$p1], $r14, #0xb ! push r6~r14, fp, lp, sp move $r25, $r1 - addi $r1, $r1, #THREAD_CPU_CONTEXT +#if defined(CONFIG_FPU) + call _switch_fpu +#endif + addi $r1, $r25, #THREAD_CPU_CONTEXT lmw.bi $r6, [$r1], $r14, #0xb ! pop r6~r14, fp, lp, sp ret diff --git a/arch/nds32/kernel/fpu.c b/arch/nds32/kernel/fpu.c new file mode 100644 index 000000000000..fddd40c7a16f --- /dev/null +++ b/arch/nds32/kernel/fpu.c @@ -0,0 +1,269 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2005-2018 Andes Technology Corporation + +#include <linux/sched.h> +#include <linux/signal.h> +#include <linux/sched/signal.h> +#include <asm/processor.h> +#include <asm/user.h> +#include <asm/io.h> +#include <asm/bitfield.h> +#include <asm/fpu.h> + +const struct fpu_struct init_fpuregs = { + .fd_regs = {[0 ... 31] = sNAN64}, + .fpcsr = FPCSR_INIT, +#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC) + .UDF_trap = 0 +#endif +}; + +void save_fpu(struct task_struct *tsk) +{ + unsigned int fpcfg, fpcsr; + + enable_fpu(); + fpcfg = ((__nds32__fmfcfg() & FPCFG_mskFREG) >> FPCFG_offFREG); + switch (fpcfg) { + case SP32_DP32_reg: + asm volatile ("fsdi $fd31, [%0+0xf8]\n\t" + "fsdi $fd30, [%0+0xf0]\n\t" + "fsdi $fd29, [%0+0xe8]\n\t" + "fsdi $fd28, [%0+0xe0]\n\t" + "fsdi $fd27, [%0+0xd8]\n\t" + "fsdi $fd26, [%0+0xd0]\n\t" + "fsdi $fd25, [%0+0xc8]\n\t" + "fsdi $fd24, [%0+0xc0]\n\t" + "fsdi $fd23, [%0+0xb8]\n\t" + "fsdi $fd22, [%0+0xb0]\n\t" + "fsdi $fd21, [%0+0xa8]\n\t" + "fsdi $fd20, [%0+0xa0]\n\t" + "fsdi $fd19, [%0+0x98]\n\t" + "fsdi $fd18, [%0+0x90]\n\t" + "fsdi $fd17, [%0+0x88]\n\t" + "fsdi $fd16, [%0+0x80]\n\t" + : /* no output */ + : "r" (&tsk->thread.fpu) + : "memory"); + /* fall through */ + case SP32_DP16_reg: + asm volatile ("fsdi $fd15, [%0+0x78]\n\t" + "fsdi $fd14, [%0+0x70]\n\t" + "fsdi $fd13, [%0+0x68]\n\t" + "fsdi $fd12, [%0+0x60]\n\t" + "fsdi $fd11, [%0+0x58]\n\t" + "fsdi $fd10, [%0+0x50]\n\t" + "fsdi $fd9, [%0+0x48]\n\t" + "fsdi $fd8, [%0+0x40]\n\t" + : /* no output */ + : "r" (&tsk->thread.fpu) + : "memory"); + /* fall through */ + case SP16_DP8_reg: + asm volatile ("fsdi $fd7, [%0+0x38]\n\t" + "fsdi $fd6, [%0+0x30]\n\t" + "fsdi $fd5, [%0+0x28]\n\t" + "fsdi $fd4, [%0+0x20]\n\t" + : /* no output */ + : "r" (&tsk->thread.fpu) + : "memory"); + /* fall through */ + case SP8_DP4_reg: + asm volatile ("fsdi $fd3, [%1+0x18]\n\t" + "fsdi $fd2, [%1+0x10]\n\t" + "fsdi $fd1, [%1+0x8]\n\t" + "fsdi $fd0, [%1+0x0]\n\t" + "fmfcsr %0\n\t" + "swi %0, [%1+0x100]\n\t" + : "=&r" (fpcsr) + : "r"(&tsk->thread.fpu) + : "memory"); + } + disable_fpu(); +} + +void load_fpu(const struct fpu_struct *fpregs) +{ + unsigned int fpcfg, fpcsr; + + enable_fpu(); + fpcfg = ((__nds32__fmfcfg() & FPCFG_mskFREG) >> FPCFG_offFREG); + switch (fpcfg) { + case SP32_DP32_reg: + asm volatile ("fldi $fd31, [%0+0xf8]\n\t" + "fldi $fd30, [%0+0xf0]\n\t" + "fldi $fd29, [%0+0xe8]\n\t" + "fldi $fd28, [%0+0xe0]\n\t" + "fldi $fd27, [%0+0xd8]\n\t" + "fldi $fd26, [%0+0xd0]\n\t" + "fldi $fd25, [%0+0xc8]\n\t" + "fldi $fd24, [%0+0xc0]\n\t" + "fldi $fd23, [%0+0xb8]\n\t" + "fldi $fd22, [%0+0xb0]\n\t" + "fldi $fd21, [%0+0xa8]\n\t" + "fldi $fd20, [%0+0xa0]\n\t" + "fldi $fd19, [%0+0x98]\n\t" + "fldi $fd18, [%0+0x90]\n\t" + "fldi $fd17, [%0+0x88]\n\t" + "fldi $fd16, [%0+0x80]\n\t" + : /* no output */ + : "r" (fpregs)); + /* fall through */ + case SP32_DP16_reg: + asm volatile ("fldi $fd15, [%0+0x78]\n\t" + "fldi $fd14, [%0+0x70]\n\t" + "fldi $fd13, [%0+0x68]\n\t" + "fldi $fd12, [%0+0x60]\n\t" + "fldi $fd11, [%0+0x58]\n\t" + "fldi $fd10, [%0+0x50]\n\t" + "fldi $fd9, [%0+0x48]\n\t" + "fldi $fd8, [%0+0x40]\n\t" + : /* no output */ + : "r" (fpregs)); + /* fall through */ + case SP16_DP8_reg: + asm volatile ("fldi $fd7, [%0+0x38]\n\t" + "fldi $fd6, [%0+0x30]\n\t" + "fldi $fd5, [%0+0x28]\n\t" + "fldi $fd4, [%0+0x20]\n\t" + : /* no output */ + : "r" (fpregs)); + /* fall through */ + case SP8_DP4_reg: + asm volatile ("fldi $fd3, [%1+0x18]\n\t" + "fldi $fd2, [%1+0x10]\n\t" + "fldi $fd1, [%1+0x8]\n\t" + "fldi $fd0, [%1+0x0]\n\t" + "lwi %0, [%1+0x100]\n\t" + "fmtcsr %0\n\t":"=&r" (fpcsr) + : "r"(fpregs)); + } + disable_fpu(); +} +void store_fpu_for_suspend(void) +{ +#ifdef CONFIG_LAZY_FPU + if (last_task_used_math != NULL) + save_fpu(last_task_used_math); + last_task_used_math = NULL; +#else + if (!used_math()) + return; + unlazy_fpu(current); +#endif + clear_fpu(task_pt_regs(current)); +} +inline void do_fpu_context_switch(struct pt_regs *regs) +{ + /* Enable to use FPU. */ + + if (!user_mode(regs)) { + pr_err("BUG: FPU is used in kernel mode.\n"); + BUG(); + return; + } + + enable_ptreg_fpu(regs); +#ifdef CONFIG_LAZY_FPU //Lazy FPU is used + if (last_task_used_math == current) + return; + if (last_task_used_math != NULL) + /* Other processes fpu state, save away */ + save_fpu(last_task_used_math); + last_task_used_math = current; +#endif + if (used_math()) { + load_fpu(¤t->thread.fpu); + } else { + /* First time FPU user. */ + load_fpu(&init_fpuregs); +#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC) + current->thread.fpu.UDF_trap = init_fpuregs.UDF_trap; +#endif + set_used_math(); + } + +} + +inline void fill_sigfpe_signo(unsigned int fpcsr, int *signo) +{ + if (fpcsr & FPCSR_mskOVFT) + *signo = FPE_FLTOVF; +#ifndef CONFIG_SUPPORT_DENORMAL_ARITHMETIC + else if (fpcsr & FPCSR_mskUDFT) + *signo = FPE_FLTUND; +#endif + else if (fpcsr & FPCSR_mskIVOT) + *signo = FPE_FLTINV; + else if (fpcsr & FPCSR_mskDBZT) + *signo = FPE_FLTDIV; + else if (fpcsr & FPCSR_mskIEXT) + *signo = FPE_FLTRES; +} + +inline void handle_fpu_exception(struct pt_regs *regs) +{ + unsigned int fpcsr; + int si_code = 0, si_signo = SIGFPE; +#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC) + unsigned long redo_except = FPCSR_mskDNIT|FPCSR_mskUDFT; +#else + unsigned long redo_except = FPCSR_mskDNIT; +#endif + + lose_fpu(); + fpcsr = current->thread.fpu.fpcsr; + + if (fpcsr & redo_except) { +#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC) + if (fpcsr & FPCSR_mskUDFT) + current->thread.fpu.fpcsr &= ~FPCSR_mskIEX; +#endif + si_signo = do_fpuemu(regs, ¤t->thread.fpu); + fpcsr = current->thread.fpu.fpcsr; + if (!si_signo) + goto done; + } else if (fpcsr & FPCSR_mskRIT) { + if (!user_mode(regs)) + do_exit(SIGILL); + si_signo = SIGILL; + } + + + switch (si_signo) { + case SIGFPE: + fill_sigfpe_signo(fpcsr, &si_code); + break; + case SIGILL: + show_regs(regs); + si_code = ILL_COPROC; + break; + case SIGBUS: + si_code = BUS_ADRERR; + break; + default: + break; + } + + force_sig_fault(si_signo, si_code, + (void __user *)instruction_pointer(regs), current); +done: + own_fpu(); +} + +bool do_fpu_exception(unsigned int subtype, struct pt_regs *regs) +{ + int done = true; + /* Coprocessor disabled exception */ + if (subtype == FPU_DISABLE_EXCEPTION) { + preempt_disable(); + do_fpu_context_switch(regs); + preempt_enable(); + } + /* Coprocessor exception such as underflow and overflow */ + else if (subtype == FPU_EXCEPTION) + handle_fpu_exception(regs); + else + done = false; + return done; +} diff --git a/arch/nds32/kernel/head.S b/arch/nds32/kernel/head.S index c5fdae174ced..db64b78b1232 100644 --- a/arch/nds32/kernel/head.S +++ b/arch/nds32/kernel/head.S @@ -123,21 +123,12 @@ _image_size_check: andi $r0, $r0, MMU_CFG_mskTBS srli $r6, $r6, MMU_CFG_offTBW srli $r0, $r0, MMU_CFG_offTBS - /* - * we just map the kernel to the maximum way - 1 of tlb - * reserver one way for UART VA mapping - * it will cause page fault if UART mapping cover the kernel mapping - * - * direct mapping is not supported now. - */ - li $r2, 't' - beqz $r6, __error ! MMU_CFG.TBW = 0 is direct mappin + addi $r6, $r6, #0x1 ! MMU_CFG.TBW value -> meaning addi $r0, $r0, #0x2 ! MMU_CFG.TBS value -> meaning sll $r0, $r6, $r0 ! entries = k-way * n-set mul $r6, $r0, $r5 ! max size = entries * page size /* check kernel image size */ la $r3, (_end - PAGE_OFFSET) - li $r2, 's' bgt $r3, $r6, __error li $r2, #(PHYS_OFFSET + TLB_DATA_kernel_text_attr) @@ -160,7 +151,7 @@ _tlb: #endif mtsr $r3, $TLB_MISC - mfsr $r0, $MISC_CTL ! Enable BTB and RTP and shadow sp + mfsr $r0, $MISC_CTL ! Enable BTB, RTP, shadow sp, and HW_PRE ori $r0, $r0, #MISC_init mtsr $r0, $MISC_CTL diff --git a/arch/nds32/kernel/perf_event_cpu.c b/arch/nds32/kernel/perf_event_cpu.c new file mode 100644 index 000000000000..5e00ce54d0ff --- /dev/null +++ b/arch/nds32/kernel/perf_event_cpu.c @@ -0,0 +1,1522 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2008-2017 Andes Technology Corporation + * + * Reference ARMv7: Jean Pihet <jpihet@mvista.com> + * 2010 (c) MontaVista Software, LLC. + */ + +#include <linux/perf_event.h> +#include <linux/bitmap.h> +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/pm_runtime.h> +#include <linux/ftrace.h> +#include <linux/uaccess.h> +#include <linux/sched/clock.h> +#include <linux/percpu-defs.h> + +#include <asm/pmu.h> +#include <asm/irq_regs.h> +#include <asm/nds32.h> +#include <asm/stacktrace.h> +#include <asm/perf_event.h> +#include <nds32_intrinsic.h> + +/* Set at runtime when we know what CPU type we are. */ +static struct nds32_pmu *cpu_pmu; + +static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events); +static void nds32_pmu_start(struct nds32_pmu *cpu_pmu); +static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu); +static struct platform_device_id cpu_pmu_plat_device_ids[] = { + {.name = "nds32-pfm"}, + {}, +}; + +static int nds32_pmu_map_cache_event(const unsigned int (*cache_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX], u64 config) +{ + unsigned int cache_type, cache_op, cache_result, ret; + + cache_type = (config >> 0) & 0xff; + if (cache_type >= PERF_COUNT_HW_CACHE_MAX) + return -EINVAL; + + cache_op = (config >> 8) & 0xff; + if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) + return -EINVAL; + + cache_result = (config >> 16) & 0xff; + if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + ret = (int)(*cache_map)[cache_type][cache_op][cache_result]; + + if (ret == CACHE_OP_UNSUPPORTED) + return -ENOENT; + + return ret; +} + +static int +nds32_pmu_map_hw_event(const unsigned int (*event_map)[PERF_COUNT_HW_MAX], + u64 config) +{ + int mapping; + + if (config >= PERF_COUNT_HW_MAX) + return -ENOENT; + + mapping = (*event_map)[config]; + return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping; +} + +static int nds32_pmu_map_raw_event(u32 raw_event_mask, u64 config) +{ + int ev_type = (int)(config & raw_event_mask); + int idx = config >> 8; + + switch (idx) { + case 0: + ev_type = PFM_OFFSET_MAGIC_0 + ev_type; + if (ev_type >= SPAV3_0_SEL_LAST || ev_type <= SPAV3_0_SEL_BASE) + return -ENOENT; + break; + case 1: + ev_type = PFM_OFFSET_MAGIC_1 + ev_type; + if (ev_type >= SPAV3_1_SEL_LAST || ev_type <= SPAV3_1_SEL_BASE) + return -ENOENT; + break; + case 2: + ev_type = PFM_OFFSET_MAGIC_2 + ev_type; + if (ev_type >= SPAV3_2_SEL_LAST || ev_type <= SPAV3_2_SEL_BASE) + return -ENOENT; + break; + default: + return -ENOENT; + } + + return ev_type; +} + +int +nds32_pmu_map_event(struct perf_event *event, + const unsigned int (*event_map)[PERF_COUNT_HW_MAX], + const unsigned int (*cache_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask) +{ + u64 config = event->attr.config; + + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + return nds32_pmu_map_hw_event(event_map, config); + case PERF_TYPE_HW_CACHE: + return nds32_pmu_map_cache_event(cache_map, config); + case PERF_TYPE_RAW: + return nds32_pmu_map_raw_event(raw_event_mask, config); + } + + return -ENOENT; +} + +static int nds32_spav3_map_event(struct perf_event *event) +{ + return nds32_pmu_map_event(event, &nds32_pfm_perf_map, + &nds32_pfm_perf_cache_map, SOFTWARE_EVENT_MASK); +} + +static inline u32 nds32_pfm_getreset_flags(void) +{ + /* Read overflow status */ + u32 val = __nds32__mfsr(NDS32_SR_PFM_CTL); + u32 old_val = val; + + /* Write overflow bit to clear status, and others keep it 0 */ + u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]; + + __nds32__mtsr(val | ov_flag, NDS32_SR_PFM_CTL); + + return old_val; +} + +static inline int nds32_pfm_has_overflowed(u32 pfm) +{ + u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]; + + return pfm & ov_flag; +} + +static inline int nds32_pfm_counter_has_overflowed(u32 pfm, int idx) +{ + u32 mask = 0; + + switch (idx) { + case 0: + mask = PFM_CTL_OVF[0]; + break; + case 1: + mask = PFM_CTL_OVF[1]; + break; + case 2: + mask = PFM_CTL_OVF[2]; + break; + default: + pr_err("%s index wrong\n", __func__); + break; + } + return pfm & mask; +} + +/* + * Set the next IRQ period, based on the hwc->period_left value. + * To be called with the event disabled in hw: + */ +int nds32_pmu_event_set_period(struct perf_event *event) +{ + struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + s64 left = local64_read(&hwc->period_left); + s64 period = hwc->sample_period; + int ret = 0; + + /* The period may have been changed by PERF_EVENT_IOC_PERIOD */ + if (unlikely(period != hwc->last_period)) + left = period - (hwc->last_period - left); + + if (unlikely(left <= -period)) { + left = period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (unlikely(left <= 0)) { + left += period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + ret = 1; + } + + if (left > (s64)nds32_pmu->max_period) + left = nds32_pmu->max_period; + + /* + * The hw event starts counting from this event offset, + * mark it to be able to extract future "deltas": + */ + local64_set(&hwc->prev_count, (u64)(-left)); + + nds32_pmu->write_counter(event, (u64)(-left) & nds32_pmu->max_period); + + perf_event_update_userpage(event); + + return ret; +} + +static irqreturn_t nds32_pmu_handle_irq(int irq_num, void *dev) +{ + u32 pfm; + struct perf_sample_data data; + struct nds32_pmu *cpu_pmu = (struct nds32_pmu *)dev; + struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); + struct pt_regs *regs; + int idx; + /* + * Get and reset the IRQ flags + */ + pfm = nds32_pfm_getreset_flags(); + + /* + * Did an overflow occur? + */ + if (!nds32_pfm_has_overflowed(pfm)) + return IRQ_NONE; + + /* + * Handle the counter(s) overflow(s) + */ + regs = get_irq_regs(); + + nds32_pmu_stop(cpu_pmu); + for (idx = 0; idx < cpu_pmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + /* Ignore if we don't have an event. */ + if (!event) + continue; + + /* + * We have a single interrupt for all counters. Check that + * each counter has overflowed before we process it. + */ + if (!nds32_pfm_counter_has_overflowed(pfm, idx)) + continue; + + hwc = &event->hw; + nds32_pmu_event_update(event); + perf_sample_data_init(&data, 0, hwc->last_period); + if (!nds32_pmu_event_set_period(event)) + continue; + + if (perf_event_overflow(event, &data, regs)) + cpu_pmu->disable(event); + } + nds32_pmu_start(cpu_pmu); + /* + * Handle the pending perf events. + * + * Note: this call *must* be run with interrupts disabled. For + * platforms that can have the PMU interrupts raised as an NMI, this + * will not work. + */ + irq_work_run(); + + return IRQ_HANDLED; +} + +static inline int nds32_pfm_counter_valid(struct nds32_pmu *cpu_pmu, int idx) +{ + return ((idx >= 0) && (idx < cpu_pmu->num_events)); +} + +static inline int nds32_pfm_disable_counter(int idx) +{ + unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL); + u32 mask = 0; + + mask = PFM_CTL_EN[idx]; + val &= ~mask; + val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); + __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL); + return idx; +} + +/* + * Add an event filter to a given event. + */ +static int nds32_pmu_set_event_filter(struct hw_perf_event *event, + struct perf_event_attr *attr) +{ + unsigned long config_base = 0; + int idx = event->idx; + unsigned long no_kernel_tracing = 0; + unsigned long no_user_tracing = 0; + /* If index is -1, do not do anything */ + if (idx == -1) + return 0; + + no_kernel_tracing = PFM_CTL_KS[idx]; + no_user_tracing = PFM_CTL_KU[idx]; + /* + * Default: enable both kernel and user mode tracing. + */ + if (attr->exclude_user) + config_base |= no_user_tracing; + + if (attr->exclude_kernel) + config_base |= no_kernel_tracing; + + /* + * Install the filter into config_base as this is used to + * construct the event type. + */ + event->config_base |= config_base; + return 0; +} + +static inline void nds32_pfm_write_evtsel(int idx, u32 evnum) +{ + u32 offset = 0; + u32 ori_val = __nds32__mfsr(NDS32_SR_PFM_CTL); + u32 ev_mask = 0; + u32 no_kernel_mask = 0; + u32 no_user_mask = 0; + u32 val; + + offset = PFM_CTL_OFFSEL[idx]; + /* Clear previous mode selection, and write new one */ + no_kernel_mask = PFM_CTL_KS[idx]; + no_user_mask = PFM_CTL_KU[idx]; + ori_val &= ~no_kernel_mask; + ori_val &= ~no_user_mask; + if (evnum & no_kernel_mask) + ori_val |= no_kernel_mask; + + if (evnum & no_user_mask) + ori_val |= no_user_mask; + + /* Clear previous event selection */ + ev_mask = PFM_CTL_SEL[idx]; + ori_val &= ~ev_mask; + evnum &= SOFTWARE_EVENT_MASK; + + /* undo the linear mapping */ + evnum = get_converted_evet_hw_num(evnum); + val = ori_val | (evnum << offset); + val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); + __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL); +} + +static inline int nds32_pfm_enable_counter(int idx) +{ + unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL); + u32 mask = 0; + + mask = PFM_CTL_EN[idx]; + val |= mask; + val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); + __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL); + return idx; +} + +static inline int nds32_pfm_enable_intens(int idx) +{ + unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL); + u32 mask = 0; + + mask = PFM_CTL_IE[idx]; + val |= mask; + val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); + __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL); + return idx; +} + +static inline int nds32_pfm_disable_intens(int idx) +{ + unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL); + u32 mask = 0; + + mask = PFM_CTL_IE[idx]; + val &= ~mask; + val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); + __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL); + return idx; +} + +static int event_requires_mode_exclusion(struct perf_event_attr *attr) +{ + /* Other modes NDS32 does not support */ + return attr->exclude_user || attr->exclude_kernel; +} + +static void nds32_pmu_enable_event(struct perf_event *event) +{ + unsigned long flags; + unsigned int evnum = 0; + struct hw_perf_event *hwc = &event->hw; + struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu); + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; + + if (!nds32_pfm_counter_valid(cpu_pmu, idx)) { + pr_err("CPU enabling wrong pfm counter IRQ enable\n"); + return; + } + + /* + * Enable counter and interrupt, and set the counter to count + * the event that we're interested in. + */ + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* + * Disable counter + */ + nds32_pfm_disable_counter(idx); + + /* + * Check whether we need to exclude the counter from certain modes. + */ + if ((!cpu_pmu->set_event_filter || + cpu_pmu->set_event_filter(hwc, &event->attr)) && + event_requires_mode_exclusion(&event->attr)) { + pr_notice + ("NDS32 performance counters do not support mode exclusion\n"); + hwc->config_base = 0; + } + /* Write event */ + evnum = hwc->config_base; + nds32_pfm_write_evtsel(idx, evnum); + + /* + * Enable interrupt for this counter + */ + nds32_pfm_enable_intens(idx); + + /* + * Enable counter + */ + nds32_pfm_enable_counter(idx); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void nds32_pmu_disable_event(struct perf_event *event) +{ + unsigned long flags; + struct hw_perf_event *hwc = &event->hw; + struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu); + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; + + if (!nds32_pfm_counter_valid(cpu_pmu, idx)) { + pr_err("CPU disabling wrong pfm counter IRQ enable %d\n", idx); + return; + } + + /* + * Disable counter and interrupt + */ + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* + * Disable counter + */ + nds32_pfm_disable_counter(idx); + + /* + * Disable interrupt for this counter + */ + nds32_pfm_disable_intens(idx); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static inline u32 nds32_pmu_read_counter(struct perf_event *event) +{ + struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + u32 count = 0; + + if (!nds32_pfm_counter_valid(cpu_pmu, idx)) { + pr_err("CPU reading wrong counter %d\n", idx); + } else { + switch (idx) { + case PFMC0: + count = __nds32__mfsr(NDS32_SR_PFMC0); + break; + case PFMC1: + count = __nds32__mfsr(NDS32_SR_PFMC1); + break; + case PFMC2: + count = __nds32__mfsr(NDS32_SR_PFMC2); + break; + default: + pr_err + ("%s: CPU has no performance counters %d\n", + __func__, idx); + } + } + return count; +} + +static inline void nds32_pmu_write_counter(struct perf_event *event, u32 value) +{ + struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (!nds32_pfm_counter_valid(cpu_pmu, idx)) { + pr_err("CPU writing wrong counter %d\n", idx); + } else { + switch (idx) { + case PFMC0: + __nds32__mtsr_isb(value, NDS32_SR_PFMC0); + break; + case PFMC1: + __nds32__mtsr_isb(value, NDS32_SR_PFMC1); + break; + case PFMC2: + __nds32__mtsr_isb(value, NDS32_SR_PFMC2); + break; + default: + pr_err + ("%s: CPU has no performance counters %d\n", + __func__, idx); + } + } +} + +static int nds32_pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int idx; + struct hw_perf_event *hwc = &event->hw; + /* + * Current implementation maps cycles, instruction count and cache-miss + * to specific counter. + * However, multiple of the 3 counters are able to count these events. + * + * + * SOFTWARE_EVENT_MASK mask for getting event num , + * This is defined by Jia-Rung, you can change the polocies. + * However, do not exceed 8 bits. This is hardware specific. + * The last number is SPAv3_2_SEL_LAST. + */ + unsigned long evtype = hwc->config_base & SOFTWARE_EVENT_MASK; + + idx = get_converted_event_idx(evtype); + /* + * Try to get the counter for correpsonding event + */ + if (evtype == SPAV3_0_SEL_TOTAL_CYCLES) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + if (!test_and_set_bit(NDS32_IDX_COUNTER0, cpuc->used_mask)) + return NDS32_IDX_COUNTER0; + if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask)) + return NDS32_IDX_COUNTER1; + } else if (evtype == SPAV3_1_SEL_COMPLETED_INSTRUCTION) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + else if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask)) + return NDS32_IDX_COUNTER1; + else if (!test_and_set_bit + (NDS32_IDX_CYCLE_COUNTER, cpuc->used_mask)) + return NDS32_IDX_CYCLE_COUNTER; + } else { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + } + return -EAGAIN; +} + +static void nds32_pmu_start(struct nds32_pmu *cpu_pmu) +{ + unsigned long flags; + unsigned int val; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Enable all counters , NDS PFM has 3 counters */ + val = __nds32__mfsr(NDS32_SR_PFM_CTL); + val |= (PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]); + val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); + __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu) +{ + unsigned long flags; + unsigned int val; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Disable all counters , NDS PFM has 3 counters */ + val = __nds32__mfsr(NDS32_SR_PFM_CTL); + val &= ~(PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]); + val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); + __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void nds32_pmu_reset(void *info) +{ + u32 val = 0; + + val |= (PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]); + __nds32__mtsr(val, NDS32_SR_PFM_CTL); + __nds32__mtsr(0, NDS32_SR_PFM_CTL); + __nds32__mtsr(0, NDS32_SR_PFMC0); + __nds32__mtsr(0, NDS32_SR_PFMC1); + __nds32__mtsr(0, NDS32_SR_PFMC2); +} + +static void nds32_pmu_init(struct nds32_pmu *cpu_pmu) +{ + cpu_pmu->handle_irq = nds32_pmu_handle_irq; + cpu_pmu->enable = nds32_pmu_enable_event; + cpu_pmu->disable = nds32_pmu_disable_event; + cpu_pmu->read_counter = nds32_pmu_read_counter; + cpu_pmu->write_counter = nds32_pmu_write_counter; + cpu_pmu->get_event_idx = nds32_pmu_get_event_idx; + cpu_pmu->start = nds32_pmu_start; + cpu_pmu->stop = nds32_pmu_stop; + cpu_pmu->reset = nds32_pmu_reset; + cpu_pmu->max_period = 0xFFFFFFFF; /* Maximum counts */ +}; + +static u32 nds32_read_num_pfm_events(void) +{ + /* NDS32 SPAv3 PMU support 3 counter */ + return 3; +} + +static int device_pmu_init(struct nds32_pmu *cpu_pmu) +{ + nds32_pmu_init(cpu_pmu); + /* + * This name should be devive-specific name, whatever you like :) + * I think "PMU" will be a good generic name. + */ + cpu_pmu->name = "nds32v3-pmu"; + cpu_pmu->map_event = nds32_spav3_map_event; + cpu_pmu->num_events = nds32_read_num_pfm_events(); + cpu_pmu->set_event_filter = nds32_pmu_set_event_filter; + return 0; +} + +/* + * CPU PMU identification and probing. + */ +static int probe_current_pmu(struct nds32_pmu *pmu) +{ + int ret; + + get_cpu(); + ret = -ENODEV; + /* + * If ther are various CPU types with its own PMU, initialize with + * + * the corresponding one + */ + device_pmu_init(pmu); + put_cpu(); + return ret; +} + +static void nds32_pmu_enable(struct pmu *pmu) +{ + struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu); + struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events(); + int enabled = bitmap_weight(hw_events->used_mask, + nds32_pmu->num_events); + + if (enabled) + nds32_pmu->start(nds32_pmu); +} + +static void nds32_pmu_disable(struct pmu *pmu) +{ + struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu); + + nds32_pmu->stop(nds32_pmu); +} + +static void nds32_pmu_release_hardware(struct nds32_pmu *nds32_pmu) +{ + nds32_pmu->free_irq(nds32_pmu); + pm_runtime_put_sync(&nds32_pmu->plat_device->dev); +} + +static irqreturn_t nds32_pmu_dispatch_irq(int irq, void *dev) +{ + struct nds32_pmu *nds32_pmu = (struct nds32_pmu *)dev; + int ret; + u64 start_clock, finish_clock; + + start_clock = local_clock(); + ret = nds32_pmu->handle_irq(irq, dev); + finish_clock = local_clock(); + + perf_sample_event_took(finish_clock - start_clock); + return ret; +} + +static int nds32_pmu_reserve_hardware(struct nds32_pmu *nds32_pmu) +{ + int err; + struct platform_device *pmu_device = nds32_pmu->plat_device; + + if (!pmu_device) + return -ENODEV; + + pm_runtime_get_sync(&pmu_device->dev); + err = nds32_pmu->request_irq(nds32_pmu, nds32_pmu_dispatch_irq); + if (err) { + nds32_pmu_release_hardware(nds32_pmu); + return err; + } + + return 0; +} + +static int +validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events, + struct perf_event *event) +{ + struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); + + if (is_software_event(event)) + return 1; + + if (event->pmu != pmu) + return 0; + + if (event->state < PERF_EVENT_STATE_OFF) + return 1; + + if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) + return 1; + + return nds32_pmu->get_event_idx(hw_events, event) >= 0; +} + +static int validate_group(struct perf_event *event) +{ + struct perf_event *sibling, *leader = event->group_leader; + struct pmu_hw_events fake_pmu; + DECLARE_BITMAP(fake_used_mask, MAX_COUNTERS); + /* + * Initialize the fake PMU. We only need to populate the + * used_mask for the purposes of validation. + */ + memset(fake_used_mask, 0, sizeof(fake_used_mask)); + + if (!validate_event(event->pmu, &fake_pmu, leader)) + return -EINVAL; + + for_each_sibling_event(sibling, leader) { + if (!validate_event(event->pmu, &fake_pmu, sibling)) + return -EINVAL; + } + + if (!validate_event(event->pmu, &fake_pmu, event)) + return -EINVAL; + + return 0; +} + +static int __hw_perf_event_init(struct perf_event *event) +{ + struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int mapping; + + mapping = nds32_pmu->map_event(event); + + if (mapping < 0) { + pr_debug("event %x:%llx not supported\n", event->attr.type, + event->attr.config); + return mapping; + } + + /* + * We don't assign an index until we actually place the event onto + * hardware. Use -1 to signify that we haven't decided where to put it + * yet. For SMP systems, each core has it's own PMU so we can't do any + * clever allocation or constraints checking at this point. + */ + hwc->idx = -1; + hwc->config_base = 0; + hwc->config = 0; + hwc->event_base = 0; + + /* + * Check whether we need to exclude the counter from certain modes. + */ + if ((!nds32_pmu->set_event_filter || + nds32_pmu->set_event_filter(hwc, &event->attr)) && + event_requires_mode_exclusion(&event->attr)) { + pr_debug + ("NDS performance counters do not support mode exclusion\n"); + return -EOPNOTSUPP; + } + + /* + * Store the event encoding into the config_base field. + */ + hwc->config_base |= (unsigned long)mapping; + + if (!hwc->sample_period) { + /* + * For non-sampling runs, limit the sample_period to half + * of the counter width. That way, the new counter value + * is far less likely to overtake the previous one unless + * you have some serious IRQ latency issues. + */ + hwc->sample_period = nds32_pmu->max_period >> 1; + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); + } + + if (event->group_leader != event) { + if (validate_group(event) != 0) + return -EINVAL; + } + + return 0; +} + +static int nds32_pmu_event_init(struct perf_event *event) +{ + struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); + int err = 0; + atomic_t *active_events = &nds32_pmu->active_events; + + /* does not support taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + if (nds32_pmu->map_event(event) == -ENOENT) + return -ENOENT; + + if (!atomic_inc_not_zero(active_events)) { + if (atomic_read(active_events) == 0) { + /* Register irq handler */ + err = nds32_pmu_reserve_hardware(nds32_pmu); + } + + if (!err) + atomic_inc(active_events); + } + + if (err) + return err; + + err = __hw_perf_event_init(event); + + return err; +} + +static void nds32_start(struct perf_event *event, int flags) +{ + struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + /* + * NDS pmu always has to reprogram the period, so ignore + * PERF_EF_RELOAD, see the comment below. + */ + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; + /* Set the period for the event. */ + nds32_pmu_event_set_period(event); + + nds32_pmu->enable(event); +} + +static int nds32_pmu_add(struct perf_event *event, int flags) +{ + struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); + struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events(); + struct hw_perf_event *hwc = &event->hw; + int idx; + int err = 0; + + perf_pmu_disable(event->pmu); + + /* If we don't have a space for the counter then finish early. */ + idx = nds32_pmu->get_event_idx(hw_events, event); + if (idx < 0) { + err = idx; + goto out; + } + + /* + * If there is an event in the counter we are going to use then make + * sure it is disabled. + */ + event->hw.idx = idx; + nds32_pmu->disable(event); + hw_events->events[idx] = event; + + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + if (flags & PERF_EF_START) + nds32_start(event, PERF_EF_RELOAD); + + /* Propagate our changes to the userspace mapping. */ + perf_event_update_userpage(event); + +out: + perf_pmu_enable(event->pmu); + return err; +} + +u64 nds32_pmu_event_update(struct perf_event *event) +{ + struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + u64 delta, prev_raw_count, new_raw_count; + +again: + prev_raw_count = local64_read(&hwc->prev_count); + new_raw_count = nds32_pmu->read_counter(event); + + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) { + goto again; + } + /* + * Whether overflow or not, "unsigned substraction" + * will always get their delta + */ + delta = (new_raw_count - prev_raw_count) & nds32_pmu->max_period; + + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); + + return new_raw_count; +} + +static void nds32_stop(struct perf_event *event, int flags) +{ + struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + /* + * NDS pmu always has to update the counter, so ignore + * PERF_EF_UPDATE, see comments in nds32_start(). + */ + if (!(hwc->state & PERF_HES_STOPPED)) { + nds32_pmu->disable(event); + nds32_pmu_event_update(event); + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + } +} + +static void nds32_pmu_del(struct perf_event *event, int flags) +{ + struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu); + struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events(); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + nds32_stop(event, PERF_EF_UPDATE); + hw_events->events[idx] = NULL; + clear_bit(idx, hw_events->used_mask); + + perf_event_update_userpage(event); +} + +static void nds32_pmu_read(struct perf_event *event) +{ + nds32_pmu_event_update(event); +} + +/* Please refer to SPAv3 for more hardware specific details */ +PMU_FORMAT_ATTR(event, "config:0-63"); + +static struct attribute *nds32_arch_formats_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group nds32_pmu_format_group = { + .name = "format", + .attrs = nds32_arch_formats_attr, +}; + +static ssize_t nds32_pmu_cpumask_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return 0; +} + +static DEVICE_ATTR(cpus, 0444, nds32_pmu_cpumask_show, NULL); + +static struct attribute *nds32_pmu_common_attrs[] = { + &dev_attr_cpus.attr, + NULL, +}; + +static struct attribute_group nds32_pmu_common_group = { + .attrs = nds32_pmu_common_attrs, +}; + +static const struct attribute_group *nds32_pmu_attr_groups[] = { + &nds32_pmu_format_group, + &nds32_pmu_common_group, + NULL, +}; + +static void nds32_init(struct nds32_pmu *nds32_pmu) +{ + atomic_set(&nds32_pmu->active_events, 0); + + nds32_pmu->pmu = (struct pmu) { + .pmu_enable = nds32_pmu_enable, + .pmu_disable = nds32_pmu_disable, + .attr_groups = nds32_pmu_attr_groups, + .event_init = nds32_pmu_event_init, + .add = nds32_pmu_add, + .del = nds32_pmu_del, + .start = nds32_start, + .stop = nds32_stop, + .read = nds32_pmu_read, + }; +} + +int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type) +{ + nds32_init(nds32_pmu); + pm_runtime_enable(&nds32_pmu->plat_device->dev); + pr_info("enabled with %s PMU driver, %d counters available\n", + nds32_pmu->name, nds32_pmu->num_events); + return perf_pmu_register(&nds32_pmu->pmu, nds32_pmu->name, type); +} + +static struct pmu_hw_events *cpu_pmu_get_cpu_events(void) +{ + return this_cpu_ptr(&cpu_hw_events); +} + +static int cpu_pmu_request_irq(struct nds32_pmu *cpu_pmu, irq_handler_t handler) +{ + int err, irq, irqs; + struct platform_device *pmu_device = cpu_pmu->plat_device; + + if (!pmu_device) + return -ENODEV; + + irqs = min(pmu_device->num_resources, num_possible_cpus()); + if (irqs < 1) { + pr_err("no irqs for PMUs defined\n"); + return -ENODEV; + } + + irq = platform_get_irq(pmu_device, 0); + err = request_irq(irq, handler, IRQF_NOBALANCING, "nds32-pfm", + cpu_pmu); + if (err) { + pr_err("unable to request IRQ%d for NDS PMU counters\n", + irq); + return err; + } + return 0; +} + +static void cpu_pmu_free_irq(struct nds32_pmu *cpu_pmu) +{ + int irq; + struct platform_device *pmu_device = cpu_pmu->plat_device; + + irq = platform_get_irq(pmu_device, 0); + if (irq >= 0) + free_irq(irq, cpu_pmu); +} + +static void cpu_pmu_init(struct nds32_pmu *cpu_pmu) +{ + int cpu; + struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu); + + raw_spin_lock_init(&events->pmu_lock); + + cpu_pmu->get_hw_events = cpu_pmu_get_cpu_events; + cpu_pmu->request_irq = cpu_pmu_request_irq; + cpu_pmu->free_irq = cpu_pmu_free_irq; + + /* Ensure the PMU has sane values out of reset. */ + if (cpu_pmu->reset) + on_each_cpu(cpu_pmu->reset, cpu_pmu, 1); +} + +const static struct of_device_id cpu_pmu_of_device_ids[] = { + {.compatible = "andestech,nds32v3-pmu", + .data = device_pmu_init}, + {}, +}; + +static int cpu_pmu_device_probe(struct platform_device *pdev) +{ + const struct of_device_id *of_id; + int (*init_fn)(struct nds32_pmu *nds32_pmu); + struct device_node *node = pdev->dev.of_node; + struct nds32_pmu *pmu; + int ret = -ENODEV; + + if (cpu_pmu) { + pr_notice("[perf] attempt to register multiple PMU devices!\n"); + return -ENOSPC; + } + + pmu = kzalloc(sizeof(*pmu), GFP_KERNEL); + if (!pmu) + return -ENOMEM; + + of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node); + if (node && of_id) { + init_fn = of_id->data; + ret = init_fn(pmu); + } else { + ret = probe_current_pmu(pmu); + } + + if (ret) { + pr_notice("[perf] failed to probe PMU!\n"); + goto out_free; + } + + cpu_pmu = pmu; + cpu_pmu->plat_device = pdev; + cpu_pmu_init(cpu_pmu); + ret = nds32_pmu_register(cpu_pmu, PERF_TYPE_RAW); + + if (!ret) + return 0; + +out_free: + pr_notice("[perf] failed to register PMU devices!\n"); + kfree(pmu); + return ret; +} + +static struct platform_driver cpu_pmu_driver = { + .driver = { + .name = "nds32-pfm", + .of_match_table = cpu_pmu_of_device_ids, + }, + .probe = cpu_pmu_device_probe, + .id_table = cpu_pmu_plat_device_ids, +}; + +static int __init register_pmu_driver(void) +{ + int err = 0; + + err = platform_driver_register(&cpu_pmu_driver); + if (err) + pr_notice("[perf] PMU initialization failed\n"); + else + pr_notice("[perf] PMU initialization done\n"); + + return err; +} + +device_initcall(register_pmu_driver); + +/* + * References: arch/nds32/kernel/traps.c:__dump() + * You will need to know the NDS ABI first. + */ +static int unwind_frame_kernel(struct stackframe *frame) +{ + int graph = 0; +#ifdef CONFIG_FRAME_POINTER + /* 0x3 means misalignment */ + if (!kstack_end((void *)frame->fp) && + !((unsigned long)frame->fp & 0x3) && + ((unsigned long)frame->fp >= TASK_SIZE)) { + /* + * The array index is based on the ABI, the below graph + * illustrate the reasons. + * Function call procedure: "smw" and "lmw" will always + * update SP and FP for you automatically. + * + * Stack Relative Address + * | | 0 + * ---- + * |LP| <-- SP(before smw) <-- FP(after smw) -1 + * ---- + * |FP| -2 + * ---- + * | | <-- SP(after smw) -3 + */ + frame->lp = ((unsigned long *)frame->fp)[-1]; + frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET]; + /* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */ + if (__kernel_text_address(frame->lp)) + frame->lp = ftrace_graph_ret_addr + (NULL, &graph, frame->lp, NULL); + + return 0; + } else { + return -EPERM; + } +#else + /* + * You can refer to arch/nds32/kernel/traps.c:__dump() + * Treat "sp" as "fp", but the "sp" is one frame ahead of "fp". + * And, the "sp" is not always correct. + * + * Stack Relative Address + * | | 0 + * ---- + * |LP| <-- SP(before smw) -1 + * ---- + * | | <-- SP(after smw) -2 + * ---- + */ + if (!kstack_end((void *)frame->sp)) { + frame->lp = ((unsigned long *)frame->sp)[1]; + /* TODO: How to deal with the value in first + * "sp" is not correct? + */ + if (__kernel_text_address(frame->lp)) + frame->lp = ftrace_graph_ret_addr + (tsk, &graph, frame->lp, NULL); + + frame->sp = ((unsigned long *)frame->sp) + 1; + + return 0; + } else { + return -EPERM; + } +#endif +} + +static void notrace +walk_stackframe(struct stackframe *frame, + int (*fn_record)(struct stackframe *, void *), + void *data) +{ + while (1) { + int ret; + + if (fn_record(frame, data)) + break; + + ret = unwind_frame_kernel(frame); + if (ret < 0) + break; + } +} + +/* + * Gets called by walk_stackframe() for every stackframe. This will be called + * whist unwinding the stackframe and is like a subroutine return so we use + * the PC. + */ +static int callchain_trace(struct stackframe *fr, void *data) +{ + struct perf_callchain_entry_ctx *entry = data; + + perf_callchain_store(entry, fr->lp); + return 0; +} + +/* + * Get the return address for a single stackframe and return a pointer to the + * next frame tail. + */ +static unsigned long +user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp) +{ + struct frame_tail buftail; + unsigned long lp = 0; + unsigned long *user_frame_tail = + (unsigned long *)(fp - (unsigned long)sizeof(buftail)); + + /* Check accessibility of one struct frame_tail beyond */ + if (!access_ok(VERIFY_READ, user_frame_tail, sizeof(buftail))) + return 0; + if (__copy_from_user_inatomic + (&buftail, user_frame_tail, sizeof(buftail))) + return 0; + + /* + * Refer to unwind_frame_kernel() for more illurstration + */ + lp = buftail.stack_lp; /* ((unsigned long *)fp)[-1] */ + fp = buftail.stack_fp; /* ((unsigned long *)fp)[FP_OFFSET] */ + perf_callchain_store(entry, lp); + return fp; +} + +static unsigned long +user_backtrace_opt_size(struct perf_callchain_entry_ctx *entry, + unsigned long fp) +{ + struct frame_tail_opt_size buftail; + unsigned long lp = 0; + + unsigned long *user_frame_tail = + (unsigned long *)(fp - (unsigned long)sizeof(buftail)); + + /* Check accessibility of one struct frame_tail beyond */ + if (!access_ok(VERIFY_READ, user_frame_tail, sizeof(buftail))) + return 0; + if (__copy_from_user_inatomic + (&buftail, user_frame_tail, sizeof(buftail))) + return 0; + + /* + * Refer to unwind_frame_kernel() for more illurstration + */ + lp = buftail.stack_lp; /* ((unsigned long *)fp)[-1] */ + fp = buftail.stack_fp; /* ((unsigned long *)fp)[FP_OFFSET] */ + + perf_callchain_store(entry, lp); + return fp; +} + +/* + * This will be called when the target is in user mode + * This function will only be called when we use + * "PERF_SAMPLE_CALLCHAIN" in + * kernel/events/core.c:perf_prepare_sample() + * + * How to trigger perf_callchain_[user/kernel] : + * $ perf record -e cpu-clock --call-graph fp ./program + * $ perf report --call-graph + */ +unsigned long leaf_fp; +void +perf_callchain_user(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + unsigned long fp = 0; + unsigned long gp = 0; + unsigned long lp = 0; + unsigned long sp = 0; + unsigned long *user_frame_tail; + + leaf_fp = 0; + + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* We don't support guest os callchain now */ + return; + } + + perf_callchain_store(entry, regs->ipc); + fp = regs->fp; + gp = regs->gp; + lp = regs->lp; + sp = regs->sp; + if (entry->nr < PERF_MAX_STACK_DEPTH && + (unsigned long)fp && !((unsigned long)fp & 0x7) && fp > sp) { + user_frame_tail = + (unsigned long *)(fp - (unsigned long)sizeof(fp)); + + if (!access_ok(VERIFY_READ, user_frame_tail, sizeof(fp))) + return; + + if (__copy_from_user_inatomic + (&leaf_fp, user_frame_tail, sizeof(fp))) + return; + + if (leaf_fp == lp) { + /* + * Maybe this is non leaf function + * with optimize for size, + * or maybe this is the function + * with optimize for size + */ + struct frame_tail buftail; + + user_frame_tail = + (unsigned long *)(fp - + (unsigned long)sizeof(buftail)); + + if (!access_ok + (VERIFY_READ, user_frame_tail, sizeof(buftail))) + return; + + if (__copy_from_user_inatomic + (&buftail, user_frame_tail, sizeof(buftail))) + return; + + if (buftail.stack_fp == gp) { + /* non leaf function with optimize + * for size condition + */ + struct frame_tail_opt_size buftail_opt_size; + + user_frame_tail = + (unsigned long *)(fp - (unsigned long) + sizeof(buftail_opt_size)); + + if (!access_ok(VERIFY_READ, user_frame_tail, + sizeof(buftail_opt_size))) + return; + + if (__copy_from_user_inatomic + (&buftail_opt_size, user_frame_tail, + sizeof(buftail_opt_size))) + return; + + perf_callchain_store(entry, lp); + fp = buftail_opt_size.stack_fp; + + while ((entry->nr < PERF_MAX_STACK_DEPTH) && + (unsigned long)fp && + !((unsigned long)fp & 0x7) && + fp > sp) { + sp = fp; + fp = user_backtrace_opt_size(entry, fp); + } + + } else { + /* this is the function + * without optimize for size + */ + fp = buftail.stack_fp; + perf_callchain_store(entry, lp); + while ((entry->nr < PERF_MAX_STACK_DEPTH) && + (unsigned long)fp && + !((unsigned long)fp & 0x7) && + fp > sp) { + sp = fp; + fp = user_backtrace(entry, fp); + } + } + } else { + /* this is leaf function */ + fp = leaf_fp; + perf_callchain_store(entry, lp); + + /* previous function callcahin */ + while ((entry->nr < PERF_MAX_STACK_DEPTH) && + (unsigned long)fp && + !((unsigned long)fp & 0x7) && fp > sp) { + sp = fp; + fp = user_backtrace(entry, fp); + } + } + return; + } +} + +/* This will be called when the target is in kernel mode */ +void +perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + struct stackframe fr; + + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* We don't support guest os callchain now */ + return; + } + fr.fp = regs->fp; + fr.lp = regs->lp; + fr.sp = regs->sp; + walk_stackframe(&fr, callchain_trace, entry); +} + +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + /* However, NDS32 does not support virtualization */ + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) + return perf_guest_cbs->get_guest_ip(); + + return instruction_pointer(regs); +} + +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + int misc = 0; + + /* However, NDS32 does not support virtualization */ + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (perf_guest_cbs->is_user_mode()) + misc |= PERF_RECORD_MISC_GUEST_USER; + else + misc |= PERF_RECORD_MISC_GUEST_KERNEL; + } else { + if (user_mode(regs)) + misc |= PERF_RECORD_MISC_USER; + else + misc |= PERF_RECORD_MISC_KERNEL; + } + + return misc; +} diff --git a/arch/nds32/kernel/pm.c b/arch/nds32/kernel/pm.c new file mode 100644 index 000000000000..ffa8040d8be7 --- /dev/null +++ b/arch/nds32/kernel/pm.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2008-2017 Andes Technology Corporation + +#include <linux/init.h> +#include <linux/suspend.h> +#include <linux/device.h> +#include <linux/printk.h> +#include <asm/suspend.h> +#include <nds32_intrinsic.h> + +unsigned int resume_addr; +unsigned int *phy_addr_sp_tmp; + +static void nds32_suspend2ram(void) +{ + pgd_t *pgdv; + pud_t *pudv; + pmd_t *pmdv; + pte_t *ptev; + + pgdv = (pgd_t *)__va((__nds32__mfsr(NDS32_SR_L1_PPTB) & + L1_PPTB_mskBASE)) + pgd_index((unsigned int)cpu_resume); + + pudv = pud_offset(pgdv, (unsigned int)cpu_resume); + pmdv = pmd_offset(pudv, (unsigned int)cpu_resume); + ptev = pte_offset_map(pmdv, (unsigned int)cpu_resume); + + resume_addr = ((*ptev) & TLB_DATA_mskPPN) + | ((unsigned int)cpu_resume & 0x00000fff); + + suspend2ram(); +} + +static void nds32_suspend_cpu(void) +{ + while (!(__nds32__mfsr(NDS32_SR_INT_PEND) & wake_mask)) + __asm__ volatile ("standby no_wake_grant\n\t"); +} + +static int nds32_pm_valid(suspend_state_t state) +{ + switch (state) { + case PM_SUSPEND_ON: + case PM_SUSPEND_STANDBY: + case PM_SUSPEND_MEM: + return 1; + default: + return 0; + } +} + +static int nds32_pm_enter(suspend_state_t state) +{ + pr_debug("%s:state:%d\n", __func__, state); + switch (state) { + case PM_SUSPEND_STANDBY: + nds32_suspend_cpu(); + return 0; + case PM_SUSPEND_MEM: + nds32_suspend2ram(); + return 0; + default: + return -EINVAL; + } +} + +static const struct platform_suspend_ops nds32_pm_ops = { + .valid = nds32_pm_valid, + .enter = nds32_pm_enter, +}; + +static int __init nds32_pm_init(void) +{ + pr_debug("Enter %s\n", __func__); + suspend_set_ops(&nds32_pm_ops); + return 0; +} +late_initcall(nds32_pm_init); diff --git a/arch/nds32/kernel/process.c b/arch/nds32/kernel/process.c index 65fda986e55f..ab7ab46234b1 100644 --- a/arch/nds32/kernel/process.c +++ b/arch/nds32/kernel/process.c @@ -9,15 +9,16 @@ #include <linux/uaccess.h> #include <asm/elf.h> #include <asm/proc-fns.h> +#include <asm/fpu.h> #include <linux/ptrace.h> #include <linux/reboot.h> -extern void setup_mm_for_reboot(char mode); -#ifdef CONFIG_PROC_FS -struct proc_dir_entry *proc_dir_cpu; -EXPORT_SYMBOL(proc_dir_cpu); +#if IS_ENABLED(CONFIG_LAZY_FPU) +struct task_struct *last_task_used_math; #endif +extern void setup_mm_for_reboot(char mode); + extern inline void arch_reset(char mode) { if (mode == 's') { @@ -125,15 +126,31 @@ void show_regs(struct pt_regs *regs) EXPORT_SYMBOL(show_regs); +void exit_thread(struct task_struct *tsk) +{ +#if defined(CONFIG_FPU) && defined(CONFIG_LAZY_FPU) + if (last_task_used_math == tsk) + last_task_used_math = NULL; +#endif +} + void flush_thread(void) { +#if defined(CONFIG_FPU) + clear_fpu(task_pt_regs(current)); + clear_used_math(); +# ifdef CONFIG_LAZY_FPU + if (last_task_used_math == current) + last_task_used_math = NULL; +# endif +#endif } DEFINE_PER_CPU(struct task_struct *, __entry_task); asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); int copy_thread(unsigned long clone_flags, unsigned long stack_start, - unsigned long stk_sz, struct task_struct *p) + unsigned long stk_sz, struct task_struct *p) { struct pt_regs *childregs = task_pt_regs(p); @@ -159,6 +176,22 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, p->thread.cpu_context.pc = (unsigned long)ret_from_fork; p->thread.cpu_context.sp = (unsigned long)childregs; +#if IS_ENABLED(CONFIG_FPU) + if (used_math()) { +# if !IS_ENABLED(CONFIG_LAZY_FPU) + unlazy_fpu(current); +# else + preempt_disable(); + if (last_task_used_math == current) + save_fpu(current); + preempt_enable(); +# endif + p->thread.fpu = current->thread.fpu; + clear_fpu(task_pt_regs(p)); + set_stopped_child_used_math(p); + } +#endif + #ifdef CONFIG_HWZOL childregs->lb = 0; childregs->le = 0; @@ -168,12 +201,33 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, return 0; } +#if IS_ENABLED(CONFIG_FPU) +struct task_struct *_switch_fpu(struct task_struct *prev, struct task_struct *next) +{ +#if !IS_ENABLED(CONFIG_LAZY_FPU) + unlazy_fpu(prev); +#endif + if (!(next->flags & PF_KTHREAD)) + clear_fpu(task_pt_regs(next)); + return prev; +} +#endif + /* * fill in the fpe structure for a core dump... */ int dump_fpu(struct pt_regs *regs, elf_fpregset_t * fpu) { int fpvalid = 0; +#if IS_ENABLED(CONFIG_FPU) + struct task_struct *tsk = current; + + fpvalid = tsk_used_math(tsk); + if (fpvalid) { + lose_fpu(); + memcpy(fpu, &tsk->thread.fpu, sizeof(*fpu)); + } +#endif return fpvalid; } diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c index eacc79024879..31d29d92478e 100644 --- a/arch/nds32/kernel/setup.c +++ b/arch/nds32/kernel/setup.c @@ -15,6 +15,7 @@ #include <asm/proc-fns.h> #include <asm/cache_info.h> #include <asm/elf.h> +#include <asm/fpu.h> #include <nds32_intrinsic.h> #define HWCAP_MFUSR_PC 0x000001 @@ -38,8 +39,10 @@ #define HWCAP_FPU_DP 0x040000 #define HWCAP_V2 0x080000 #define HWCAP_DX_REGS 0x100000 +#define HWCAP_HWPRE 0x200000 unsigned long cpu_id, cpu_rev, cpu_cfgid; +bool has_fpu = false; char cpu_series; char *endianness = NULL; @@ -70,8 +73,10 @@ static const char *hwcap_str[] = { "div", "mac", "l2c", - "dx_regs", + "fpu_dp", "v2", + "dx_regs", + "hw_pre", NULL, }; @@ -136,6 +141,11 @@ static void __init dump_cpu_info(int cpu) (aliasing_num - 1) << PAGE_SHIFT; } #endif +#ifdef CONFIG_FPU + /* Disable fpu and enable when it is used. */ + if (has_fpu) + disable_fpu(); +#endif } static void __init setup_cpuinfo(void) @@ -180,9 +190,10 @@ static void __init setup_cpuinfo(void) if (cpu_cfgid & 0x0004) elf_hwcap |= HWCAP_EXT2; - if (cpu_cfgid & 0x0008) + if (cpu_cfgid & 0x0008) { elf_hwcap |= HWCAP_FPU; - + has_fpu = true; + } if (cpu_cfgid & 0x0010) elf_hwcap |= HWCAP_STRING; @@ -212,6 +223,11 @@ static void __init setup_cpuinfo(void) if (__nds32__mfsr(NDS32_SR_MSC_CFG) & MSC_CFG_mskL2C) elf_hwcap |= HWCAP_L2C; +#ifdef CONFIG_HW_PRE + if (__nds32__mfsr(NDS32_SR_MISC_CTL) & MISC_CTL_makHWPRE_EN) + elf_hwcap |= HWCAP_HWPRE; +#endif + tmp = __nds32__mfsr(NDS32_SR_CACHE_CTL); if (!IS_ENABLED(CONFIG_CPU_DCACHE_DISABLE)) tmp |= CACHE_CTL_mskDC_EN; diff --git a/arch/nds32/kernel/signal.c b/arch/nds32/kernel/signal.c index 5d01f6e33cb8..5b5be082cfa4 100644 --- a/arch/nds32/kernel/signal.c +++ b/arch/nds32/kernel/signal.c @@ -12,6 +12,7 @@ #include <asm/cacheflush.h> #include <asm/ucontext.h> #include <asm/unistd.h> +#include <asm/fpu.h> #include <asm/ptrace.h> #include <asm/vdso.h> @@ -20,6 +21,60 @@ struct rt_sigframe { struct siginfo info; struct ucontext uc; }; +#if IS_ENABLED(CONFIG_FPU) +static inline int restore_sigcontext_fpu(struct pt_regs *regs, + struct sigcontext __user *sc) +{ + struct task_struct *tsk = current; + unsigned long used_math_flag; + int ret = 0; + + clear_used_math(); + __get_user_error(used_math_flag, &sc->used_math_flag, ret); + + if (!used_math_flag) + return 0; + set_used_math(); + +#if IS_ENABLED(CONFIG_LAZY_FPU) + preempt_disable(); + if (current == last_task_used_math) { + last_task_used_math = NULL; + disable_ptreg_fpu(regs); + } + preempt_enable(); +#else + clear_fpu(regs); +#endif + + return __copy_from_user(&tsk->thread.fpu, &sc->fpu, + sizeof(struct fpu_struct)); +} + +static inline int setup_sigcontext_fpu(struct pt_regs *regs, + struct sigcontext __user *sc) +{ + struct task_struct *tsk = current; + int ret = 0; + + __put_user_error(used_math(), &sc->used_math_flag, ret); + + if (!used_math()) + return ret; + + preempt_disable(); +#if IS_ENABLED(CONFIG_LAZY_FPU) + if (last_task_used_math == tsk) + save_fpu(last_task_used_math); +#else + unlazy_fpu(tsk); +#endif + ret = __copy_to_user(&sc->fpu, &tsk->thread.fpu, + sizeof(struct fpu_struct)); + preempt_enable(); + return ret; +} +#endif static int restore_sigframe(struct pt_regs *regs, struct rt_sigframe __user * sf) @@ -69,7 +124,9 @@ static int restore_sigframe(struct pt_regs *regs, __get_user_error(regs->le, &sf->uc.uc_mcontext.zol.nds32_le, err); __get_user_error(regs->lb, &sf->uc.uc_mcontext.zol.nds32_lb, err); #endif - +#if IS_ENABLED(CONFIG_FPU) + err |= restore_sigcontext_fpu(regs, &sf->uc.uc_mcontext); +#endif /* * Avoid sys_rt_sigreturn() restarting. */ @@ -153,6 +210,9 @@ setup_sigframe(struct rt_sigframe __user * sf, struct pt_regs *regs, __put_user_error(regs->le, &sf->uc.uc_mcontext.zol.nds32_le, err); __put_user_error(regs->lb, &sf->uc.uc_mcontext.zol.nds32_lb, err); #endif +#if IS_ENABLED(CONFIG_FPU) + err |= setup_sigcontext_fpu(regs, &sf->uc.uc_mcontext); +#endif __put_user_error(current->thread.trap_no, &sf->uc.uc_mcontext.trap_no, err); diff --git a/arch/nds32/kernel/sleep.S b/arch/nds32/kernel/sleep.S new file mode 100644 index 000000000000..ca4e61f3656f --- /dev/null +++ b/arch/nds32/kernel/sleep.S @@ -0,0 +1,131 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2017 Andes Technology Corporation */ + +#include <asm/memory.h> + +.data +.global sp_tmp +sp_tmp: +.long + +.text +.globl suspend2ram +.globl cpu_resume + +suspend2ram: + pushm $r0, $r31 +#if defined(CONFIG_HWZOL) + mfusr $r0, $lc + mfusr $r1, $le + mfusr $r2, $lb +#endif + mfsr $r3, $mr0 + mfsr $r4, $mr1 + mfsr $r5, $mr4 + mfsr $r6, $mr6 + mfsr $r7, $mr7 + mfsr $r8, $mr8 + mfsr $r9, $ir0 + mfsr $r10, $ir1 + mfsr $r11, $ir2 + mfsr $r12, $ir3 + mfsr $r13, $ir9 + mfsr $r14, $ir10 + mfsr $r15, $ir12 + mfsr $r16, $ir13 + mfsr $r17, $ir14 + mfsr $r18, $ir15 + pushm $r0, $r19 +#if defined(CONFIG_FPU) + jal store_fpu_for_suspend +#endif + tlbop FlushAll + isb + + // transfer $sp from va to pa + sethi $r0, hi20(PAGE_OFFSET) + ori $r0, $r0, lo12(PAGE_OFFSET) + movi $r2, PHYS_OFFSET + sub $r1, $sp, $r0 + add $r2, $r1, $r2 + + // store pa($sp) to sp_tmp + sethi $r1, hi20(sp_tmp) + swi $r2, [$r1 + lo12(sp_tmp)] + + pushm $r16, $r25 + pushm $r29, $r30 +#ifdef CONFIG_CACHE_L2 + jal dcache_wb_all_level +#else + jal cpu_dcache_wb_all +#endif + popm $r29, $r30 + popm $r16, $r25 + + // get wake_mask and loop in standby + la $r1, wake_mask + lwi $r1, [$r1] +self_loop: + standby wake_grant + mfsr $r2, $ir15 + and $r2, $r1, $r2 + beqz $r2, self_loop + + // set ipc to resume address + la $r1, resume_addr + lwi $r1, [$r1] + mtsr $r1, $ipc + isb + + // reset psw, turn off the address translation + li $r2, 0x7000a + mtsr $r2, $ipsw + isb + + iret +cpu_resume: + // translate the address of sp_tmp variable to pa + la $r1, sp_tmp + sethi $r0, hi20(PAGE_OFFSET) + ori $r0, $r0, lo12(PAGE_OFFSET) + movi $r2, PHYS_OFFSET + sub $r1, $r1, $r0 + add $r1, $r1, $r2 + + // access the sp_tmp to get stack pointer + lwi $sp, [$r1] + + popm $r0, $r19 +#if defined(CONFIG_HWZOL) + mtusr $r0, $lb + mtusr $r1, $lc + mtusr $r2, $le +#endif + mtsr $r3, $mr0 + mtsr $r4, $mr1 + mtsr $r5, $mr4 + mtsr $r6, $mr6 + mtsr $r7, $mr7 + mtsr $r8, $mr8 + // set original psw to ipsw + mtsr $r9, $ir1 + + mtsr $r11, $ir2 + mtsr $r12, $ir3 + + // set ipc to RR + la $r13, RR + mtsr $r13, $ir9 + + mtsr $r14, $ir10 + mtsr $r15, $ir12 + mtsr $r16, $ir13 + mtsr $r17, $ir14 + mtsr $r18, $ir15 + popm $r0, $r31 + + isb + iret +RR: + ret diff --git a/arch/nds32/kernel/sys_nds32.c b/arch/nds32/kernel/sys_nds32.c index 9de93ab4c52b..0835277636ce 100644 --- a/arch/nds32/kernel/sys_nds32.c +++ b/arch/nds32/kernel/sys_nds32.c @@ -6,6 +6,8 @@ #include <asm/cachectl.h> #include <asm/proc-fns.h> +#include <asm/udftrap.h> +#include <asm/fpu.h> SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, @@ -48,3 +50,33 @@ SYSCALL_DEFINE3(cacheflush, unsigned int, start, unsigned int, end, int, cache) return 0; } + +SYSCALL_DEFINE1(udftrap, int, option) +{ +#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC) + int old_udftrap; + + if (!used_math()) { + load_fpu(&init_fpuregs); + current->thread.fpu.UDF_trap = init_fpuregs.UDF_trap; + set_used_math(); + } + + old_udftrap = current->thread.fpu.UDF_trap; + switch (option) { + case DISABLE_UDFTRAP: + current->thread.fpu.UDF_trap = 0; + break; + case ENABLE_UDFTRAP: + current->thread.fpu.UDF_trap = FPCSR_mskUDFE; + break; + case GET_UDFTRAP: + break; + default: + return -EINVAL; + } + return old_udftrap; +#else + return -ENOTSUPP; +#endif +} diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c index 1496aab48998..5aa7c17da27a 100644 --- a/arch/nds32/kernel/traps.c +++ b/arch/nds32/kernel/traps.c @@ -12,6 +12,7 @@ #include <asm/proc-fns.h> #include <asm/unistd.h> +#include <asm/fpu.h> #include <linux/ptrace.h> #include <nds32_intrinsic.h> @@ -357,6 +358,21 @@ void do_dispatch_general(unsigned long entry, unsigned long addr, } else if (type == ETYPE_RESERVED_INSTRUCTION) { /* Reserved instruction */ do_revinsn(regs); + } else if (type == ETYPE_COPROCESSOR) { + /* Coprocessor */ +#if IS_ENABLED(CONFIG_FPU) + unsigned int fucop_exist = __nds32__mfsr(NDS32_SR_FUCOP_EXIST); + unsigned int cpid = ((itype & ITYPE_mskCPID) >> ITYPE_offCPID); + + if ((cpid == FPU_CPID) && + (fucop_exist & FUCOP_EXIST_mskCP0ISFPU)) { + unsigned int subtype = (itype & ITYPE_mskSTYPE); + + if (true == do_fpu_exception(subtype, regs)) + return; + } +#endif + unhandled_exceptions(entry, addr, type, regs); } else if (type == ETYPE_TRAP && swid == SWID_RAISE_INTERRUPT_LEVEL) { /* trap, used on v3 EDM target debugging workaround */ /* |