summaryrefslogtreecommitdiffstats
path: root/arch/nds32/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/nds32/kernel')
-rw-r--r--arch/nds32/kernel/Makefile7
-rw-r--r--arch/nds32/kernel/ex-entry.S24
-rw-r--r--arch/nds32/kernel/ex-exit.S13
-rw-r--r--arch/nds32/kernel/ex-scall.S8
-rw-r--r--arch/nds32/kernel/fpu.c269
-rw-r--r--arch/nds32/kernel/head.S13
-rw-r--r--arch/nds32/kernel/perf_event_cpu.c1522
-rw-r--r--arch/nds32/kernel/pm.c78
-rw-r--r--arch/nds32/kernel/process.c64
-rw-r--r--arch/nds32/kernel/setup.c22
-rw-r--r--arch/nds32/kernel/signal.c62
-rw-r--r--arch/nds32/kernel/sleep.S131
-rw-r--r--arch/nds32/kernel/sys_nds32.c32
-rw-r--r--arch/nds32/kernel/traps.c16
14 files changed, 2232 insertions, 29 deletions
diff --git a/arch/nds32/kernel/Makefile b/arch/nds32/kernel/Makefile
index 27cded39fa66..a1a1d61509e5 100644
--- a/arch/nds32/kernel/Makefile
+++ b/arch/nds32/kernel/Makefile
@@ -4,7 +4,6 @@
CPPFLAGS_vmlinux.lds := -DTEXTADDR=$(TEXTADDR)
AFLAGS_head.o := -DTEXTADDR=$(TEXTADDR)
-
# Object file lists.
obj-y := ex-entry.o ex-exit.o ex-scall.o irq.o \
@@ -14,11 +13,15 @@ obj-y := ex-entry.o ex-exit.o ex-scall.o irq.o \
obj-$(CONFIG_MODULES) += nds32_ksyms.o module.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
+obj-$(CONFIG_FPU) += fpu.o
obj-$(CONFIG_OF) += devtree.o
obj-$(CONFIG_CACHE_L2) += atl2c.o
-
+obj-$(CONFIG_PERF_EVENTS) += perf_event_cpu.o
+obj-$(CONFIG_PM) += pm.o sleep.o
extra-y := head.o vmlinux.lds
+CFLAGS_fpu.o += -mext-fpu-sp -mext-fpu-dp
+
obj-y += vdso/
diff --git a/arch/nds32/kernel/ex-entry.S b/arch/nds32/kernel/ex-entry.S
index 21a144071566..107d98a1d1b8 100644
--- a/arch/nds32/kernel/ex-entry.S
+++ b/arch/nds32/kernel/ex-entry.S
@@ -7,6 +7,7 @@
#include <asm/errno.h>
#include <asm/asm-offsets.h>
#include <asm/page.h>
+#include <asm/fpu.h>
#ifdef CONFIG_HWZOL
.macro push_zol
@@ -15,12 +16,31 @@
mfusr $r16, $LC
.endm
#endif
+ .macro skip_save_fucop_ctl
+#if defined(CONFIG_FPU)
+skip_fucop_ctl:
+ smw.adm $p0, [$sp], $p0, #0x1
+ j fucop_ctl_done
+#endif
+ .endm
.macro save_user_regs
-
+#if defined(CONFIG_FPU)
+ sethi $p0, hi20(has_fpu)
+ lbsi $p0, [$p0+lo12(has_fpu)]
+ beqz $p0, skip_fucop_ctl
+ mfsr $p0, $FUCOP_CTL
+ smw.adm $p0, [$sp], $p0, #0x1
+ bclr $p0, $p0, #FUCOP_CTL_offCP0EN
+ mtsr $p0, $FUCOP_CTL
+fucop_ctl_done:
+ /* move $SP to the bottom of pt_regs */
+ addi $sp, $sp, -FUCOP_CTL_OFFSET
+#else
smw.adm $sp, [$sp], $sp, #0x1
/* move $SP to the bottom of pt_regs */
addi $sp, $sp, -OSP_OFFSET
+#endif
/* push $r0 ~ $r25 */
smw.bim $r0, [$sp], $r25
@@ -79,6 +99,7 @@ exception_handlers:
.long eh_syscall !Syscall
.long asm_do_IRQ !IRQ
+ skip_save_fucop_ctl
common_exception_handler:
save_user_regs
mfsr $p0, $ITYPE
@@ -103,7 +124,6 @@ common_exception_handler:
mtsr $r21, $PSW
dsb
jr $p1
-
/* syscall */
1:
addi $p1, $p0, #-NDS32_VECTOR_offEXCEPTION
diff --git a/arch/nds32/kernel/ex-exit.S b/arch/nds32/kernel/ex-exit.S
index f00af92f7e22..97ba15cd4180 100644
--- a/arch/nds32/kernel/ex-exit.S
+++ b/arch/nds32/kernel/ex-exit.S
@@ -8,6 +8,7 @@
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/current.h>
+#include <asm/fpu.h>
@@ -22,10 +23,18 @@
.macro restore_user_regs_first
setgie.d
isb
-
+#if defined(CONFIG_FPU)
+ addi $sp, $sp, OSP_OFFSET
+ lmw.adm $r12, [$sp], $r25, #0x0
+ sethi $p0, hi20(has_fpu)
+ lbsi $p0, [$p0+lo12(has_fpu)]
+ beqz $p0, 2f
+ mtsr $r25, $FUCOP_CTL
+2:
+#else
addi $sp, $sp, FUCOP_CTL_OFFSET
-
lmw.adm $r12, [$sp], $r24, #0x0
+#endif
mtsr $r12, $SP_USR
mtsr $r13, $IPC
#ifdef CONFIG_HWZOL
diff --git a/arch/nds32/kernel/ex-scall.S b/arch/nds32/kernel/ex-scall.S
index 36aa87ecdabd..270050f1b7b1 100644
--- a/arch/nds32/kernel/ex-scall.S
+++ b/arch/nds32/kernel/ex-scall.S
@@ -19,11 +19,13 @@ ENTRY(__switch_to)
la $p0, __entry_task
sw $r1, [$p0]
- move $p1, $r0
- addi $p1, $p1, #THREAD_CPU_CONTEXT
+ addi $p1, $r0, #THREAD_CPU_CONTEXT
smw.bi $r6, [$p1], $r14, #0xb ! push r6~r14, fp, lp, sp
move $r25, $r1
- addi $r1, $r1, #THREAD_CPU_CONTEXT
+#if defined(CONFIG_FPU)
+ call _switch_fpu
+#endif
+ addi $r1, $r25, #THREAD_CPU_CONTEXT
lmw.bi $r6, [$r1], $r14, #0xb ! pop r6~r14, fp, lp, sp
ret
diff --git a/arch/nds32/kernel/fpu.c b/arch/nds32/kernel/fpu.c
new file mode 100644
index 000000000000..fddd40c7a16f
--- /dev/null
+++ b/arch/nds32/kernel/fpu.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2005-2018 Andes Technology Corporation
+
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/sched/signal.h>
+#include <asm/processor.h>
+#include <asm/user.h>
+#include <asm/io.h>
+#include <asm/bitfield.h>
+#include <asm/fpu.h>
+
+const struct fpu_struct init_fpuregs = {
+ .fd_regs = {[0 ... 31] = sNAN64},
+ .fpcsr = FPCSR_INIT,
+#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
+ .UDF_trap = 0
+#endif
+};
+
+void save_fpu(struct task_struct *tsk)
+{
+ unsigned int fpcfg, fpcsr;
+
+ enable_fpu();
+ fpcfg = ((__nds32__fmfcfg() & FPCFG_mskFREG) >> FPCFG_offFREG);
+ switch (fpcfg) {
+ case SP32_DP32_reg:
+ asm volatile ("fsdi $fd31, [%0+0xf8]\n\t"
+ "fsdi $fd30, [%0+0xf0]\n\t"
+ "fsdi $fd29, [%0+0xe8]\n\t"
+ "fsdi $fd28, [%0+0xe0]\n\t"
+ "fsdi $fd27, [%0+0xd8]\n\t"
+ "fsdi $fd26, [%0+0xd0]\n\t"
+ "fsdi $fd25, [%0+0xc8]\n\t"
+ "fsdi $fd24, [%0+0xc0]\n\t"
+ "fsdi $fd23, [%0+0xb8]\n\t"
+ "fsdi $fd22, [%0+0xb0]\n\t"
+ "fsdi $fd21, [%0+0xa8]\n\t"
+ "fsdi $fd20, [%0+0xa0]\n\t"
+ "fsdi $fd19, [%0+0x98]\n\t"
+ "fsdi $fd18, [%0+0x90]\n\t"
+ "fsdi $fd17, [%0+0x88]\n\t"
+ "fsdi $fd16, [%0+0x80]\n\t"
+ : /* no output */
+ : "r" (&tsk->thread.fpu)
+ : "memory");
+ /* fall through */
+ case SP32_DP16_reg:
+ asm volatile ("fsdi $fd15, [%0+0x78]\n\t"
+ "fsdi $fd14, [%0+0x70]\n\t"
+ "fsdi $fd13, [%0+0x68]\n\t"
+ "fsdi $fd12, [%0+0x60]\n\t"
+ "fsdi $fd11, [%0+0x58]\n\t"
+ "fsdi $fd10, [%0+0x50]\n\t"
+ "fsdi $fd9, [%0+0x48]\n\t"
+ "fsdi $fd8, [%0+0x40]\n\t"
+ : /* no output */
+ : "r" (&tsk->thread.fpu)
+ : "memory");
+ /* fall through */
+ case SP16_DP8_reg:
+ asm volatile ("fsdi $fd7, [%0+0x38]\n\t"
+ "fsdi $fd6, [%0+0x30]\n\t"
+ "fsdi $fd5, [%0+0x28]\n\t"
+ "fsdi $fd4, [%0+0x20]\n\t"
+ : /* no output */
+ : "r" (&tsk->thread.fpu)
+ : "memory");
+ /* fall through */
+ case SP8_DP4_reg:
+ asm volatile ("fsdi $fd3, [%1+0x18]\n\t"
+ "fsdi $fd2, [%1+0x10]\n\t"
+ "fsdi $fd1, [%1+0x8]\n\t"
+ "fsdi $fd0, [%1+0x0]\n\t"
+ "fmfcsr %0\n\t"
+ "swi %0, [%1+0x100]\n\t"
+ : "=&r" (fpcsr)
+ : "r"(&tsk->thread.fpu)
+ : "memory");
+ }
+ disable_fpu();
+}
+
+void load_fpu(const struct fpu_struct *fpregs)
+{
+ unsigned int fpcfg, fpcsr;
+
+ enable_fpu();
+ fpcfg = ((__nds32__fmfcfg() & FPCFG_mskFREG) >> FPCFG_offFREG);
+ switch (fpcfg) {
+ case SP32_DP32_reg:
+ asm volatile ("fldi $fd31, [%0+0xf8]\n\t"
+ "fldi $fd30, [%0+0xf0]\n\t"
+ "fldi $fd29, [%0+0xe8]\n\t"
+ "fldi $fd28, [%0+0xe0]\n\t"
+ "fldi $fd27, [%0+0xd8]\n\t"
+ "fldi $fd26, [%0+0xd0]\n\t"
+ "fldi $fd25, [%0+0xc8]\n\t"
+ "fldi $fd24, [%0+0xc0]\n\t"
+ "fldi $fd23, [%0+0xb8]\n\t"
+ "fldi $fd22, [%0+0xb0]\n\t"
+ "fldi $fd21, [%0+0xa8]\n\t"
+ "fldi $fd20, [%0+0xa0]\n\t"
+ "fldi $fd19, [%0+0x98]\n\t"
+ "fldi $fd18, [%0+0x90]\n\t"
+ "fldi $fd17, [%0+0x88]\n\t"
+ "fldi $fd16, [%0+0x80]\n\t"
+ : /* no output */
+ : "r" (fpregs));
+ /* fall through */
+ case SP32_DP16_reg:
+ asm volatile ("fldi $fd15, [%0+0x78]\n\t"
+ "fldi $fd14, [%0+0x70]\n\t"
+ "fldi $fd13, [%0+0x68]\n\t"
+ "fldi $fd12, [%0+0x60]\n\t"
+ "fldi $fd11, [%0+0x58]\n\t"
+ "fldi $fd10, [%0+0x50]\n\t"
+ "fldi $fd9, [%0+0x48]\n\t"
+ "fldi $fd8, [%0+0x40]\n\t"
+ : /* no output */
+ : "r" (fpregs));
+ /* fall through */
+ case SP16_DP8_reg:
+ asm volatile ("fldi $fd7, [%0+0x38]\n\t"
+ "fldi $fd6, [%0+0x30]\n\t"
+ "fldi $fd5, [%0+0x28]\n\t"
+ "fldi $fd4, [%0+0x20]\n\t"
+ : /* no output */
+ : "r" (fpregs));
+ /* fall through */
+ case SP8_DP4_reg:
+ asm volatile ("fldi $fd3, [%1+0x18]\n\t"
+ "fldi $fd2, [%1+0x10]\n\t"
+ "fldi $fd1, [%1+0x8]\n\t"
+ "fldi $fd0, [%1+0x0]\n\t"
+ "lwi %0, [%1+0x100]\n\t"
+ "fmtcsr %0\n\t":"=&r" (fpcsr)
+ : "r"(fpregs));
+ }
+ disable_fpu();
+}
+void store_fpu_for_suspend(void)
+{
+#ifdef CONFIG_LAZY_FPU
+ if (last_task_used_math != NULL)
+ save_fpu(last_task_used_math);
+ last_task_used_math = NULL;
+#else
+ if (!used_math())
+ return;
+ unlazy_fpu(current);
+#endif
+ clear_fpu(task_pt_regs(current));
+}
+inline void do_fpu_context_switch(struct pt_regs *regs)
+{
+ /* Enable to use FPU. */
+
+ if (!user_mode(regs)) {
+ pr_err("BUG: FPU is used in kernel mode.\n");
+ BUG();
+ return;
+ }
+
+ enable_ptreg_fpu(regs);
+#ifdef CONFIG_LAZY_FPU //Lazy FPU is used
+ if (last_task_used_math == current)
+ return;
+ if (last_task_used_math != NULL)
+ /* Other processes fpu state, save away */
+ save_fpu(last_task_used_math);
+ last_task_used_math = current;
+#endif
+ if (used_math()) {
+ load_fpu(&current->thread.fpu);
+ } else {
+ /* First time FPU user. */
+ load_fpu(&init_fpuregs);
+#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
+ current->thread.fpu.UDF_trap = init_fpuregs.UDF_trap;
+#endif
+ set_used_math();
+ }
+
+}
+
+inline void fill_sigfpe_signo(unsigned int fpcsr, int *signo)
+{
+ if (fpcsr & FPCSR_mskOVFT)
+ *signo = FPE_FLTOVF;
+#ifndef CONFIG_SUPPORT_DENORMAL_ARITHMETIC
+ else if (fpcsr & FPCSR_mskUDFT)
+ *signo = FPE_FLTUND;
+#endif
+ else if (fpcsr & FPCSR_mskIVOT)
+ *signo = FPE_FLTINV;
+ else if (fpcsr & FPCSR_mskDBZT)
+ *signo = FPE_FLTDIV;
+ else if (fpcsr & FPCSR_mskIEXT)
+ *signo = FPE_FLTRES;
+}
+
+inline void handle_fpu_exception(struct pt_regs *regs)
+{
+ unsigned int fpcsr;
+ int si_code = 0, si_signo = SIGFPE;
+#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
+ unsigned long redo_except = FPCSR_mskDNIT|FPCSR_mskUDFT;
+#else
+ unsigned long redo_except = FPCSR_mskDNIT;
+#endif
+
+ lose_fpu();
+ fpcsr = current->thread.fpu.fpcsr;
+
+ if (fpcsr & redo_except) {
+#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
+ if (fpcsr & FPCSR_mskUDFT)
+ current->thread.fpu.fpcsr &= ~FPCSR_mskIEX;
+#endif
+ si_signo = do_fpuemu(regs, &current->thread.fpu);
+ fpcsr = current->thread.fpu.fpcsr;
+ if (!si_signo)
+ goto done;
+ } else if (fpcsr & FPCSR_mskRIT) {
+ if (!user_mode(regs))
+ do_exit(SIGILL);
+ si_signo = SIGILL;
+ }
+
+
+ switch (si_signo) {
+ case SIGFPE:
+ fill_sigfpe_signo(fpcsr, &si_code);
+ break;
+ case SIGILL:
+ show_regs(regs);
+ si_code = ILL_COPROC;
+ break;
+ case SIGBUS:
+ si_code = BUS_ADRERR;
+ break;
+ default:
+ break;
+ }
+
+ force_sig_fault(si_signo, si_code,
+ (void __user *)instruction_pointer(regs), current);
+done:
+ own_fpu();
+}
+
+bool do_fpu_exception(unsigned int subtype, struct pt_regs *regs)
+{
+ int done = true;
+ /* Coprocessor disabled exception */
+ if (subtype == FPU_DISABLE_EXCEPTION) {
+ preempt_disable();
+ do_fpu_context_switch(regs);
+ preempt_enable();
+ }
+ /* Coprocessor exception such as underflow and overflow */
+ else if (subtype == FPU_EXCEPTION)
+ handle_fpu_exception(regs);
+ else
+ done = false;
+ return done;
+}
diff --git a/arch/nds32/kernel/head.S b/arch/nds32/kernel/head.S
index c5fdae174ced..db64b78b1232 100644
--- a/arch/nds32/kernel/head.S
+++ b/arch/nds32/kernel/head.S
@@ -123,21 +123,12 @@ _image_size_check:
andi $r0, $r0, MMU_CFG_mskTBS
srli $r6, $r6, MMU_CFG_offTBW
srli $r0, $r0, MMU_CFG_offTBS
- /*
- * we just map the kernel to the maximum way - 1 of tlb
- * reserver one way for UART VA mapping
- * it will cause page fault if UART mapping cover the kernel mapping
- *
- * direct mapping is not supported now.
- */
- li $r2, 't'
- beqz $r6, __error ! MMU_CFG.TBW = 0 is direct mappin
+ addi $r6, $r6, #0x1 ! MMU_CFG.TBW value -> meaning
addi $r0, $r0, #0x2 ! MMU_CFG.TBS value -> meaning
sll $r0, $r6, $r0 ! entries = k-way * n-set
mul $r6, $r0, $r5 ! max size = entries * page size
/* check kernel image size */
la $r3, (_end - PAGE_OFFSET)
- li $r2, 's'
bgt $r3, $r6, __error
li $r2, #(PHYS_OFFSET + TLB_DATA_kernel_text_attr)
@@ -160,7 +151,7 @@ _tlb:
#endif
mtsr $r3, $TLB_MISC
- mfsr $r0, $MISC_CTL ! Enable BTB and RTP and shadow sp
+ mfsr $r0, $MISC_CTL ! Enable BTB, RTP, shadow sp, and HW_PRE
ori $r0, $r0, #MISC_init
mtsr $r0, $MISC_CTL
diff --git a/arch/nds32/kernel/perf_event_cpu.c b/arch/nds32/kernel/perf_event_cpu.c
new file mode 100644
index 000000000000..5e00ce54d0ff
--- /dev/null
+++ b/arch/nds32/kernel/perf_event_cpu.c
@@ -0,0 +1,1522 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2008-2017 Andes Technology Corporation
+ *
+ * Reference ARMv7: Jean Pihet <jpihet@mvista.com>
+ * 2010 (c) MontaVista Software, LLC.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/bitmap.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/pm_runtime.h>
+#include <linux/ftrace.h>
+#include <linux/uaccess.h>
+#include <linux/sched/clock.h>
+#include <linux/percpu-defs.h>
+
+#include <asm/pmu.h>
+#include <asm/irq_regs.h>
+#include <asm/nds32.h>
+#include <asm/stacktrace.h>
+#include <asm/perf_event.h>
+#include <nds32_intrinsic.h>
+
+/* Set at runtime when we know what CPU type we are. */
+static struct nds32_pmu *cpu_pmu;
+
+static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
+static void nds32_pmu_start(struct nds32_pmu *cpu_pmu);
+static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu);
+static struct platform_device_id cpu_pmu_plat_device_ids[] = {
+ {.name = "nds32-pfm"},
+ {},
+};
+
+static int nds32_pmu_map_cache_event(const unsigned int (*cache_map)
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX], u64 config)
+{
+ unsigned int cache_type, cache_op, cache_result, ret;
+
+ cache_type = (config >> 0) & 0xff;
+ if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
+ return -EINVAL;
+
+ cache_op = (config >> 8) & 0xff;
+ if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
+ return -EINVAL;
+
+ cache_result = (config >> 16) & 0xff;
+ if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+ return -EINVAL;
+
+ ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
+
+ if (ret == CACHE_OP_UNSUPPORTED)
+ return -ENOENT;
+
+ return ret;
+}
+
+static int
+nds32_pmu_map_hw_event(const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
+ u64 config)
+{
+ int mapping;
+
+ if (config >= PERF_COUNT_HW_MAX)
+ return -ENOENT;
+
+ mapping = (*event_map)[config];
+ return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
+}
+
+static int nds32_pmu_map_raw_event(u32 raw_event_mask, u64 config)
+{
+ int ev_type = (int)(config & raw_event_mask);
+ int idx = config >> 8;
+
+ switch (idx) {
+ case 0:
+ ev_type = PFM_OFFSET_MAGIC_0 + ev_type;
+ if (ev_type >= SPAV3_0_SEL_LAST || ev_type <= SPAV3_0_SEL_BASE)
+ return -ENOENT;
+ break;
+ case 1:
+ ev_type = PFM_OFFSET_MAGIC_1 + ev_type;
+ if (ev_type >= SPAV3_1_SEL_LAST || ev_type <= SPAV3_1_SEL_BASE)
+ return -ENOENT;
+ break;
+ case 2:
+ ev_type = PFM_OFFSET_MAGIC_2 + ev_type;
+ if (ev_type >= SPAV3_2_SEL_LAST || ev_type <= SPAV3_2_SEL_BASE)
+ return -ENOENT;
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ return ev_type;
+}
+
+int
+nds32_pmu_map_event(struct perf_event *event,
+ const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
+ const unsigned int (*cache_map)
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask)
+{
+ u64 config = event->attr.config;
+
+ switch (event->attr.type) {
+ case PERF_TYPE_HARDWARE:
+ return nds32_pmu_map_hw_event(event_map, config);
+ case PERF_TYPE_HW_CACHE:
+ return nds32_pmu_map_cache_event(cache_map, config);
+ case PERF_TYPE_RAW:
+ return nds32_pmu_map_raw_event(raw_event_mask, config);
+ }
+
+ return -ENOENT;
+}
+
+static int nds32_spav3_map_event(struct perf_event *event)
+{
+ return nds32_pmu_map_event(event, &nds32_pfm_perf_map,
+ &nds32_pfm_perf_cache_map, SOFTWARE_EVENT_MASK);
+}
+
+static inline u32 nds32_pfm_getreset_flags(void)
+{
+ /* Read overflow status */
+ u32 val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+ u32 old_val = val;
+
+ /* Write overflow bit to clear status, and others keep it 0 */
+ u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];
+
+ __nds32__mtsr(val | ov_flag, NDS32_SR_PFM_CTL);
+
+ return old_val;
+}
+
+static inline int nds32_pfm_has_overflowed(u32 pfm)
+{
+ u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];
+
+ return pfm & ov_flag;
+}
+
+static inline int nds32_pfm_counter_has_overflowed(u32 pfm, int idx)
+{
+ u32 mask = 0;
+
+ switch (idx) {
+ case 0:
+ mask = PFM_CTL_OVF[0];
+ break;
+ case 1:
+ mask = PFM_CTL_OVF[1];
+ break;
+ case 2:
+ mask = PFM_CTL_OVF[2];
+ break;
+ default:
+ pr_err("%s index wrong\n", __func__);
+ break;
+ }
+ return pfm & mask;
+}
+
+/*
+ * Set the next IRQ period, based on the hwc->period_left value.
+ * To be called with the event disabled in hw:
+ */
+int nds32_pmu_event_set_period(struct perf_event *event)
+{
+ struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ s64 left = local64_read(&hwc->period_left);
+ s64 period = hwc->sample_period;
+ int ret = 0;
+
+ /* The period may have been changed by PERF_EVENT_IOC_PERIOD */
+ if (unlikely(period != hwc->last_period))
+ left = period - (hwc->last_period - left);
+
+ if (unlikely(left <= -period)) {
+ left = period;
+ local64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ ret = 1;
+ }
+
+ if (unlikely(left <= 0)) {
+ left += period;
+ local64_set(&hwc->period_left, left);
+ hwc->last_period = period;
+ ret = 1;
+ }
+
+ if (left > (s64)nds32_pmu->max_period)
+ left = nds32_pmu->max_period;
+
+ /*
+ * The hw event starts counting from this event offset,
+ * mark it to be able to extract future "deltas":
+ */
+ local64_set(&hwc->prev_count, (u64)(-left));
+
+ nds32_pmu->write_counter(event, (u64)(-left) & nds32_pmu->max_period);
+
+ perf_event_update_userpage(event);
+
+ return ret;
+}
+
+static irqreturn_t nds32_pmu_handle_irq(int irq_num, void *dev)
+{
+ u32 pfm;
+ struct perf_sample_data data;
+ struct nds32_pmu *cpu_pmu = (struct nds32_pmu *)dev;
+ struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
+ struct pt_regs *regs;
+ int idx;
+ /*
+ * Get and reset the IRQ flags
+ */
+ pfm = nds32_pfm_getreset_flags();
+
+ /*
+ * Did an overflow occur?
+ */
+ if (!nds32_pfm_has_overflowed(pfm))
+ return IRQ_NONE;
+
+ /*
+ * Handle the counter(s) overflow(s)
+ */
+ regs = get_irq_regs();
+
+ nds32_pmu_stop(cpu_pmu);
+ for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
+ struct perf_event *event = cpuc->events[idx];
+ struct hw_perf_event *hwc;
+
+ /* Ignore if we don't have an event. */
+ if (!event)
+ continue;
+
+ /*
+ * We have a single interrupt for all counters. Check that
+ * each counter has overflowed before we process it.
+ */
+ if (!nds32_pfm_counter_has_overflowed(pfm, idx))
+ continue;
+
+ hwc = &event->hw;
+ nds32_pmu_event_update(event);
+ perf_sample_data_init(&data, 0, hwc->last_period);
+ if (!nds32_pmu_event_set_period(event))
+ continue;
+
+ if (perf_event_overflow(event, &data, regs))
+ cpu_pmu->disable(event);
+ }
+ nds32_pmu_start(cpu_pmu);
+ /*
+ * Handle the pending perf events.
+ *
+ * Note: this call *must* be run with interrupts disabled. For
+ * platforms that can have the PMU interrupts raised as an NMI, this
+ * will not work.
+ */
+ irq_work_run();
+
+ return IRQ_HANDLED;
+}
+
+static inline int nds32_pfm_counter_valid(struct nds32_pmu *cpu_pmu, int idx)
+{
+ return ((idx >= 0) && (idx < cpu_pmu->num_events));
+}
+
+static inline int nds32_pfm_disable_counter(int idx)
+{
+ unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+ u32 mask = 0;
+
+ mask = PFM_CTL_EN[idx];
+ val &= ~mask;
+ val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+ __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+ return idx;
+}
+
+/*
+ * Add an event filter to a given event.
+ */
+static int nds32_pmu_set_event_filter(struct hw_perf_event *event,
+ struct perf_event_attr *attr)
+{
+ unsigned long config_base = 0;
+ int idx = event->idx;
+ unsigned long no_kernel_tracing = 0;
+ unsigned long no_user_tracing = 0;
+ /* If index is -1, do not do anything */
+ if (idx == -1)
+ return 0;
+
+ no_kernel_tracing = PFM_CTL_KS[idx];
+ no_user_tracing = PFM_CTL_KU[idx];
+ /*
+ * Default: enable both kernel and user mode tracing.
+ */
+ if (attr->exclude_user)
+ config_base |= no_user_tracing;
+
+ if (attr->exclude_kernel)
+ config_base |= no_kernel_tracing;
+
+ /*
+ * Install the filter into config_base as this is used to
+ * construct the event type.
+ */
+ event->config_base |= config_base;
+ return 0;
+}
+
+static inline void nds32_pfm_write_evtsel(int idx, u32 evnum)
+{
+ u32 offset = 0;
+ u32 ori_val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+ u32 ev_mask = 0;
+ u32 no_kernel_mask = 0;
+ u32 no_user_mask = 0;
+ u32 val;
+
+ offset = PFM_CTL_OFFSEL[idx];
+ /* Clear previous mode selection, and write new one */
+ no_kernel_mask = PFM_CTL_KS[idx];
+ no_user_mask = PFM_CTL_KU[idx];
+ ori_val &= ~no_kernel_mask;
+ ori_val &= ~no_user_mask;
+ if (evnum & no_kernel_mask)
+ ori_val |= no_kernel_mask;
+
+ if (evnum & no_user_mask)
+ ori_val |= no_user_mask;
+
+ /* Clear previous event selection */
+ ev_mask = PFM_CTL_SEL[idx];
+ ori_val &= ~ev_mask;
+ evnum &= SOFTWARE_EVENT_MASK;
+
+ /* undo the linear mapping */
+ evnum = get_converted_evet_hw_num(evnum);
+ val = ori_val | (evnum << offset);
+ val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+ __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+}
+
+static inline int nds32_pfm_enable_counter(int idx)
+{
+ unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+ u32 mask = 0;
+
+ mask = PFM_CTL_EN[idx];
+ val |= mask;
+ val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+ __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+ return idx;
+}
+
+static inline int nds32_pfm_enable_intens(int idx)
+{
+ unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+ u32 mask = 0;
+
+ mask = PFM_CTL_IE[idx];
+ val |= mask;
+ val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+ __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+ return idx;
+}
+
+static inline int nds32_pfm_disable_intens(int idx)
+{
+ unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+ u32 mask = 0;
+
+ mask = PFM_CTL_IE[idx];
+ val &= ~mask;
+ val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+ __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+ return idx;
+}
+
+static int event_requires_mode_exclusion(struct perf_event_attr *attr)
+{
+ /* Other modes NDS32 does not support */
+ return attr->exclude_user || attr->exclude_kernel;
+}
+
+static void nds32_pmu_enable_event(struct perf_event *event)
+{
+ unsigned long flags;
+ unsigned int evnum = 0;
+ struct hw_perf_event *hwc = &event->hw;
+ struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ int idx = hwc->idx;
+
+ if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
+ pr_err("CPU enabling wrong pfm counter IRQ enable\n");
+ return;
+ }
+
+ /*
+ * Enable counter and interrupt, and set the counter to count
+ * the event that we're interested in.
+ */
+ raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+ /*
+ * Disable counter
+ */
+ nds32_pfm_disable_counter(idx);
+
+ /*
+ * Check whether we need to exclude the counter from certain modes.
+ */
+ if ((!cpu_pmu->set_event_filter ||
+ cpu_pmu->set_event_filter(hwc, &event->attr)) &&
+ event_requires_mode_exclusion(&event->attr)) {
+ pr_notice
+ ("NDS32 performance counters do not support mode exclusion\n");
+ hwc->config_base = 0;
+ }
+ /* Write event */
+ evnum = hwc->config_base;
+ nds32_pfm_write_evtsel(idx, evnum);
+
+ /*
+ * Enable interrupt for this counter
+ */
+ nds32_pfm_enable_intens(idx);
+
+ /*
+ * Enable counter
+ */
+ nds32_pfm_enable_counter(idx);
+
+ raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void nds32_pmu_disable_event(struct perf_event *event)
+{
+ unsigned long flags;
+ struct hw_perf_event *hwc = &event->hw;
+ struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+ int idx = hwc->idx;
+
+ if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
+ pr_err("CPU disabling wrong pfm counter IRQ enable %d\n", idx);
+ return;
+ }
+
+ /*
+ * Disable counter and interrupt
+ */
+ raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+ /*
+ * Disable counter
+ */
+ nds32_pfm_disable_counter(idx);
+
+ /*
+ * Disable interrupt for this counter
+ */
+ nds32_pfm_disable_intens(idx);
+
+ raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static inline u32 nds32_pmu_read_counter(struct perf_event *event)
+{
+ struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+ u32 count = 0;
+
+ if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
+ pr_err("CPU reading wrong counter %d\n", idx);
+ } else {
+ switch (idx) {
+ case PFMC0:
+ count = __nds32__mfsr(NDS32_SR_PFMC0);
+ break;
+ case PFMC1:
+ count = __nds32__mfsr(NDS32_SR_PFMC1);
+ break;
+ case PFMC2:
+ count = __nds32__mfsr(NDS32_SR_PFMC2);
+ break;
+ default:
+ pr_err
+ ("%s: CPU has no performance counters %d\n",
+ __func__, idx);
+ }
+ }
+ return count;
+}
+
+static inline void nds32_pmu_write_counter(struct perf_event *event, u32 value)
+{
+ struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
+ pr_err("CPU writing wrong counter %d\n", idx);
+ } else {
+ switch (idx) {
+ case PFMC0:
+ __nds32__mtsr_isb(value, NDS32_SR_PFMC0);
+ break;
+ case PFMC1:
+ __nds32__mtsr_isb(value, NDS32_SR_PFMC1);
+ break;
+ case PFMC2:
+ __nds32__mtsr_isb(value, NDS32_SR_PFMC2);
+ break;
+ default:
+ pr_err
+ ("%s: CPU has no performance counters %d\n",
+ __func__, idx);
+ }
+ }
+}
+
+static int nds32_pmu_get_event_idx(struct pmu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ int idx;
+ struct hw_perf_event *hwc = &event->hw;
+ /*
+ * Current implementation maps cycles, instruction count and cache-miss
+ * to specific counter.
+ * However, multiple of the 3 counters are able to count these events.
+ *
+ *
+ * SOFTWARE_EVENT_MASK mask for getting event num ,
+ * This is defined by Jia-Rung, you can change the polocies.
+ * However, do not exceed 8 bits. This is hardware specific.
+ * The last number is SPAv3_2_SEL_LAST.
+ */
+ unsigned long evtype = hwc->config_base & SOFTWARE_EVENT_MASK;
+
+ idx = get_converted_event_idx(evtype);
+ /*
+ * Try to get the counter for correpsonding event
+ */
+ if (evtype == SPAV3_0_SEL_TOTAL_CYCLES) {
+ if (!test_and_set_bit(idx, cpuc->used_mask))
+ return idx;
+ if (!test_and_set_bit(NDS32_IDX_COUNTER0, cpuc->used_mask))
+ return NDS32_IDX_COUNTER0;
+ if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
+ return NDS32_IDX_COUNTER1;
+ } else if (evtype == SPAV3_1_SEL_COMPLETED_INSTRUCTION) {
+ if (!test_and_set_bit(idx, cpuc->used_mask))
+ return idx;
+ else if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
+ return NDS32_IDX_COUNTER1;
+ else if (!test_and_set_bit
+ (NDS32_IDX_CYCLE_COUNTER, cpuc->used_mask))
+ return NDS32_IDX_CYCLE_COUNTER;
+ } else {
+ if (!test_and_set_bit(idx, cpuc->used_mask))
+ return idx;
+ }
+ return -EAGAIN;
+}
+
+static void nds32_pmu_start(struct nds32_pmu *cpu_pmu)
+{
+ unsigned long flags;
+ unsigned int val;
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+
+ raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+ /* Enable all counters , NDS PFM has 3 counters */
+ val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+ val |= (PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]);
+ val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+ __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+
+ raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu)
+{
+ unsigned long flags;
+ unsigned int val;
+ struct pmu_hw_events *events = cpu_pmu->get_hw_events();
+
+ raw_spin_lock_irqsave(&events->pmu_lock, flags);
+
+ /* Disable all counters , NDS PFM has 3 counters */
+ val = __nds32__mfsr(NDS32_SR_PFM_CTL);
+ val &= ~(PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]);
+ val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+ __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
+
+ raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
+}
+
+static void nds32_pmu_reset(void *info)
+{
+ u32 val = 0;
+
+ val |= (PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
+ __nds32__mtsr(val, NDS32_SR_PFM_CTL);
+ __nds32__mtsr(0, NDS32_SR_PFM_CTL);
+ __nds32__mtsr(0, NDS32_SR_PFMC0);
+ __nds32__mtsr(0, NDS32_SR_PFMC1);
+ __nds32__mtsr(0, NDS32_SR_PFMC2);
+}
+
+static void nds32_pmu_init(struct nds32_pmu *cpu_pmu)
+{
+ cpu_pmu->handle_irq = nds32_pmu_handle_irq;
+ cpu_pmu->enable = nds32_pmu_enable_event;
+ cpu_pmu->disable = nds32_pmu_disable_event;
+ cpu_pmu->read_counter = nds32_pmu_read_counter;
+ cpu_pmu->write_counter = nds32_pmu_write_counter;
+ cpu_pmu->get_event_idx = nds32_pmu_get_event_idx;
+ cpu_pmu->start = nds32_pmu_start;
+ cpu_pmu->stop = nds32_pmu_stop;
+ cpu_pmu->reset = nds32_pmu_reset;
+ cpu_pmu->max_period = 0xFFFFFFFF; /* Maximum counts */
+};
+
+static u32 nds32_read_num_pfm_events(void)
+{
+ /* NDS32 SPAv3 PMU support 3 counter */
+ return 3;
+}
+
+static int device_pmu_init(struct nds32_pmu *cpu_pmu)
+{
+ nds32_pmu_init(cpu_pmu);
+ /*
+ * This name should be devive-specific name, whatever you like :)
+ * I think "PMU" will be a good generic name.
+ */
+ cpu_pmu->name = "nds32v3-pmu";
+ cpu_pmu->map_event = nds32_spav3_map_event;
+ cpu_pmu->num_events = nds32_read_num_pfm_events();
+ cpu_pmu->set_event_filter = nds32_pmu_set_event_filter;
+ return 0;
+}
+
+/*
+ * CPU PMU identification and probing.
+ */
+static int probe_current_pmu(struct nds32_pmu *pmu)
+{
+ int ret;
+
+ get_cpu();
+ ret = -ENODEV;
+ /*
+ * If ther are various CPU types with its own PMU, initialize with
+ *
+ * the corresponding one
+ */
+ device_pmu_init(pmu);
+ put_cpu();
+ return ret;
+}
+
+static void nds32_pmu_enable(struct pmu *pmu)
+{
+ struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu);
+ struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
+ int enabled = bitmap_weight(hw_events->used_mask,
+ nds32_pmu->num_events);
+
+ if (enabled)
+ nds32_pmu->start(nds32_pmu);
+}
+
+static void nds32_pmu_disable(struct pmu *pmu)
+{
+ struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu);
+
+ nds32_pmu->stop(nds32_pmu);
+}
+
+static void nds32_pmu_release_hardware(struct nds32_pmu *nds32_pmu)
+{
+ nds32_pmu->free_irq(nds32_pmu);
+ pm_runtime_put_sync(&nds32_pmu->plat_device->dev);
+}
+
+static irqreturn_t nds32_pmu_dispatch_irq(int irq, void *dev)
+{
+ struct nds32_pmu *nds32_pmu = (struct nds32_pmu *)dev;
+ int ret;
+ u64 start_clock, finish_clock;
+
+ start_clock = local_clock();
+ ret = nds32_pmu->handle_irq(irq, dev);
+ finish_clock = local_clock();
+
+ perf_sample_event_took(finish_clock - start_clock);
+ return ret;
+}
+
+static int nds32_pmu_reserve_hardware(struct nds32_pmu *nds32_pmu)
+{
+ int err;
+ struct platform_device *pmu_device = nds32_pmu->plat_device;
+
+ if (!pmu_device)
+ return -ENODEV;
+
+ pm_runtime_get_sync(&pmu_device->dev);
+ err = nds32_pmu->request_irq(nds32_pmu, nds32_pmu_dispatch_irq);
+ if (err) {
+ nds32_pmu_release_hardware(nds32_pmu);
+ return err;
+ }
+
+ return 0;
+}
+
+static int
+validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
+ struct perf_event *event)
+{
+ struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+
+ if (is_software_event(event))
+ return 1;
+
+ if (event->pmu != pmu)
+ return 0;
+
+ if (event->state < PERF_EVENT_STATE_OFF)
+ return 1;
+
+ if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
+ return 1;
+
+ return nds32_pmu->get_event_idx(hw_events, event) >= 0;
+}
+
+static int validate_group(struct perf_event *event)
+{
+ struct perf_event *sibling, *leader = event->group_leader;
+ struct pmu_hw_events fake_pmu;
+ DECLARE_BITMAP(fake_used_mask, MAX_COUNTERS);
+ /*
+ * Initialize the fake PMU. We only need to populate the
+ * used_mask for the purposes of validation.
+ */
+ memset(fake_used_mask, 0, sizeof(fake_used_mask));
+
+ if (!validate_event(event->pmu, &fake_pmu, leader))
+ return -EINVAL;
+
+ for_each_sibling_event(sibling, leader) {
+ if (!validate_event(event->pmu, &fake_pmu, sibling))
+ return -EINVAL;
+ }
+
+ if (!validate_event(event->pmu, &fake_pmu, event))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int __hw_perf_event_init(struct perf_event *event)
+{
+ struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ int mapping;
+
+ mapping = nds32_pmu->map_event(event);
+
+ if (mapping < 0) {
+ pr_debug("event %x:%llx not supported\n", event->attr.type,
+ event->attr.config);
+ return mapping;
+ }
+
+ /*
+ * We don't assign an index until we actually place the event onto
+ * hardware. Use -1 to signify that we haven't decided where to put it
+ * yet. For SMP systems, each core has it's own PMU so we can't do any
+ * clever allocation or constraints checking at this point.
+ */
+ hwc->idx = -1;
+ hwc->config_base = 0;
+ hwc->config = 0;
+ hwc->event_base = 0;
+
+ /*
+ * Check whether we need to exclude the counter from certain modes.
+ */
+ if ((!nds32_pmu->set_event_filter ||
+ nds32_pmu->set_event_filter(hwc, &event->attr)) &&
+ event_requires_mode_exclusion(&event->attr)) {
+ pr_debug
+ ("NDS performance counters do not support mode exclusion\n");
+ return -EOPNOTSUPP;
+ }
+
+ /*
+ * Store the event encoding into the config_base field.
+ */
+ hwc->config_base |= (unsigned long)mapping;
+
+ if (!hwc->sample_period) {
+ /*
+ * For non-sampling runs, limit the sample_period to half
+ * of the counter width. That way, the new counter value
+ * is far less likely to overtake the previous one unless
+ * you have some serious IRQ latency issues.
+ */
+ hwc->sample_period = nds32_pmu->max_period >> 1;
+ hwc->last_period = hwc->sample_period;
+ local64_set(&hwc->period_left, hwc->sample_period);
+ }
+
+ if (event->group_leader != event) {
+ if (validate_group(event) != 0)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int nds32_pmu_event_init(struct perf_event *event)
+{
+ struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+ int err = 0;
+ atomic_t *active_events = &nds32_pmu->active_events;
+
+ /* does not support taken branch sampling */
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
+ if (nds32_pmu->map_event(event) == -ENOENT)
+ return -ENOENT;
+
+ if (!atomic_inc_not_zero(active_events)) {
+ if (atomic_read(active_events) == 0) {
+ /* Register irq handler */
+ err = nds32_pmu_reserve_hardware(nds32_pmu);
+ }
+
+ if (!err)
+ atomic_inc(active_events);
+ }
+
+ if (err)
+ return err;
+
+ err = __hw_perf_event_init(event);
+
+ return err;
+}
+
+static void nds32_start(struct perf_event *event, int flags)
+{
+ struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ /*
+ * NDS pmu always has to reprogram the period, so ignore
+ * PERF_EF_RELOAD, see the comment below.
+ */
+ if (flags & PERF_EF_RELOAD)
+ WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
+
+ hwc->state = 0;
+ /* Set the period for the event. */
+ nds32_pmu_event_set_period(event);
+
+ nds32_pmu->enable(event);
+}
+
+static int nds32_pmu_add(struct perf_event *event, int flags)
+{
+ struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+ struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
+ struct hw_perf_event *hwc = &event->hw;
+ int idx;
+ int err = 0;
+
+ perf_pmu_disable(event->pmu);
+
+ /* If we don't have a space for the counter then finish early. */
+ idx = nds32_pmu->get_event_idx(hw_events, event);
+ if (idx < 0) {
+ err = idx;
+ goto out;
+ }
+
+ /*
+ * If there is an event in the counter we are going to use then make
+ * sure it is disabled.
+ */
+ event->hw.idx = idx;
+ nds32_pmu->disable(event);
+ hw_events->events[idx] = event;
+
+ hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
+ if (flags & PERF_EF_START)
+ nds32_start(event, PERF_EF_RELOAD);
+
+ /* Propagate our changes to the userspace mapping. */
+ perf_event_update_userpage(event);
+
+out:
+ perf_pmu_enable(event->pmu);
+ return err;
+}
+
+u64 nds32_pmu_event_update(struct perf_event *event)
+{
+ struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ u64 delta, prev_raw_count, new_raw_count;
+
+again:
+ prev_raw_count = local64_read(&hwc->prev_count);
+ new_raw_count = nds32_pmu->read_counter(event);
+
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count) != prev_raw_count) {
+ goto again;
+ }
+ /*
+ * Whether overflow or not, "unsigned substraction"
+ * will always get their delta
+ */
+ delta = (new_raw_count - prev_raw_count) & nds32_pmu->max_period;
+
+ local64_add(delta, &event->count);
+ local64_sub(delta, &hwc->period_left);
+
+ return new_raw_count;
+}
+
+static void nds32_stop(struct perf_event *event, int flags)
+{
+ struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ /*
+ * NDS pmu always has to update the counter, so ignore
+ * PERF_EF_UPDATE, see comments in nds32_start().
+ */
+ if (!(hwc->state & PERF_HES_STOPPED)) {
+ nds32_pmu->disable(event);
+ nds32_pmu_event_update(event);
+ hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
+ }
+}
+
+static void nds32_pmu_del(struct perf_event *event, int flags)
+{
+ struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
+ struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
+ struct hw_perf_event *hwc = &event->hw;
+ int idx = hwc->idx;
+
+ nds32_stop(event, PERF_EF_UPDATE);
+ hw_events->events[idx] = NULL;
+ clear_bit(idx, hw_events->used_mask);
+
+ perf_event_update_userpage(event);
+}
+
+static void nds32_pmu_read(struct perf_event *event)
+{
+ nds32_pmu_event_update(event);
+}
+
+/* Please refer to SPAv3 for more hardware specific details */
+PMU_FORMAT_ATTR(event, "config:0-63");
+
+static struct attribute *nds32_arch_formats_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group nds32_pmu_format_group = {
+ .name = "format",
+ .attrs = nds32_arch_formats_attr,
+};
+
+static ssize_t nds32_pmu_cpumask_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return 0;
+}
+
+static DEVICE_ATTR(cpus, 0444, nds32_pmu_cpumask_show, NULL);
+
+static struct attribute *nds32_pmu_common_attrs[] = {
+ &dev_attr_cpus.attr,
+ NULL,
+};
+
+static struct attribute_group nds32_pmu_common_group = {
+ .attrs = nds32_pmu_common_attrs,
+};
+
+static const struct attribute_group *nds32_pmu_attr_groups[] = {
+ &nds32_pmu_format_group,
+ &nds32_pmu_common_group,
+ NULL,
+};
+
+static void nds32_init(struct nds32_pmu *nds32_pmu)
+{
+ atomic_set(&nds32_pmu->active_events, 0);
+
+ nds32_pmu->pmu = (struct pmu) {
+ .pmu_enable = nds32_pmu_enable,
+ .pmu_disable = nds32_pmu_disable,
+ .attr_groups = nds32_pmu_attr_groups,
+ .event_init = nds32_pmu_event_init,
+ .add = nds32_pmu_add,
+ .del = nds32_pmu_del,
+ .start = nds32_start,
+ .stop = nds32_stop,
+ .read = nds32_pmu_read,
+ };
+}
+
+int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type)
+{
+ nds32_init(nds32_pmu);
+ pm_runtime_enable(&nds32_pmu->plat_device->dev);
+ pr_info("enabled with %s PMU driver, %d counters available\n",
+ nds32_pmu->name, nds32_pmu->num_events);
+ return perf_pmu_register(&nds32_pmu->pmu, nds32_pmu->name, type);
+}
+
+static struct pmu_hw_events *cpu_pmu_get_cpu_events(void)
+{
+ return this_cpu_ptr(&cpu_hw_events);
+}
+
+static int cpu_pmu_request_irq(struct nds32_pmu *cpu_pmu, irq_handler_t handler)
+{
+ int err, irq, irqs;
+ struct platform_device *pmu_device = cpu_pmu->plat_device;
+
+ if (!pmu_device)
+ return -ENODEV;
+
+ irqs = min(pmu_device->num_resources, num_possible_cpus());
+ if (irqs < 1) {
+ pr_err("no irqs for PMUs defined\n");
+ return -ENODEV;
+ }
+
+ irq = platform_get_irq(pmu_device, 0);
+ err = request_irq(irq, handler, IRQF_NOBALANCING, "nds32-pfm",
+ cpu_pmu);
+ if (err) {
+ pr_err("unable to request IRQ%d for NDS PMU counters\n",
+ irq);
+ return err;
+ }
+ return 0;
+}
+
+static void cpu_pmu_free_irq(struct nds32_pmu *cpu_pmu)
+{
+ int irq;
+ struct platform_device *pmu_device = cpu_pmu->plat_device;
+
+ irq = platform_get_irq(pmu_device, 0);
+ if (irq >= 0)
+ free_irq(irq, cpu_pmu);
+}
+
+static void cpu_pmu_init(struct nds32_pmu *cpu_pmu)
+{
+ int cpu;
+ struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
+
+ raw_spin_lock_init(&events->pmu_lock);
+
+ cpu_pmu->get_hw_events = cpu_pmu_get_cpu_events;
+ cpu_pmu->request_irq = cpu_pmu_request_irq;
+ cpu_pmu->free_irq = cpu_pmu_free_irq;
+
+ /* Ensure the PMU has sane values out of reset. */
+ if (cpu_pmu->reset)
+ on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
+}
+
+const static struct of_device_id cpu_pmu_of_device_ids[] = {
+ {.compatible = "andestech,nds32v3-pmu",
+ .data = device_pmu_init},
+ {},
+};
+
+static int cpu_pmu_device_probe(struct platform_device *pdev)
+{
+ const struct of_device_id *of_id;
+ int (*init_fn)(struct nds32_pmu *nds32_pmu);
+ struct device_node *node = pdev->dev.of_node;
+ struct nds32_pmu *pmu;
+ int ret = -ENODEV;
+
+ if (cpu_pmu) {
+ pr_notice("[perf] attempt to register multiple PMU devices!\n");
+ return -ENOSPC;
+ }
+
+ pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
+ if (!pmu)
+ return -ENOMEM;
+
+ of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node);
+ if (node && of_id) {
+ init_fn = of_id->data;
+ ret = init_fn(pmu);
+ } else {
+ ret = probe_current_pmu(pmu);
+ }
+
+ if (ret) {
+ pr_notice("[perf] failed to probe PMU!\n");
+ goto out_free;
+ }
+
+ cpu_pmu = pmu;
+ cpu_pmu->plat_device = pdev;
+ cpu_pmu_init(cpu_pmu);
+ ret = nds32_pmu_register(cpu_pmu, PERF_TYPE_RAW);
+
+ if (!ret)
+ return 0;
+
+out_free:
+ pr_notice("[perf] failed to register PMU devices!\n");
+ kfree(pmu);
+ return ret;
+}
+
+static struct platform_driver cpu_pmu_driver = {
+ .driver = {
+ .name = "nds32-pfm",
+ .of_match_table = cpu_pmu_of_device_ids,
+ },
+ .probe = cpu_pmu_device_probe,
+ .id_table = cpu_pmu_plat_device_ids,
+};
+
+static int __init register_pmu_driver(void)
+{
+ int err = 0;
+
+ err = platform_driver_register(&cpu_pmu_driver);
+ if (err)
+ pr_notice("[perf] PMU initialization failed\n");
+ else
+ pr_notice("[perf] PMU initialization done\n");
+
+ return err;
+}
+
+device_initcall(register_pmu_driver);
+
+/*
+ * References: arch/nds32/kernel/traps.c:__dump()
+ * You will need to know the NDS ABI first.
+ */
+static int unwind_frame_kernel(struct stackframe *frame)
+{
+ int graph = 0;
+#ifdef CONFIG_FRAME_POINTER
+ /* 0x3 means misalignment */
+ if (!kstack_end((void *)frame->fp) &&
+ !((unsigned long)frame->fp & 0x3) &&
+ ((unsigned long)frame->fp >= TASK_SIZE)) {
+ /*
+ * The array index is based on the ABI, the below graph
+ * illustrate the reasons.
+ * Function call procedure: "smw" and "lmw" will always
+ * update SP and FP for you automatically.
+ *
+ * Stack Relative Address
+ * | | 0
+ * ----
+ * |LP| <-- SP(before smw) <-- FP(after smw) -1
+ * ----
+ * |FP| -2
+ * ----
+ * | | <-- SP(after smw) -3
+ */
+ frame->lp = ((unsigned long *)frame->fp)[-1];
+ frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET];
+ /* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */
+ if (__kernel_text_address(frame->lp))
+ frame->lp = ftrace_graph_ret_addr
+ (NULL, &graph, frame->lp, NULL);
+
+ return 0;
+ } else {
+ return -EPERM;
+ }
+#else
+ /*
+ * You can refer to arch/nds32/kernel/traps.c:__dump()
+ * Treat "sp" as "fp", but the "sp" is one frame ahead of "fp".
+ * And, the "sp" is not always correct.
+ *
+ * Stack Relative Address
+ * | | 0
+ * ----
+ * |LP| <-- SP(before smw) -1
+ * ----
+ * | | <-- SP(after smw) -2
+ * ----
+ */
+ if (!kstack_end((void *)frame->sp)) {
+ frame->lp = ((unsigned long *)frame->sp)[1];
+ /* TODO: How to deal with the value in first
+ * "sp" is not correct?
+ */
+ if (__kernel_text_address(frame->lp))
+ frame->lp = ftrace_graph_ret_addr
+ (tsk, &graph, frame->lp, NULL);
+
+ frame->sp = ((unsigned long *)frame->sp) + 1;
+
+ return 0;
+ } else {
+ return -EPERM;
+ }
+#endif
+}
+
+static void notrace
+walk_stackframe(struct stackframe *frame,
+ int (*fn_record)(struct stackframe *, void *),
+ void *data)
+{
+ while (1) {
+ int ret;
+
+ if (fn_record(frame, data))
+ break;
+
+ ret = unwind_frame_kernel(frame);
+ if (ret < 0)
+ break;
+ }
+}
+
+/*
+ * Gets called by walk_stackframe() for every stackframe. This will be called
+ * whist unwinding the stackframe and is like a subroutine return so we use
+ * the PC.
+ */
+static int callchain_trace(struct stackframe *fr, void *data)
+{
+ struct perf_callchain_entry_ctx *entry = data;
+
+ perf_callchain_store(entry, fr->lp);
+ return 0;
+}
+
+/*
+ * Get the return address for a single stackframe and return a pointer to the
+ * next frame tail.
+ */
+static unsigned long
+user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp)
+{
+ struct frame_tail buftail;
+ unsigned long lp = 0;
+ unsigned long *user_frame_tail =
+ (unsigned long *)(fp - (unsigned long)sizeof(buftail));
+
+ /* Check accessibility of one struct frame_tail beyond */
+ if (!access_ok(VERIFY_READ, user_frame_tail, sizeof(buftail)))
+ return 0;
+ if (__copy_from_user_inatomic
+ (&buftail, user_frame_tail, sizeof(buftail)))
+ return 0;
+
+ /*
+ * Refer to unwind_frame_kernel() for more illurstration
+ */
+ lp = buftail.stack_lp; /* ((unsigned long *)fp)[-1] */
+ fp = buftail.stack_fp; /* ((unsigned long *)fp)[FP_OFFSET] */
+ perf_callchain_store(entry, lp);
+ return fp;
+}
+
+static unsigned long
+user_backtrace_opt_size(struct perf_callchain_entry_ctx *entry,
+ unsigned long fp)
+{
+ struct frame_tail_opt_size buftail;
+ unsigned long lp = 0;
+
+ unsigned long *user_frame_tail =
+ (unsigned long *)(fp - (unsigned long)sizeof(buftail));
+
+ /* Check accessibility of one struct frame_tail beyond */
+ if (!access_ok(VERIFY_READ, user_frame_tail, sizeof(buftail)))
+ return 0;
+ if (__copy_from_user_inatomic
+ (&buftail, user_frame_tail, sizeof(buftail)))
+ return 0;
+
+ /*
+ * Refer to unwind_frame_kernel() for more illurstration
+ */
+ lp = buftail.stack_lp; /* ((unsigned long *)fp)[-1] */
+ fp = buftail.stack_fp; /* ((unsigned long *)fp)[FP_OFFSET] */
+
+ perf_callchain_store(entry, lp);
+ return fp;
+}
+
+/*
+ * This will be called when the target is in user mode
+ * This function will only be called when we use
+ * "PERF_SAMPLE_CALLCHAIN" in
+ * kernel/events/core.c:perf_prepare_sample()
+ *
+ * How to trigger perf_callchain_[user/kernel] :
+ * $ perf record -e cpu-clock --call-graph fp ./program
+ * $ perf report --call-graph
+ */
+unsigned long leaf_fp;
+void
+perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+{
+ unsigned long fp = 0;
+ unsigned long gp = 0;
+ unsigned long lp = 0;
+ unsigned long sp = 0;
+ unsigned long *user_frame_tail;
+
+ leaf_fp = 0;
+
+ if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+ /* We don't support guest os callchain now */
+ return;
+ }
+
+ perf_callchain_store(entry, regs->ipc);
+ fp = regs->fp;
+ gp = regs->gp;
+ lp = regs->lp;
+ sp = regs->sp;
+ if (entry->nr < PERF_MAX_STACK_DEPTH &&
+ (unsigned long)fp && !((unsigned long)fp & 0x7) && fp > sp) {
+ user_frame_tail =
+ (unsigned long *)(fp - (unsigned long)sizeof(fp));
+
+ if (!access_ok(VERIFY_READ, user_frame_tail, sizeof(fp)))
+ return;
+
+ if (__copy_from_user_inatomic
+ (&leaf_fp, user_frame_tail, sizeof(fp)))
+ return;
+
+ if (leaf_fp == lp) {
+ /*
+ * Maybe this is non leaf function
+ * with optimize for size,
+ * or maybe this is the function
+ * with optimize for size
+ */
+ struct frame_tail buftail;
+
+ user_frame_tail =
+ (unsigned long *)(fp -
+ (unsigned long)sizeof(buftail));
+
+ if (!access_ok
+ (VERIFY_READ, user_frame_tail, sizeof(buftail)))
+ return;
+
+ if (__copy_from_user_inatomic
+ (&buftail, user_frame_tail, sizeof(buftail)))
+ return;
+
+ if (buftail.stack_fp == gp) {
+ /* non leaf function with optimize
+ * for size condition
+ */
+ struct frame_tail_opt_size buftail_opt_size;
+
+ user_frame_tail =
+ (unsigned long *)(fp - (unsigned long)
+ sizeof(buftail_opt_size));
+
+ if (!access_ok(VERIFY_READ, user_frame_tail,
+ sizeof(buftail_opt_size)))
+ return;
+
+ if (__copy_from_user_inatomic
+ (&buftail_opt_size, user_frame_tail,
+ sizeof(buftail_opt_size)))
+ return;
+
+ perf_callchain_store(entry, lp);
+ fp = buftail_opt_size.stack_fp;
+
+ while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+ (unsigned long)fp &&
+ !((unsigned long)fp & 0x7) &&
+ fp > sp) {
+ sp = fp;
+ fp = user_backtrace_opt_size(entry, fp);
+ }
+
+ } else {
+ /* this is the function
+ * without optimize for size
+ */
+ fp = buftail.stack_fp;
+ perf_callchain_store(entry, lp);
+ while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+ (unsigned long)fp &&
+ !((unsigned long)fp & 0x7) &&
+ fp > sp) {
+ sp = fp;
+ fp = user_backtrace(entry, fp);
+ }
+ }
+ } else {
+ /* this is leaf function */
+ fp = leaf_fp;
+ perf_callchain_store(entry, lp);
+
+ /* previous function callcahin */
+ while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+ (unsigned long)fp &&
+ !((unsigned long)fp & 0x7) && fp > sp) {
+ sp = fp;
+ fp = user_backtrace(entry, fp);
+ }
+ }
+ return;
+ }
+}
+
+/* This will be called when the target is in kernel mode */
+void
+perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+{
+ struct stackframe fr;
+
+ if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+ /* We don't support guest os callchain now */
+ return;
+ }
+ fr.fp = regs->fp;
+ fr.lp = regs->lp;
+ fr.sp = regs->sp;
+ walk_stackframe(&fr, callchain_trace, entry);
+}
+
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+ /* However, NDS32 does not support virtualization */
+ if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
+ return perf_guest_cbs->get_guest_ip();
+
+ return instruction_pointer(regs);
+}
+
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+ int misc = 0;
+
+ /* However, NDS32 does not support virtualization */
+ if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+ if (perf_guest_cbs->is_user_mode())
+ misc |= PERF_RECORD_MISC_GUEST_USER;
+ else
+ misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+ } else {
+ if (user_mode(regs))
+ misc |= PERF_RECORD_MISC_USER;
+ else
+ misc |= PERF_RECORD_MISC_KERNEL;
+ }
+
+ return misc;
+}
diff --git a/arch/nds32/kernel/pm.c b/arch/nds32/kernel/pm.c
new file mode 100644
index 000000000000..ffa8040d8be7
--- /dev/null
+++ b/arch/nds32/kernel/pm.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2008-2017 Andes Technology Corporation
+
+#include <linux/init.h>
+#include <linux/suspend.h>
+#include <linux/device.h>
+#include <linux/printk.h>
+#include <asm/suspend.h>
+#include <nds32_intrinsic.h>
+
+unsigned int resume_addr;
+unsigned int *phy_addr_sp_tmp;
+
+static void nds32_suspend2ram(void)
+{
+ pgd_t *pgdv;
+ pud_t *pudv;
+ pmd_t *pmdv;
+ pte_t *ptev;
+
+ pgdv = (pgd_t *)__va((__nds32__mfsr(NDS32_SR_L1_PPTB) &
+ L1_PPTB_mskBASE)) + pgd_index((unsigned int)cpu_resume);
+
+ pudv = pud_offset(pgdv, (unsigned int)cpu_resume);
+ pmdv = pmd_offset(pudv, (unsigned int)cpu_resume);
+ ptev = pte_offset_map(pmdv, (unsigned int)cpu_resume);
+
+ resume_addr = ((*ptev) & TLB_DATA_mskPPN)
+ | ((unsigned int)cpu_resume & 0x00000fff);
+
+ suspend2ram();
+}
+
+static void nds32_suspend_cpu(void)
+{
+ while (!(__nds32__mfsr(NDS32_SR_INT_PEND) & wake_mask))
+ __asm__ volatile ("standby no_wake_grant\n\t");
+}
+
+static int nds32_pm_valid(suspend_state_t state)
+{
+ switch (state) {
+ case PM_SUSPEND_ON:
+ case PM_SUSPEND_STANDBY:
+ case PM_SUSPEND_MEM:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static int nds32_pm_enter(suspend_state_t state)
+{
+ pr_debug("%s:state:%d\n", __func__, state);
+ switch (state) {
+ case PM_SUSPEND_STANDBY:
+ nds32_suspend_cpu();
+ return 0;
+ case PM_SUSPEND_MEM:
+ nds32_suspend2ram();
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
+static const struct platform_suspend_ops nds32_pm_ops = {
+ .valid = nds32_pm_valid,
+ .enter = nds32_pm_enter,
+};
+
+static int __init nds32_pm_init(void)
+{
+ pr_debug("Enter %s\n", __func__);
+ suspend_set_ops(&nds32_pm_ops);
+ return 0;
+}
+late_initcall(nds32_pm_init);
diff --git a/arch/nds32/kernel/process.c b/arch/nds32/kernel/process.c
index 65fda986e55f..ab7ab46234b1 100644
--- a/arch/nds32/kernel/process.c
+++ b/arch/nds32/kernel/process.c
@@ -9,15 +9,16 @@
#include <linux/uaccess.h>
#include <asm/elf.h>
#include <asm/proc-fns.h>
+#include <asm/fpu.h>
#include <linux/ptrace.h>
#include <linux/reboot.h>
-extern void setup_mm_for_reboot(char mode);
-#ifdef CONFIG_PROC_FS
-struct proc_dir_entry *proc_dir_cpu;
-EXPORT_SYMBOL(proc_dir_cpu);
+#if IS_ENABLED(CONFIG_LAZY_FPU)
+struct task_struct *last_task_used_math;
#endif
+extern void setup_mm_for_reboot(char mode);
+
extern inline void arch_reset(char mode)
{
if (mode == 's') {
@@ -125,15 +126,31 @@ void show_regs(struct pt_regs *regs)
EXPORT_SYMBOL(show_regs);
+void exit_thread(struct task_struct *tsk)
+{
+#if defined(CONFIG_FPU) && defined(CONFIG_LAZY_FPU)
+ if (last_task_used_math == tsk)
+ last_task_used_math = NULL;
+#endif
+}
+
void flush_thread(void)
{
+#if defined(CONFIG_FPU)
+ clear_fpu(task_pt_regs(current));
+ clear_used_math();
+# ifdef CONFIG_LAZY_FPU
+ if (last_task_used_math == current)
+ last_task_used_math = NULL;
+# endif
+#endif
}
DEFINE_PER_CPU(struct task_struct *, __entry_task);
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
int copy_thread(unsigned long clone_flags, unsigned long stack_start,
- unsigned long stk_sz, struct task_struct *p)
+ unsigned long stk_sz, struct task_struct *p)
{
struct pt_regs *childregs = task_pt_regs(p);
@@ -159,6 +176,22 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
p->thread.cpu_context.sp = (unsigned long)childregs;
+#if IS_ENABLED(CONFIG_FPU)
+ if (used_math()) {
+# if !IS_ENABLED(CONFIG_LAZY_FPU)
+ unlazy_fpu(current);
+# else
+ preempt_disable();
+ if (last_task_used_math == current)
+ save_fpu(current);
+ preempt_enable();
+# endif
+ p->thread.fpu = current->thread.fpu;
+ clear_fpu(task_pt_regs(p));
+ set_stopped_child_used_math(p);
+ }
+#endif
+
#ifdef CONFIG_HWZOL
childregs->lb = 0;
childregs->le = 0;
@@ -168,12 +201,33 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
return 0;
}
+#if IS_ENABLED(CONFIG_FPU)
+struct task_struct *_switch_fpu(struct task_struct *prev, struct task_struct *next)
+{
+#if !IS_ENABLED(CONFIG_LAZY_FPU)
+ unlazy_fpu(prev);
+#endif
+ if (!(next->flags & PF_KTHREAD))
+ clear_fpu(task_pt_regs(next));
+ return prev;
+}
+#endif
+
/*
* fill in the fpe structure for a core dump...
*/
int dump_fpu(struct pt_regs *regs, elf_fpregset_t * fpu)
{
int fpvalid = 0;
+#if IS_ENABLED(CONFIG_FPU)
+ struct task_struct *tsk = current;
+
+ fpvalid = tsk_used_math(tsk);
+ if (fpvalid) {
+ lose_fpu();
+ memcpy(fpu, &tsk->thread.fpu, sizeof(*fpu));
+ }
+#endif
return fpvalid;
}
diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c
index eacc79024879..31d29d92478e 100644
--- a/arch/nds32/kernel/setup.c
+++ b/arch/nds32/kernel/setup.c
@@ -15,6 +15,7 @@
#include <asm/proc-fns.h>
#include <asm/cache_info.h>
#include <asm/elf.h>
+#include <asm/fpu.h>
#include <nds32_intrinsic.h>
#define HWCAP_MFUSR_PC 0x000001
@@ -38,8 +39,10 @@
#define HWCAP_FPU_DP 0x040000
#define HWCAP_V2 0x080000
#define HWCAP_DX_REGS 0x100000
+#define HWCAP_HWPRE 0x200000
unsigned long cpu_id, cpu_rev, cpu_cfgid;
+bool has_fpu = false;
char cpu_series;
char *endianness = NULL;
@@ -70,8 +73,10 @@ static const char *hwcap_str[] = {
"div",
"mac",
"l2c",
- "dx_regs",
+ "fpu_dp",
"v2",
+ "dx_regs",
+ "hw_pre",
NULL,
};
@@ -136,6 +141,11 @@ static void __init dump_cpu_info(int cpu)
(aliasing_num - 1) << PAGE_SHIFT;
}
#endif
+#ifdef CONFIG_FPU
+ /* Disable fpu and enable when it is used. */
+ if (has_fpu)
+ disable_fpu();
+#endif
}
static void __init setup_cpuinfo(void)
@@ -180,9 +190,10 @@ static void __init setup_cpuinfo(void)
if (cpu_cfgid & 0x0004)
elf_hwcap |= HWCAP_EXT2;
- if (cpu_cfgid & 0x0008)
+ if (cpu_cfgid & 0x0008) {
elf_hwcap |= HWCAP_FPU;
-
+ has_fpu = true;
+ }
if (cpu_cfgid & 0x0010)
elf_hwcap |= HWCAP_STRING;
@@ -212,6 +223,11 @@ static void __init setup_cpuinfo(void)
if (__nds32__mfsr(NDS32_SR_MSC_CFG) & MSC_CFG_mskL2C)
elf_hwcap |= HWCAP_L2C;
+#ifdef CONFIG_HW_PRE
+ if (__nds32__mfsr(NDS32_SR_MISC_CTL) & MISC_CTL_makHWPRE_EN)
+ elf_hwcap |= HWCAP_HWPRE;
+#endif
+
tmp = __nds32__mfsr(NDS32_SR_CACHE_CTL);
if (!IS_ENABLED(CONFIG_CPU_DCACHE_DISABLE))
tmp |= CACHE_CTL_mskDC_EN;
diff --git a/arch/nds32/kernel/signal.c b/arch/nds32/kernel/signal.c
index 5d01f6e33cb8..5b5be082cfa4 100644
--- a/arch/nds32/kernel/signal.c
+++ b/arch/nds32/kernel/signal.c
@@ -12,6 +12,7 @@
#include <asm/cacheflush.h>
#include <asm/ucontext.h>
#include <asm/unistd.h>
+#include <asm/fpu.h>
#include <asm/ptrace.h>
#include <asm/vdso.h>
@@ -20,6 +21,60 @@ struct rt_sigframe {
struct siginfo info;
struct ucontext uc;
};
+#if IS_ENABLED(CONFIG_FPU)
+static inline int restore_sigcontext_fpu(struct pt_regs *regs,
+ struct sigcontext __user *sc)
+{
+ struct task_struct *tsk = current;
+ unsigned long used_math_flag;
+ int ret = 0;
+
+ clear_used_math();
+ __get_user_error(used_math_flag, &sc->used_math_flag, ret);
+
+ if (!used_math_flag)
+ return 0;
+ set_used_math();
+
+#if IS_ENABLED(CONFIG_LAZY_FPU)
+ preempt_disable();
+ if (current == last_task_used_math) {
+ last_task_used_math = NULL;
+ disable_ptreg_fpu(regs);
+ }
+ preempt_enable();
+#else
+ clear_fpu(regs);
+#endif
+
+ return __copy_from_user(&tsk->thread.fpu, &sc->fpu,
+ sizeof(struct fpu_struct));
+}
+
+static inline int setup_sigcontext_fpu(struct pt_regs *regs,
+ struct sigcontext __user *sc)
+{
+ struct task_struct *tsk = current;
+ int ret = 0;
+
+ __put_user_error(used_math(), &sc->used_math_flag, ret);
+
+ if (!used_math())
+ return ret;
+
+ preempt_disable();
+#if IS_ENABLED(CONFIG_LAZY_FPU)
+ if (last_task_used_math == tsk)
+ save_fpu(last_task_used_math);
+#else
+ unlazy_fpu(tsk);
+#endif
+ ret = __copy_to_user(&sc->fpu, &tsk->thread.fpu,
+ sizeof(struct fpu_struct));
+ preempt_enable();
+ return ret;
+}
+#endif
static int restore_sigframe(struct pt_regs *regs,
struct rt_sigframe __user * sf)
@@ -69,7 +124,9 @@ static int restore_sigframe(struct pt_regs *regs,
__get_user_error(regs->le, &sf->uc.uc_mcontext.zol.nds32_le, err);
__get_user_error(regs->lb, &sf->uc.uc_mcontext.zol.nds32_lb, err);
#endif
-
+#if IS_ENABLED(CONFIG_FPU)
+ err |= restore_sigcontext_fpu(regs, &sf->uc.uc_mcontext);
+#endif
/*
* Avoid sys_rt_sigreturn() restarting.
*/
@@ -153,6 +210,9 @@ setup_sigframe(struct rt_sigframe __user * sf, struct pt_regs *regs,
__put_user_error(regs->le, &sf->uc.uc_mcontext.zol.nds32_le, err);
__put_user_error(regs->lb, &sf->uc.uc_mcontext.zol.nds32_lb, err);
#endif
+#if IS_ENABLED(CONFIG_FPU)
+ err |= setup_sigcontext_fpu(regs, &sf->uc.uc_mcontext);
+#endif
__put_user_error(current->thread.trap_no, &sf->uc.uc_mcontext.trap_no,
err);
diff --git a/arch/nds32/kernel/sleep.S b/arch/nds32/kernel/sleep.S
new file mode 100644
index 000000000000..ca4e61f3656f
--- /dev/null
+++ b/arch/nds32/kernel/sleep.S
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2017 Andes Technology Corporation */
+
+#include <asm/memory.h>
+
+.data
+.global sp_tmp
+sp_tmp:
+.long
+
+.text
+.globl suspend2ram
+.globl cpu_resume
+
+suspend2ram:
+ pushm $r0, $r31
+#if defined(CONFIG_HWZOL)
+ mfusr $r0, $lc
+ mfusr $r1, $le
+ mfusr $r2, $lb
+#endif
+ mfsr $r3, $mr0
+ mfsr $r4, $mr1
+ mfsr $r5, $mr4
+ mfsr $r6, $mr6
+ mfsr $r7, $mr7
+ mfsr $r8, $mr8
+ mfsr $r9, $ir0
+ mfsr $r10, $ir1
+ mfsr $r11, $ir2
+ mfsr $r12, $ir3
+ mfsr $r13, $ir9
+ mfsr $r14, $ir10
+ mfsr $r15, $ir12
+ mfsr $r16, $ir13
+ mfsr $r17, $ir14
+ mfsr $r18, $ir15
+ pushm $r0, $r19
+#if defined(CONFIG_FPU)
+ jal store_fpu_for_suspend
+#endif
+ tlbop FlushAll
+ isb
+
+ // transfer $sp from va to pa
+ sethi $r0, hi20(PAGE_OFFSET)
+ ori $r0, $r0, lo12(PAGE_OFFSET)
+ movi $r2, PHYS_OFFSET
+ sub $r1, $sp, $r0
+ add $r2, $r1, $r2
+
+ // store pa($sp) to sp_tmp
+ sethi $r1, hi20(sp_tmp)
+ swi $r2, [$r1 + lo12(sp_tmp)]
+
+ pushm $r16, $r25
+ pushm $r29, $r30
+#ifdef CONFIG_CACHE_L2
+ jal dcache_wb_all_level
+#else
+ jal cpu_dcache_wb_all
+#endif
+ popm $r29, $r30
+ popm $r16, $r25
+
+ // get wake_mask and loop in standby
+ la $r1, wake_mask
+ lwi $r1, [$r1]
+self_loop:
+ standby wake_grant
+ mfsr $r2, $ir15
+ and $r2, $r1, $r2
+ beqz $r2, self_loop
+
+ // set ipc to resume address
+ la $r1, resume_addr
+ lwi $r1, [$r1]
+ mtsr $r1, $ipc
+ isb
+
+ // reset psw, turn off the address translation
+ li $r2, 0x7000a
+ mtsr $r2, $ipsw
+ isb
+
+ iret
+cpu_resume:
+ // translate the address of sp_tmp variable to pa
+ la $r1, sp_tmp
+ sethi $r0, hi20(PAGE_OFFSET)
+ ori $r0, $r0, lo12(PAGE_OFFSET)
+ movi $r2, PHYS_OFFSET
+ sub $r1, $r1, $r0
+ add $r1, $r1, $r2
+
+ // access the sp_tmp to get stack pointer
+ lwi $sp, [$r1]
+
+ popm $r0, $r19
+#if defined(CONFIG_HWZOL)
+ mtusr $r0, $lb
+ mtusr $r1, $lc
+ mtusr $r2, $le
+#endif
+ mtsr $r3, $mr0
+ mtsr $r4, $mr1
+ mtsr $r5, $mr4
+ mtsr $r6, $mr6
+ mtsr $r7, $mr7
+ mtsr $r8, $mr8
+ // set original psw to ipsw
+ mtsr $r9, $ir1
+
+ mtsr $r11, $ir2
+ mtsr $r12, $ir3
+
+ // set ipc to RR
+ la $r13, RR
+ mtsr $r13, $ir9
+
+ mtsr $r14, $ir10
+ mtsr $r15, $ir12
+ mtsr $r16, $ir13
+ mtsr $r17, $ir14
+ mtsr $r18, $ir15
+ popm $r0, $r31
+
+ isb
+ iret
+RR:
+ ret
diff --git a/arch/nds32/kernel/sys_nds32.c b/arch/nds32/kernel/sys_nds32.c
index 9de93ab4c52b..0835277636ce 100644
--- a/arch/nds32/kernel/sys_nds32.c
+++ b/arch/nds32/kernel/sys_nds32.c
@@ -6,6 +6,8 @@
#include <asm/cachectl.h>
#include <asm/proc-fns.h>
+#include <asm/udftrap.h>
+#include <asm/fpu.h>
SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags,
@@ -48,3 +50,33 @@ SYSCALL_DEFINE3(cacheflush, unsigned int, start, unsigned int, end, int, cache)
return 0;
}
+
+SYSCALL_DEFINE1(udftrap, int, option)
+{
+#if IS_ENABLED(CONFIG_SUPPORT_DENORMAL_ARITHMETIC)
+ int old_udftrap;
+
+ if (!used_math()) {
+ load_fpu(&init_fpuregs);
+ current->thread.fpu.UDF_trap = init_fpuregs.UDF_trap;
+ set_used_math();
+ }
+
+ old_udftrap = current->thread.fpu.UDF_trap;
+ switch (option) {
+ case DISABLE_UDFTRAP:
+ current->thread.fpu.UDF_trap = 0;
+ break;
+ case ENABLE_UDFTRAP:
+ current->thread.fpu.UDF_trap = FPCSR_mskUDFE;
+ break;
+ case GET_UDFTRAP:
+ break;
+ default:
+ return -EINVAL;
+ }
+ return old_udftrap;
+#else
+ return -ENOTSUPP;
+#endif
+}
diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c
index 1496aab48998..5aa7c17da27a 100644
--- a/arch/nds32/kernel/traps.c
+++ b/arch/nds32/kernel/traps.c
@@ -12,6 +12,7 @@
#include <asm/proc-fns.h>
#include <asm/unistd.h>
+#include <asm/fpu.h>
#include <linux/ptrace.h>
#include <nds32_intrinsic.h>
@@ -357,6 +358,21 @@ void do_dispatch_general(unsigned long entry, unsigned long addr,
} else if (type == ETYPE_RESERVED_INSTRUCTION) {
/* Reserved instruction */
do_revinsn(regs);
+ } else if (type == ETYPE_COPROCESSOR) {
+ /* Coprocessor */
+#if IS_ENABLED(CONFIG_FPU)
+ unsigned int fucop_exist = __nds32__mfsr(NDS32_SR_FUCOP_EXIST);
+ unsigned int cpid = ((itype & ITYPE_mskCPID) >> ITYPE_offCPID);
+
+ if ((cpid == FPU_CPID) &&
+ (fucop_exist & FUCOP_EXIST_mskCP0ISFPU)) {
+ unsigned int subtype = (itype & ITYPE_mskSTYPE);
+
+ if (true == do_fpu_exception(subtype, regs))
+ return;
+ }
+#endif
+ unhandled_exceptions(entry, addr, type, regs);
} else if (type == ETYPE_TRAP && swid == SWID_RAISE_INTERRUPT_LEVEL) {
/* trap, used on v3 EDM target debugging workaround */
/*
OpenPOWER on IntegriCloud