summaryrefslogtreecommitdiffstats
path: root/arch/s390/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/kernel')
-rw-r--r--arch/s390/kernel/Makefile14
-rw-r--r--arch/s390/kernel/asm-offsets.c3
-rw-r--r--arch/s390/kernel/base.S21
-rw-r--r--arch/s390/kernel/dis.c13
-rw-r--r--arch/s390/kernel/dumpstack.c9
-rw-r--r--arch/s390/kernel/early.c70
-rw-r--r--arch/s390/kernel/early_nobss.c45
-rw-r--r--arch/s390/kernel/early_printk.c2
-rw-r--r--arch/s390/kernel/entry.S2
-rw-r--r--arch/s390/kernel/entry.h1
-rw-r--r--arch/s390/kernel/ftrace.c80
-rw-r--r--arch/s390/kernel/head64.S28
-rw-r--r--arch/s390/kernel/idle.c29
-rw-r--r--arch/s390/kernel/kexec_elf.c4
-rw-r--r--arch/s390/kernel/kexec_image.c4
-rw-r--r--arch/s390/kernel/kprobes.c61
-rw-r--r--arch/s390/kernel/machine_kexec.c4
-rw-r--r--arch/s390/kernel/machine_kexec_file.c28
-rw-r--r--arch/s390/kernel/machine_kexec_reloc.c1
-rw-r--r--arch/s390/kernel/mcount.S22
-rw-r--r--arch/s390/kernel/module.c4
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c21
-rw-r--r--arch/s390/kernel/perf_cpum_cf_diag.c14
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c402
-rw-r--r--arch/s390/kernel/perf_event.c8
-rw-r--r--arch/s390/kernel/pgm_check.S2
-rw-r--r--arch/s390/kernel/process.c36
-rw-r--r--arch/s390/kernel/ptrace.c2
-rw-r--r--arch/s390/kernel/setup.c54
-rw-r--r--arch/s390/kernel/smp.c100
-rw-r--r--arch/s390/kernel/stacktrace.c77
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl2
-rw-r--r--arch/s390/kernel/time.c9
-rw-r--r--arch/s390/kernel/topology.c3
-rw-r--r--arch/s390/kernel/traps.c41
-rw-r--r--arch/s390/kernel/unwind_bc.c93
-rw-r--r--arch/s390/kernel/vdso.c60
-rw-r--r--arch/s390/kernel/vdso32/.gitignore1
-rw-r--r--arch/s390/kernel/vdso32/Makefile66
-rw-r--r--arch/s390/kernel/vdso32/clock_getres.S44
-rw-r--r--arch/s390/kernel/vdso32/clock_gettime.S179
-rw-r--r--arch/s390/kernel/vdso32/getcpu.S33
-rw-r--r--arch/s390/kernel/vdso32/gettimeofday.S103
-rw-r--r--arch/s390/kernel/vdso32/note.S13
-rw-r--r--arch/s390/kernel/vdso32/vdso32.lds.S142
-rw-r--r--arch/s390/kernel/vdso32/vdso32_wrapper.S15
-rw-r--r--arch/s390/kernel/vdso64/getcpu.S4
-rw-r--r--arch/s390/kernel/vmlinux.lds.S12
-rw-r--r--arch/s390/kernel/vtime.c4
49 files changed, 715 insertions, 1270 deletions
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 0f255b54b051..2b1203cf7be6 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -10,20 +10,12 @@ CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
# Do not trace early setup code
CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_early_nobss.o = $(CC_FLAGS_FTRACE)
endif
GCOV_PROFILE_early.o := n
-GCOV_PROFILE_early_nobss.o := n
-
KCOV_INSTRUMENT_early.o := n
-KCOV_INSTRUMENT_early_nobss.o := n
-
UBSAN_SANITIZE_early.o := n
-UBSAN_SANITIZE_early_nobss.o := n
-
-KASAN_SANITIZE_early_nobss.o := n
KASAN_SANITIZE_ipl.o := n
KASAN_SANITIZE_machine_kexec.o := n
@@ -48,7 +40,7 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
-obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o early_nobss.o
+obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o
obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o pgm_check.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
@@ -89,7 +81,3 @@ obj-$(CONFIG_TRACEPOINTS) += trace.o
# vdso
obj-y += vdso64/
-obj-$(CONFIG_COMPAT_VDSO) += vdso32/
-
-chkbss := head64.o early_nobss.o
-include $(srctree)/arch/s390/scripts/Makefile.chkbss
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 41ac4ad21311..ce33406cfe83 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -78,8 +78,7 @@ int main(void)
OFFSET(__VDSO_TS_END, vdso_data, ts_end);
OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base);
OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time);
- OFFSET(__VDSO_CPU_NR, vdso_per_cpu_data, cpu_nr);
- OFFSET(__VDSO_NODE_ID, vdso_per_cpu_data, node_id);
+ OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val);
BLANK();
/* constants used by the vdso */
DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME);
diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S
index 2f39ea57f358..b79e0fd571f8 100644
--- a/arch/s390/kernel/base.S
+++ b/arch/s390/kernel/base.S
@@ -16,27 +16,6 @@
GEN_BR_THUNK %r9
GEN_BR_THUNK %r14
-ENTRY(s390_base_mcck_handler)
- basr %r13,0
-0: lg %r15,__LC_NODAT_STACK # load panic stack
- aghi %r15,-STACK_FRAME_OVERHEAD
- larl %r1,s390_base_mcck_handler_fn
- lg %r9,0(%r1)
- ltgr %r9,%r9
- jz 1f
- BASR_EX %r14,%r9
-1: la %r1,4095
- lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)
- lpswe __LC_MCK_OLD_PSW
-ENDPROC(s390_base_mcck_handler)
-
- .section .bss
- .align 8
- .globl s390_base_mcck_handler_fn
-s390_base_mcck_handler_fn:
- .quad 0
- .previous
-
ENTRY(s390_base_ext_handler)
stmg %r0,%r15,__LC_SAVE_AREA_ASYNC
basr %r13,0
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 7abe6ae261b4..f304802ecf7b 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -461,10 +461,11 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
ptr += sprintf(ptr, "%%c%i", value);
else if (operand->flags & OPERAND_VR)
ptr += sprintf(ptr, "%%v%i", value);
- else if (operand->flags & OPERAND_PCREL)
- ptr += sprintf(ptr, "%lx", (signed int) value
- + addr);
- else if (operand->flags & OPERAND_SIGNED)
+ else if (operand->flags & OPERAND_PCREL) {
+ void *pcrel = (void *)((int)value + addr);
+
+ ptr += sprintf(ptr, "%px", pcrel);
+ } else if (operand->flags & OPERAND_SIGNED)
ptr += sprintf(ptr, "%i", value);
else
ptr += sprintf(ptr, "%u", value);
@@ -536,7 +537,7 @@ void show_code(struct pt_regs *regs)
else
*ptr++ = ' ';
addr = regs->psw.addr + start - 32;
- ptr += sprintf(ptr, "%016lx: ", addr);
+ ptr += sprintf(ptr, "%px: ", (void *)addr);
if (start + opsize >= end)
break;
for (i = 0; i < opsize; i++)
@@ -564,7 +565,7 @@ void print_fn_code(unsigned char *code, unsigned long len)
opsize = insn_length(*code);
if (opsize > len)
break;
- ptr += sprintf(ptr, "%p: ", code);
+ ptr += sprintf(ptr, "%px: ", code);
for (i = 0; i < opsize; i++)
ptr += sprintf(ptr, "%02x", code[i]);
*ptr++ = '\t';
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index 34bdc60c0b11..2c122d8bab93 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -38,6 +38,7 @@ const char *stack_type_name(enum stack_type type)
return "unknown";
}
}
+EXPORT_SYMBOL_GPL(stack_type_name);
static inline bool in_stack(unsigned long sp, struct stack_info *info,
enum stack_type type, unsigned long low,
@@ -93,7 +94,9 @@ int get_stack_info(unsigned long sp, struct task_struct *task,
if (!sp)
goto unknown;
- task = task ? : current;
+ /* Sanity check: ABI requires SP to be aligned 8 bytes. */
+ if (sp & 0x7)
+ goto unknown;
/* Check per-task stack */
if (in_task_stack(sp, task, info))
@@ -128,8 +131,6 @@ void show_stack(struct task_struct *task, unsigned long *stack)
struct unwind_state state;
printk("Call Trace:\n");
- if (!task)
- task = current;
unwind_for_each_frame(&state, task, NULL, (unsigned long) stack)
printk(state.reliable ? " [<%016lx>] %pSR \n" :
"([<%016lx>] %pSR)\n",
@@ -194,6 +195,8 @@ void die(struct pt_regs *regs, const char *str)
regs->int_code >> 17, ++die_counter);
#ifdef CONFIG_PREEMPT
pr_cont("PREEMPT ");
+#elif defined(CONFIG_PREEMPT_RT)
+ pr_cont("PREEMPT_RT ");
#endif
pr_cont("SMP ");
if (debug_pagealloc_enabled())
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 6312fed48530..cd241ee66eff 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -30,8 +30,24 @@
#include <asm/sclp.h>
#include <asm/facility.h>
#include <asm/boot_data.h>
+#include <asm/switch_to.h>
#include "entry.h"
+static void __init reset_tod_clock(void)
+{
+ u64 time;
+
+ if (store_tod_clock(&time) == 0)
+ return;
+ /* TOD clock not running. Set the clock to Unix Epoch. */
+ if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0)
+ disabled_wait();
+
+ memset(tod_clock_base, 0, 16);
+ *(__u64 *) &tod_clock_base[1] = TOD_UNIX_EPOCH;
+ S390_lowcore.last_update_clock = TOD_UNIX_EPOCH;
+}
+
/*
* Initialize storage key for kernel pages
*/
@@ -188,21 +204,6 @@ static __init void detect_diag9c(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C;
}
-static __init void detect_diag44(void)
-{
- int rc;
-
- diag_stat_inc(DIAG_STAT_X044);
- asm volatile(
- " diag 0,0,0x44\n"
- "0: la %0,0\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc");
- if (!rc)
- S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44;
-}
-
static __init void detect_machine_facilities(void)
{
if (test_facility(8)) {
@@ -223,7 +224,7 @@ static __init void detect_machine_facilities(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
__ctl_set_bit(0, 17);
}
- if (test_facility(130)) {
+ if (test_facility(130) && !noexec_disabled) {
S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
__ctl_set_bit(0, 20);
}
@@ -245,6 +246,24 @@ static inline void save_vector_registers(void)
#endif
}
+static inline void setup_control_registers(void)
+{
+ unsigned long reg;
+
+ __ctl_store(reg, 0, 0);
+ reg |= CR0_LOW_ADDRESS_PROTECTION;
+ reg |= CR0_EMERGENCY_SIGNAL_SUBMASK;
+ reg |= CR0_EXTERNAL_CALL_SUBMASK;
+ __ctl_load(reg, 0, 0);
+}
+
+static inline void setup_access_registers(void)
+{
+ unsigned int acrs[NUM_ACRS] = { 0 };
+
+ restore_access_regs(acrs);
+}
+
static int __init disable_vector_extension(char *str)
{
S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
@@ -253,21 +272,6 @@ static int __init disable_vector_extension(char *str)
}
early_param("novx", disable_vector_extension);
-static int __init noexec_setup(char *str)
-{
- bool enabled;
- int rc;
-
- rc = kstrtobool(str, &enabled);
- if (!rc && !enabled) {
- /* Disable no-execute support */
- S390_lowcore.machine_flags &= ~MACHINE_FLAG_NX;
- __ctl_clear_bit(0, 20);
- }
- return rc;
-}
-early_param("noexec", noexec_setup);
-
static int __init cad_setup(char *str)
{
bool enabled;
@@ -301,6 +305,7 @@ static void __init check_image_bootable(void)
void __init startup_init(void)
{
+ reset_tod_clock();
check_image_bootable();
time_early_init();
init_kernel_storage_key();
@@ -311,10 +316,11 @@ void __init startup_init(void)
setup_arch_string();
setup_boot_command_line();
detect_diag9c();
- detect_diag44();
detect_machine_facilities();
save_vector_registers();
setup_topology();
sclp_early_detect();
+ setup_control_registers();
+ setup_access_registers();
lockdep_on();
}
diff --git a/arch/s390/kernel/early_nobss.c b/arch/s390/kernel/early_nobss.c
deleted file mode 100644
index 52a3ef959341..000000000000
--- a/arch/s390/kernel/early_nobss.c
+++ /dev/null
@@ -1,45 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright IBM Corp. 2007, 2018
- */
-
-/*
- * Early setup functions which may not rely on an initialized bss
- * section. The last thing that is supposed to happen here is
- * initialization of the bss section.
- */
-
-#include <linux/processor.h>
-#include <linux/string.h>
-#include <asm/sections.h>
-#include <asm/lowcore.h>
-#include <asm/timex.h>
-#include <asm/kasan.h>
-#include "entry.h"
-
-static void __init reset_tod_clock(void)
-{
- u64 time;
-
- if (store_tod_clock(&time) == 0)
- return;
- /* TOD clock not running. Set the clock to Unix Epoch. */
- if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0)
- disabled_wait();
-
- memset(tod_clock_base, 0, 16);
- *(__u64 *) &tod_clock_base[1] = TOD_UNIX_EPOCH;
- S390_lowcore.last_update_clock = TOD_UNIX_EPOCH;
-}
-
-static void __init clear_bss_section(void)
-{
- memset(__bss_start, 0, __bss_stop - __bss_start);
-}
-
-void __init startup_init_nobss(void)
-{
- reset_tod_clock();
- clear_bss_section();
- kasan_early_init();
-}
diff --git a/arch/s390/kernel/early_printk.c b/arch/s390/kernel/early_printk.c
index 40c1dfec944e..6f24d83bc5dc 100644
--- a/arch/s390/kernel/early_printk.c
+++ b/arch/s390/kernel/early_printk.c
@@ -25,7 +25,7 @@ static int __init setup_early_printk(char *buf)
if (early_console)
return 0;
/* Accept only "earlyprintk" and "earlyprintk=sclp" */
- if (buf && strncmp(buf, "sclp", 4))
+ if (buf && !str_has_prefix(buf, "sclp"))
return 0;
if (!sclp.has_linemode && !sclp.has_vt220)
return 0;
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 270d1d145761..9205add8481d 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -790,7 +790,7 @@ ENTRY(io_int_handler)
.Lio_work:
tm __PT_PSW+1(%r11),0x01 # returning to user ?
jo .Lio_work_user # yes -> do resched & signal
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPTION
# check for preemptive scheduling
icm %r0,15,__LC_PREEMPT_COUNT
jnz .Lio_restore # preemption is disabled
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index b2956d49b6ad..1d3927e01a5f 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -45,6 +45,7 @@ void specification_exception(struct pt_regs *regs);
void transaction_exception(struct pt_regs *regs);
void translation_exception(struct pt_regs *regs);
void vector_exception(struct pt_regs *regs);
+void monitor_event_exception(struct pt_regs *regs);
void do_per_trap(struct pt_regs *regs);
void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str);
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 1bb85f60c0dd..4cd9b1ada834 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -72,15 +72,6 @@ static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn)
#endif
}
-static inline int is_kprobe_on_ftrace(struct ftrace_insn *insn)
-{
-#ifdef CONFIG_KPROBES
- if (insn->opc == BREAKPOINT_INSTRUCTION)
- return 1;
-#endif
- return 0;
-}
-
static inline void ftrace_generate_kprobe_nop_insn(struct ftrace_insn *insn)
{
#ifdef CONFIG_KPROBES
@@ -114,16 +105,6 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
/* Initial code replacement */
ftrace_generate_orig_insn(&orig);
ftrace_generate_nop_insn(&new);
- } else if (is_kprobe_on_ftrace(&old)) {
- /*
- * If we find a breakpoint instruction, a kprobe has been
- * placed at the beginning of the function. We write the
- * constant KPROBE_ON_FTRACE_NOP into the remaining four
- * bytes of the original instruction so that the kprobes
- * handler can execute a nop, if it reaches this breakpoint.
- */
- ftrace_generate_kprobe_call_insn(&orig);
- ftrace_generate_kprobe_nop_insn(&new);
} else {
/* Replace ftrace call with a nop. */
ftrace_generate_call_insn(&orig, rec->ip);
@@ -142,21 +123,10 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old)))
return -EFAULT;
- if (is_kprobe_on_ftrace(&old)) {
- /*
- * If we find a breakpoint instruction, a kprobe has been
- * placed at the beginning of the function. We write the
- * constant KPROBE_ON_FTRACE_CALL into the remaining four
- * bytes of the original instruction so that the kprobes
- * handler can execute a brasl if it reaches this breakpoint.
- */
- ftrace_generate_kprobe_nop_insn(&orig);
- ftrace_generate_kprobe_call_insn(&new);
- } else {
- /* Replace nop with an ftrace call. */
- ftrace_generate_nop_insn(&orig);
- ftrace_generate_call_insn(&new, rec->ip);
- }
+ /* Replace nop with an ftrace call. */
+ ftrace_generate_nop_insn(&orig);
+ ftrace_generate_call_insn(&new, rec->ip);
+
/* Verify that the to be replaced code matches what we expect. */
if (memcmp(&orig, &old, sizeof(old)))
return -EINVAL;
@@ -241,3 +211,45 @@ int ftrace_disable_ftrace_graph_caller(void)
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#ifdef CONFIG_KPROBES_ON_FTRACE
+void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *ops, struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb;
+ struct kprobe *p = get_kprobe((kprobe_opcode_t *)ip);
+
+ if (unlikely(!p) || kprobe_disabled(p))
+ return;
+
+ if (kprobe_running()) {
+ kprobes_inc_nmissed_count(p);
+ return;
+ }
+
+ __this_cpu_write(current_kprobe, p);
+
+ kcb = get_kprobe_ctlblk();
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+
+ instruction_pointer_set(regs, ip);
+
+ if (!p->pre_handler || !p->pre_handler(p, regs)) {
+
+ instruction_pointer_set(regs, ip + MCOUNT_INSN_SIZE);
+
+ if (unlikely(p->post_handler)) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ p->post_handler(p, regs, 0);
+ }
+ }
+ __this_cpu_write(current_kprobe, NULL);
+}
+NOKPROBE_SYMBOL(kprobe_ftrace_handler);
+
+int arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+ p->ainsn.insn = NULL;
+ return 0;
+}
+#endif
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index f384a18e6c26..8b88dbbda7df 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -26,32 +26,17 @@ ENTRY(startup_continue)
0: larl %r1,tod_clock_base
mvc 0(16,%r1),__LC_BOOT_CLOCK
larl %r13,.LPG1 # get base
- larl %r0,boot_vdso_data
- stg %r0,__LC_VDSO_PER_CPU
#
# Setup stack
#
larl %r14,init_task
stg %r14,__LC_CURRENT
- larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD
-#
-# Early setup functions that may not rely on an initialized bss section,
-# like moving the initrd. Returns with an initialized bss section.
-#
- brasl %r14,startup_init_nobss
-#
-# Early machine initialization and detection functions.
-#
- brasl %r14,startup_init
-
-# check control registers
- stctg %c0,%c15,0(%r15)
- oi 6(%r15),0x60 # enable sigp emergency & external call
- oi 4(%r15),0x10 # switch on low address proctection
- lctlg %c0,%c15,0(%r15)
-
- lam 0,15,.Laregs-.LPG1(%r13) # load acrs needed by uaccess
- brasl %r14,start_kernel # go to C code
+ larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD-__PT_SIZE
+#ifdef CONFIG_KASAN
+ brasl %r14,kasan_early_init
+#endif
+ brasl %r14,startup_init # s390 specific early init
+ brasl %r14,start_kernel # common init code
#
# We returned from start_kernel ?!? PANIK
#
@@ -61,4 +46,3 @@ ENTRY(startup_continue)
.align 16
.LPG1:
.Ldw: .quad 0x0002000180000000,0x0000000000000000
-.Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
index b9d8fe45737a..8f8456816d83 100644
--- a/arch/s390/kernel/idle.c
+++ b/arch/s390/kernel/idle.c
@@ -69,18 +69,26 @@ DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
static ssize_t show_idle_time(struct device *dev,
struct device_attribute *attr, char *buf)
{
+ unsigned long long now, idle_time, idle_enter, idle_exit, in_idle;
struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
- unsigned long long now, idle_time, idle_enter, idle_exit;
unsigned int seq;
do {
- now = get_tod_clock();
seq = read_seqcount_begin(&idle->seqcount);
idle_time = READ_ONCE(idle->idle_time);
idle_enter = READ_ONCE(idle->clock_idle_enter);
idle_exit = READ_ONCE(idle->clock_idle_exit);
} while (read_seqcount_retry(&idle->seqcount, seq));
- idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0;
+ in_idle = 0;
+ now = get_tod_clock();
+ if (idle_enter) {
+ if (idle_exit) {
+ in_idle = idle_exit - idle_enter;
+ } else if (now > idle_enter) {
+ in_idle = now - idle_enter;
+ }
+ }
+ idle_time += in_idle;
return sprintf(buf, "%llu\n", idle_time >> 12);
}
DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
@@ -88,17 +96,24 @@ DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
u64 arch_cpu_idle_time(int cpu)
{
struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
- unsigned long long now, idle_enter, idle_exit;
+ unsigned long long now, idle_enter, idle_exit, in_idle;
unsigned int seq;
do {
- now = get_tod_clock();
seq = read_seqcount_begin(&idle->seqcount);
idle_enter = READ_ONCE(idle->clock_idle_enter);
idle_exit = READ_ONCE(idle->clock_idle_exit);
} while (read_seqcount_retry(&idle->seqcount, seq));
-
- return cputime_to_nsecs(idle_enter ? ((idle_exit ?: now) - idle_enter) : 0);
+ in_idle = 0;
+ now = get_tod_clock();
+ if (idle_enter) {
+ if (idle_exit) {
+ in_idle = idle_exit - idle_enter;
+ } else if (now > idle_enter) {
+ in_idle = now - idle_enter;
+ }
+ }
+ return cputime_to_nsecs(in_idle);
}
void arch_cpu_idle_enter(void)
diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c
index 6d0635ceddd0..9da6fa30c447 100644
--- a/arch/s390/kernel/kexec_elf.c
+++ b/arch/s390/kernel/kexec_elf.c
@@ -130,7 +130,7 @@ static int s390_elf_probe(const char *buf, unsigned long len)
const struct kexec_file_ops s390_kexec_elf_ops = {
.probe = s390_elf_probe,
.load = s390_elf_load,
-#ifdef CONFIG_KEXEC_VERIFY_SIG
+#ifdef CONFIG_KEXEC_SIG
.verify_sig = s390_verify_sig,
-#endif /* CONFIG_KEXEC_VERIFY_SIG */
+#endif /* CONFIG_KEXEC_SIG */
};
diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c
index 58318bf89fd9..af23eff5774d 100644
--- a/arch/s390/kernel/kexec_image.c
+++ b/arch/s390/kernel/kexec_image.c
@@ -59,7 +59,7 @@ static int s390_image_probe(const char *buf, unsigned long len)
const struct kexec_file_ops s390_kexec_image_ops = {
.probe = s390_image_probe,
.load = s390_image_load,
-#ifdef CONFIG_KEXEC_VERIFY_SIG
+#ifdef CONFIG_KEXEC_SIG
.verify_sig = s390_verify_sig,
-#endif /* CONFIG_KEXEC_VERIFY_SIG */
+#endif /* CONFIG_KEXEC_SIG */
};
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 6f1388391620..548d0ea9808d 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -56,21 +56,10 @@ struct kprobe_insn_cache kprobe_s390_insn_slots = {
static void copy_instruction(struct kprobe *p)
{
- unsigned long ip = (unsigned long) p->addr;
s64 disp, new_disp;
u64 addr, new_addr;
- if (ftrace_location(ip) == ip) {
- /*
- * If kprobes patches the instruction that is morphed by
- * ftrace make sure that kprobes always sees the branch
- * "jg .+24" that skips the mcount block or the "brcl 0,0"
- * in case of hotpatch.
- */
- ftrace_generate_nop_insn((struct ftrace_insn *)p->ainsn.insn);
- p->ainsn.is_ftrace_insn = 1;
- } else
- memcpy(p->ainsn.insn, p->addr, insn_length(*p->addr >> 8));
+ memcpy(p->ainsn.insn, p->addr, insn_length(*p->addr >> 8));
p->opcode = p->ainsn.insn[0];
if (!probe_is_insn_relative_long(p->ainsn.insn))
return;
@@ -136,11 +125,6 @@ int arch_prepare_kprobe(struct kprobe *p)
}
NOKPROBE_SYMBOL(arch_prepare_kprobe);
-int arch_check_ftrace_location(struct kprobe *p)
-{
- return 0;
-}
-
struct swap_insn_args {
struct kprobe *p;
unsigned int arm_kprobe : 1;
@@ -149,28 +133,11 @@ struct swap_insn_args {
static int swap_instruction(void *data)
{
struct swap_insn_args *args = data;
- struct ftrace_insn new_insn, *insn;
struct kprobe *p = args->p;
- size_t len;
-
- new_insn.opc = args->arm_kprobe ? BREAKPOINT_INSTRUCTION : p->opcode;
- len = sizeof(new_insn.opc);
- if (!p->ainsn.is_ftrace_insn)
- goto skip_ftrace;
- len = sizeof(new_insn);
- insn = (struct ftrace_insn *) p->addr;
- if (args->arm_kprobe) {
- if (is_ftrace_nop(insn))
- new_insn.disp = KPROBE_ON_FTRACE_NOP;
- else
- new_insn.disp = KPROBE_ON_FTRACE_CALL;
- } else {
- ftrace_generate_call_insn(&new_insn, (unsigned long)p->addr);
- if (insn->disp == KPROBE_ON_FTRACE_NOP)
- ftrace_generate_nop_insn(&new_insn);
- }
-skip_ftrace:
- s390_kernel_write(p->addr, &new_insn, len);
+ u16 opc;
+
+ opc = args->arm_kprobe ? BREAKPOINT_INSTRUCTION : p->opcode;
+ s390_kernel_write(p->addr, &opc, sizeof(opc));
return 0;
}
NOKPROBE_SYMBOL(swap_instruction);
@@ -464,24 +431,6 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs)
unsigned long ip = regs->psw.addr;
int fixup = probe_get_fixup_type(p->ainsn.insn);
- /* Check if the kprobes location is an enabled ftrace caller */
- if (p->ainsn.is_ftrace_insn) {
- struct ftrace_insn *insn = (struct ftrace_insn *) p->addr;
- struct ftrace_insn call_insn;
-
- ftrace_generate_call_insn(&call_insn, (unsigned long) p->addr);
- /*
- * A kprobe on an enabled ftrace call site actually single
- * stepped an unconditional branch (ftrace nop equivalent).
- * Now we need to fixup things and pretend that a brasl r0,...
- * was executed instead.
- */
- if (insn->disp == KPROBE_ON_FTRACE_CALL) {
- ip += call_insn.disp * 2 - MCOUNT_INSN_SIZE;
- regs->gprs[0] = (unsigned long)p->addr + sizeof(*insn);
- }
- }
-
if (fixup & FIXUP_PSW_NORMAL)
ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn;
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 444a19125a81..cb8b1cc285c9 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -164,7 +164,9 @@ static bool kdump_csum_valid(struct kimage *image)
#ifdef CONFIG_CRASH_DUMP
int rc;
+ preempt_disable();
rc = CALL_ON_STACK(do_start_kdump, S390_lowcore.nodat_stack, 1, image);
+ preempt_enable();
return rc == 0;
#else
return false;
@@ -254,10 +256,10 @@ void arch_crash_save_vmcoreinfo(void)
VMCOREINFO_SYMBOL(lowcore_ptr);
VMCOREINFO_SYMBOL(high_memory);
VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
- mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
vmcoreinfo_append_str("SDMA=%lx\n", __sdma);
vmcoreinfo_append_str("EDMA=%lx\n", __edma);
vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
+ mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
}
void machine_shutdown(void)
diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
index fbdd3ea73667..8415ae7d2a23 100644
--- a/arch/s390/kernel/machine_kexec_file.c
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -10,7 +10,7 @@
#include <linux/elf.h>
#include <linux/errno.h>
#include <linux/kexec.h>
-#include <linux/module.h>
+#include <linux/module_signature.h>
#include <linux/verification.h>
#include <asm/boot_data.h>
#include <asm/ipl.h>
@@ -22,29 +22,7 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
NULL,
};
-#ifdef CONFIG_KEXEC_VERIFY_SIG
-/*
- * Module signature information block.
- *
- * The constituents of the signature section are, in order:
- *
- * - Signer's name
- * - Key identifier
- * - Signature data
- * - Information block
- */
-struct module_signature {
- u8 algo; /* Public-key crypto algorithm [0] */
- u8 hash; /* Digest algorithm [0] */
- u8 id_type; /* Key identifier type [PKEY_ID_PKCS7] */
- u8 signer_len; /* Length of signer's name [0] */
- u8 key_id_len; /* Length of key identifier [0] */
- u8 __pad[3];
- __be32 sig_len; /* Length of signature data */
-};
-
-#define PKEY_ID_PKCS7 2
-
+#ifdef CONFIG_KEXEC_SIG
int s390_verify_sig(const char *kernel, unsigned long kernel_len)
{
const unsigned long marker_len = sizeof(MODULE_SIG_STRING) - 1;
@@ -90,7 +68,7 @@ int s390_verify_sig(const char *kernel, unsigned long kernel_len)
VERIFYING_MODULE_SIGNATURE,
NULL, NULL);
}
-#endif /* CONFIG_KEXEC_VERIFY_SIG */
+#endif /* CONFIG_KEXEC_SIG */
static int kexec_file_update_purgatory(struct kimage *image,
struct s390_load_data *data)
diff --git a/arch/s390/kernel/machine_kexec_reloc.c b/arch/s390/kernel/machine_kexec_reloc.c
index 3b664cb3ec4d..d5035de9020e 100644
--- a/arch/s390/kernel/machine_kexec_reloc.c
+++ b/arch/s390/kernel/machine_kexec_reloc.c
@@ -27,6 +27,7 @@ int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val,
*(u32 *)loc = val;
break;
case R_390_64: /* Direct 64 bit. */
+ case R_390_GLOB_DAT:
*(u64 *)loc = val;
break;
case R_390_PC16: /* PC relative 16 bit. */
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 9e1660a6b9db..7458dcfd6464 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -26,6 +26,12 @@ ENDPROC(ftrace_stub)
#define STACK_PTREGS (STACK_FRAME_OVERHEAD)
#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS)
#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW)
+#ifdef __PACK_STACK
+/* allocate just enough for r14, r15 and backchain */
+#define TRACED_FUNC_FRAME_SIZE 24
+#else
+#define TRACED_FUNC_FRAME_SIZE STACK_FRAME_OVERHEAD
+#endif
ENTRY(_mcount)
BR_EX %r14
@@ -35,13 +41,27 @@ EXPORT_SYMBOL(_mcount)
ENTRY(ftrace_caller)
.globl ftrace_regs_caller
.set ftrace_regs_caller,ftrace_caller
+ stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller
+ lghi %r14,0 # save condition code
+ ipm %r14 # don't put any instructions
+ sllg %r14,%r14,16 # clobbering CC before this point
lgr %r1,%r15
#if !(defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT))
aghi %r0,MCOUNT_RETURN_FIXUP
#endif
- aghi %r15,-STACK_FRAME_SIZE
+ # allocate stack frame for ftrace_caller to contain traced function
+ aghi %r15,-TRACED_FUNC_FRAME_SIZE
stg %r1,__SF_BACKCHAIN(%r15)
+ stg %r0,(__SF_GPRS+8*8)(%r15)
+ stg %r15,(__SF_GPRS+9*8)(%r15)
+ # allocate pt_regs and stack frame for ftrace_trace_function
+ aghi %r15,-STACK_FRAME_SIZE
stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15)
+ stg %r14,(STACK_PTREGS_PSW)(%r15)
+ lg %r14,(__SF_GPRS+8*8)(%r1) # restore original return address
+ stosm (STACK_PTREGS_PSW)(%r15),0
+ aghi %r1,-TRACED_FUNC_FRAME_SIZE
+ stg %r1,__SF_BACKCHAIN(%r15)
stg %r0,(STACK_PTREGS_PSW+8)(%r15)
stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15)
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 31889db609e9..ba8f19bb438b 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -472,11 +472,11 @@ int module_finalize(const Elf_Ehdr *hdr,
apply_alternatives(aseg, aseg + s->sh_size);
if (IS_ENABLED(CONFIG_EXPOLINE) &&
- (!strncmp(".s390_indirect", secname, 14)))
+ (str_has_prefix(secname, ".s390_indirect")))
nospec_revert(aseg, aseg + s->sh_size);
if (IS_ENABLED(CONFIG_EXPOLINE) &&
- (!strncmp(".s390_return", secname, 12)))
+ (str_has_prefix(secname, ".s390_return")))
nospec_revert(aseg, aseg + s->sh_size);
}
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index 48d48b6187c0..0eb1d1cc53a8 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -199,7 +199,7 @@ static const int cpumf_generic_events_user[] = {
[PERF_COUNT_HW_BUS_CYCLES] = -1,
};
-static int __hw_perf_event_init(struct perf_event *event)
+static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
{
struct perf_event_attr *attr = &event->attr;
struct hw_perf_event *hwc = &event->hw;
@@ -207,7 +207,7 @@ static int __hw_perf_event_init(struct perf_event *event)
int err = 0;
u64 ev;
- switch (attr->type) {
+ switch (type) {
case PERF_TYPE_RAW:
/* Raw events are used to access counters directly,
* hence do not permit excludes */
@@ -294,17 +294,16 @@ static int __hw_perf_event_init(struct perf_event *event)
static int cpumf_pmu_event_init(struct perf_event *event)
{
+ unsigned int type = event->attr.type;
int err;
- switch (event->attr.type) {
- case PERF_TYPE_HARDWARE:
- case PERF_TYPE_HW_CACHE:
- case PERF_TYPE_RAW:
- err = __hw_perf_event_init(event);
- break;
- default:
+ if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW)
+ err = __hw_perf_event_init(event, type);
+ else if (event->pmu->type == type)
+ /* Registered as unknown PMU */
+ err = __hw_perf_event_init(event, PERF_TYPE_RAW);
+ else
return -ENOENT;
- }
if (unlikely(err) && event->destroy)
event->destroy(event);
@@ -553,7 +552,7 @@ static int __init cpumf_pmu_init(void)
return -ENODEV;
cpumf_pmu.attr_groups = cpumf_cf_event_group();
- rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW);
+ rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1);
if (rc)
pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
return rc;
diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c
index 5f1fd1581330..e949ab832ed7 100644
--- a/arch/s390/kernel/perf_cpum_cf_diag.c
+++ b/arch/s390/kernel/perf_cpum_cf_diag.c
@@ -243,13 +243,13 @@ static int cf_diag_event_init(struct perf_event *event)
int err = -ENOENT;
debug_sprintf_event(cf_diag_dbg, 5,
- "%s event %p cpu %d config %#llx "
+ "%s event %p cpu %d config %#llx type:%u "
"sample_type %#llx cf_diag_events %d\n", __func__,
- event, event->cpu, attr->config, attr->sample_type,
- atomic_read(&cf_diag_events));
+ event, event->cpu, attr->config, event->pmu->type,
+ attr->sample_type, atomic_read(&cf_diag_events));
if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG ||
- event->attr.type != PERF_TYPE_RAW)
+ event->attr.type != event->pmu->type)
goto out;
/* Raw events are used to access counters directly,
@@ -390,7 +390,7 @@ static size_t cf_diag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset,
debug_sprintf_event(cf_diag_dbg, 6,
"%s ctrset %d ctrset_size %zu cfvn %d csvn %d"
- " need %zd rc:%d\n",
+ " need %zd rc %d\n",
__func__, ctrset, ctrset_size, cpuhw->info.cfvn,
cpuhw->info.csvn, need, rc);
return need;
@@ -567,7 +567,7 @@ static int cf_diag_add(struct perf_event *event, int flags)
int err = 0;
debug_sprintf_event(cf_diag_dbg, 5,
- "%s event %p cpu %d flags %#x cpuhw:%p\n",
+ "%s event %p cpu %d flags %#x cpuhw %p\n",
__func__, event, event->cpu, flags, cpuhw);
if (cpuhw->flags & PMU_F_IN_USE) {
@@ -693,7 +693,7 @@ static int __init cf_diag_init(void)
}
debug_register_view(cf_diag_dbg, &debug_sprintf_view);
- rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", PERF_TYPE_RAW);
+ rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1);
if (rc) {
debug_unregister_view(cf_diag_dbg, &debug_sprintf_view);
debug_unregister(cf_diag_dbg);
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 1266194afb02..b095b1c78987 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -156,8 +156,8 @@ static void free_sampling_buffer(struct sf_buffer *sfb)
}
}
- debug_sprintf_event(sfdbg, 5,
- "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt);
+ debug_sprintf_event(sfdbg, 5, "%s: freed sdbt %#lx\n", __func__,
+ (unsigned long)sfb->sdbt);
memset(sfb, 0, sizeof(*sfb));
}
@@ -193,7 +193,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
unsigned long num_sdb, gfp_t gfp_flags)
{
int i, rc;
- unsigned long *new, *tail;
+ unsigned long *new, *tail, *tail_prev = NULL;
if (!sfb->sdbt || !sfb->tail)
return -EINVAL;
@@ -212,10 +212,11 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
* the sampling buffer origin.
*/
if (sfb->sdbt != get_next_sdbt(tail)) {
- debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: "
- "sampling buffer is not linked: origin=%p"
- "tail=%p\n",
- (void *) sfb->sdbt, (void *) tail);
+ debug_sprintf_event(sfdbg, 3, "%s: "
+ "sampling buffer is not linked: origin %#lx"
+ " tail %#lx\n", __func__,
+ (unsigned long)sfb->sdbt,
+ (unsigned long)tail);
return -EINVAL;
}
@@ -232,6 +233,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
sfb->num_sdbt++;
/* Link current page to tail of chain */
*tail = (unsigned long)(void *) new + 1;
+ tail_prev = tail;
tail = new;
}
@@ -241,18 +243,30 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb,
* issue, a new realloc call (if required) might succeed.
*/
rc = alloc_sample_data_block(tail, gfp_flags);
- if (rc)
+ if (rc) {
+ /* Undo last SDBT. An SDBT with no SDB at its first
+ * entry but with an SDBT entry instead can not be
+ * handled by the interrupt handler code.
+ * Avoid this situation.
+ */
+ if (tail_prev) {
+ sfb->num_sdbt--;
+ free_page((unsigned long) new);
+ tail = tail_prev;
+ }
break;
+ }
sfb->num_sdb++;
tail++;
+ tail_prev = new = NULL; /* Allocated at least one SBD */
}
/* Link sampling buffer to its origin */
*tail = (unsigned long) sfb->sdbt + 1;
sfb->tail = tail;
- debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer"
- " settings: sdbt=%lu sdb=%lu\n",
+ debug_sprintf_event(sfdbg, 4, "%s: new buffer"
+ " settings: sdbt %lu sdb %lu\n", __func__,
sfb->num_sdbt, sfb->num_sdb);
return rc;
}
@@ -292,12 +306,13 @@ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb)
rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL);
if (rc) {
free_sampling_buffer(sfb);
- debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: "
- "realloc_sampling_buffer failed with rc=%i\n", rc);
+ debug_sprintf_event(sfdbg, 4, "%s: "
+ "realloc_sampling_buffer failed with rc %i\n",
+ __func__, rc);
} else
debug_sprintf_event(sfdbg, 4,
- "alloc_sampling_buffer: tear=%p dear=%p\n",
- sfb->sdbt, (void *) *sfb->sdbt);
+ "%s: tear %#lx dear %#lx\n", __func__,
+ (unsigned long)sfb->sdbt, (unsigned long)*sfb->sdbt);
return rc;
}
@@ -404,8 +419,8 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc)
return 0;
debug_sprintf_event(sfdbg, 3,
- "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu"
- " sample_size=%lu cpuhw=%p\n",
+ "%s: rate %lu f %lu sdb %lu/%lu"
+ " sample_size %lu cpuhw %p\n", __func__,
SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc),
sample_size, cpuhw);
@@ -465,8 +480,8 @@ static void sfb_account_overflows(struct cpu_hw_sf *cpuhw,
if (num)
sfb_account_allocs(num, hwc);
- debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu"
- " num=%lu\n", OVERFLOW_REG(hwc), ratio, num);
+ debug_sprintf_event(sfdbg, 5, "%s: overflow %llu ratio %lu num %lu\n",
+ __func__, OVERFLOW_REG(hwc), ratio, num);
OVERFLOW_REG(hwc) = 0;
}
@@ -504,17 +519,16 @@ static void extend_sampling_buffer(struct sf_buffer *sfb,
*/
rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC);
if (rc)
- debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc "
- "failed with rc=%i\n", rc);
+ debug_sprintf_event(sfdbg, 5, "%s: realloc failed with rc %i\n",
+ __func__, rc);
if (sfb_has_pending_allocs(sfb, hwc))
- debug_sprintf_event(sfdbg, 5, "sfb: extend: "
- "req=%lu alloc=%lu remaining=%lu\n",
- num, sfb->num_sdb - num_old,
+ debug_sprintf_event(sfdbg, 5, "%s: "
+ "req %lu alloc %lu remaining %lu\n",
+ __func__, num, sfb->num_sdb - num_old,
sfb_pending_allocs(sfb, hwc));
}
-
/* Number of perf events counting hardware events */
static atomic_t num_events;
/* Used to avoid races in calling reserve/release_cpumf_hardware */
@@ -539,20 +553,22 @@ static void setup_pmc_cpu(void *flags)
err = sf_disable();
if (err)
pr_err("Switching off the sampling facility failed "
- "with rc=%i\n", err);
+ "with rc %i\n", err);
debug_sprintf_event(sfdbg, 5,
- "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf);
+ "%s: initialized: cpuhw %p\n", __func__,
+ cpusf);
break;
case PMC_RELEASE:
cpusf->flags &= ~PMU_F_RESERVED;
err = sf_disable();
if (err) {
pr_err("Switching off the sampling facility failed "
- "with rc=%i\n", err);
+ "with rc %i\n", err);
} else
deallocate_buffers(cpusf);
debug_sprintf_event(sfdbg, 5,
- "setup_pmc_cpu: released: cpuhw=%p\n", cpusf);
+ "%s: released: cpuhw %p\n", __func__,
+ cpusf);
break;
}
if (err)
@@ -599,13 +615,6 @@ static void hw_init_period(struct hw_perf_event *hwc, u64 period)
local64_set(&hwc->period_left, hwc->sample_period);
}
-static void hw_reset_registers(struct hw_perf_event *hwc,
- unsigned long *sdbt_origin)
-{
- /* (Re)set to first sample-data-block-table */
- TEAR_REG(hwc) = (unsigned long) sdbt_origin;
-}
-
static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si,
unsigned long rate)
{
@@ -674,13 +683,88 @@ out:
rcu_read_unlock();
}
+static unsigned long getrate(bool freq, unsigned long sample,
+ struct hws_qsi_info_block *si)
+{
+ unsigned long rate;
+
+ if (freq) {
+ rate = freq_to_sample_rate(si, sample);
+ rate = hw_limit_rate(si, rate);
+ } else {
+ /* The min/max sampling rates specifies the valid range
+ * of sample periods. If the specified sample period is
+ * out of range, limit the period to the range boundary.
+ */
+ rate = hw_limit_rate(si, sample);
+
+ /* The perf core maintains a maximum sample rate that is
+ * configurable through the sysctl interface. Ensure the
+ * sampling rate does not exceed this value. This also helps
+ * to avoid throttling when pushing samples with
+ * perf_event_overflow().
+ */
+ if (sample_rate_to_freq(si, rate) >
+ sysctl_perf_event_sample_rate) {
+ debug_sprintf_event(sfdbg, 1, "%s: "
+ "Sampling rate exceeds maximum "
+ "perf sample rate\n", __func__);
+ rate = 0;
+ }
+ }
+ return rate;
+}
+
+/* The sampling information (si) contains information about the
+ * min/max sampling intervals and the CPU speed. So calculate the
+ * correct sampling interval and avoid the whole period adjust
+ * feedback loop.
+ *
+ * Since the CPU Measurement sampling facility can not handle frequency
+ * calculate the sampling interval when frequency is specified using
+ * this formula:
+ * interval := cpu_speed * 1000000 / sample_freq
+ *
+ * Returns errno on bad input and zero on success with parameter interval
+ * set to the correct sampling rate.
+ *
+ * Note: This function turns off freq bit to avoid calling function
+ * perf_adjust_period(). This causes frequency adjustment in the common
+ * code part which causes tremendous variations in the counter values.
+ */
+static int __hw_perf_event_init_rate(struct perf_event *event,
+ struct hws_qsi_info_block *si)
+{
+ struct perf_event_attr *attr = &event->attr;
+ struct hw_perf_event *hwc = &event->hw;
+ unsigned long rate;
+
+ if (attr->freq) {
+ if (!attr->sample_freq)
+ return -EINVAL;
+ rate = getrate(attr->freq, attr->sample_freq, si);
+ attr->freq = 0; /* Don't call perf_adjust_period() */
+ SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FREQ_MODE;
+ } else {
+ rate = getrate(attr->freq, attr->sample_period, si);
+ if (!rate)
+ return -EINVAL;
+ }
+ attr->sample_period = rate;
+ SAMPL_RATE(hwc) = rate;
+ hw_init_period(hwc, SAMPL_RATE(hwc));
+ debug_sprintf_event(sfdbg, 4, "%s: cpu %d period %#llx freq %d,%#lx\n",
+ __func__, event->cpu, event->attr.sample_period,
+ event->attr.freq, SAMPLE_FREQ_MODE(hwc));
+ return 0;
+}
+
static int __hw_perf_event_init(struct perf_event *event)
{
struct cpu_hw_sf *cpuhw;
struct hws_qsi_info_block si;
struct perf_event_attr *attr = &event->attr;
struct hw_perf_event *hwc = &event->hw;
- unsigned long rate;
int cpu, err;
/* Reserve CPU-measurement sampling facility */
@@ -728,6 +812,12 @@ static int __hw_perf_event_init(struct perf_event *event)
goto out;
}
+ if (si.ribm & CPU_MF_SF_RIBM_NOTAV) {
+ pr_warn("CPU Measurement Facility sampling is temporarily not available\n");
+ err = -EBUSY;
+ goto out;
+ }
+
/* Always enable basic sampling */
SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE;
@@ -746,43 +836,9 @@ static int __hw_perf_event_init(struct perf_event *event)
if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS)
SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS;
- /* The sampling information (si) contains information about the
- * min/max sampling intervals and the CPU speed. So calculate the
- * correct sampling interval and avoid the whole period adjust
- * feedback loop.
- */
- rate = 0;
- if (attr->freq) {
- if (!attr->sample_freq) {
- err = -EINVAL;
- goto out;
- }
- rate = freq_to_sample_rate(&si, attr->sample_freq);
- rate = hw_limit_rate(&si, rate);
- attr->freq = 0;
- attr->sample_period = rate;
- } else {
- /* The min/max sampling rates specifies the valid range
- * of sample periods. If the specified sample period is
- * out of range, limit the period to the range boundary.
- */
- rate = hw_limit_rate(&si, hwc->sample_period);
-
- /* The perf core maintains a maximum sample rate that is
- * configurable through the sysctl interface. Ensure the
- * sampling rate does not exceed this value. This also helps
- * to avoid throttling when pushing samples with
- * perf_event_overflow().
- */
- if (sample_rate_to_freq(&si, rate) >
- sysctl_perf_event_sample_rate) {
- err = -EINVAL;
- debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n");
- goto out;
- }
- }
- SAMPL_RATE(hwc) = rate;
- hw_init_period(hwc, SAMPL_RATE(hwc));
+ err = __hw_perf_event_init_rate(event, &si);
+ if (err)
+ goto out;
/* Initialize sample data overflow accounting */
hwc->extra_reg.reg = REG_OVERFLOW;
@@ -854,7 +910,7 @@ static int cpumsf_pmu_event_init(struct perf_event *event)
/* Check online status of the CPU to which the event is pinned */
if (event->cpu >= 0 && !cpu_online(event->cpu))
- return -ENODEV;
+ return -ENODEV;
/* Force reset of idle/hv excludes regardless of what the
* user requested.
@@ -902,9 +958,10 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
* buffer extents
*/
sfb_account_overflows(cpuhw, hwc);
- if (sfb_has_pending_allocs(&cpuhw->sfb, hwc))
- extend_sampling_buffer(&cpuhw->sfb, hwc);
+ extend_sampling_buffer(&cpuhw->sfb, hwc);
}
+ /* Rate may be adjusted with ioctl() */
+ cpuhw->lsctl.interval = SAMPL_RATE(&cpuhw->event->hw);
}
/* (Re)enable the PMU and sampling facility */
@@ -914,7 +971,7 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
err = lsctl(&cpuhw->lsctl);
if (err) {
cpuhw->flags &= ~PMU_F_ENABLED;
- pr_err("Loading sampling controls failed: op=%i err=%i\n",
+ pr_err("Loading sampling controls failed: op %i err %i\n",
1, err);
return;
}
@@ -922,10 +979,11 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
/* Load current program parameter */
lpp(&S390_lowcore.lpp);
- debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i "
- "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs,
- cpuhw->lsctl.ed, cpuhw->lsctl.cd,
- (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear);
+ debug_sprintf_event(sfdbg, 6, "%s: es %i cs %i ed %i cd %i "
+ "interval %#lx tear %#lx dear %#lx\n", __func__,
+ cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed,
+ cpuhw->lsctl.cd, cpuhw->lsctl.interval,
+ cpuhw->lsctl.tear, cpuhw->lsctl.dear);
}
static void cpumsf_pmu_disable(struct pmu *pmu)
@@ -948,13 +1006,14 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
err = lsctl(&inactive);
if (err) {
- pr_err("Loading sampling controls failed: op=%i err=%i\n",
+ pr_err("Loading sampling controls failed: op %i err %i\n",
2, err);
return;
}
/* Save state of TEAR and DEAR register contents */
- if (!qsi(&si)) {
+ err = qsi(&si);
+ if (!err) {
/* TEAR/DEAR values are valid only if the sampling facility is
* enabled. Note that cpumsf_pmu_disable() might be called even
* for a disabled sampling facility because cpumsf_pmu_enable()
@@ -965,8 +1024,8 @@ static void cpumsf_pmu_disable(struct pmu *pmu)
cpuhw->lsctl.dear = si.dear;
}
} else
- debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: "
- "qsi() failed with err=%i\n", err);
+ debug_sprintf_event(sfdbg, 3, "%s: qsi() failed with err %i\n",
+ __func__, err);
cpuhw->flags &= ~PMU_F_ENABLED;
}
@@ -1079,14 +1138,6 @@ static void perf_event_count_update(struct perf_event *event, u64 count)
local64_add(count, &event->count);
}
-static void debug_sample_entry(struct hws_basic_entry *sample,
- struct hws_trailer_entry *te)
-{
- debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown "
- "sampling data entry: te->f=%i basic.def=%04x (%p)\n",
- te->f, sample->def, sample);
-}
-
/* hw_collect_samples() - Walk through a sample-data-block and collect samples
* @event: The perf event
* @sdbt: Sample-data-block table
@@ -1140,7 +1191,11 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt,
/* Count discarded samples */
*overflow += 1;
} else {
- debug_sample_entry(sample, te);
+ debug_sprintf_event(sfdbg, 4,
+ "%s: Found unknown"
+ " sampling data entry: te->f %i"
+ " basic.def %#4x (%p)\n", __func__,
+ te->f, sample->def, sample);
/* Sample slot is not yet written or other record.
*
* This condition can occur if the buffer was reused
@@ -1215,9 +1270,9 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
sampl_overflow += te->overflow;
/* Timestamps are valid for full sample-data-blocks only */
- debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p "
- "overflow=%llu timestamp=0x%llx\n",
- sdbt, te->overflow,
+ debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx "
+ "overflow %llu timestamp %#llx\n",
+ __func__, (unsigned long)sdbt, te->overflow,
(te->f) ? trailer_timestamp(te) : 0ULL);
/* Collect all samples from a single sample-data-block and
@@ -1248,22 +1303,34 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
*/
if (flush_all && done)
break;
-
- /* If an event overflow happened, discard samples by
- * processing any remaining sample-data-blocks.
- */
- if (event_overflow)
- flush_all = 1;
}
/* Account sample overflows in the event hardware structure */
if (sampl_overflow)
OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
sampl_overflow, 1 + num_sdb);
+
+ /* Perf_event_overflow() and perf_event_account_interrupt() limit
+ * the interrupt rate to an upper limit. Roughly 1000 samples per
+ * task tick.
+ * Hitting this limit results in a large number
+ * of throttled REF_REPORT_THROTTLE entries and the samples
+ * are dropped.
+ * Slightly increase the interval to avoid hitting this limit.
+ */
+ if (event_overflow) {
+ SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10);
+ debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n",
+ __func__,
+ DIV_ROUND_UP(SAMPL_RATE(hwc), 10));
+ }
+
if (sampl_overflow || event_overflow)
- debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: "
- "overflow stats: sample=%llu event=%llu\n",
- sampl_overflow, event_overflow);
+ debug_sprintf_event(sfdbg, 4, "%s: "
+ "overflows: sample %llu event %llu"
+ " total %llu num_sdb %llu\n",
+ __func__, sampl_overflow, event_overflow,
+ OVERFLOW_REG(hwc), num_sdb);
}
#define AUX_SDB_INDEX(aux, i) ((i) % aux->sfb.num_sdb)
@@ -1316,7 +1383,8 @@ static void aux_output_end(struct perf_output_handle *handle)
te = aux_sdb_trailer(aux, aux->alert_mark);
te->flags &= ~SDB_TE_ALERT_REQ_MASK;
- debug_sprintf_event(sfdbg, 6, "aux_output_end: collect %lx SDBs\n", i);
+ debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n",
+ __func__, i, range_scan, aux->head);
}
/*
@@ -1349,13 +1417,17 @@ static int aux_output_begin(struct perf_output_handle *handle,
* SDBs between aux->head and aux->empty_mark are already ready
* for new data. range_scan is num of SDBs not within them.
*/
+ debug_sprintf_event(sfdbg, 6,
+ "%s: range %ld head %ld alert %ld empty %ld\n",
+ __func__, range, aux->head, aux->alert_mark,
+ aux->empty_mark);
if (range > AUX_SDB_NUM_EMPTY(aux)) {
range_scan = range - AUX_SDB_NUM_EMPTY(aux);
idx = aux->empty_mark + 1;
for (i = 0; i < range_scan; i++, idx++) {
te = aux_sdb_trailer(aux, idx);
- te->flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK;
- te->flags = te->flags & ~SDB_TE_ALERT_REQ_MASK;
+ te->flags &= ~(SDB_TE_BUFFER_FULL_MASK |
+ SDB_TE_ALERT_REQ_MASK);
te->overflow = 0;
}
/* Save the position of empty SDBs */
@@ -1374,15 +1446,11 @@ static int aux_output_begin(struct perf_output_handle *handle,
cpuhw->lsctl.tear = base + offset * sizeof(unsigned long);
cpuhw->lsctl.dear = aux->sdb_index[head];
- debug_sprintf_event(sfdbg, 6, "aux_output_begin: "
- "head->alert_mark->empty_mark (num_alert, range)"
- "[%lx -> %lx -> %lx] (%lx, %lx) "
- "tear index %lx, tear %lx dear %lx\n",
+ debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld empty %ld "
+ "index %ld tear %#lx dear %#lx\n", __func__,
aux->head, aux->alert_mark, aux->empty_mark,
- AUX_SDB_NUM_ALERT(aux), range,
head / CPUM_SF_SDB_PER_TABLE,
- cpuhw->lsctl.tear,
- cpuhw->lsctl.dear);
+ cpuhw->lsctl.tear, cpuhw->lsctl.dear);
return 0;
}
@@ -1402,8 +1470,7 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index,
te = aux_sdb_trailer(aux, alert_index);
do {
orig_flags = te->flags;
- orig_overflow = te->overflow;
- *overflow = orig_overflow;
+ *overflow = orig_overflow = te->overflow;
if (orig_flags & SDB_TE_BUFFER_FULL_MASK) {
/*
* SDB is already set by hardware.
@@ -1445,9 +1512,12 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
unsigned long long *overflow)
{
unsigned long long orig_overflow, orig_flags, new_flags;
- unsigned long i, range_scan, idx;
+ unsigned long i, range_scan, idx, idx_old;
struct hws_trailer_entry *te;
+ debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld "
+ "empty %ld\n", __func__, range, aux->head,
+ aux->alert_mark, aux->empty_mark);
if (range <= AUX_SDB_NUM_EMPTY(aux))
/*
* No need to scan. All SDBs in range are marked as empty.
@@ -1470,7 +1540,7 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
* indicator fall into this range, set it.
*/
range_scan = range - AUX_SDB_NUM_EMPTY(aux);
- idx = aux->empty_mark + 1;
+ idx_old = idx = aux->empty_mark + 1;
for (i = 0; i < range_scan; i++, idx++) {
te = aux_sdb_trailer(aux, idx);
do {
@@ -1490,6 +1560,9 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range,
/* Update empty_mark to new position */
aux->empty_mark = aux->head + range - 1;
+ debug_sprintf_event(sfdbg, 6, "%s: range_scan %ld idx %ld..%ld "
+ "empty %ld\n", __func__, range_scan, idx_old,
+ idx - 1, aux->empty_mark);
return true;
}
@@ -1503,7 +1576,6 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
unsigned long range = 0, size;
unsigned long long overflow = 0;
struct perf_output_handle *handle = &cpuhw->handle;
- unsigned long num_sdb;
aux = perf_get_aux(handle);
if (WARN_ON_ONCE(!aux))
@@ -1511,8 +1583,9 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
/* Inform user space new data arrived */
size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
+ debug_sprintf_event(sfdbg, 6, "%s: #alert %ld\n", __func__,
+ size >> PAGE_SHIFT);
perf_aux_output_end(handle, size);
- num_sdb = aux->sfb.num_sdb;
while (!done) {
/* Get an output handle */
@@ -1520,8 +1593,10 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
if (handle->size == 0) {
pr_err("The AUX buffer with %lu pages for the "
"diagnostic-sampling mode is full\n",
- num_sdb);
- debug_sprintf_event(sfdbg, 1, "AUX buffer used up\n");
+ aux->sfb.num_sdb);
+ debug_sprintf_event(sfdbg, 1,
+ "%s: AUX buffer used up\n",
+ __func__);
break;
}
if (WARN_ON_ONCE(!aux))
@@ -1543,24 +1618,24 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw)
size = range << PAGE_SHIFT;
perf_aux_output_end(&cpuhw->handle, size);
pr_err("Sample data caused the AUX buffer with %lu "
- "pages to overflow\n", num_sdb);
- debug_sprintf_event(sfdbg, 1, "head %lx range %lx "
- "overflow %llx\n",
+ "pages to overflow\n", aux->sfb.num_sdb);
+ debug_sprintf_event(sfdbg, 1, "%s: head %ld range %ld "
+ "overflow %lld\n", __func__,
aux->head, range, overflow);
} else {
size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT;
perf_aux_output_end(&cpuhw->handle, size);
- debug_sprintf_event(sfdbg, 6, "head %lx alert %lx "
+ debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
"already full, try another\n",
+ __func__,
aux->head, aux->alert_mark);
}
}
if (done)
- debug_sprintf_event(sfdbg, 6, "aux_reset_buffer: "
- "[%lx -> %lx -> %lx] (%lx, %lx)\n",
- aux->head, aux->alert_mark, aux->empty_mark,
- AUX_SDB_NUM_ALERT(aux), range);
+ debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld "
+ "empty %ld\n", __func__, aux->head,
+ aux->alert_mark, aux->empty_mark);
}
/*
@@ -1583,8 +1658,7 @@ static void aux_buffer_free(void *data)
kfree(aux->sdb_index);
kfree(aux);
- debug_sprintf_event(sfdbg, 4, "aux_buffer_free: free "
- "%lu SDBTs\n", num_sdbt);
+ debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu\n", __func__, num_sdbt);
}
static void aux_sdb_init(unsigned long sdb)
@@ -1636,13 +1710,13 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages,
}
/* Allocate aux_buffer struct for the event */
- aux = kmalloc(sizeof(struct aux_buffer), GFP_KERNEL);
+ aux = kzalloc(sizeof(struct aux_buffer), GFP_KERNEL);
if (!aux)
goto no_aux;
sfb = &aux->sfb;
/* Allocate sdbt_index for fast reference */
- n_sdbt = (nr_pages + CPUM_SF_SDB_PER_TABLE - 1) / CPUM_SF_SDB_PER_TABLE;
+ n_sdbt = DIV_ROUND_UP(nr_pages, CPUM_SF_SDB_PER_TABLE);
aux->sdbt_index = kmalloc_array(n_sdbt, sizeof(void *), GFP_KERNEL);
if (!aux->sdbt_index)
goto no_sdbt_index;
@@ -1692,8 +1766,7 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages,
*/
aux->empty_mark = sfb->num_sdb - 1;
- debug_sprintf_event(sfdbg, 4, "aux_buffer_setup: setup %lu SDBTs"
- " and %lu SDBs\n",
+ debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu SDBs %lu\n", __func__,
sfb->num_sdbt, sfb->num_sdb);
return aux;
@@ -1716,6 +1789,44 @@ static void cpumsf_pmu_read(struct perf_event *event)
/* Nothing to do ... updates are interrupt-driven */
}
+/* Check if the new sampling period/freqeuncy is appropriate.
+ *
+ * Return non-zero on error and zero on passed checks.
+ */
+static int cpumsf_pmu_check_period(struct perf_event *event, u64 value)
+{
+ struct hws_qsi_info_block si;
+ unsigned long rate;
+ bool do_freq;
+
+ memset(&si, 0, sizeof(si));
+ if (event->cpu == -1) {
+ if (qsi(&si))
+ return -ENODEV;
+ } else {
+ /* Event is pinned to a particular CPU, retrieve the per-CPU
+ * sampling structure for accessing the CPU-specific QSI.
+ */
+ struct cpu_hw_sf *cpuhw = &per_cpu(cpu_hw_sf, event->cpu);
+
+ si = cpuhw->qsi;
+ }
+
+ do_freq = !!SAMPLE_FREQ_MODE(&event->hw);
+ rate = getrate(do_freq, value, &si);
+ if (!rate)
+ return -EINVAL;
+
+ event->attr.sample_period = rate;
+ SAMPL_RATE(&event->hw) = rate;
+ hw_init_period(&event->hw, SAMPL_RATE(&event->hw));
+ debug_sprintf_event(sfdbg, 4, "%s:"
+ " cpu %d value %#llx period %#llx freq %d\n",
+ __func__, event->cpu, value,
+ event->attr.sample_period, do_freq);
+ return 0;
+}
+
/* Activate sampling control.
* Next call of pmu_enable() starts sampling.
*/
@@ -1787,7 +1898,7 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags)
if (!SAMPL_DIAG_MODE(&event->hw)) {
cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt;
cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt;
- hw_reset_registers(&event->hw, cpuhw->sfb.sdbt);
+ TEAR_REG(&event->hw) = (unsigned long) cpuhw->sfb.sdbt;
}
/* Ensure sampling functions are in the disabled state. If disabled,
@@ -1879,10 +1990,12 @@ static struct attribute_group cpumsf_pmu_events_group = {
.name = "events",
.attrs = cpumsf_pmu_events_attr,
};
+
static struct attribute_group cpumsf_pmu_format_group = {
.name = "format",
.attrs = cpumsf_pmu_format_attr,
};
+
static const struct attribute_group *cpumsf_pmu_attr_groups[] = {
&cpumsf_pmu_events_group,
&cpumsf_pmu_format_group,
@@ -1905,6 +2018,8 @@ static struct pmu cpumf_sampling = {
.setup_aux = aux_buffer_setup,
.free_aux = aux_buffer_free,
+
+ .check_period = cpumsf_pmu_check_period,
};
static void cpumf_measurement_alert(struct ext_code ext_code,
@@ -1938,7 +2053,8 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
/* Report measurement alerts only for non-PRA codes */
if (alert != CPU_MF_INT_SF_PRA)
- debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert);
+ debug_sprintf_event(sfdbg, 6, "%s: alert %#x\n", __func__,
+ alert);
/* Sampling authorization change request */
if (alert & CPU_MF_INT_SF_SACA)
@@ -1959,6 +2075,7 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
sf_disable();
}
}
+
static int cpusf_pmu_setup(unsigned int cpu, int flags)
{
/* Ignore the notification if no events are scheduled on the PMU.
@@ -2015,7 +2132,7 @@ static int param_set_sfb_size(const char *val, const struct kernel_param *kp)
sfb_set_limits(min, max);
pr_info("The sampling buffer limits have changed to: "
- "min=%lu max=%lu (diag=x%lu)\n",
+ "min %lu max %lu (diag %lu)\n",
CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR);
return 0;
}
@@ -2033,7 +2150,7 @@ static const struct kernel_param_ops param_ops_sfb_size = {
static void __init pr_cpumsf_err(unsigned int reason)
{
pr_err("Sampling facility support for perf is not available: "
- "reason=%04x\n", reason);
+ "reason %#x\n", reason);
}
static int __init init_cpum_sampling_pmu(void)
@@ -2096,5 +2213,6 @@ static int __init init_cpum_sampling_pmu(void)
out:
return err;
}
+
arch_initcall(init_cpum_sampling_pmu);
core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640);
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index fcb6c2e92b07..1e75cc983546 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -224,9 +224,13 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
struct unwind_state state;
+ unsigned long addr;
- unwind_for_each_frame(&state, current, regs, 0)
- perf_callchain_store(entry, state.ip);
+ unwind_for_each_frame(&state, current, regs, 0) {
+ addr = unwind_get_return_address(&state);
+ if (!addr || perf_callchain_store(entry, addr))
+ return;
+ }
}
/* Perf definitions for PMU event attributes in sysfs */
diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S
index 59dee9d3bebf..eee3a482195a 100644
--- a/arch/s390/kernel/pgm_check.S
+++ b/arch/s390/kernel/pgm_check.S
@@ -81,7 +81,7 @@ PGM_CHECK_DEFAULT /* 3c */
PGM_CHECK_DEFAULT /* 3d */
PGM_CHECK_DEFAULT /* 3e */
PGM_CHECK_DEFAULT /* 3f */
-PGM_CHECK_DEFAULT /* 40 */
+PGM_CHECK(monitor_event_exception) /* 40 */
PGM_CHECK_DEFAULT /* 41 */
PGM_CHECK_DEFAULT /* 42 */
PGM_CHECK_DEFAULT /* 43 */
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 63873aa6693f..6ccef5f29761 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -40,6 +40,7 @@
#include <asm/stacktrace.h>
#include <asm/switch_to.h>
#include <asm/runtime_instr.h>
+#include <asm/unwind.h>
#include "entry.h"
asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
@@ -178,26 +179,31 @@ EXPORT_SYMBOL(dump_fpu);
unsigned long get_wchan(struct task_struct *p)
{
- struct stack_frame *sf, *low, *high;
- unsigned long return_address;
- int count;
+ struct unwind_state state;
+ unsigned long ip = 0;
if (!p || p == current || p->state == TASK_RUNNING || !task_stack_page(p))
return 0;
- low = task_stack_page(p);
- high = (struct stack_frame *) task_pt_regs(p);
- sf = (struct stack_frame *) p->thread.ksp;
- if (sf <= low || sf > high)
+
+ if (!try_get_task_stack(p))
return 0;
- for (count = 0; count < 16; count++) {
- sf = (struct stack_frame *) sf->back_chain;
- if (sf <= low || sf > high)
- return 0;
- return_address = sf->gprs[8];
- if (!in_sched_functions(return_address))
- return return_address;
+
+ unwind_for_each_frame(&state, p, NULL, 0) {
+ if (state.stack_info.type != STACK_TYPE_TASK) {
+ ip = 0;
+ break;
+ }
+
+ ip = unwind_get_return_address(&state);
+ if (!ip)
+ break;
+
+ if (!in_sched_functions(ip))
+ break;
}
- return 0;
+
+ put_task_stack(p);
+ return ip;
}
unsigned long arch_align_stack(unsigned long sp)
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index ad71132374f0..58faa12542a1 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -856,7 +856,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
}
/* Do the secure computing check after ptrace. */
- if (secure_computing(NULL)) {
+ if (secure_computing()) {
/* seccomp failures shouldn't expose any additional code. */
return -1;
}
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 253177900950..b2c2f75860e8 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -99,6 +99,7 @@ int __bootdata_preserved(prot_virt_guest);
int __bootdata(noexec_disabled);
int __bootdata(memory_end_set);
unsigned long __bootdata(memory_end);
+unsigned long __bootdata(vmalloc_size);
unsigned long __bootdata(max_physmem_end);
struct mem_detect_info __bootdata(mem_detect);
@@ -110,6 +111,8 @@ unsigned long __bootdata_preserved(__etext_dma);
unsigned long __bootdata_preserved(__sdma);
unsigned long __bootdata_preserved(__edma);
unsigned long __bootdata_preserved(__kaslr_offset);
+unsigned int __bootdata_preserved(zlib_dfltcc_support);
+EXPORT_SYMBOL(zlib_dfltcc_support);
unsigned long VMALLOC_START;
EXPORT_SYMBOL(VMALLOC_START);
@@ -168,15 +171,15 @@ static void __init set_preferred_console(void)
static int __init conmode_setup(char *str)
{
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
- if (strncmp(str, "hwc", 4) == 0 || strncmp(str, "sclp", 5) == 0)
+ if (!strcmp(str, "hwc") || !strcmp(str, "sclp"))
SET_CONSOLE_SCLP;
#endif
#if defined(CONFIG_TN3215_CONSOLE)
- if (strncmp(str, "3215", 5) == 0)
+ if (!strcmp(str, "3215"))
SET_CONSOLE_3215;
#endif
#if defined(CONFIG_TN3270_CONSOLE)
- if (strncmp(str, "3270", 5) == 0)
+ if (!strcmp(str, "3270"))
SET_CONSOLE_3270;
#endif
set_preferred_console();
@@ -211,7 +214,7 @@ static void __init conmode_default(void)
#endif
return;
}
- if (strncmp(ptr + 8, "3270", 4) == 0) {
+ if (str_has_prefix(ptr + 8, "3270")) {
#if defined(CONFIG_TN3270_CONSOLE)
SET_CONSOLE_3270;
#elif defined(CONFIG_TN3215_CONSOLE)
@@ -219,7 +222,7 @@ static void __init conmode_default(void)
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
SET_CONSOLE_SCLP;
#endif
- } else if (strncmp(ptr + 8, "3215", 4) == 0) {
+ } else if (str_has_prefix(ptr + 8, "3215")) {
#if defined(CONFIG_TN3215_CONSOLE)
SET_CONSOLE_3215;
#elif defined(CONFIG_TN3270_CONSOLE)
@@ -240,8 +243,6 @@ static void __init conmode_default(void)
SET_CONSOLE_SCLP;
#endif
}
- if (IS_ENABLED(CONFIG_VT) && IS_ENABLED(CONFIG_DUMMY_CONSOLE))
- conswitchp = &dummy_con;
}
#ifdef CONFIG_CRASH_DUMP
@@ -302,15 +303,6 @@ void machine_power_off(void)
void (*pm_power_off)(void) = machine_power_off;
EXPORT_SYMBOL_GPL(pm_power_off);
-static int __init parse_vmalloc(char *arg)
-{
- if (!arg)
- return -EINVAL;
- VMALLOC_END = (memparse(arg, &arg) + PAGE_SIZE - 1) & PAGE_MASK;
- return 0;
-}
-early_param("vmalloc", parse_vmalloc);
-
void *restart_stack __section(.data);
unsigned long stack_alloc(void)
@@ -363,7 +355,6 @@ early_initcall(async_stack_realloc);
void __init arch_call_rest_init(void)
{
- struct stack_frame *frame;
unsigned long stack;
stack = stack_alloc();
@@ -376,13 +367,7 @@ void __init arch_call_rest_init(void)
set_task_stack_end_magic(current);
stack += STACK_INIT_OFFSET;
S390_lowcore.kernel_stack = stack;
- frame = (struct stack_frame *) stack;
- memset(frame, 0, sizeof(*frame));
- /* Branch to rest_init on the new stack, never returns */
- asm volatile(
- " la 15,0(%[_frame])\n"
- " jg rest_init\n"
- : : [_frame] "a" (frame));
+ CALL_ON_STACK_NORETURN(rest_init, stack);
}
static void __init setup_lowcore_dat_off(void)
@@ -563,10 +548,9 @@ static void __init setup_resources(void)
static void __init setup_memory_end(void)
{
- unsigned long vmax, vmalloc_size, tmp;
+ unsigned long vmax, tmp;
/* Choose kernel address space layout: 3 or 4 levels. */
- vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN;
if (IS_ENABLED(CONFIG_KASAN)) {
vmax = IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING)
? _REGION1_SIZE
@@ -777,14 +761,6 @@ static void __init free_mem_detect_info(void)
memblock_free(start, size);
}
-static void __init memblock_physmem_add(phys_addr_t start, phys_addr_t size)
-{
- memblock_dbg("memblock_physmem_add: [%#016llx-%#016llx]\n",
- start, start + size - 1);
- memblock_add_range(&memblock.memory, start, size, 0, 0);
- memblock_add_range(&memblock.physmem, start, size, 0, 0);
-}
-
static const char * __init get_mem_info_source(void)
{
switch (mem_detect.info_source) {
@@ -809,8 +785,10 @@ static void __init memblock_add_mem_detect_info(void)
get_mem_info_source(), mem_detect.info_source);
/* keep memblock lists close to the kernel */
memblock_set_bottom_up(true);
- for_each_mem_detect_block(i, &start, &end)
+ for_each_mem_detect_block(i, &start, &end) {
+ memblock_add(start, end - start);
memblock_physmem_add(start, end - start);
+ }
memblock_set_bottom_up(false);
memblock_dump_all();
}
@@ -990,6 +968,10 @@ static int __init setup_hwcaps(void)
case 0x3907:
strcpy(elf_platform, "z14");
break;
+ case 0x8561:
+ case 0x8562:
+ strcpy(elf_platform, "z15");
+ break;
}
/*
@@ -1064,7 +1046,7 @@ static void __init log_component_list(void)
if (!early_ipl_comp_list_addr)
return;
- if (ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR)
+ if (ipl_block.hdr.flags & IPL_PL_FLAG_SIPL)
pr_info("Linux is running with Secure-IPL enabled\n");
else
pr_info("Linux is running with Secure-IPL disabled\n");
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 44974654cbd0..a08bd2522dd9 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -262,10 +262,13 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
lc->spinlock_index = 0;
lc->percpu_offset = __per_cpu_offset[cpu];
lc->kernel_asce = S390_lowcore.kernel_asce;
+ lc->user_asce = S390_lowcore.kernel_asce;
lc->machine_flags = S390_lowcore.machine_flags;
lc->user_timer = lc->system_timer =
lc->steal_timer = lc->avg_steal_timer = 0;
__ctl_store(lc->cregs_save_area, 0, 15);
+ lc->cregs_save_area[1] = lc->kernel_asce;
+ lc->cregs_save_area[7] = lc->vdso_asce;
save_access_regs((unsigned int *) lc->access_regs_save_area);
memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
sizeof(lc->stfle_fac_list));
@@ -410,14 +413,11 @@ EXPORT_SYMBOL(arch_vcpu_is_preempted);
void smp_yield_cpu(int cpu)
{
- if (MACHINE_HAS_DIAG9C) {
- diag_stat_inc_norecursion(DIAG_STAT_X09C);
- asm volatile("diag %0,0,0x9c"
- : : "d" (pcpu_devices[cpu].address));
- } else if (MACHINE_HAS_DIAG44 && !smp_cpu_mtid) {
- diag_stat_inc_norecursion(DIAG_STAT_X044);
- asm volatile("diag 0,0,0x44");
- }
+ if (!MACHINE_HAS_DIAG9C)
+ return;
+ diag_stat_inc_norecursion(DIAG_STAT_X09C);
+ asm volatile("diag %0,0,0x9c"
+ : : "d" (pcpu_devices[cpu].address));
}
/*
@@ -724,39 +724,67 @@ static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
static int smp_add_present_cpu(int cpu);
-static int __smp_rescan_cpus(struct sclp_core_info *info, int sysfs_add)
+static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
+ bool configured, bool early)
{
struct pcpu *pcpu;
- cpumask_t avail;
- int cpu, nr, i, j;
+ int cpu, nr, i;
u16 address;
nr = 0;
- cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
- cpu = cpumask_first(&avail);
- for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) {
- if (sclp.has_core_type && info->core[i].type != boot_core_type)
+ if (sclp.has_core_type && core->type != boot_core_type)
+ return nr;
+ cpu = cpumask_first(avail);
+ address = core->core_id << smp_cpu_mt_shift;
+ for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) {
+ if (pcpu_find_address(cpu_present_mask, address + i))
continue;
- address = info->core[i].core_id << smp_cpu_mt_shift;
- for (j = 0; j <= smp_cpu_mtid; j++) {
- if (pcpu_find_address(cpu_present_mask, address + j))
- continue;
- pcpu = pcpu_devices + cpu;
- pcpu->address = address + j;
- pcpu->state =
- (cpu >= info->configured*(smp_cpu_mtid + 1)) ?
- CPU_STATE_STANDBY : CPU_STATE_CONFIGURED;
- smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
- set_cpu_present(cpu, true);
- if (sysfs_add && smp_add_present_cpu(cpu) != 0)
- set_cpu_present(cpu, false);
- else
- nr++;
- cpu = cpumask_next(cpu, &avail);
- if (cpu >= nr_cpu_ids)
+ pcpu = pcpu_devices + cpu;
+ pcpu->address = address + i;
+ if (configured)
+ pcpu->state = CPU_STATE_CONFIGURED;
+ else
+ pcpu->state = CPU_STATE_STANDBY;
+ smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
+ set_cpu_present(cpu, true);
+ if (!early && smp_add_present_cpu(cpu) != 0)
+ set_cpu_present(cpu, false);
+ else
+ nr++;
+ cpumask_clear_cpu(cpu, avail);
+ cpu = cpumask_next(cpu, avail);
+ }
+ return nr;
+}
+
+static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
+{
+ struct sclp_core_entry *core;
+ cpumask_t avail;
+ bool configured;
+ u16 core_id;
+ int nr, i;
+
+ nr = 0;
+ cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
+ /*
+ * Add IPL core first (which got logical CPU number 0) to make sure
+ * that all SMT threads get subsequent logical CPU numbers.
+ */
+ if (early) {
+ core_id = pcpu_devices[0].address >> smp_cpu_mt_shift;
+ for (i = 0; i < info->configured; i++) {
+ core = &info->core[i];
+ if (core->core_id == core_id) {
+ nr += smp_add_core(core, &avail, true, early);
break;
+ }
}
}
+ for (i = 0; i < info->combined; i++) {
+ configured = i < info->configured;
+ nr += smp_add_core(&info->core[i], &avail, configured, early);
+ }
return nr;
}
@@ -805,7 +833,7 @@ void __init smp_detect_cpus(void)
/* Add CPUs present at boot */
get_online_cpus();
- __smp_rescan_cpus(info, 0);
+ __smp_rescan_cpus(info, true);
put_online_cpus();
memblock_free_early((unsigned long)info, sizeof(*info));
}
@@ -816,6 +844,8 @@ static void smp_init_secondary(void)
S390_lowcore.last_update_clock = get_tod_clock();
restore_access_regs(S390_lowcore.access_regs_save_area);
+ set_cpu_flag(CIF_ASCE_PRIMARY);
+ set_cpu_flag(CIF_ASCE_SECONDARY);
cpu_init();
preempt_disable();
init_cpu_timer();
@@ -843,7 +873,7 @@ static void __no_sanitize_address smp_start_secondary(void *cpuvoid)
S390_lowcore.restart_source = -1UL;
__ctl_load(S390_lowcore.cregs_save_area, 0, 15);
__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
- CALL_ON_STACK(smp_init_secondary, S390_lowcore.kernel_stack, 0);
+ CALL_ON_STACK_NORETURN(smp_init_secondary, S390_lowcore.kernel_stack);
}
/* Upping and downing of CPUs */
@@ -1148,7 +1178,7 @@ int __ref smp_rescan_cpus(void)
smp_get_core_info(info, 0);
get_online_cpus();
mutex_lock(&smp_cpu_state_mutex);
- nr = __smp_rescan_cpus(info, 1);
+ nr = __smp_rescan_cpus(info, false);
mutex_unlock(&smp_cpu_state_mutex);
put_online_cpus();
kfree(info);
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index f6a620f854e1..fc5419ac64c8 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -6,57 +6,62 @@
* Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
*/
-#include <linux/sched.h>
-#include <linux/sched/debug.h>
#include <linux/stacktrace.h>
-#include <linux/kallsyms.h>
-#include <linux/export.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>
+#include <asm/kprobes.h>
-void save_stack_trace(struct stack_trace *trace)
+void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
+ struct task_struct *task, struct pt_regs *regs)
{
struct unwind_state state;
+ unsigned long addr;
- unwind_for_each_frame(&state, current, NULL, 0) {
- if (trace->nr_entries >= trace->max_entries)
+ unwind_for_each_frame(&state, task, regs, 0) {
+ addr = unwind_get_return_address(&state);
+ if (!addr || !consume_entry(cookie, addr, false))
break;
- if (trace->skip > 0)
- trace->skip--;
- else
- trace->entries[trace->nr_entries++] = state.ip;
}
}
-EXPORT_SYMBOL_GPL(save_stack_trace);
-void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+/*
+ * This function returns an error if it detects any unreliable features of the
+ * stack. Otherwise it guarantees that the stack trace is reliable.
+ *
+ * If the task is not 'current', the caller *must* ensure the task is inactive.
+ */
+int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+ void *cookie, struct task_struct *task)
{
struct unwind_state state;
+ unsigned long addr;
- unwind_for_each_frame(&state, tsk, NULL, 0) {
- if (trace->nr_entries >= trace->max_entries)
- break;
- if (in_sched_functions(state.ip))
- continue;
- if (trace->skip > 0)
- trace->skip--;
- else
- trace->entries[trace->nr_entries++] = state.ip;
- }
-}
-EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
+ unwind_for_each_frame(&state, task, NULL, 0) {
+ if (state.stack_info.type != STACK_TYPE_TASK)
+ return -EINVAL;
-void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
-{
- struct unwind_state state;
+ if (state.regs)
+ return -EINVAL;
- unwind_for_each_frame(&state, current, regs, 0) {
- if (trace->nr_entries >= trace->max_entries)
- break;
- if (trace->skip > 0)
- trace->skip--;
- else
- trace->entries[trace->nr_entries++] = state.ip;
+ addr = unwind_get_return_address(&state);
+ if (!addr)
+ return -EINVAL;
+
+#ifdef CONFIG_KPROBES
+ /*
+ * Mark stacktraces with kretprobed functions on them
+ * as unreliable.
+ */
+ if (state.ip == (unsigned long)kretprobe_trampoline)
+ return -EINVAL;
+#endif
+
+ if (!consume_entry(cookie, addr, false))
+ return -EINVAL;
}
+
+ /* Check for stack corruption */
+ if (unwind_error(&state))
+ return -EINVAL;
+ return 0;
}
-EXPORT_SYMBOL_GPL(save_stack_trace_regs);
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index 3054e9c035a3..bd7bd3581a0f 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -438,3 +438,5 @@
433 common fspick sys_fspick sys_fspick
434 common pidfd_open sys_pidfd_open sys_pidfd_open
435 common clone3 sys_clone3 sys_clone3
+437 common openat2 sys_openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index e8766beee5ad..f9d070d016e3 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -110,15 +110,6 @@ unsigned long long notrace sched_clock(void)
}
NOKPROBE_SYMBOL(sched_clock);
-/*
- * Monotonic_clock - returns # of nanoseconds passed since time_init()
- */
-unsigned long long monotonic_clock(void)
-{
- return sched_clock();
-}
-EXPORT_SYMBOL(monotonic_clock);
-
static void ext_to_timespec64(unsigned char *clk, struct timespec64 *xt)
{
unsigned long long high, low, rem, sec, nsec;
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 2db6fb405a9a..3627953007ed 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -311,7 +311,8 @@ int arch_update_cpu_topology(void)
on_each_cpu(__arch_update_dedicated_flag, NULL, 0);
for_each_online_cpu(cpu) {
dev = get_cpu_device(cpu);
- kobject_uevent(&dev->kobj, KOBJ_CHANGE);
+ if (dev)
+ kobject_uevent(&dev->kobj, KOBJ_CHANGE);
}
return rc;
}
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 164c0282b41a..dc75588d7894 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -53,11 +53,6 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
if (fixup)
regs->psw.addr = extable_fixup(fixup);
else {
- enum bug_trap_type btt;
-
- btt = report_bug(regs->psw.addr, regs);
- if (btt == BUG_TRAP_TYPE_WARN)
- return;
die(regs, str);
}
}
@@ -245,6 +240,27 @@ void space_switch_exception(struct pt_regs *regs)
do_trap(regs, SIGILL, ILL_PRVOPC, "space switch event");
}
+void monitor_event_exception(struct pt_regs *regs)
+{
+ const struct exception_table_entry *fixup;
+
+ if (user_mode(regs))
+ return;
+
+ switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) {
+ case BUG_TRAP_TYPE_NONE:
+ fixup = s390_search_extables(regs->psw.addr);
+ if (fixup)
+ regs->psw.addr = extable_fixup(fixup);
+ break;
+ case BUG_TRAP_TYPE_WARN:
+ break;
+ case BUG_TRAP_TYPE_BUG:
+ die(regs, "monitor event");
+ break;
+ }
+}
+
void kernel_stack_overflow(struct pt_regs *regs)
{
bust_spinlocks(1);
@@ -255,8 +271,23 @@ void kernel_stack_overflow(struct pt_regs *regs)
}
NOKPROBE_SYMBOL(kernel_stack_overflow);
+static void test_monitor_call(void)
+{
+ int val = 1;
+
+ asm volatile(
+ " mc 0,0\n"
+ "0: xgr %0,%0\n"
+ "1:\n"
+ EX_TABLE(0b,1b)
+ : "+d" (val));
+ if (!val)
+ panic("Monitor call doesn't work!\n");
+}
+
void __init trap_init(void)
{
sort_extable(__start_dma_ex_table, __stop_dma_ex_table);
local_mcck_enable();
+ test_monitor_call();
}
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index 8fc9daae47a2..707fd99f6734 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -36,6 +36,19 @@ static bool update_stack_info(struct unwind_state *state, unsigned long sp)
return true;
}
+static inline bool is_final_pt_regs(struct unwind_state *state,
+ struct pt_regs *regs)
+{
+ /* user mode or kernel thread pt_regs at the bottom of task stack */
+ if (task_pt_regs(state->task) == regs)
+ return true;
+
+ /* user mode pt_regs at the bottom of irq stack */
+ return state->stack_info.type == STACK_TYPE_IRQ &&
+ state->stack_info.end - sizeof(struct pt_regs) == (unsigned long)regs &&
+ READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE;
+}
+
bool unwind_next_frame(struct unwind_state *state)
{
struct stack_info *info = &state->stack_info;
@@ -46,15 +59,16 @@ bool unwind_next_frame(struct unwind_state *state)
regs = state->regs;
if (unlikely(regs)) {
- sp = READ_ONCE_NOCHECK(regs->gprs[15]);
- if (unlikely(outside_of_stack(state, sp))) {
- if (!update_stack_info(state, sp))
- goto out_err;
- }
+ sp = state->sp;
sf = (struct stack_frame *) sp;
ip = READ_ONCE_NOCHECK(sf->gprs[8]);
reliable = false;
regs = NULL;
+ if (!__kernel_text_address(ip)) {
+ /* skip bogus %r14 */
+ state->regs = NULL;
+ return unwind_next_frame(state);
+ }
} else {
sf = (struct stack_frame *) state->sp;
sp = READ_ONCE_NOCHECK(sf->back_chain);
@@ -71,21 +85,25 @@ bool unwind_next_frame(struct unwind_state *state)
/* No back-chain, look for a pt_regs structure */
sp = state->sp + STACK_FRAME_OVERHEAD;
if (!on_stack(info, sp, sizeof(struct pt_regs)))
- goto out_stop;
+ goto out_err;
regs = (struct pt_regs *) sp;
- if (READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE)
+ if (is_final_pt_regs(state, regs))
goto out_stop;
ip = READ_ONCE_NOCHECK(regs->psw.addr);
+ sp = READ_ONCE_NOCHECK(regs->gprs[15]);
+ if (unlikely(outside_of_stack(state, sp))) {
+ if (!update_stack_info(state, sp))
+ goto out_err;
+ }
reliable = true;
}
}
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- /* Decode any ftrace redirection */
- if (ip == (unsigned long) return_to_handler)
- ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
- ip, (void *) sp);
-#endif
+ /* Sanity check: ABI requires SP to be aligned 8 bytes. */
+ if (sp & 0x7)
+ goto out_err;
+
+ ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *) sp);
/* Update unwind state */
state->sp = sp;
@@ -103,13 +121,11 @@ out_stop:
EXPORT_SYMBOL_GPL(unwind_next_frame);
void __unwind_start(struct unwind_state *state, struct task_struct *task,
- struct pt_regs *regs, unsigned long sp)
+ struct pt_regs *regs, unsigned long first_frame)
{
struct stack_info *info = &state->stack_info;
- unsigned long *mask = &state->stack_mask;
struct stack_frame *sf;
- unsigned long ip;
- bool reliable;
+ unsigned long ip, sp;
memset(state, 0, sizeof(*state));
state->task = task;
@@ -121,35 +137,46 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
return;
}
+ /* Get the instruction pointer from pt_regs or the stack frame */
+ if (regs) {
+ ip = regs->psw.addr;
+ sp = regs->gprs[15];
+ } else if (task == current) {
+ sp = current_frame_address();
+ } else {
+ sp = task->thread.ksp;
+ }
+
/* Get current stack pointer and initialize stack info */
- if (get_stack_info(sp, task, info, mask) != 0 ||
- !on_stack(info, sp, sizeof(struct stack_frame))) {
+ if (!update_stack_info(state, sp)) {
/* Something is wrong with the stack pointer */
info->type = STACK_TYPE_UNKNOWN;
state->error = true;
return;
}
- /* Get the instruction pointer from pt_regs or the stack frame */
- if (regs) {
- ip = READ_ONCE_NOCHECK(regs->psw.addr);
- reliable = true;
- } else {
- sf = (struct stack_frame *) sp;
+ if (!regs) {
+ /* Stack frame is within valid stack */
+ sf = (struct stack_frame *)sp;
ip = READ_ONCE_NOCHECK(sf->gprs[8]);
- reliable = false;
}
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- /* Decode any ftrace redirection */
- if (ip == (unsigned long) return_to_handler)
- ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
- ip, NULL);
-#endif
+ ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL);
/* Update unwind state */
state->sp = sp;
state->ip = ip;
- state->reliable = reliable;
+ state->reliable = true;
+
+ if (!first_frame)
+ return;
+ /* Skip through the call chain to the specified starting frame */
+ while (!unwind_done(state)) {
+ if (on_stack(&state->stack_info, first_frame, sizeof(struct stack_frame))) {
+ if (state->sp >= first_frame)
+ break;
+ }
+ unwind_next_frame(state);
+ }
}
EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index c6bc190f3c28..bcc9bdb39ba2 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -29,13 +29,6 @@
#include <asm/vdso.h>
#include <asm/facility.h>
-#ifdef CONFIG_COMPAT_VDSO
-extern char vdso32_start, vdso32_end;
-static void *vdso32_kbase = &vdso32_start;
-static unsigned int vdso32_pages;
-static struct page **vdso32_pagelist;
-#endif
-
extern char vdso64_start, vdso64_end;
static void *vdso64_kbase = &vdso64_start;
static unsigned int vdso64_pages;
@@ -55,12 +48,6 @@ static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
vdso_pagelist = vdso64_pagelist;
vdso_pages = vdso64_pages;
-#ifdef CONFIG_COMPAT_VDSO
- if (vma->vm_mm->context.compat_mm) {
- vdso_pagelist = vdso32_pagelist;
- vdso_pages = vdso32_pages;
- }
-#endif
if (vmf->pgoff >= vdso_pages)
return VM_FAULT_SIGBUS;
@@ -76,10 +63,6 @@ static int vdso_mremap(const struct vm_special_mapping *sm,
unsigned long vdso_pages;
vdso_pages = vdso64_pages;
-#ifdef CONFIG_COMPAT_VDSO
- if (vma->vm_mm->context.compat_mm)
- vdso_pages = vdso32_pages;
-#endif
if ((vdso_pages << PAGE_SHIFT) != vma->vm_end - vma->vm_start)
return -EINVAL;
@@ -97,21 +80,13 @@ static const struct vm_special_mapping vdso_mapping = {
.mremap = vdso_mremap,
};
-static int __init vdso_setup(char *s)
+static int __init vdso_setup(char *str)
{
- unsigned long val;
- int rc;
+ bool enabled;
- rc = 0;
- if (strncmp(s, "on", 3) == 0)
- vdso_enabled = 1;
- else if (strncmp(s, "off", 4) == 0)
- vdso_enabled = 0;
- else {
- rc = kstrtoul(s, 0, &val);
- vdso_enabled = rc ? 0 : !!val;
- }
- return !rc;
+ if (!kstrtobool(str, &enabled))
+ vdso_enabled = enabled;
+ return 1;
}
__setup("vdso=", vdso_setup);
@@ -217,12 +192,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
if (!vdso_enabled)
return 0;
+ if (is_compat_task())
+ return 0;
+
vdso_pages = vdso64_pages;
-#ifdef CONFIG_COMPAT_VDSO
- mm->context.compat_mm = is_compat_task();
- if (mm->context.compat_mm)
- vdso_pages = vdso32_pages;
-#endif
/*
* vDSO has a problem and was disabled, just don't "enable" it for
* the process
@@ -275,23 +248,6 @@ static int __init vdso_init(void)
int i;
vdso_init_data(vdso_data);
-#ifdef CONFIG_COMPAT_VDSO
- /* Calculate the size of the 32 bit vDSO */
- vdso32_pages = ((&vdso32_end - &vdso32_start
- + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
-
- /* Make sure pages are in the correct state */
- vdso32_pagelist = kcalloc(vdso32_pages + 1, sizeof(struct page *),
- GFP_KERNEL);
- BUG_ON(vdso32_pagelist == NULL);
- for (i = 0; i < vdso32_pages - 1; i++) {
- struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
- get_page(pg);
- vdso32_pagelist[i] = pg;
- }
- vdso32_pagelist[vdso32_pages - 1] = virt_to_page(vdso_data);
- vdso32_pagelist[vdso32_pages] = NULL;
-#endif
/* Calculate the size of the 64 bit vDSO */
vdso64_pages = ((&vdso64_end - &vdso64_start
diff --git a/arch/s390/kernel/vdso32/.gitignore b/arch/s390/kernel/vdso32/.gitignore
deleted file mode 100644
index e45fba9d0ced..000000000000
--- a/arch/s390/kernel/vdso32/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-vdso32.lds
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
deleted file mode 100644
index aee9ffbccb54..000000000000
--- a/arch/s390/kernel/vdso32/Makefile
+++ /dev/null
@@ -1,66 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# List of files in the vdso, has to be asm only for now
-
-KCOV_INSTRUMENT := n
-
-obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o
-
-# Build rules
-
-targets := $(obj-vdso32) vdso32.so vdso32.so.dbg
-obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
-
-KBUILD_AFLAGS += -DBUILD_VDSO
-KBUILD_CFLAGS += -DBUILD_VDSO
-
-KBUILD_AFLAGS_31 := $(filter-out -m64,$(KBUILD_AFLAGS))
-KBUILD_AFLAGS_31 += -m31 -s
-
-KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS))
-KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin
-KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
- -Wl,--hash-style=both
-
-$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31)
-$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31)
-
-obj-y += vdso32_wrapper.o
-extra-y += vdso32.lds
-CPPFLAGS_vdso32.lds += -P -C -U$(ARCH)
-
-# Disable gcov profiling, ubsan and kasan for VDSO code
-GCOV_PROFILE := n
-UBSAN_SANITIZE := n
-KASAN_SANITIZE := n
-
-# Force dependency (incbin is bad)
-$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
-
-# link rule for the .so file, .lds has to be first
-$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE
- $(call if_changed,vdso32ld)
-
-# strip rule for the .so file
-$(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg FORCE
- $(call if_changed,objcopy)
-
-# assembly rules for the .S files
-$(obj-vdso32): %.o: %.S FORCE
- $(call if_changed_dep,vdso32as)
-
-# actual build commands
-quiet_cmd_vdso32ld = VDSO32L $@
- cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@
-quiet_cmd_vdso32as = VDSO32A $@
- cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $<
-
-# install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL $@
- cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-
-vdso32.so: $(obj)/vdso32.so.dbg
- @mkdir -p $(MODLIB)/vdso
- $(call cmd,vdso_install)
-
-vdso_install: vdso32.so
diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S
deleted file mode 100644
index eaf9cf1417f6..000000000000
--- a/arch/s390/kernel/vdso32/clock_getres.S
+++ /dev/null
@@ -1,44 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of clock_getres() for 32 bits processes in a
- * s390 kernel for use in the vDSO
- *
- * Copyright IBM Corp. 2008
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-
- .text
- .align 4
- .globl __kernel_clock_getres
- .type __kernel_clock_getres,@function
-__kernel_clock_getres:
- CFI_STARTPROC
- basr %r1,0
- la %r1,4f-.(%r1)
- chi %r2,__CLOCK_REALTIME
- je 0f
- chi %r2,__CLOCK_MONOTONIC
- je 0f
- la %r1,5f-4f(%r1)
- chi %r2,__CLOCK_REALTIME_COARSE
- je 0f
- chi %r2,__CLOCK_MONOTONIC_COARSE
- jne 3f
-0: ltr %r3,%r3
- jz 2f /* res == NULL */
-1: l %r0,0(%r1)
- xc 0(4,%r3),0(%r3) /* set tp->tv_sec to zero */
- st %r0,4(%r3) /* store tp->tv_usec */
-2: lhi %r2,0
- br %r14
-3: lhi %r1,__NR_clock_getres /* fallback to svc */
- svc 0
- br %r14
- CFI_ENDPROC
-4: .long __CLOCK_REALTIME_RES
-5: .long __CLOCK_COARSE_RES
- .size __kernel_clock_getres,.-__kernel_clock_getres
diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S
deleted file mode 100644
index ada5c11a16e5..000000000000
--- a/arch/s390/kernel/vdso32/clock_gettime.S
+++ /dev/null
@@ -1,179 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of clock_gettime() for 32 bits processes in a
- * s390 kernel for use in the vDSO
- *
- * Copyright IBM Corp. 2008
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-#include <asm/ptrace.h>
-
- .text
- .align 4
- .globl __kernel_clock_gettime
- .type __kernel_clock_gettime,@function
-__kernel_clock_gettime:
- CFI_STARTPROC
- ahi %r15,-16
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
- CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
- basr %r5,0
-0: al %r5,21f-0b(%r5) /* get &_vdso_data */
- chi %r2,__CLOCK_REALTIME_COARSE
- je 10f
- chi %r2,__CLOCK_REALTIME
- je 11f
- chi %r2,__CLOCK_MONOTONIC_COARSE
- je 9f
- chi %r2,__CLOCK_MONOTONIC
- jne 19f
-
- /* CLOCK_MONOTONIC */
-1: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
- tml %r4,0x0001 /* pending update ? loop */
- jnz 1b
- stcke 0(%r15) /* Store TOD clock */
- lm %r0,%r1,1(%r15)
- s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
- sl %r1,__VDSO_XTIME_STAMP+4(%r5)
- brc 3,2f
- ahi %r0,-1
-2: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */
- lr %r2,%r0
- l %r0,__VDSO_TK_MULT(%r5)
- ltr %r1,%r1
- mr %r0,%r0
- jnm 3f
- a %r0,__VDSO_TK_MULT(%r5)
-3: alr %r0,%r2
- al %r0,__VDSO_WTOM_NSEC(%r5)
- al %r1,__VDSO_WTOM_NSEC+4(%r5)
- brc 12,5f
- ahi %r0,1
-5: l %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
- srdl %r0,0(%r2) /* >> tk->shift */
- l %r2,__VDSO_WTOM_SEC+4(%r5)
- cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
- jne 1b
- basr %r5,0
-6: ltr %r0,%r0
- jnz 7f
- cl %r1,20f-6b(%r5)
- jl 8f
-7: ahi %r2,1
- sl %r1,20f-6b(%r5)
- brc 3,6b
- ahi %r0,-1
- j 6b
-8: st %r2,0(%r3) /* store tp->tv_sec */
- st %r1,4(%r3) /* store tp->tv_nsec */
- lhi %r2,0
- ahi %r15,16
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
- CFI_RESTORE 15
- br %r14
-
- /* CLOCK_MONOTONIC_COARSE */
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
- CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-9: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
- tml %r4,0x0001 /* pending update ? loop */
- jnz 9b
- l %r2,__VDSO_WTOM_CRS_SEC+4(%r5)
- l %r1,__VDSO_WTOM_CRS_NSEC+4(%r5)
- cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
- jne 9b
- j 8b
-
- /* CLOCK_REALTIME_COARSE */
-10: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
- tml %r4,0x0001 /* pending update ? loop */
- jnz 10b
- l %r2,__VDSO_XTIME_CRS_SEC+4(%r5)
- l %r1,__VDSO_XTIME_CRS_NSEC+4(%r5)
- cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
- jne 10b
- j 17f
-
- /* CLOCK_REALTIME */
-11: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
- tml %r4,0x0001 /* pending update ? loop */
- jnz 11b
- stcke 0(%r15) /* Store TOD clock */
- lm %r0,%r1,__VDSO_TS_END(%r5) /* TOD steering end time */
- s %r0,1(%r15) /* no - ts_steering_end */
- sl %r1,5(%r15)
- brc 3,22f
- ahi %r0,-1
-22: ltr %r0,%r0 /* past end of steering? */
- jm 24f
- srdl %r0,15 /* 1 per 2^16 */
- tm __VDSO_TS_DIR+3(%r5),0x01 /* steering direction? */
- jz 23f
- lcr %r0,%r0 /* negative TOD offset */
- lcr %r1,%r1
- je 23f
- ahi %r0,-1
-23: a %r0,1(%r15) /* add TOD timestamp */
- al %r1,5(%r15)
- brc 12,25f
- ahi %r0,1
- j 25f
-24: lm %r0,%r1,1(%r15) /* load TOD timestamp */
-25: s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
- sl %r1,__VDSO_XTIME_STAMP+4(%r5)
- brc 3,12f
- ahi %r0,-1
-12: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */
- lr %r2,%r0
- l %r0,__VDSO_TK_MULT(%r5)
- ltr %r1,%r1
- mr %r0,%r0
- jnm 13f
- a %r0,__VDSO_TK_MULT(%r5)
-13: alr %r0,%r2
- al %r0,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */
- al %r1,__VDSO_XTIME_NSEC+4(%r5)
- brc 12,14f
- ahi %r0,1
-14: l %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
- srdl %r0,0(%r2) /* >> tk->shift */
- l %r2,__VDSO_XTIME_SEC+4(%r5)
- cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
- jne 11b
- basr %r5,0
-15: ltr %r0,%r0
- jnz 16f
- cl %r1,20f-15b(%r5)
- jl 17f
-16: ahi %r2,1
- sl %r1,20f-15b(%r5)
- brc 3,15b
- ahi %r0,-1
- j 15b
-17: st %r2,0(%r3) /* store tp->tv_sec */
- st %r1,4(%r3) /* store tp->tv_nsec */
- lhi %r2,0
- ahi %r15,16
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
- CFI_RESTORE 15
- br %r14
-
- /* Fallback to system call */
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16
- CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
-19: lhi %r1,__NR_clock_gettime
- svc 0
- ahi %r15,16
- CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD
- CFI_RESTORE 15
- br %r14
- CFI_ENDPROC
-
-20: .long 1000000000
-21: .long _vdso_data - 0b
- .size __kernel_clock_gettime,.-__kernel_clock_gettime
diff --git a/arch/s390/kernel/vdso32/getcpu.S b/arch/s390/kernel/vdso32/getcpu.S
deleted file mode 100644
index 25515f3fbcea..000000000000
--- a/arch/s390/kernel/vdso32/getcpu.S
+++ /dev/null
@@ -1,33 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of getcpu() for 32 bits processes in a
- * s390 kernel for use in the vDSO
- *
- * Copyright IBM Corp. 2016
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/dwarf.h>
-
- .text
- .align 4
- .globl __kernel_getcpu
- .type __kernel_getcpu,@function
-__kernel_getcpu:
- CFI_STARTPROC
- la %r4,0
- sacf 256
- l %r5,__VDSO_CPU_NR(%r4)
- l %r4,__VDSO_NODE_ID(%r4)
- sacf 0
- ltr %r2,%r2
- jz 2f
- st %r5,0(%r2)
-2: ltr %r3,%r3
- jz 3f
- st %r4,0(%r3)
-3: lhi %r2,0
- br %r14
- CFI_ENDPROC
- .size __kernel_getcpu,.-__kernel_getcpu
diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S
deleted file mode 100644
index b23063fbc892..000000000000
--- a/arch/s390/kernel/vdso32/gettimeofday.S
+++ /dev/null
@@ -1,103 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Userland implementation of gettimeofday() for 32 bits processes in a
- * s390 kernel for use in the vDSO
- *
- * Copyright IBM Corp. 2008
- * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
- */
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/dwarf.h>
-#include <asm/ptrace.h>
-
- .text
- .align 4
- .globl __kernel_gettimeofday
- .type __kernel_gettimeofday,@function
-__kernel_gettimeofday:
- CFI_STARTPROC
- ahi %r15,-16
- CFI_ADJUST_CFA_OFFSET 16
- CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD
- basr %r5,0
-0: al %r5,13f-0b(%r5) /* get &_vdso_data */
-1: ltr %r3,%r3 /* check if tz is NULL */
- je 2f
- mvc 0(8,%r3),__VDSO_TIMEZONE(%r5)
-2: ltr %r2,%r2 /* check if tv is NULL */
- je 10f
- l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
- tml %r4,0x0001 /* pending update ? loop */
- jnz 1b
- stcke 0(%r15) /* Store TOD clock */
- lm %r0,%r1,__VDSO_TS_END(%r5) /* TOD steering end time */
- s %r0,1(%r15)
- sl %r1,5(%r15)
- brc 3,14f
- ahi %r0,-1
-14: ltr %r0,%r0 /* past end of steering? */
- jm 16f
- srdl %r0,15 /* 1 per 2^16 */
- tm __VDSO_TS_DIR+3(%r5),0x01 /* steering direction? */
- jz 15f
- lcr %r0,%r0 /* negative TOD offset */
- lcr %r1,%r1
- je 15f
- ahi %r0,-1
-15: a %r0,1(%r15) /* add TOD timestamp */
- al %r1,5(%r15)
- brc 12,17f
- ahi %r0,1
- j 17f
-16: lm %r0,%r1,1(%r15) /* load TOD timestamp */
-17: s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
- sl %r1,__VDSO_XTIME_STAMP+4(%r5)
- brc 3,3f
- ahi %r0,-1
-3: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */
- st %r0,0(%r15)
- l %r0,__VDSO_TK_MULT(%r5)
- ltr %r1,%r1
- mr %r0,%r0
- jnm 4f
- a %r0,__VDSO_TK_MULT(%r5)
-4: al %r0,0(%r15)
- al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */
- al %r1,__VDSO_XTIME_NSEC+4(%r5)
- brc 12,5f
- ahi %r0,1
-5: mvc 0(4,%r15),__VDSO_XTIME_SEC+4(%r5)
- cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
- jne 1b
- l %r4,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
- srdl %r0,0(%r4) /* >> tk->shift */
- l %r4,0(%r15) /* get tv_sec from stack */
- basr %r5,0
-6: ltr %r0,%r0
- jnz 7f
- cl %r1,11f-6b(%r5)
- jl 8f
-7: ahi %r4,1
- sl %r1,11f-6b(%r5)
- brc 3,6b
- ahi %r0,-1
- j 6b
-8: st %r4,0(%r2) /* store tv->tv_sec */
- ltr %r1,%r1
- m %r0,12f-6b(%r5)
- jnm 9f
- al %r0,12f-6b(%r5)
-9: srl %r0,6
- st %r0,4(%r2) /* store tv->tv_usec */
-10: slr %r2,%r2
- ahi %r15,16
- CFI_ADJUST_CFA_OFFSET -16
- CFI_RESTORE 15
- br %r14
- CFI_ENDPROC
-11: .long 1000000000
-12: .long 274877907
-13: .long _vdso_data - 0b
- .size __kernel_gettimeofday,.-__kernel_gettimeofday
diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso32/note.S
deleted file mode 100644
index db19d0680a0a..000000000000
--- a/arch/s390/kernel/vdso32/note.S
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
- * Here we can supply some information useful to userland.
- */
-
-#include <linux/uts.h>
-#include <linux/version.h>
-#include <linux/elfnote.h>
-
-ELFNOTE_START(Linux, 0, "a")
- .long LINUX_VERSION_CODE
-ELFNOTE_END
diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S
deleted file mode 100644
index 721c4954cb6e..000000000000
--- a/arch/s390/kernel/vdso32/vdso32.lds.S
+++ /dev/null
@@ -1,142 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This is the infamous ld script for the 32 bits vdso
- * library
- */
-
-#include <asm/page.h>
-#include <asm/vdso.h>
-
-OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
-OUTPUT_ARCH(s390:31-bit)
-ENTRY(_start)
-
-SECTIONS
-{
- . = VDSO32_LBASE + SIZEOF_HEADERS;
-
- .hash : { *(.hash) } :text
- .gnu.hash : { *(.gnu.hash) }
- .dynsym : { *(.dynsym) }
- .dynstr : { *(.dynstr) }
- .gnu.version : { *(.gnu.version) }
- .gnu.version_d : { *(.gnu.version_d) }
- .gnu.version_r : { *(.gnu.version_r) }
-
- .note : { *(.note.*) } :text :note
-
- . = ALIGN(16);
- .text : {
- *(.text .stub .text.* .gnu.linkonce.t.*)
- } :text
- PROVIDE(__etext = .);
- PROVIDE(_etext = .);
- PROVIDE(etext = .);
-
- /*
- * Other stuff is appended to the text segment:
- */
- .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
- .rodata1 : { *(.rodata1) }
-
- .dynamic : { *(.dynamic) } :text :dynamic
-
- .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
- .eh_frame : { KEEP (*(.eh_frame)) } :text
- .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) }
-
- .rela.dyn ALIGN(8) : { *(.rela.dyn) }
- .got ALIGN(8) : { *(.got .toc) }
-
- _end = .;
- PROVIDE(end = .);
-
- /*
- * Stabs debugging sections are here too.
- */
- .stab 0 : { *(.stab) }
- .stabstr 0 : { *(.stabstr) }
- .stab.excl 0 : { *(.stab.excl) }
- .stab.exclstr 0 : { *(.stab.exclstr) }
- .stab.index 0 : { *(.stab.index) }
- .stab.indexstr 0 : { *(.stab.indexstr) }
- .comment 0 : { *(.comment) }
-
- /*
- * DWARF debug sections.
- * Symbols in the DWARF debugging sections are relative to the
- * beginning of the section so we begin them at 0.
- */
- /* DWARF 1 */
- .debug 0 : { *(.debug) }
- .line 0 : { *(.line) }
- /* GNU DWARF 1 extensions */
- .debug_srcinfo 0 : { *(.debug_srcinfo) }
- .debug_sfnames 0 : { *(.debug_sfnames) }
- /* DWARF 1.1 and DWARF 2 */
- .debug_aranges 0 : { *(.debug_aranges) }
- .debug_pubnames 0 : { *(.debug_pubnames) }
- /* DWARF 2 */
- .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
- .debug_abbrev 0 : { *(.debug_abbrev) }
- .debug_line 0 : { *(.debug_line) }
- .debug_frame 0 : { *(.debug_frame) }
- .debug_str 0 : { *(.debug_str) }
- .debug_loc 0 : { *(.debug_loc) }
- .debug_macinfo 0 : { *(.debug_macinfo) }
- /* SGI/MIPS DWARF 2 extensions */
- .debug_weaknames 0 : { *(.debug_weaknames) }
- .debug_funcnames 0 : { *(.debug_funcnames) }
- .debug_typenames 0 : { *(.debug_typenames) }
- .debug_varnames 0 : { *(.debug_varnames) }
- /* DWARF 3 */
- .debug_pubtypes 0 : { *(.debug_pubtypes) }
- .debug_ranges 0 : { *(.debug_ranges) }
- .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) }
-
- . = ALIGN(PAGE_SIZE);
- PROVIDE(_vdso_data = .);
-
- /DISCARD/ : {
- *(.note.GNU-stack)
- *(.branch_lt)
- *(.data .data.* .gnu.linkonce.d.* .sdata*)
- *(.bss .sbss .dynbss .dynsbss)
- }
-}
-
-/*
- * Very old versions of ld do not recognize this name token; use the constant.
- */
-#define PT_GNU_EH_FRAME 0x6474e550
-
-/*
- * We must supply the ELF program headers explicitly to get just one
- * PT_LOAD segment, and set the flags explicitly to make segments read-only.
- */
-PHDRS
-{
- text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
- dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
- note PT_NOTE FLAGS(4); /* PF_R */
- eh_frame_hdr PT_GNU_EH_FRAME;
-}
-
-/*
- * This controls what symbols we export from the DSO.
- */
-VERSION
-{
- VDSO_VERSION_STRING {
- global:
- /*
- * Has to be there for the kernel to find
- */
- __kernel_gettimeofday;
- __kernel_clock_gettime;
- __kernel_clock_getres;
- __kernel_getcpu;
-
- local: *;
- };
-}
diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso32/vdso32_wrapper.S
deleted file mode 100644
index de2fb930471a..000000000000
--- a/arch/s390/kernel/vdso32/vdso32_wrapper.S
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#include <linux/init.h>
-#include <linux/linkage.h>
-#include <asm/page.h>
-
- __PAGE_ALIGNED_DATA
-
- .globl vdso32_start, vdso32_end
- .balign PAGE_SIZE
-vdso32_start:
- .incbin "arch/s390/kernel/vdso32/vdso32.so"
- .balign PAGE_SIZE
-vdso32_end:
-
- .previous
diff --git a/arch/s390/kernel/vdso64/getcpu.S b/arch/s390/kernel/vdso64/getcpu.S
index 2446e9dac8ab..3c04f7328500 100644
--- a/arch/s390/kernel/vdso64/getcpu.S
+++ b/arch/s390/kernel/vdso64/getcpu.S
@@ -16,10 +16,8 @@
.type __kernel_getcpu,@function
__kernel_getcpu:
CFI_STARTPROC
- la %r4,0
sacf 256
- l %r5,__VDSO_CPU_NR(%r4)
- l %r4,__VDSO_NODE_ID(%r4)
+ lm %r4,%r5,__VDSO_GETCPU_VAL(%r0)
sacf 0
ltgr %r2,%r2
jz 2f
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 7e0eb4020917..37695499717d 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -15,6 +15,8 @@
/* Handle ro_after_init data on our own. */
#define RO_AFTER_INIT_DATA
+#define EMITS_PT_NOTE
+
#include <asm-generic/vmlinux.lds.h>
#include <asm/vmlinux.lds.h>
@@ -50,11 +52,7 @@ SECTIONS
_etext = .; /* End of text section */
} :text = 0x0700
- NOTES :text :note
-
- .dummy : { *(.dummy) } :data
-
- RO_DATA_SECTION(PAGE_SIZE)
+ RO_DATA(PAGE_SIZE)
. = ALIGN(PAGE_SIZE);
_sdata = .; /* Start of data section */
@@ -64,12 +62,12 @@ SECTIONS
.data..ro_after_init : {
*(.data..ro_after_init)
JUMP_TABLE_DATA
- }
+ } :data
EXCEPTION_TABLE(16)
. = ALIGN(PAGE_SIZE);
__end_ro_after_init = .;
- RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE)
+ RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE)
BOOT_DATA_PRESERVED
_edata = .; /* End of data section */
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index c475ca49cfc6..8df10d3c8f6c 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -247,9 +247,9 @@ void vtime_account_irq_enter(struct task_struct *tsk)
}
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
__attribute__((alias("vtime_account_irq_enter")));
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
/*
* Sorted add to a list. List is linear searched until first bigger
OpenPOWER on IntegriCloud