diff options
Diffstat (limited to 'arch/s390/kernel')
49 files changed, 715 insertions, 1270 deletions
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 0f255b54b051..2b1203cf7be6 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -10,20 +10,12 @@ CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) # Do not trace early setup code CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE) -CFLAGS_REMOVE_early_nobss.o = $(CC_FLAGS_FTRACE) endif GCOV_PROFILE_early.o := n -GCOV_PROFILE_early_nobss.o := n - KCOV_INSTRUMENT_early.o := n -KCOV_INSTRUMENT_early_nobss.o := n - UBSAN_SANITIZE_early.o := n -UBSAN_SANITIZE_early_nobss.o := n - -KASAN_SANITIZE_early_nobss.o := n KASAN_SANITIZE_ipl.o := n KASAN_SANITIZE_machine_kexec.o := n @@ -48,7 +40,7 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o -obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o early_nobss.o +obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o @@ -89,7 +81,3 @@ obj-$(CONFIG_TRACEPOINTS) += trace.o # vdso obj-y += vdso64/ -obj-$(CONFIG_COMPAT_VDSO) += vdso32/ - -chkbss := head64.o early_nobss.o -include $(srctree)/arch/s390/scripts/Makefile.chkbss diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 41ac4ad21311..ce33406cfe83 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -78,8 +78,7 @@ int main(void) OFFSET(__VDSO_TS_END, vdso_data, ts_end); OFFSET(__VDSO_ECTG_BASE, vdso_per_cpu_data, ectg_timer_base); OFFSET(__VDSO_ECTG_USER, vdso_per_cpu_data, ectg_user_time); - OFFSET(__VDSO_CPU_NR, vdso_per_cpu_data, cpu_nr); - OFFSET(__VDSO_NODE_ID, vdso_per_cpu_data, node_id); + OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val); BLANK(); /* constants used by the vdso */ DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME); diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index 2f39ea57f358..b79e0fd571f8 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -16,27 +16,6 @@ GEN_BR_THUNK %r9 GEN_BR_THUNK %r14 -ENTRY(s390_base_mcck_handler) - basr %r13,0 -0: lg %r15,__LC_NODAT_STACK # load panic stack - aghi %r15,-STACK_FRAME_OVERHEAD - larl %r1,s390_base_mcck_handler_fn - lg %r9,0(%r1) - ltgr %r9,%r9 - jz 1f - BASR_EX %r14,%r9 -1: la %r1,4095 - lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1) - lpswe __LC_MCK_OLD_PSW -ENDPROC(s390_base_mcck_handler) - - .section .bss - .align 8 - .globl s390_base_mcck_handler_fn -s390_base_mcck_handler_fn: - .quad 0 - .previous - ENTRY(s390_base_ext_handler) stmg %r0,%r15,__LC_SAVE_AREA_ASYNC basr %r13,0 diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 7abe6ae261b4..f304802ecf7b 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -461,10 +461,11 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) ptr += sprintf(ptr, "%%c%i", value); else if (operand->flags & OPERAND_VR) ptr += sprintf(ptr, "%%v%i", value); - else if (operand->flags & OPERAND_PCREL) - ptr += sprintf(ptr, "%lx", (signed int) value - + addr); - else if (operand->flags & OPERAND_SIGNED) + else if (operand->flags & OPERAND_PCREL) { + void *pcrel = (void *)((int)value + addr); + + ptr += sprintf(ptr, "%px", pcrel); + } else if (operand->flags & OPERAND_SIGNED) ptr += sprintf(ptr, "%i", value); else ptr += sprintf(ptr, "%u", value); @@ -536,7 +537,7 @@ void show_code(struct pt_regs *regs) else *ptr++ = ' '; addr = regs->psw.addr + start - 32; - ptr += sprintf(ptr, "%016lx: ", addr); + ptr += sprintf(ptr, "%px: ", (void *)addr); if (start + opsize >= end) break; for (i = 0; i < opsize; i++) @@ -564,7 +565,7 @@ void print_fn_code(unsigned char *code, unsigned long len) opsize = insn_length(*code); if (opsize > len) break; - ptr += sprintf(ptr, "%p: ", code); + ptr += sprintf(ptr, "%px: ", code); for (i = 0; i < opsize; i++) ptr += sprintf(ptr, "%02x", code[i]); *ptr++ = '\t'; diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 34bdc60c0b11..2c122d8bab93 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -38,6 +38,7 @@ const char *stack_type_name(enum stack_type type) return "unknown"; } } +EXPORT_SYMBOL_GPL(stack_type_name); static inline bool in_stack(unsigned long sp, struct stack_info *info, enum stack_type type, unsigned long low, @@ -93,7 +94,9 @@ int get_stack_info(unsigned long sp, struct task_struct *task, if (!sp) goto unknown; - task = task ? : current; + /* Sanity check: ABI requires SP to be aligned 8 bytes. */ + if (sp & 0x7) + goto unknown; /* Check per-task stack */ if (in_task_stack(sp, task, info)) @@ -128,8 +131,6 @@ void show_stack(struct task_struct *task, unsigned long *stack) struct unwind_state state; printk("Call Trace:\n"); - if (!task) - task = current; unwind_for_each_frame(&state, task, NULL, (unsigned long) stack) printk(state.reliable ? " [<%016lx>] %pSR \n" : "([<%016lx>] %pSR)\n", @@ -194,6 +195,8 @@ void die(struct pt_regs *regs, const char *str) regs->int_code >> 17, ++die_counter); #ifdef CONFIG_PREEMPT pr_cont("PREEMPT "); +#elif defined(CONFIG_PREEMPT_RT) + pr_cont("PREEMPT_RT "); #endif pr_cont("SMP "); if (debug_pagealloc_enabled()) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 6312fed48530..cd241ee66eff 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -30,8 +30,24 @@ #include <asm/sclp.h> #include <asm/facility.h> #include <asm/boot_data.h> +#include <asm/switch_to.h> #include "entry.h" +static void __init reset_tod_clock(void) +{ + u64 time; + + if (store_tod_clock(&time) == 0) + return; + /* TOD clock not running. Set the clock to Unix Epoch. */ + if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0) + disabled_wait(); + + memset(tod_clock_base, 0, 16); + *(__u64 *) &tod_clock_base[1] = TOD_UNIX_EPOCH; + S390_lowcore.last_update_clock = TOD_UNIX_EPOCH; +} + /* * Initialize storage key for kernel pages */ @@ -188,21 +204,6 @@ static __init void detect_diag9c(void) S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C; } -static __init void detect_diag44(void) -{ - int rc; - - diag_stat_inc(DIAG_STAT_X044); - asm volatile( - " diag 0,0,0x44\n" - "0: la %0,0\n" - "1:\n" - EX_TABLE(0b,1b) - : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc"); - if (!rc) - S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44; -} - static __init void detect_machine_facilities(void) { if (test_facility(8)) { @@ -223,7 +224,7 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_VX; __ctl_set_bit(0, 17); } - if (test_facility(130)) { + if (test_facility(130) && !noexec_disabled) { S390_lowcore.machine_flags |= MACHINE_FLAG_NX; __ctl_set_bit(0, 20); } @@ -245,6 +246,24 @@ static inline void save_vector_registers(void) #endif } +static inline void setup_control_registers(void) +{ + unsigned long reg; + + __ctl_store(reg, 0, 0); + reg |= CR0_LOW_ADDRESS_PROTECTION; + reg |= CR0_EMERGENCY_SIGNAL_SUBMASK; + reg |= CR0_EXTERNAL_CALL_SUBMASK; + __ctl_load(reg, 0, 0); +} + +static inline void setup_access_registers(void) +{ + unsigned int acrs[NUM_ACRS] = { 0 }; + + restore_access_regs(acrs); +} + static int __init disable_vector_extension(char *str) { S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX; @@ -253,21 +272,6 @@ static int __init disable_vector_extension(char *str) } early_param("novx", disable_vector_extension); -static int __init noexec_setup(char *str) -{ - bool enabled; - int rc; - - rc = kstrtobool(str, &enabled); - if (!rc && !enabled) { - /* Disable no-execute support */ - S390_lowcore.machine_flags &= ~MACHINE_FLAG_NX; - __ctl_clear_bit(0, 20); - } - return rc; -} -early_param("noexec", noexec_setup); - static int __init cad_setup(char *str) { bool enabled; @@ -301,6 +305,7 @@ static void __init check_image_bootable(void) void __init startup_init(void) { + reset_tod_clock(); check_image_bootable(); time_early_init(); init_kernel_storage_key(); @@ -311,10 +316,11 @@ void __init startup_init(void) setup_arch_string(); setup_boot_command_line(); detect_diag9c(); - detect_diag44(); detect_machine_facilities(); save_vector_registers(); setup_topology(); sclp_early_detect(); + setup_control_registers(); + setup_access_registers(); lockdep_on(); } diff --git a/arch/s390/kernel/early_nobss.c b/arch/s390/kernel/early_nobss.c deleted file mode 100644 index 52a3ef959341..000000000000 --- a/arch/s390/kernel/early_nobss.c +++ /dev/null @@ -1,45 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright IBM Corp. 2007, 2018 - */ - -/* - * Early setup functions which may not rely on an initialized bss - * section. The last thing that is supposed to happen here is - * initialization of the bss section. - */ - -#include <linux/processor.h> -#include <linux/string.h> -#include <asm/sections.h> -#include <asm/lowcore.h> -#include <asm/timex.h> -#include <asm/kasan.h> -#include "entry.h" - -static void __init reset_tod_clock(void) -{ - u64 time; - - if (store_tod_clock(&time) == 0) - return; - /* TOD clock not running. Set the clock to Unix Epoch. */ - if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0) - disabled_wait(); - - memset(tod_clock_base, 0, 16); - *(__u64 *) &tod_clock_base[1] = TOD_UNIX_EPOCH; - S390_lowcore.last_update_clock = TOD_UNIX_EPOCH; -} - -static void __init clear_bss_section(void) -{ - memset(__bss_start, 0, __bss_stop - __bss_start); -} - -void __init startup_init_nobss(void) -{ - reset_tod_clock(); - clear_bss_section(); - kasan_early_init(); -} diff --git a/arch/s390/kernel/early_printk.c b/arch/s390/kernel/early_printk.c index 40c1dfec944e..6f24d83bc5dc 100644 --- a/arch/s390/kernel/early_printk.c +++ b/arch/s390/kernel/early_printk.c @@ -25,7 +25,7 @@ static int __init setup_early_printk(char *buf) if (early_console) return 0; /* Accept only "earlyprintk" and "earlyprintk=sclp" */ - if (buf && strncmp(buf, "sclp", 4)) + if (buf && !str_has_prefix(buf, "sclp")) return 0; if (!sclp.has_linemode && !sclp.has_vt220) return 0; diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 270d1d145761..9205add8481d 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -790,7 +790,7 @@ ENTRY(io_int_handler) .Lio_work: tm __PT_PSW+1(%r11),0x01 # returning to user ? jo .Lio_work_user # yes -> do resched & signal -#ifdef CONFIG_PREEMPT +#ifdef CONFIG_PREEMPTION # check for preemptive scheduling icm %r0,15,__LC_PREEMPT_COUNT jnz .Lio_restore # preemption is disabled diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index b2956d49b6ad..1d3927e01a5f 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -45,6 +45,7 @@ void specification_exception(struct pt_regs *regs); void transaction_exception(struct pt_regs *regs); void translation_exception(struct pt_regs *regs); void vector_exception(struct pt_regs *regs); +void monitor_event_exception(struct pt_regs *regs); void do_per_trap(struct pt_regs *regs); void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str); diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 1bb85f60c0dd..4cd9b1ada834 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -72,15 +72,6 @@ static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn) #endif } -static inline int is_kprobe_on_ftrace(struct ftrace_insn *insn) -{ -#ifdef CONFIG_KPROBES - if (insn->opc == BREAKPOINT_INSTRUCTION) - return 1; -#endif - return 0; -} - static inline void ftrace_generate_kprobe_nop_insn(struct ftrace_insn *insn) { #ifdef CONFIG_KPROBES @@ -114,16 +105,6 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, /* Initial code replacement */ ftrace_generate_orig_insn(&orig); ftrace_generate_nop_insn(&new); - } else if (is_kprobe_on_ftrace(&old)) { - /* - * If we find a breakpoint instruction, a kprobe has been - * placed at the beginning of the function. We write the - * constant KPROBE_ON_FTRACE_NOP into the remaining four - * bytes of the original instruction so that the kprobes - * handler can execute a nop, if it reaches this breakpoint. - */ - ftrace_generate_kprobe_call_insn(&orig); - ftrace_generate_kprobe_nop_insn(&new); } else { /* Replace ftrace call with a nop. */ ftrace_generate_call_insn(&orig, rec->ip); @@ -142,21 +123,10 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; - if (is_kprobe_on_ftrace(&old)) { - /* - * If we find a breakpoint instruction, a kprobe has been - * placed at the beginning of the function. We write the - * constant KPROBE_ON_FTRACE_CALL into the remaining four - * bytes of the original instruction so that the kprobes - * handler can execute a brasl if it reaches this breakpoint. - */ - ftrace_generate_kprobe_nop_insn(&orig); - ftrace_generate_kprobe_call_insn(&new); - } else { - /* Replace nop with an ftrace call. */ - ftrace_generate_nop_insn(&orig); - ftrace_generate_call_insn(&new, rec->ip); - } + /* Replace nop with an ftrace call. */ + ftrace_generate_nop_insn(&orig); + ftrace_generate_call_insn(&new, rec->ip); + /* Verify that the to be replaced code matches what we expect. */ if (memcmp(&orig, &old, sizeof(old))) return -EINVAL; @@ -241,3 +211,45 @@ int ftrace_disable_ftrace_graph_caller(void) } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +#ifdef CONFIG_KPROBES_ON_FTRACE +void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb; + struct kprobe *p = get_kprobe((kprobe_opcode_t *)ip); + + if (unlikely(!p) || kprobe_disabled(p)) + return; + + if (kprobe_running()) { + kprobes_inc_nmissed_count(p); + return; + } + + __this_cpu_write(current_kprobe, p); + + kcb = get_kprobe_ctlblk(); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + + instruction_pointer_set(regs, ip); + + if (!p->pre_handler || !p->pre_handler(p, regs)) { + + instruction_pointer_set(regs, ip + MCOUNT_INSN_SIZE); + + if (unlikely(p->post_handler)) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + } + __this_cpu_write(current_kprobe, NULL); +} +NOKPROBE_SYMBOL(kprobe_ftrace_handler); + +int arch_prepare_kprobe_ftrace(struct kprobe *p) +{ + p->ainsn.insn = NULL; + return 0; +} +#endif diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index f384a18e6c26..8b88dbbda7df 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -26,32 +26,17 @@ ENTRY(startup_continue) 0: larl %r1,tod_clock_base mvc 0(16,%r1),__LC_BOOT_CLOCK larl %r13,.LPG1 # get base - larl %r0,boot_vdso_data - stg %r0,__LC_VDSO_PER_CPU # # Setup stack # larl %r14,init_task stg %r14,__LC_CURRENT - larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD -# -# Early setup functions that may not rely on an initialized bss section, -# like moving the initrd. Returns with an initialized bss section. -# - brasl %r14,startup_init_nobss -# -# Early machine initialization and detection functions. -# - brasl %r14,startup_init - -# check control registers - stctg %c0,%c15,0(%r15) - oi 6(%r15),0x60 # enable sigp emergency & external call - oi 4(%r15),0x10 # switch on low address proctection - lctlg %c0,%c15,0(%r15) - - lam 0,15,.Laregs-.LPG1(%r13) # load acrs needed by uaccess - brasl %r14,start_kernel # go to C code + larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD-__PT_SIZE +#ifdef CONFIG_KASAN + brasl %r14,kasan_early_init +#endif + brasl %r14,startup_init # s390 specific early init + brasl %r14,start_kernel # common init code # # We returned from start_kernel ?!? PANIK # @@ -61,4 +46,3 @@ ENTRY(startup_continue) .align 16 .LPG1: .Ldw: .quad 0x0002000180000000,0x0000000000000000 -.Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index b9d8fe45737a..8f8456816d83 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -69,18 +69,26 @@ DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); static ssize_t show_idle_time(struct device *dev, struct device_attribute *attr, char *buf) { + unsigned long long now, idle_time, idle_enter, idle_exit, in_idle; struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); - unsigned long long now, idle_time, idle_enter, idle_exit; unsigned int seq; do { - now = get_tod_clock(); seq = read_seqcount_begin(&idle->seqcount); idle_time = READ_ONCE(idle->idle_time); idle_enter = READ_ONCE(idle->clock_idle_enter); idle_exit = READ_ONCE(idle->clock_idle_exit); } while (read_seqcount_retry(&idle->seqcount, seq)); - idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0; + in_idle = 0; + now = get_tod_clock(); + if (idle_enter) { + if (idle_exit) { + in_idle = idle_exit - idle_enter; + } else if (now > idle_enter) { + in_idle = now - idle_enter; + } + } + idle_time += in_idle; return sprintf(buf, "%llu\n", idle_time >> 12); } DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); @@ -88,17 +96,24 @@ DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); u64 arch_cpu_idle_time(int cpu) { struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); - unsigned long long now, idle_enter, idle_exit; + unsigned long long now, idle_enter, idle_exit, in_idle; unsigned int seq; do { - now = get_tod_clock(); seq = read_seqcount_begin(&idle->seqcount); idle_enter = READ_ONCE(idle->clock_idle_enter); idle_exit = READ_ONCE(idle->clock_idle_exit); } while (read_seqcount_retry(&idle->seqcount, seq)); - - return cputime_to_nsecs(idle_enter ? ((idle_exit ?: now) - idle_enter) : 0); + in_idle = 0; + now = get_tod_clock(); + if (idle_enter) { + if (idle_exit) { + in_idle = idle_exit - idle_enter; + } else if (now > idle_enter) { + in_idle = now - idle_enter; + } + } + return cputime_to_nsecs(in_idle); } void arch_cpu_idle_enter(void) diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c index 6d0635ceddd0..9da6fa30c447 100644 --- a/arch/s390/kernel/kexec_elf.c +++ b/arch/s390/kernel/kexec_elf.c @@ -130,7 +130,7 @@ static int s390_elf_probe(const char *buf, unsigned long len) const struct kexec_file_ops s390_kexec_elf_ops = { .probe = s390_elf_probe, .load = s390_elf_load, -#ifdef CONFIG_KEXEC_VERIFY_SIG +#ifdef CONFIG_KEXEC_SIG .verify_sig = s390_verify_sig, -#endif /* CONFIG_KEXEC_VERIFY_SIG */ +#endif /* CONFIG_KEXEC_SIG */ }; diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c index 58318bf89fd9..af23eff5774d 100644 --- a/arch/s390/kernel/kexec_image.c +++ b/arch/s390/kernel/kexec_image.c @@ -59,7 +59,7 @@ static int s390_image_probe(const char *buf, unsigned long len) const struct kexec_file_ops s390_kexec_image_ops = { .probe = s390_image_probe, .load = s390_image_load, -#ifdef CONFIG_KEXEC_VERIFY_SIG +#ifdef CONFIG_KEXEC_SIG .verify_sig = s390_verify_sig, -#endif /* CONFIG_KEXEC_VERIFY_SIG */ +#endif /* CONFIG_KEXEC_SIG */ }; diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 6f1388391620..548d0ea9808d 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -56,21 +56,10 @@ struct kprobe_insn_cache kprobe_s390_insn_slots = { static void copy_instruction(struct kprobe *p) { - unsigned long ip = (unsigned long) p->addr; s64 disp, new_disp; u64 addr, new_addr; - if (ftrace_location(ip) == ip) { - /* - * If kprobes patches the instruction that is morphed by - * ftrace make sure that kprobes always sees the branch - * "jg .+24" that skips the mcount block or the "brcl 0,0" - * in case of hotpatch. - */ - ftrace_generate_nop_insn((struct ftrace_insn *)p->ainsn.insn); - p->ainsn.is_ftrace_insn = 1; - } else - memcpy(p->ainsn.insn, p->addr, insn_length(*p->addr >> 8)); + memcpy(p->ainsn.insn, p->addr, insn_length(*p->addr >> 8)); p->opcode = p->ainsn.insn[0]; if (!probe_is_insn_relative_long(p->ainsn.insn)) return; @@ -136,11 +125,6 @@ int arch_prepare_kprobe(struct kprobe *p) } NOKPROBE_SYMBOL(arch_prepare_kprobe); -int arch_check_ftrace_location(struct kprobe *p) -{ - return 0; -} - struct swap_insn_args { struct kprobe *p; unsigned int arm_kprobe : 1; @@ -149,28 +133,11 @@ struct swap_insn_args { static int swap_instruction(void *data) { struct swap_insn_args *args = data; - struct ftrace_insn new_insn, *insn; struct kprobe *p = args->p; - size_t len; - - new_insn.opc = args->arm_kprobe ? BREAKPOINT_INSTRUCTION : p->opcode; - len = sizeof(new_insn.opc); - if (!p->ainsn.is_ftrace_insn) - goto skip_ftrace; - len = sizeof(new_insn); - insn = (struct ftrace_insn *) p->addr; - if (args->arm_kprobe) { - if (is_ftrace_nop(insn)) - new_insn.disp = KPROBE_ON_FTRACE_NOP; - else - new_insn.disp = KPROBE_ON_FTRACE_CALL; - } else { - ftrace_generate_call_insn(&new_insn, (unsigned long)p->addr); - if (insn->disp == KPROBE_ON_FTRACE_NOP) - ftrace_generate_nop_insn(&new_insn); - } -skip_ftrace: - s390_kernel_write(p->addr, &new_insn, len); + u16 opc; + + opc = args->arm_kprobe ? BREAKPOINT_INSTRUCTION : p->opcode; + s390_kernel_write(p->addr, &opc, sizeof(opc)); return 0; } NOKPROBE_SYMBOL(swap_instruction); @@ -464,24 +431,6 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs) unsigned long ip = regs->psw.addr; int fixup = probe_get_fixup_type(p->ainsn.insn); - /* Check if the kprobes location is an enabled ftrace caller */ - if (p->ainsn.is_ftrace_insn) { - struct ftrace_insn *insn = (struct ftrace_insn *) p->addr; - struct ftrace_insn call_insn; - - ftrace_generate_call_insn(&call_insn, (unsigned long) p->addr); - /* - * A kprobe on an enabled ftrace call site actually single - * stepped an unconditional branch (ftrace nop equivalent). - * Now we need to fixup things and pretend that a brasl r0,... - * was executed instead. - */ - if (insn->disp == KPROBE_ON_FTRACE_CALL) { - ip += call_insn.disp * 2 - MCOUNT_INSN_SIZE; - regs->gprs[0] = (unsigned long)p->addr + sizeof(*insn); - } - } - if (fixup & FIXUP_PSW_NORMAL) ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn; diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 444a19125a81..cb8b1cc285c9 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -164,7 +164,9 @@ static bool kdump_csum_valid(struct kimage *image) #ifdef CONFIG_CRASH_DUMP int rc; + preempt_disable(); rc = CALL_ON_STACK(do_start_kdump, S390_lowcore.nodat_stack, 1, image); + preempt_enable(); return rc == 0; #else return false; @@ -254,10 +256,10 @@ void arch_crash_save_vmcoreinfo(void) VMCOREINFO_SYMBOL(lowcore_ptr); VMCOREINFO_SYMBOL(high_memory); VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS); - mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note()); vmcoreinfo_append_str("SDMA=%lx\n", __sdma); vmcoreinfo_append_str("EDMA=%lx\n", __edma); vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); + mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note()); } void machine_shutdown(void) diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index fbdd3ea73667..8415ae7d2a23 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -10,7 +10,7 @@ #include <linux/elf.h> #include <linux/errno.h> #include <linux/kexec.h> -#include <linux/module.h> +#include <linux/module_signature.h> #include <linux/verification.h> #include <asm/boot_data.h> #include <asm/ipl.h> @@ -22,29 +22,7 @@ const struct kexec_file_ops * const kexec_file_loaders[] = { NULL, }; -#ifdef CONFIG_KEXEC_VERIFY_SIG -/* - * Module signature information block. - * - * The constituents of the signature section are, in order: - * - * - Signer's name - * - Key identifier - * - Signature data - * - Information block - */ -struct module_signature { - u8 algo; /* Public-key crypto algorithm [0] */ - u8 hash; /* Digest algorithm [0] */ - u8 id_type; /* Key identifier type [PKEY_ID_PKCS7] */ - u8 signer_len; /* Length of signer's name [0] */ - u8 key_id_len; /* Length of key identifier [0] */ - u8 __pad[3]; - __be32 sig_len; /* Length of signature data */ -}; - -#define PKEY_ID_PKCS7 2 - +#ifdef CONFIG_KEXEC_SIG int s390_verify_sig(const char *kernel, unsigned long kernel_len) { const unsigned long marker_len = sizeof(MODULE_SIG_STRING) - 1; @@ -90,7 +68,7 @@ int s390_verify_sig(const char *kernel, unsigned long kernel_len) VERIFYING_MODULE_SIGNATURE, NULL, NULL); } -#endif /* CONFIG_KEXEC_VERIFY_SIG */ +#endif /* CONFIG_KEXEC_SIG */ static int kexec_file_update_purgatory(struct kimage *image, struct s390_load_data *data) diff --git a/arch/s390/kernel/machine_kexec_reloc.c b/arch/s390/kernel/machine_kexec_reloc.c index 3b664cb3ec4d..d5035de9020e 100644 --- a/arch/s390/kernel/machine_kexec_reloc.c +++ b/arch/s390/kernel/machine_kexec_reloc.c @@ -27,6 +27,7 @@ int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val, *(u32 *)loc = val; break; case R_390_64: /* Direct 64 bit. */ + case R_390_GLOB_DAT: *(u64 *)loc = val; break; case R_390_PC16: /* PC relative 16 bit. */ diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 9e1660a6b9db..7458dcfd6464 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -26,6 +26,12 @@ ENDPROC(ftrace_stub) #define STACK_PTREGS (STACK_FRAME_OVERHEAD) #define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) +#ifdef __PACK_STACK +/* allocate just enough for r14, r15 and backchain */ +#define TRACED_FUNC_FRAME_SIZE 24 +#else +#define TRACED_FUNC_FRAME_SIZE STACK_FRAME_OVERHEAD +#endif ENTRY(_mcount) BR_EX %r14 @@ -35,13 +41,27 @@ EXPORT_SYMBOL(_mcount) ENTRY(ftrace_caller) .globl ftrace_regs_caller .set ftrace_regs_caller,ftrace_caller + stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller + lghi %r14,0 # save condition code + ipm %r14 # don't put any instructions + sllg %r14,%r14,16 # clobbering CC before this point lgr %r1,%r15 #if !(defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT)) aghi %r0,MCOUNT_RETURN_FIXUP #endif - aghi %r15,-STACK_FRAME_SIZE + # allocate stack frame for ftrace_caller to contain traced function + aghi %r15,-TRACED_FUNC_FRAME_SIZE stg %r1,__SF_BACKCHAIN(%r15) + stg %r0,(__SF_GPRS+8*8)(%r15) + stg %r15,(__SF_GPRS+9*8)(%r15) + # allocate pt_regs and stack frame for ftrace_trace_function + aghi %r15,-STACK_FRAME_SIZE stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15) + stg %r14,(STACK_PTREGS_PSW)(%r15) + lg %r14,(__SF_GPRS+8*8)(%r1) # restore original return address + stosm (STACK_PTREGS_PSW)(%r15),0 + aghi %r1,-TRACED_FUNC_FRAME_SIZE + stg %r1,__SF_BACKCHAIN(%r15) stg %r0,(STACK_PTREGS_PSW+8)(%r15) stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15) #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 31889db609e9..ba8f19bb438b 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -472,11 +472,11 @@ int module_finalize(const Elf_Ehdr *hdr, apply_alternatives(aseg, aseg + s->sh_size); if (IS_ENABLED(CONFIG_EXPOLINE) && - (!strncmp(".s390_indirect", secname, 14))) + (str_has_prefix(secname, ".s390_indirect"))) nospec_revert(aseg, aseg + s->sh_size); if (IS_ENABLED(CONFIG_EXPOLINE) && - (!strncmp(".s390_return", secname, 12))) + (str_has_prefix(secname, ".s390_return"))) nospec_revert(aseg, aseg + s->sh_size); } diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 48d48b6187c0..0eb1d1cc53a8 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -199,7 +199,7 @@ static const int cpumf_generic_events_user[] = { [PERF_COUNT_HW_BUS_CYCLES] = -1, }; -static int __hw_perf_event_init(struct perf_event *event) +static int __hw_perf_event_init(struct perf_event *event, unsigned int type) { struct perf_event_attr *attr = &event->attr; struct hw_perf_event *hwc = &event->hw; @@ -207,7 +207,7 @@ static int __hw_perf_event_init(struct perf_event *event) int err = 0; u64 ev; - switch (attr->type) { + switch (type) { case PERF_TYPE_RAW: /* Raw events are used to access counters directly, * hence do not permit excludes */ @@ -294,17 +294,16 @@ static int __hw_perf_event_init(struct perf_event *event) static int cpumf_pmu_event_init(struct perf_event *event) { + unsigned int type = event->attr.type; int err; - switch (event->attr.type) { - case PERF_TYPE_HARDWARE: - case PERF_TYPE_HW_CACHE: - case PERF_TYPE_RAW: - err = __hw_perf_event_init(event); - break; - default: + if (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_RAW) + err = __hw_perf_event_init(event, type); + else if (event->pmu->type == type) + /* Registered as unknown PMU */ + err = __hw_perf_event_init(event, PERF_TYPE_RAW); + else return -ENOENT; - } if (unlikely(err) && event->destroy) event->destroy(event); @@ -553,7 +552,7 @@ static int __init cpumf_pmu_init(void) return -ENODEV; cpumf_pmu.attr_groups = cpumf_cf_event_group(); - rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW); + rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1); if (rc) pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); return rc; diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c index 5f1fd1581330..e949ab832ed7 100644 --- a/arch/s390/kernel/perf_cpum_cf_diag.c +++ b/arch/s390/kernel/perf_cpum_cf_diag.c @@ -243,13 +243,13 @@ static int cf_diag_event_init(struct perf_event *event) int err = -ENOENT; debug_sprintf_event(cf_diag_dbg, 5, - "%s event %p cpu %d config %#llx " + "%s event %p cpu %d config %#llx type:%u " "sample_type %#llx cf_diag_events %d\n", __func__, - event, event->cpu, attr->config, attr->sample_type, - atomic_read(&cf_diag_events)); + event, event->cpu, attr->config, event->pmu->type, + attr->sample_type, atomic_read(&cf_diag_events)); if (event->attr.config != PERF_EVENT_CPUM_CF_DIAG || - event->attr.type != PERF_TYPE_RAW) + event->attr.type != event->pmu->type) goto out; /* Raw events are used to access counters directly, @@ -390,7 +390,7 @@ static size_t cf_diag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset, debug_sprintf_event(cf_diag_dbg, 6, "%s ctrset %d ctrset_size %zu cfvn %d csvn %d" - " need %zd rc:%d\n", + " need %zd rc %d\n", __func__, ctrset, ctrset_size, cpuhw->info.cfvn, cpuhw->info.csvn, need, rc); return need; @@ -567,7 +567,7 @@ static int cf_diag_add(struct perf_event *event, int flags) int err = 0; debug_sprintf_event(cf_diag_dbg, 5, - "%s event %p cpu %d flags %#x cpuhw:%p\n", + "%s event %p cpu %d flags %#x cpuhw %p\n", __func__, event, event->cpu, flags, cpuhw); if (cpuhw->flags & PMU_F_IN_USE) { @@ -693,7 +693,7 @@ static int __init cf_diag_init(void) } debug_register_view(cf_diag_dbg, &debug_sprintf_view); - rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", PERF_TYPE_RAW); + rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1); if (rc) { debug_unregister_view(cf_diag_dbg, &debug_sprintf_view); debug_unregister(cf_diag_dbg); diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 1266194afb02..b095b1c78987 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -156,8 +156,8 @@ static void free_sampling_buffer(struct sf_buffer *sfb) } } - debug_sprintf_event(sfdbg, 5, - "free_sampling_buffer: freed sdbt=%p\n", sfb->sdbt); + debug_sprintf_event(sfdbg, 5, "%s: freed sdbt %#lx\n", __func__, + (unsigned long)sfb->sdbt); memset(sfb, 0, sizeof(*sfb)); } @@ -193,7 +193,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb, gfp_t gfp_flags) { int i, rc; - unsigned long *new, *tail; + unsigned long *new, *tail, *tail_prev = NULL; if (!sfb->sdbt || !sfb->tail) return -EINVAL; @@ -212,10 +212,11 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, * the sampling buffer origin. */ if (sfb->sdbt != get_next_sdbt(tail)) { - debug_sprintf_event(sfdbg, 3, "realloc_sampling_buffer: " - "sampling buffer is not linked: origin=%p" - "tail=%p\n", - (void *) sfb->sdbt, (void *) tail); + debug_sprintf_event(sfdbg, 3, "%s: " + "sampling buffer is not linked: origin %#lx" + " tail %#lx\n", __func__, + (unsigned long)sfb->sdbt, + (unsigned long)tail); return -EINVAL; } @@ -232,6 +233,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, sfb->num_sdbt++; /* Link current page to tail of chain */ *tail = (unsigned long)(void *) new + 1; + tail_prev = tail; tail = new; } @@ -241,18 +243,30 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, * issue, a new realloc call (if required) might succeed. */ rc = alloc_sample_data_block(tail, gfp_flags); - if (rc) + if (rc) { + /* Undo last SDBT. An SDBT with no SDB at its first + * entry but with an SDBT entry instead can not be + * handled by the interrupt handler code. + * Avoid this situation. + */ + if (tail_prev) { + sfb->num_sdbt--; + free_page((unsigned long) new); + tail = tail_prev; + } break; + } sfb->num_sdb++; tail++; + tail_prev = new = NULL; /* Allocated at least one SBD */ } /* Link sampling buffer to its origin */ *tail = (unsigned long) sfb->sdbt + 1; sfb->tail = tail; - debug_sprintf_event(sfdbg, 4, "realloc_sampling_buffer: new buffer" - " settings: sdbt=%lu sdb=%lu\n", + debug_sprintf_event(sfdbg, 4, "%s: new buffer" + " settings: sdbt %lu sdb %lu\n", __func__, sfb->num_sdbt, sfb->num_sdb); return rc; } @@ -292,12 +306,13 @@ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb) rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL); if (rc) { free_sampling_buffer(sfb); - debug_sprintf_event(sfdbg, 4, "alloc_sampling_buffer: " - "realloc_sampling_buffer failed with rc=%i\n", rc); + debug_sprintf_event(sfdbg, 4, "%s: " + "realloc_sampling_buffer failed with rc %i\n", + __func__, rc); } else debug_sprintf_event(sfdbg, 4, - "alloc_sampling_buffer: tear=%p dear=%p\n", - sfb->sdbt, (void *) *sfb->sdbt); + "%s: tear %#lx dear %#lx\n", __func__, + (unsigned long)sfb->sdbt, (unsigned long)*sfb->sdbt); return rc; } @@ -404,8 +419,8 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) return 0; debug_sprintf_event(sfdbg, 3, - "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu" - " sample_size=%lu cpuhw=%p\n", + "%s: rate %lu f %lu sdb %lu/%lu" + " sample_size %lu cpuhw %p\n", __func__, SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc), sample_size, cpuhw); @@ -465,8 +480,8 @@ static void sfb_account_overflows(struct cpu_hw_sf *cpuhw, if (num) sfb_account_allocs(num, hwc); - debug_sprintf_event(sfdbg, 5, "sfb: overflow: overflow=%llu ratio=%lu" - " num=%lu\n", OVERFLOW_REG(hwc), ratio, num); + debug_sprintf_event(sfdbg, 5, "%s: overflow %llu ratio %lu num %lu\n", + __func__, OVERFLOW_REG(hwc), ratio, num); OVERFLOW_REG(hwc) = 0; } @@ -504,17 +519,16 @@ static void extend_sampling_buffer(struct sf_buffer *sfb, */ rc = realloc_sampling_buffer(sfb, num, GFP_ATOMIC); if (rc) - debug_sprintf_event(sfdbg, 5, "sfb: extend: realloc " - "failed with rc=%i\n", rc); + debug_sprintf_event(sfdbg, 5, "%s: realloc failed with rc %i\n", + __func__, rc); if (sfb_has_pending_allocs(sfb, hwc)) - debug_sprintf_event(sfdbg, 5, "sfb: extend: " - "req=%lu alloc=%lu remaining=%lu\n", - num, sfb->num_sdb - num_old, + debug_sprintf_event(sfdbg, 5, "%s: " + "req %lu alloc %lu remaining %lu\n", + __func__, num, sfb->num_sdb - num_old, sfb_pending_allocs(sfb, hwc)); } - /* Number of perf events counting hardware events */ static atomic_t num_events; /* Used to avoid races in calling reserve/release_cpumf_hardware */ @@ -539,20 +553,22 @@ static void setup_pmc_cpu(void *flags) err = sf_disable(); if (err) pr_err("Switching off the sampling facility failed " - "with rc=%i\n", err); + "with rc %i\n", err); debug_sprintf_event(sfdbg, 5, - "setup_pmc_cpu: initialized: cpuhw=%p\n", cpusf); + "%s: initialized: cpuhw %p\n", __func__, + cpusf); break; case PMC_RELEASE: cpusf->flags &= ~PMU_F_RESERVED; err = sf_disable(); if (err) { pr_err("Switching off the sampling facility failed " - "with rc=%i\n", err); + "with rc %i\n", err); } else deallocate_buffers(cpusf); debug_sprintf_event(sfdbg, 5, - "setup_pmc_cpu: released: cpuhw=%p\n", cpusf); + "%s: released: cpuhw %p\n", __func__, + cpusf); break; } if (err) @@ -599,13 +615,6 @@ static void hw_init_period(struct hw_perf_event *hwc, u64 period) local64_set(&hwc->period_left, hwc->sample_period); } -static void hw_reset_registers(struct hw_perf_event *hwc, - unsigned long *sdbt_origin) -{ - /* (Re)set to first sample-data-block-table */ - TEAR_REG(hwc) = (unsigned long) sdbt_origin; -} - static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, unsigned long rate) { @@ -674,13 +683,88 @@ out: rcu_read_unlock(); } +static unsigned long getrate(bool freq, unsigned long sample, + struct hws_qsi_info_block *si) +{ + unsigned long rate; + + if (freq) { + rate = freq_to_sample_rate(si, sample); + rate = hw_limit_rate(si, rate); + } else { + /* The min/max sampling rates specifies the valid range + * of sample periods. If the specified sample period is + * out of range, limit the period to the range boundary. + */ + rate = hw_limit_rate(si, sample); + + /* The perf core maintains a maximum sample rate that is + * configurable through the sysctl interface. Ensure the + * sampling rate does not exceed this value. This also helps + * to avoid throttling when pushing samples with + * perf_event_overflow(). + */ + if (sample_rate_to_freq(si, rate) > + sysctl_perf_event_sample_rate) { + debug_sprintf_event(sfdbg, 1, "%s: " + "Sampling rate exceeds maximum " + "perf sample rate\n", __func__); + rate = 0; + } + } + return rate; +} + +/* The sampling information (si) contains information about the + * min/max sampling intervals and the CPU speed. So calculate the + * correct sampling interval and avoid the whole period adjust + * feedback loop. + * + * Since the CPU Measurement sampling facility can not handle frequency + * calculate the sampling interval when frequency is specified using + * this formula: + * interval := cpu_speed * 1000000 / sample_freq + * + * Returns errno on bad input and zero on success with parameter interval + * set to the correct sampling rate. + * + * Note: This function turns off freq bit to avoid calling function + * perf_adjust_period(). This causes frequency adjustment in the common + * code part which causes tremendous variations in the counter values. + */ +static int __hw_perf_event_init_rate(struct perf_event *event, + struct hws_qsi_info_block *si) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + unsigned long rate; + + if (attr->freq) { + if (!attr->sample_freq) + return -EINVAL; + rate = getrate(attr->freq, attr->sample_freq, si); + attr->freq = 0; /* Don't call perf_adjust_period() */ + SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FREQ_MODE; + } else { + rate = getrate(attr->freq, attr->sample_period, si); + if (!rate) + return -EINVAL; + } + attr->sample_period = rate; + SAMPL_RATE(hwc) = rate; + hw_init_period(hwc, SAMPL_RATE(hwc)); + debug_sprintf_event(sfdbg, 4, "%s: cpu %d period %#llx freq %d,%#lx\n", + __func__, event->cpu, event->attr.sample_period, + event->attr.freq, SAMPLE_FREQ_MODE(hwc)); + return 0; +} + static int __hw_perf_event_init(struct perf_event *event) { struct cpu_hw_sf *cpuhw; struct hws_qsi_info_block si; struct perf_event_attr *attr = &event->attr; struct hw_perf_event *hwc = &event->hw; - unsigned long rate; int cpu, err; /* Reserve CPU-measurement sampling facility */ @@ -728,6 +812,12 @@ static int __hw_perf_event_init(struct perf_event *event) goto out; } + if (si.ribm & CPU_MF_SF_RIBM_NOTAV) { + pr_warn("CPU Measurement Facility sampling is temporarily not available\n"); + err = -EBUSY; + goto out; + } + /* Always enable basic sampling */ SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE; @@ -746,43 +836,9 @@ static int __hw_perf_event_init(struct perf_event *event) if (attr->config1 & PERF_CPUM_SF_FULL_BLOCKS) SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_FULL_BLOCKS; - /* The sampling information (si) contains information about the - * min/max sampling intervals and the CPU speed. So calculate the - * correct sampling interval and avoid the whole period adjust - * feedback loop. - */ - rate = 0; - if (attr->freq) { - if (!attr->sample_freq) { - err = -EINVAL; - goto out; - } - rate = freq_to_sample_rate(&si, attr->sample_freq); - rate = hw_limit_rate(&si, rate); - attr->freq = 0; - attr->sample_period = rate; - } else { - /* The min/max sampling rates specifies the valid range - * of sample periods. If the specified sample period is - * out of range, limit the period to the range boundary. - */ - rate = hw_limit_rate(&si, hwc->sample_period); - - /* The perf core maintains a maximum sample rate that is - * configurable through the sysctl interface. Ensure the - * sampling rate does not exceed this value. This also helps - * to avoid throttling when pushing samples with - * perf_event_overflow(). - */ - if (sample_rate_to_freq(&si, rate) > - sysctl_perf_event_sample_rate) { - err = -EINVAL; - debug_sprintf_event(sfdbg, 1, "Sampling rate exceeds maximum perf sample rate\n"); - goto out; - } - } - SAMPL_RATE(hwc) = rate; - hw_init_period(hwc, SAMPL_RATE(hwc)); + err = __hw_perf_event_init_rate(event, &si); + if (err) + goto out; /* Initialize sample data overflow accounting */ hwc->extra_reg.reg = REG_OVERFLOW; @@ -854,7 +910,7 @@ static int cpumsf_pmu_event_init(struct perf_event *event) /* Check online status of the CPU to which the event is pinned */ if (event->cpu >= 0 && !cpu_online(event->cpu)) - return -ENODEV; + return -ENODEV; /* Force reset of idle/hv excludes regardless of what the * user requested. @@ -902,9 +958,10 @@ static void cpumsf_pmu_enable(struct pmu *pmu) * buffer extents */ sfb_account_overflows(cpuhw, hwc); - if (sfb_has_pending_allocs(&cpuhw->sfb, hwc)) - extend_sampling_buffer(&cpuhw->sfb, hwc); + extend_sampling_buffer(&cpuhw->sfb, hwc); } + /* Rate may be adjusted with ioctl() */ + cpuhw->lsctl.interval = SAMPL_RATE(&cpuhw->event->hw); } /* (Re)enable the PMU and sampling facility */ @@ -914,7 +971,7 @@ static void cpumsf_pmu_enable(struct pmu *pmu) err = lsctl(&cpuhw->lsctl); if (err) { cpuhw->flags &= ~PMU_F_ENABLED; - pr_err("Loading sampling controls failed: op=%i err=%i\n", + pr_err("Loading sampling controls failed: op %i err %i\n", 1, err); return; } @@ -922,10 +979,11 @@ static void cpumsf_pmu_enable(struct pmu *pmu) /* Load current program parameter */ lpp(&S390_lowcore.lpp); - debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i " - "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs, - cpuhw->lsctl.ed, cpuhw->lsctl.cd, - (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear); + debug_sprintf_event(sfdbg, 6, "%s: es %i cs %i ed %i cd %i " + "interval %#lx tear %#lx dear %#lx\n", __func__, + cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed, + cpuhw->lsctl.cd, cpuhw->lsctl.interval, + cpuhw->lsctl.tear, cpuhw->lsctl.dear); } static void cpumsf_pmu_disable(struct pmu *pmu) @@ -948,13 +1006,14 @@ static void cpumsf_pmu_disable(struct pmu *pmu) err = lsctl(&inactive); if (err) { - pr_err("Loading sampling controls failed: op=%i err=%i\n", + pr_err("Loading sampling controls failed: op %i err %i\n", 2, err); return; } /* Save state of TEAR and DEAR register contents */ - if (!qsi(&si)) { + err = qsi(&si); + if (!err) { /* TEAR/DEAR values are valid only if the sampling facility is * enabled. Note that cpumsf_pmu_disable() might be called even * for a disabled sampling facility because cpumsf_pmu_enable() @@ -965,8 +1024,8 @@ static void cpumsf_pmu_disable(struct pmu *pmu) cpuhw->lsctl.dear = si.dear; } } else - debug_sprintf_event(sfdbg, 3, "cpumsf_pmu_disable: " - "qsi() failed with err=%i\n", err); + debug_sprintf_event(sfdbg, 3, "%s: qsi() failed with err %i\n", + __func__, err); cpuhw->flags &= ~PMU_F_ENABLED; } @@ -1079,14 +1138,6 @@ static void perf_event_count_update(struct perf_event *event, u64 count) local64_add(count, &event->count); } -static void debug_sample_entry(struct hws_basic_entry *sample, - struct hws_trailer_entry *te) -{ - debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown " - "sampling data entry: te->f=%i basic.def=%04x (%p)\n", - te->f, sample->def, sample); -} - /* hw_collect_samples() - Walk through a sample-data-block and collect samples * @event: The perf event * @sdbt: Sample-data-block table @@ -1140,7 +1191,11 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, /* Count discarded samples */ *overflow += 1; } else { - debug_sample_entry(sample, te); + debug_sprintf_event(sfdbg, 4, + "%s: Found unknown" + " sampling data entry: te->f %i" + " basic.def %#4x (%p)\n", __func__, + te->f, sample->def, sample); /* Sample slot is not yet written or other record. * * This condition can occur if the buffer was reused @@ -1215,9 +1270,9 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) sampl_overflow += te->overflow; /* Timestamps are valid for full sample-data-blocks only */ - debug_sprintf_event(sfdbg, 6, "hw_perf_event_update: sdbt=%p " - "overflow=%llu timestamp=0x%llx\n", - sdbt, te->overflow, + debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx " + "overflow %llu timestamp %#llx\n", + __func__, (unsigned long)sdbt, te->overflow, (te->f) ? trailer_timestamp(te) : 0ULL); /* Collect all samples from a single sample-data-block and @@ -1248,22 +1303,34 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) */ if (flush_all && done) break; - - /* If an event overflow happened, discard samples by - * processing any remaining sample-data-blocks. - */ - if (event_overflow) - flush_all = 1; } /* Account sample overflows in the event hardware structure */ if (sampl_overflow) OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) + sampl_overflow, 1 + num_sdb); + + /* Perf_event_overflow() and perf_event_account_interrupt() limit + * the interrupt rate to an upper limit. Roughly 1000 samples per + * task tick. + * Hitting this limit results in a large number + * of throttled REF_REPORT_THROTTLE entries and the samples + * are dropped. + * Slightly increase the interval to avoid hitting this limit. + */ + if (event_overflow) { + SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10); + debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n", + __func__, + DIV_ROUND_UP(SAMPL_RATE(hwc), 10)); + } + if (sampl_overflow || event_overflow) - debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: " - "overflow stats: sample=%llu event=%llu\n", - sampl_overflow, event_overflow); + debug_sprintf_event(sfdbg, 4, "%s: " + "overflows: sample %llu event %llu" + " total %llu num_sdb %llu\n", + __func__, sampl_overflow, event_overflow, + OVERFLOW_REG(hwc), num_sdb); } #define AUX_SDB_INDEX(aux, i) ((i) % aux->sfb.num_sdb) @@ -1316,7 +1383,8 @@ static void aux_output_end(struct perf_output_handle *handle) te = aux_sdb_trailer(aux, aux->alert_mark); te->flags &= ~SDB_TE_ALERT_REQ_MASK; - debug_sprintf_event(sfdbg, 6, "aux_output_end: collect %lx SDBs\n", i); + debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n", + __func__, i, range_scan, aux->head); } /* @@ -1349,13 +1417,17 @@ static int aux_output_begin(struct perf_output_handle *handle, * SDBs between aux->head and aux->empty_mark are already ready * for new data. range_scan is num of SDBs not within them. */ + debug_sprintf_event(sfdbg, 6, + "%s: range %ld head %ld alert %ld empty %ld\n", + __func__, range, aux->head, aux->alert_mark, + aux->empty_mark); if (range > AUX_SDB_NUM_EMPTY(aux)) { range_scan = range - AUX_SDB_NUM_EMPTY(aux); idx = aux->empty_mark + 1; for (i = 0; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); - te->flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; - te->flags = te->flags & ~SDB_TE_ALERT_REQ_MASK; + te->flags &= ~(SDB_TE_BUFFER_FULL_MASK | + SDB_TE_ALERT_REQ_MASK); te->overflow = 0; } /* Save the position of empty SDBs */ @@ -1374,15 +1446,11 @@ static int aux_output_begin(struct perf_output_handle *handle, cpuhw->lsctl.tear = base + offset * sizeof(unsigned long); cpuhw->lsctl.dear = aux->sdb_index[head]; - debug_sprintf_event(sfdbg, 6, "aux_output_begin: " - "head->alert_mark->empty_mark (num_alert, range)" - "[%lx -> %lx -> %lx] (%lx, %lx) " - "tear index %lx, tear %lx dear %lx\n", + debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld empty %ld " + "index %ld tear %#lx dear %#lx\n", __func__, aux->head, aux->alert_mark, aux->empty_mark, - AUX_SDB_NUM_ALERT(aux), range, head / CPUM_SF_SDB_PER_TABLE, - cpuhw->lsctl.tear, - cpuhw->lsctl.dear); + cpuhw->lsctl.tear, cpuhw->lsctl.dear); return 0; } @@ -1402,8 +1470,7 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, te = aux_sdb_trailer(aux, alert_index); do { orig_flags = te->flags; - orig_overflow = te->overflow; - *overflow = orig_overflow; + *overflow = orig_overflow = te->overflow; if (orig_flags & SDB_TE_BUFFER_FULL_MASK) { /* * SDB is already set by hardware. @@ -1445,9 +1512,12 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, unsigned long long *overflow) { unsigned long long orig_overflow, orig_flags, new_flags; - unsigned long i, range_scan, idx; + unsigned long i, range_scan, idx, idx_old; struct hws_trailer_entry *te; + debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld " + "empty %ld\n", __func__, range, aux->head, + aux->alert_mark, aux->empty_mark); if (range <= AUX_SDB_NUM_EMPTY(aux)) /* * No need to scan. All SDBs in range are marked as empty. @@ -1470,7 +1540,7 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, * indicator fall into this range, set it. */ range_scan = range - AUX_SDB_NUM_EMPTY(aux); - idx = aux->empty_mark + 1; + idx_old = idx = aux->empty_mark + 1; for (i = 0; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); do { @@ -1490,6 +1560,9 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, /* Update empty_mark to new position */ aux->empty_mark = aux->head + range - 1; + debug_sprintf_event(sfdbg, 6, "%s: range_scan %ld idx %ld..%ld " + "empty %ld\n", __func__, range_scan, idx_old, + idx - 1, aux->empty_mark); return true; } @@ -1503,7 +1576,6 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw) unsigned long range = 0, size; unsigned long long overflow = 0; struct perf_output_handle *handle = &cpuhw->handle; - unsigned long num_sdb; aux = perf_get_aux(handle); if (WARN_ON_ONCE(!aux)) @@ -1511,8 +1583,9 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw) /* Inform user space new data arrived */ size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT; + debug_sprintf_event(sfdbg, 6, "%s: #alert %ld\n", __func__, + size >> PAGE_SHIFT); perf_aux_output_end(handle, size); - num_sdb = aux->sfb.num_sdb; while (!done) { /* Get an output handle */ @@ -1520,8 +1593,10 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw) if (handle->size == 0) { pr_err("The AUX buffer with %lu pages for the " "diagnostic-sampling mode is full\n", - num_sdb); - debug_sprintf_event(sfdbg, 1, "AUX buffer used up\n"); + aux->sfb.num_sdb); + debug_sprintf_event(sfdbg, 1, + "%s: AUX buffer used up\n", + __func__); break; } if (WARN_ON_ONCE(!aux)) @@ -1543,24 +1618,24 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw) size = range << PAGE_SHIFT; perf_aux_output_end(&cpuhw->handle, size); pr_err("Sample data caused the AUX buffer with %lu " - "pages to overflow\n", num_sdb); - debug_sprintf_event(sfdbg, 1, "head %lx range %lx " - "overflow %llx\n", + "pages to overflow\n", aux->sfb.num_sdb); + debug_sprintf_event(sfdbg, 1, "%s: head %ld range %ld " + "overflow %lld\n", __func__, aux->head, range, overflow); } else { size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT; perf_aux_output_end(&cpuhw->handle, size); - debug_sprintf_event(sfdbg, 6, "head %lx alert %lx " + debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld " "already full, try another\n", + __func__, aux->head, aux->alert_mark); } } if (done) - debug_sprintf_event(sfdbg, 6, "aux_reset_buffer: " - "[%lx -> %lx -> %lx] (%lx, %lx)\n", - aux->head, aux->alert_mark, aux->empty_mark, - AUX_SDB_NUM_ALERT(aux), range); + debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld " + "empty %ld\n", __func__, aux->head, + aux->alert_mark, aux->empty_mark); } /* @@ -1583,8 +1658,7 @@ static void aux_buffer_free(void *data) kfree(aux->sdb_index); kfree(aux); - debug_sprintf_event(sfdbg, 4, "aux_buffer_free: free " - "%lu SDBTs\n", num_sdbt); + debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu\n", __func__, num_sdbt); } static void aux_sdb_init(unsigned long sdb) @@ -1636,13 +1710,13 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages, } /* Allocate aux_buffer struct for the event */ - aux = kmalloc(sizeof(struct aux_buffer), GFP_KERNEL); + aux = kzalloc(sizeof(struct aux_buffer), GFP_KERNEL); if (!aux) goto no_aux; sfb = &aux->sfb; /* Allocate sdbt_index for fast reference */ - n_sdbt = (nr_pages + CPUM_SF_SDB_PER_TABLE - 1) / CPUM_SF_SDB_PER_TABLE; + n_sdbt = DIV_ROUND_UP(nr_pages, CPUM_SF_SDB_PER_TABLE); aux->sdbt_index = kmalloc_array(n_sdbt, sizeof(void *), GFP_KERNEL); if (!aux->sdbt_index) goto no_sdbt_index; @@ -1692,8 +1766,7 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages, */ aux->empty_mark = sfb->num_sdb - 1; - debug_sprintf_event(sfdbg, 4, "aux_buffer_setup: setup %lu SDBTs" - " and %lu SDBs\n", + debug_sprintf_event(sfdbg, 4, "%s: SDBTs %lu SDBs %lu\n", __func__, sfb->num_sdbt, sfb->num_sdb); return aux; @@ -1716,6 +1789,44 @@ static void cpumsf_pmu_read(struct perf_event *event) /* Nothing to do ... updates are interrupt-driven */ } +/* Check if the new sampling period/freqeuncy is appropriate. + * + * Return non-zero on error and zero on passed checks. + */ +static int cpumsf_pmu_check_period(struct perf_event *event, u64 value) +{ + struct hws_qsi_info_block si; + unsigned long rate; + bool do_freq; + + memset(&si, 0, sizeof(si)); + if (event->cpu == -1) { + if (qsi(&si)) + return -ENODEV; + } else { + /* Event is pinned to a particular CPU, retrieve the per-CPU + * sampling structure for accessing the CPU-specific QSI. + */ + struct cpu_hw_sf *cpuhw = &per_cpu(cpu_hw_sf, event->cpu); + + si = cpuhw->qsi; + } + + do_freq = !!SAMPLE_FREQ_MODE(&event->hw); + rate = getrate(do_freq, value, &si); + if (!rate) + return -EINVAL; + + event->attr.sample_period = rate; + SAMPL_RATE(&event->hw) = rate; + hw_init_period(&event->hw, SAMPL_RATE(&event->hw)); + debug_sprintf_event(sfdbg, 4, "%s:" + " cpu %d value %#llx period %#llx freq %d\n", + __func__, event->cpu, value, + event->attr.sample_period, do_freq); + return 0; +} + /* Activate sampling control. * Next call of pmu_enable() starts sampling. */ @@ -1787,7 +1898,7 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags) if (!SAMPL_DIAG_MODE(&event->hw)) { cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt; cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt; - hw_reset_registers(&event->hw, cpuhw->sfb.sdbt); + TEAR_REG(&event->hw) = (unsigned long) cpuhw->sfb.sdbt; } /* Ensure sampling functions are in the disabled state. If disabled, @@ -1879,10 +1990,12 @@ static struct attribute_group cpumsf_pmu_events_group = { .name = "events", .attrs = cpumsf_pmu_events_attr, }; + static struct attribute_group cpumsf_pmu_format_group = { .name = "format", .attrs = cpumsf_pmu_format_attr, }; + static const struct attribute_group *cpumsf_pmu_attr_groups[] = { &cpumsf_pmu_events_group, &cpumsf_pmu_format_group, @@ -1905,6 +2018,8 @@ static struct pmu cpumf_sampling = { .setup_aux = aux_buffer_setup, .free_aux = aux_buffer_free, + + .check_period = cpumsf_pmu_check_period, }; static void cpumf_measurement_alert(struct ext_code ext_code, @@ -1938,7 +2053,8 @@ static void cpumf_measurement_alert(struct ext_code ext_code, /* Report measurement alerts only for non-PRA codes */ if (alert != CPU_MF_INT_SF_PRA) - debug_sprintf_event(sfdbg, 6, "measurement alert: 0x%x\n", alert); + debug_sprintf_event(sfdbg, 6, "%s: alert %#x\n", __func__, + alert); /* Sampling authorization change request */ if (alert & CPU_MF_INT_SF_SACA) @@ -1959,6 +2075,7 @@ static void cpumf_measurement_alert(struct ext_code ext_code, sf_disable(); } } + static int cpusf_pmu_setup(unsigned int cpu, int flags) { /* Ignore the notification if no events are scheduled on the PMU. @@ -2015,7 +2132,7 @@ static int param_set_sfb_size(const char *val, const struct kernel_param *kp) sfb_set_limits(min, max); pr_info("The sampling buffer limits have changed to: " - "min=%lu max=%lu (diag=x%lu)\n", + "min %lu max %lu (diag %lu)\n", CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR); return 0; } @@ -2033,7 +2150,7 @@ static const struct kernel_param_ops param_ops_sfb_size = { static void __init pr_cpumsf_err(unsigned int reason) { pr_err("Sampling facility support for perf is not available: " - "reason=%04x\n", reason); + "reason %#x\n", reason); } static int __init init_cpum_sampling_pmu(void) @@ -2096,5 +2213,6 @@ static int __init init_cpum_sampling_pmu(void) out: return err; } + arch_initcall(init_cpum_sampling_pmu); core_param(cpum_sfb_size, CPUM_SF_MAX_SDB, sfb_size, 0640); diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index fcb6c2e92b07..1e75cc983546 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -224,9 +224,13 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { struct unwind_state state; + unsigned long addr; - unwind_for_each_frame(&state, current, regs, 0) - perf_callchain_store(entry, state.ip); + unwind_for_each_frame(&state, current, regs, 0) { + addr = unwind_get_return_address(&state); + if (!addr || perf_callchain_store(entry, addr)) + return; + } } /* Perf definitions for PMU event attributes in sysfs */ diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S index 59dee9d3bebf..eee3a482195a 100644 --- a/arch/s390/kernel/pgm_check.S +++ b/arch/s390/kernel/pgm_check.S @@ -81,7 +81,7 @@ PGM_CHECK_DEFAULT /* 3c */ PGM_CHECK_DEFAULT /* 3d */ PGM_CHECK_DEFAULT /* 3e */ PGM_CHECK_DEFAULT /* 3f */ -PGM_CHECK_DEFAULT /* 40 */ +PGM_CHECK(monitor_event_exception) /* 40 */ PGM_CHECK_DEFAULT /* 41 */ PGM_CHECK_DEFAULT /* 42 */ PGM_CHECK_DEFAULT /* 43 */ diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 63873aa6693f..6ccef5f29761 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -40,6 +40,7 @@ #include <asm/stacktrace.h> #include <asm/switch_to.h> #include <asm/runtime_instr.h> +#include <asm/unwind.h> #include "entry.h" asmlinkage void ret_from_fork(void) asm ("ret_from_fork"); @@ -178,26 +179,31 @@ EXPORT_SYMBOL(dump_fpu); unsigned long get_wchan(struct task_struct *p) { - struct stack_frame *sf, *low, *high; - unsigned long return_address; - int count; + struct unwind_state state; + unsigned long ip = 0; if (!p || p == current || p->state == TASK_RUNNING || !task_stack_page(p)) return 0; - low = task_stack_page(p); - high = (struct stack_frame *) task_pt_regs(p); - sf = (struct stack_frame *) p->thread.ksp; - if (sf <= low || sf > high) + + if (!try_get_task_stack(p)) return 0; - for (count = 0; count < 16; count++) { - sf = (struct stack_frame *) sf->back_chain; - if (sf <= low || sf > high) - return 0; - return_address = sf->gprs[8]; - if (!in_sched_functions(return_address)) - return return_address; + + unwind_for_each_frame(&state, p, NULL, 0) { + if (state.stack_info.type != STACK_TYPE_TASK) { + ip = 0; + break; + } + + ip = unwind_get_return_address(&state); + if (!ip) + break; + + if (!in_sched_functions(ip)) + break; } - return 0; + + put_task_stack(p); + return ip; } unsigned long arch_align_stack(unsigned long sp) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index ad71132374f0..58faa12542a1 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -856,7 +856,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) } /* Do the secure computing check after ptrace. */ - if (secure_computing(NULL)) { + if (secure_computing()) { /* seccomp failures shouldn't expose any additional code. */ return -1; } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 253177900950..b2c2f75860e8 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -99,6 +99,7 @@ int __bootdata_preserved(prot_virt_guest); int __bootdata(noexec_disabled); int __bootdata(memory_end_set); unsigned long __bootdata(memory_end); +unsigned long __bootdata(vmalloc_size); unsigned long __bootdata(max_physmem_end); struct mem_detect_info __bootdata(mem_detect); @@ -110,6 +111,8 @@ unsigned long __bootdata_preserved(__etext_dma); unsigned long __bootdata_preserved(__sdma); unsigned long __bootdata_preserved(__edma); unsigned long __bootdata_preserved(__kaslr_offset); +unsigned int __bootdata_preserved(zlib_dfltcc_support); +EXPORT_SYMBOL(zlib_dfltcc_support); unsigned long VMALLOC_START; EXPORT_SYMBOL(VMALLOC_START); @@ -168,15 +171,15 @@ static void __init set_preferred_console(void) static int __init conmode_setup(char *str) { #if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) - if (strncmp(str, "hwc", 4) == 0 || strncmp(str, "sclp", 5) == 0) + if (!strcmp(str, "hwc") || !strcmp(str, "sclp")) SET_CONSOLE_SCLP; #endif #if defined(CONFIG_TN3215_CONSOLE) - if (strncmp(str, "3215", 5) == 0) + if (!strcmp(str, "3215")) SET_CONSOLE_3215; #endif #if defined(CONFIG_TN3270_CONSOLE) - if (strncmp(str, "3270", 5) == 0) + if (!strcmp(str, "3270")) SET_CONSOLE_3270; #endif set_preferred_console(); @@ -211,7 +214,7 @@ static void __init conmode_default(void) #endif return; } - if (strncmp(ptr + 8, "3270", 4) == 0) { + if (str_has_prefix(ptr + 8, "3270")) { #if defined(CONFIG_TN3270_CONSOLE) SET_CONSOLE_3270; #elif defined(CONFIG_TN3215_CONSOLE) @@ -219,7 +222,7 @@ static void __init conmode_default(void) #elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE) SET_CONSOLE_SCLP; #endif - } else if (strncmp(ptr + 8, "3215", 4) == 0) { + } else if (str_has_prefix(ptr + 8, "3215")) { #if defined(CONFIG_TN3215_CONSOLE) SET_CONSOLE_3215; #elif defined(CONFIG_TN3270_CONSOLE) @@ -240,8 +243,6 @@ static void __init conmode_default(void) SET_CONSOLE_SCLP; #endif } - if (IS_ENABLED(CONFIG_VT) && IS_ENABLED(CONFIG_DUMMY_CONSOLE)) - conswitchp = &dummy_con; } #ifdef CONFIG_CRASH_DUMP @@ -302,15 +303,6 @@ void machine_power_off(void) void (*pm_power_off)(void) = machine_power_off; EXPORT_SYMBOL_GPL(pm_power_off); -static int __init parse_vmalloc(char *arg) -{ - if (!arg) - return -EINVAL; - VMALLOC_END = (memparse(arg, &arg) + PAGE_SIZE - 1) & PAGE_MASK; - return 0; -} -early_param("vmalloc", parse_vmalloc); - void *restart_stack __section(.data); unsigned long stack_alloc(void) @@ -363,7 +355,6 @@ early_initcall(async_stack_realloc); void __init arch_call_rest_init(void) { - struct stack_frame *frame; unsigned long stack; stack = stack_alloc(); @@ -376,13 +367,7 @@ void __init arch_call_rest_init(void) set_task_stack_end_magic(current); stack += STACK_INIT_OFFSET; S390_lowcore.kernel_stack = stack; - frame = (struct stack_frame *) stack; - memset(frame, 0, sizeof(*frame)); - /* Branch to rest_init on the new stack, never returns */ - asm volatile( - " la 15,0(%[_frame])\n" - " jg rest_init\n" - : : [_frame] "a" (frame)); + CALL_ON_STACK_NORETURN(rest_init, stack); } static void __init setup_lowcore_dat_off(void) @@ -563,10 +548,9 @@ static void __init setup_resources(void) static void __init setup_memory_end(void) { - unsigned long vmax, vmalloc_size, tmp; + unsigned long vmax, tmp; /* Choose kernel address space layout: 3 or 4 levels. */ - vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN; if (IS_ENABLED(CONFIG_KASAN)) { vmax = IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) ? _REGION1_SIZE @@ -777,14 +761,6 @@ static void __init free_mem_detect_info(void) memblock_free(start, size); } -static void __init memblock_physmem_add(phys_addr_t start, phys_addr_t size) -{ - memblock_dbg("memblock_physmem_add: [%#016llx-%#016llx]\n", - start, start + size - 1); - memblock_add_range(&memblock.memory, start, size, 0, 0); - memblock_add_range(&memblock.physmem, start, size, 0, 0); -} - static const char * __init get_mem_info_source(void) { switch (mem_detect.info_source) { @@ -809,8 +785,10 @@ static void __init memblock_add_mem_detect_info(void) get_mem_info_source(), mem_detect.info_source); /* keep memblock lists close to the kernel */ memblock_set_bottom_up(true); - for_each_mem_detect_block(i, &start, &end) + for_each_mem_detect_block(i, &start, &end) { + memblock_add(start, end - start); memblock_physmem_add(start, end - start); + } memblock_set_bottom_up(false); memblock_dump_all(); } @@ -990,6 +968,10 @@ static int __init setup_hwcaps(void) case 0x3907: strcpy(elf_platform, "z14"); break; + case 0x8561: + case 0x8562: + strcpy(elf_platform, "z15"); + break; } /* @@ -1064,7 +1046,7 @@ static void __init log_component_list(void) if (!early_ipl_comp_list_addr) return; - if (ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR) + if (ipl_block.hdr.flags & IPL_PL_FLAG_SIPL) pr_info("Linux is running with Secure-IPL enabled\n"); else pr_info("Linux is running with Secure-IPL disabled\n"); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 44974654cbd0..a08bd2522dd9 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -262,10 +262,13 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) lc->spinlock_index = 0; lc->percpu_offset = __per_cpu_offset[cpu]; lc->kernel_asce = S390_lowcore.kernel_asce; + lc->user_asce = S390_lowcore.kernel_asce; lc->machine_flags = S390_lowcore.machine_flags; lc->user_timer = lc->system_timer = lc->steal_timer = lc->avg_steal_timer = 0; __ctl_store(lc->cregs_save_area, 0, 15); + lc->cregs_save_area[1] = lc->kernel_asce; + lc->cregs_save_area[7] = lc->vdso_asce; save_access_regs((unsigned int *) lc->access_regs_save_area); memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, sizeof(lc->stfle_fac_list)); @@ -410,14 +413,11 @@ EXPORT_SYMBOL(arch_vcpu_is_preempted); void smp_yield_cpu(int cpu) { - if (MACHINE_HAS_DIAG9C) { - diag_stat_inc_norecursion(DIAG_STAT_X09C); - asm volatile("diag %0,0,0x9c" - : : "d" (pcpu_devices[cpu].address)); - } else if (MACHINE_HAS_DIAG44 && !smp_cpu_mtid) { - diag_stat_inc_norecursion(DIAG_STAT_X044); - asm volatile("diag 0,0,0x44"); - } + if (!MACHINE_HAS_DIAG9C) + return; + diag_stat_inc_norecursion(DIAG_STAT_X09C); + asm volatile("diag %0,0,0x9c" + : : "d" (pcpu_devices[cpu].address)); } /* @@ -724,39 +724,67 @@ static void __ref smp_get_core_info(struct sclp_core_info *info, int early) static int smp_add_present_cpu(int cpu); -static int __smp_rescan_cpus(struct sclp_core_info *info, int sysfs_add) +static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail, + bool configured, bool early) { struct pcpu *pcpu; - cpumask_t avail; - int cpu, nr, i, j; + int cpu, nr, i; u16 address; nr = 0; - cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); - cpu = cpumask_first(&avail); - for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) { - if (sclp.has_core_type && info->core[i].type != boot_core_type) + if (sclp.has_core_type && core->type != boot_core_type) + return nr; + cpu = cpumask_first(avail); + address = core->core_id << smp_cpu_mt_shift; + for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) { + if (pcpu_find_address(cpu_present_mask, address + i)) continue; - address = info->core[i].core_id << smp_cpu_mt_shift; - for (j = 0; j <= smp_cpu_mtid; j++) { - if (pcpu_find_address(cpu_present_mask, address + j)) - continue; - pcpu = pcpu_devices + cpu; - pcpu->address = address + j; - pcpu->state = - (cpu >= info->configured*(smp_cpu_mtid + 1)) ? - CPU_STATE_STANDBY : CPU_STATE_CONFIGURED; - smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); - set_cpu_present(cpu, true); - if (sysfs_add && smp_add_present_cpu(cpu) != 0) - set_cpu_present(cpu, false); - else - nr++; - cpu = cpumask_next(cpu, &avail); - if (cpu >= nr_cpu_ids) + pcpu = pcpu_devices + cpu; + pcpu->address = address + i; + if (configured) + pcpu->state = CPU_STATE_CONFIGURED; + else + pcpu->state = CPU_STATE_STANDBY; + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + set_cpu_present(cpu, true); + if (!early && smp_add_present_cpu(cpu) != 0) + set_cpu_present(cpu, false); + else + nr++; + cpumask_clear_cpu(cpu, avail); + cpu = cpumask_next(cpu, avail); + } + return nr; +} + +static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) +{ + struct sclp_core_entry *core; + cpumask_t avail; + bool configured; + u16 core_id; + int nr, i; + + nr = 0; + cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); + /* + * Add IPL core first (which got logical CPU number 0) to make sure + * that all SMT threads get subsequent logical CPU numbers. + */ + if (early) { + core_id = pcpu_devices[0].address >> smp_cpu_mt_shift; + for (i = 0; i < info->configured; i++) { + core = &info->core[i]; + if (core->core_id == core_id) { + nr += smp_add_core(core, &avail, true, early); break; + } } } + for (i = 0; i < info->combined; i++) { + configured = i < info->configured; + nr += smp_add_core(&info->core[i], &avail, configured, early); + } return nr; } @@ -805,7 +833,7 @@ void __init smp_detect_cpus(void) /* Add CPUs present at boot */ get_online_cpus(); - __smp_rescan_cpus(info, 0); + __smp_rescan_cpus(info, true); put_online_cpus(); memblock_free_early((unsigned long)info, sizeof(*info)); } @@ -816,6 +844,8 @@ static void smp_init_secondary(void) S390_lowcore.last_update_clock = get_tod_clock(); restore_access_regs(S390_lowcore.access_regs_save_area); + set_cpu_flag(CIF_ASCE_PRIMARY); + set_cpu_flag(CIF_ASCE_SECONDARY); cpu_init(); preempt_disable(); init_cpu_timer(); @@ -843,7 +873,7 @@ static void __no_sanitize_address smp_start_secondary(void *cpuvoid) S390_lowcore.restart_source = -1UL; __ctl_load(S390_lowcore.cregs_save_area, 0, 15); __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); - CALL_ON_STACK(smp_init_secondary, S390_lowcore.kernel_stack, 0); + CALL_ON_STACK_NORETURN(smp_init_secondary, S390_lowcore.kernel_stack); } /* Upping and downing of CPUs */ @@ -1148,7 +1178,7 @@ int __ref smp_rescan_cpus(void) smp_get_core_info(info, 0); get_online_cpus(); mutex_lock(&smp_cpu_state_mutex); - nr = __smp_rescan_cpus(info, 1); + nr = __smp_rescan_cpus(info, false); mutex_unlock(&smp_cpu_state_mutex); put_online_cpus(); kfree(info); diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index f6a620f854e1..fc5419ac64c8 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -6,57 +6,62 @@ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> */ -#include <linux/sched.h> -#include <linux/sched/debug.h> #include <linux/stacktrace.h> -#include <linux/kallsyms.h> -#include <linux/export.h> #include <asm/stacktrace.h> #include <asm/unwind.h> +#include <asm/kprobes.h> -void save_stack_trace(struct stack_trace *trace) +void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, + struct task_struct *task, struct pt_regs *regs) { struct unwind_state state; + unsigned long addr; - unwind_for_each_frame(&state, current, NULL, 0) { - if (trace->nr_entries >= trace->max_entries) + unwind_for_each_frame(&state, task, regs, 0) { + addr = unwind_get_return_address(&state); + if (!addr || !consume_entry(cookie, addr, false)) break; - if (trace->skip > 0) - trace->skip--; - else - trace->entries[trace->nr_entries++] = state.ip; } } -EXPORT_SYMBOL_GPL(save_stack_trace); -void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +/* + * This function returns an error if it detects any unreliable features of the + * stack. Otherwise it guarantees that the stack trace is reliable. + * + * If the task is not 'current', the caller *must* ensure the task is inactive. + */ +int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, + void *cookie, struct task_struct *task) { struct unwind_state state; + unsigned long addr; - unwind_for_each_frame(&state, tsk, NULL, 0) { - if (trace->nr_entries >= trace->max_entries) - break; - if (in_sched_functions(state.ip)) - continue; - if (trace->skip > 0) - trace->skip--; - else - trace->entries[trace->nr_entries++] = state.ip; - } -} -EXPORT_SYMBOL_GPL(save_stack_trace_tsk); + unwind_for_each_frame(&state, task, NULL, 0) { + if (state.stack_info.type != STACK_TYPE_TASK) + return -EINVAL; -void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) -{ - struct unwind_state state; + if (state.regs) + return -EINVAL; - unwind_for_each_frame(&state, current, regs, 0) { - if (trace->nr_entries >= trace->max_entries) - break; - if (trace->skip > 0) - trace->skip--; - else - trace->entries[trace->nr_entries++] = state.ip; + addr = unwind_get_return_address(&state); + if (!addr) + return -EINVAL; + +#ifdef CONFIG_KPROBES + /* + * Mark stacktraces with kretprobed functions on them + * as unreliable. + */ + if (state.ip == (unsigned long)kretprobe_trampoline) + return -EINVAL; +#endif + + if (!consume_entry(cookie, addr, false)) + return -EINVAL; } + + /* Check for stack corruption */ + if (unwind_error(&state)) + return -EINVAL; + return 0; } -EXPORT_SYMBOL_GPL(save_stack_trace_regs); diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 3054e9c035a3..bd7bd3581a0f 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -438,3 +438,5 @@ 433 common fspick sys_fspick sys_fspick 434 common pidfd_open sys_pidfd_open sys_pidfd_open 435 common clone3 sys_clone3 sys_clone3 +437 common openat2 sys_openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index e8766beee5ad..f9d070d016e3 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -110,15 +110,6 @@ unsigned long long notrace sched_clock(void) } NOKPROBE_SYMBOL(sched_clock); -/* - * Monotonic_clock - returns # of nanoseconds passed since time_init() - */ -unsigned long long monotonic_clock(void) -{ - return sched_clock(); -} -EXPORT_SYMBOL(monotonic_clock); - static void ext_to_timespec64(unsigned char *clk, struct timespec64 *xt) { unsigned long long high, low, rem, sec, nsec; diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 2db6fb405a9a..3627953007ed 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -311,7 +311,8 @@ int arch_update_cpu_topology(void) on_each_cpu(__arch_update_dedicated_flag, NULL, 0); for_each_online_cpu(cpu) { dev = get_cpu_device(cpu); - kobject_uevent(&dev->kobj, KOBJ_CHANGE); + if (dev) + kobject_uevent(&dev->kobj, KOBJ_CHANGE); } return rc; } diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 164c0282b41a..dc75588d7894 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -53,11 +53,6 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) if (fixup) regs->psw.addr = extable_fixup(fixup); else { - enum bug_trap_type btt; - - btt = report_bug(regs->psw.addr, regs); - if (btt == BUG_TRAP_TYPE_WARN) - return; die(regs, str); } } @@ -245,6 +240,27 @@ void space_switch_exception(struct pt_regs *regs) do_trap(regs, SIGILL, ILL_PRVOPC, "space switch event"); } +void monitor_event_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *fixup; + + if (user_mode(regs)) + return; + + switch (report_bug(regs->psw.addr - (regs->int_code >> 16), regs)) { + case BUG_TRAP_TYPE_NONE: + fixup = s390_search_extables(regs->psw.addr); + if (fixup) + regs->psw.addr = extable_fixup(fixup); + break; + case BUG_TRAP_TYPE_WARN: + break; + case BUG_TRAP_TYPE_BUG: + die(regs, "monitor event"); + break; + } +} + void kernel_stack_overflow(struct pt_regs *regs) { bust_spinlocks(1); @@ -255,8 +271,23 @@ void kernel_stack_overflow(struct pt_regs *regs) } NOKPROBE_SYMBOL(kernel_stack_overflow); +static void test_monitor_call(void) +{ + int val = 1; + + asm volatile( + " mc 0,0\n" + "0: xgr %0,%0\n" + "1:\n" + EX_TABLE(0b,1b) + : "+d" (val)); + if (!val) + panic("Monitor call doesn't work!\n"); +} + void __init trap_init(void) { sort_extable(__start_dma_ex_table, __stop_dma_ex_table); local_mcck_enable(); + test_monitor_call(); } diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c index 8fc9daae47a2..707fd99f6734 100644 --- a/arch/s390/kernel/unwind_bc.c +++ b/arch/s390/kernel/unwind_bc.c @@ -36,6 +36,19 @@ static bool update_stack_info(struct unwind_state *state, unsigned long sp) return true; } +static inline bool is_final_pt_regs(struct unwind_state *state, + struct pt_regs *regs) +{ + /* user mode or kernel thread pt_regs at the bottom of task stack */ + if (task_pt_regs(state->task) == regs) + return true; + + /* user mode pt_regs at the bottom of irq stack */ + return state->stack_info.type == STACK_TYPE_IRQ && + state->stack_info.end - sizeof(struct pt_regs) == (unsigned long)regs && + READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE; +} + bool unwind_next_frame(struct unwind_state *state) { struct stack_info *info = &state->stack_info; @@ -46,15 +59,16 @@ bool unwind_next_frame(struct unwind_state *state) regs = state->regs; if (unlikely(regs)) { - sp = READ_ONCE_NOCHECK(regs->gprs[15]); - if (unlikely(outside_of_stack(state, sp))) { - if (!update_stack_info(state, sp)) - goto out_err; - } + sp = state->sp; sf = (struct stack_frame *) sp; ip = READ_ONCE_NOCHECK(sf->gprs[8]); reliable = false; regs = NULL; + if (!__kernel_text_address(ip)) { + /* skip bogus %r14 */ + state->regs = NULL; + return unwind_next_frame(state); + } } else { sf = (struct stack_frame *) state->sp; sp = READ_ONCE_NOCHECK(sf->back_chain); @@ -71,21 +85,25 @@ bool unwind_next_frame(struct unwind_state *state) /* No back-chain, look for a pt_regs structure */ sp = state->sp + STACK_FRAME_OVERHEAD; if (!on_stack(info, sp, sizeof(struct pt_regs))) - goto out_stop; + goto out_err; regs = (struct pt_regs *) sp; - if (READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE) + if (is_final_pt_regs(state, regs)) goto out_stop; ip = READ_ONCE_NOCHECK(regs->psw.addr); + sp = READ_ONCE_NOCHECK(regs->gprs[15]); + if (unlikely(outside_of_stack(state, sp))) { + if (!update_stack_info(state, sp)) + goto out_err; + } reliable = true; } } -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - /* Decode any ftrace redirection */ - if (ip == (unsigned long) return_to_handler) - ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, - ip, (void *) sp); -#endif + /* Sanity check: ABI requires SP to be aligned 8 bytes. */ + if (sp & 0x7) + goto out_err; + + ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *) sp); /* Update unwind state */ state->sp = sp; @@ -103,13 +121,11 @@ out_stop: EXPORT_SYMBOL_GPL(unwind_next_frame); void __unwind_start(struct unwind_state *state, struct task_struct *task, - struct pt_regs *regs, unsigned long sp) + struct pt_regs *regs, unsigned long first_frame) { struct stack_info *info = &state->stack_info; - unsigned long *mask = &state->stack_mask; struct stack_frame *sf; - unsigned long ip; - bool reliable; + unsigned long ip, sp; memset(state, 0, sizeof(*state)); state->task = task; @@ -121,35 +137,46 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, return; } + /* Get the instruction pointer from pt_regs or the stack frame */ + if (regs) { + ip = regs->psw.addr; + sp = regs->gprs[15]; + } else if (task == current) { + sp = current_frame_address(); + } else { + sp = task->thread.ksp; + } + /* Get current stack pointer and initialize stack info */ - if (get_stack_info(sp, task, info, mask) != 0 || - !on_stack(info, sp, sizeof(struct stack_frame))) { + if (!update_stack_info(state, sp)) { /* Something is wrong with the stack pointer */ info->type = STACK_TYPE_UNKNOWN; state->error = true; return; } - /* Get the instruction pointer from pt_regs or the stack frame */ - if (regs) { - ip = READ_ONCE_NOCHECK(regs->psw.addr); - reliable = true; - } else { - sf = (struct stack_frame *) sp; + if (!regs) { + /* Stack frame is within valid stack */ + sf = (struct stack_frame *)sp; ip = READ_ONCE_NOCHECK(sf->gprs[8]); - reliable = false; } -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - /* Decode any ftrace redirection */ - if (ip == (unsigned long) return_to_handler) - ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, - ip, NULL); -#endif + ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL); /* Update unwind state */ state->sp = sp; state->ip = ip; - state->reliable = reliable; + state->reliable = true; + + if (!first_frame) + return; + /* Skip through the call chain to the specified starting frame */ + while (!unwind_done(state)) { + if (on_stack(&state->stack_info, first_frame, sizeof(struct stack_frame))) { + if (state->sp >= first_frame) + break; + } + unwind_next_frame(state); + } } EXPORT_SYMBOL_GPL(__unwind_start); diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index c6bc190f3c28..bcc9bdb39ba2 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -29,13 +29,6 @@ #include <asm/vdso.h> #include <asm/facility.h> -#ifdef CONFIG_COMPAT_VDSO -extern char vdso32_start, vdso32_end; -static void *vdso32_kbase = &vdso32_start; -static unsigned int vdso32_pages; -static struct page **vdso32_pagelist; -#endif - extern char vdso64_start, vdso64_end; static void *vdso64_kbase = &vdso64_start; static unsigned int vdso64_pages; @@ -55,12 +48,6 @@ static vm_fault_t vdso_fault(const struct vm_special_mapping *sm, vdso_pagelist = vdso64_pagelist; vdso_pages = vdso64_pages; -#ifdef CONFIG_COMPAT_VDSO - if (vma->vm_mm->context.compat_mm) { - vdso_pagelist = vdso32_pagelist; - vdso_pages = vdso32_pages; - } -#endif if (vmf->pgoff >= vdso_pages) return VM_FAULT_SIGBUS; @@ -76,10 +63,6 @@ static int vdso_mremap(const struct vm_special_mapping *sm, unsigned long vdso_pages; vdso_pages = vdso64_pages; -#ifdef CONFIG_COMPAT_VDSO - if (vma->vm_mm->context.compat_mm) - vdso_pages = vdso32_pages; -#endif if ((vdso_pages << PAGE_SHIFT) != vma->vm_end - vma->vm_start) return -EINVAL; @@ -97,21 +80,13 @@ static const struct vm_special_mapping vdso_mapping = { .mremap = vdso_mremap, }; -static int __init vdso_setup(char *s) +static int __init vdso_setup(char *str) { - unsigned long val; - int rc; + bool enabled; - rc = 0; - if (strncmp(s, "on", 3) == 0) - vdso_enabled = 1; - else if (strncmp(s, "off", 4) == 0) - vdso_enabled = 0; - else { - rc = kstrtoul(s, 0, &val); - vdso_enabled = rc ? 0 : !!val; - } - return !rc; + if (!kstrtobool(str, &enabled)) + vdso_enabled = enabled; + return 1; } __setup("vdso=", vdso_setup); @@ -217,12 +192,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) if (!vdso_enabled) return 0; + if (is_compat_task()) + return 0; + vdso_pages = vdso64_pages; -#ifdef CONFIG_COMPAT_VDSO - mm->context.compat_mm = is_compat_task(); - if (mm->context.compat_mm) - vdso_pages = vdso32_pages; -#endif /* * vDSO has a problem and was disabled, just don't "enable" it for * the process @@ -275,23 +248,6 @@ static int __init vdso_init(void) int i; vdso_init_data(vdso_data); -#ifdef CONFIG_COMPAT_VDSO - /* Calculate the size of the 32 bit vDSO */ - vdso32_pages = ((&vdso32_end - &vdso32_start - + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1; - - /* Make sure pages are in the correct state */ - vdso32_pagelist = kcalloc(vdso32_pages + 1, sizeof(struct page *), - GFP_KERNEL); - BUG_ON(vdso32_pagelist == NULL); - for (i = 0; i < vdso32_pages - 1; i++) { - struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE); - get_page(pg); - vdso32_pagelist[i] = pg; - } - vdso32_pagelist[vdso32_pages - 1] = virt_to_page(vdso_data); - vdso32_pagelist[vdso32_pages] = NULL; -#endif /* Calculate the size of the 64 bit vDSO */ vdso64_pages = ((&vdso64_end - &vdso64_start diff --git a/arch/s390/kernel/vdso32/.gitignore b/arch/s390/kernel/vdso32/.gitignore deleted file mode 100644 index e45fba9d0ced..000000000000 --- a/arch/s390/kernel/vdso32/.gitignore +++ /dev/null @@ -1 +0,0 @@ -vdso32.lds diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile deleted file mode 100644 index aee9ffbccb54..000000000000 --- a/arch/s390/kernel/vdso32/Makefile +++ /dev/null @@ -1,66 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# List of files in the vdso, has to be asm only for now - -KCOV_INSTRUMENT := n - -obj-vdso32 = gettimeofday.o clock_getres.o clock_gettime.o note.o getcpu.o - -# Build rules - -targets := $(obj-vdso32) vdso32.so vdso32.so.dbg -obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) - -KBUILD_AFLAGS += -DBUILD_VDSO -KBUILD_CFLAGS += -DBUILD_VDSO - -KBUILD_AFLAGS_31 := $(filter-out -m64,$(KBUILD_AFLAGS)) -KBUILD_AFLAGS_31 += -m31 -s - -KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS)) -KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin -KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ - -Wl,--hash-style=both - -$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31) -$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31) - -obj-y += vdso32_wrapper.o -extra-y += vdso32.lds -CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) - -# Disable gcov profiling, ubsan and kasan for VDSO code -GCOV_PROFILE := n -UBSAN_SANITIZE := n -KASAN_SANITIZE := n - -# Force dependency (incbin is bad) -$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so - -# link rule for the .so file, .lds has to be first -$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) FORCE - $(call if_changed,vdso32ld) - -# strip rule for the .so file -$(obj)/%.so: OBJCOPYFLAGS := -S -$(obj)/%.so: $(obj)/%.so.dbg FORCE - $(call if_changed,objcopy) - -# assembly rules for the .S files -$(obj-vdso32): %.o: %.S FORCE - $(call if_changed_dep,vdso32as) - -# actual build commands -quiet_cmd_vdso32ld = VDSO32L $@ - cmd_vdso32ld = $(CC) $(c_flags) -Wl,-T $(filter %.lds %.o,$^) -o $@ -quiet_cmd_vdso32as = VDSO32A $@ - cmd_vdso32as = $(CC) $(a_flags) -c -o $@ $< - -# install commands for the unstripped file -quiet_cmd_vdso_install = INSTALL $@ - cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ - -vdso32.so: $(obj)/vdso32.so.dbg - @mkdir -p $(MODLIB)/vdso - $(call cmd,vdso_install) - -vdso_install: vdso32.so diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S deleted file mode 100644 index eaf9cf1417f6..000000000000 --- a/arch/s390/kernel/vdso32/clock_getres.S +++ /dev/null @@ -1,44 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Userland implementation of clock_getres() for 32 bits processes in a - * s390 kernel for use in the vDSO - * - * Copyright IBM Corp. 2008 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - */ -#include <asm/vdso.h> -#include <asm/asm-offsets.h> -#include <asm/unistd.h> -#include <asm/dwarf.h> - - .text - .align 4 - .globl __kernel_clock_getres - .type __kernel_clock_getres,@function -__kernel_clock_getres: - CFI_STARTPROC - basr %r1,0 - la %r1,4f-.(%r1) - chi %r2,__CLOCK_REALTIME - je 0f - chi %r2,__CLOCK_MONOTONIC - je 0f - la %r1,5f-4f(%r1) - chi %r2,__CLOCK_REALTIME_COARSE - je 0f - chi %r2,__CLOCK_MONOTONIC_COARSE - jne 3f -0: ltr %r3,%r3 - jz 2f /* res == NULL */ -1: l %r0,0(%r1) - xc 0(4,%r3),0(%r3) /* set tp->tv_sec to zero */ - st %r0,4(%r3) /* store tp->tv_usec */ -2: lhi %r2,0 - br %r14 -3: lhi %r1,__NR_clock_getres /* fallback to svc */ - svc 0 - br %r14 - CFI_ENDPROC -4: .long __CLOCK_REALTIME_RES -5: .long __CLOCK_COARSE_RES - .size __kernel_clock_getres,.-__kernel_clock_getres diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S deleted file mode 100644 index ada5c11a16e5..000000000000 --- a/arch/s390/kernel/vdso32/clock_gettime.S +++ /dev/null @@ -1,179 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Userland implementation of clock_gettime() for 32 bits processes in a - * s390 kernel for use in the vDSO - * - * Copyright IBM Corp. 2008 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - */ -#include <asm/vdso.h> -#include <asm/asm-offsets.h> -#include <asm/unistd.h> -#include <asm/dwarf.h> -#include <asm/ptrace.h> - - .text - .align 4 - .globl __kernel_clock_gettime - .type __kernel_clock_gettime,@function -__kernel_clock_gettime: - CFI_STARTPROC - ahi %r15,-16 - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16 - CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD - basr %r5,0 -0: al %r5,21f-0b(%r5) /* get &_vdso_data */ - chi %r2,__CLOCK_REALTIME_COARSE - je 10f - chi %r2,__CLOCK_REALTIME - je 11f - chi %r2,__CLOCK_MONOTONIC_COARSE - je 9f - chi %r2,__CLOCK_MONOTONIC - jne 19f - - /* CLOCK_MONOTONIC */ -1: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ - tml %r4,0x0001 /* pending update ? loop */ - jnz 1b - stcke 0(%r15) /* Store TOD clock */ - lm %r0,%r1,1(%r15) - s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ - sl %r1,__VDSO_XTIME_STAMP+4(%r5) - brc 3,2f - ahi %r0,-1 -2: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */ - lr %r2,%r0 - l %r0,__VDSO_TK_MULT(%r5) - ltr %r1,%r1 - mr %r0,%r0 - jnm 3f - a %r0,__VDSO_TK_MULT(%r5) -3: alr %r0,%r2 - al %r0,__VDSO_WTOM_NSEC(%r5) - al %r1,__VDSO_WTOM_NSEC+4(%r5) - brc 12,5f - ahi %r0,1 -5: l %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ - srdl %r0,0(%r2) /* >> tk->shift */ - l %r2,__VDSO_WTOM_SEC+4(%r5) - cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ - jne 1b - basr %r5,0 -6: ltr %r0,%r0 - jnz 7f - cl %r1,20f-6b(%r5) - jl 8f -7: ahi %r2,1 - sl %r1,20f-6b(%r5) - brc 3,6b - ahi %r0,-1 - j 6b -8: st %r2,0(%r3) /* store tp->tv_sec */ - st %r1,4(%r3) /* store tp->tv_nsec */ - lhi %r2,0 - ahi %r15,16 - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD - CFI_RESTORE 15 - br %r14 - - /* CLOCK_MONOTONIC_COARSE */ - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16 - CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD -9: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ - tml %r4,0x0001 /* pending update ? loop */ - jnz 9b - l %r2,__VDSO_WTOM_CRS_SEC+4(%r5) - l %r1,__VDSO_WTOM_CRS_NSEC+4(%r5) - cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ - jne 9b - j 8b - - /* CLOCK_REALTIME_COARSE */ -10: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ - tml %r4,0x0001 /* pending update ? loop */ - jnz 10b - l %r2,__VDSO_XTIME_CRS_SEC+4(%r5) - l %r1,__VDSO_XTIME_CRS_NSEC+4(%r5) - cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ - jne 10b - j 17f - - /* CLOCK_REALTIME */ -11: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ - tml %r4,0x0001 /* pending update ? loop */ - jnz 11b - stcke 0(%r15) /* Store TOD clock */ - lm %r0,%r1,__VDSO_TS_END(%r5) /* TOD steering end time */ - s %r0,1(%r15) /* no - ts_steering_end */ - sl %r1,5(%r15) - brc 3,22f - ahi %r0,-1 -22: ltr %r0,%r0 /* past end of steering? */ - jm 24f - srdl %r0,15 /* 1 per 2^16 */ - tm __VDSO_TS_DIR+3(%r5),0x01 /* steering direction? */ - jz 23f - lcr %r0,%r0 /* negative TOD offset */ - lcr %r1,%r1 - je 23f - ahi %r0,-1 -23: a %r0,1(%r15) /* add TOD timestamp */ - al %r1,5(%r15) - brc 12,25f - ahi %r0,1 - j 25f -24: lm %r0,%r1,1(%r15) /* load TOD timestamp */ -25: s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ - sl %r1,__VDSO_XTIME_STAMP+4(%r5) - brc 3,12f - ahi %r0,-1 -12: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */ - lr %r2,%r0 - l %r0,__VDSO_TK_MULT(%r5) - ltr %r1,%r1 - mr %r0,%r0 - jnm 13f - a %r0,__VDSO_TK_MULT(%r5) -13: alr %r0,%r2 - al %r0,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */ - al %r1,__VDSO_XTIME_NSEC+4(%r5) - brc 12,14f - ahi %r0,1 -14: l %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ - srdl %r0,0(%r2) /* >> tk->shift */ - l %r2,__VDSO_XTIME_SEC+4(%r5) - cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ - jne 11b - basr %r5,0 -15: ltr %r0,%r0 - jnz 16f - cl %r1,20f-15b(%r5) - jl 17f -16: ahi %r2,1 - sl %r1,20f-15b(%r5) - brc 3,15b - ahi %r0,-1 - j 15b -17: st %r2,0(%r3) /* store tp->tv_sec */ - st %r1,4(%r3) /* store tp->tv_nsec */ - lhi %r2,0 - ahi %r15,16 - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD - CFI_RESTORE 15 - br %r14 - - /* Fallback to system call */ - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD+16 - CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD -19: lhi %r1,__NR_clock_gettime - svc 0 - ahi %r15,16 - CFI_DEF_CFA_OFFSET STACK_FRAME_OVERHEAD - CFI_RESTORE 15 - br %r14 - CFI_ENDPROC - -20: .long 1000000000 -21: .long _vdso_data - 0b - .size __kernel_clock_gettime,.-__kernel_clock_gettime diff --git a/arch/s390/kernel/vdso32/getcpu.S b/arch/s390/kernel/vdso32/getcpu.S deleted file mode 100644 index 25515f3fbcea..000000000000 --- a/arch/s390/kernel/vdso32/getcpu.S +++ /dev/null @@ -1,33 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Userland implementation of getcpu() for 32 bits processes in a - * s390 kernel for use in the vDSO - * - * Copyright IBM Corp. 2016 - * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> - */ -#include <asm/vdso.h> -#include <asm/asm-offsets.h> -#include <asm/dwarf.h> - - .text - .align 4 - .globl __kernel_getcpu - .type __kernel_getcpu,@function -__kernel_getcpu: - CFI_STARTPROC - la %r4,0 - sacf 256 - l %r5,__VDSO_CPU_NR(%r4) - l %r4,__VDSO_NODE_ID(%r4) - sacf 0 - ltr %r2,%r2 - jz 2f - st %r5,0(%r2) -2: ltr %r3,%r3 - jz 3f - st %r4,0(%r3) -3: lhi %r2,0 - br %r14 - CFI_ENDPROC - .size __kernel_getcpu,.-__kernel_getcpu diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S deleted file mode 100644 index b23063fbc892..000000000000 --- a/arch/s390/kernel/vdso32/gettimeofday.S +++ /dev/null @@ -1,103 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Userland implementation of gettimeofday() for 32 bits processes in a - * s390 kernel for use in the vDSO - * - * Copyright IBM Corp. 2008 - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - */ -#include <asm/vdso.h> -#include <asm/asm-offsets.h> -#include <asm/unistd.h> -#include <asm/dwarf.h> -#include <asm/ptrace.h> - - .text - .align 4 - .globl __kernel_gettimeofday - .type __kernel_gettimeofday,@function -__kernel_gettimeofday: - CFI_STARTPROC - ahi %r15,-16 - CFI_ADJUST_CFA_OFFSET 16 - CFI_VAL_OFFSET 15, -STACK_FRAME_OVERHEAD - basr %r5,0 -0: al %r5,13f-0b(%r5) /* get &_vdso_data */ -1: ltr %r3,%r3 /* check if tz is NULL */ - je 2f - mvc 0(8,%r3),__VDSO_TIMEZONE(%r5) -2: ltr %r2,%r2 /* check if tv is NULL */ - je 10f - l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */ - tml %r4,0x0001 /* pending update ? loop */ - jnz 1b - stcke 0(%r15) /* Store TOD clock */ - lm %r0,%r1,__VDSO_TS_END(%r5) /* TOD steering end time */ - s %r0,1(%r15) - sl %r1,5(%r15) - brc 3,14f - ahi %r0,-1 -14: ltr %r0,%r0 /* past end of steering? */ - jm 16f - srdl %r0,15 /* 1 per 2^16 */ - tm __VDSO_TS_DIR+3(%r5),0x01 /* steering direction? */ - jz 15f - lcr %r0,%r0 /* negative TOD offset */ - lcr %r1,%r1 - je 15f - ahi %r0,-1 -15: a %r0,1(%r15) /* add TOD timestamp */ - al %r1,5(%r15) - brc 12,17f - ahi %r0,1 - j 17f -16: lm %r0,%r1,1(%r15) /* load TOD timestamp */ -17: s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */ - sl %r1,__VDSO_XTIME_STAMP+4(%r5) - brc 3,3f - ahi %r0,-1 -3: ms %r0,__VDSO_TK_MULT(%r5) /* * tk->mult */ - st %r0,0(%r15) - l %r0,__VDSO_TK_MULT(%r5) - ltr %r1,%r1 - mr %r0,%r0 - jnm 4f - a %r0,__VDSO_TK_MULT(%r5) -4: al %r0,0(%r15) - al %r0,__VDSO_XTIME_NSEC(%r5) /* + xtime */ - al %r1,__VDSO_XTIME_NSEC+4(%r5) - brc 12,5f - ahi %r0,1 -5: mvc 0(4,%r15),__VDSO_XTIME_SEC+4(%r5) - cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */ - jne 1b - l %r4,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */ - srdl %r0,0(%r4) /* >> tk->shift */ - l %r4,0(%r15) /* get tv_sec from stack */ - basr %r5,0 -6: ltr %r0,%r0 - jnz 7f - cl %r1,11f-6b(%r5) - jl 8f -7: ahi %r4,1 - sl %r1,11f-6b(%r5) - brc 3,6b - ahi %r0,-1 - j 6b -8: st %r4,0(%r2) /* store tv->tv_sec */ - ltr %r1,%r1 - m %r0,12f-6b(%r5) - jnm 9f - al %r0,12f-6b(%r5) -9: srl %r0,6 - st %r0,4(%r2) /* store tv->tv_usec */ -10: slr %r2,%r2 - ahi %r15,16 - CFI_ADJUST_CFA_OFFSET -16 - CFI_RESTORE 15 - br %r14 - CFI_ENDPROC -11: .long 1000000000 -12: .long 274877907 -13: .long _vdso_data - 0b - .size __kernel_gettimeofday,.-__kernel_gettimeofday diff --git a/arch/s390/kernel/vdso32/note.S b/arch/s390/kernel/vdso32/note.S deleted file mode 100644 index db19d0680a0a..000000000000 --- a/arch/s390/kernel/vdso32/note.S +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. - * Here we can supply some information useful to userland. - */ - -#include <linux/uts.h> -#include <linux/version.h> -#include <linux/elfnote.h> - -ELFNOTE_START(Linux, 0, "a") - .long LINUX_VERSION_CODE -ELFNOTE_END diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S deleted file mode 100644 index 721c4954cb6e..000000000000 --- a/arch/s390/kernel/vdso32/vdso32.lds.S +++ /dev/null @@ -1,142 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This is the infamous ld script for the 32 bits vdso - * library - */ - -#include <asm/page.h> -#include <asm/vdso.h> - -OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390") -OUTPUT_ARCH(s390:31-bit) -ENTRY(_start) - -SECTIONS -{ - . = VDSO32_LBASE + SIZEOF_HEADERS; - - .hash : { *(.hash) } :text - .gnu.hash : { *(.gnu.hash) } - .dynsym : { *(.dynsym) } - .dynstr : { *(.dynstr) } - .gnu.version : { *(.gnu.version) } - .gnu.version_d : { *(.gnu.version_d) } - .gnu.version_r : { *(.gnu.version_r) } - - .note : { *(.note.*) } :text :note - - . = ALIGN(16); - .text : { - *(.text .stub .text.* .gnu.linkonce.t.*) - } :text - PROVIDE(__etext = .); - PROVIDE(_etext = .); - PROVIDE(etext = .); - - /* - * Other stuff is appended to the text segment: - */ - .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } - .rodata1 : { *(.rodata1) } - - .dynamic : { *(.dynamic) } :text :dynamic - - .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr - .eh_frame : { KEEP (*(.eh_frame)) } :text - .gcc_except_table : { *(.gcc_except_table .gcc_except_table.*) } - - .rela.dyn ALIGN(8) : { *(.rela.dyn) } - .got ALIGN(8) : { *(.got .toc) } - - _end = .; - PROVIDE(end = .); - - /* - * Stabs debugging sections are here too. - */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } - - /* - * DWARF debug sections. - * Symbols in the DWARF debugging sections are relative to the - * beginning of the section so we begin them at 0. - */ - /* DWARF 1 */ - .debug 0 : { *(.debug) } - .line 0 : { *(.line) } - /* GNU DWARF 1 extensions */ - .debug_srcinfo 0 : { *(.debug_srcinfo) } - .debug_sfnames 0 : { *(.debug_sfnames) } - /* DWARF 1.1 and DWARF 2 */ - .debug_aranges 0 : { *(.debug_aranges) } - .debug_pubnames 0 : { *(.debug_pubnames) } - /* DWARF 2 */ - .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } - .debug_abbrev 0 : { *(.debug_abbrev) } - .debug_line 0 : { *(.debug_line) } - .debug_frame 0 : { *(.debug_frame) } - .debug_str 0 : { *(.debug_str) } - .debug_loc 0 : { *(.debug_loc) } - .debug_macinfo 0 : { *(.debug_macinfo) } - /* SGI/MIPS DWARF 2 extensions */ - .debug_weaknames 0 : { *(.debug_weaknames) } - .debug_funcnames 0 : { *(.debug_funcnames) } - .debug_typenames 0 : { *(.debug_typenames) } - .debug_varnames 0 : { *(.debug_varnames) } - /* DWARF 3 */ - .debug_pubtypes 0 : { *(.debug_pubtypes) } - .debug_ranges 0 : { *(.debug_ranges) } - .gnu.attributes 0 : { KEEP (*(.gnu.attributes)) } - - . = ALIGN(PAGE_SIZE); - PROVIDE(_vdso_data = .); - - /DISCARD/ : { - *(.note.GNU-stack) - *(.branch_lt) - *(.data .data.* .gnu.linkonce.d.* .sdata*) - *(.bss .sbss .dynbss .dynsbss) - } -} - -/* - * Very old versions of ld do not recognize this name token; use the constant. - */ -#define PT_GNU_EH_FRAME 0x6474e550 - -/* - * We must supply the ELF program headers explicitly to get just one - * PT_LOAD segment, and set the flags explicitly to make segments read-only. - */ -PHDRS -{ - text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ - dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ - note PT_NOTE FLAGS(4); /* PF_R */ - eh_frame_hdr PT_GNU_EH_FRAME; -} - -/* - * This controls what symbols we export from the DSO. - */ -VERSION -{ - VDSO_VERSION_STRING { - global: - /* - * Has to be there for the kernel to find - */ - __kernel_gettimeofday; - __kernel_clock_gettime; - __kernel_clock_getres; - __kernel_getcpu; - - local: *; - }; -} diff --git a/arch/s390/kernel/vdso32/vdso32_wrapper.S b/arch/s390/kernel/vdso32/vdso32_wrapper.S deleted file mode 100644 index de2fb930471a..000000000000 --- a/arch/s390/kernel/vdso32/vdso32_wrapper.S +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <linux/init.h> -#include <linux/linkage.h> -#include <asm/page.h> - - __PAGE_ALIGNED_DATA - - .globl vdso32_start, vdso32_end - .balign PAGE_SIZE -vdso32_start: - .incbin "arch/s390/kernel/vdso32/vdso32.so" - .balign PAGE_SIZE -vdso32_end: - - .previous diff --git a/arch/s390/kernel/vdso64/getcpu.S b/arch/s390/kernel/vdso64/getcpu.S index 2446e9dac8ab..3c04f7328500 100644 --- a/arch/s390/kernel/vdso64/getcpu.S +++ b/arch/s390/kernel/vdso64/getcpu.S @@ -16,10 +16,8 @@ .type __kernel_getcpu,@function __kernel_getcpu: CFI_STARTPROC - la %r4,0 sacf 256 - l %r5,__VDSO_CPU_NR(%r4) - l %r4,__VDSO_NODE_ID(%r4) + lm %r4,%r5,__VDSO_GETCPU_VAL(%r0) sacf 0 ltgr %r2,%r2 jz 2f diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 7e0eb4020917..37695499717d 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -15,6 +15,8 @@ /* Handle ro_after_init data on our own. */ #define RO_AFTER_INIT_DATA +#define EMITS_PT_NOTE + #include <asm-generic/vmlinux.lds.h> #include <asm/vmlinux.lds.h> @@ -50,11 +52,7 @@ SECTIONS _etext = .; /* End of text section */ } :text = 0x0700 - NOTES :text :note - - .dummy : { *(.dummy) } :data - - RO_DATA_SECTION(PAGE_SIZE) + RO_DATA(PAGE_SIZE) . = ALIGN(PAGE_SIZE); _sdata = .; /* Start of data section */ @@ -64,12 +62,12 @@ SECTIONS .data..ro_after_init : { *(.data..ro_after_init) JUMP_TABLE_DATA - } + } :data EXCEPTION_TABLE(16) . = ALIGN(PAGE_SIZE); __end_ro_after_init = .; - RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE) + RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE) BOOT_DATA_PRESERVED _edata = .; /* End of data section */ diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index c475ca49cfc6..8df10d3c8f6c 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -247,9 +247,9 @@ void vtime_account_irq_enter(struct task_struct *tsk) } EXPORT_SYMBOL_GPL(vtime_account_irq_enter); -void vtime_account_system(struct task_struct *tsk) +void vtime_account_kernel(struct task_struct *tsk) __attribute__((alias("vtime_account_irq_enter"))); -EXPORT_SYMBOL_GPL(vtime_account_system); +EXPORT_SYMBOL_GPL(vtime_account_kernel); /* * Sorted add to a list. List is linear searched until first bigger |