| field | value | date |
|---|---|---|
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-12-01 19:37:03 -0500 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-12-01 19:37:03 -0500 |
| commit | 4b1967c90af473e3a8bec00024758a3e676cea2d | |
| tree | d626f0283c42b662e4a4fd57d804af841f7fad2a | |
| parent | a0651c7fa2c088a605f63792279859608ed7f2c8 | |
| parent | 3a33c7605750fb6a87613044d16b1455e482414d | |
Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 fixes from Will Deacon:
 "The critical one here is a fix for fpsimd register corruption across
  signals which was introduced by the SVE support code (the register
  files overlap), but the others are worth having as well.
  Summary:
   - Fix FP register corruption when SVE is not in use by the task
   - Fix out-of-tree module build failure when CONFIG_ARM64_MODULE_PLTS=y
   - Missing 'const' generating errors with LTO builds
   - Remove unsupported events from Cortex-A73 PMU description
   - Removal of stale and incorrect comments"
* tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
  arm64: context: Fix comments and remove pointless smp_wmb()
  arm64: cpu_ops: Add missing 'const' qualifiers
  arm64: perf: remove unsupported events for Cortex-A73
  arm64: fpsimd: Fix failure to restore FPSIMD state after signals
  arm64: pgd: Mark pgd_cache as __ro_after_init
  arm64: ftrace: emit ftrace-mod.o contents through code
  arm64: module-plts: factor out PLT generation code for ftrace
  arm64: mm: cleanup stale AIVIVT references
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | arch/arm64/Makefile | 3 |
| -rw-r--r-- | arch/arm64/include/asm/cacheflush.h | 2 |
| -rw-r--r-- | arch/arm64/include/asm/module.h | 46 |
| -rw-r--r-- | arch/arm64/kernel/Makefile | 3 |
| -rw-r--r-- | arch/arm64/kernel/cpu_ops.c | 6 |
| -rw-r--r-- | arch/arm64/kernel/fpsimd.c | 6 |
| -rw-r--r-- | arch/arm64/kernel/ftrace-mod.S | 18 |
| -rw-r--r-- | arch/arm64/kernel/ftrace.c | 14 |
| -rw-r--r-- | arch/arm64/kernel/module-plts.c | 50 |
| -rw-r--r-- | arch/arm64/kernel/module.lds | 1 |
| -rw-r--r-- | arch/arm64/kernel/perf_event.c | 6 |
| -rw-r--r-- | arch/arm64/mm/context.c | 28 |
| -rw-r--r-- | arch/arm64/mm/pgd.c | 2 |
13 files changed, 92 insertions, 93 deletions
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index b35788c909f1..b481b4a7c011 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -83,9 +83,6 @@ endif
 
 ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
 KBUILD_LDFLAGS_MODULE	+= -T $(srctree)/arch/arm64/kernel/module.lds
-ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
-KBUILD_LDFLAGS_MODULE	+= $(objtree)/arch/arm64/kernel/ftrace-mod.o
-endif
 endif
 
 # Default value
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 76d1cc85d5b1..955130762a3c 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -38,7 +38,7 @@
  *
  *	See Documentation/cachetlb.txt for more information. Please note that
  *	the implementation assumes non-aliasing VIPT D-cache and (aliasing)
- *	VIPT or ASID-tagged VIVT I-cache.
+ *	VIPT I-cache.
  *
  *	flush_cache_mm(mm)
  *
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index 19bd97671bb8..4f766178fa6f 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -32,7 +32,7 @@ struct mod_arch_specific {
 	struct mod_plt_sec	init;
 
 	/* for CONFIG_DYNAMIC_FTRACE */
-	void			*ftrace_trampoline;
+	struct plt_entry 	*ftrace_trampoline;
 };
 #endif
 
@@ -45,4 +45,48 @@ extern u64 module_alloc_base;
 #define module_alloc_base	((u64)_etext - MODULES_VSIZE)
 #endif
 
+struct plt_entry {
+	/*
+	 * A program that conforms to the AArch64 Procedure Call Standard
+	 * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
+	 * IP1 (x17) may be inserted at any branch instruction that is
+	 * exposed to a relocation that supports long branches. Since that
+	 * is exactly what we are dealing with here, we are free to use x16
+	 * as a scratch register in the PLT veneers.
+	 */
+	__le32	mov0;	/* movn	x16, #0x....			*/
+	__le32	mov1;	/* movk	x16, #0x...., lsl #16		*/
+	__le32	mov2;	/* movk	x16, #0x...., lsl #32		*/
+	__le32	br;	/* br	x16				*/
+};
+
+static inline struct plt_entry get_plt_entry(u64 val)
+{
+	/*
+	 * MOVK/MOVN/MOVZ opcode:
+	 * +--------+------------+--------+-----------+-------------+---------+
+	 * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
+	 * +--------+------------+--------+-----------+-------------+---------+
+	 *
+	 * Rd     := 0x10 (x16)
+	 * hw     := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
+	 * opc    := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
+	 * sf     := 1 (64-bit variant)
+	 */
+	return (struct plt_entry){
+		cpu_to_le32(0x92800010 | (((~val      ) & 0xffff)) << 5),
+		cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
+		cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
+		cpu_to_le32(0xd61f0200)
+	};
+}
+
+static inline bool plt_entries_equal(const struct plt_entry *a,
+				     const struct plt_entry *b)
+{
+	return a->mov0 == b->mov0 &&
+	       a->mov1 == b->mov1 &&
+	       a->mov2 == b->mov2;
+}
+
 #endif /* __ASM_MODULE_H */
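The MOVN/MOVK sequence built by get_plt_entry() above is easiest to follow with concrete bits in hand: MOVN writes the bitwise complement of its immediate, so after the three moves bits [63:48] of x16 are all ones, which suits arm64 kernel and module addresses at the top of the address space. Below is a stand-alone user-space sketch of the same packing (not kernel code: cpu_to_le32() is dropped by assuming a little-endian host, and the target address is made up):

```c
/* Sketch of the immediate packing used by get_plt_entry() in the hunk above.
 * Little-endian host assumed; the address passed to main() is invented. */
#include <inttypes.h>
#include <stdio.h>

struct plt_entry { uint32_t mov0, mov1, mov2, br; };

static struct plt_entry get_plt_entry(uint64_t val)
{
	return (struct plt_entry){
		(uint32_t)(0x92800010 | ((~val & 0xffff) << 5)),         /* movn x16, #(~val)[15:0]        */
		(uint32_t)(0xf2a00010 | (((val >> 16) & 0xffff) << 5)),  /* movk x16, #val[31:16], lsl #16 */
		(uint32_t)(0xf2c00010 | (((val >> 32) & 0xffff) << 5)),  /* movk x16, #val[47:32], lsl #32 */
		0xd61f0200,                                              /* br   x16                       */
	};
}

int main(void)
{
	/* Made-up address; in the kernel the value comes from the relocation. */
	struct plt_entry e = get_plt_entry(0xffff000008123456ULL);

	printf("%08" PRIx32 " %08" PRIx32 " %08" PRIx32 " %08" PRIx32 "\n",
	       e.mov0, e.mov1, e.mov2, e.br);
	return 0;
}
```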
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 8265dd790895..067baace74a0 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -61,6 +61,3 @@ extra-y					+= $(head-y) vmlinux.lds
 ifeq ($(CONFIG_DEBUG_EFI),y)
 AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\""
 endif
-
-# will be included by each individual module but not by the core kernel itself
-extra-$(CONFIG_DYNAMIC_FTRACE) += ftrace-mod.o
diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
index d16978213c5b..ea001241bdd4 100644
--- a/arch/arm64/kernel/cpu_ops.c
+++ b/arch/arm64/kernel/cpu_ops.c
@@ -31,13 +31,13 @@ extern const struct cpu_operations cpu_psci_ops;
 
 const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
 
-static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = {
+static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = {
 	&smp_spin_table_ops,
 	&cpu_psci_ops,
 	NULL,
 };
 
-static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = {
+static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = {
 #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
 	&acpi_parking_protocol_ops,
 #endif
@@ -47,7 +47,7 @@ static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = {
 
 static const struct cpu_operations * __init cpu_get_ops(const char *name)
 {
-	const struct cpu_operations **ops;
+	const struct cpu_operations *const *ops;
 
 	ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops;
 
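The cpu_ops.c hunk above only adds a second const, but that is what allows the arrays to be emitted as genuinely read-only data, which is presumably what the LTO build was flagging. A minimal illustration of the qualifier placement (plain C; the names here are invented for the example):

```c
#include <stddef.h>

/* Names are invented; only the placement of 'const' matters. */
struct cpu_operations { const char *name; };

static const struct cpu_operations psci_ops = { "psci" };

/* Pointer-to-const elements, but the pointers themselves are mutable:
 * the array is ordinary writable data and cannot sit in a const section. */
static const struct cpu_operations *writable_table[] = { &psci_ops, NULL };

/* With the second 'const' the elements are immutable too, so the whole
 * array can be placed in a read-only section, as __initconst expects. */
static const struct cpu_operations *const readonly_table[] = { &psci_ops, NULL };
```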
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 143b3e72c25e..5084e699447a 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1026,10 +1026,10 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
 
 	local_bh_disable();
 
-	if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
-		current->thread.fpsimd_state = *state;
+	current->thread.fpsimd_state = *state;
+	if (system_supports_sve() && test_thread_flag(TIF_SVE))
 		fpsimd_to_sve(current);
-	}
+
 	task_fpsimd_load();
 
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
diff --git a/arch/arm64/kernel/ftrace-mod.S b/arch/arm64/kernel/ftrace-mod.S
deleted file mode 100644
index 00c4025be4ff..000000000000
--- a/arch/arm64/kernel/ftrace-mod.S
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
-	.section	".text.ftrace_trampoline", "ax"
-	.align		3
-0:	.quad		0
-__ftrace_trampoline:
-	ldr		x16, 0b
-	br		x16
-ENDPROC(__ftrace_trampoline)
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index c13b1fca0e5b..50986e388d2b 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -76,7 +76,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 
 	if (offset < -SZ_128M || offset >= SZ_128M) {
 #ifdef CONFIG_ARM64_MODULE_PLTS
-		unsigned long *trampoline;
+		struct plt_entry trampoline;
 		struct module *mod;
 
 		/*
@@ -104,22 +104,24 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 		 * is added in the future, but for now, the pr_err() below
 		 * deals with a theoretical issue only.
 		 */
-		trampoline = (unsigned long *)mod->arch.ftrace_trampoline;
-		if (trampoline[0] != addr) {
-			if (trampoline[0] != 0) {
+		trampoline = get_plt_entry(addr);
+		if (!plt_entries_equal(mod->arch.ftrace_trampoline,
+				       &trampoline)) {
+			if (!plt_entries_equal(mod->arch.ftrace_trampoline,
+					       &(struct plt_entry){})) {
 				pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n");
 				return -EINVAL;
 			}
 
 			/* point the trampoline to our ftrace entry point */
 			module_disable_ro(mod);
-			trampoline[0] = addr;
+			*mod->arch.ftrace_trampoline = trampoline;
 			module_enable_ro(mod, true);
 
 			/* update trampoline before patching in the branch */
 			smp_wmb();
 		}
-		addr = (unsigned long)&trampoline[1];
+		addr = (unsigned long)(void *)mod->arch.ftrace_trampoline;
 #else /* CONFIG_ARM64_MODULE_PLTS */
 		return -EINVAL;
 #endif /* CONFIG_ARM64_MODULE_PLTS */
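The ftrace.c change above hinges on the reach of a direct AArch64 branch: a bl encodes a signed 26-bit word offset, i.e. roughly ±128 MB, so a module loaded farther than that from the ftrace entry point has to bounce through the per-module PLT veneer instead. A small user-space sketch of that range check (the addresses are invented):

```c
/* Sketch of the reachability test behind 'offset < -SZ_128M || offset >= SZ_128M':
 * an AArch64 BL carries a signed 26-bit offset counted in 4-byte words,
 * giving +/-128 MB of reach from the branch instruction. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SZ_128M (128 * 1024 * 1024LL)

static bool bl_can_reach(uint64_t pc, uint64_t target)
{
	int64_t offset = (int64_t)(target - pc);

	return offset >= -SZ_128M && offset < SZ_128M;
}

int main(void)
{
	uint64_t ftrace_entry = 0xffff000008100000ULL;	/* core kernel text (example)     */
	uint64_t mod_callsite = 0xffff000018100000ULL;	/* module loaded 256 MB away (example) */

	/* Out of range: ftrace_make_call() would instead point the BL at the
	 * module's own .text.ftrace_trampoline veneer. */
	printf("direct bl possible: %d\n", bl_can_reach(mod_callsite, ftrace_entry));
	return 0;
}
```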
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index d05dbe658409..ea640f92fe5a 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -11,21 +11,6 @@
 #include <linux/module.h>
 #include <linux/sort.h>
 
-struct plt_entry {
-	/*
-	 * A program that conforms to the AArch64 Procedure Call Standard
-	 * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
-	 * IP1 (x17) may be inserted at any branch instruction that is
-	 * exposed to a relocation that supports long branches. Since that
-	 * is exactly what we are dealing with here, we are free to use x16
-	 * as a scratch register in the PLT veneers.
-	 */
-	__le32	mov0;	/* movn	x16, #0x....			*/
-	__le32	mov1;	/* movk	x16, #0x...., lsl #16		*/
-	__le32	mov2;	/* movk	x16, #0x...., lsl #32		*/
-	__le32	br;	/* br	x16				*/
-};
-
 static bool in_init(const struct module *mod, void *loc)
 {
 	return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size;
@@ -40,33 +25,14 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
 	int i = pltsec->plt_num_entries;
 	u64 val = sym->st_value + rela->r_addend;
 
-	/*
-	 * MOVK/MOVN/MOVZ opcode:
-	 * +--------+------------+--------+-----------+-------------+---------+
-	 * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
-	 * +--------+------------+--------+-----------+-------------+---------+
-	 *
-	 * Rd     := 0x10 (x16)
-	 * hw     := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
-	 * opc    := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
-	 * sf     := 1 (64-bit variant)
-	 */
-	plt[i] = (struct plt_entry){
-		cpu_to_le32(0x92800010 | (((~val      ) & 0xffff)) << 5),
-		cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
-		cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
-		cpu_to_le32(0xd61f0200)
-	};
+	plt[i] = get_plt_entry(val);
 
 	/*
 	 * Check if the entry we just created is a duplicate. Given that the
 	 * relocations are sorted, this will be the last entry we allocated.
 	 * (if one exists).
 	 */
-	if (i > 0 &&
-	    plt[i].mov0 == plt[i - 1].mov0 &&
-	    plt[i].mov1 == plt[i - 1].mov1 &&
-	    plt[i].mov2 == plt[i - 1].mov2)
+	if (i > 0 && plt_entries_equal(plt + i, plt + i - 1))
 		return (u64)&plt[i - 1];
 
 	pltsec->plt_num_entries++;
@@ -154,6 +120,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 	unsigned long core_plts = 0;
 	unsigned long init_plts = 0;
 	Elf64_Sym *syms = NULL;
+	Elf_Shdr *tramp = NULL;
 	int i;
 
 	/*
@@ -165,6 +132,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 			mod->arch.core.plt = sechdrs + i;
 		else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt"))
 			mod->arch.init.plt = sechdrs + i;
+		else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) &&
+			 !strcmp(secstrings + sechdrs[i].sh_name,
+				 ".text.ftrace_trampoline"))
+			tramp = sechdrs + i;
 		else if (sechdrs[i].sh_type == SHT_SYMTAB)
 			syms = (Elf64_Sym *)sechdrs[i].sh_addr;
 	}
@@ -215,5 +186,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 	mod->arch.init.plt_num_entries = 0;
 	mod->arch.init.plt_max_entries = init_plts;
 
+	if (tramp) {
+		tramp->sh_type = SHT_NOBITS;
+		tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+		tramp->sh_addralign = __alignof__(struct plt_entry);
+		tramp->sh_size = sizeof(struct plt_entry);
+	}
+
 	return 0;
 }
diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds
index f7c9781a9d48..22e36a21c113 100644
--- a/arch/arm64/kernel/module.lds
+++ b/arch/arm64/kernel/module.lds
@@ -1,4 +1,5 @@
 SECTIONS {
 	.plt (NOLOAD) : { BYTE(0) }
 	.init.plt (NOLOAD) : { BYTE(0) }
+	.text.ftrace_trampoline (NOLOAD) : { BYTE(0) }
 }
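In module_emit_plt_entry() above, a freshly generated veneer only consumes a slot when it differs from the previous one; because the relocations are sorted, repeated branches to the same target collapse into a single PLT entry. A self-contained user-space sketch of that dedup, reusing the packing from the earlier sketch (slot count and addresses are invented):

```c
/* Dedup sketch: identical targets yield identical mov0/mov1/mov2 words, so
 * the second "emit" reuses the previous slot. Values are invented. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct plt_entry { uint32_t mov0, mov1, mov2, br; };

static struct plt_entry get_plt_entry(uint64_t val)
{
	return (struct plt_entry){
		(uint32_t)(0x92800010 | ((~val & 0xffff) << 5)),
		(uint32_t)(0xf2a00010 | (((val >> 16) & 0xffff) << 5)),
		(uint32_t)(0xf2c00010 | (((val >> 32) & 0xffff) << 5)),
		0xd61f0200,
	};
}

/* 'br x16' is identical in every veneer, so three words decide equality. */
static bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b)
{
	return a->mov0 == b->mov0 && a->mov1 == b->mov1 && a->mov2 == b->mov2;
}

static struct plt_entry plt[16];	/* capacity invented for the sketch */
static int plt_num_entries;

static int emit_plt_entry(uint64_t target)
{
	int i = plt_num_entries;

	plt[i] = get_plt_entry(target);
	if (i > 0 && plt_entries_equal(&plt[i], &plt[i - 1]))
		return i - 1;		/* sorted relocs: duplicate of the last slot */

	plt_num_entries++;
	return i;
}

int main(void)
{
	printf("%d\n", emit_plt_entry(0xffff000008123456ULL));	/* slot 0 */
	printf("%d\n", emit_plt_entry(0xffff000008123456ULL));	/* slot 0 again (deduped) */
	printf("%d\n", emit_plt_entry(0xffff0000081240c0ULL));	/* slot 1 */
	return 0;
}
```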
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 9eaef51f83ff..3affca3dd96a 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -262,12 +262,6 @@ static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 
 	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
 	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
-
-	[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
-	[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
-
-	[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
-	[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
 };
 
 static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c
index ab9f5f0fb2c7..6f4017046323 100644
--- a/arch/arm64/mm/context.c
+++ b/arch/arm64/mm/context.c
@@ -96,12 +96,6 @@ static void flush_context(unsigned int cpu)
 
 	set_reserved_asid_bits();
 
-	/*
-	 * Ensure the generation bump is observed before we xchg the
-	 * active_asids.
-	 */
-	smp_wmb();
-
 	for_each_possible_cpu(i) {
 		asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
 		/*
@@ -117,7 +111,10 @@ static void flush_context(unsigned int cpu)
 		per_cpu(reserved_asids, i) = asid;
 	}
 
-	/* Queue a TLB invalidate and flush the I-cache if necessary. */
+	/*
+	 * Queue a TLB invalidation for each CPU to perform on next
+	 * context-switch
+	 */
 	cpumask_setall(&tlb_flush_pending);
 }
 
@@ -202,11 +199,18 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
 	asid = atomic64_read(&mm->context.id);
 
 	/*
-	 * The memory ordering here is subtle. We rely on the control
-	 * dependency between the generation read and the update of
-	 * active_asids to ensure that we are synchronised with a
-	 * parallel rollover (i.e. this pairs with the smp_wmb() in
-	 * flush_context).
+	 * The memory ordering here is subtle.
+	 * If our ASID matches the current generation, then we update
+	 * our active_asids entry with a relaxed xchg. Racing with a
+	 * concurrent rollover means that either:
+	 *
+	 * - We get a zero back from the xchg and end up waiting on the
+	 *   lock. Taking the lock synchronises with the rollover and so
+	 *   we are forced to see the updated generation.
+	 *
+	 * - We get a valid ASID back from the xchg, which means the
+	 *   relaxed xchg in flush_context will treat us as reserved
+	 *   because atomic RmWs are totally ordered for a given location.
 	 */
 	if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits)
 	    && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid))
diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c
index 371c5f03a170..051e71ec3335 100644
--- a/arch/arm64/mm/pgd.c
+++ b/arch/arm64/mm/pgd.c
@@ -26,7 +26,7 @@
 #include <asm/page.h>
 #include <asm/tlbflush.h>
 
-static struct kmem_cache *pgd_cache;
+static struct kmem_cache *pgd_cache __ro_after_init;
 
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
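The context.c change removes the smp_wmb() and documents why the relaxed xchg is enough on its own. Below is a deliberately simplified, single-slot C11 model of that fast-path/rollover interaction; it is not the kernel code (per-CPU arrays, the cpu_asid_lock slow path and the real ASID layout are omitted, and the field widths are invented), it just mirrors the two outcomes listed in the new comment.

```c
/* Simplified single-slot model of the race described in the new comment.
 * Not kernel code: no per-CPU data, no slow-path locking, invented ASID split. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define ASID_BITS 16

static _Atomic uint64_t asid_generation = 1ULL << ASID_BITS;
static _Atomic uint64_t active_asid;	/* stands in for one per_cpu(active_asids) slot */

/* Rollover side (flush_context): bump the generation, then grab the published
 * ASID with a relaxed xchg. No smp_wmb() is needed: RMW atomics on the same
 * location are totally ordered, so a fast path either published before this
 * xchg (and its ASID is seen here and treated as reserved) or after it (and
 * its own xchg reads back 0, forcing it onto the slow path). */
static uint64_t rollover(void)
{
	atomic_fetch_add_explicit(&asid_generation, 1ULL << ASID_BITS,
				  memory_order_relaxed);
	return atomic_exchange_explicit(&active_asid, 0, memory_order_relaxed);
}

/* Fast path (check_and_switch_context): if our ASID's generation is current,
 * try to republish it. A zero result means a rollover won the race, so the
 * caller must fall back to the lock-protected slow path. */
static bool fast_path(uint64_t my_asid)
{
	uint64_t gen = atomic_load_explicit(&asid_generation, memory_order_relaxed);

	if ((my_asid ^ gen) >> ASID_BITS)
		return false;					/* stale generation */

	return atomic_exchange_explicit(&active_asid, my_asid,
					memory_order_relaxed) != 0;
}

int main(void)
{
	uint64_t asid = (1ULL << ASID_BITS) | 1;	/* "generation 1, ASID 1" */

	/* Single-threaded smoke test; the interesting cases are the two
	 * concurrent interleavings described in the comments above. */
	(void)fast_path(asid);
	(void)rollover();
	return 0;
}
```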

