From 0cfe5b5fc0277463fa795dea312a3a2fd5e8bac2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Lefaure?= Date: Sun, 1 Oct 2017 15:30:50 -0400 Subject: x86: Use ARRAY_SIZE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using the ARRAY_SIZE macro improves the readability of the code. Found with Coccinelle with the following semantic patch: @r depends on (org || report)@ type T; T[] E; position p; @@ ( (sizeof(E)@p /sizeof(*E)) | (sizeof(E)@p /sizeof(E[...])) | (sizeof(E)@p /sizeof(T)) ) Signed-off-by: Jérémy Lefaure Signed-off-by: Thomas Gleixner Cc: linux-video@atrey.karlin.mff.cuni.cz Cc: Martin Mares Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/20171001193101.8898-13-jeremy.lefaure@lse.epita.fr --- arch/x86/entry/vdso/vdso2c.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c index 0780a443a53b..4674f58581a1 100644 --- a/arch/x86/entry/vdso/vdso2c.c +++ b/arch/x86/entry/vdso/vdso2c.c @@ -65,6 +65,7 @@ #include #include +#include const char *outfilename; @@ -151,7 +152,7 @@ extern void bad_put_le(void); PLE(x, val, 64, PLE(x, val, 32, PLE(x, val, 16, LAST_PLE(x, val)))) -#define NSYMS (sizeof(required_syms) / sizeof(required_syms[0])) +#define NSYMS ARRAY_SIZE(required_syms) #define BITSFUNC3(name, bits, suffix) name##bits##suffix #define BITSFUNC2(name, bits, suffix) BITSFUNC3(name, bits, suffix) -- cgit v1.2.1 From 6aa7de059173a986114ac43b8f50b297a86f09a8 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 23 Oct 2017 14:07:29 -0700 Subject: locking/atomics: COCCINELLE/treewide: Convert trivial ACCESS_ONCE() patterns to READ_ONCE()/WRITE_ONCE() Please do not apply this to mainline directly, instead please re-run the coccinelle script shown below and apply its output. For several reasons, it is desirable to use {READ,WRITE}_ONCE() in preference to ACCESS_ONCE(), and new code is expected to use one of the former. So far, there's been no reason to change most existing uses of ACCESS_ONCE(), as these aren't harmful, and changing them results in churn. However, for some features, the read/write distinction is critical to correct operation. To distinguish these cases, separate read/write accessors must be used. This patch migrates (most) remaining ACCESS_ONCE() instances to {READ,WRITE}_ONCE(), using the following coccinelle script: ---- // Convert trivial ACCESS_ONCE() uses to equivalent READ_ONCE() and // WRITE_ONCE() // $ make coccicheck COCCI=/home/mark/once.cocci SPFLAGS="--include-headers" MODE=patch virtual patch @ depends on patch @ expression E1, E2; @@ - ACCESS_ONCE(E1) = E2 + WRITE_ONCE(E1, E2) @ depends on patch @ expression E; @@ - ACCESS_ONCE(E) + READ_ONCE(E) ---- Signed-off-by: Mark Rutland Signed-off-by: Paul E. McKenney Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: davem@davemloft.net Cc: linux-arch@vger.kernel.org Cc: mpe@ellerman.id.au Cc: shuah@kernel.org Cc: snitzer@redhat.com Cc: thor.thayer@linux.intel.com Cc: tj@kernel.org Cc: viro@zeniv.linux.org.uk Cc: will.deacon@arm.com Link: http://lkml.kernel.org/r/1508792849-3115-19-git-send-email-paulmck@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- arch/x86/entry/common.c | 2 +- arch/x86/entry/vdso/vclock_gettime.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 03505ffbe1b6..eaa0ba66cf96 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -75,7 +75,7 @@ static long syscall_trace_enter(struct pt_regs *regs) if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) BUG_ON(regs != task_pt_regs(current)); - work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; + work = READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; if (unlikely(work & _TIF_SYSCALL_EMU)) emulated = true; diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index fa8dbfcf7ed3..11b13c4b43d5 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -318,7 +318,7 @@ int gettimeofday(struct timeval *, struct timezone *) notrace time_t __vdso_time(time_t *t) { /* This is atomic on x86 so we don't need any locks. */ - time_t result = ACCESS_ONCE(gtod->wall_time_sec); + time_t result = READ_ONCE(gtod->wall_time_sec); if (t) *t = result; -- cgit v1.2.1 From 819aeee065e5d1b417ecd633897427c89f3253ec Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Fri, 20 Oct 2017 09:30:59 -0500 Subject: X86/KVM: Clear encryption attribute when SEV is active MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The guest physical memory area holding the struct pvclock_wall_clock and struct pvclock_vcpu_time_info are shared with the hypervisor. It periodically updates the contents of the memory. When SEV is active, the encryption attributes from the shared memory pages must be cleared so that both hypervisor and guest can access the data. Signed-off-by: Brijesh Singh Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Tested-by: Borislav Petkov Cc: Tom Lendacky Cc: kvm@vger.kernel.org Cc: Radim Krčmář Cc: Borislav Petkov Cc: Paolo Bonzini Link: https://lkml.kernel.org/r/20171020143059.3291-18-brijesh.singh@amd.com --- arch/x86/entry/vdso/vma.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 1911310959f8..d63053142b16 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -114,10 +114,11 @@ static int vvar_fault(const struct vm_special_mapping *sm, struct pvclock_vsyscall_time_info *pvti = pvclock_pvti_cpu0_va(); if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) { - ret = vm_insert_pfn( + ret = vm_insert_pfn_prot( vma, vmf->address, - __pa(pvti) >> PAGE_SHIFT); + __pa(pvti) >> PAGE_SHIFT, + pgprot_decrypted(vma->vm_page_prot)); } } else if (sym_offset == image->sym_hvclock_page) { struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page(); -- cgit v1.2.1 From 7a10e2a9190628a4024ea394ce7bd641ae40ffd1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 6 Nov 2017 16:01:23 +0100 Subject: x86: Use lockdep to assert IRQs are disabled/enabled Use lockdep to check that IRQs are enabled or disabled as expected. This way the sanity check only shows overhead when concurrency correctness debug code is enabled. It also makes no more sense to fix the IRQ flags when a bug is detected as the assertion is now pure config-dependent debugging. And to quote Peter Zijlstra: The whole if !disabled, disable logic is uber paranoid programming, but I don't think we've ever seen that WARN trigger, and if it does (and then burns the kernel) we at least know what happend. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: David S . Miller Cc: Lai Jiangshan Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Tejun Heo Link: http://lkml.kernel.org/r/1509980490-4285-8-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/common.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index eaa0ba66cf96..d7d3cc24baf4 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -186,9 +186,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs) addr_limit_user_check(); - if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled())) - local_irq_disable(); - + lockdep_assert_irqs_disabled(); lockdep_sys_exit(); cached_flags = READ_ONCE(ti->flags); -- cgit v1.2.1 From 9f08890ab906abaf9d4c1bad8111755cbd302260 Mon Sep 17 00:00:00 2001 From: Joao Martins Date: Wed, 8 Nov 2017 17:19:55 +0000 Subject: x86/pvclock: add setter for pvclock_pvti_cpu0_va Right now there is only a pvclock_pvti_cpu0_va() which is defined on kvmclock since: commit dac16fba6fc5 ("x86/vdso: Get pvclock data from the vvar VMA instead of the fixmap") The only user of this interface so far is kvm. This commit adds a setter function for the pvti page and moves pvclock_pvti_cpu0_va to pvclock, which is a more generic place to have it; and would allow other PV clocksources to use it, such as Xen. While moving pvclock_pvti_cpu0_va into pvclock, rename also this function to pvclock_get_pvti_cpu0_va (including its call sites) to be symmetric with the setter (pvclock_set_pvti_cpu0_va). Signed-off-by: Joao Martins Acked-by: Andy Lutomirski Acked-by: Paolo Bonzini Acked-by: Thomas Gleixner Signed-off-by: Boris Ostrovsky --- arch/x86/entry/vdso/vma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 1911310959f8..a77fd3c8d824 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -112,7 +112,7 @@ static int vvar_fault(const struct vm_special_mapping *sm, __pa_symbol(&__vvar_page) >> PAGE_SHIFT); } else if (sym_offset == image->sym_pvclock_page) { struct pvclock_vsyscall_time_info *pvti = - pvclock_pvti_cpu0_va(); + pvclock_get_pvti_cpu0_va(); if (pvti && vclock_was_used(VCLOCK_PVCLOCK)) { ret = vm_insert_pfn( vma, -- cgit v1.2.1 From 8a78756eb545a6fb8007fa154a626ca2bc208027 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 13 Nov 2017 19:29:37 +0900 Subject: kbuild: create object directories simpler and faster For the out-of-tree build, scripts/Makefile.build creates output directories, but this operation is not efficient. scripts/Makefile.lib calculates obj-dirs as follows: obj-dirs := $(dir $(multi-objs) $(obj-y)) Please notice $(sort ...) is not used here. Usually the result is as many "./" as objects here. For a lot of duplicated paths, the following command is invoked. _dummy := $(foreach d,$(obj-dirs), $(shell [ -d $(d) ] || mkdir -p $(d))) Then, the costly shell command is run over and over again. I see many points for optimization: [1] Use $(sort ...) to cut down duplicated paths before passing them to system call [2] Use single $(shell ...) instead of repeating it with $(foreach ...) This will reduce forking. [3] We can calculate obj-dirs more simply. Most of objects are already accumulated in $(targets). So, $(dir $(targets)) is fine and more comprehensive. I also removed ugly code in arch/x86/entry/vdso/Makefile. This is now really unnecessary. Signed-off-by: Masahiro Yamada Acked-by: Ingo Molnar Tested-by: Douglas Anderson --- arch/x86/entry/vdso/Makefile | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index d5409660f5de..f8e3d85256ad 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -129,10 +129,6 @@ $(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1 -# This makes sure the $(obj) subdirectory exists even though vdso32/ -# is not a kbuild sub-make subdirectory. -override obj-dirs = $(dir $(obj)) $(obj)/vdso32/ - targets += vdso32/vdso32.lds targets += vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o targets += vdso32/vclock_gettime.o -- cgit v1.2.1 From 548c3050ea8d16997ae27f9e080a8338a606fc93 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 21 Nov 2017 20:43:56 -0800 Subject: x86/entry/64: Fix entry_SYSCALL_64_after_hwframe() IRQ tracing When I added entry_SYSCALL_64_after_hwframe(), I left TRACE_IRQS_OFF before it. This means that users of entry_SYSCALL_64_after_hwframe() were responsible for invoking TRACE_IRQS_OFF, and the one and only user (Xen, added in the same commit) got it wrong. I think this would manifest as a warning if a Xen PV guest with CONFIG_DEBUG_LOCKDEP=y were used with context tracking. (The context tracking bit is to cause lockdep to get invoked before we turn IRQs back on.) I haven't tested that for real yet because I can't get a kernel configured like that to boot at all on Xen PV. Move TRACE_IRQS_OFF below the label. Signed-off-by: Andy Lutomirski Cc: Boris Ostrovsky Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: 8a9949bc71a7 ("x86/xen/64: Rearrange the SYSCALL entries") Link: http://lkml.kernel.org/r/9150aac013b7b95d62c2336751d5b6e91d2722aa.1511325444.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index a2b30ec69497..5063ed1214dd 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -148,8 +148,6 @@ ENTRY(entry_SYSCALL_64) movq %rsp, PER_CPU_VAR(rsp_scratch) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - TRACE_IRQS_OFF - /* Construct struct pt_regs on stack */ pushq $__USER_DS /* pt_regs->ss */ pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */ @@ -170,6 +168,8 @@ GLOBAL(entry_SYSCALL_64_after_hwframe) sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ UNWIND_HINT_REGS extra=0 + TRACE_IRQS_OFF + /* * If we need to do entry work or if we guess we'll need to do * exit work, go straight to the slow path. -- cgit v1.2.1 From ca37e57bbe0cf1455ea3e84eb89ed04a132d59e1 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 22 Nov 2017 20:39:16 -0800 Subject: x86/entry/64: Add missing irqflags tracing to native_load_gs_index() Running this code with IRQs enabled (where dummy_lock is a spinlock): static void check_load_gs_index(void) { /* This will fail. */ load_gs_index(0xffff); spin_lock(&dummy_lock); spin_unlock(&dummy_lock); } Will generate a lockdep warning. The issue is that the actual write to %gs would cause an exception with IRQs disabled, and the exception handler would, as an inadvertent side effect, update irqflag tracing to reflect the IRQs-off status. native_load_gs_index() would then turn IRQs back on and return with irqflag tracing still thinking that IRQs were off. The dummy lock-and-unlock causes lockdep to notice the error and warn. Fix it by adding the missing tracing. Apparently nothing did this in a context where it mattered. I haven't tried to find a code path that would actually exhibit the warning if appropriately nasty user code were running. I suspect that the security impact of this bug is very, very low -- production systems don't run with lockdep enabled, and the warning is mostly harmless anyway. Found during a quick audit of the entry code to try to track down an unrelated bug that Ingo found in some still-in-development code. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/e1aeb0e6ba8dd430ec36c8a35e63b429698b4132.1511411918.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64.S | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 5063ed1214dd..f81d50d7ceac 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -51,15 +51,19 @@ ENTRY(native_usergs_sysret64) END(native_usergs_sysret64) #endif /* CONFIG_PARAVIRT */ -.macro TRACE_IRQS_IRETQ +.macro TRACE_IRQS_FLAGS flags:req #ifdef CONFIG_TRACE_IRQFLAGS - bt $9, EFLAGS(%rsp) /* interrupts off? */ + bt $9, \flags /* interrupts off? */ jnc 1f TRACE_IRQS_ON 1: #endif .endm +.macro TRACE_IRQS_IRETQ + TRACE_IRQS_FLAGS EFLAGS(%rsp) +.endm + /* * When dynamic function tracer is enabled it will add a breakpoint * to all locations that it is about to modify, sync CPUs, update @@ -943,11 +947,13 @@ ENTRY(native_load_gs_index) FRAME_BEGIN pushfq DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) + TRACE_IRQS_OFF SWAPGS .Lgs_change: movl %edi, %gs 2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE SWAPGS + TRACE_IRQS_FLAGS (%rsp) popfq FRAME_END ret -- cgit v1.2.1 From 88edb57d1e0b262e669c5cad36646dcf5a7f37f5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Dec 2017 16:01:55 +0100 Subject: x86/vdso: Change time() prototype to match __vdso_time() gcc-8 warns that time() is an alias for __vdso_time() but the two have different prototypes: arch/x86/entry/vdso/vclock_gettime.c:327:5: error: 'time' alias between functions of incompatible types 'int(time_t *)' {aka 'int(long int *)'} and 'time_t(time_t *)' {aka 'long int(long int *)'} [-Werror=attribute-alias] int time(time_t *t) ^~~~ arch/x86/entry/vdso/vclock_gettime.c:318:16: note: aliased declaration here I could not figure out whether this is intentional, but I see that changing it to return time_t avoids the warning. Returning 'int' from time() is also a bit questionable, as it causes an overflow in y2038 even on 64-bit architectures that use a 64-bit time_t type. On 32-bit architecture with 64-bit time_t, time() should always be implement by the C library by calling a (to be added) clock_gettime() variant that takes a sufficiently wide argument. Signed-off-by: Arnd Bergmann Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Mark Rutland Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Vitaly Kuznetsov Link: http://lkml.kernel.org/r/20171204150203.852959-1-arnd@arndb.de Signed-off-by: Ingo Molnar --- arch/x86/entry/vdso/vclock_gettime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/entry') diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 11b13c4b43d5..f19856d95c60 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -324,5 +324,5 @@ notrace time_t __vdso_time(time_t *t) *t = result; return result; } -int time(time_t *t) +time_t time(time_t *t) __attribute__((weak, alias("__vdso_time"))); -- cgit v1.2.1