summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2015-04-27 10:08:39 +0200
committerIngo Molnar <mingo@kernel.org>2015-05-19 15:47:54 +0200
commitb16529004f5cc0debf8073d21b560a4677a03a2a (patch)
tree9067523005576227e5239518aad48d42029c2872
parent68271c6ae726d7ab51e39b7342c838761bf0a25c (diff)
downloadtalos-op-linux-b16529004f5cc0debf8073d21b560a4677a03a2a.tar.gz
talos-op-linux-b16529004f5cc0debf8073d21b560a4677a03a2a.zip
x86/fpu: Optimize fpu_copy() some more on lazy switching systems
The current fpu_copy() code on lazy switching CPUs always saves into the current fpstate and then copies it over into the child context: preempt_disable(); if (!copy_fpregs_to_fpstate(src_fpu)) fpregs_deactivate(src_fpu); preempt_enable(); memcpy(&dst_fpu->state, &src_fpu->state, xstate_size); That memcpy() can be avoided on all lazy switching setups except really old FNSAVE-only systems: change fpu_copy() to directly save into the child context, for both the lazy and the eager context switching case. Note that we still have to do a memcpy() back into the parent context in the FNSAVE case, but this won't be executed on the majority of x86 systems that got built in the last 10 years or so. Reviewed-by: Borislav Petkov <bp@alien8.de> Cc: Andy Lutomirski <luto@amacapital.net> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Fenghua Yu <fenghua.yu@intel.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--arch/x86/kernel/fpu/core.c35
1 files changed, 27 insertions, 8 deletions
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 41ea25a61b5f..edbb5d04a558 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -220,16 +220,35 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu)
{
WARN_ON(src_fpu != &current->thread.fpu);
- if (use_eager_fpu()) {
+ /*
+ * Don't let 'init optimized' areas of the XSAVE area
+ * leak into the child task:
+ */
+ if (use_eager_fpu())
memset(&dst_fpu->state.xsave, 0, xstate_size);
- copy_fpregs_to_fpstate(dst_fpu);
- } else {
- preempt_disable();
- if (!copy_fpregs_to_fpstate(src_fpu))
- fpregs_deactivate(src_fpu);
- preempt_enable();
- memcpy(&dst_fpu->state, &src_fpu->state, xstate_size);
+
+ /*
+ * Save current FPU registers directly into the child
+ * FPU context, without any memory-to-memory copying.
+ *
+ * If the FPU context got destroyed in the process (FNSAVE
+ * done on old CPUs) then copy it back into the source
+ * context and mark the current task for lazy restore.
+ *
+ * We have to do all this with preemption disabled,
+ * mostly because of the FNSAVE case, because in that
+ * case we must not allow preemption in the window
+ * between the FNSAVE and us marking the context lazy.
+ *
+ * It shouldn't be an issue as even FNSAVE is plenty
+ * fast in terms of critical section length.
+ */
+ preempt_disable();
+ if (!copy_fpregs_to_fpstate(dst_fpu)) {
+ memcpy(&src_fpu->state, &dst_fpu->state, xstate_size);
+ fpregs_deactivate(src_fpu);
}
+ preempt_enable();
}
int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
OpenPOWER on IntegriCloud