summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/process_64.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/process_64.c')
-rw-r--r--arch/x86/kernel/process_64.c52
1 files changed, 37 insertions, 15 deletions
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 67fcc43577d2..ddfdbf74f174 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -52,7 +52,7 @@
asmlinkage extern void ret_from_fork(void);
-__visible DEFINE_PER_CPU(unsigned long, old_rsp);
+__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
@@ -161,7 +161,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
childregs = task_pt_regs(p);
p->thread.sp = (unsigned long) childregs;
- p->thread.usersp = me->thread.usersp;
set_tsk_thread_flag(p, TIF_FORK);
p->thread.io_bitmap_ptr = NULL;
@@ -207,7 +206,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
*/
if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
- if (test_thread_flag(TIF_IA32))
+ if (is_ia32_task())
err = do_set_thread_area(p, -1,
(struct user_desc __user *)childregs->si, 0);
else
@@ -235,13 +234,12 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
loadsegment(es, _ds);
loadsegment(ds, _ds);
load_gs_index(0);
- current->thread.usersp = new_sp;
regs->ip = new_ip;
regs->sp = new_sp;
- this_cpu_write(old_rsp, new_sp);
regs->cs = _cs;
regs->ss = _ss;
regs->flags = X86_EFLAGS_IF;
+ force_iret();
}
void
@@ -277,15 +275,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct thread_struct *prev = &prev_p->thread;
struct thread_struct *next = &next_p->thread;
int cpu = smp_processor_id();
- struct tss_struct *tss = &per_cpu(init_tss, cpu);
+ struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
unsigned fsindex, gsindex;
fpu_switch_t fpu;
fpu = switch_fpu_prepare(prev_p, next_p, cpu);
- /* Reload esp0 and ss1. */
- load_sp0(tss, next);
-
/* We must save %fs and %gs before load_TLS() because
* %fs and %gs may be cleared by load_TLS().
*
@@ -401,8 +396,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/*
* Switch the PDA and FPU contexts.
*/
- prev->usersp = this_cpu_read(old_rsp);
- this_cpu_write(old_rsp, next->usersp);
this_cpu_write(current_task, next_p);
/*
@@ -413,9 +406,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
+ /* Reload esp0 and ss1. This changes current_thread_info(). */
+ load_sp0(tss, next);
+
this_cpu_write(kernel_stack,
- (unsigned long)task_stack_page(next_p) +
- THREAD_SIZE - KERNEL_STACK_OFFSET);
+ (unsigned long)task_stack_page(next_p) + THREAD_SIZE);
/*
* Now maybe reload the debug registers and handle I/O bitmaps
@@ -424,6 +419,34 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
__switch_to_xtra(prev_p, next_p, tss);
+ if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
+ /*
+ * AMD CPUs have a misfeature: SYSRET sets the SS selector but
+ * does not update the cached descriptor. As a result, if we
+ * do SYSRET while SS is NULL, we'll end up in user mode with
+ * SS apparently equal to __USER_DS but actually unusable.
+ *
+ * The straightforward workaround would be to fix it up just
+ * before SYSRET, but that would slow down the system call
+ * fast paths. Instead, we ensure that SS is never NULL in
+ * system call context. We do this by replacing NULL SS
+ * selectors at every context switch. SYSCALL sets up a valid
+ * SS, so the only way to get NULL is to re-enter the kernel
+ * from CPL 3 through an interrupt. Since that can't happen
+ * in the same task as a running syscall, we are guaranteed to
+ * context switch between every interrupt vector entry and a
+ * subsequent SYSRET.
+ *
+ * We read SS first because SS reads are much faster than
+ * writes. Out of caution, we force SS to __KERNEL_DS even if
+ * it previously had a different non-NULL value.
+ */
+ unsigned short ss_sel;
+ savesegment(ss, ss_sel);
+ if (ss_sel != __KERNEL_DS)
+ loadsegment(ss, __KERNEL_DS);
+ }
+
return prev_p;
}
@@ -602,6 +625,5 @@ long sys_arch_prctl(int code, unsigned long addr)
unsigned long KSTK_ESP(struct task_struct *task)
{
- return (test_tsk_thread_flag(task, TIF_IA32)) ?
- (task_pt_regs(task)->sp) : ((task)->thread.usersp);
+ return task_pt_regs(task)->sp;
}
OpenPOWER on IntegriCloud