summaryrefslogtreecommitdiffstats
path: root/arch/x86/mm/tlb.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm/tlb.c')
-rw-r--r--arch/x86/mm/tlb.c73
1 files changed, 67 insertions, 6 deletions
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index a1561957dccb..8dcc0607f805 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -6,13 +6,14 @@
#include <linux/interrupt.h>
#include <linux/export.h>
#include <linux/cpu.h>
+#include <linux/debugfs.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
+#include <asm/nospec-branch.h>
#include <asm/cache.h>
#include <asm/apic.h>
#include <asm/uv/uv.h>
-#include <linux/debugfs.h>
/*
* TLB flushing, formerly SMP-only
@@ -151,6 +152,34 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
local_irq_restore(flags);
}
+static void sync_current_stack_to_mm(struct mm_struct *mm)
+{
+ unsigned long sp = current_stack_pointer;
+ pgd_t *pgd = pgd_offset(mm, sp);
+
+ if (CONFIG_PGTABLE_LEVELS > 4) {
+ if (unlikely(pgd_none(*pgd))) {
+ pgd_t *pgd_ref = pgd_offset_k(sp);
+
+ set_pgd(pgd, *pgd_ref);
+ }
+ } else {
+ /*
+ * "pgd" is faked. The top level entries are "p4d"s, so sync
+ * the p4d. This compiles to approximately the same code as
+ * the 5-level case.
+ */
+ p4d_t *p4d = p4d_offset(pgd, sp);
+
+ if (unlikely(p4d_none(*p4d))) {
+ pgd_t *pgd_ref = pgd_offset_k(sp);
+ p4d_t *p4d_ref = p4d_offset(pgd_ref, sp);
+
+ set_p4d(p4d, *p4d_ref);
+ }
+ }
+}
+
void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
@@ -200,6 +229,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
#endif
this_cpu_write(cpu_tlbstate.is_lazy, false);
+ /*
+ * The membarrier system call requires a full memory barrier and
+ * core serialization before returning to user-space, after
+ * storing to rq->curr. Writing to CR3 provides that full
+ * memory barrier and core serializing instruction.
+ */
if (real_prev == next) {
VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
next->context.ctx_id);
@@ -219,6 +254,27 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
} else {
u16 new_asid;
bool need_flush;
+ u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);
+
+ /*
+ * Avoid user/user BTB poisoning by flushing the branch
+ * predictor when switching between processes. This stops
+ * one process from doing Spectre-v2 attacks on another.
+ *
+ * As an optimization, flush indirect branches only when
+ * switching into processes that disable dumping. This
+ * protects high value processes like gpg, without having
+ * too high performance overhead. IBPB is *expensive*!
+ *
+ * This will not flush branches when switching into kernel
+ * threads. It will also not flush if we switch to idle
+ * thread and back to the same process. It will flush if we
+ * switch to a different non-dumpable process.
+ */
+ if (tsk && tsk->mm &&
+ tsk->mm->context.ctx_id != last_ctx_id &&
+ get_dumpable(tsk->mm) != SUID_DUMP_USER)
+ indirect_branch_prediction_barrier();
if (IS_ENABLED(CONFIG_VMAP_STACK)) {
/*
@@ -226,11 +282,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* mapped in the new pgd, we'll double-fault. Forcibly
* map it.
*/
- unsigned int index = pgd_index(current_stack_pointer);
- pgd_t *pgd = next->pgd + index;
-
- if (unlikely(pgd_none(*pgd)))
- set_pgd(pgd, init_mm.pgd[index]);
+ sync_current_stack_to_mm(next);
}
/* Stop remote flushes for the previous mm */
@@ -268,6 +320,14 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
}
+ /*
+ * Record last user mm's context id, so we can avoid
+ * flushing branch buffer with IBPB if we switch back
+ * to the same user.
+ */
+ if (next != &init_mm)
+ this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id);
+
this_cpu_write(cpu_tlbstate.loaded_mm, next);
this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);
}
@@ -345,6 +405,7 @@ void initialize_tlbstate_and_flush(void)
write_cr3(build_cr3(mm->pgd, 0));
/* Reinitialize tlbstate. */
+ this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id);
this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
this_cpu_write(cpu_tlbstate.next_asid, 1);
this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
OpenPOWER on IntegriCloud