From 1b17844b29ae042576bea588164f2f1e9590a8bc Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 22 Apr 2014 13:49:40 -0700
Subject: mm: make fixup_user_fault() check the vma access rights too

fixup_user_fault() is used by the futex code when the direct user access
fails, and the futex code wants it to either map in the page in a usable
form or return an error.  It relied on handle_mm_fault() to map the
page, and correctly checked the error return from that, but while that
does map the page, it doesn't actually guarantee that the page will be
mapped with sufficient permissions to be then accessed.

So do the appropriate tests of the vma access rights by hand.

[ Side note: arguably handle_mm_fault() could just do that itself, but
  we have traditionally done it in the caller, because some callers -
  notably get_user_pages() - have been able to access pages even when
  they are mapped with PROT_NONE.  Maybe we should re-visit that design
  decision, but in the meantime this is the minimal patch. ]

Found by Dave Jones running his trinity tool.

Reported-by: Dave Jones <davej@redhat.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'mm/memory.c')

diff --git a/mm/memory.c b/mm/memory.c
index d0f0bef3be48..93e332d5ed77 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1955,12 +1955,17 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm,
 		     unsigned long address, unsigned int fault_flags)
 {
 	struct vm_area_struct *vma;
+	vm_flags_t vm_flags;
 	int ret;
 
 	vma = find_extend_vma(mm, address);
 	if (!vma || address < vma->vm_start)
 		return -EFAULT;
 
+	vm_flags = (fault_flags & FAULT_FLAG_WRITE) ? VM_WRITE : VM_READ;
+	if (!(vm_flags & vma->vm_flags))
+		return -EFAULT;
+
 	ret = handle_mm_fault(mm, vma, address, fault_flags);
 	if (ret & VM_FAULT_ERROR) {
 		if (ret & VM_FAULT_OOM)
-- 
cgit v1.2.1


From 1cf35d47712dd5dc4d62c6ce984f04ac6eab0408 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 25 Apr 2014 16:05:40 -0700
Subject: mm: split 'tlb_flush_mmu()' into tlb flushing and memory freeing
 parts

The mmu-gather operation 'tlb_flush_mmu()' has done two things: the
actual tlb flush operation, and the batched freeing of the pages that
the TLB entries pointed at.

This splits the operation into separate phases, so that the forced
batched flushing done by zap_pte_range() can now do the actual TLB flush
while still holding the page table lock, but delay the batched freeing
of all the pages to after the lock has been dropped.

This in turn allows us to avoid a race condition between
set_page_dirty() (as called by zap_pte_range() when it finds a dirty
shared memory pte) and page_mkclean(): because we now flush all the
dirty page data from the TLB's while holding the pte lock,
page_mkclean() will be held up walking the (recently cleaned) page
tables until after the TLB entries have been flushed from all CPU's.

Reported-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Tested-by: Dave Hansen <dave.hansen@intel.com>
Acked-by: Hugh Dickins <hughd@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russell King - ARM Linux <linux@arm.linux.org.uk>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory.c | 53 ++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 34 insertions(+), 19 deletions(-)

(limited to 'mm/memory.c')

diff --git a/mm/memory.c b/mm/memory.c
index 93e332d5ed77..037b812a9531 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -232,17 +232,18 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long
 #endif
 }
 
-void tlb_flush_mmu(struct mmu_gather *tlb)
+static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 {
-	struct mmu_gather_batch *batch;
-
-	if (!tlb->need_flush)
-		return;
 	tlb->need_flush = 0;
 	tlb_flush(tlb);
 #ifdef CONFIG_HAVE_RCU_TABLE_FREE
 	tlb_table_flush(tlb);
 #endif
+}
+
+static void tlb_flush_mmu_free(struct mmu_gather *tlb)
+{
+	struct mmu_gather_batch *batch;
 
 	for (batch = &tlb->local; batch; batch = batch->next) {
 		free_pages_and_swap_cache(batch->pages, batch->nr);
@@ -251,6 +252,14 @@ void tlb_flush_mmu(struct mmu_gather *tlb)
 	tlb->active = &tlb->local;
 }
 
+void tlb_flush_mmu(struct mmu_gather *tlb)
+{
+	if (!tlb->need_flush)
+		return;
+	tlb_flush_mmu_tlbonly(tlb);
+	tlb_flush_mmu_free(tlb);
+}
+
 /* tlb_finish_mmu
  *	Called at the end of the shootdown operation to free up any resources
  *	that were required.
@@ -1127,8 +1136,10 @@ again:
 			if (PageAnon(page))
 				rss[MM_ANONPAGES]--;
 			else {
-				if (pte_dirty(ptent))
+				if (pte_dirty(ptent)) {
+					force_flush = 1;
 					set_page_dirty(page);
+				}
 				if (pte_young(ptent) &&
 				    likely(!(vma->vm_flags & VM_SEQ_READ)))
 					mark_page_accessed(page);
@@ -1137,9 +1148,10 @@ again:
 			page_remove_rmap(page);
 			if (unlikely(page_mapcount(page) < 0))
 				print_bad_pte(vma, addr, ptent, page);
-			force_flush = !__tlb_remove_page(tlb, page);
-			if (force_flush)
+			if (unlikely(!__tlb_remove_page(tlb, page))) {
+				force_flush = 1;
 				break;
+			}
 			continue;
 		}
 		/*
@@ -1174,18 +1186,11 @@ again:
 
 	add_mm_rss_vec(mm, rss);
 	arch_leave_lazy_mmu_mode();
-	pte_unmap_unlock(start_pte, ptl);
 
-	/*
-	 * mmu_gather ran out of room to batch pages, we break out of
-	 * the PTE lock to avoid doing the potential expensive TLB invalidate
-	 * and page-free while holding it.
-	 */
+	/* Do the actual TLB flush before dropping ptl */
 	if (force_flush) {
 		unsigned long old_end;
 
-		force_flush = 0;
-
 		/*
 		 * Flush the TLB just for the previous segment,
 		 * then update the range to be the remaining
@@ -1193,11 +1198,21 @@ again:
 		 */
 		old_end = tlb->end;
 		tlb->end = addr;
-
-		tlb_flush_mmu(tlb);
-
+		tlb_flush_mmu_tlbonly(tlb);
 		tlb->start = addr;
 		tlb->end = old_end;
+	}
+	pte_unmap_unlock(start_pte, ptl);
+
+	/*
+	 * If we forced a TLB flush (either due to running out of
+	 * batch buffers or because we needed to flush dirty TLB
+	 * entries before releasing the ptl), free the batched
+	 * memory too. Restart if we didn't do everything.
+	 */
+	if (force_flush) {
+		force_flush = 0;
+		tlb_flush_mmu_free(tlb);
 
 		if (addr != end)
 			goto again;
-- 
cgit v1.2.1