From a79e53d85683c6dd9f99c90511028adc2043031f Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 16 Feb 2011 15:45:22 -0800 Subject: x86/mm: Fix pgd_lock deadlock It's forbidden to take the page_table_lock with the irq disabled or if there's contention the IPIs (for tlb flushes) sent with the page_table_lock held will never run leading to a deadlock. Nobody takes the pgd_lock from irq context so the _irqsave can be removed. Signed-off-by: Andrea Arcangeli Acked-by: Rik van Riel Tested-by: Konrad Rzeszutek Wilk Signed-off-by: Andrew Morton Cc: Peter Zijlstra Cc: Linus Torvalds Cc: LKML-Reference: <201102162345.p1GNjMjm021738@imap1.linux-foundation.org> Signed-off-by: Ingo Molnar --- arch/x86/mm/pgtable.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/x86/mm/pgtable.c') diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 500242d3c96d..0113d19c8aa6 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -121,14 +121,12 @@ static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd) static void pgd_dtor(pgd_t *pgd) { - unsigned long flags; /* can be called from interrupt context */ - if (SHARED_KERNEL_PMD) return; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); pgd_list_del(pgd); - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } /* @@ -260,7 +258,6 @@ pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; pmd_t *pmds[PREALLOCATED_PMDS]; - unsigned long flags; pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); @@ -280,12 +277,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm) * respect to anything walking the pgd_list, so that they * never see a partially populated pgd. */ - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); pgd_ctor(mm, pgd); pgd_prepopulate_pmd(mm, pgd, pmds); - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); return pgd; -- cgit v1.2.1 From 4981d01eada5354d81c8929d5b2836829ba3df7b Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 16 Mar 2011 11:37:29 +0800 Subject: x86: Flush TLB if PGD entry is changed in i386 PAE mode According to intel CPU manual, every time PGD entry is changed in i386 PAE mode, we need do a full TLB flush. Current code follows this and there is comment for this too in the code. But current code misses the multi-threaded case. A changed page table might be used by several CPUs, every such CPU should flush TLB. Usually this isn't a problem, because we prepopulate all PGD entries at process fork. But when the process does munmap and follows new mmap, this issue will be triggered. When it happens, some CPUs keep doing page faults: http://marc.info/?l=linux-kernel&m=129915020508238&w=2 Reported-by: Yasunori Goto Tested-by: Yasunori Goto Reviewed-by: Rik van Riel Signed-off-by: Shaohua Li Cc: Mallick Asit K Cc: Linus Torvalds Cc: Andrew Morton Cc: linux-mm Cc: stable LKML-Reference: <1300246649.2337.95.camel@sli10-conroe> Signed-off-by: Ingo Molnar --- arch/x86/mm/pgtable.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/mm/pgtable.c') diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 0113d19c8aa6..8573b83a63d0 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -168,8 +168,7 @@ void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd) * section 8.1: in PAE mode we explicitly have to flush the * TLB via cr3 if the top-level pgd is changed... */ - if (mm == current->active_mm) - write_cr3(read_cr3()); + flush_tlb_mm(mm); } #else /* !CONFIG_X86_PAE */ -- cgit v1.2.1