summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorNaoya Horiguchi <n-horiguchi@ah.jp.nec.com>2010-05-28 09:29:18 +0900
committerAndi Kleen <ak@linux.intel.com>2010-08-11 09:22:12 +0200
commit7013febc8940960eaaba039bac0f80910f679ce1 (patch)
tree27a61a69ae8c1df5149ba9ac98cc573e8bf19b95 /mm
parent7af446a841a264a1a9675001005b29ce01d1fc57 (diff)
downloadtalos-op-linux-7013febc8940960eaaba039bac0f80910f679ce1.tar.gz
talos-op-linux-7013febc8940960eaaba039bac0f80910f679ce1.zip
HWPOISON, hugetlb: set/clear PG_hwpoison bits on hugepage
To avoid race condition between concurrent memory errors on identified hugepage, we atomically test and set PG_hwpoison bit on the head page. All pages in the error hugepage are considered as hwpoisoned for now, so set and clear all PG_hwpoison bits in the hugepage with page lock of the head page held. Dependency: "HWPOISON, hugetlb: enable error handling path for hugepage" Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> Cc: Andrew Morton <akpm@linux-foundation.org> Acked-by: Fengguang Wu <fengguang.wu@intel.com> Signed-off-by: Andi Kleen <ak@linux.intel.com>
Diffstat (limited to 'mm')
-rw-r--r--mm/memory-failure.c38
1 files changed, 38 insertions, 0 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 1ec68c80788e..fee648b9d393 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -920,6 +920,22 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
return ret;
}
+static void set_page_hwpoison_huge_page(struct page *hpage)
+{
+ int i;
+ int nr_pages = 1 << compound_order(hpage);
+ for (i = 0; i < nr_pages; i++)
+ SetPageHWPoison(hpage + i);
+}
+
+static void clear_page_hwpoison_huge_page(struct page *hpage)
+{
+ int i;
+ int nr_pages = 1 << compound_order(hpage);
+ for (i = 0; i < nr_pages; i++)
+ ClearPageHWPoison(hpage + i);
+}
+
int __memory_failure(unsigned long pfn, int trapno, int flags)
{
struct page_state *ps;
@@ -1014,6 +1030,26 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
return 0;
}
+ /*
+ * For error on the tail page, we should set PG_hwpoison
+ * on the head page to show that the hugepage is hwpoisoned
+ */
+ if (PageTail(p) && TestSetPageHWPoison(hpage)) {
+ action_result(pfn, "hugepage already hardware poisoned",
+ IGNORED);
+ unlock_page(hpage);
+ put_page(hpage);
+ return 0;
+ }
+ /*
+ * Set PG_hwpoison on all pages in an error hugepage,
+ * because containment is done in hugepage unit for now.
+ * Since we have done TestSetPageHWPoison() for the head page with
+ * page lock held, we can safely set PG_hwpoison bits on tail pages.
+ */
+ if (PageHuge(p))
+ set_page_hwpoison_huge_page(hpage);
+
wait_on_page_writeback(p);
/*
@@ -1118,6 +1154,8 @@ int unpoison_memory(unsigned long pfn)
atomic_long_dec(&mce_bad_pages);
freeit = 1;
}
+ if (PageHuge(p))
+ clear_page_hwpoison_huge_page(page);
unlock_page(page);
put_page(page);
OpenPOWER on IntegriCloud