summaryrefslogtreecommitdiffstats
path: root/include/linux/swap.h
diff options
context:
space:
mode:
authorJérôme Glisse <jglisse@redhat.com>2017-09-08 16:11:43 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-08 18:26:46 -0700
commit5042db43cc26f51eed51c56192e2c2317e44315f (patch)
treed4115135f5aad384fb173537a2c397170a86f29a /include/linux/swap.h
parent3072e413e305e353cd4654f8a57d953b66e85bf3 (diff)
downloadtalos-obmc-linux-5042db43cc26f51eed51c56192e2c2317e44315f.tar.gz
talos-obmc-linux-5042db43cc26f51eed51c56192e2c2317e44315f.zip
mm/ZONE_DEVICE: new type of ZONE_DEVICE for unaddressable memory
HMM (heterogeneous memory management) need struct page to support migration from system main memory to device memory. Reasons for HMM and migration to device memory is explained with HMM core patch. This patch deals with device memory that is un-addressable memory (ie CPU can not access it). Hence we do not want those struct page to be manage like regular memory. That is why we extend ZONE_DEVICE to support different types of memory. A persistent memory type is define for existing user of ZONE_DEVICE and a new device un-addressable type is added for the un-addressable memory type. There is a clear separation between what is expected from each memory type and existing user of ZONE_DEVICE are un-affected by new requirement and new use of the un-addressable type. All specific code path are protect with test against the memory type. Because memory is un-addressable we use a new special swap type for when a page is migrated to device memory (this reduces the number of maximum swap file). The main two additions beside memory type to ZONE_DEVICE is two callbacks. First one, page_free() is call whenever page refcount reach 1 (which means the page is free as ZONE_DEVICE page never reach a refcount of 0). This allow device driver to manage its memory and associated struct page. The second callback page_fault() happens when there is a CPU access to an address that is back by a device page (which are un-addressable by the CPU). This callback is responsible to migrate the page back to system main memory. Device driver can not block migration back to system memory, HMM make sure that such page can not be pin into device memory. If device is in some error condition and can not migrate memory back then a CPU page fault to device memory should end with SIGBUS. [arnd@arndb.de: fix warning] Link: http://lkml.kernel.org/r/20170823133213.712917-1-arnd@arndb.de Link: http://lkml.kernel.org/r/20170817000548.32038-8-jglisse@redhat.com Signed-off-by: Jérôme Glisse <jglisse@redhat.com> Signed-off-by: Arnd Bergmann <arnd@arndb.de> Acked-by: Dan Williams <dan.j.williams@intel.com> Cc: Ross Zwisler <ross.zwisler@linux.intel.com> Cc: Aneesh Kumar <aneesh.kumar@linux.vnet.ibm.com> Cc: Balbir Singh <bsingharora@gmail.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: David Nellans <dnellans@nvidia.com> Cc: Evgeny Baskakov <ebaskakov@nvidia.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: John Hubbard <jhubbard@nvidia.com> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Mark Hairgrove <mhairgrove@nvidia.com> Cc: Michal Hocko <mhocko@kernel.org> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Sherry Cheung <SCheung@nvidia.com> Cc: Subhash Gutti <sgutti@nvidia.com> Cc: Vladimir Davydov <vdavydov.dev@gmail.com> Cc: Bob Liu <liubo95@huawei.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux/swap.h')
-rw-r--r--include/linux/swap.h24
1 files changed, 21 insertions, 3 deletions
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 8bf3487fb204..8a807292037f 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -51,6 +51,23 @@ static inline int current_is_kswapd(void)
*/
/*
+ * Unaddressable device memory support. See include/linux/hmm.h and
+ * Documentation/vm/hmm.txt. Short description is we need struct pages for
+ * device memory that is unaddressable (inaccessible) by CPU, so that we can
+ * migrate part of a process memory to device memory.
+ *
+ * When a page is migrated from CPU to device, we set the CPU page table entry
+ * to a special SWP_DEVICE_* entry.
+ */
+#ifdef CONFIG_DEVICE_PRIVATE
+#define SWP_DEVICE_NUM 2
+#define SWP_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM)
+#define SWP_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+1)
+#else
+#define SWP_DEVICE_NUM 0
+#endif
+
+/*
* NUMA node memory migration support
*/
#ifdef CONFIG_MIGRATION
@@ -72,7 +89,8 @@ static inline int current_is_kswapd(void)
#endif
#define MAX_SWAPFILES \
- ((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
+ ((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \
+ SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
/*
* Magic header for a swap area. The first part of the union is
@@ -469,8 +487,8 @@ static inline void show_swap_cache_info(void)
{
}
-#define free_swap_and_cache(swp) is_migration_entry(swp)
-#define swapcache_prepare(swp) is_migration_entry(swp)
+#define free_swap_and_cache(e) ({(is_migration_entry(e) || is_device_private_entry(e));})
+#define swapcache_prepare(e) ({(is_migration_entry(e) || is_device_private_entry(e));})
static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
{
OpenPOWER on IntegriCloud