summaryrefslogtreecommitdiffstats
path: root/fs/dax.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/dax.c')
-rw-r--r--fs/dax.c138
1 files changed, 120 insertions, 18 deletions
diff --git a/fs/dax.c b/fs/dax.c
index 897b51e41d8f..f32d7125ad0f 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -226,8 +226,8 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot)
*
* Must be called with the i_pages lock held.
*/
-static void *get_unlocked_mapping_entry(struct address_space *mapping,
- pgoff_t index, void ***slotp)
+static void *__get_unlocked_mapping_entry(struct address_space *mapping,
+ pgoff_t index, void ***slotp, bool (*wait_fn)(void))
{
void *entry, **slot;
struct wait_exceptional_entry_queue ewait;
@@ -237,6 +237,8 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
ewait.wait.func = wake_exceptional_entry_func;
for (;;) {
+ bool revalidate;
+
entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
&slot);
if (!entry ||
@@ -251,14 +253,31 @@ static void *get_unlocked_mapping_entry(struct address_space *mapping,
prepare_to_wait_exclusive(wq, &ewait.wait,
TASK_UNINTERRUPTIBLE);
xa_unlock_irq(&mapping->i_pages);
- schedule();
+ revalidate = wait_fn();
finish_wait(wq, &ewait.wait);
xa_lock_irq(&mapping->i_pages);
+ if (revalidate)
+ return ERR_PTR(-EAGAIN);
}
}
-static void dax_unlock_mapping_entry(struct address_space *mapping,
- pgoff_t index)
+static bool entry_wait(void)
+{
+ schedule();
+ /*
+ * Never return an ERR_PTR() from
+ * __get_unlocked_mapping_entry(), just keep looping.
+ */
+ return false;
+}
+
+static void *get_unlocked_mapping_entry(struct address_space *mapping,
+ pgoff_t index, void ***slotp)
+{
+ return __get_unlocked_mapping_entry(mapping, index, slotp, entry_wait);
+}
+
+static void unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
{
void *entry, **slot;
@@ -277,7 +296,7 @@ static void dax_unlock_mapping_entry(struct address_space *mapping,
static void put_locked_mapping_entry(struct address_space *mapping,
pgoff_t index)
{
- dax_unlock_mapping_entry(mapping, index);
+ unlock_mapping_entry(mapping, index);
}
/*
@@ -319,18 +338,27 @@ static unsigned long dax_radix_end_pfn(void *entry)
for (pfn = dax_radix_pfn(entry); \
pfn < dax_radix_end_pfn(entry); pfn++)
-static void dax_associate_entry(void *entry, struct address_space *mapping)
+/*
+ * TODO: for reflink+dax we need a way to associate a single page with
+ * multiple address_space instances at different linear_page_index()
+ * offsets.
+ */
+static void dax_associate_entry(void *entry, struct address_space *mapping,
+ struct vm_area_struct *vma, unsigned long address)
{
- unsigned long pfn;
+ unsigned long size = dax_entry_size(entry), pfn, index;
+ int i = 0;
if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
return;
+ index = linear_page_index(vma, address & ~(size - 1));
for_each_mapped_pfn(entry, pfn) {
struct page *page = pfn_to_page(pfn);
WARN_ON_ONCE(page->mapping);
page->mapping = mapping;
+ page->index = index + i++;
}
}
@@ -348,6 +376,7 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping,
WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
WARN_ON_ONCE(page->mapping && page->mapping != mapping);
page->mapping = NULL;
+ page->index = 0;
}
}
@@ -364,6 +393,84 @@ static struct page *dax_busy_page(void *entry)
return NULL;
}
+static bool entry_wait_revalidate(void)
+{
+ rcu_read_unlock();
+ schedule();
+ rcu_read_lock();
+
+ /*
+ * Tell __get_unlocked_mapping_entry() to take a break, we need
+ * to revalidate page->mapping after dropping locks
+ */
+ return true;
+}
+
+bool dax_lock_mapping_entry(struct page *page)
+{
+ pgoff_t index;
+ struct inode *inode;
+ bool did_lock = false;
+ void *entry = NULL, **slot;
+ struct address_space *mapping;
+
+ rcu_read_lock();
+ for (;;) {
+ mapping = READ_ONCE(page->mapping);
+
+ if (!dax_mapping(mapping))
+ break;
+
+ /*
+ * In the device-dax case there's no need to lock, a
+ * struct dev_pagemap pin is sufficient to keep the
+ * inode alive, and we assume we have dev_pagemap pin
+ * otherwise we would not have a valid pfn_to_page()
+ * translation.
+ */
+ inode = mapping->host;
+ if (S_ISCHR(inode->i_mode)) {
+ did_lock = true;
+ break;
+ }
+
+ xa_lock_irq(&mapping->i_pages);
+ if (mapping != page->mapping) {
+ xa_unlock_irq(&mapping->i_pages);
+ continue;
+ }
+ index = page->index;
+
+ entry = __get_unlocked_mapping_entry(mapping, index, &slot,
+ entry_wait_revalidate);
+ if (!entry) {
+ xa_unlock_irq(&mapping->i_pages);
+ break;
+ } else if (IS_ERR(entry)) {
+ WARN_ON_ONCE(PTR_ERR(entry) != -EAGAIN);
+ continue;
+ }
+ lock_slot(mapping, slot);
+ did_lock = true;
+ xa_unlock_irq(&mapping->i_pages);
+ break;
+ }
+ rcu_read_unlock();
+
+ return did_lock;
+}
+
+void dax_unlock_mapping_entry(struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+ struct inode *inode = mapping->host;
+
+ if (S_ISCHR(inode->i_mode))
+ return;
+
+ unlock_mapping_entry(mapping, page->index);
+}
+
/*
* Find radix tree entry at given index. If it points to an exceptional entry,
* return it with the radix tree entry locked. If the radix tree doesn't
@@ -655,7 +762,6 @@ static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev,
{
void *vto, *kaddr;
pgoff_t pgoff;
- pfn_t pfn;
long rc;
int id;
@@ -664,7 +770,7 @@ static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev,
return rc;
id = dax_read_lock();
- rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, &pfn);
+ rc = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), &kaddr, NULL);
if (rc < 0) {
dax_read_unlock(id);
return rc;
@@ -709,7 +815,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
new_entry = dax_radix_locked_entry(pfn, flags);
if (dax_entry_size(entry) != dax_entry_size(new_entry)) {
dax_disassociate_entry(entry, mapping, false);
- dax_associate_entry(new_entry, mapping);
+ dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address);
}
if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
@@ -975,7 +1081,6 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
{
const sector_t sector = dax_iomap_sector(iomap, pos);
pgoff_t pgoff;
- void *kaddr;
int id, rc;
long length;
@@ -984,7 +1089,7 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
return rc;
id = dax_read_lock();
length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
- &kaddr, pfnp);
+ NULL, pfnp);
if (length < 0) {
rc = length;
goto out;
@@ -1060,15 +1165,13 @@ int __dax_zero_page_range(struct block_device *bdev,
pgoff_t pgoff;
long rc, id;
void *kaddr;
- pfn_t pfn;
rc = bdev_dax_pgoff(bdev, sector, PAGE_SIZE, &pgoff);
if (rc)
return rc;
id = dax_read_lock();
- rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr,
- &pfn);
+ rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL);
if (rc < 0) {
dax_read_unlock(id);
return rc;
@@ -1124,7 +1227,6 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
ssize_t map_len;
pgoff_t pgoff;
void *kaddr;
- pfn_t pfn;
if (fatal_signal_pending(current)) {
ret = -EINTR;
@@ -1136,7 +1238,7 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
break;
map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
- &kaddr, &pfn);
+ &kaddr, NULL);
if (map_len < 0) {
ret = map_len;
break;
OpenPOWER on IntegriCloud