diff options
Diffstat (limited to 'fs/iomap/direct-io.c')
-rw-r--r-- | fs/iomap/direct-io.c | 85 |
1 files changed, 49 insertions, 36 deletions
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 10517cea9682..23837926c0c5 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -24,7 +24,7 @@ struct iomap_dio { struct kiocb *iocb; - iomap_dio_end_io_t *end_io; + const struct iomap_dio_ops *dops; loff_t i_size; loff_t size; atomic_t ref; @@ -72,18 +72,14 @@ static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap, static ssize_t iomap_dio_complete(struct iomap_dio *dio) { + const struct iomap_dio_ops *dops = dio->dops; struct kiocb *iocb = dio->iocb; struct inode *inode = file_inode(iocb->ki_filp); loff_t offset = iocb->ki_pos; - ssize_t ret; + ssize_t ret = dio->error; - if (dio->end_io) { - ret = dio->end_io(iocb, - dio->error ? dio->error : dio->size, - dio->flags); - } else { - ret = dio->error; - } + if (dops && dops->end_io) + ret = dops->end_io(iocb, dio->size, ret, dio->flags); if (likely(!ret)) { ret = dio->size; @@ -101,9 +97,9 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio) * one is a pretty crazy thing to do, so we don't support it 100%. If * this invalidation fails, tough, the write still worked... * - * And this page cache invalidation has to be after dio->end_io(), as - * some filesystems convert unwritten extents to real allocations in - * end_io() when necessary, otherwise a racing buffer read would cache + * And this page cache invalidation has to be after ->end_io(), as some + * filesystems convert unwritten extents to real allocations in + * ->end_io() when necessary, otherwise a racing buffer read would cache * zeros from unwritten extents. */ if (!dio->error && @@ -205,12 +201,12 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length, unsigned int blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev)); unsigned int fs_block_size = i_blocksize(inode), pad; unsigned int align = iov_iter_alignment(dio->submit.iter); - struct iov_iter iter; struct bio *bio; bool need_zeroout = false; bool use_fua = false; int nr_pages, ret = 0; size_t copied = 0; + size_t orig_count; if ((pos | length | align) & ((1 << blkbits) - 1)) return -EINVAL; @@ -240,15 +236,18 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length, } /* - * Operate on a partial iter trimmed to the extent we were called for. - * We'll update the iter in the dio once we're done with this extent. + * Save the original count and trim the iter to just the extent we + * are operating on right now. The iter will be re-expanded once + * we are done. */ - iter = *dio->submit.iter; - iov_iter_truncate(&iter, length); + orig_count = iov_iter_count(dio->submit.iter); + iov_iter_truncate(dio->submit.iter, length); - nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES); - if (nr_pages <= 0) - return nr_pages; + nr_pages = iov_iter_npages(dio->submit.iter, BIO_MAX_PAGES); + if (nr_pages <= 0) { + ret = nr_pages; + goto out; + } if (need_zeroout) { /* zero out from the start of the block to the write offset */ @@ -261,7 +260,8 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length, size_t n; if (dio->error) { iov_iter_revert(dio->submit.iter, copied); - return 0; + copied = ret = 0; + goto out; } bio = bio_alloc(GFP_KERNEL, nr_pages); @@ -272,7 +272,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length, bio->bi_private = dio; bio->bi_end_io = iomap_dio_bio_end_io; - ret = bio_iov_iter_get_pages(bio, &iter); + ret = bio_iov_iter_get_pages(bio, dio->submit.iter); if (unlikely(ret)) { /* * We have to stop part way through an IO. We must fall @@ -298,13 +298,11 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length, bio_set_pages_dirty(bio); } - iov_iter_advance(dio->submit.iter, n); - dio->size += n; pos += n; copied += n; - nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES); + nr_pages = iov_iter_npages(dio->submit.iter, BIO_MAX_PAGES); iomap_dio_submit_bio(dio, iomap, bio); } while (nr_pages); @@ -322,7 +320,12 @@ zero_tail: if (pad) iomap_dio_zero(dio, iomap, pos, fs_block_size - pad); } - return copied ? copied : ret; +out: + /* Undo iter limitation to current extent */ + iov_iter_reexpand(dio->submit.iter, orig_count - copied); + if (copied) + return copied; + return ret; } static loff_t @@ -362,7 +365,7 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length, static loff_t iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, - void *data, struct iomap *iomap) + void *data, struct iomap *iomap, struct iomap *srcmap) { struct iomap_dio *dio = data; @@ -396,15 +399,15 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, */ ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, - const struct iomap_ops *ops, iomap_dio_end_io_t end_io) + const struct iomap_ops *ops, const struct iomap_dio_ops *dops, + bool wait_for_completion) { struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = file_inode(iocb->ki_filp); size_t count = iov_iter_count(iter); - loff_t pos = iocb->ki_pos, start = pos; + loff_t pos = iocb->ki_pos; loff_t end = iocb->ki_pos + count - 1, ret = 0; unsigned int flags = IOMAP_DIRECT; - bool wait_for_completion = is_sync_kiocb(iocb); struct blk_plug plug; struct iomap_dio *dio; @@ -413,6 +416,9 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (!count) return 0; + if (WARN_ON(is_sync_kiocb(iocb) && !wait_for_completion)) + return -EIO; + dio = kmalloc(sizeof(*dio), GFP_KERNEL); if (!dio) return -ENOMEM; @@ -421,7 +427,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, atomic_set(&dio->ref, 1); dio->size = 0; dio->i_size = i_size_read(inode); - dio->end_io = end_io; + dio->dops = dops; dio->error = 0; dio->flags = 0; @@ -434,7 +440,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, if (pos >= dio->i_size) goto out_free_dio; - if (iter_is_iovec(iter) && iov_iter_rw(iter) == READ) + if (iter_is_iovec(iter)) dio->flags |= IOMAP_DIO_DIRTY; } else { flags |= IOMAP_WRITE; @@ -455,14 +461,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, } if (iocb->ki_flags & IOCB_NOWAIT) { - if (filemap_range_has_page(mapping, start, end)) { + if (filemap_range_has_page(mapping, pos, end)) { ret = -EAGAIN; goto out_free_dio; } flags |= IOMAP_NOWAIT; } - ret = filemap_write_and_wait_range(mapping, start, end); + ret = filemap_write_and_wait_range(mapping, pos, end); if (ret) goto out_free_dio; @@ -473,7 +479,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, * pretty crazy thing to do, so we don't support it 100%. */ ret = invalidate_inode_pages2_range(mapping, - start >> PAGE_SHIFT, end >> PAGE_SHIFT); + pos >> PAGE_SHIFT, end >> PAGE_SHIFT); if (ret) dio_warn_stale_pagecache(iocb->ki_filp); ret = 0; @@ -501,8 +507,15 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, } pos += ret; - if (iov_iter_rw(iter) == READ && pos >= dio->i_size) + if (iov_iter_rw(iter) == READ && pos >= dio->i_size) { + /* + * We only report that we've read data up to i_size. + * Revert iter to a state corresponding to that as + * some callers (such as splice code) rely on it. + */ + iov_iter_revert(iter, pos - dio->i_size); break; + } } while ((count = iov_iter_count(iter)) > 0); blk_finish_plug(&plug); |