diff options
Diffstat (limited to 'fs/fuse/file.c')
-rw-r--r-- | fs/fuse/file.c | 542 |
1 files changed, 363 insertions, 179 deletions
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 7e70506297bc..caa8d95b24e8 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -127,7 +127,15 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) if (atomic_dec_and_test(&ff->count)) { struct fuse_req *req = ff->reserved_req; - if (sync) { + if (ff->fc->no_open) { + /* + * Drop the release request when client does not + * implement 'open' + */ + req->background = 0; + path_put(&req->misc.release.path); + fuse_put_request(ff->fc, req); + } else if (sync) { req->background = 0; fuse_request_send(ff->fc, req); path_put(&req->misc.release.path); @@ -144,33 +152,58 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, bool isdir) { - struct fuse_open_out outarg; struct fuse_file *ff; - int err; int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; ff = fuse_file_alloc(fc); if (!ff) return -ENOMEM; - err = fuse_send_open(fc, nodeid, file, opcode, &outarg); - if (err) { - fuse_file_free(ff); - return err; + ff->fh = 0; + ff->open_flags = FOPEN_KEEP_CACHE; /* Default for no-open */ + if (!fc->no_open || isdir) { + struct fuse_open_out outarg; + int err; + + err = fuse_send_open(fc, nodeid, file, opcode, &outarg); + if (!err) { + ff->fh = outarg.fh; + ff->open_flags = outarg.open_flags; + + } else if (err != -ENOSYS || isdir) { + fuse_file_free(ff); + return err; + } else { + fc->no_open = 1; + } } if (isdir) - outarg.open_flags &= ~FOPEN_DIRECT_IO; + ff->open_flags &= ~FOPEN_DIRECT_IO; - ff->fh = outarg.fh; ff->nodeid = nodeid; - ff->open_flags = outarg.open_flags; file->private_data = fuse_file_get(ff); return 0; } EXPORT_SYMBOL_GPL(fuse_do_open); +static void fuse_link_write_file(struct file *file) +{ + struct inode *inode = file_inode(file); + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_file *ff = file->private_data; + /* + * file may be written through mmap, so chain it onto the + * inodes's write_file list + */ + spin_lock(&fc->lock); + if (list_empty(&ff->write_entry)) + list_add(&ff->write_entry, &fi->write_files); + spin_unlock(&fc->lock); +} + void fuse_finish_open(struct inode *inode, struct file *file) { struct fuse_file *ff = file->private_data; @@ -190,25 +223,37 @@ void fuse_finish_open(struct inode *inode, struct file *file) i_size_write(inode, 0); spin_unlock(&fc->lock); fuse_invalidate_attr(inode); + if (fc->writeback_cache) + file_update_time(file); } + if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache) + fuse_link_write_file(file); } int fuse_open_common(struct inode *inode, struct file *file, bool isdir) { struct fuse_conn *fc = get_fuse_conn(inode); int err; + bool lock_inode = (file->f_flags & O_TRUNC) && + fc->atomic_o_trunc && + fc->writeback_cache; err = generic_file_open(inode, file); if (err) return err; + if (lock_inode) + mutex_lock(&inode->i_mutex); + err = fuse_do_open(fc, get_node_id(inode), file, isdir); - if (err) - return err; - fuse_finish_open(inode, file); + if (!err) + fuse_finish_open(inode, file); - return 0; + if (lock_inode) + mutex_unlock(&inode->i_mutex); + + return err; } static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode) @@ -275,6 +320,12 @@ static int fuse_open(struct inode *inode, struct file *file) static int fuse_release(struct inode *inode, struct file *file) { + struct fuse_conn *fc = get_fuse_conn(inode); + + /* see fuse_vma_close() for !writeback_cache case */ + if (fc->writeback_cache) + write_inode_now(inode, 1); + fuse_release_common(file, FUSE_RELEASE); /* return value is ignored by VFS */ @@ -316,12 +367,13 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id) } /* - * Check if page is under writeback + * Check if any page in a range is under writeback * * This is currently done by walking the list of writepage requests * for the inode, which can be pretty inefficient. */ -static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) +static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from, + pgoff_t idx_to) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); @@ -334,8 +386,8 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) BUG_ON(req->inode != inode); curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT; - if (curr_index <= index && - index < curr_index + req->num_pages) { + if (idx_from < curr_index + req->num_pages && + curr_index <= idx_to) { found = true; break; } @@ -345,6 +397,11 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) return found; } +static inline bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) +{ + return fuse_range_is_writeback(inode, index, index); +} + /* * Wait for page writeback to be completed. * @@ -359,6 +416,21 @@ static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index) return 0; } +/* + * Wait for all pending writepages on the inode to finish. + * + * This is currently done by blocking further writes with FUSE_NOWRITE + * and waiting for all sent writes to complete. + * + * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage + * could conflict with truncation. + */ +static void fuse_sync_writes(struct inode *inode) +{ + fuse_set_nowrite(inode); + fuse_release_nowrite(inode); +} + static int fuse_flush(struct file *file, fl_owner_t id) { struct inode *inode = file_inode(file); @@ -374,6 +446,14 @@ static int fuse_flush(struct file *file, fl_owner_t id) if (fc->no_flush) return 0; + err = write_inode_now(inode, 1); + if (err) + return err; + + mutex_lock(&inode->i_mutex); + fuse_sync_writes(inode); + mutex_unlock(&inode->i_mutex); + req = fuse_get_req_nofail_nopages(fc, file); memset(&inarg, 0, sizeof(inarg)); inarg.fh = ff->fh; @@ -394,21 +474,6 @@ static int fuse_flush(struct file *file, fl_owner_t id) return err; } -/* - * Wait for all pending writepages on the inode to finish. - * - * This is currently done by blocking further writes with FUSE_NOWRITE - * and waiting for all sent writes to complete. - * - * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage - * could conflict with truncation. - */ -static void fuse_sync_writes(struct inode *inode) -{ - fuse_set_nowrite(inode); - fuse_release_nowrite(inode); -} - int fuse_fsync_common(struct file *file, loff_t start, loff_t end, int datasync, int isdir) { @@ -422,13 +487,6 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, if (is_bad_inode(inode)) return -EIO; - err = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (err) - return err; - - if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) - return 0; - mutex_lock(&inode->i_mutex); /* @@ -436,11 +494,17 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, * wait for all outstanding writes, before sending the FSYNC * request. */ - err = write_inode_now(inode, 0); + err = filemap_write_and_wait_range(inode->i_mapping, start, end); if (err) goto out; fuse_sync_writes(inode); + err = sync_inode_metadata(inode, 1); + if (err) + goto out; + + if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) + goto out; req = fuse_get_req_nopages(fc); if (IS_ERR(req)) { @@ -638,7 +702,33 @@ static void fuse_read_update_size(struct inode *inode, loff_t size, spin_unlock(&fc->lock); } -static int fuse_readpage(struct file *file, struct page *page) +static void fuse_short_read(struct fuse_req *req, struct inode *inode, + u64 attr_ver) +{ + size_t num_read = req->out.args[0].size; + struct fuse_conn *fc = get_fuse_conn(inode); + + if (fc->writeback_cache) { + /* + * A hole in a file. Some data after the hole are in page cache, + * but have not reached the client fs yet. So, the hole is not + * present there. + */ + int i; + int start_idx = num_read >> PAGE_CACHE_SHIFT; + size_t off = num_read & (PAGE_CACHE_SIZE - 1); + + for (i = start_idx; i < req->num_pages; i++) { + zero_user_segment(req->pages[i], off, PAGE_CACHE_SIZE); + off = 0; + } + } else { + loff_t pos = page_offset(req->pages[0]) + num_read; + fuse_read_update_size(inode, pos, attr_ver); + } +} + +static int fuse_do_readpage(struct file *file, struct page *page) { struct fuse_io_priv io = { .async = 0, .file = file }; struct inode *inode = page->mapping->host; @@ -650,10 +740,6 @@ static int fuse_readpage(struct file *file, struct page *page) u64 attr_ver; int err; - err = -EIO; - if (is_bad_inode(inode)) - goto out; - /* * Page writeback can extend beyond the lifetime of the * page-cache page, so make sure we read a properly synced @@ -662,9 +748,8 @@ static int fuse_readpage(struct file *file, struct page *page) fuse_wait_on_page_writeback(inode, page->index); req = fuse_get_req(fc, 1); - err = PTR_ERR(req); if (IS_ERR(req)) - goto out; + return PTR_ERR(req); attr_ver = fuse_get_attr_version(fc); @@ -675,19 +760,33 @@ static int fuse_readpage(struct file *file, struct page *page) req->page_descs[0].length = count; num_read = fuse_send_read(req, &io, pos, count, NULL); err = req->out.h.error; - fuse_put_request(fc, req); if (!err) { /* * Short read means EOF. If file size is larger, truncate it */ if (num_read < count) - fuse_read_update_size(inode, pos + num_read, attr_ver); + fuse_short_read(req, inode, attr_ver); SetPageUptodate(page); } - fuse_invalidate_attr(inode); /* atime changed */ + fuse_put_request(fc, req); + + return err; +} + +static int fuse_readpage(struct file *file, struct page *page) +{ + struct inode *inode = page->mapping->host; + int err; + + err = -EIO; + if (is_bad_inode(inode)) + goto out; + + err = fuse_do_readpage(file, page); + fuse_invalidate_atime(inode); out: unlock_page(page); return err; @@ -709,14 +808,10 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) /* * Short read means EOF. If file size is larger, truncate it */ - if (!req->out.h.error && num_read < count) { - loff_t pos; + if (!req->out.h.error && num_read < count) + fuse_short_read(req, inode, req->misc.read.attr_ver); - pos = page_offset(req->pages[0]) + num_read; - fuse_read_update_size(inode, pos, - req->misc.read.attr_ver); - } - fuse_invalidate_attr(inode); /* atime changed */ + fuse_invalidate_atime(inode); } for (i = 0; i < req->num_pages; i++) { @@ -838,8 +933,7 @@ out: return err; } -static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct inode *inode = iocb->ki_filp->f_mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); @@ -850,14 +944,14 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov, * i_size is up to date). */ if (fc->auto_inval_data || - (pos + iov_length(iov, nr_segs) > i_size_read(inode))) { + (iocb->ki_pos + iov_iter_count(to) > i_size_read(inode))) { int err; err = fuse_update_attributes(inode, NULL, iocb->ki_filp, NULL); if (err) return err; } - return generic_file_aio_read(iocb, iov, nr_segs, pos); + return generic_file_read_iter(iocb, to); } static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff, @@ -905,16 +999,21 @@ static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io, return req->misc.write.out.size; } -void fuse_write_update_size(struct inode *inode, loff_t pos) +bool fuse_write_update_size(struct inode *inode, loff_t pos) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); + bool ret = false; spin_lock(&fc->lock); fi->attr_version = ++fc->attr_version; - if (pos > inode->i_size) + if (pos > inode->i_size) { i_size_write(inode, pos); + ret = true; + } spin_unlock(&fc->lock); + + return ret; } static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, @@ -986,13 +1085,9 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, if (mapping_writably_mapped(mapping)) flush_dcache_page(page); - pagefault_disable(); tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes); - pagefault_enable(); flush_dcache_page(page); - mark_page_accessed(page); - if (!tmp) { unlock_page(page); page_cache_release(page); @@ -1085,28 +1180,27 @@ static ssize_t fuse_perform_write(struct file *file, return res > 0 ? res : err; } -static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; - size_t count = 0; - size_t ocount = 0; + size_t count = iov_iter_count(from); ssize_t written = 0; ssize_t written_buffered = 0; struct inode *inode = mapping->host; ssize_t err; - struct iov_iter i; loff_t endbyte = 0; + loff_t pos = iocb->ki_pos; - WARN_ON(iocb->ki_pos != pos); + if (get_fuse_conn(inode)->writeback_cache) { + /* Update size (EOF optimization) and mode (SUID clearing) */ + err = fuse_update_attributes(mapping->host, NULL, file, NULL); + if (err) + return err; - ocount = 0; - err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); - if (err) - return err; + return generic_file_write_iter(iocb, from); + } - count = ocount; mutex_lock(&inode->i_mutex); /* We can write back this queue in page reclaim */ @@ -1119,6 +1213,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (count == 0) goto out; + iov_iter_truncate(from, count); err = file_remove_suid(file); if (err) goto out; @@ -1128,17 +1223,13 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, goto out; if (file->f_flags & O_DIRECT) { - written = generic_file_direct_write(iocb, iov, &nr_segs, - pos, &iocb->ki_pos, - count, ocount); - if (written < 0 || written == count) + written = generic_file_direct_write(iocb, from, pos); + if (written < 0 || !iov_iter_count(from)) goto out; pos += written; - count -= written; - iov_iter_init(&i, iov, nr_segs, count, written); - written_buffered = fuse_perform_write(file, mapping, &i, pos); + written_buffered = fuse_perform_write(file, mapping, from, pos); if (written_buffered < 0) { err = written_buffered; goto out; @@ -1157,8 +1248,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, written += written_buffered; iocb->ki_pos = pos + written_buffered; } else { - iov_iter_init(&i, iov, nr_segs, count, 0); - written = fuse_perform_write(file, mapping, &i, pos); + written = fuse_perform_write(file, mapping, from, pos); if (written >= 0) iocb->ki_pos = pos + written; } @@ -1196,7 +1286,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, size_t nbytes = 0; /* # bytes already packed in req */ /* Special case for kernel I/O: can copy directly into the buffer */ - if (segment_eq(get_fs(), KERNEL_DS)) { + if (ii->type & ITER_KVEC) { unsigned long user_addr = fuse_get_user_addr(ii); size_t frag_size = fuse_get_frag_size(ii, *nbytesp); @@ -1212,35 +1302,27 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, while (nbytes < *nbytesp && req->num_pages < req->max_pages) { unsigned npages; - unsigned long user_addr = fuse_get_user_addr(ii); - unsigned offset = user_addr & ~PAGE_MASK; - size_t frag_size = fuse_get_frag_size(ii, *nbytesp - nbytes); - int ret; - - unsigned n = req->max_pages - req->num_pages; - frag_size = min_t(size_t, frag_size, n << PAGE_SHIFT); - - npages = (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; - npages = clamp(npages, 1U, n); - - ret = get_user_pages_fast(user_addr, npages, !write, - &req->pages[req->num_pages]); + size_t start; + ssize_t ret = iov_iter_get_pages(ii, + &req->pages[req->num_pages], + *nbytesp - nbytes, + req->max_pages - req->num_pages, + &start); if (ret < 0) return ret; - npages = ret; - frag_size = min_t(size_t, frag_size, - (npages << PAGE_SHIFT) - offset); - iov_iter_advance(ii, frag_size); + iov_iter_advance(ii, ret); + nbytes += ret; - req->page_descs[req->num_pages].offset = offset; + ret += start; + npages = (ret + PAGE_SIZE - 1) / PAGE_SIZE; + + req->page_descs[req->num_pages].offset = start; fuse_page_descs_length_init(req, req->num_pages, npages); req->num_pages += npages; req->page_descs[req->num_pages - 1].length -= - (npages << PAGE_SHIFT) - offset - frag_size; - - nbytes += frag_size; + (PAGE_SIZE - ret) & (PAGE_SIZE - 1); } if (write) @@ -1255,48 +1337,46 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, static inline int fuse_iter_npages(const struct iov_iter *ii_p) { - struct iov_iter ii = *ii_p; - int npages = 0; - - while (iov_iter_count(&ii) && npages < FUSE_MAX_PAGES_PER_REQ) { - unsigned long user_addr = fuse_get_user_addr(&ii); - unsigned offset = user_addr & ~PAGE_MASK; - size_t frag_size = iov_iter_single_seg_count(&ii); - - npages += (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; - iov_iter_advance(&ii, frag_size); - } - - return min(npages, FUSE_MAX_PAGES_PER_REQ); + return iov_iter_npages(ii_p, FUSE_MAX_PAGES_PER_REQ); } -ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, - unsigned long nr_segs, size_t count, loff_t *ppos, - int write) +ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, + loff_t *ppos, int flags) { + int write = flags & FUSE_DIO_WRITE; + int cuse = flags & FUSE_DIO_CUSE; struct file *file = io->file; + struct inode *inode = file->f_mapping->host; struct fuse_file *ff = file->private_data; struct fuse_conn *fc = ff->fc; size_t nmax = write ? fc->max_write : fc->max_read; loff_t pos = *ppos; + size_t count = iov_iter_count(iter); + pgoff_t idx_from = pos >> PAGE_CACHE_SHIFT; + pgoff_t idx_to = (pos + count - 1) >> PAGE_CACHE_SHIFT; ssize_t res = 0; struct fuse_req *req; - struct iov_iter ii; - - iov_iter_init(&ii, iov, nr_segs, count, 0); if (io->async) - req = fuse_get_req_for_background(fc, fuse_iter_npages(&ii)); + req = fuse_get_req_for_background(fc, fuse_iter_npages(iter)); else - req = fuse_get_req(fc, fuse_iter_npages(&ii)); + req = fuse_get_req(fc, fuse_iter_npages(iter)); if (IS_ERR(req)) return PTR_ERR(req); + if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) { + if (!write) + mutex_lock(&inode->i_mutex); + fuse_sync_writes(inode); + if (!write) + mutex_unlock(&inode->i_mutex); + } + while (count) { size_t nres; fl_owner_t owner = current->files; size_t nbytes = min(count, nmax); - int err = fuse_get_user_pages(req, &ii, &nbytes, write); + int err = fuse_get_user_pages(req, iter, &nbytes, write); if (err) { res = err; break; @@ -1326,9 +1406,9 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, fuse_put_request(fc, req); if (io->async) req = fuse_get_req_for_background(fc, - fuse_iter_npages(&ii)); + fuse_iter_npages(iter)); else - req = fuse_get_req(fc, fuse_iter_npages(&ii)); + req = fuse_get_req(fc, fuse_iter_npages(iter)); if (IS_ERR(req)) break; } @@ -1343,9 +1423,8 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, EXPORT_SYMBOL_GPL(fuse_direct_io); static ssize_t __fuse_direct_read(struct fuse_io_priv *io, - const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos, - size_t count) + struct iov_iter *iter, + loff_t *ppos) { ssize_t res; struct file *file = io->file; @@ -1354,7 +1433,7 @@ static ssize_t __fuse_direct_read(struct fuse_io_priv *io, if (is_bad_inode(inode)) return -EIO; - res = fuse_direct_io(io, iov, nr_segs, count, ppos, 0); + res = fuse_direct_io(io, iter, ppos, 0); fuse_invalidate_attr(inode); @@ -1366,21 +1445,26 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf, { struct fuse_io_priv io = { .async = 0, .file = file }; struct iovec iov = { .iov_base = buf, .iov_len = count }; - return __fuse_direct_read(&io, &iov, 1, ppos, count); + struct iov_iter ii; + iov_iter_init(&ii, READ, &iov, 1, count); + return __fuse_direct_read(&io, &ii, ppos); } static ssize_t __fuse_direct_write(struct fuse_io_priv *io, - const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) + struct iov_iter *iter, + loff_t *ppos) { struct file *file = io->file; struct inode *inode = file_inode(file); - size_t count = iov_length(iov, nr_segs); + size_t count = iov_iter_count(iter); ssize_t res; + res = generic_write_checks(file, ppos, &count, 0); - if (!res) - res = fuse_direct_io(io, iov, nr_segs, count, ppos, 1); + if (!res) { + iov_iter_truncate(iter, count); + res = fuse_direct_io(io, iter, ppos, FUSE_DIO_WRITE); + } fuse_invalidate_attr(inode); @@ -1394,13 +1478,15 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf, struct inode *inode = file_inode(file); ssize_t res; struct fuse_io_priv io = { .async = 0, .file = file }; + struct iov_iter ii; + iov_iter_init(&ii, WRITE, &iov, 1, count); if (is_bad_inode(inode)) return -EIO; /* Don't allow parallel writes to the same file */ mutex_lock(&inode->i_mutex); - res = __fuse_direct_write(&io, &iov, 1, ppos); + res = __fuse_direct_write(&io, &ii, ppos); if (res > 0) fuse_write_update_size(inode, *ppos); mutex_unlock(&inode->i_mutex); @@ -1539,13 +1625,13 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req) fuse_writepage_free(fc, req); } -static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc, - struct fuse_inode *fi) +static struct fuse_file *__fuse_write_file_get(struct fuse_conn *fc, + struct fuse_inode *fi) { struct fuse_file *ff = NULL; spin_lock(&fc->lock); - if (!WARN_ON(list_empty(&fi->write_files))) { + if (!list_empty(&fi->write_files)) { ff = list_entry(fi->write_files.next, struct fuse_file, write_entry); fuse_file_get(ff); @@ -1555,6 +1641,29 @@ static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc, return ff; } +static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc, + struct fuse_inode *fi) +{ + struct fuse_file *ff = __fuse_write_file_get(fc, fi); + WARN_ON(!ff); + return ff; +} + +int fuse_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_file *ff; + int err; + + ff = __fuse_write_file_get(fc, fi); + err = fuse_flush_times(inode, ff); + if (ff) + fuse_file_put(ff, 0); + + return err; +} + static int fuse_writepage_locked(struct page *page) { struct address_space *mapping = page->mapping; @@ -1579,7 +1688,7 @@ static int fuse_writepage_locked(struct page *page) error = -EIO; req->ff = fuse_write_file_get(fc, fi); if (!req->ff) - goto err_free; + goto err_nofile; fuse_write_fill(req, req->ff, page_offset(page), 0); @@ -1607,6 +1716,8 @@ static int fuse_writepage_locked(struct page *page) return 0; +err_nofile: + __free_page(tmp_page); err_free: fuse_request_free(req); err: @@ -1847,8 +1958,8 @@ static int fuse_writepages(struct address_space *mapping, data.ff = NULL; err = -ENOMEM; - data.orig_pages = kzalloc(sizeof(struct page *) * - FUSE_MAX_PAGES_PER_REQ, + data.orig_pages = kcalloc(FUSE_MAX_PAGES_PER_REQ, + sizeof(struct page *), GFP_NOFS); if (!data.orig_pages) goto out; @@ -1868,6 +1979,77 @@ out: return err; } +/* + * It's worthy to make sure that space is reserved on disk for the write, + * but how to implement it without killing performance need more thinking. + */ +static int fuse_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + pgoff_t index = pos >> PAGE_CACHE_SHIFT; + struct fuse_conn *fc = get_fuse_conn(file->f_dentry->d_inode); + struct page *page; + loff_t fsize; + int err = -ENOMEM; + + WARN_ON(!fc->writeback_cache); + + page = grab_cache_page_write_begin(mapping, index, flags); + if (!page) + goto error; + + fuse_wait_on_page_writeback(mapping->host, page->index); + + if (PageUptodate(page) || len == PAGE_CACHE_SIZE) + goto success; + /* + * Check if the start this page comes after the end of file, in which + * case the readpage can be optimized away. + */ + fsize = i_size_read(mapping->host); + if (fsize <= (pos & PAGE_CACHE_MASK)) { + size_t off = pos & ~PAGE_CACHE_MASK; + if (off) + zero_user_segment(page, 0, off); + goto success; + } + err = fuse_do_readpage(file, page); + if (err) + goto cleanup; +success: + *pagep = page; + return 0; + +cleanup: + unlock_page(page); + page_cache_release(page); +error: + return err; +} + +static int fuse_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + struct inode *inode = page->mapping->host; + + if (!PageUptodate(page)) { + /* Zero any unwritten bytes at the end of the page */ + size_t endoff = (pos + copied) & ~PAGE_CACHE_MASK; + if (endoff) + zero_user_segment(page, endoff, PAGE_CACHE_SIZE); + SetPageUptodate(page); + } + + fuse_write_update_size(inode, pos + copied); + set_page_dirty(page); + unlock_page(page); + page_cache_release(page); + + return copied; +} + static int fuse_launder_page(struct page *page) { int err = 0; @@ -1923,26 +2105,16 @@ static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) static const struct vm_operations_struct fuse_file_vm_ops = { .close = fuse_vma_close, .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = fuse_page_mkwrite, .remap_pages = generic_file_remap_pages, }; static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) { - if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) { - struct inode *inode = file_inode(file); - struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_inode *fi = get_fuse_inode(inode); - struct fuse_file *ff = file->private_data; - /* - * file may be written through mmap, so chain it onto the - * inodes's write_file list - */ - spin_lock(&fc->lock); - if (list_empty(&ff->write_entry)) - list_add(&ff->write_entry, &fi->write_files); - spin_unlock(&fc->lock); - } + if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) + fuse_link_write_file(file); + file_accessed(file); vma->vm_ops = &fuse_file_vm_ops; return 0; @@ -2100,7 +2272,6 @@ static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl) struct fuse_file *ff = file->private_data; /* emulate flock with POSIX locks */ - fl->fl_owner = (fl_owner_t) file; ff->flock = true; err = fuse_setlk(file, fl, 1); } @@ -2171,7 +2342,7 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov, if (!bytes) return 0; - iov_iter_init(&ii, iov, nr_segs, bytes, 0); + iov_iter_init(&ii, to_user ? READ : WRITE, iov, nr_segs, bytes); while (iov_iter_count(&ii)) { struct page *page = pages[page_idx++]; @@ -2589,7 +2760,7 @@ static void fuse_register_polled_file(struct fuse_conn *fc, { spin_lock(&fc->lock); if (RB_EMPTY_NODE(&ff->polled_node)) { - struct rb_node **link, *parent; + struct rb_node **link, *uninitialized_var(parent); link = fuse_find_polled_node(fc, ff->kh, &parent); BUG_ON(*link); @@ -2693,8 +2864,8 @@ static inline loff_t fuse_round_up(loff_t off) } static ssize_t -fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t offset, unsigned long nr_segs) +fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { ssize_t ret = 0; struct file *file = iocb->ki_filp; @@ -2703,18 +2874,22 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos = 0; struct inode *inode; loff_t i_size; - size_t count = iov_length(iov, nr_segs); + size_t count = iov_iter_count(iter); struct fuse_io_priv *io; pos = offset; inode = file->f_mapping->host; i_size = i_size_read(inode); + if ((rw == READ) && (offset > i_size)) + return 0; + /* optimization for short read */ if (async_dio && rw != WRITE && offset + count > i_size) { if (offset >= i_size) return 0; count = min_t(loff_t, count, fuse_round_up(i_size - offset)); + iov_iter_truncate(iter, count); } io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); @@ -2744,9 +2919,9 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, io->async = false; if (rw == WRITE) - ret = __fuse_direct_write(io, iov, nr_segs, &pos); + ret = __fuse_direct_write(io, iter, &pos); else - ret = __fuse_direct_read(io, iov, nr_segs, &pos, count); + ret = __fuse_direct_read(io, iter, &pos); if (io->async) { fuse_aio_complete(io, ret < 0 ? ret : 0, -1); @@ -2788,6 +2963,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) || (mode & FALLOC_FL_PUNCH_HOLE); + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + return -EOPNOTSUPP; + if (fc->no_fallocate) return -EOPNOTSUPP; @@ -2830,8 +3008,12 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, goto out; /* we could have extended the file */ - if (!(mode & FALLOC_FL_KEEP_SIZE)) - fuse_write_update_size(inode, offset + length); + if (!(mode & FALLOC_FL_KEEP_SIZE)) { + bool changed = fuse_write_update_size(inode, offset + length); + + if (changed && fc->writeback_cache) + file_update_time(file); + } if (mode & FALLOC_FL_PUNCH_HOLE) truncate_pagecache_range(inode, offset, offset + length - 1); @@ -2850,10 +3032,10 @@ out: static const struct file_operations fuse_file_operations = { .llseek = fuse_file_llseek, - .read = do_sync_read, - .aio_read = fuse_file_aio_read, - .write = do_sync_write, - .aio_write = fuse_file_aio_write, + .read = new_sync_read, + .read_iter = fuse_file_read_iter, + .write = new_sync_write, + .write_iter = fuse_file_write_iter, .mmap = fuse_file_mmap, .open = fuse_open, .flush = fuse_flush, @@ -2895,6 +3077,8 @@ static const struct address_space_operations fuse_file_aops = { .set_page_dirty = __set_page_dirty_nobuffers, .bmap = fuse_bmap, .direct_IO = fuse_direct_IO, + .write_begin = fuse_write_begin, + .write_end = fuse_write_end, }; void fuse_init_file_inode(struct inode *inode) |