From 7627151ea30bce2051e3cb27d7bb2c30083f86a5 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 3 Feb 2016 21:24:49 +0800 Subject: libceph: define new ceph_file_layout structure Define new ceph_file_layout structure and rename old ceph_file_layout to ceph_file_layout_legacy. This is preparation for adding namespace to ceph_file_layout structure. Signed-off-by: Yan, Zheng --- fs/ceph/file.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/ceph/file.c') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 0daaf7ceedc5..cba5dcf49a65 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1583,9 +1583,9 @@ static int ceph_zero_objects(struct inode *inode, loff_t offset, loff_t length) { int ret = 0; struct ceph_inode_info *ci = ceph_inode(inode); - s32 stripe_unit = ceph_file_layout_su(ci->i_layout); - s32 stripe_count = ceph_file_layout_stripe_count(ci->i_layout); - s32 object_size = ceph_file_layout_object_size(ci->i_layout); + s32 stripe_unit = ci->i_layout.stripe_unit; + s32 stripe_count = ci->i_layout.stripe_count; + s32 object_size = ci->i_layout.object_size; u64 object_set_size = object_size * stripe_count; u64 nearly, t; -- cgit v1.2.3 From a22bd5ffae2d22c054c832fe0d60976ed9e4a49d Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 26 May 2016 10:30:13 +0800 Subject: ceph: set user pages dirty after direct IO read Signed-off-by: Yan, Zheng --- fs/ceph/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ceph/file.c') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index cba5dcf49a65..ac75fa9fd858 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -708,7 +708,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req) } } - ceph_put_page_vector(osd_data->pages, num_pages, false); + ceph_put_page_vector(osd_data->pages, num_pages, !aio_req->write); ceph_osdc_put_request(req); if (rc < 0) @@ -964,7 +964,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, len = ret; } - ceph_put_page_vector(pages, num_pages, false); + ceph_put_page_vector(pages, num_pages, !write); ceph_osdc_put_request(req); if (ret < 0) -- cgit v1.2.3 From fc8c3892f30c39f28fdb835f7c8598ac4cf5ed1e Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 14 Jun 2016 11:13:59 +0800 Subject: ceph: fix use-after-free bug in ceph_direct_read_write() ceph_aio_complete() can free the ceph_aio_request struct before the code exits the while loop. Signed-off-by: Yan, Zheng --- fs/ceph/file.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/ceph/file.c') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index ac75fa9fd858..033e88753875 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -985,6 +985,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, } if (aio_req) { + LIST_HEAD(osd_reqs); + if (aio_req->num_reqs == 0) { kfree(aio_req); return ret; @@ -993,8 +995,9 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, ceph_get_cap_refs(ci, write ? CEPH_CAP_FILE_WR : CEPH_CAP_FILE_RD); - while (!list_empty(&aio_req->osd_reqs)) { - req = list_first_entry(&aio_req->osd_reqs, + list_splice(&aio_req->osd_reqs, &osd_reqs); + while (!list_empty(&osd_reqs)) { + req = list_first_entry(&osd_reqs, struct ceph_osd_request, r_unsafe_item); list_del_init(&req->r_unsafe_item); -- cgit v1.2.3 From 9a5530c63889ac928a45c4645ab0bc23b4fbfcb8 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 15 Jun 2016 16:29:18 +0800 Subject: ceph: wait unsafe sync writes for evicting inode Otherwise ceph_sync_write_unsafe() may access/modify freed inode. Signed-off-by: Yan, Zheng --- fs/ceph/caps.c | 50 ++------------------------------------------------ fs/ceph/file.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ fs/ceph/inode.c | 8 ++++++++ fs/ceph/super.c | 1 + fs/ceph/super.h | 2 ++ 5 files changed, 61 insertions(+), 48 deletions(-) (limited to 'fs/ceph/file.c') diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index a08d245f16f5..1e48377f18a7 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1927,53 +1927,6 @@ static int caps_are_flushed(struct inode *inode, u64 flush_tid) return ret; } -/* - * Wait on any unsafe replies for the given inode. First wait on the - * newest request, and make that the upper bound. Then, if there are - * more requests, keep waiting on the oldest as long as it is still older - * than the original request. - */ -static void sync_write_wait(struct inode *inode) -{ - struct ceph_inode_info *ci = ceph_inode(inode); - struct list_head *head = &ci->i_unsafe_writes; - struct ceph_osd_request *req; - u64 last_tid; - - if (!S_ISREG(inode->i_mode)) - return; - - spin_lock(&ci->i_unsafe_lock); - if (list_empty(head)) - goto out; - - /* set upper bound as _last_ entry in chain */ - req = list_last_entry(head, struct ceph_osd_request, - r_unsafe_item); - last_tid = req->r_tid; - - do { - ceph_osdc_get_request(req); - spin_unlock(&ci->i_unsafe_lock); - dout("sync_write_wait on tid %llu (until %llu)\n", - req->r_tid, last_tid); - wait_for_completion(&req->r_safe_completion); - spin_lock(&ci->i_unsafe_lock); - ceph_osdc_put_request(req); - - /* - * from here on look at first entry in chain, since we - * only want to wait for anything older than last_tid - */ - if (list_empty(head)) - break; - req = list_first_entry(head, struct ceph_osd_request, - r_unsafe_item); - } while (req->r_tid < last_tid); -out: - spin_unlock(&ci->i_unsafe_lock); -} - /* * wait for any unsafe requests to complete. */ @@ -2026,7 +1979,8 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) int dirty; dout("fsync %p%s\n", inode, datasync ? " datasync" : ""); - sync_write_wait(inode); + + ceph_sync_write_wait(inode); ret = filemap_write_and_wait_range(inode->i_mapping, start, end); if (ret < 0) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 033e88753875..7f2ef262cdf7 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -821,6 +821,54 @@ static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe) } } +/* + * Wait on any unsafe replies for the given inode. First wait on the + * newest request, and make that the upper bound. Then, if there are + * more requests, keep waiting on the oldest as long as it is still older + * than the original request. + */ +void ceph_sync_write_wait(struct inode *inode) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + struct list_head *head = &ci->i_unsafe_writes; + struct ceph_osd_request *req; + u64 last_tid; + + if (!S_ISREG(inode->i_mode)) + return; + + spin_lock(&ci->i_unsafe_lock); + if (list_empty(head)) + goto out; + + /* set upper bound as _last_ entry in chain */ + + req = list_last_entry(head, struct ceph_osd_request, + r_unsafe_item); + last_tid = req->r_tid; + + do { + ceph_osdc_get_request(req); + spin_unlock(&ci->i_unsafe_lock); + + dout("sync_write_wait on tid %llu (until %llu)\n", + req->r_tid, last_tid); + wait_for_completion(&req->r_safe_completion); + ceph_osdc_put_request(req); + + spin_lock(&ci->i_unsafe_lock); + /* + * from here on look at first entry in chain, since we + * only want to wait for anything older than last_tid + */ + if (list_empty(head)) + break; + req = list_first_entry(head, struct ceph_osd_request, + r_unsafe_item); + } while (req->r_tid < last_tid); +out: + spin_unlock(&ci->i_unsafe_lock); +} static ssize_t ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 8ca843371d4b..6e16269277bd 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -585,6 +585,14 @@ int ceph_drop_inode(struct inode *inode) return 1; } +void ceph_evict_inode(struct inode *inode) +{ + /* wait unsafe sync writes */ + ceph_sync_write_wait(inode); + truncate_inode_pages_final(&inode->i_data); + clear_inode(inode); +} + static inline blkcnt_t calc_inode_blocks(u64 size) { return (size + (1<<9) - 1) >> 9; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 91e02481ce06..a5b2275e1573 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -731,6 +731,7 @@ static const struct super_operations ceph_super_ops = { .destroy_inode = ceph_destroy_inode, .write_inode = ceph_write_inode, .drop_inode = ceph_drop_inode, + .evict_inode = ceph_evict_inode, .sync_fs = ceph_sync_fs, .put_super = ceph_put_super, .show_options = ceph_show_options, diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 50846e6f6a8c..d5b9077467a4 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -749,6 +749,7 @@ extern const struct inode_operations ceph_file_iops; extern struct inode *ceph_alloc_inode(struct super_block *sb); extern void ceph_destroy_inode(struct inode *inode); extern int ceph_drop_inode(struct inode *inode); +extern void ceph_evict_inode(struct inode *inode); extern struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino); @@ -927,6 +928,7 @@ extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, extern int ceph_release(struct inode *inode, struct file *filp); extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, char *data, size_t len); +extern void ceph_sync_write_wait(struct inode *inode); /* dir.c */ extern const struct file_operations ceph_dir_fops; extern const struct file_operations ceph_snapdir_fops; -- cgit v1.2.3 From 955818cd5b6c4b58ea574ace4573e7afa4c19c1e Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Thu, 21 Jul 2016 13:43:09 -0400 Subject: ceph: Correctly return NXIO errors from ceph_llseek ceph_llseek does not correctly return NXIO errors because the 'out' path always returns 'offset'. Fixes: 06222e491e66 ("fs: handle SEEK_HOLE/SEEK_DATA properly in all fs's that define their own llseek") Signed-off-by: Phil Turnbull Signed-off-by: Yan, Zheng --- fs/ceph/file.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'fs/ceph/file.c') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 7f2ef262cdf7..0f5375d8e030 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1499,16 +1499,14 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence) { struct inode *inode = file->f_mapping->host; loff_t i_size; - int ret; + loff_t ret; inode_lock(inode); if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) { ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false); - if (ret < 0) { - offset = ret; + if (ret < 0) goto out; - } } i_size = i_size_read(inode); @@ -1524,7 +1522,7 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence) * write() or lseek() might have altered it */ if (offset == 0) { - offset = file->f_pos; + ret = file->f_pos; goto out; } offset += file->f_pos; @@ -1544,11 +1542,11 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int whence) break; } - offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); + ret = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); out: inode_unlock(inode); - return offset; + return ret; } static inline void ceph_zero_partial_page( -- cgit v1.2.3