diff options
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/aio.c | 70 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.c | 39 | ||||
| -rw-r--r-- | fs/btrfs/extent_io.h | 3 | ||||
| -rw-r--r-- | fs/btrfs/ioctl.c | 147 | ||||
| -rw-r--r-- | fs/btrfs/qgroup.c | 4 | ||||
| -rw-r--r-- | fs/btrfs/reada.c | 9 | ||||
| -rw-r--r-- | fs/btrfs/tests/btrfs-tests.c | 2 | ||||
| -rw-r--r-- | fs/btrfs/tests/qgroup-tests.c | 2 | ||||
| -rw-r--r-- | fs/btrfs/transaction.c | 12 | ||||
| -rw-r--r-- | fs/ceph/acl.c | 6 | ||||
| -rw-r--r-- | fs/ceph/addr.c | 17 | ||||
| -rw-r--r-- | fs/ceph/caps.c | 246 | ||||
| -rw-r--r-- | fs/ceph/export.c | 2 | ||||
| -rw-r--r-- | fs/ceph/inode.c | 247 | ||||
| -rw-r--r-- | fs/ceph/mds_client.c | 9 | ||||
| -rw-r--r-- | fs/ceph/mds_client.h | 1 | ||||
| -rw-r--r-- | fs/ceph/super.h | 13 | ||||
| -rw-r--r-- | fs/dlm/lowcomms.c | 5 | ||||
| -rw-r--r-- | fs/exec.c | 7 |
19 files changed, 537 insertions, 304 deletions
@@ -477,7 +477,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) } EXPORT_SYMBOL(kiocb_set_cancel_fn); -static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb) +static int kiocb_cancel(struct kiocb *kiocb) { kiocb_cancel_fn *old, *cancel; @@ -538,7 +538,7 @@ static void free_ioctx_users(struct percpu_ref *ref) struct kiocb, ki_list); list_del_init(&req->ki_list); - kiocb_cancel(ctx, req); + kiocb_cancel(req); } spin_unlock_irq(&ctx->ctx_lock); @@ -727,42 +727,42 @@ err: * when the processes owning a context have all exited to encourage * the rapid destruction of the kioctx. */ -static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, +static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, struct completion *requests_done) { - if (!atomic_xchg(&ctx->dead, 1)) { - struct kioctx_table *table; + struct kioctx_table *table; - spin_lock(&mm->ioctx_lock); - rcu_read_lock(); - table = rcu_dereference(mm->ioctx_table); + if (atomic_xchg(&ctx->dead, 1)) + return -EINVAL; - WARN_ON(ctx != table->table[ctx->id]); - table->table[ctx->id] = NULL; - rcu_read_unlock(); - spin_unlock(&mm->ioctx_lock); - /* percpu_ref_kill() will do the necessary call_rcu() */ - wake_up_all(&ctx->wait); + spin_lock(&mm->ioctx_lock); + rcu_read_lock(); + table = rcu_dereference(mm->ioctx_table); - /* - * It'd be more correct to do this in free_ioctx(), after all - * the outstanding kiocbs have finished - but by then io_destroy - * has already returned, so io_setup() could potentially return - * -EAGAIN with no ioctxs actually in use (as far as userspace - * could tell). - */ - aio_nr_sub(ctx->max_reqs); + WARN_ON(ctx != table->table[ctx->id]); + table->table[ctx->id] = NULL; + rcu_read_unlock(); + spin_unlock(&mm->ioctx_lock); - if (ctx->mmap_size) - vm_munmap(ctx->mmap_base, ctx->mmap_size); + /* percpu_ref_kill() will do the necessary call_rcu() */ + wake_up_all(&ctx->wait); - ctx->requests_done = requests_done; - percpu_ref_kill(&ctx->users); - } else { - if (requests_done) - complete(requests_done); - } + /* + * It'd be more correct to do this in free_ioctx(), after all + * the outstanding kiocbs have finished - but by then io_destroy + * has already returned, so io_setup() could potentially return + * -EAGAIN with no ioctxs actually in use (as far as userspace + * could tell). + */ + aio_nr_sub(ctx->max_reqs); + + if (ctx->mmap_size) + vm_munmap(ctx->mmap_base, ctx->mmap_size); + + ctx->requests_done = requests_done; + percpu_ref_kill(&ctx->users); + return 0; } /* wait_on_sync_kiocb: @@ -1219,21 +1219,23 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) if (likely(NULL != ioctx)) { struct completion requests_done = COMPLETION_INITIALIZER_ONSTACK(requests_done); + int ret; /* Pass requests_done to kill_ioctx() where it can be set * in a thread-safe way. If we try to set it here then we have * a race condition if two io_destroy() called simultaneously. */ - kill_ioctx(current->mm, ioctx, &requests_done); + ret = kill_ioctx(current->mm, ioctx, &requests_done); percpu_ref_put(&ioctx->users); /* Wait until all IO for the context are done. Otherwise kernel * keep using user-space buffers even if user thinks the context * is destroyed. */ - wait_for_completion(&requests_done); + if (!ret) + wait_for_completion(&requests_done); - return 0; + return ret; } pr_debug("EINVAL: io_destroy: invalid context id\n"); return -EINVAL; @@ -1595,7 +1597,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, kiocb = lookup_kiocb(ctx, iocb, key); if (kiocb) - ret = kiocb_cancel(ctx, kiocb); + ret = kiocb_cancel(kiocb); else ret = -EINVAL; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f25a9092b946..a389820d158b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2354,7 +2354,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end) { int uptodate = (err == 0); struct extent_io_tree *tree; - int ret; + int ret = 0; tree = &BTRFS_I(page->mapping->host)->io_tree; @@ -5068,6 +5068,43 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, } } +int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv, + unsigned long start, + unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char __user *dst = (char __user *)dstv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + int ret = 0; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); + + while (len > 0) { + page = extent_buffer_page(eb, i); + + cur = min(len, (PAGE_CACHE_SIZE - offset)); + kaddr = page_address(page); + if (copy_to_user(dst, kaddr + offset, cur)) { + ret = -EFAULT; + break; + } + + dst += cur; + len -= cur; + offset = 0; + i++; + } + + return ret; +} + int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, unsigned long min_len, char **map, unsigned long *map_start, diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 8b63f2d46518..15ce5f2a2b62 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -304,6 +304,9 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, void read_extent_buffer(struct extent_buffer *eb, void *dst, unsigned long start, unsigned long len); +int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dst, + unsigned long start, + unsigned long len); void write_extent_buffer(struct extent_buffer *eb, const void *src, unsigned long start, unsigned long len); void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 82c18ba12e3f..0d321c23069a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1957,7 +1957,8 @@ static noinline int copy_to_sk(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *key, struct btrfs_ioctl_search_key *sk, - char *buf, + size_t *buf_size, + char __user *ubuf, unsigned long *sk_offset, int *num_found) { @@ -1989,13 +1990,25 @@ static noinline int copy_to_sk(struct btrfs_root *root, if (!key_in_sk(key, sk)) continue; - if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE) + if (sizeof(sh) + item_len > *buf_size) { + if (*num_found) { + ret = 1; + goto out; + } + + /* + * return one empty item back for v1, which does not + * handle -EOVERFLOW + */ + + *buf_size = sizeof(sh) + item_len; item_len = 0; + ret = -EOVERFLOW; + } - if (sizeof(sh) + item_len + *sk_offset > - BTRFS_SEARCH_ARGS_BUFSIZE) { + if (sizeof(sh) + item_len + *sk_offset > *buf_size) { ret = 1; - goto overflow; + goto out; } sh.objectid = key->objectid; @@ -2005,20 +2018,33 @@ static noinline int copy_to_sk(struct btrfs_root *root, sh.transid = found_transid; /* copy search result header */ - memcpy(buf + *sk_offset, &sh, sizeof(sh)); + if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) { + ret = -EFAULT; + goto out; + } + *sk_offset += sizeof(sh); if (item_len) { - char *p = buf + *sk_offset; + char __user *up = ubuf + *sk_offset; /* copy the item */ - read_extent_buffer(leaf, p, - item_off, item_len); + if (read_extent_buffer_to_user(leaf, up, + item_off, item_len)) { + ret = -EFAULT; + goto out; + } + *sk_offset += item_len; } (*num_found)++; - if (*num_found >= sk->nr_items) - break; + if (ret) /* -EOVERFLOW from above */ + goto out; + + if (*num_found >= sk->nr_items) { + ret = 1; + goto out; + } } advance_key: ret = 0; @@ -2033,22 +2059,37 @@ advance_key: key->objectid++; } else ret = 1; -overflow: +out: + /* + * 0: all items from this leaf copied, continue with next + * 1: * more items can be copied, but unused buffer is too small + * * all items were found + * Either way, it will stops the loop which iterates to the next + * leaf + * -EOVERFLOW: item was to large for buffer + * -EFAULT: could not copy extent buffer back to userspace + */ return ret; } static noinline int search_ioctl(struct inode *inode, - struct btrfs_ioctl_search_args *args) + struct btrfs_ioctl_search_key *sk, + size_t *buf_size, + char __user *ubuf) { struct btrfs_root *root; struct btrfs_key key; struct btrfs_path *path; - struct btrfs_ioctl_search_key *sk = &args->key; struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info; int ret; int num_found = 0; unsigned long sk_offset = 0; + if (*buf_size < sizeof(struct btrfs_ioctl_search_header)) { + *buf_size = sizeof(struct btrfs_ioctl_search_header); + return -EOVERFLOW; + } + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -2082,14 +2123,15 @@ static noinline int search_ioctl(struct inode *inode, ret = 0; goto err; } - ret = copy_to_sk(root, path, &key, sk, args->buf, + ret = copy_to_sk(root, path, &key, sk, buf_size, ubuf, &sk_offset, &num_found); btrfs_release_path(path); - if (ret || num_found >= sk->nr_items) + if (ret) break; } - ret = 0; + if (ret > 0) + ret = 0; err: sk->nr_items = num_found; btrfs_free_path(path); @@ -2099,22 +2141,73 @@ err: static noinline int btrfs_ioctl_tree_search(struct file *file, void __user *argp) { - struct btrfs_ioctl_search_args *args; - struct inode *inode; - int ret; + struct btrfs_ioctl_search_args __user *uargs; + struct btrfs_ioctl_search_key sk; + struct inode *inode; + int ret; + size_t buf_size; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - args = memdup_user(argp, sizeof(*args)); - if (IS_ERR(args)) - return PTR_ERR(args); + uargs = (struct btrfs_ioctl_search_args __user *)argp; + + if (copy_from_user(&sk, &uargs->key, sizeof(sk))) + return -EFAULT; + + buf_size = sizeof(uargs->buf); inode = file_inode(file); - ret = search_ioctl(inode, args); - if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) + ret = search_ioctl(inode, &sk, &buf_size, uargs->buf); + + /* + * In the origin implementation an overflow is handled by returning a + * search header with a len of zero, so reset ret. + */ + if (ret == -EOVERFLOW) + ret = 0; + + if (ret == 0 && copy_to_user(&uargs->key, &sk, sizeof(sk))) ret = -EFAULT; - kfree(args); + return ret; +} + +static noinline int btrfs_ioctl_tree_search_v2(struct file *file, + void __user *argp) +{ + struct btrfs_ioctl_search_args_v2 __user *uarg; + struct btrfs_ioctl_search_args_v2 args; + struct inode *inode; + int ret; + size_t buf_size; + const size_t buf_limit = 16 * 1024 * 1024; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + /* copy search header and buffer size */ + uarg = (struct btrfs_ioctl_search_args_v2 __user *)argp; + if (copy_from_user(&args, uarg, sizeof(args))) + return -EFAULT; + + buf_size = args.buf_size; + + if (buf_size < sizeof(struct btrfs_ioctl_search_header)) + return -EOVERFLOW; + + /* limit result size to 16MB */ + if (buf_size > buf_limit) + buf_size = buf_limit; + + inode = file_inode(file); + ret = search_ioctl(inode, &args.key, &buf_size, + (char *)(&uarg->buf[0])); + if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key))) + ret = -EFAULT; + else if (ret == -EOVERFLOW && + copy_to_user(&uarg->buf_size, &buf_size, sizeof(buf_size))) + ret = -EFAULT; + return ret; } @@ -5198,6 +5291,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_trans_end(file); case BTRFS_IOC_TREE_SEARCH: return btrfs_ioctl_tree_search(file, argp); + case BTRFS_IOC_TREE_SEARCH_V2: + return btrfs_ioctl_tree_search_v2(file, argp); case BTRFS_IOC_INO_LOOKUP: return btrfs_ioctl_ino_lookup(file, argp); case BTRFS_IOC_INO_PATHS: diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index cf5aead95a7f..98cb6b2630f9 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1798,8 +1798,10 @@ static int qgroup_shared_accounting(struct btrfs_trans_handle *trans, return -ENOMEM; tmp = ulist_alloc(GFP_NOFS); - if (!tmp) + if (!tmp) { + ulist_free(qgroups); return -ENOMEM; + } btrfs_get_tree_mod_seq(fs_info, &elem); ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq, diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 30947f923620..09230cf3a244 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -428,8 +428,13 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, continue; } if (!dev->bdev) { - /* cannot read ahead on missing device */ - continue; + /* + * cannot read ahead on missing device, but for RAID5/6, + * REQ_GET_READ_MIRRORS return 1. So don't skip missing + * device for such case. + */ + if (nzones > 1) + continue; } if (dev_replace_is_ongoing && dev == fs_info->dev_replace.tgtdev) { diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index a5dcacb5df9c..9626252ee6b4 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -135,7 +135,7 @@ restart: radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) { struct extent_buffer *eb; - eb = radix_tree_deref_slot(slot); + eb = radix_tree_deref_slot_protected(slot, &fs_info->buffer_lock); if (!eb) continue; /* Shouldn't happen but that kind of thinking creates CVE's */ diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index fa691b754aaf..ec3dcb202357 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -415,6 +415,8 @@ int btrfs_test_qgroups(void) ret = -ENOMEM; goto out; } + btrfs_set_header_level(root->node, 0); + btrfs_set_header_nritems(root->node, 0); root->alloc_bytenr += 8192; tmp_root = btrfs_alloc_dummy_root(); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 9630f10f8e1e..511839c04f11 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1284,11 +1284,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, goto fail; } - pending->error = btrfs_qgroup_inherit(trans, fs_info, - root->root_key.objectid, - objectid, pending->inherit); - if (pending->error) - goto no_free_objectid; + ret = btrfs_qgroup_inherit(trans, fs_info, + root->root_key.objectid, + objectid, pending->inherit); + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto fail; + } /* see comments in should_cow_block() */ set_bit(BTRFS_ROOT_FORCE_COW, &root->state); diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 21887d63dad5..469f2e8657e8 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -104,12 +104,6 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type) umode_t new_mode = inode->i_mode, old_mode = inode->i_mode; struct dentry *dentry; - if (acl) { - ret = posix_acl_valid(acl); - if (ret < 0) - goto out; - } - switch (type) { case ACL_TYPE_ACCESS: name = POSIX_ACL_XATTR_ACCESS; diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 4f3f69079f36..90b3954d48ed 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -211,18 +211,15 @@ static int readpage_nounlock(struct file *filp, struct page *page) SetPageError(page); ceph_fscache_readpage_cancel(inode, page); goto out; - } else { - if (err < PAGE_CACHE_SIZE) { - /* zero fill remainder of page */ - zero_user_segment(page, err, PAGE_CACHE_SIZE); - } else { - flush_dcache_page(page); - } } - SetPageUptodate(page); + if (err < PAGE_CACHE_SIZE) + /* zero fill remainder of page */ + zero_user_segment(page, err, PAGE_CACHE_SIZE); + else + flush_dcache_page(page); - if (err >= 0) - ceph_readpage_to_fscache(inode, page); + SetPageUptodate(page); + ceph_readpage_to_fscache(inode, page); out: return err < 0 ? err : 0; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index c561b628ebce..1fde164b74b5 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -221,8 +221,8 @@ int ceph_unreserve_caps(struct ceph_mds_client *mdsc, return 0; } -static struct ceph_cap *get_cap(struct ceph_mds_client *mdsc, - struct ceph_cap_reservation *ctx) +struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, + struct ceph_cap_reservation *ctx) { struct ceph_cap *cap = NULL; @@ -508,15 +508,14 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap, * it is < 0. (This is so we can atomically add the cap and add an * open file reference to it.) */ -int ceph_add_cap(struct inode *inode, - struct ceph_mds_session *session, u64 cap_id, - int fmode, unsigned issued, unsigned wanted, - unsigned seq, unsigned mseq, u64 realmino, int flags, - struct ceph_cap_reservation *caps_reservation) +void ceph_add_cap(struct inode *inode, + struct ceph_mds_session *session, u64 cap_id, + int fmode, unsigned issued, unsigned wanted, + unsigned seq, unsigned mseq, u64 realmino, int flags, + struct ceph_cap **new_cap) { struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_cap *new_cap = NULL; struct ceph_cap *cap; int mds = session->s_mds; int actual_wanted; @@ -531,20 +530,10 @@ int ceph_add_cap(struct inode *inode, if (fmode >= 0) wanted |= ceph_caps_for_mode(fmode); -retry: - spin_lock(&ci->i_ceph_lock); cap = __get_cap_for_mds(ci, mds); if (!cap) { - if (new_cap) { - cap = new_cap; - new_cap = NULL; - } else { - spin_unlock(&ci->i_ceph_lock); - new_cap = get_cap(mdsc, caps_reservation); - if (new_cap == NULL) - return -ENOMEM; - goto retry; - } + cap = *new_cap; + *new_cap = NULL; cap->issued = 0; cap->implemented = 0; @@ -562,9 +551,6 @@ retry: session->s_nr_caps++; spin_unlock(&session->s_cap_lock); } else { - if (new_cap) - ceph_put_cap(mdsc, new_cap); - /* * auth mds of the inode changed. we received the cap export * message, but still haven't received the cap import message. @@ -626,7 +612,6 @@ retry: ci->i_auth_cap = cap; cap->mds_wanted = wanted; } - ci->i_cap_exporting_issued = 0; } else { WARN_ON(ci->i_auth_cap == cap); } @@ -648,9 +633,6 @@ retry: if (fmode >= 0) __ceph_get_fmode(ci, fmode); - spin_unlock(&ci->i_ceph_lock); - wake_up_all(&ci->i_cap_wq); - return 0; } /* @@ -685,7 +667,7 @@ static int __cap_is_valid(struct ceph_cap *cap) */ int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented) { - int have = ci->i_snap_caps | ci->i_cap_exporting_issued; + int have = ci->i_snap_caps; struct ceph_cap *cap; struct rb_node *p; @@ -900,7 +882,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci) */ static int __ceph_is_any_caps(struct ceph_inode_info *ci) { - return !RB_EMPTY_ROOT(&ci->i_caps) || ci->i_cap_exporting_issued; + return !RB_EMPTY_ROOT(&ci->i_caps); } int ceph_is_any_caps(struct inode *inode) @@ -2397,32 +2379,30 @@ static void invalidate_aliases(struct inode *inode) * actually be a revocation if it specifies a smaller cap set.) * * caller holds s_mutex and i_ceph_lock, we drop both. - * - * return value: - * 0 - ok - * 1 - check_caps on auth cap only (writeback) - * 2 - check_caps (ack revoke) */ -static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, +static void handle_cap_grant(struct ceph_mds_client *mdsc, + struct inode *inode, struct ceph_mds_caps *grant, + void *snaptrace, int snaptrace_len, + struct ceph_buffer *xattr_buf, struct ceph_mds_session *session, - struct ceph_cap *cap, - struct ceph_buffer *xattr_buf) - __releases(ci->i_ceph_lock) + struct ceph_cap *cap, int issued) + __releases(ci->i_ceph_lock) { struct ceph_inode_info *ci = ceph_inode(inode); int mds = session->s_mds; int seq = le32_to_cpu(grant->seq); int newcaps = le32_to_cpu(grant->caps); - int issued, implemented, used, wanted, dirty; + int used, wanted, dirty; u64 size = le64_to_cpu(grant->size); u64 max_size = le64_to_cpu(grant->max_size); struct timespec mtime, atime, ctime; int check_caps = 0; - int wake = 0; - int writeback = 0; - int queue_invalidate = 0; - int deleted_inode = 0; - int queue_revalidate = 0; + bool wake = 0; + bool writeback = 0; + bool queue_trunc = 0; + bool queue_invalidate = 0; + bool queue_revalidate = 0; + bool deleted_inode = 0; dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", inode, cap, mds, seq, ceph_cap_string(newcaps)); @@ -2466,16 +2446,13 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, } /* side effects now are allowed */ - - issued = __ceph_caps_issued(ci, &implemented); - issued |= implemented | __ceph_caps_dirty(ci); - cap->cap_gen = session->s_cap_gen; cap->seq = seq; __check_cap_issue(ci, cap, newcaps); - if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { + if ((newcaps & CEPH_CAP_AUTH_SHARED) && + (issued & CEPH_CAP_AUTH_EXCL) == 0) { inode->i_mode = le32_to_cpu(grant->mode); inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid)); inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid)); @@ -2484,7 +2461,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, from_kgid(&init_user_ns, inode->i_gid)); } - if ((issued & CEPH_CAP_LINK_EXCL) == 0) { + if ((newcaps & CEPH_CAP_AUTH_SHARED) && + (issued & CEPH_CAP_LINK_EXCL) == 0) { set_nlink(inode, le32_to_cpu(grant->nlink)); if (inode->i_nlink == 0 && (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL))) @@ -2511,30 +2489,35 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1) queue_revalidate = 1; - /* size/ctime/mtime/atime? */ - ceph_fill_file_size(inode, issued, - le32_to_cpu(grant->truncate_seq), - le64_to_cpu(grant->truncate_size), size); - ceph_decode_timespec(&mtime, &grant->mtime); - ceph_decode_timespec(&atime, &grant->atime); - ceph_decode_timespec(&ctime, &grant->ctime); - ceph_fill_file_time(inode, issued, - le32_to_cpu(grant->time_warp_seq), &ctime, &mtime, - &atime); - - - /* file layout may have changed */ - ci->i_layout = grant->layout; - - /* max size increase? */ - if (ci->i_auth_cap == cap && max_size != ci->i_max_size) { - dout("max_size %lld -> %llu\n", ci->i_max_size, max_size); - ci->i_max_size = max_size; - if (max_size >= ci->i_wanted_max_size) { - ci->i_wanted_max_size = 0; /* reset */ - ci->i_requested_max_size = 0; + if (newcaps & CEPH_CAP_ANY_RD) { + /* ctime/mtime/atime? */ + ceph_decode_timespec(&mtime, &grant->mtime); + ceph_decode_timespec(&atime, &grant->atime); + ceph_decode_timespec(&ctime, &grant->ctime); + ceph_fill_file_time(inode, issued, + le32_to_cpu(grant->time_warp_seq), + &ctime, &mtime, &atime); + } + + if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) { + /* file layout may have changed */ + ci->i_layout = grant->layout; + /* size/truncate_seq? */ + queue_trunc = ceph_fill_file_size(inode, issued, + le32_to_cpu(grant->truncate_seq), + le64_to_cpu(grant->truncate_size), + size); + /* max size increase? */ + if (ci->i_auth_cap == cap && max_size != ci->i_max_size) { + dout("max_size %lld -> %llu\n", + ci->i_max_size, max_size); + ci->i_max_size = max_size; + if (max_size >= ci->i_wanted_max_size) { + ci->i_wanted_max_size = 0; /* reset */ + ci->i_requested_max_size = 0; + } + wake = 1; } - wake = 1; } /* check cap bits */ @@ -2595,6 +2578,23 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, spin_unlock(&ci->i_ceph_lock); + if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) { + down_write(&mdsc->snap_rwsem); + ceph_update_snap_trace(mdsc, snaptrace, + snaptrace + snaptrace_len, false); + downgrade_write(&mdsc->snap_rwsem); + kick_flushing_inode_caps(mdsc, session, inode); + up_read(&mdsc->snap_rwsem); + if (newcaps & ~issued) + wake = 1; + } + + if (queue_trunc) { + ceph_queue_vmtruncate(inode); + ceph_queue_revalidate(inode); + } else if (queue_revalidate) + ceph_queue_revalidate(inode); + if (writeback) /* * queue inode for writeback: we can't actually call @@ -2606,8 +2606,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, ceph_queue_invalidate(inode); if (deleted_inode) invalidate_aliases(inode); - if (queue_revalidate) - ceph_queue_revalidate(inode); if (wake) wake_up_all(&ci->i_cap_wq); @@ -2784,7 +2782,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, { struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; struct ceph_mds_session *tsession = NULL; - struct ceph_cap *cap, *tcap; + struct ceph_cap *cap, *tcap, *new_cap = NULL; struct ceph_inode_info *ci = ceph_inode(inode); u64 t_cap_id; unsigned mseq = le32_to_cpu(ex->migrate_seq); @@ -2807,7 +2805,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, retry: spin_lock(&ci->i_ceph_lock); cap = __get_cap_for_mds(ci, mds); - if (!cap) + if (!cap || cap->cap_id != le64_to_cpu(ex->cap_id)) goto out_unlock; if (target < 0) { @@ -2846,15 +2844,14 @@ retry: } __ceph_remove_cap(cap, false); goto out_unlock; - } - - if (tsession) { - int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0; - spin_unlock(&ci->i_ceph_lock); + } else if (tsession) { /* add placeholder for the export tagert */ + int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0; ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0, - t_seq - 1, t_mseq, (u64)-1, flag, NULL); - goto retry; + t_seq - 1, t_mseq, (u64)-1, flag, &new_cap); + + __ceph_remove_cap(cap, false); + goto out_unlock; } spin_unlock(&ci->i_ceph_lock); @@ -2873,6 +2870,7 @@ retry: SINGLE_DEPTH_NESTING); } ceph_add_cap_releases(mdsc, tsession); + new_cap = ceph_get_cap(mdsc, NULL); } else { WARN_ON(1); tsession = NULL; @@ -2887,24 +2885,27 @@ out_unlock: mutex_unlock(&tsession->s_mutex); ceph_put_mds_session(tsession); } + if (new_cap) + ceph_put_cap(mdsc, new_cap); } /* - * Handle cap IMPORT. If there are temp bits from an older EXPORT, - * clean them up. + * Handle cap IMPORT. * - * caller holds s_mutex. + * caller holds s_mutex. acquires i_ceph_lock */ static void handle_cap_import(struct ceph_mds_client *mdsc, struct inode *inode, struct ceph_mds_caps *im, struct ceph_mds_cap_peer *ph, struct ceph_mds_session *session, - void *snaptrace, int snaptrace_len) + struct ceph_cap **target_cap, int *old_issued) + __acquires(ci->i_ceph_lock) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_cap *cap; + struct ceph_cap *cap, *ocap, *new_cap = NULL; int mds = session->s_mds; - unsigned issued = le32_to_cpu(im->caps); + int issued; + unsigned caps = le32_to_cpu(im->caps); unsigned wanted = le32_to_cpu(im->wanted); unsigned seq = le32_to_cpu(im->seq); unsigned mseq = le32_to_cpu(im->migrate_seq); @@ -2924,40 +2925,52 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, dout("handle_cap_import inode %p ci %p mds%d mseq %d peer %d\n", inode, ci, mds, mseq, peer); +retry: spin_lock(&ci->i_ceph_lock); - cap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL; - if (cap && cap->cap_id == p_cap_id) { + cap = __get_cap_for_mds(ci, mds); + if (!cap) { + if (!new_cap) { + spin_unlock(&ci->i_ceph_lock); + new_cap = ceph_get_cap(mdsc, NULL); + goto retry; + } + cap = new_cap; + } else { + if (new_cap) { + ceph_put_cap(mdsc, new_cap); + new_cap = NULL; + } + } + + __ceph_caps_issued(ci, &issued); + issued |= __ceph_caps_dirty(ci); + + ceph_add_cap(inode, session, cap_id, -1, caps, wanted, seq, mseq, + realmino, CEPH_CAP_FLAG_AUTH, &new_cap); + + ocap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL; + if (ocap && ocap->cap_id == p_cap_id) { dout(" remove export cap %p mds%d flags %d\n", - cap, peer, ph->flags); + ocap, peer, ph->flags); if ((ph->flags & CEPH_CAP_FLAG_AUTH) && - (cap->seq != le32_to_cpu(ph->seq) || - cap->mseq != le32_to_cpu(ph->mseq))) { + (ocap->seq != le32_to_cpu(ph->seq) || + ocap->mseq != le32_to_cpu(ph->mseq))) { pr_err("handle_cap_import: mismatched seq/mseq: " "ino (%llx.%llx) mds%d seq %d mseq %d " "importer mds%d has peer seq %d mseq %d\n", - ceph_vinop(inode), peer, cap->seq, - cap->mseq, mds, le32_to_cpu(ph->seq), + ceph_vinop(inode), peer, ocap->seq, + ocap->mseq, mds, le32_to_cpu(ph->seq), le32_to_cpu(ph->mseq)); } - ci->i_cap_exporting_issued = cap->issued; - __ceph_remove_cap(cap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); + __ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); } /* make sure we re-request max_size, if necessary */ ci->i_wanted_max_size = 0; ci->i_requested_max_size = 0; - spin_unlock(&ci->i_ceph_lock); - - down_write(&mdsc->snap_rwsem); - ceph_update_snap_trace(mdsc, snaptrace, snaptrace+snaptrace_len, - false); - downgrade_write(&mdsc->snap_rwsem); - ceph_add_cap(inode, session, cap_id, -1, - issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH, - NULL /* no caps context */); - kick_flushing_inode_caps(mdsc, session, inode); - up_read(&mdsc->snap_rwsem); + *old_issued = issued; + *target_cap = cap; } /* @@ -2977,7 +2990,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, struct ceph_mds_caps *h; struct ceph_mds_cap_peer *peer = NULL; int mds = session->s_mds; - int op; + int op, issued; u32 seq, mseq; struct ceph_vino vino; u64 cap_id; @@ -3069,7 +3082,10 @@ void ceph_handle_caps(struct ceph_mds_session *session, case CEPH_CAP_OP_IMPORT: handle_cap_import(mdsc, inode, h, peer, session, - snaptrace, snaptrace_len); + &cap, &issued); + handle_cap_grant(mdsc, inode, h, snaptrace, snaptrace_len, + msg->middle, session, cap, issued); + goto done_unlocked; } /* the rest require a cap */ @@ -3086,8 +3102,10 @@ void ceph_handle_caps(struct ceph_mds_session *session, switch (op) { case CEPH_CAP_OP_REVOKE: case CEPH_CAP_OP_GRANT: - case CEPH_CAP_OP_IMPORT: - handle_cap_grant(inode, h, session, cap, msg->middle); + __ceph_caps_issued(ci, &issued); + issued |= __ceph_caps_dirty(ci); + handle_cap_grant(mdsc, inode, h, NULL, 0, msg->middle, + session, cap, issued); goto done_unlocked; case CEPH_CAP_OP_FLUSH_ACK: diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 00d6af6a32ec..8d7d782f4382 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -169,7 +169,7 @@ static struct dentry *__get_parent(struct super_block *sb, return dentry; } -struct dentry *ceph_get_parent(struct dentry *child) +static struct dentry *ceph_get_parent(struct dentry *child) { /* don't re-export snaps */ if (ceph_snap(child->d_inode) != CEPH_NOSNAP) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index e4fff9ff1c27..04c89c266cec 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -10,6 +10,7 @@ #include <linux/writeback.h> #include <linux/vmalloc.h> #include <linux/posix_acl.h> +#include <linux/random.h> #include "super.h" #include "mds_client.h" @@ -179,9 +180,8 @@ struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, u32 f) * specified, copy the frag delegation info to the caller if * it is present. */ -u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, - struct ceph_inode_frag *pfrag, - int *found) +static u32 __ceph_choose_frag(struct ceph_inode_info *ci, u32 v, + struct ceph_inode_frag *pfrag, int *found) { u32 t = ceph_frag_make(0, 0); struct ceph_inode_frag *frag; @@ -191,7 +191,6 @@ u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, if (found) *found = 0; - mutex_lock(&ci->i_fragtree_mutex); while (1) { WARN_ON(!ceph_frag_contains_value(t, v)); frag = __ceph_find_frag(ci, t); @@ -220,10 +219,19 @@ u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, } dout("choose_frag(%x) = %x\n", v, t); - mutex_unlock(&ci->i_fragtree_mutex); return t; } +u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, + struct ceph_inode_frag *pfrag, int *found) +{ + u32 ret; + mutex_lock(&ci->i_fragtree_mutex); + ret = __ceph_choose_frag(ci, v, pfrag, found); + mutex_unlock(&ci->i_fragtree_mutex); + return ret; +} + /* * Process dirfrag (delegation) info from the mds. Include leaf * fragment in tree ONLY if ndist > 0. Otherwise, only @@ -237,11 +245,17 @@ static int ceph_fill_dirfrag(struct inode *inode, u32 id = le32_to_cpu(dirinfo->frag); int mds = le32_to_cpu(dirinfo->auth); int ndist = le32_to_cpu(dirinfo->ndist); + int diri_auth = -1; int i; int err = 0; + spin_lock(&ci->i_ceph_lock); + if (ci->i_auth_cap) + diri_auth = ci->i_auth_cap->mds; + spin_unlock(&ci->i_ceph_lock); + mutex_lock(&ci->i_fragtree_mutex); - if (ndist == 0) { + if (ndist == 0 && mds == diri_auth) { /* no delegation info needed. */ frag = __ceph_find_frag(ci, id); if (!frag) @@ -286,6 +300,75 @@ out: return err; } +static int ceph_fill_fragtree(struct inode *inode, + struct ceph_frag_tree_head *fragtree, + struct ceph_mds_reply_dirfrag *dirinfo) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_inode_frag *frag; + struct rb_node *rb_node; + int i; + u32 id, nsplits; + bool update = false; + + mutex_lock(&ci->i_fragtree_mutex); + nsplits = le32_to_cpu(fragtree->nsplits); + if (nsplits) { + i = prandom_u32() % nsplits; + id = le32_to_cpu(fragtree->splits[i].frag); + if (!__ceph_find_frag(ci, id)) + update = true; + } else if (!RB_EMPTY_ROOT(&ci->i_fragtree)) { + rb_node = rb_first(&ci->i_fragtree); + frag = rb_entry(rb_node, struct ceph_inode_frag, node); + if (frag->frag != ceph_frag_make(0, 0) || rb_next(rb_node)) + update = true; + } + if (!update && dirinfo) { + id = le32_to_cpu(dirinfo->frag); + if (id != __ceph_choose_frag(ci, id, NULL, NULL)) + update = true; + } + if (!update) + goto out_unlock; + + dout("fill_fragtree %llx.%llx\n", ceph_vinop(inode)); + rb_node = rb_first(&ci->i_fragtree); + for (i = 0; i < nsplits; i++) { + id = le32_to_cpu(fragtree->splits[i].frag); + frag = NULL; + while (rb_node) { + frag = rb_entry(rb_node, struct ceph_inode_frag, node); + if (ceph_frag_compare(frag->frag, id) >= 0) { + if (frag->frag != id) + frag = NULL; + else + rb_node = rb_next(rb_node); + break; + } + rb_node = rb_next(rb_node); + rb_erase(&frag->node, &ci->i_fragtree); + kfree(frag); + frag = NULL; + } + if (!frag) { + frag = __get_or_create_frag(ci, id); + if (IS_ERR(frag)) + continue; + } + frag->split_by = le32_to_cpu(fragtree->splits[i].by); + dout(" frag %x split by %d\n", frag->frag, frag->split_by); + } + while (rb_node) { + frag = rb_entry(rb_node, struct ceph_inode_frag, node); + rb_node = rb_next(rb_node); + rb_erase(&frag->node, &ci->i_fragtree); + kfree(frag); + } +out_unlock: + mutex_unlock(&ci->i_fragtree_mutex); + return 0; +} /* * initialize a newly allocated inode. @@ -341,7 +424,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&ci->i_cap_snaps); ci->i_head_snapc = NULL; ci->i_snap_caps = 0; - ci->i_cap_exporting_issued = 0; for (i = 0; i < CEPH_FILE_MODE_NUM; i++) ci->i_nr_by_mode[i] = 0; @@ -407,7 +489,7 @@ void ceph_destroy_inode(struct inode *inode) /* * we may still have a snap_realm reference if there are stray - * caps in i_cap_exporting_issued or i_snap_caps. + * caps in i_snap_caps. */ if (ci->i_snap_realm) { struct ceph_mds_client *mdsc = @@ -582,22 +664,26 @@ static int fill_inode(struct inode *inode, unsigned long ttl_from, int cap_fmode, struct ceph_cap_reservation *caps_reservation) { + struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; struct ceph_mds_reply_inode *info = iinfo->in; struct ceph_inode_info *ci = ceph_inode(inode); - int i; - int issued = 0, implemented; + int issued = 0, implemented, new_issued; struct timespec mtime, atime, ctime; - u32 nsplits; - struct ceph_inode_frag *frag; - struct rb_node *rb_node; struct ceph_buffer *xattr_blob = NULL; + struct ceph_cap *new_cap = NULL; int err = 0; - int queue_trunc = 0; + bool wake = false; + bool queue_trunc = false; + bool new_version = false; dout("fill_inode %p ino %llx.%llx v %llu had %llu\n", inode, ceph_vinop(inode), le64_to_cpu(info->version), ci->i_version); + /* prealloc new cap struct */ + if (info->cap.caps && ceph_snap(inode) == CEPH_NOSNAP) + new_cap = ceph_get_cap(mdsc, caps_reservation); + /* * prealloc xattr data, if it looks like we'll need it. only * if len > 4 (meaning there are actually xattrs; the first 4 @@ -623,19 +709,23 @@ static int fill_inode(struct inode *inode, * 3 2 skip * 3 3 update */ - if (le64_to_cpu(info->version) > 0 && - (ci->i_version & ~1) >= le64_to_cpu(info->version)) - goto no_change; - + if (ci->i_version == 0 || + ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && + le64_to_cpu(info->version) > (ci->i_version & ~1))) + new_version = true; + issued = __ceph_caps_issued(ci, &implemented); issued |= implemented | __ceph_caps_dirty(ci); + new_issued = ~issued & le32_to_cpu(info->cap.caps); /* update inode */ ci->i_version = le64_to_cpu(info->version); inode->i_version++; inode->i_rdev = le32_to_cpu(info->rdev); + inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; - if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { + if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) && + (issued & CEPH_CAP_AUTH_EXCL) == 0) { inode->i_mode = le32_to_cpu(info->mode); inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid)); inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid)); @@ -644,23 +734,35 @@ static int fill_inode(struct inode *inode, from_kgid(&init_user_ns, inode->i_gid)); } - if ((issued & CEPH_CAP_LINK_EXCL) == 0) + if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) && + (issued & CEPH_CAP_LINK_EXCL) == 0) set_nlink(inode, le32_to_cpu(info->nlink)); - /* be careful with mtime, atime, size */ - ceph_decode_timespec(&atime, &info->atime); - ceph_decode_timespec(&mtime, &info->mtime); - ceph_decode_timespec(&ctime, &info->ctime); - queue_trunc = ceph_fill_file_size(inode, issued, - le32_to_cpu(info->truncate_seq), - le64_to_cpu(info->truncate_size), - le64_to_cpu(info->size)); - ceph_fill_file_time(inode, issued, - le32_to_cpu(info->time_warp_seq), - &ctime, &mtime, &atime); - - ci->i_layout = info->layout; - inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; + if (new_version || (new_issued & CEPH_CAP_ANY_RD)) { + /* be careful with mtime, atime, size */ + ceph_decode_timespec(&atime, &info->atime); + ceph_decode_timespec(&mtime, &info->mtime); + ceph_decode_timespec(&ctime, &info->ctime); + ceph_fill_file_time(inode, issued, + le32_to_cpu(info->time_warp_seq), + &ctime, &mtime, &atime); + } + + if (new_version || + (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) { + ci->i_layout = info->layout; + queue_trunc = ceph_fill_file_size(inode, issued, + le32_to_cpu(info->truncate_seq), + le64_to_cpu(info->truncate_size), + le64_to_cpu(info->size)); + /* only update max_size on auth cap */ + if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && + ci->i_max_size != le64_to_cpu(info->max_size)) { + dout("max_size %lld -> %llu\n", ci->i_max_size, + le64_to_cpu(info->max_size)); + ci->i_max_size = le64_to_cpu(info->max_size); + } + } /* xattrs */ /* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */ @@ -745,58 +847,6 @@ static int fill_inode(struct inode *inode, dout(" marking %p complete (empty)\n", inode); __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); } -no_change: - /* only update max_size on auth cap */ - if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && - ci->i_max_size != le64_to_cpu(info->max_size)) { - dout("max_size %lld -> %llu\n", ci->i_max_size, - le64_to_cpu(info->max_size)); - ci->i_max_size = le64_to_cpu(info->max_size); - } - - spin_unlock(&ci->i_ceph_lock); - - /* queue truncate if we saw i_size decrease */ - if (queue_trunc) - ceph_queue_vmtruncate(inode); - - /* populate frag tree */ - /* FIXME: move me up, if/when version reflects fragtree changes */ - nsplits = le32_to_cpu(info->fragtree.nsplits); - mutex_lock(&ci->i_fragtree_mutex); - rb_node = rb_first(&ci->i_fragtree); - for (i = 0; i < nsplits; i++) { - u32 id = le32_to_cpu(info->fragtree.splits[i].frag); - frag = NULL; - while (rb_node) { - frag = rb_entry(rb_node, struct ceph_inode_frag, node); - if (ceph_frag_compare(frag->frag, id) >= 0) { - if (frag->frag != id) - frag = NULL; - else - rb_node = rb_next(rb_node); - break; - } - rb_node = rb_next(rb_node); - rb_erase(&frag->node, &ci->i_fragtree); - kfree(frag); - frag = NULL; - } - if (!frag) { - frag = __get_or_create_frag(ci, id); - if (IS_ERR(frag)) - continue; - } - frag->split_by = le32_to_cpu(info->fragtree.splits[i].by); - dout(" frag %x split by %d\n", frag->frag, frag->split_by); - } - while (rb_node) { - frag = rb_entry(rb_node, struct ceph_inode_frag, node); - rb_node = rb_next(rb_node); - rb_erase(&frag->node, &ci->i_fragtree); - kfree(frag); - } - mutex_unlock(&ci->i_fragtree_mutex); /* were we issued a capability? */ if (info->cap.caps) { @@ -809,30 +859,41 @@ no_change: le32_to_cpu(info->cap.seq), le32_to_cpu(info->cap.mseq), le64_to_cpu(info->cap.realm), - info->cap.flags, - caps_reservation); + info->cap.flags, &new_cap); + wake = true; } else { - spin_lock(&ci->i_ceph_lock); dout(" %p got snap_caps %s\n", inode, ceph_cap_string(le32_to_cpu(info->cap.caps))); ci->i_snap_caps |= le32_to_cpu(info->cap.caps); if (cap_fmode >= 0) __ceph_get_fmode(ci, cap_fmode); - spin_unlock(&ci->i_ceph_lock); } } else if (cap_fmode >= 0) { pr_warn("mds issued no caps on %llx.%llx\n", ceph_vinop(inode)); __ceph_get_fmode(ci, cap_fmode); } + spin_unlock(&ci->i_ceph_lock); + + if (wake) + wake_up_all(&ci->i_cap_wq); + + /* queue truncate if we saw i_size decrease */ + if (queue_trunc) + ceph_queue_vmtruncate(inode); + + /* populate frag tree */ + if (S_ISDIR(inode->i_mode)) + ceph_fill_fragtree(inode, &info->fragtree, dirinfo); /* update delegation info? */ if (dirinfo) ceph_fill_dirfrag(inode, dirinfo); err = 0; - out: + if (new_cap) + ceph_put_cap(mdsc, new_cap); if (xattr_blob) ceph_buffer_put(xattr_blob); return err; @@ -1485,7 +1546,7 @@ static void ceph_invalidate_work(struct work_struct *work) orig_gen = ci->i_rdcache_gen; spin_unlock(&ci->i_ceph_lock); - truncate_inode_pages(inode->i_mapping, 0); + truncate_pagecache(inode, 0); spin_lock(&ci->i_ceph_lock); if (orig_gen == ci->i_rdcache_gen && @@ -1588,7 +1649,7 @@ retry: ci->i_truncate_pending, to); spin_unlock(&ci->i_ceph_lock); - truncate_inode_pages(inode->i_mapping, to); + truncate_pagecache(inode, to); spin_lock(&ci->i_ceph_lock); if (to == ci->i_truncate_size) { diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 9a33b98cb000..92a2548278fc 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1558,6 +1558,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) init_completion(&req->r_safe_completion); INIT_LIST_HEAD(&req->r_unsafe_item); + req->r_stamp = CURRENT_TIME; + req->r_op = op; req->r_direct_mode = mode; return req; @@ -1783,7 +1785,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, } len = sizeof(*head) + - pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)); + pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) + + sizeof(struct timespec); /* calculate (max) length for cap releases */ len += sizeof(struct ceph_mds_request_release) * @@ -1800,6 +1803,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, goto out_free2; } + msg->hdr.version = 2; msg->hdr.tid = cpu_to_le64(req->r_tid); head = msg->front.iov_base; @@ -1836,6 +1840,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, mds, req->r_old_inode_drop, req->r_old_inode_unless, 0); head->num_releases = cpu_to_le16(releases); + /* time stamp */ + ceph_encode_copy(&p, &req->r_stamp, sizeof(req->r_stamp)); + BUG_ON(p > end); msg->front.iov_len = p - msg->front.iov_base; msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index e90cfccf93bd..e00737cf523c 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -194,6 +194,7 @@ struct ceph_mds_request { int r_fmode; /* file mode, if expecting cap */ kuid_t r_uid; kgid_t r_gid; + struct timespec r_stamp; /* for choosing which mds to send this request to */ int r_direct_mode; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index ead05cc1f447..12b20744e386 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -292,7 +292,6 @@ struct ceph_inode_info { struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or dirty|flushing caps */ unsigned i_snap_caps; /* cap bits for snapped files */ - unsigned i_cap_exporting_issued; int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ @@ -775,11 +774,13 @@ static inline void ceph_forget_all_cached_acls(struct inode *inode) extern const char *ceph_cap_string(int c); extern void ceph_handle_caps(struct ceph_mds_session *session, struct ceph_msg *msg); -extern int ceph_add_cap(struct inode *inode, - struct ceph_mds_session *session, u64 cap_id, - int fmode, unsigned issued, unsigned wanted, - unsigned cap, unsigned seq, u64 realmino, int flags, - struct ceph_cap_reservation *caps_reservation); +extern struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, + struct ceph_cap_reservation *ctx); +extern void ceph_add_cap(struct inode *inode, + struct ceph_mds_session *session, u64 cap_id, + int fmode, unsigned issued, unsigned wanted, + unsigned cap, unsigned seq, u64 realmino, int flags, + struct ceph_cap **new_cap); extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release); extern void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap); diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 1e5b45359509..d08e079ea5d3 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -617,6 +617,11 @@ static void retry_failed_sctp_send(struct connection *recv_con, int nodeid = sn_send_failed->ssf_info.sinfo_ppid; log_print("Retry sending %d bytes to node id %d", len, nodeid); + + if (!nodeid) { + log_print("Shouldn't resend data via listening connection."); + return; + } con = nodeid2con(nodeid, 0); if (!con) { diff --git a/fs/exec.c b/fs/exec.c index 238b7aa26f68..a3d33fe592d6 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1046,13 +1046,13 @@ EXPORT_SYMBOL_GPL(get_task_comm); * so that a new one can be started */ -void set_task_comm(struct task_struct *tsk, const char *buf) +void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec) { task_lock(tsk); trace_task_rename(tsk, buf); strlcpy(tsk->comm, buf, sizeof(tsk->comm)); task_unlock(tsk); - perf_event_comm(tsk); + perf_event_comm(tsk, exec); } int flush_old_exec(struct linux_binprm * bprm) @@ -1110,7 +1110,8 @@ void setup_new_exec(struct linux_binprm * bprm) else set_dumpable(current->mm, suid_dumpable); - set_task_comm(current, kbasename(bprm->filename)); + perf_event_exec(); + __set_task_comm(current, kbasename(bprm->filename), true); /* Set the new mm task size. We have to do that late because it may * depend on TIF_32BIT which is only updated in flush_thread() on |

