summary refs log tree commit diff stats
path: root/fs/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph')
-rw-r--r-- fs/ceph/addr.c 6
-rw-r--r-- fs/ceph/caps.c 2
-rw-r--r-- fs/ceph/debugfs.c 6
-rw-r--r-- fs/ceph/dir.c 33
-rw-r--r-- fs/ceph/file.c 185
-rw-r--r-- fs/ceph/inode.c 73
-rw-r--r-- fs/ceph/ioctl.c 3
-rw-r--r-- fs/ceph/locks.c 11
-rw-r--r-- fs/ceph/mds_client.c 6
-rw-r--r-- fs/ceph/mdsmap.c 2
-rw-r--r-- fs/ceph/super.h 1
11 files changed, 124 insertions, 204 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index b53278c9fd97..4f3f69079f36 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -694,7 +694,7 @@ static int ceph_writepages_start(struct address_space *mapping,
(wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) {
- pr_warning("writepage_start %p on forced umount\n", inode);
+ pr_warn("writepage_start %p on forced umount\n", inode);
return -EIO; /* we're in a forced umount, don't write! */
}
if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize)
@@ -1187,8 +1187,8 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
* never get called.
*/
static ssize_t ceph_direct_io(int rw, struct kiocb *iocb,
- const struct iovec *iov,
- loff_t pos, unsigned long nr_segs)
+ struct iov_iter *iter,
+ loff_t pos)
{
WARN_ON(1);
return -EINVAL;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 2e5e648eb5c3..c561b628ebce 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3261,7 +3261,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
rel->seq = cpu_to_le32(cap->seq);
rel->issue_seq = cpu_to_le32(cap->issue_seq),
rel->mseq = cpu_to_le32(cap->mseq);
- rel->caps = cpu_to_le32(cap->issued);
+ rel->caps = cpu_to_le32(cap->implemented);
rel->wanted = cpu_to_le32(cap->mds_wanted);
rel->dname_len = 0;
rel->dname_seq = 0;
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 16b54aa31f08..5a743ac141ab 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -71,9 +71,9 @@ static int mdsc_show(struct seq_file *s, void *p)
seq_printf(s, "%s", ceph_mds_op_name(req->r_op));
if (req->r_got_unsafe)
- seq_printf(s, "\t(unsafe)");
+ seq_puts(s, "\t(unsafe)");
else
- seq_printf(s, "\t");
+ seq_puts(s, "\t");
if (req->r_inode) {
seq_printf(s, " #%llx", ceph_ino(req->r_inode));
@@ -119,7 +119,7 @@ static int mdsc_show(struct seq_file *s, void *p)
seq_printf(s, " %s", req->r_path2);
}
- seq_printf(s, "\n");
+ seq_puts(s, "\n");
}
mutex_unlock(&mdsc->mutex);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 766410a12c2c..c29d6ae68874 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -141,7 +141,7 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx,
/* start at beginning? */
if (ctx->pos == 2 || last == NULL ||
- ctx->pos < ceph_dentry(last)->offset) {
+ fpos_cmp(ctx->pos, ceph_dentry(last)->offset) < 0) {
if (list_empty(&parent->d_subdirs))
goto out_unlock;
p = parent->d_subdirs.prev;
@@ -182,9 +182,16 @@ more:
spin_unlock(&dentry->d_lock);
spin_unlock(&parent->d_lock);
+ /* make sure a dentry wasn't dropped while we didn't have parent lock */
+ if (!ceph_dir_is_complete(dir)) {
+ dout(" lost dir complete on %p; falling back to mds\n", dir);
+ dput(dentry);
+ err = -EAGAIN;
+ goto out;
+ }
+
dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, ctx->pos,
dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
- ctx->pos = di->offset;
if (!dir_emit(ctx, dentry->d_name.name,
dentry->d_name.len,
ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino),
@@ -198,19 +205,12 @@ more:
return 0;
}
+ ctx->pos = di->offset + 1;
+
if (last)
dput(last);
last = dentry;
- ctx->pos++;
-
- /* make sure a dentry wasn't dropped while we didn't have parent lock */
- if (!ceph_dir_is_complete(dir)) {
- dout(" lost dir complete on %p; falling back to mds\n", dir);
- err = -EAGAIN;
- goto out;
- }
-
spin_lock(&parent->d_lock);
p = p->prev; /* advance to next dentry */
goto more;
@@ -296,6 +296,8 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
err = __dcache_readdir(file, ctx, shared_gen);
if (err != -EAGAIN)
return err;
+ frag = fpos_frag(ctx->pos);
+ off = fpos_off(ctx->pos);
} else {
spin_unlock(&ci->i_ceph_lock);
}
@@ -446,7 +448,6 @@ more:
if (atomic_read(&ci->i_release_count) == fi->dir_release_count) {
dout(" marking %p complete\n", inode);
__ceph_dir_set_complete(ci, fi->dir_release_count);
- ci->i_max_offset = ctx->pos;
}
spin_unlock(&ci->i_ceph_lock);
@@ -935,14 +936,16 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
* to do it here.
*/
- /* d_move screws up d_subdirs order */
- ceph_dir_clear_complete(new_dir);
-
d_move(old_dentry, new_dentry);
/* ensure target dentry is invalidated, despite
rehashing bug in vfs_rename_dir */
ceph_invalidate_dentry_lease(new_dentry);
+
+ /* d_move screws up sibling dentries' offsets */
+ ceph_dir_clear_complete(old_dir);
+ ceph_dir_clear_complete(new_dir);
+
}
ceph_mdsc_put_request(req);
return err;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 88a6df4cbe6d..302085100c28 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -418,7 +418,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
struct page **pages;
u64 off = iocb->ki_pos;
int num_pages, ret;
- size_t len = i->count;
+ size_t len = iov_iter_count(i);
dout("sync_read on file %p %llu~%u %s\n", file, off,
(unsigned)len,
@@ -436,25 +436,26 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
if (file->f_flags & O_DIRECT) {
while (iov_iter_count(i)) {
- void __user *data = i->iov[0].iov_base + i->iov_offset;
- size_t len = i->iov[0].iov_len - i->iov_offset;
+ size_t start;
+ ssize_t n;
- num_pages = calc_pages_for((unsigned long)data, len);
- pages = ceph_get_direct_page_vector(data,
- num_pages, true);
- if (IS_ERR(pages))
- return PTR_ERR(pages);
+ n = iov_iter_get_pages_alloc(i, &pages, INT_MAX, &start);
+ if (n < 0)
+ return n;
- ret = striped_read(inode, off, len,
+ num_pages = (n + start + PAGE_SIZE - 1) / PAGE_SIZE;
+
+ ret = striped_read(inode, off, n,
pages, num_pages, checkeof,
- 1, (unsigned long)data & ~PAGE_MASK);
+ 1, start);
+
ceph_put_page_vector(pages, num_pages, true);
if (ret <= 0)
break;
off += ret;
iov_iter_advance(i, ret);
- if (ret < len)
+ if (ret < n)
break;
}
} else {
@@ -466,25 +467,14 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i,
num_pages, checkeof, 0, 0);
if (ret > 0) {
int l, k = 0;
- size_t left = len = ret;
+ size_t left = ret;
while (left) {
- void __user *data = i->iov[0].iov_base
- + i->iov_offset;
- l = min(i->iov[0].iov_len - i->iov_offset,
- left);
-
- ret = ceph_copy_page_vector_to_user(&pages[k],
- data, off,
- l);
- if (ret > 0) {
- iov_iter_advance(i, ret);
- left -= ret;
- off += ret;
- k = calc_pages_for(iocb->ki_pos,
- len - left + 1) - 1;
- BUG_ON(k >= num_pages && left);
- } else
+ int copy = min_t(size_t, PAGE_SIZE, left);
+ l = copy_page_to_iter(pages[k++], 0, copy, i);
+ off += l;
+ left -= l;
+ if (l < copy)
break;
}
}
@@ -541,8 +531,7 @@ static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe)
* objects, rollback on failure, etc.)
*/
static ssize_t
-ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, size_t count)
+ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
@@ -556,11 +545,10 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
int written = 0;
int flags;
int check_caps = 0;
- int page_align;
int ret;
struct timespec mtime = CURRENT_TIME;
loff_t pos = iocb->ki_pos;
- struct iov_iter i;
+ size_t count = iov_iter_count(from);
if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
return -EROFS;
@@ -582,13 +570,10 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
CEPH_OSD_FLAG_ONDISK |
CEPH_OSD_FLAG_WRITE;
- iov_iter_init(&i, iov, nr_segs, count, 0);
-
- while (iov_iter_count(&i) > 0) {
- void __user *data = i.iov->iov_base + i.iov_offset;
- u64 len = i.iov->iov_len - i.iov_offset;
-
- page_align = (unsigned long)data & ~PAGE_MASK;
+ while (iov_iter_count(from) > 0) {
+ u64 len = iov_iter_single_seg_count(from);
+ size_t start;
+ ssize_t n;
snapc = ci->i_snap_realm->cached_context;
vino = ceph_vino(inode);
@@ -604,20 +589,21 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
break;
}
- num_pages = calc_pages_for(page_align, len);
- pages = ceph_get_direct_page_vector(data, num_pages, false);
- if (IS_ERR(pages)) {
- ret = PTR_ERR(pages);
- goto out;
+ n = iov_iter_get_pages_alloc(from, &pages, len, &start);
+ if (unlikely(n < 0)) {
+ ret = n;
+ ceph_osdc_put_request(req);
+ break;
}
+ num_pages = (n + start + PAGE_SIZE - 1) / PAGE_SIZE;
/*
* throw out any page cache pages in this range. this
* may block.
*/
truncate_inode_pages_range(inode->i_mapping, pos,
- (pos+len) | (PAGE_CACHE_SIZE-1));
- osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align,
+ (pos+n) | (PAGE_CACHE_SIZE-1));
+ osd_req_op_extent_osd_data_pages(req, 0, pages, n, start,
false, false);
/* BUG_ON(vino.snap != CEPH_NOSNAP); */
@@ -629,22 +615,20 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov,
ceph_put_page_vector(pages, num_pages, false);
-out:
ceph_osdc_put_request(req);
- if (ret == 0) {
- pos += len;
- written += len;
- iov_iter_advance(&i, (size_t)len);
-
- if (pos > i_size_read(inode)) {
- check_caps = ceph_inode_set_size(inode, pos);
- if (check_caps)
- ceph_check_caps(ceph_inode(inode),
- CHECK_CAPS_AUTHONLY,
- NULL);
- }
- } else
+ if (ret)
break;
+ pos += n;
+ written += n;
+ iov_iter_advance(from, n);
+
+ if (pos > i_size_read(inode)) {
+ check_caps = ceph_inode_set_size(inode, pos);
+ if (check_caps)
+ ceph_check_caps(ceph_inode(inode),
+ CHECK_CAPS_AUTHONLY,
+ NULL);
+ }
}
if (ret != -EOLDSNAPC && written > 0) {
@@ -662,8 +646,7 @@ out:
* correct atomic write, we should e.g. take write locks on all
* objects, rollback on failure, etc.)
*/
-static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, size_t count)
+static ssize_t ceph_sync_write(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
@@ -681,7 +664,7 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov,
int ret;
struct timespec mtime = CURRENT_TIME;
loff_t pos = iocb->ki_pos;
- struct iov_iter i;
+ size_t count = iov_iter_count(from);
if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
return -EROFS;
@@ -703,9 +686,7 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov,
CEPH_OSD_FLAG_WRITE |
CEPH_OSD_FLAG_ACK;
- iov_iter_init(&i, iov, nr_segs, count, 0);
-
- while ((len = iov_iter_count(&i)) > 0) {
+ while ((len = iov_iter_count(from)) > 0) {
size_t left;
int n;
@@ -737,13 +718,12 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov,
left = len;
for (n = 0; n < num_pages; n++) {
size_t plen = min_t(size_t, left, PAGE_SIZE);
- ret = iov_iter_copy_from_user(pages[n], &i, 0, plen);
+ ret = copy_page_from_iter(pages[n], 0, plen, from);
if (ret != plen) {
ret = -EFAULT;
break;
}
left -= ret;
- iov_iter_advance(&i, ret);
}
if (ret < 0) {
@@ -796,8 +776,7 @@ out:
*
* Hmm, the sync read case isn't actually async... should it be?
*/
-static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *filp = iocb->ki_filp;
struct ceph_file_info *fi = filp->private_data;
@@ -823,40 +802,20 @@ again:
if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
(iocb->ki_filp->f_flags & O_DIRECT) ||
(fi->flags & CEPH_F_SYNC)) {
- struct iov_iter i;
dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n",
inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
ceph_cap_string(got));
- if (!read) {
- ret = generic_segment_checks(iov, &nr_segs,
- &len, VERIFY_WRITE);
- if (ret)
- goto out;
- }
-
- iov_iter_init(&i, iov, nr_segs, len, read);
-
/* hmm, this isn't really async... */
- ret = ceph_sync_read(iocb, &i, &checkeof);
+ ret = ceph_sync_read(iocb, to, &checkeof);
} else {
- /*
- * We can't modify the content of iov,
- * so we only read from beginning.
- */
- if (read) {
- iocb->ki_pos = pos;
- len = iocb->ki_nbytes;
- read = 0;
- }
dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
- inode, ceph_vinop(inode), pos, (unsigned)len,
+ inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
ceph_cap_string(got));
- ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
+ ret = generic_file_read_iter(iocb, to);
}
-out:
dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
ceph_put_cap_refs(ci, got);
@@ -872,6 +831,7 @@ out:
", reading more\n", iocb->ki_pos,
inode->i_size);
+ iov_iter_advance(to, ret);
read += ret;
len -= ret;
checkeof = 0;
@@ -895,8 +855,7 @@ out:
*
* If we are near ENOSPC, write synchronously.
*/
-static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct ceph_file_info *fi = file->private_data;
@@ -904,18 +863,15 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_osd_client *osdc =
&ceph_sb_to_client(inode->i_sb)->client->osdc;
- ssize_t count, written = 0;
+ ssize_t count = iov_iter_count(from), written = 0;
int err, want, got;
+ loff_t pos = iocb->ki_pos;
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EROFS;
mutex_lock(&inode->i_mutex);
- err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
- if (err)
- goto out;
-
/* We can write back this queue in page reclaim */
current->backing_dev_info = file->f_mapping->backing_dev_info;
@@ -925,6 +881,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
if (count == 0)
goto out;
+ iov_iter_truncate(from, count);
err = file_remove_suid(file);
if (err)
@@ -956,23 +913,26 @@ retry_snap:
if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
(file->f_flags & O_DIRECT) || (fi->flags & CEPH_F_SYNC)) {
+ struct iov_iter data;
mutex_unlock(&inode->i_mutex);
+ /* we might need to revert back to that point */
+ data = *from;
if (file->f_flags & O_DIRECT)
- written = ceph_sync_direct_write(iocb, iov,
- nr_segs, count);
+ written = ceph_sync_direct_write(iocb, &data);
else
- written = ceph_sync_write(iocb, iov, nr_segs, count);
+ written = ceph_sync_write(iocb, &data);
if (written == -EOLDSNAPC) {
dout("aio_write %p %llx.%llx %llu~%u"
"got EOLDSNAPC, retrying\n",
inode, ceph_vinop(inode),
- pos, (unsigned)iov->iov_len);
+ pos, (unsigned)count);
mutex_lock(&inode->i_mutex);
goto retry_snap;
}
+ if (written > 0)
+ iov_iter_advance(from, written);
} else {
loff_t old_size = inode->i_size;
- struct iov_iter from;
/*
* No need to acquire the i_truncate_mutex. Because
* the MDS revokes Fwb caps before sending truncate
@@ -980,8 +940,7 @@ retry_snap:
* are pending vmtruncate. So write and vmtruncate
* can not run at the same time
*/
- iov_iter_init(&from, iov, nr_segs, count, 0);
- written = generic_perform_write(file, &from, pos);
+ written = generic_perform_write(file, from, pos);
if (likely(written >= 0))
iocb->ki_pos = pos + written;
if (inode->i_size > old_size)
@@ -999,7 +958,7 @@ retry_snap:
}
dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n",
- inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
+ inode, ceph_vinop(inode), pos, (unsigned)count,
ceph_cap_string(got));
ceph_put_cap_refs(ci, got);
@@ -1276,16 +1235,16 @@ const struct file_operations ceph_file_fops = {
.open = ceph_open,
.release = ceph_release,
.llseek = ceph_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = ceph_aio_read,
- .aio_write = ceph_aio_write,
+ .read = new_sync_read,
+ .write = new_sync_write,
+ .read_iter = ceph_read_iter,
+ .write_iter = ceph_write_iter,
.mmap = ceph_mmap,
.fsync = ceph_fsync,
.lock = ceph_lock,
.flock = ceph_flock,
.splice_read = generic_file_splice_read,
- .splice_write = generic_file_splice_write,
+ .splice_write = iter_file_splice_write,
.unlocked_ioctl = ceph_ioctl,
.compat_ioctl = ceph_ioctl,
.fallocate = ceph_fallocate,
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 0b0728e5be2d..e4fff9ff1c27 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -744,7 +744,6 @@ static int fill_inode(struct inode *inode,
!__ceph_dir_is_complete(ci)) {
dout(" marking %p complete (empty)\n", inode);
__ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count));
- ci->i_max_offset = 2;
}
no_change:
/* only update max_size on auth cap */
@@ -822,7 +821,7 @@ no_change:
spin_unlock(&ci->i_ceph_lock);
}
} else if (cap_fmode >= 0) {
- pr_warning("mds issued no caps on %llx.%llx\n",
+ pr_warn("mds issued no caps on %llx.%llx\n",
ceph_vinop(inode));
__ceph_get_fmode(ci, cap_fmode);
}
@@ -890,41 +889,6 @@ out_unlock:
}
/*
- * Set dentry's directory position based on the current dir's max, and
- * order it in d_subdirs, so that dcache_readdir behaves.
- *
- * Always called under directory's i_mutex.
- */
-static void ceph_set_dentry_offset(struct dentry *dn)
-{
- struct dentry *dir = dn->d_parent;
- struct inode *inode = dir->d_inode;
- struct ceph_inode_info *ci;
- struct ceph_dentry_info *di;
-
- BUG_ON(!inode);
-
- ci = ceph_inode(inode);
- di = ceph_dentry(dn);
-
- spin_lock(&ci->i_ceph_lock);
- if (!__ceph_dir_is_complete(ci)) {
- spin_unlock(&ci->i_ceph_lock);
- return;
- }
- di->offset = ceph_inode(inode)->i_max_offset++;
- spin_unlock(&ci->i_ceph_lock);
-
- spin_lock(&dir->d_lock);
- spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
- list_move(&dn->d_u.d_child, &dir->d_subdirs);
- dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
- dn->d_u.d_child.prev, dn->d_u.d_child.next);
- spin_unlock(&dn->d_lock);
- spin_unlock(&dir->d_lock);
-}
-
-/*
* splice a dentry to an inode.
* caller must hold directory i_mutex for this to be safe.
*
@@ -933,7 +897,7 @@ static void ceph_set_dentry_offset(struct dentry *dn)
* the caller) if we fail.
*/
static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
- bool *prehash, bool set_offset)
+ bool *prehash)
{
struct dentry *realdn;
@@ -965,8 +929,6 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
}
if ((!prehash || *prehash) && d_unhashed(dn))
d_rehash(dn);
- if (set_offset)
- ceph_set_dentry_offset(dn);
out:
return dn;
}
@@ -987,7 +949,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
{
struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
struct inode *in = NULL;
- struct ceph_mds_reply_inode *ininfo;
struct ceph_vino vino;
struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
int err = 0;
@@ -1161,6 +1122,9 @@ retry_lookup:
/* rename? */
if (req->r_old_dentry && req->r_op == CEPH_MDS_OP_RENAME) {
+ struct inode *olddir = req->r_old_dentry_dir;
+ BUG_ON(!olddir);
+
dout(" src %p '%.*s' dst %p '%.*s'\n",
req->r_old_dentry,
req->r_old_dentry->d_name.len,
@@ -1180,13 +1144,10 @@ retry_lookup:
rehashing bug in vfs_rename_dir */
ceph_invalidate_dentry_lease(dn);
- /*
- * d_move() puts the renamed dentry at the end of
- * d_subdirs. We need to assign it an appropriate
- * directory offset so we can behave when dir is
- * complete.
- */
- ceph_set_dentry_offset(req->r_old_dentry);
+ /* d_move screws up sibling dentries' offsets */
+ ceph_dir_clear_complete(dir);
+ ceph_dir_clear_complete(olddir);
+
dout("dn %p gets new offset %lld\n", req->r_old_dentry,
ceph_dentry(req->r_old_dentry)->offset);
@@ -1213,8 +1174,9 @@ retry_lookup:
/* attach proper inode */
if (!dn->d_inode) {
+ ceph_dir_clear_complete(dir);
ihold(in);
- dn = splice_dentry(dn, in, &have_lease, true);
+ dn = splice_dentry(dn, in, &have_lease);
if (IS_ERR(dn)) {
err = PTR_ERR(dn);
goto done;
@@ -1235,17 +1197,16 @@ retry_lookup:
(req->r_op == CEPH_MDS_OP_LOOKUPSNAP ||
req->r_op == CEPH_MDS_OP_MKSNAP)) {
struct dentry *dn = req->r_dentry;
+ struct inode *dir = req->r_locked_dir;
/* fill out a snapdir LOOKUPSNAP dentry */
BUG_ON(!dn);
- BUG_ON(!req->r_locked_dir);
- BUG_ON(ceph_snap(req->r_locked_dir) != CEPH_SNAPDIR);
- ininfo = rinfo->targeti.in;
- vino.ino = le64_to_cpu(ininfo->ino);
- vino.snap = le64_to_cpu(ininfo->snapid);
+ BUG_ON(!dir);
+ BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR);
dout(" linking snapped dir %p to dn %p\n", in, dn);
+ ceph_dir_clear_complete(dir);
ihold(in);
- dn = splice_dentry(dn, in, NULL, true);
+ dn = splice_dentry(dn, in, NULL);
if (IS_ERR(dn)) {
err = PTR_ERR(dn);
goto done;
@@ -1407,7 +1368,7 @@ retry_lookup:
}
if (!dn->d_inode) {
- dn = splice_dentry(dn, in, NULL, false);
+ dn = splice_dentry(dn, in, NULL);
if (IS_ERR(dn)) {
err = PTR_ERR(dn);
dn = NULL;
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index fdf941b44ff1..a822a6e58290 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -109,6 +109,8 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
return PTR_ERR(req);
req->r_inode = inode;
ihold(inode);
+ req->r_num_caps = 1;
+
req->r_inode_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL;
req->r_args.setlayout.layout.fl_stripe_unit =
@@ -153,6 +155,7 @@ static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg)
return PTR_ERR(req);
req->r_inode = inode;
ihold(inode);
+ req->r_num_caps = 1;
req->r_args.setlayout.layout.fl_stripe_unit =
cpu_to_le32(l.stripe_unit);
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index d94ba0df9f4d..fbc39c47bacd 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -45,6 +45,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
return PTR_ERR(req);
req->r_inode = inode;
ihold(inode);
+ req->r_num_caps = 1;
/* mds requires start and length rather than start and end */
if (LLONG_MAX == fl->fl_end)
@@ -52,10 +53,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
else
length = fl->fl_end - fl->fl_start + 1;
- if (lock_type == CEPH_LOCK_FCNTL)
- owner = secure_addr(fl->fl_owner);
- else
- owner = secure_addr(fl->fl_file);
+ owner = secure_addr(fl->fl_owner);
dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, "
"start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type,
@@ -313,10 +311,7 @@ int lock_to_ceph_filelock(struct file_lock *lock,
cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1);
cephlock->client = cpu_to_le64(0);
cephlock->pid = cpu_to_le64((u64)lock->fl_pid);
- if (lock->fl_flags & FL_POSIX)
- cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner));
- else
- cephlock->owner = cpu_to_le64(secure_addr(lock->fl_file));
+ cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner));
switch (lock->fl_type) {
case F_RDLCK:
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 2b4d093d0563..9a33b98cb000 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2218,13 +2218,13 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
/* dup? */
if ((req->r_got_unsafe && !head->safe) ||
(req->r_got_safe && head->safe)) {
- pr_warning("got a dup %s reply on %llu from mds%d\n",
+ pr_warn("got a dup %s reply on %llu from mds%d\n",
head->safe ? "safe" : "unsafe", tid, mds);
mutex_unlock(&mdsc->mutex);
goto out;
}
if (req->r_got_safe && !head->safe) {
- pr_warning("got unsafe after safe on %llu from mds%d\n",
+ pr_warn("got unsafe after safe on %llu from mds%d\n",
tid, mds);
mutex_unlock(&mdsc->mutex);
goto out;
@@ -3525,7 +3525,7 @@ static void peer_reset(struct ceph_connection *con)
struct ceph_mds_session *s = con->private;
struct ceph_mds_client *mdsc = s->s_mdsc;
- pr_warning("mds%d closed our session\n", s->s_mds);
+ pr_warn("mds%d closed our session\n", s->s_mds);
send_mds_reconnect(mdsc, s);
}
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 132b64eeecd4..261531e55e9d 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -62,7 +62,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
ceph_decode_16_safe(p, end, version, bad);
if (version > 3) {
- pr_warning("got mdsmap version %d > 3, failing", version);
+ pr_warn("got mdsmap version %d > 3, failing", version);
goto bad;
}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 7866cd05a6bb..ead05cc1f447 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -266,7 +266,6 @@ struct ceph_inode_info {
struct timespec i_rctime;
u64 i_rbytes, i_rfiles, i_rsubdirs;
u64 i_files, i_subdirs;
- u64 i_max_offset; /* largest readdir offset, set with complete dir */
struct rb_root i_fragtree;
struct mutex i_fragtree_mutex;
OpenPOWER on IntegriCloud