diff options
author | Sage Weil <sage@newdream.net> | 2011-06-01 16:08:44 -0700 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2011-06-07 21:34:14 -0700 |
commit | c3cd62839aaa2cdb2b99687c9e44f1b300a4aece (patch) | |
tree | fc7823426f29f44911c93394fb8a3e43d0c91846 | |
parent | 2584547230ae49b8de91ab3bb5e0a81898905b45 (diff) | |
download | talos-obmc-linux-c3cd62839aaa2cdb2b99687c9e44f1b300a4aece.tar.gz talos-obmc-linux-c3cd62839aaa2cdb2b99687c9e44f1b300a4aece.zip |
ceph: fix short sync reads from the OSD
If we get a short read from the OSD because the object is small, we need to
zero the remainder of the buffer. For O_DIRECT reads, the attempted range
is not trimmed to i_size by the VFS, so we were actually looping
indefinitely.
Fix by trimming by i_size, and the unconditionally zeroing the trailing
range.
Reported-by: Jeff Wu <cpwu@tnsoft.com.cn>
Signed-off-by: Sage Weil <sage@newdream.net>
-rw-r--r-- | fs/ceph/file.c | 28 |
1 files changed, 15 insertions, 13 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 8c5ac4e72832..b654f403139e 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -283,7 +283,7 @@ int ceph_release(struct inode *inode, struct file *file) static int striped_read(struct inode *inode, u64 off, u64 len, struct page **pages, int num_pages, - int *checkeof, bool align_to_pages, + int *checkeof, bool o_direct, unsigned long buf_align) { struct ceph_fs_client *fsc = ceph_inode_to_client(inode); @@ -308,7 +308,7 @@ static int striped_read(struct inode *inode, io_align = off & ~PAGE_MASK; more: - if (align_to_pages) + if (o_direct) page_align = (pos - io_align + buf_align) & ~PAGE_MASK; else page_align = pos & ~PAGE_MASK; @@ -346,20 +346,22 @@ more: } if (was_short) { - /* was original extent fully inside i_size? */ - if (pos + left <= inode->i_size) { - dout("zero tail\n"); - ceph_zero_page_vector_range(page_off + read, len - read, + /* did we bounce off eof? */ + if (pos + left > inode->i_size) + *checkeof = 1; + + /* zero trailing bytes (inside i_size) */ + if (left > 0 && pos < inode->i_size) { + if (pos + left > inode->i_size) + left = inode->i_size - pos; + + dout("zero tail %d\n", left); + ceph_zero_page_vector_range(page_off + read, left, pages); - read = len; - goto out; + read += left; } - - /* check i_size */ - *checkeof = 1; } -out: if (ret >= 0) ret = read; dout("striped_read returns %d\n", ret); @@ -659,7 +661,7 @@ out: /* hit EOF or hole? */ if (statret == 0 && *ppos < inode->i_size) { - dout("aio_read sync_read hit hole, reading more\n"); + dout("aio_read sync_read hit hole, ppos %lld < size %lld, reading more\n", *ppos, inode->i_size); read += ret; base += ret; len -= ret; |