summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/cifs/connect.c4
-rw-r--r--fs/ecryptfs/file.c6
-rw-r--r--fs/fuse/dir.c12
-rw-r--r--fs/fuse/file.c58
-rw-r--r--fs/fuse/inode.c7
-rw-r--r--fs/gfs2/bmap.c17
-rw-r--r--fs/gfs2/dir.c43
-rw-r--r--fs/gfs2/file.c19
-rw-r--r--fs/gfs2/inode.c1
-rw-r--r--fs/gfs2/lops.c4
-rw-r--r--fs/gfs2/rgrp.c4
-rw-r--r--fs/gfs2/super.c6
-rw-r--r--fs/hpfs/file.c4
-rw-r--r--fs/jfs/jfs_logmgr.c8
-rw-r--r--fs/jfs/super.c38
-rw-r--r--fs/proc/base.c1
-rw-r--r--fs/xfs/xfs_acl.c31
-rw-r--r--fs/xfs/xfs_acl.h31
-rw-r--r--fs/xfs/xfs_attr_leaf.c2
-rw-r--r--fs/xfs/xfs_dquot.c37
-rw-r--r--fs/xfs/xfs_inode.c16
-rw-r--r--fs/xfs/xfs_log_recover.c84
-rw-r--r--fs/xfs/xfs_qm.c40
-rw-r--r--fs/xfs/xfs_quota.h2
-rw-r--r--fs/xfs/xfs_super.c11
25 files changed, 378 insertions, 108 deletions
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 5b97e56ddbca..e3bc39bb9d12 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3279,8 +3279,8 @@ build_unc_path_to_root(const struct smb_vol *vol,
pos = full_path + unc_len;
if (pplen) {
- *pos++ = CIFS_DIR_SEP(cifs_sb);
- strncpy(pos, vol->prepath, pplen);
+ *pos = CIFS_DIR_SEP(cifs_sb);
+ strncpy(pos + 1, vol->prepath, pplen);
pos += pplen;
}
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 201f0a0d6b0a..a7abbea2c096 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -295,6 +295,12 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
static int
ecryptfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
+ int rc;
+
+ rc = filemap_write_and_wait(file->f_mapping);
+ if (rc)
+ return rc;
+
return vfs_fsync(ecryptfs_file_to_lower(file), datasync);
}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 254df56b847b..f3f783dc4f75 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -180,6 +180,8 @@ u64 fuse_get_attr_version(struct fuse_conn *fc)
static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
{
struct inode *inode;
+ struct dentry *parent;
+ struct fuse_conn *fc;
inode = ACCESS_ONCE(entry->d_inode);
if (inode && is_bad_inode(inode))
@@ -187,10 +189,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
else if (fuse_dentry_time(entry) < get_jiffies_64()) {
int err;
struct fuse_entry_out outarg;
- struct fuse_conn *fc;
struct fuse_req *req;
struct fuse_forget_link *forget;
- struct dentry *parent;
u64 attr_version;
/* For negative dentries, always do a fresh lookup */
@@ -241,8 +241,14 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
entry_attr_timeout(&outarg),
attr_version);
fuse_change_entry_timeout(entry, &outarg);
+ } else if (inode) {
+ fc = get_fuse_conn(inode);
+ if (fc->readdirplus_auto) {
+ parent = dget_parent(entry);
+ fuse_advise_use_readdirplus(parent->d_inode);
+ dput(parent);
+ }
}
- fuse_advise_use_readdirplus(inode);
return 1;
}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index d1c9b85b3f58..e570081f9f76 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -16,6 +16,7 @@
#include <linux/compat.h>
#include <linux/swap.h>
#include <linux/aio.h>
+#include <linux/falloc.h>
static const struct file_operations fuse_direct_io_file_operations;
@@ -1278,7 +1279,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
iov_iter_init(&ii, iov, nr_segs, count, 0);
- req = fuse_get_req(fc, fuse_iter_npages(&ii));
+ if (io->async)
+ req = fuse_get_req_for_background(fc, fuse_iter_npages(&ii));
+ else
+ req = fuse_get_req(fc, fuse_iter_npages(&ii));
if (IS_ERR(req))
return PTR_ERR(req);
@@ -1314,7 +1318,11 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov,
break;
if (count) {
fuse_put_request(fc, req);
- req = fuse_get_req(fc, fuse_iter_npages(&ii));
+ if (io->async)
+ req = fuse_get_req_for_background(fc,
+ fuse_iter_npages(&ii));
+ else
+ req = fuse_get_req(fc, fuse_iter_npages(&ii));
if (IS_ERR(req))
break;
}
@@ -2365,6 +2373,11 @@ static void fuse_do_truncate(struct file *file)
fuse_do_setattr(inode, &attr, file);
}
+static inline loff_t fuse_round_up(loff_t off)
+{
+ return round_up(off, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
+}
+
static ssize_t
fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
loff_t offset, unsigned long nr_segs)
@@ -2372,6 +2385,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
ssize_t ret = 0;
struct file *file = iocb->ki_filp;
struct fuse_file *ff = file->private_data;
+ bool async_dio = ff->fc->async_dio;
loff_t pos = 0;
struct inode *inode;
loff_t i_size;
@@ -2383,10 +2397,10 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
i_size = i_size_read(inode);
/* optimization for short read */
- if (rw != WRITE && offset + count > i_size) {
+ if (async_dio && rw != WRITE && offset + count > i_size) {
if (offset >= i_size)
return 0;
- count = i_size - offset;
+ count = min_t(loff_t, count, fuse_round_up(i_size - offset));
}
io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL);
@@ -2404,7 +2418,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
* By default, we want to optimize all I/Os with async request
* submission to the client filesystem if supported.
*/
- io->async = ff->fc->async_dio;
+ io->async = async_dio;
io->iocb = iocb;
/*
@@ -2412,7 +2426,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
* to wait on real async I/O requests, so we must submit this request
* synchronously.
*/
- if (!is_sync_kiocb(iocb) && (offset + count > i_size))
+ if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE)
io->async = false;
if (rw == WRITE)
@@ -2424,7 +2438,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
fuse_aio_complete(io, ret < 0 ? ret : 0, -1);
/* we have a non-extending, async request, so return */
- if (ret > 0 && !is_sync_kiocb(iocb))
+ if (!is_sync_kiocb(iocb))
return -EIOCBQUEUED;
ret = wait_on_sync_kiocb(iocb);
@@ -2446,6 +2460,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
loff_t length)
{
struct fuse_file *ff = file->private_data;
+ struct inode *inode = file->f_inode;
struct fuse_conn *fc = ff->fc;
struct fuse_req *req;
struct fuse_fallocate_in inarg = {
@@ -2459,9 +2474,16 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
if (fc->no_fallocate)
return -EOPNOTSUPP;
+ if (mode & FALLOC_FL_PUNCH_HOLE) {
+ mutex_lock(&inode->i_mutex);
+ fuse_set_nowrite(inode);
+ }
+
req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto out;
+ }
req->in.h.opcode = FUSE_FALLOCATE;
req->in.h.nodeid = ff->nodeid;
@@ -2476,6 +2498,24 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
}
fuse_put_request(fc, req);
+ if (err)
+ goto out;
+
+ /* we could have extended the file */
+ if (!(mode & FALLOC_FL_KEEP_SIZE))
+ fuse_write_update_size(inode, offset + length);
+
+ if (mode & FALLOC_FL_PUNCH_HOLE)
+ truncate_pagecache_range(inode, offset, offset + length - 1);
+
+ fuse_invalidate_attr(inode);
+
+out:
+ if (mode & FALLOC_FL_PUNCH_HOLE) {
+ fuse_release_nowrite(inode);
+ mutex_unlock(&inode->i_mutex);
+ }
+
return err;
}
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 6201f81e4d3a..9a0cdde14a08 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -867,10 +867,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->dont_mask = 1;
if (arg->flags & FUSE_AUTO_INVAL_DATA)
fc->auto_inval_data = 1;
- if (arg->flags & FUSE_DO_READDIRPLUS)
+ if (arg->flags & FUSE_DO_READDIRPLUS) {
fc->do_readdirplus = 1;
- if (arg->flags & FUSE_READDIRPLUS_AUTO)
- fc->readdirplus_auto = 1;
+ if (arg->flags & FUSE_READDIRPLUS_AUTO)
+ fc->readdirplus_auto = 1;
+ }
if (arg->flags & FUSE_ASYNC_DIO)
fc->async_dio = 1;
} else {
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 1dc9a13ce6bb..93b5809c20bb 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -1286,17 +1286,26 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
if (ret)
return ret;
+ ret = get_write_access(inode);
+ if (ret)
+ return ret;
+
inode_dio_wait(inode);
ret = gfs2_rs_alloc(GFS2_I(inode));
if (ret)
- return ret;
+ goto out;
oldsize = inode->i_size;
- if (newsize >= oldsize)
- return do_grow(inode, newsize);
+ if (newsize >= oldsize) {
+ ret = do_grow(inode, newsize);
+ goto out;
+ }
- return do_shrink(inode, oldsize, newsize);
+ ret = do_shrink(inode, oldsize, newsize);
+out:
+ put_write_access(inode);
+ return ret;
}
int gfs2_truncatei_resume(struct gfs2_inode *ip)
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index c3e82bd23179..b631c9043460 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -354,22 +354,31 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
return ERR_PTR(-EIO);
}
- hc = kmalloc(hsize, GFP_NOFS);
- ret = -ENOMEM;
+ hc = kmalloc(hsize, GFP_NOFS | __GFP_NOWARN);
+ if (hc == NULL)
+ hc = __vmalloc(hsize, GFP_NOFS, PAGE_KERNEL);
+
if (hc == NULL)
return ERR_PTR(-ENOMEM);
ret = gfs2_dir_read_data(ip, hc, hsize);
if (ret < 0) {
- kfree(hc);
+ if (is_vmalloc_addr(hc))
+ vfree(hc);
+ else
+ kfree(hc);
return ERR_PTR(ret);
}
spin_lock(&inode->i_lock);
- if (ip->i_hash_cache)
- kfree(hc);
- else
+ if (ip->i_hash_cache) {
+ if (is_vmalloc_addr(hc))
+ vfree(hc);
+ else
+ kfree(hc);
+ } else {
ip->i_hash_cache = hc;
+ }
spin_unlock(&inode->i_lock);
return ip->i_hash_cache;
@@ -385,7 +394,10 @@ void gfs2_dir_hash_inval(struct gfs2_inode *ip)
{
__be64 *hc = ip->i_hash_cache;
ip->i_hash_cache = NULL;
- kfree(hc);
+ if (is_vmalloc_addr(hc))
+ vfree(hc);
+ else
+ kfree(hc);
}
static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent)
@@ -1113,7 +1125,10 @@ static int dir_double_exhash(struct gfs2_inode *dip)
if (IS_ERR(hc))
return PTR_ERR(hc);
- h = hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS);
+ h = hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS | __GFP_NOWARN);
+ if (hc2 == NULL)
+ hc2 = __vmalloc(hsize_bytes * 2, GFP_NOFS, PAGE_KERNEL);
+
if (!hc2)
return -ENOMEM;
@@ -1145,7 +1160,10 @@ fail:
gfs2_dinode_out(dip, dibh->b_data);
brelse(dibh);
out_kfree:
- kfree(hc2);
+ if (is_vmalloc_addr(hc2))
+ vfree(hc2);
+ else
+ kfree(hc2);
return error;
}
@@ -1846,6 +1864,8 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
ht = kzalloc(size, GFP_NOFS);
+ if (ht == NULL)
+ ht = vzalloc(size);
if (!ht)
return -ENOMEM;
@@ -1933,7 +1953,10 @@ out_rlist:
gfs2_rlist_free(&rlist);
gfs2_quota_unhold(dip);
out:
- kfree(ht);
+ if (is_vmalloc_addr(ht))
+ vfree(ht);
+ else
+ kfree(ht);
return error;
}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index acd16764b133..ad0dc38d87ab 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -402,16 +402,20 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
/* Update file times before taking page lock */
file_update_time(vma->vm_file);
+ ret = get_write_access(inode);
+ if (ret)
+ goto out;
+
ret = gfs2_rs_alloc(ip);
if (ret)
- return ret;
+ goto out_write_access;
gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE);
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
ret = gfs2_glock_nq(&gh);
if (ret)
- goto out;
+ goto out_uninit;
set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
set_bit(GIF_SW_PAGED, &ip->i_flags);
@@ -480,12 +484,15 @@ out_quota_unlock:
gfs2_quota_unlock(ip);
out_unlock:
gfs2_glock_dq(&gh);
-out:
+out_uninit:
gfs2_holder_uninit(&gh);
if (ret == 0) {
set_page_dirty(page);
wait_for_stable_page(page);
}
+out_write_access:
+ put_write_access(inode);
+out:
sb_end_pagefault(inode->i_sb);
return block_page_mkwrite_return(ret);
}
@@ -594,10 +601,10 @@ static int gfs2_release(struct inode *inode, struct file *file)
kfree(file->private_data);
file->private_data = NULL;
- if ((file->f_mode & FMODE_WRITE) &&
- (atomic_read(&inode->i_writecount) == 1))
- gfs2_rs_delete(ip);
+ if (!(file->f_mode & FMODE_WRITE))
+ return 0;
+ gfs2_rs_delete(ip);
return 0;
}
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 8833a4f264e3..62b484e4a9e4 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -189,6 +189,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
return inode;
fail_refresh:
+ ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
ip->i_iopen_gh.gh_gl->gl_object = NULL;
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
fail_iopen:
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 68b4c8f1fce8..6c33d7b6e0c4 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -419,7 +419,9 @@ static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit,
if (total > limit)
num = limit;
gfs2_log_unlock(sdp);
- page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_METADATA, num + 1, num);
+ page = gfs2_get_log_desc(sdp,
+ is_databuf ? GFS2_LOG_DESC_JDATA :
+ GFS2_LOG_DESC_METADATA, num + 1, num);
ld = page_address(page);
gfs2_log_lock(sdp);
ptr = (__be64 *)(ld + 1);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 5232525934ae..9809156e3d04 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -638,8 +638,10 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
*/
void gfs2_rs_delete(struct gfs2_inode *ip)
{
+ struct inode *inode = &ip->i_inode;
+
down_write(&ip->i_rw_mutex);
- if (ip->i_res) {
+ if (ip->i_res && atomic_read(&inode->i_writecount) <= 1) {
gfs2_rs_deltree(ip->i_res);
BUG_ON(ip->i_res->rs_free);
kmem_cache_free(gfs2_rsrv_cachep, ip->i_res);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 917c8e1eb4ae..e5639dec66c4 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1444,6 +1444,7 @@ static void gfs2_evict_inode(struct inode *inode)
/* Must not read inode block until block type has been verified */
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
if (unlikely(error)) {
+ ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
goto out;
}
@@ -1514,8 +1515,10 @@ out_unlock:
if (gfs2_rs_active(ip->i_res))
gfs2_rs_deltree(ip->i_res);
- if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
+ if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
+ ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq(&ip->i_iopen_gh);
+ }
gfs2_holder_uninit(&ip->i_iopen_gh);
gfs2_glock_dq_uninit(&gh);
if (error && error != GLR_TRYFAILED && error != -EROFS)
@@ -1534,6 +1537,7 @@ out:
ip->i_gl = NULL;
if (ip->i_iopen_gh.gh_gl) {
ip->i_iopen_gh.gh_gl->gl_object = NULL;
+ ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
}
}
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 3027f4dbbab5..e4ba5fe4c3b5 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -109,10 +109,14 @@ static void hpfs_write_failed(struct address_space *mapping, loff_t to)
{
struct inode *inode = mapping->host;
+ hpfs_lock(inode->i_sb);
+
if (to > inode->i_size) {
truncate_pagecache(inode, to, inode->i_size);
hpfs_truncate(inode);
}
+
+ hpfs_unlock(inode->i_sb);
}
static int hpfs_write_begin(struct file *file, struct address_space *mapping,
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index c57499dca89c..360d27c48887 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -2009,7 +2009,13 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
bio->bi_end_io = lbmIODone;
bio->bi_private = bp;
- submit_bio(READ_SYNC, bio);
+ /*check if journaling to disk has been disabled*/
+ if (log->no_integrity) {
+ bio->bi_size = 0;
+ lbmIODone(bio, 0);
+ } else {
+ submit_bio(READ_SYNC, bio);
+ }
wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 2003e830ed1c..788e0a9c1fb0 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -611,11 +611,28 @@ static int jfs_freeze(struct super_block *sb)
{
struct jfs_sb_info *sbi = JFS_SBI(sb);
struct jfs_log *log = sbi->log;
+ int rc = 0;
if (!(sb->s_flags & MS_RDONLY)) {
txQuiesce(sb);
- lmLogShutdown(log);
- updateSuper(sb, FM_CLEAN);
+ rc = lmLogShutdown(log);
+ if (rc) {
+ jfs_error(sb, "jfs_freeze: lmLogShutdown failed");
+
+ /* let operations fail rather than hang */
+ txResume(sb);
+
+ return rc;
+ }
+ rc = updateSuper(sb, FM_CLEAN);
+ if (rc) {
+ jfs_err("jfs_freeze: updateSuper failed\n");
+ /*
+ * Don't fail here. Everything succeeded except
+ * marking the superblock clean, so there's really
+ * no harm in leaving it frozen for now.
+ */
+ }
}
return 0;
}
@@ -627,13 +644,18 @@ static int jfs_unfreeze(struct super_block *sb)
int rc = 0;
if (!(sb->s_flags & MS_RDONLY)) {
- updateSuper(sb, FM_MOUNT);
- if ((rc = lmLogInit(log)))
- jfs_err("jfs_unlock failed with return code %d", rc);
- else
- txResume(sb);
+ rc = updateSuper(sb, FM_MOUNT);
+ if (rc) {
+ jfs_error(sb, "jfs_unfreeze: updateSuper failed");
+ goto out;
+ }
+ rc = lmLogInit(log);
+ if (rc)
+ jfs_error(sb, "jfs_unfreeze: lmLogInit failed");
+out:
+ txResume(sb);
}
- return 0;
+ return rc;
}
static struct dentry *jfs_do_mount(struct file_system_type *fs_type,
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dd51e50001fe..c3834dad09b3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2118,6 +2118,7 @@ static int show_timer(struct seq_file *m, void *v)
nstr[notify & ~SIGEV_THREAD_ID],
(notify & SIGEV_THREAD_ID) ? "tid" : "pid",
pid_nr_ns(timer->it_pid, tp->ns));
+ seq_printf(m, "ClockID: %d\n", timer->it_clock);
return 0;
}
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 1d32f1d52763..306d883d89bc 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -21,6 +21,8 @@
#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
#include "xfs_vnodeops.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
#include "xfs_trace.h"
#include <linux/slab.h>
#include <linux/xattr.h>
@@ -34,7 +36,9 @@
*/
STATIC struct posix_acl *
-xfs_acl_from_disk(struct xfs_acl *aclp)
+xfs_acl_from_disk(
+ struct xfs_acl *aclp,
+ int max_entries)
{
struct posix_acl_entry *acl_e;
struct posix_acl *acl;
@@ -42,7 +46,7 @@ xfs_acl_from_disk(struct xfs_acl *aclp)
unsigned int count, i;
count = be32_to_cpu(aclp->acl_cnt);
- if (count > XFS_ACL_MAX_ENTRIES)
+ if (count > max_entries)
return ERR_PTR(-EFSCORRUPTED);
acl = posix_acl_alloc(count, GFP_KERNEL);
@@ -108,9 +112,9 @@ xfs_get_acl(struct inode *inode, int type)
struct xfs_inode *ip = XFS_I(inode);
struct posix_acl *acl;
struct xfs_acl *xfs_acl;
- int len = sizeof(struct xfs_acl);
unsigned char *ea_name;
int error;
+ int len;
acl = get_cached_acl(inode, type);
if (acl != ACL_NOT_CACHED)
@@ -133,8 +137,8 @@ xfs_get_acl(struct inode *inode, int type)
* If we have a cached ACLs value just return it, not need to
* go out to the disk.
*/
-
- xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
+ len = XFS_ACL_MAX_SIZE(ip->i_mount);
+ xfs_acl = kzalloc(len, GFP_KERNEL);
if (!xfs_acl)
return ERR_PTR(-ENOMEM);
@@ -153,7 +157,7 @@ xfs_get_acl(struct inode *inode, int type)
goto out;
}
- acl = xfs_acl_from_disk(xfs_acl);
+ acl = xfs_acl_from_disk(xfs_acl, XFS_ACL_MAX_ENTRIES(ip->i_mount));
if (IS_ERR(acl))
goto out;
@@ -189,16 +193,17 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
if (acl) {
struct xfs_acl *xfs_acl;
- int len;
+ int len = XFS_ACL_MAX_SIZE(ip->i_mount);
- xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
+ xfs_acl = kzalloc(len, GFP_KERNEL);
if (!xfs_acl)
return -ENOMEM;
xfs_acl_to_disk(xfs_acl, acl);
- len = sizeof(struct xfs_acl) -
- (sizeof(struct xfs_acl_entry) *
- (XFS_ACL_MAX_ENTRIES - acl->a_count));
+
+ /* subtract away the unused acl entries */
+ len -= sizeof(struct xfs_acl_entry) *
+ (XFS_ACL_MAX_ENTRIES(ip->i_mount) - acl->a_count);
error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
len, ATTR_ROOT);
@@ -243,7 +248,7 @@ xfs_set_mode(struct inode *inode, umode_t mode)
static int
xfs_acl_exists(struct inode *inode, unsigned char *name)
{
- int len = sizeof(struct xfs_acl);
+ int len = XFS_ACL_MAX_SIZE(XFS_M(inode->i_sb));
return (xfs_attr_get(XFS_I(inode), name, NULL, &len,
ATTR_ROOT|ATTR_KERNOVAL) == 0);
@@ -379,7 +384,7 @@ xfs_xattr_acl_set(struct dentry *dentry, const char *name,
goto out_release;
error = -EINVAL;
- if (acl->a_count > XFS_ACL_MAX_ENTRIES)
+ if (acl->a_count > XFS_ACL_MAX_ENTRIES(XFS_M(inode->i_sb)))
goto out_release;
if (type == ACL_TYPE_ACCESS) {
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 39632d941354..4016a567b83c 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -22,19 +22,36 @@ struct inode;
struct posix_acl;
struct xfs_inode;
-#define XFS_ACL_MAX_ENTRIES 25
#define XFS_ACL_NOT_PRESENT (-1)
/* On-disk XFS access control list structure */
+struct xfs_acl_entry {
+ __be32 ae_tag;
+ __be32 ae_id;
+ __be16 ae_perm;
+ __be16 ae_pad; /* fill the implicit hole in the structure */
+};
+
struct xfs_acl {
- __be32 acl_cnt;
- struct xfs_acl_entry {
- __be32 ae_tag;
- __be32 ae_id;
- __be16 ae_perm;
- } acl_entry[XFS_ACL_MAX_ENTRIES];
+ __be32 acl_cnt;
+ struct xfs_acl_entry acl_entry[0];
};
+/*
+ * The number of ACL entries allowed is defined by the on-disk format.
+ * For v4 superblocks, that is limited to 25 entries. For v5 superblocks, it is
+ * limited only by the maximum size of the xattr that stores the information.
+ */
+#define XFS_ACL_MAX_ENTRIES(mp) \
+ (xfs_sb_version_hascrc(&mp->m_sb) \
+ ? (XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \
+ sizeof(struct xfs_acl_entry) \
+ : 25)
+
+#define XFS_ACL_MAX_SIZE(mp) \
+ (sizeof(struct xfs_acl) + \
+ sizeof(struct xfs_acl_entry) * XFS_ACL_MAX_ENTRIES((mp)))
+
/* On-disk XFS extended attribute names */
#define SGI_ACL_FILE (unsigned char *)"SGI_ACL_FILE"
#define SGI_ACL_DEFAULT (unsigned char *)"SGI_ACL_DEFAULT"
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index d788302e506a..31d3cd129269 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -3258,7 +3258,7 @@ xfs_attr3_leaf_inactive(
name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
if (name_rmt->valueblk) {
lp->valueblk = be32_to_cpu(name_rmt->valueblk);
- lp->valuelen = XFS_B_TO_FSB(dp->i_mount,
+ lp->valuelen = xfs_attr3_rmt_blocks(dp->i_mount,
be32_to_cpu(name_rmt->valuelen));
lp++;
}
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index a41f8bf1da37..044e97a33c8d 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -249,8 +249,11 @@ xfs_qm_init_dquot_blk(
d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
d->dd_diskdq.d_id = cpu_to_be32(curid);
d->dd_diskdq.d_flags = type;
- if (xfs_sb_version_hascrc(&mp->m_sb))
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
+ xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
+ XFS_DQUOT_CRC_OFF);
+ }
}
xfs_trans_dquot_buf(tp, bp,
@@ -286,23 +289,6 @@ xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5;
}
-STATIC void
-xfs_dquot_buf_calc_crc(
- struct xfs_mount *mp,
- struct xfs_buf *bp)
-{
- struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
- int i;
-
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- return;
-
- for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++, d++) {
- xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
- offsetof(struct xfs_dqblk, dd_crc));
- }
-}
-
STATIC bool
xfs_dquot_buf_verify_crc(
struct xfs_mount *mp,
@@ -328,12 +314,11 @@ xfs_dquot_buf_verify_crc(
for (i = 0; i < ndquots; i++, d++) {
if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
- offsetof(struct xfs_dqblk, dd_crc)))
+ XFS_DQUOT_CRC_OFF))
return false;
if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid))
return false;
}
-
return true;
}
@@ -393,6 +378,11 @@ xfs_dquot_buf_read_verify(
}
}
+/*
+ * we don't calculate the CRC here as that is done when the dquot is flushed to
+ * the buffer after the update is done. This ensures that the dquot in the
+ * buffer always has an up-to-date CRC value.
+ */
void
xfs_dquot_buf_write_verify(
struct xfs_buf *bp)
@@ -404,7 +394,6 @@ xfs_dquot_buf_write_verify(
xfs_buf_ioerror(bp, EFSCORRUPTED);
return;
}
- xfs_dquot_buf_calc_crc(mp, bp);
}
const struct xfs_buf_ops xfs_dquot_buf_ops = {
@@ -1151,11 +1140,17 @@ xfs_qm_dqflush(
* copy the lsn into the on-disk dquot now while we have the in memory
* dquot here. This can't be done later in the write verifier as we
* can't get access to the log item at that point in time.
+ *
+ * We also calculate the CRC here so that the on-disk dquot in the
+ * buffer always has a valid CRC. This ensures there is no possibility
+ * of a dquot without an up-to-date CRC getting to disk.
*/
if (xfs_sb_version_hascrc(&mp->m_sb)) {
struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddqp;
dqb->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn);
+ xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk),
+ XFS_DQUOT_CRC_OFF);
}
/*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index efbe1accb6ca..7f7be5f98f52 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1638,6 +1638,10 @@ xfs_iunlink(
dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
offset = ip->i_imap.im_boffset +
offsetof(xfs_dinode_t, di_next_unlinked);
+
+ /* need to recalc the inode CRC if appropriate */
+ xfs_dinode_calc_crc(mp, dip);
+
xfs_trans_inode_buf(tp, ibp);
xfs_trans_log_buf(tp, ibp, offset,
(offset + sizeof(xfs_agino_t) - 1));
@@ -1723,6 +1727,10 @@ xfs_iunlink_remove(
dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
offset = ip->i_imap.im_boffset +
offsetof(xfs_dinode_t, di_next_unlinked);
+
+ /* need to recalc the inode CRC if appropriate */
+ xfs_dinode_calc_crc(mp, dip);
+
xfs_trans_inode_buf(tp, ibp);
xfs_trans_log_buf(tp, ibp, offset,
(offset + sizeof(xfs_agino_t) - 1));
@@ -1796,6 +1804,10 @@ xfs_iunlink_remove(
dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
offset = ip->i_imap.im_boffset +
offsetof(xfs_dinode_t, di_next_unlinked);
+
+ /* need to recalc the inode CRC if appropriate */
+ xfs_dinode_calc_crc(mp, dip);
+
xfs_trans_inode_buf(tp, ibp);
xfs_trans_log_buf(tp, ibp, offset,
(offset + sizeof(xfs_agino_t) - 1));
@@ -1809,6 +1821,10 @@ xfs_iunlink_remove(
last_dip->di_next_unlinked = cpu_to_be32(next_agino);
ASSERT(next_agino != 0);
offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked);
+
+ /* need to recalc the inode CRC if appropriate */
+ xfs_dinode_calc_crc(mp, last_dip);
+
xfs_trans_inode_buf(tp, last_ibp);
xfs_trans_log_buf(tp, last_ibp, offset,
(offset + sizeof(xfs_agino_t) - 1));
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index d9e4d3c3991a..45a85ff84da1 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1599,10 +1599,43 @@ xlog_recover_add_to_trans(
}
/*
- * Sort the log items in the transaction. Cancelled buffers need
- * to be put first so they are processed before any items that might
- * modify the buffers. If they are cancelled, then the modifications
- * don't need to be replayed.
+ * Sort the log items in the transaction.
+ *
+ * The ordering constraints are defined by the inode allocation and unlink
+ * behaviour. The rules are:
+ *
+ * 1. Every item is only logged once in a given transaction. Hence it
+ * represents the last logged state of the item. Hence ordering is
+ * dependent on the order in which operations need to be performed so
+ * required initial conditions are always met.
+ *
+ * 2. Cancelled buffers are recorded in pass 1 in a separate table and
+ * there's nothing to replay from them so we can simply cull them
+ * from the transaction. However, we can't do that until after we've
+ * replayed all the other items because they may be dependent on the
+ * cancelled buffer and replaying the cancelled buffer can remove it
+ * form the cancelled buffer table. Hence they have tobe done last.
+ *
+ * 3. Inode allocation buffers must be replayed before inode items that
+ * read the buffer and replay changes into it.
+ *
+ * 4. Inode unlink buffers must be replayed after inode items are replayed.
+ * This ensures that inodes are completely flushed to the inode buffer
+ * in a "free" state before we remove the unlinked inode list pointer.
+ *
+ * Hence the ordering needs to be inode allocation buffers first, inode items
+ * second, inode unlink buffers third and cancelled buffers last.
+ *
+ * But there's a problem with that - we can't tell an inode allocation buffer
+ * apart from a regular buffer, so we can't separate them. We can, however,
+ * tell an inode unlink buffer from the others, and so we can separate them out
+ * from all the other buffers and move them to last.
+ *
+ * Hence, 4 lists, in order from head to tail:
+ * - buffer_list for all buffers except cancelled/inode unlink buffers
+ * - item_list for all non-buffer items
+ * - inode_buffer_list for inode unlink buffers
+ * - cancel_list for the cancelled buffers
*/
STATIC int
xlog_recover_reorder_trans(
@@ -1612,6 +1645,10 @@ xlog_recover_reorder_trans(
{
xlog_recover_item_t *item, *n;
LIST_HEAD(sort_list);
+ LIST_HEAD(cancel_list);
+ LIST_HEAD(buffer_list);
+ LIST_HEAD(inode_buffer_list);
+ LIST_HEAD(inode_list);
list_splice_init(&trans->r_itemq, &sort_list);
list_for_each_entry_safe(item, n, &sort_list, ri_list) {
@@ -1619,12 +1656,18 @@ xlog_recover_reorder_trans(
switch (ITEM_TYPE(item)) {
case XFS_LI_BUF:
- if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) {
+ if (buf_f->blf_flags & XFS_BLF_CANCEL) {
trace_xfs_log_recover_item_reorder_head(log,
trans, item, pass);
- list_move(&item->ri_list, &trans->r_itemq);
+ list_move(&item->ri_list, &cancel_list);
+ break;
+ }
+ if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
+ list_move(&item->ri_list, &inode_buffer_list);
break;
}
+ list_move_tail(&item->ri_list, &buffer_list);
+ break;
case XFS_LI_INODE:
case XFS_LI_DQUOT:
case XFS_LI_QUOTAOFF:
@@ -1632,7 +1675,7 @@ xlog_recover_reorder_trans(
case XFS_LI_EFI:
trace_xfs_log_recover_item_reorder_tail(log,
trans, item, pass);
- list_move_tail(&item->ri_list, &trans->r_itemq);
+ list_move_tail(&item->ri_list, &inode_list);
break;
default:
xfs_warn(log->l_mp,
@@ -1643,6 +1686,14 @@ xlog_recover_reorder_trans(
}
}
ASSERT(list_empty(&sort_list));
+ if (!list_empty(&buffer_list))
+ list_splice(&buffer_list, &trans->r_itemq);
+ if (!list_empty(&inode_list))
+ list_splice_tail(&inode_list, &trans->r_itemq);
+ if (!list_empty(&inode_buffer_list))
+ list_splice_tail(&inode_buffer_list, &trans->r_itemq);
+ if (!list_empty(&cancel_list))
+ list_splice_tail(&cancel_list, &trans->r_itemq);
return 0;
}
@@ -1861,6 +1912,15 @@ xlog_recover_do_inode_buffer(
buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp,
next_unlinked_offset);
*buffer_nextp = *logged_nextp;
+
+ /*
+ * If necessary, recalculate the CRC in the on-disk inode. We
+ * have to leave the inode in a consistent state for whoever
+ * reads it next....
+ */
+ xfs_dinode_calc_crc(mp, (struct xfs_dinode *)
+ xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize));
+
}
return 0;
@@ -2266,6 +2326,12 @@ xfs_qm_dqcheck(
d->dd_diskdq.d_flags = type;
d->dd_diskdq.d_id = cpu_to_be32(id);
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid);
+ xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk),
+ XFS_DQUOT_CRC_OFF);
+ }
+
return errs;
}
@@ -2793,6 +2859,10 @@ xlog_recover_dquot_pass2(
}
memcpy(ddq, recddq, item->ri_buf[1].i_len);
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk),
+ XFS_DQUOT_CRC_OFF);
+ }
ASSERT(dq_f->qlf_size == 2);
ASSERT(bp->b_target->bt_mount == mp);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index f41702b43003..b75c9bb6e71e 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -41,6 +41,7 @@
#include "xfs_qm.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
+#include "xfs_cksum.h"
/*
* The global quota manager. There is only one of these for the entire
@@ -839,7 +840,7 @@ xfs_qm_reset_dqcounts(
xfs_dqid_t id,
uint type)
{
- xfs_disk_dquot_t *ddq;
+ struct xfs_dqblk *dqb;
int j;
trace_xfs_reset_dqcounts(bp, _RET_IP_);
@@ -853,8 +854,12 @@ xfs_qm_reset_dqcounts(
do_div(j, sizeof(xfs_dqblk_t));
ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
#endif
- ddq = bp->b_addr;
+ dqb = bp->b_addr;
for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
+ struct xfs_disk_dquot *ddq;
+
+ ddq = (struct xfs_disk_dquot *)&dqb[j];
+
/*
* Do a sanity check, and if needed, repair the dqblk. Don't
* output any warnings because it's perfectly possible to
@@ -871,7 +876,12 @@ xfs_qm_reset_dqcounts(
ddq->d_bwarns = 0;
ddq->d_iwarns = 0;
ddq->d_rtbwarns = 0;
- ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
+
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ xfs_update_cksum((char *)&dqb[j],
+ sizeof(struct xfs_dqblk),
+ XFS_DQUOT_CRC_OFF);
+ }
}
}
@@ -907,19 +917,29 @@ xfs_qm_dqiter_bufs(
XFS_FSB_TO_DADDR(mp, bno),
mp->m_quotainfo->qi_dqchunklen, 0, &bp,
&xfs_dquot_buf_ops);
- if (error)
- break;
/*
- * XXX(hch): need to figure out if it makes sense to validate
- * the CRC here.
+ * CRC and validation errors will return a EFSCORRUPTED here. If
+ * this occurs, re-read without CRC validation so that we can
+ * repair the damage via xfs_qm_reset_dqcounts(). This process
+ * will leave a trace in the log indicating corruption has
+ * been detected.
*/
+ if (error == EFSCORRUPTED) {
+ error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
+ XFS_FSB_TO_DADDR(mp, bno),
+ mp->m_quotainfo->qi_dqchunklen, 0, &bp,
+ NULL);
+ }
+
+ if (error)
+ break;
+
xfs_qm_reset_dqcounts(mp, bp, firstid, type);
xfs_buf_delwri_queue(bp, buffer_list);
xfs_buf_relse(bp);
- /*
- * goto the next block.
- */
+
+ /* goto the next block. */
bno++;
firstid += mp->m_quotainfo->qi_dqperchunk;
}
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index c61e31c7d997..c38068f26c55 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -87,6 +87,8 @@ typedef struct xfs_dqblk {
uuid_t dd_uuid; /* location information */
} xfs_dqblk_t;
+#define XFS_DQUOT_CRC_OFF offsetof(struct xfs_dqblk, dd_crc)
+
/*
* flags for q_flags field in the dquot.
*/
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index ea341cea68cb..3033ba5e9762 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1373,6 +1373,17 @@ xfs_finish_flags(
}
/*
+ * V5 filesystems always use attr2 format for attributes.
+ */
+ if (xfs_sb_version_hascrc(&mp->m_sb) &&
+ (mp->m_flags & XFS_MOUNT_NOATTR2)) {
+ xfs_warn(mp,
+"Cannot mount a V5 filesystem as %s. %s is always enabled for V5 filesystems.",
+ MNTOPT_NOATTR2, MNTOPT_ATTR2);
+ return XFS_ERROR(EINVAL);
+ }
+
+ /*
* mkfs'ed attr2 will turn on attr2 mount unless explicitly
* told by noattr2 to turn it off
*/
OpenPOWER on IntegriCloud