diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/cifs/connect.c | 4 | ||||
-rw-r--r-- | fs/ecryptfs/file.c | 6 | ||||
-rw-r--r-- | fs/fuse/dir.c | 12 | ||||
-rw-r--r-- | fs/fuse/file.c | 58 | ||||
-rw-r--r-- | fs/fuse/inode.c | 7 | ||||
-rw-r--r-- | fs/gfs2/bmap.c | 17 | ||||
-rw-r--r-- | fs/gfs2/dir.c | 43 | ||||
-rw-r--r-- | fs/gfs2/file.c | 19 | ||||
-rw-r--r-- | fs/gfs2/inode.c | 1 | ||||
-rw-r--r-- | fs/gfs2/lops.c | 4 | ||||
-rw-r--r-- | fs/gfs2/rgrp.c | 4 | ||||
-rw-r--r-- | fs/gfs2/super.c | 6 | ||||
-rw-r--r-- | fs/hpfs/file.c | 4 | ||||
-rw-r--r-- | fs/jfs/jfs_logmgr.c | 8 | ||||
-rw-r--r-- | fs/jfs/super.c | 38 | ||||
-rw-r--r-- | fs/proc/base.c | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_acl.c | 31 | ||||
-rw-r--r-- | fs/xfs/xfs_acl.h | 31 | ||||
-rw-r--r-- | fs/xfs/xfs_attr_leaf.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_dquot.c | 37 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 16 | ||||
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 84 | ||||
-rw-r--r-- | fs/xfs/xfs_qm.c | 40 | ||||
-rw-r--r-- | fs/xfs/xfs_quota.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_super.c | 11 |
25 files changed, 378 insertions, 108 deletions
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 5b97e56ddbca..e3bc39bb9d12 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3279,8 +3279,8 @@ build_unc_path_to_root(const struct smb_vol *vol, pos = full_path + unc_len; if (pplen) { - *pos++ = CIFS_DIR_SEP(cifs_sb); - strncpy(pos, vol->prepath, pplen); + *pos = CIFS_DIR_SEP(cifs_sb); + strncpy(pos + 1, vol->prepath, pplen); pos += pplen; } diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 201f0a0d6b0a..a7abbea2c096 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -295,6 +295,12 @@ static int ecryptfs_release(struct inode *inode, struct file *file) static int ecryptfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { + int rc; + + rc = filemap_write_and_wait(file->f_mapping); + if (rc) + return rc; + return vfs_fsync(ecryptfs_file_to_lower(file), datasync); } diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 254df56b847b..f3f783dc4f75 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -180,6 +180,8 @@ u64 fuse_get_attr_version(struct fuse_conn *fc) static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) { struct inode *inode; + struct dentry *parent; + struct fuse_conn *fc; inode = ACCESS_ONCE(entry->d_inode); if (inode && is_bad_inode(inode)) @@ -187,10 +189,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) else if (fuse_dentry_time(entry) < get_jiffies_64()) { int err; struct fuse_entry_out outarg; - struct fuse_conn *fc; struct fuse_req *req; struct fuse_forget_link *forget; - struct dentry *parent; u64 attr_version; /* For negative dentries, always do a fresh lookup */ @@ -241,8 +241,14 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) entry_attr_timeout(&outarg), attr_version); fuse_change_entry_timeout(entry, &outarg); + } else if (inode) { + fc = get_fuse_conn(inode); + if (fc->readdirplus_auto) { + parent = dget_parent(entry); + fuse_advise_use_readdirplus(parent->d_inode); + dput(parent); + } } - fuse_advise_use_readdirplus(inode); return 1; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index d1c9b85b3f58..e570081f9f76 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -16,6 +16,7 @@ #include <linux/compat.h> #include <linux/swap.h> #include <linux/aio.h> +#include <linux/falloc.h> static const struct file_operations fuse_direct_io_file_operations; @@ -1278,7 +1279,10 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, iov_iter_init(&ii, iov, nr_segs, count, 0); - req = fuse_get_req(fc, fuse_iter_npages(&ii)); + if (io->async) + req = fuse_get_req_for_background(fc, fuse_iter_npages(&ii)); + else + req = fuse_get_req(fc, fuse_iter_npages(&ii)); if (IS_ERR(req)) return PTR_ERR(req); @@ -1314,7 +1318,11 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, break; if (count) { fuse_put_request(fc, req); - req = fuse_get_req(fc, fuse_iter_npages(&ii)); + if (io->async) + req = fuse_get_req_for_background(fc, + fuse_iter_npages(&ii)); + else + req = fuse_get_req(fc, fuse_iter_npages(&ii)); if (IS_ERR(req)) break; } @@ -2365,6 +2373,11 @@ static void fuse_do_truncate(struct file *file) fuse_do_setattr(inode, &attr, file); } +static inline loff_t fuse_round_up(loff_t off) +{ + return round_up(off, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); +} + static ssize_t fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) @@ -2372,6 +2385,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, ssize_t ret = 0; struct file *file = iocb->ki_filp; struct fuse_file *ff = file->private_data; + bool async_dio = ff->fc->async_dio; loff_t pos = 0; struct inode *inode; loff_t i_size; @@ -2383,10 +2397,10 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, i_size = i_size_read(inode); /* optimization for short read */ - if (rw != WRITE && offset + count > i_size) { + if (async_dio && rw != WRITE && offset + count > i_size) { if (offset >= i_size) return 0; - count = i_size - offset; + count = min_t(loff_t, count, fuse_round_up(i_size - offset)); } io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); @@ -2404,7 +2418,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, * By default, we want to optimize all I/Os with async request * submission to the client filesystem if supported. */ - io->async = ff->fc->async_dio; + io->async = async_dio; io->iocb = iocb; /* @@ -2412,7 +2426,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, * to wait on real async I/O requests, so we must submit this request * synchronously. */ - if (!is_sync_kiocb(iocb) && (offset + count > i_size)) + if (!is_sync_kiocb(iocb) && (offset + count > i_size) && rw == WRITE) io->async = false; if (rw == WRITE) @@ -2424,7 +2438,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, fuse_aio_complete(io, ret < 0 ? ret : 0, -1); /* we have a non-extending, async request, so return */ - if (ret > 0 && !is_sync_kiocb(iocb)) + if (!is_sync_kiocb(iocb)) return -EIOCBQUEUED; ret = wait_on_sync_kiocb(iocb); @@ -2446,6 +2460,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, loff_t length) { struct fuse_file *ff = file->private_data; + struct inode *inode = file->f_inode; struct fuse_conn *fc = ff->fc; struct fuse_req *req; struct fuse_fallocate_in inarg = { @@ -2459,9 +2474,16 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, if (fc->no_fallocate) return -EOPNOTSUPP; + if (mode & FALLOC_FL_PUNCH_HOLE) { + mutex_lock(&inode->i_mutex); + fuse_set_nowrite(inode); + } + req = fuse_get_req_nopages(fc); - if (IS_ERR(req)) - return PTR_ERR(req); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto out; + } req->in.h.opcode = FUSE_FALLOCATE; req->in.h.nodeid = ff->nodeid; @@ -2476,6 +2498,24 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, } fuse_put_request(fc, req); + if (err) + goto out; + + /* we could have extended the file */ + if (!(mode & FALLOC_FL_KEEP_SIZE)) + fuse_write_update_size(inode, offset + length); + + if (mode & FALLOC_FL_PUNCH_HOLE) + truncate_pagecache_range(inode, offset, offset + length - 1); + + fuse_invalidate_attr(inode); + +out: + if (mode & FALLOC_FL_PUNCH_HOLE) { + fuse_release_nowrite(inode); + mutex_unlock(&inode->i_mutex); + } + return err; } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 6201f81e4d3a..9a0cdde14a08 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -867,10 +867,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->dont_mask = 1; if (arg->flags & FUSE_AUTO_INVAL_DATA) fc->auto_inval_data = 1; - if (arg->flags & FUSE_DO_READDIRPLUS) + if (arg->flags & FUSE_DO_READDIRPLUS) { fc->do_readdirplus = 1; - if (arg->flags & FUSE_READDIRPLUS_AUTO) - fc->readdirplus_auto = 1; + if (arg->flags & FUSE_READDIRPLUS_AUTO) + fc->readdirplus_auto = 1; + } if (arg->flags & FUSE_ASYNC_DIO) fc->async_dio = 1; } else { diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 1dc9a13ce6bb..93b5809c20bb 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1286,17 +1286,26 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize) if (ret) return ret; + ret = get_write_access(inode); + if (ret) + return ret; + inode_dio_wait(inode); ret = gfs2_rs_alloc(GFS2_I(inode)); if (ret) - return ret; + goto out; oldsize = inode->i_size; - if (newsize >= oldsize) - return do_grow(inode, newsize); + if (newsize >= oldsize) { + ret = do_grow(inode, newsize); + goto out; + } - return do_shrink(inode, oldsize, newsize); + ret = do_shrink(inode, oldsize, newsize); +out: + put_write_access(inode); + return ret; } int gfs2_truncatei_resume(struct gfs2_inode *ip) diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index c3e82bd23179..b631c9043460 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -354,22 +354,31 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip) return ERR_PTR(-EIO); } - hc = kmalloc(hsize, GFP_NOFS); - ret = -ENOMEM; + hc = kmalloc(hsize, GFP_NOFS | __GFP_NOWARN); + if (hc == NULL) + hc = __vmalloc(hsize, GFP_NOFS, PAGE_KERNEL); + if (hc == NULL) return ERR_PTR(-ENOMEM); ret = gfs2_dir_read_data(ip, hc, hsize); if (ret < 0) { - kfree(hc); + if (is_vmalloc_addr(hc)) + vfree(hc); + else + kfree(hc); return ERR_PTR(ret); } spin_lock(&inode->i_lock); - if (ip->i_hash_cache) - kfree(hc); - else + if (ip->i_hash_cache) { + if (is_vmalloc_addr(hc)) + vfree(hc); + else + kfree(hc); + } else { ip->i_hash_cache = hc; + } spin_unlock(&inode->i_lock); return ip->i_hash_cache; @@ -385,7 +394,10 @@ void gfs2_dir_hash_inval(struct gfs2_inode *ip) { __be64 *hc = ip->i_hash_cache; ip->i_hash_cache = NULL; - kfree(hc); + if (is_vmalloc_addr(hc)) + vfree(hc); + else + kfree(hc); } static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent) @@ -1113,7 +1125,10 @@ static int dir_double_exhash(struct gfs2_inode *dip) if (IS_ERR(hc)) return PTR_ERR(hc); - h = hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS); + h = hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS | __GFP_NOWARN); + if (hc2 == NULL) + hc2 = __vmalloc(hsize_bytes * 2, GFP_NOFS, PAGE_KERNEL); + if (!hc2) return -ENOMEM; @@ -1145,7 +1160,10 @@ fail: gfs2_dinode_out(dip, dibh->b_data); brelse(dibh); out_kfree: - kfree(hc2); + if (is_vmalloc_addr(hc2)) + vfree(hc2); + else + kfree(hc2); return error; } @@ -1846,6 +1864,8 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); ht = kzalloc(size, GFP_NOFS); + if (ht == NULL) + ht = vzalloc(size); if (!ht) return -ENOMEM; @@ -1933,7 +1953,10 @@ out_rlist: gfs2_rlist_free(&rlist); gfs2_quota_unhold(dip); out: - kfree(ht); + if (is_vmalloc_addr(ht)) + vfree(ht); + else + kfree(ht); return error; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index acd16764b133..ad0dc38d87ab 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -402,16 +402,20 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) /* Update file times before taking page lock */ file_update_time(vma->vm_file); + ret = get_write_access(inode); + if (ret) + goto out; + ret = gfs2_rs_alloc(ip); if (ret) - return ret; + goto out_write_access; gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); if (ret) - goto out; + goto out_uninit; set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); set_bit(GIF_SW_PAGED, &ip->i_flags); @@ -480,12 +484,15 @@ out_quota_unlock: gfs2_quota_unlock(ip); out_unlock: gfs2_glock_dq(&gh); -out: +out_uninit: gfs2_holder_uninit(&gh); if (ret == 0) { set_page_dirty(page); wait_for_stable_page(page); } +out_write_access: + put_write_access(inode); +out: sb_end_pagefault(inode->i_sb); return block_page_mkwrite_return(ret); } @@ -594,10 +601,10 @@ static int gfs2_release(struct inode *inode, struct file *file) kfree(file->private_data); file->private_data = NULL; - if ((file->f_mode & FMODE_WRITE) && - (atomic_read(&inode->i_writecount) == 1)) - gfs2_rs_delete(ip); + if (!(file->f_mode & FMODE_WRITE)) + return 0; + gfs2_rs_delete(ip); return 0; } diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 8833a4f264e3..62b484e4a9e4 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -189,6 +189,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, return inode; fail_refresh: + ip->i_iopen_gh.gh_flags |= GL_NOCACHE; ip->i_iopen_gh.gh_gl->gl_object = NULL; gfs2_glock_dq_uninit(&ip->i_iopen_gh); fail_iopen: diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 68b4c8f1fce8..6c33d7b6e0c4 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -419,7 +419,9 @@ static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit, if (total > limit) num = limit; gfs2_log_unlock(sdp); - page = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_METADATA, num + 1, num); + page = gfs2_get_log_desc(sdp, + is_databuf ? GFS2_LOG_DESC_JDATA : + GFS2_LOG_DESC_METADATA, num + 1, num); ld = page_address(page); gfs2_log_lock(sdp); ptr = (__be64 *)(ld + 1); diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 5232525934ae..9809156e3d04 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -638,8 +638,10 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs) */ void gfs2_rs_delete(struct gfs2_inode *ip) { + struct inode *inode = &ip->i_inode; + down_write(&ip->i_rw_mutex); - if (ip->i_res) { + if (ip->i_res && atomic_read(&inode->i_writecount) <= 1) { gfs2_rs_deltree(ip->i_res); BUG_ON(ip->i_res->rs_free); kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 917c8e1eb4ae..e5639dec66c4 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1444,6 +1444,7 @@ static void gfs2_evict_inode(struct inode *inode) /* Must not read inode block until block type has been verified */ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh); if (unlikely(error)) { + ip->i_iopen_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq_uninit(&ip->i_iopen_gh); goto out; } @@ -1514,8 +1515,10 @@ out_unlock: if (gfs2_rs_active(ip->i_res)) gfs2_rs_deltree(ip->i_res); - if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) + if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) { + ip->i_iopen_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq(&ip->i_iopen_gh); + } gfs2_holder_uninit(&ip->i_iopen_gh); gfs2_glock_dq_uninit(&gh); if (error && error != GLR_TRYFAILED && error != -EROFS) @@ -1534,6 +1537,7 @@ out: ip->i_gl = NULL; if (ip->i_iopen_gh.gh_gl) { ip->i_iopen_gh.gh_gl->gl_object = NULL; + ip->i_iopen_gh.gh_flags |= GL_NOCACHE; gfs2_glock_dq_uninit(&ip->i_iopen_gh); } } diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 3027f4dbbab5..e4ba5fe4c3b5 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -109,10 +109,14 @@ static void hpfs_write_failed(struct address_space *mapping, loff_t to) { struct inode *inode = mapping->host; + hpfs_lock(inode->i_sb); + if (to > inode->i_size) { truncate_pagecache(inode, to, inode->i_size); hpfs_truncate(inode); } + + hpfs_unlock(inode->i_sb); } static int hpfs_write_begin(struct file *file, struct address_space *mapping, diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index c57499dca89c..360d27c48887 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -2009,7 +2009,13 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp) bio->bi_end_io = lbmIODone; bio->bi_private = bp; - submit_bio(READ_SYNC, bio); + /*check if journaling to disk has been disabled*/ + if (log->no_integrity) { + bio->bi_size = 0; + lbmIODone(bio, 0); + } else { + submit_bio(READ_SYNC, bio); + } wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD)); diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 2003e830ed1c..788e0a9c1fb0 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -611,11 +611,28 @@ static int jfs_freeze(struct super_block *sb) { struct jfs_sb_info *sbi = JFS_SBI(sb); struct jfs_log *log = sbi->log; + int rc = 0; if (!(sb->s_flags & MS_RDONLY)) { txQuiesce(sb); - lmLogShutdown(log); - updateSuper(sb, FM_CLEAN); + rc = lmLogShutdown(log); + if (rc) { + jfs_error(sb, "jfs_freeze: lmLogShutdown failed"); + + /* let operations fail rather than hang */ + txResume(sb); + + return rc; + } + rc = updateSuper(sb, FM_CLEAN); + if (rc) { + jfs_err("jfs_freeze: updateSuper failed\n"); + /* + * Don't fail here. Everything succeeded except + * marking the superblock clean, so there's really + * no harm in leaving it frozen for now. + */ + } } return 0; } @@ -627,13 +644,18 @@ static int jfs_unfreeze(struct super_block *sb) int rc = 0; if (!(sb->s_flags & MS_RDONLY)) { - updateSuper(sb, FM_MOUNT); - if ((rc = lmLogInit(log))) - jfs_err("jfs_unlock failed with return code %d", rc); - else - txResume(sb); + rc = updateSuper(sb, FM_MOUNT); + if (rc) { + jfs_error(sb, "jfs_unfreeze: updateSuper failed"); + goto out; + } + rc = lmLogInit(log); + if (rc) + jfs_error(sb, "jfs_unfreeze: lmLogInit failed"); +out: + txResume(sb); } - return 0; + return rc; } static struct dentry *jfs_do_mount(struct file_system_type *fs_type, diff --git a/fs/proc/base.c b/fs/proc/base.c index dd51e50001fe..c3834dad09b3 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2118,6 +2118,7 @@ static int show_timer(struct seq_file *m, void *v) nstr[notify & ~SIGEV_THREAD_ID], (notify & SIGEV_THREAD_ID) ? "tid" : "pid", pid_nr_ns(timer->it_pid, tp->ns)); + seq_printf(m, "ClockID: %d\n", timer->it_clock); return 0; } diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 1d32f1d52763..306d883d89bc 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -21,6 +21,8 @@ #include "xfs_bmap_btree.h" #include "xfs_inode.h" #include "xfs_vnodeops.h" +#include "xfs_sb.h" +#include "xfs_mount.h" #include "xfs_trace.h" #include <linux/slab.h> #include <linux/xattr.h> @@ -34,7 +36,9 @@ */ STATIC struct posix_acl * -xfs_acl_from_disk(struct xfs_acl *aclp) +xfs_acl_from_disk( + struct xfs_acl *aclp, + int max_entries) { struct posix_acl_entry *acl_e; struct posix_acl *acl; @@ -42,7 +46,7 @@ xfs_acl_from_disk(struct xfs_acl *aclp) unsigned int count, i; count = be32_to_cpu(aclp->acl_cnt); - if (count > XFS_ACL_MAX_ENTRIES) + if (count > max_entries) return ERR_PTR(-EFSCORRUPTED); acl = posix_acl_alloc(count, GFP_KERNEL); @@ -108,9 +112,9 @@ xfs_get_acl(struct inode *inode, int type) struct xfs_inode *ip = XFS_I(inode); struct posix_acl *acl; struct xfs_acl *xfs_acl; - int len = sizeof(struct xfs_acl); unsigned char *ea_name; int error; + int len; acl = get_cached_acl(inode, type); if (acl != ACL_NOT_CACHED) @@ -133,8 +137,8 @@ xfs_get_acl(struct inode *inode, int type) * If we have a cached ACLs value just return it, not need to * go out to the disk. */ - - xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL); + len = XFS_ACL_MAX_SIZE(ip->i_mount); + xfs_acl = kzalloc(len, GFP_KERNEL); if (!xfs_acl) return ERR_PTR(-ENOMEM); @@ -153,7 +157,7 @@ xfs_get_acl(struct inode *inode, int type) goto out; } - acl = xfs_acl_from_disk(xfs_acl); + acl = xfs_acl_from_disk(xfs_acl, XFS_ACL_MAX_ENTRIES(ip->i_mount)); if (IS_ERR(acl)) goto out; @@ -189,16 +193,17 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) if (acl) { struct xfs_acl *xfs_acl; - int len; + int len = XFS_ACL_MAX_SIZE(ip->i_mount); - xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL); + xfs_acl = kzalloc(len, GFP_KERNEL); if (!xfs_acl) return -ENOMEM; xfs_acl_to_disk(xfs_acl, acl); - len = sizeof(struct xfs_acl) - - (sizeof(struct xfs_acl_entry) * - (XFS_ACL_MAX_ENTRIES - acl->a_count)); + + /* subtract away the unused acl entries */ + len -= sizeof(struct xfs_acl_entry) * + (XFS_ACL_MAX_ENTRIES(ip->i_mount) - acl->a_count); error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl, len, ATTR_ROOT); @@ -243,7 +248,7 @@ xfs_set_mode(struct inode *inode, umode_t mode) static int xfs_acl_exists(struct inode *inode, unsigned char *name) { - int len = sizeof(struct xfs_acl); + int len = XFS_ACL_MAX_SIZE(XFS_M(inode->i_sb)); return (xfs_attr_get(XFS_I(inode), name, NULL, &len, ATTR_ROOT|ATTR_KERNOVAL) == 0); @@ -379,7 +384,7 @@ xfs_xattr_acl_set(struct dentry *dentry, const char *name, goto out_release; error = -EINVAL; - if (acl->a_count > XFS_ACL_MAX_ENTRIES) + if (acl->a_count > XFS_ACL_MAX_ENTRIES(XFS_M(inode->i_sb))) goto out_release; if (type == ACL_TYPE_ACCESS) { diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 39632d941354..4016a567b83c 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -22,19 +22,36 @@ struct inode; struct posix_acl; struct xfs_inode; -#define XFS_ACL_MAX_ENTRIES 25 #define XFS_ACL_NOT_PRESENT (-1) /* On-disk XFS access control list structure */ +struct xfs_acl_entry { + __be32 ae_tag; + __be32 ae_id; + __be16 ae_perm; + __be16 ae_pad; /* fill the implicit hole in the structure */ +}; + struct xfs_acl { - __be32 acl_cnt; - struct xfs_acl_entry { - __be32 ae_tag; - __be32 ae_id; - __be16 ae_perm; - } acl_entry[XFS_ACL_MAX_ENTRIES]; + __be32 acl_cnt; + struct xfs_acl_entry acl_entry[0]; }; +/* + * The number of ACL entries allowed is defined by the on-disk format. + * For v4 superblocks, that is limited to 25 entries. For v5 superblocks, it is + * limited only by the maximum size of the xattr that stores the information. + */ +#define XFS_ACL_MAX_ENTRIES(mp) \ + (xfs_sb_version_hascrc(&mp->m_sb) \ + ? (XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \ + sizeof(struct xfs_acl_entry) \ + : 25) + +#define XFS_ACL_MAX_SIZE(mp) \ + (sizeof(struct xfs_acl) + \ + sizeof(struct xfs_acl_entry) * XFS_ACL_MAX_ENTRIES((mp))) + /* On-disk XFS extended attribute names */ #define SGI_ACL_FILE (unsigned char *)"SGI_ACL_FILE" #define SGI_ACL_DEFAULT (unsigned char *)"SGI_ACL_DEFAULT" diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index d788302e506a..31d3cd129269 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -3258,7 +3258,7 @@ xfs_attr3_leaf_inactive( name_rmt = xfs_attr3_leaf_name_remote(leaf, i); if (name_rmt->valueblk) { lp->valueblk = be32_to_cpu(name_rmt->valueblk); - lp->valuelen = XFS_B_TO_FSB(dp->i_mount, + lp->valuelen = xfs_attr3_rmt_blocks(dp->i_mount, be32_to_cpu(name_rmt->valuelen)); lp++; } diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index a41f8bf1da37..044e97a33c8d 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -249,8 +249,11 @@ xfs_qm_init_dquot_blk( d->dd_diskdq.d_version = XFS_DQUOT_VERSION; d->dd_diskdq.d_id = cpu_to_be32(curid); d->dd_diskdq.d_flags = type; - if (xfs_sb_version_hascrc(&mp->m_sb)) + if (xfs_sb_version_hascrc(&mp->m_sb)) { uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid); + xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk), + XFS_DQUOT_CRC_OFF); + } } xfs_trans_dquot_buf(tp, bp, @@ -286,23 +289,6 @@ xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp) dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5; } -STATIC void -xfs_dquot_buf_calc_crc( - struct xfs_mount *mp, - struct xfs_buf *bp) -{ - struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; - int i; - - if (!xfs_sb_version_hascrc(&mp->m_sb)) - return; - - for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++, d++) { - xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk), - offsetof(struct xfs_dqblk, dd_crc)); - } -} - STATIC bool xfs_dquot_buf_verify_crc( struct xfs_mount *mp, @@ -328,12 +314,11 @@ xfs_dquot_buf_verify_crc( for (i = 0; i < ndquots; i++, d++) { if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk), - offsetof(struct xfs_dqblk, dd_crc))) + XFS_DQUOT_CRC_OFF)) return false; if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid)) return false; } - return true; } @@ -393,6 +378,11 @@ xfs_dquot_buf_read_verify( } } +/* + * we don't calculate the CRC here as that is done when the dquot is flushed to + * the buffer after the update is done. This ensures that the dquot in the + * buffer always has an up-to-date CRC value. + */ void xfs_dquot_buf_write_verify( struct xfs_buf *bp) @@ -404,7 +394,6 @@ xfs_dquot_buf_write_verify( xfs_buf_ioerror(bp, EFSCORRUPTED); return; } - xfs_dquot_buf_calc_crc(mp, bp); } const struct xfs_buf_ops xfs_dquot_buf_ops = { @@ -1151,11 +1140,17 @@ xfs_qm_dqflush( * copy the lsn into the on-disk dquot now while we have the in memory * dquot here. This can't be done later in the write verifier as we * can't get access to the log item at that point in time. + * + * We also calculate the CRC here so that the on-disk dquot in the + * buffer always has a valid CRC. This ensures there is no possibility + * of a dquot without an up-to-date CRC getting to disk. */ if (xfs_sb_version_hascrc(&mp->m_sb)) { struct xfs_dqblk *dqb = (struct xfs_dqblk *)ddqp; dqb->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn); + xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk), + XFS_DQUOT_CRC_OFF); } /* diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index efbe1accb6ca..7f7be5f98f52 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1638,6 +1638,10 @@ xfs_iunlink( dip->di_next_unlinked = agi->agi_unlinked[bucket_index]; offset = ip->i_imap.im_boffset + offsetof(xfs_dinode_t, di_next_unlinked); + + /* need to recalc the inode CRC if appropriate */ + xfs_dinode_calc_crc(mp, dip); + xfs_trans_inode_buf(tp, ibp); xfs_trans_log_buf(tp, ibp, offset, (offset + sizeof(xfs_agino_t) - 1)); @@ -1723,6 +1727,10 @@ xfs_iunlink_remove( dip->di_next_unlinked = cpu_to_be32(NULLAGINO); offset = ip->i_imap.im_boffset + offsetof(xfs_dinode_t, di_next_unlinked); + + /* need to recalc the inode CRC if appropriate */ + xfs_dinode_calc_crc(mp, dip); + xfs_trans_inode_buf(tp, ibp); xfs_trans_log_buf(tp, ibp, offset, (offset + sizeof(xfs_agino_t) - 1)); @@ -1796,6 +1804,10 @@ xfs_iunlink_remove( dip->di_next_unlinked = cpu_to_be32(NULLAGINO); offset = ip->i_imap.im_boffset + offsetof(xfs_dinode_t, di_next_unlinked); + + /* need to recalc the inode CRC if appropriate */ + xfs_dinode_calc_crc(mp, dip); + xfs_trans_inode_buf(tp, ibp); xfs_trans_log_buf(tp, ibp, offset, (offset + sizeof(xfs_agino_t) - 1)); @@ -1809,6 +1821,10 @@ xfs_iunlink_remove( last_dip->di_next_unlinked = cpu_to_be32(next_agino); ASSERT(next_agino != 0); offset = last_offset + offsetof(xfs_dinode_t, di_next_unlinked); + + /* need to recalc the inode CRC if appropriate */ + xfs_dinode_calc_crc(mp, last_dip); + xfs_trans_inode_buf(tp, last_ibp); xfs_trans_log_buf(tp, last_ibp, offset, (offset + sizeof(xfs_agino_t) - 1)); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index d9e4d3c3991a..45a85ff84da1 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1599,10 +1599,43 @@ xlog_recover_add_to_trans( } /* - * Sort the log items in the transaction. Cancelled buffers need - * to be put first so they are processed before any items that might - * modify the buffers. If they are cancelled, then the modifications - * don't need to be replayed. + * Sort the log items in the transaction. + * + * The ordering constraints are defined by the inode allocation and unlink + * behaviour. The rules are: + * + * 1. Every item is only logged once in a given transaction. Hence it + * represents the last logged state of the item. Hence ordering is + * dependent on the order in which operations need to be performed so + * required initial conditions are always met. + * + * 2. Cancelled buffers are recorded in pass 1 in a separate table and + * there's nothing to replay from them so we can simply cull them + * from the transaction. However, we can't do that until after we've + * replayed all the other items because they may be dependent on the + * cancelled buffer and replaying the cancelled buffer can remove it + * form the cancelled buffer table. Hence they have tobe done last. + * + * 3. Inode allocation buffers must be replayed before inode items that + * read the buffer and replay changes into it. + * + * 4. Inode unlink buffers must be replayed after inode items are replayed. + * This ensures that inodes are completely flushed to the inode buffer + * in a "free" state before we remove the unlinked inode list pointer. + * + * Hence the ordering needs to be inode allocation buffers first, inode items + * second, inode unlink buffers third and cancelled buffers last. + * + * But there's a problem with that - we can't tell an inode allocation buffer + * apart from a regular buffer, so we can't separate them. We can, however, + * tell an inode unlink buffer from the others, and so we can separate them out + * from all the other buffers and move them to last. + * + * Hence, 4 lists, in order from head to tail: + * - buffer_list for all buffers except cancelled/inode unlink buffers + * - item_list for all non-buffer items + * - inode_buffer_list for inode unlink buffers + * - cancel_list for the cancelled buffers */ STATIC int xlog_recover_reorder_trans( @@ -1612,6 +1645,10 @@ xlog_recover_reorder_trans( { xlog_recover_item_t *item, *n; LIST_HEAD(sort_list); + LIST_HEAD(cancel_list); + LIST_HEAD(buffer_list); + LIST_HEAD(inode_buffer_list); + LIST_HEAD(inode_list); list_splice_init(&trans->r_itemq, &sort_list); list_for_each_entry_safe(item, n, &sort_list, ri_list) { @@ -1619,12 +1656,18 @@ xlog_recover_reorder_trans( switch (ITEM_TYPE(item)) { case XFS_LI_BUF: - if (!(buf_f->blf_flags & XFS_BLF_CANCEL)) { + if (buf_f->blf_flags & XFS_BLF_CANCEL) { trace_xfs_log_recover_item_reorder_head(log, trans, item, pass); - list_move(&item->ri_list, &trans->r_itemq); + list_move(&item->ri_list, &cancel_list); + break; + } + if (buf_f->blf_flags & XFS_BLF_INODE_BUF) { + list_move(&item->ri_list, &inode_buffer_list); break; } + list_move_tail(&item->ri_list, &buffer_list); + break; case XFS_LI_INODE: case XFS_LI_DQUOT: case XFS_LI_QUOTAOFF: @@ -1632,7 +1675,7 @@ xlog_recover_reorder_trans( case XFS_LI_EFI: trace_xfs_log_recover_item_reorder_tail(log, trans, item, pass); - list_move_tail(&item->ri_list, &trans->r_itemq); + list_move_tail(&item->ri_list, &inode_list); break; default: xfs_warn(log->l_mp, @@ -1643,6 +1686,14 @@ xlog_recover_reorder_trans( } } ASSERT(list_empty(&sort_list)); + if (!list_empty(&buffer_list)) + list_splice(&buffer_list, &trans->r_itemq); + if (!list_empty(&inode_list)) + list_splice_tail(&inode_list, &trans->r_itemq); + if (!list_empty(&inode_buffer_list)) + list_splice_tail(&inode_buffer_list, &trans->r_itemq); + if (!list_empty(&cancel_list)) + list_splice_tail(&cancel_list, &trans->r_itemq); return 0; } @@ -1861,6 +1912,15 @@ xlog_recover_do_inode_buffer( buffer_nextp = (xfs_agino_t *)xfs_buf_offset(bp, next_unlinked_offset); *buffer_nextp = *logged_nextp; + + /* + * If necessary, recalculate the CRC in the on-disk inode. We + * have to leave the inode in a consistent state for whoever + * reads it next.... + */ + xfs_dinode_calc_crc(mp, (struct xfs_dinode *) + xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize)); + } return 0; @@ -2266,6 +2326,12 @@ xfs_qm_dqcheck( d->dd_diskdq.d_flags = type; d->dd_diskdq.d_id = cpu_to_be32(id); + if (xfs_sb_version_hascrc(&mp->m_sb)) { + uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid); + xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk), + XFS_DQUOT_CRC_OFF); + } + return errs; } @@ -2793,6 +2859,10 @@ xlog_recover_dquot_pass2( } memcpy(ddq, recddq, item->ri_buf[1].i_len); + if (xfs_sb_version_hascrc(&mp->m_sb)) { + xfs_update_cksum((char *)ddq, sizeof(struct xfs_dqblk), + XFS_DQUOT_CRC_OFF); + } ASSERT(dq_f->qlf_size == 2); ASSERT(bp->b_target->bt_mount == mp); diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index f41702b43003..b75c9bb6e71e 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -41,6 +41,7 @@ #include "xfs_qm.h" #include "xfs_trace.h" #include "xfs_icache.h" +#include "xfs_cksum.h" /* * The global quota manager. There is only one of these for the entire @@ -839,7 +840,7 @@ xfs_qm_reset_dqcounts( xfs_dqid_t id, uint type) { - xfs_disk_dquot_t *ddq; + struct xfs_dqblk *dqb; int j; trace_xfs_reset_dqcounts(bp, _RET_IP_); @@ -853,8 +854,12 @@ xfs_qm_reset_dqcounts( do_div(j, sizeof(xfs_dqblk_t)); ASSERT(mp->m_quotainfo->qi_dqperchunk == j); #endif - ddq = bp->b_addr; + dqb = bp->b_addr; for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) { + struct xfs_disk_dquot *ddq; + + ddq = (struct xfs_disk_dquot *)&dqb[j]; + /* * Do a sanity check, and if needed, repair the dqblk. Don't * output any warnings because it's perfectly possible to @@ -871,7 +876,12 @@ xfs_qm_reset_dqcounts( ddq->d_bwarns = 0; ddq->d_iwarns = 0; ddq->d_rtbwarns = 0; - ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1); + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + xfs_update_cksum((char *)&dqb[j], + sizeof(struct xfs_dqblk), + XFS_DQUOT_CRC_OFF); + } } } @@ -907,19 +917,29 @@ xfs_qm_dqiter_bufs( XFS_FSB_TO_DADDR(mp, bno), mp->m_quotainfo->qi_dqchunklen, 0, &bp, &xfs_dquot_buf_ops); - if (error) - break; /* - * XXX(hch): need to figure out if it makes sense to validate - * the CRC here. + * CRC and validation errors will return a EFSCORRUPTED here. If + * this occurs, re-read without CRC validation so that we can + * repair the damage via xfs_qm_reset_dqcounts(). This process + * will leave a trace in the log indicating corruption has + * been detected. */ + if (error == EFSCORRUPTED) { + error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, + XFS_FSB_TO_DADDR(mp, bno), + mp->m_quotainfo->qi_dqchunklen, 0, &bp, + NULL); + } + + if (error) + break; + xfs_qm_reset_dqcounts(mp, bp, firstid, type); xfs_buf_delwri_queue(bp, buffer_list); xfs_buf_relse(bp); - /* - * goto the next block. - */ + + /* goto the next block. */ bno++; firstid += mp->m_quotainfo->qi_dqperchunk; } diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index c61e31c7d997..c38068f26c55 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -87,6 +87,8 @@ typedef struct xfs_dqblk { uuid_t dd_uuid; /* location information */ } xfs_dqblk_t; +#define XFS_DQUOT_CRC_OFF offsetof(struct xfs_dqblk, dd_crc) + /* * flags for q_flags field in the dquot. */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index ea341cea68cb..3033ba5e9762 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1373,6 +1373,17 @@ xfs_finish_flags( } /* + * V5 filesystems always use attr2 format for attributes. + */ + if (xfs_sb_version_hascrc(&mp->m_sb) && + (mp->m_flags & XFS_MOUNT_NOATTR2)) { + xfs_warn(mp, +"Cannot mount a V5 filesystem as %s. %s is always enabled for V5 filesystems.", + MNTOPT_NOATTR2, MNTOPT_ATTR2); + return XFS_ERROR(EINVAL); + } + + /* * mkfs'ed attr2 will turn on attr2 mount unless explicitly * told by noattr2 to turn it off */ |