From 773e9b442693b250aa6c452cb0cf5a9343f51cef Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 7 Jun 2011 20:57:14 -0700 Subject: ceph: fix page alignment corrections dd if=/dev/urandom of=/mnt/fs_depot/dd10 bs=500 seek=8388 count=1 dd if=/mnt/fs_depot/dd10 of=/root/dd10out bs=500 skip=8388 count=1 Reported-by: Henry C Chang Signed-off-by: Sage Weil --- fs/ceph/file.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 9542f07d0b93..2be0f35afdfb 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -290,7 +290,6 @@ static int striped_read(struct inode *inode, struct ceph_inode_info *ci = ceph_inode(inode); u64 pos, this_len; int io_align, page_align; - int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ int left, pages_left; int read; struct page **page_pos; @@ -326,12 +325,11 @@ more: ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : ""); if (ret > 0) { - int didpages = - ((pos & ~PAGE_CACHE_MASK) + ret) >> PAGE_CACHE_SHIFT; + int didpages = (page_align + ret) >> PAGE_CACHE_SHIFT; if (read < pos - off) { dout(" zero gap %llu to %llu\n", off + read, pos); - ceph_zero_page_vector_range(page_off + read, + ceph_zero_page_vector_range(page_align + read, pos - off - read, pages); } pos += ret; @@ -356,7 +354,7 @@ more: left = inode->i_size - pos; dout("zero tail %d\n", left); - ceph_zero_page_vector_range(page_off + read, left, + ceph_zero_page_vector_range(page_align + read, left, pages); read += left; } -- cgit v1.2.1 From d7f124f129a6aea99938e0d4172c741b56fefeda Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 13 Jun 2011 16:22:18 -0700 Subject: ceph: fix sync and dio writes across stripe boundaries We were iterating across stripe boundaries properly, but not moving the write buffer pointer forward. This caused us to rewrite the same data after the break. Fix by adjusting the data pointer forward, and recalculating the io and buffer alignment after the break. Signed-off-by: Sage Weil --- fs/ceph/file.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 2be0f35afdfb..4698a5c553dc 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -476,9 +476,6 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, else pos = *offset; - io_align = pos & ~PAGE_MASK; - buf_align = (unsigned long)data & ~PAGE_MASK; - ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left); if (ret < 0) return ret; @@ -502,6 +499,8 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, * boundary. this isn't atomic, unfortunately. :( */ more: + io_align = pos & ~PAGE_MASK; + buf_align = (unsigned long)data & ~PAGE_MASK; len = left; if (file->f_flags & O_DIRECT) { /* write from beginning of first page, regardless of @@ -591,6 +590,7 @@ out: pos += len; written += len; left -= len; + data += written; if (left) goto more; -- cgit v1.2.1 From 2bea038c52e88badbbd5b420c1de918f7f2579e9 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Thu, 16 Jun 2011 11:35:46 -0400 Subject: pnfs: write: Set mds_offset in the generic layer - it is needed by all LDs In current pnfs tree, all the layouts set mds_offset in their .write_pagelist member. mds_offset is only used by generic layer and should be handled by it. This patch is for upstream. It is needed in this -rc series to fix a bug in objects layout_commit. I'll send patches for objects and blocks to be squashed into current pnfs tree. TODO: It looks like the read path needs the same patch. Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 1 - fs/nfs/write.c | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 0bafcc91c27f..f9d03abcd04c 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -398,7 +398,6 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) * this offset and save the original offset. */ data->args.offset = filelayout_get_dserver_offset(lseg, offset); - data->mds_offset = offset; /* Perform an asynchronous write */ status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e268e3b23497..727168059684 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -864,6 +864,8 @@ static int nfs_write_rpcsetup(struct nfs_page *req, data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; + /* pnfs_set_layoutcommit needs this */ + data->mds_offset = data->args.offset; data->args.pgbase = req->wb_pgbase + offset; data->args.pages = data->pagevec; data->args.count = count; -- cgit v1.2.1 From 50176ddefa4a942419cb693dd2d8345bfdcde67c Mon Sep 17 00:00:00 2001 From: Seth Forshee Date: Tue, 31 May 2011 16:35:50 -0500 Subject: hfsplus: add missing call to bio_put() hfsplus leaks bio objects by failing to call bio_put() on the bios it allocates. Add the missing call to fix the leak. Signed-off-by: Seth Forshee Cc: # .38.x, .39.x Signed-off-by: Christoph Hellwig --- fs/hfsplus/wrapper.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index 3031d81f5f0f..4ac88ff79aa6 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c @@ -36,6 +36,7 @@ int hfsplus_submit_bio(struct block_device *bdev, sector_t sector, { DECLARE_COMPLETION_ONSTACK(wait); struct bio *bio; + int ret = 0; bio = bio_alloc(GFP_NOIO, 1); bio->bi_sector = sector; @@ -54,8 +55,10 @@ int hfsplus_submit_bio(struct block_device *bdev, sector_t sector, wait_for_completion(&wait); if (!bio_flagged(bio, BIO_UPTODATE)) - return -EIO; - return 0; + ret = -EIO; + + bio_put(bio); + return ret; } static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd) -- cgit v1.2.1 From 032016a56a1e9c83646435b32e4416d499e1f1ce Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Fri, 24 Jun 2011 01:15:02 +0400 Subject: hfsplus: Fix double iput of the same inode in hfsplus_fill_super() There is a misprint in resource deallocation code on error path in hfsplus_fill_super(): the sbi->alloc_file inode is iput twice, while the root inode in not iput at all. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Signed-off-by: Christoph Hellwig --- fs/hfsplus/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index b49b55584c84..84a47b709f51 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -500,7 +500,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) out_put_hidden_dir: iput(sbi->hidden_dir); out_put_root: - iput(sbi->alloc_file); + iput(root); out_put_alloc_file: iput(sbi->alloc_file); out_close_cat_tree: -- cgit v1.2.1 From ee1b3ea9e6171d7a842527a44873f9f51e6f239b Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 21 Jun 2011 07:18:26 -0400 Subject: cifs: set socket send and receive timeouts before attempting connect Benjamin S. reported that he was unable to suspend his machine while it had a cifs share mounted. The freezer caused this to spew when he tried it: -----------------------[snip]------------------ PM: Syncing filesystems ... done. Freezing user space processes ... (elapsed 0.01 seconds) done. Freezing remaining freezable tasks ... Freezing of tasks failed after 20.01 seconds (1 tasks refusing to freeze, wq_busy=0): cifsd S ffff880127f7b1b0 0 1821 2 0x00800000 ffff880127f7b1b0 0000000000000046 ffff88005fe008a8 ffff8800ffffffff ffff880127cee6b0 0000000000011100 ffff880127737fd8 0000000000004000 ffff880127737fd8 0000000000011100 ffff880127f7b1b0 ffff880127736010 Call Trace: [] ? sk_reset_timer+0xf/0x19 [] ? tcp_connect+0x43c/0x445 [] ? tcp_v4_connect+0x40d/0x47f [] ? schedule_timeout+0x21/0x1ad [] ? _raw_spin_lock_bh+0x9/0x1f [] ? release_sock+0x19/0xef [] ? inet_stream_connect+0x14c/0x24a [] ? autoremove_wake_function+0x0/0x2a [] ? ipv4_connect+0x39c/0x3b5 [cifs] [] ? cifs_reconnect+0x1fc/0x28a [cifs] [] ? cifs_demultiplex_thread+0x397/0xb9f [cifs] [] ? perf_event_exit_task+0xb9/0x1bf [] ? cifs_demultiplex_thread+0x0/0xb9f [cifs] [] ? cifs_demultiplex_thread+0x0/0xb9f [cifs] [] ? kthread+0x7a/0x82 [] ? kernel_thread_helper+0x4/0x10 [] ? kthread+0x0/0x82 [] ? kernel_thread_helper+0x0/0x10 Restarting tasks ... done. -----------------------[snip]------------------ We do attempt to perform a try_to_freeze in cifs_reconnect, but the connection attempt itself seems to be taking longer than 20s to time out. The connect timeout is governed by the socket send and receive timeouts, so we can shorten that period by setting those timeouts before attempting the connect instead of after. Adam Williamson tested the patch and said that it seems to have fixed suspending on his laptop when a cifs share is mounted. Reported-by: Benjamin S Tested-by: Adam Williamson Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 7f540df52527..c8cb83ef6f6f 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2474,14 +2474,6 @@ generic_ip_connect(struct TCP_Server_Info *server) if (rc < 0) return rc; - rc = socket->ops->connect(socket, saddr, slen, 0); - if (rc < 0) { - cFYI(1, "Error %d connecting to server", rc); - sock_release(socket); - server->ssocket = NULL; - return rc; - } - /* * Eventually check for other socket options to change from * the default. sock_setsockopt not used because it expects @@ -2510,6 +2502,14 @@ generic_ip_connect(struct TCP_Server_Info *server) socket->sk->sk_sndbuf, socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo); + rc = socket->ops->connect(socket, saddr, slen, 0); + if (rc < 0) { + cFYI(1, "Error %d connecting to server", rc); + sock_release(socket); + server->ssocket = NULL; + return rc; + } + if (sport == htons(RFC1001_PORT)) rc = ip_rfc1001_connect(server); -- cgit v1.2.1 From a51cb91d81f8e6fc4e5e08b772cc3ceb13ac9d37 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 6 Jul 2011 12:33:55 +0200 Subject: fs: fix lock initialization locks_alloc_lock() assumed that the allocated struct file_lock is already initialized to zero members. This is only true for the first allocation of the structure, after reuse some of the members will have random values. This will for example result in passing random fl_start values to userspace in fuse for FL_FLOCK locks, which is an information leak at best. Fix by reinitializing those members which may be non-zero after freeing. Signed-off-by: Miklos Szeredi CC: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/locks.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/locks.c b/fs/locks.c index 0a4f50dfadfb..b286539d547a 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -160,10 +160,28 @@ EXPORT_SYMBOL_GPL(unlock_flocks); static struct kmem_cache *filelock_cache __read_mostly; +static void locks_init_lock_always(struct file_lock *fl) +{ + fl->fl_next = NULL; + fl->fl_fasync = NULL; + fl->fl_owner = NULL; + fl->fl_pid = 0; + fl->fl_nspid = NULL; + fl->fl_file = NULL; + fl->fl_flags = 0; + fl->fl_type = 0; + fl->fl_start = fl->fl_end = 0; +} + /* Allocate an empty lock structure. */ struct file_lock *locks_alloc_lock(void) { - return kmem_cache_alloc(filelock_cache, GFP_KERNEL); + struct file_lock *fl = kmem_cache_alloc(filelock_cache, GFP_KERNEL); + + if (fl) + locks_init_lock_always(fl); + + return fl; } EXPORT_SYMBOL_GPL(locks_alloc_lock); @@ -200,17 +218,9 @@ void locks_init_lock(struct file_lock *fl) INIT_LIST_HEAD(&fl->fl_link); INIT_LIST_HEAD(&fl->fl_block); init_waitqueue_head(&fl->fl_wait); - fl->fl_next = NULL; - fl->fl_fasync = NULL; - fl->fl_owner = NULL; - fl->fl_pid = 0; - fl->fl_nspid = NULL; - fl->fl_file = NULL; - fl->fl_flags = 0; - fl->fl_type = 0; - fl->fl_start = fl->fl_end = 0; fl->fl_ops = NULL; fl->fl_lmops = NULL; + locks_init_lock_always(fl); } EXPORT_SYMBOL(locks_init_lock); -- cgit v1.2.1 From bcb65a797eb7c51e4f227dd19295f14d38738fee Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 6 Jul 2011 12:26:05 +0100 Subject: FDPIC: Fix memory leak The shdr4extnum variable isn't being freed in the cleanup process of elf_fdpic_core_dump(). Signed-off-by: Davidlohr Bueso Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/binfmt_elf_fdpic.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 63039ed9576f..2bc5dc644b4c 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1864,6 +1864,7 @@ cleanup: kfree(psinfo); kfree(notes); kfree(fpu); + kfree(shdr4extnum); #ifdef ELF_CORE_COPY_XFPREGS kfree(xfpu); #endif -- cgit v1.2.1 From 677d8537d875832019fa989186f084ba47ecd93d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 5 Jul 2011 17:37:37 -0400 Subject: cifs: remove bogus call to cifs_cleanup_volume_info This call to cifs_cleanup_volume_info is clearly wrong. As soon as it's called the following call to cifs_get_tcp_session will oops as the volume_info pointer will then be NULL. The caller of cifs_mount should clean up this data since it passed it in. There's no need for us to call this here. Regression introduced by commit 724d9f1cfba. Reported-by: Adam Williamson Cc: Pavel Shilovsky Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index c8cb83ef6f6f..545e85465747 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3012,7 +3012,6 @@ try_mount_again: else if (pSesInfo) cifs_put_smb_ses(pSesInfo); - cifs_cleanup_volume_info(&volume_info); FreeXid(xid); } #endif -- cgit v1.2.1 From b2a0fa152072f0085fa8d8eb0dbf9b3b0c5952fc Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 6 Jul 2011 08:10:36 -0400 Subject: cifs: fix build_unc_path_to_root to account for a prefixpath Regression introduced by commit f87d39d9513. Signed-off-by: Jeff Layton Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/connect.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 545e85465747..44376ce41e42 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2855,19 +2855,28 @@ cifs_cleanup_volume_info(struct smb_vol **pvolume_info) /* build_path_to_root returns full path to root when * we do not have an exiting connection (tcon) */ static char * -build_unc_path_to_root(const struct smb_vol *volume_info, +build_unc_path_to_root(const struct smb_vol *vol, const struct cifs_sb_info *cifs_sb) { - char *full_path; + char *full_path, *pos; + unsigned int pplen = vol->prepath ? strlen(vol->prepath) : 0; + unsigned int unc_len = strnlen(vol->UNC, MAX_TREE_SIZE + 1); - int unc_len = strnlen(volume_info->UNC, MAX_TREE_SIZE + 1); - full_path = kmalloc(unc_len + 1, GFP_KERNEL); + full_path = kmalloc(unc_len + pplen + 1, GFP_KERNEL); if (full_path == NULL) return ERR_PTR(-ENOMEM); - strncpy(full_path, volume_info->UNC, unc_len); - full_path[unc_len] = 0; /* add trailing null */ + strncpy(full_path, vol->UNC, unc_len); + pos = full_path + unc_len; + + if (pplen) { + strncpy(pos, vol->prepath, pplen); + pos += pplen; + } + + *pos = '\0'; /* add trailing null */ convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb)); + cFYI(1, "%s: full_path=%s", __func__, full_path); return full_path; } -- cgit v1.2.1 From f9e59bcba2cff580a3ccf62e89460f9eed295d89 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 6 Jul 2011 08:10:37 -0400 Subject: cifs: have cifs_cleanup_volume_info not take a double pointer ...as that makes for a cumbersome interface. Make it take a regular smb_vol pointer and rely on the caller to zero it out if needed. Signed-off-by: Jeff Layton Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 2 +- fs/cifs/cifsproto.h | 2 +- fs/cifs/connect.c | 10 +++------- 3 files changed, 5 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 35f9154615fa..e11b83149833 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -713,7 +713,7 @@ cifs_do_mount(struct file_system_type *fs_type, out_super: deactivate_locked_super(sb); out: - cifs_cleanup_volume_info(&volume_info); + cifs_cleanup_volume_info(volume_info); return root; out_mountdata: diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 257f312ede42..53b1b13581c6 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -154,7 +154,7 @@ extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *, extern void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, struct cifs_sb_info *cifs_sb); extern int cifs_match_super(struct super_block *, void *); -extern void cifs_cleanup_volume_info(struct smb_vol **pvolume_info); +extern void cifs_cleanup_volume_info(struct smb_vol *pvolume_info); extern int cifs_setup_volume_info(struct smb_vol **pvolume_info, char *mount_data, const char *devname); extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 44376ce41e42..dd064075ddfb 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2831,14 +2831,11 @@ is_path_accessible(int xid, struct cifs_tcon *tcon, } void -cifs_cleanup_volume_info(struct smb_vol **pvolume_info) +cifs_cleanup_volume_info(struct smb_vol *volume_info) { - struct smb_vol *volume_info; - - if (!pvolume_info || !*pvolume_info) + if (!volume_info) return; - volume_info = *pvolume_info; kfree(volume_info->username); kzfree(volume_info->password); kfree(volume_info->UNC); @@ -2847,7 +2844,6 @@ cifs_cleanup_volume_info(struct smb_vol **pvolume_info) kfree(volume_info->iocharset); kfree(volume_info->prepath); kfree(volume_info); - *pvolume_info = NULL; return; } @@ -2990,7 +2986,7 @@ int cifs_setup_volume_info(struct smb_vol **pvolume_info, char *mount_data, *pvolume_info = volume_info; return rc; out: - cifs_cleanup_volume_info(&volume_info); + cifs_cleanup_volume_info(volume_info); return rc; } -- cgit v1.2.1 From 1316d4da3f632d5843d5a446203e73067dc40f09 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 4 Jul 2011 05:27:36 +0000 Subject: xfs: unpin stale inodes directly in IOP_COMMITTED When inodes are marked stale in a transaction, they are treated specially when the inode log item is being inserted into the AIL. It tries to avoid moving the log item forward in the AIL due to a race condition with the writing the underlying buffer back to disk. The was "fixed" in commit de25c18 ("xfs: avoid moving stale inodes in the AIL"). To avoid moving the item forward, we return a LSN smaller than the commit_lsn of the completing transaction, thereby trying to trick the commit code into not moving the inode forward at all. I'm not sure this ever worked as intended - it assumes the inode is already in the AIL, but I don't think the returned LSN would have been small enough to prevent moving the inode. It appears that the reason it worked is that the lower LSN of the inodes meant they were inserted into the AIL and flushed before the inode buffer (which was moved to the commit_lsn of the transaction). The big problem is that with delayed logging, the returning of the different LSN means insertion takes the slow, non-bulk path. Worse yet is that insertion is to a position -before- the commit_lsn so it is doing a AIL traversal on every insertion, and has to walk over all the items that have already been inserted into the AIL. It's expensive. To compound the matter further, with delayed logging inodes are likely to go from clean to stale in a single checkpoint, which means they aren't even in the AIL at all when we come across them at AIL insertion time. Hence these were all getting inserted into the AIL when they simply do not need to be as inodes marked XFS_ISTALE are never written back. Transactional/recovery integrity is maintained in this case by the other items in the unlink transaction that were modified (e.g. the AGI btree blocks) and committed in the same checkpoint. So to fix this, simply unpin the stale inodes directly in xfs_inode_item_committed() and return -1 to indicate that the AIL insertion code does not need to do any further processing of these inodes. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/xfs_inode_item.c | 14 ++++++++------ fs/xfs/xfs_trans.c | 4 ++-- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 09983a3344a5..b1e88d56069c 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -681,15 +681,15 @@ xfs_inode_item_unlock( * where the cluster buffer may be unpinned before the inode is inserted into * the AIL during transaction committed processing. If the buffer is unpinned * before the inode item has been committed and inserted, then it is possible - * for the buffer to be written and IO completions before the inode is inserted + * for the buffer to be written and IO completes before the inode is inserted * into the AIL. In that case, we'd be inserting a clean, stale inode into the * AIL which will never get removed. It will, however, get reclaimed which * triggers an assert in xfs_inode_free() complaining about freein an inode * still in the AIL. * - * To avoid this, return a lower LSN than the one passed in so that the - * transaction committed code will not move the inode forward in the AIL but - * will still unpin it properly. + * To avoid this, just unpin the inode directly and return a LSN of -1 so the + * transaction committed code knows that it does not need to do any further + * processing on the item. */ STATIC xfs_lsn_t xfs_inode_item_committed( @@ -699,8 +699,10 @@ xfs_inode_item_committed( struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; - if (xfs_iflags_test(ip, XFS_ISTALE)) - return lsn - 1; + if (xfs_iflags_test(ip, XFS_ISTALE)) { + xfs_inode_item_unpin(lip, 0); + return -1; + } return lsn; } diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 7c7bc2b786bd..c83f63b33aae 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -1361,7 +1361,7 @@ xfs_trans_item_committed( lip->li_flags |= XFS_LI_ABORTED; item_lsn = IOP_COMMITTED(lip, commit_lsn); - /* If the committed routine returns -1, item has been freed. */ + /* item_lsn of -1 means the item needs no further processing */ if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) return; @@ -1474,7 +1474,7 @@ xfs_trans_committed_bulk( lip->li_flags |= XFS_LI_ABORTED; item_lsn = IOP_COMMITTED(lip, commit_lsn); - /* item_lsn of -1 means the item was freed */ + /* item_lsn of -1 means the item needs no further processing */ if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) continue; -- cgit v1.2.1 From 0942caa373c676dca614ea8352ac77e0270aba73 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 28 Jun 2011 15:10:37 +0000 Subject: btrfs: add missing options displayed in mount output There are three missed mount options settable by user which are not currently displayed in mount output. Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 5 +++++ fs/btrfs/super.c | 6 ++++++ 2 files changed, 11 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8e948ec1ee6b..60e13ef23a5e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1336,6 +1336,11 @@ struct btrfs_ioctl_defrag_range_args { */ #define BTRFS_STRING_ITEM_KEY 253 +/* + * Flags for mount options. + * + * Note: don't forget to add new options to btrfs_show_options() + */ #define BTRFS_MOUNT_NODATASUM (1 << 0) #define BTRFS_MOUNT_NODATACOW (1 << 1) #define BTRFS_MOUNT_NOBARRIER (1 << 2) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 3559d0b3518a..5746081199ee 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -721,6 +721,12 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",clear_cache"); if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) seq_puts(seq, ",user_subvol_rm_allowed"); + if (btrfs_test_opt(root, ENOSPC_DEBUG)) + seq_puts(seq, ",enospc_debug"); + if (btrfs_test_opt(root, AUTO_DEFRAG)) + seq_puts(seq, ",autodefrag"); + if (btrfs_test_opt(root, INODE_MAP_CACHE)) + seq_puts(seq, ",inode_cache"); return 0; } -- cgit v1.2.1 From 508794eb5ec2a2b832742e78c6766844b10c0c94 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Sat, 2 Jul 2011 21:24:41 +0000 Subject: Btrfs: don't panic if we get an error while balancing V2 A user reported an error where if we try to balance an fs after a device has been removed it will blow up. This is because we get an EIO back and this is where BUG_ON(ret) bites us in the ass. To fix we just exit. Thanks, Reported-by: Anand Jain Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1efa56e18f9b..19450bc53632 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2098,7 +2098,8 @@ int btrfs_balance(struct btrfs_root *dev_root) chunk_root->root_key.objectid, found_key.objectid, found_key.offset); - BUG_ON(ret && ret != -ENOSPC); + if (ret && ret != -ENOSPC) + goto error; key.offset = found_key.offset - 1; } ret = 0; -- cgit v1.2.1 From 149e2d76b4886c4c7ff5e077646a8ba3563c8026 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 6 Jul 2011 18:51:53 -0400 Subject: btrfs: fix oops when doing space balance We need to make sure the data relocation inode doesn't go through the delayed metadata updates, otherwise we get an oops during balance: kernel BUG at fs/btrfs/relocation.c:4303! [SNIP] Call Trace: [] ? update_ref_for_cow+0x22d/0x330 [btrfs] [] __btrfs_cow_block+0x451/0x5e0 [btrfs] [] ? read_block_for_search+0x14d/0x4d0 [btrfs] [] btrfs_cow_block+0x10b/0x240 [btrfs] [] btrfs_search_slot+0x49e/0x7a0 [btrfs] [] btrfs_lookup_inode+0x2f/0xa0 [btrfs] [] ? mutex_lock+0x1e/0x50 [] btrfs_update_delayed_inode+0x71/0x160 [btrfs] [] ? __btrfs_release_delayed_node+0x67/0x190 [btrfs] [] btrfs_run_delayed_items+0xe8/0x120 [btrfs] [] btrfs_commit_transaction+0x250/0x850 [btrfs] [] ? find_get_pages+0x39/0x130 [] ? join_transaction+0x25/0x250 [btrfs] [] ? wake_up_bit+0x40/0x40 [] prepare_to_relocate+0xda/0xf0 [btrfs] [] relocate_block_group+0x4b/0x620 [btrfs] [] ? btrfs_clean_old_snapshots+0x35/0x150 [btrfs] [] btrfs_relocate_block_group+0x1b3/0x2e0 [btrfs] [] ? btrfs_tree_unlock+0x50/0x50 [btrfs] [] btrfs_relocate_chunk+0x8b/0x670 [btrfs] [] ? btrfs_set_path_blocking+0x3d/0x50 [btrfs] [] ? read_extent_buffer+0xd8/0x1d0 [btrfs] [] ? btrfs_previous_item+0xb1/0x150 [btrfs] [] ? read_extent_buffer+0xd8/0x1d0 [btrfs] [] btrfs_balance+0x21a/0x2b0 [btrfs] [] btrfs_ioctl+0x798/0xd20 [btrfs] [] ? handle_mm_fault+0x148/0x270 [] ? do_page_fault+0x1d8/0x4b0 [] do_vfs_ioctl+0x9a/0x540 [] sys_ioctl+0xa1/0xb0 [] system_call_fastpath+0x16/0x1b [SNIP] RIP [] btrfs_reloc_cow_block+0x22c/0x270 [btrfs] Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 447612d3a16a..4a1373083747 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2678,12 +2678,14 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, int ret; /* - * If root is tree root, it means this inode is used to - * store free space information. And these inodes are updated - * when committing the transaction, so they needn't delaye to - * be updated, or deadlock will occured. + * If the inode is a free space inode, we can deadlock during commit + * if we put it into the delayed code. + * + * The data relocation inode should also be directly updated + * without delay */ - if (!is_free_space_inode(root, inode)) { + if (!is_free_space_inode(root, inode) + && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { ret = btrfs_delayed_update_inode(trans, root, inode); if (!ret) btrfs_set_inode_last_trans(trans, inode); -- cgit v1.2.1 From c902ce1bfb40d8b049bd2319b388b4b68b04bc27 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 7 Jul 2011 12:19:48 +0100 Subject: FS-Cache: Add a helper to bulk uncache pages on an inode Add an FS-Cache helper to bulk uncache pages on an inode. This will only work for the circumstance where the pages in the cache correspond 1:1 with the pages attached to an inode's page cache. This is required for CIFS and NFS: When disabling inode cookie, we were returning the cookie and setting cifsi->fscache to NULL but failed to invalidate any previously mapped pages. This resulted in "Bad page state" errors and manifested in other kind of errors when running fsstress. Fix it by uncaching mapped pages when we disable the inode cookie. This patch should fix the following oops and "Bad page state" errors seen during fsstress testing. ------------[ cut here ]------------ kernel BUG at fs/cachefiles/namei.c:201! invalid opcode: 0000 [#1] SMP Pid: 5, comm: kworker/u:0 Not tainted 2.6.38.7-30.fc15.x86_64 #1 Bochs Bochs RIP: 0010: cachefiles_walk_to_object+0x436/0x745 [cachefiles] RSP: 0018:ffff88002ce6dd00 EFLAGS: 00010282 RAX: ffff88002ef165f0 RBX: ffff88001811f500 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000100 RDI: 0000000000000282 RBP: ffff88002ce6dda0 R08: 0000000000000100 R09: ffffffff81b3a300 R10: 0000ffff00066c0a R11: 0000000000000003 R12: ffff88002ae54840 R13: ffff88002ae54840 R14: ffff880029c29c00 R15: ffff88001811f4b0 FS: 00007f394dd32720(0000) GS:ffff88002ef00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007fffcb62ddf8 CR3: 000000001825f000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process kworker/u:0 (pid: 5, threadinfo ffff88002ce6c000, task ffff88002ce55cc0) Stack: 0000000000000246 ffff88002ce55cc0 ffff88002ce6dd58 ffff88001815dc00 ffff8800185246c0 ffff88001811f618 ffff880029c29d18 ffff88001811f380 ffff88002ce6dd50 ffffffff814757e4 ffff88002ce6dda0 ffffffff8106ac56 Call Trace: cachefiles_lookup_object+0x78/0xd4 [cachefiles] fscache_lookup_object+0x131/0x16d [fscache] fscache_object_work_func+0x1bc/0x669 [fscache] process_one_work+0x186/0x298 worker_thread+0xda/0x15d kthread+0x84/0x8c kernel_thread_helper+0x4/0x10 RIP cachefiles_walk_to_object+0x436/0x745 [cachefiles] ---[ end trace 1d481c9af1804caa ]--- I tested the uncaching by the following means: (1) Create a big file on my NFS server (104857600 bytes). (2) Read the file into the cache with md5sum on the NFS client. Look in /proc/fs/fscache/stats: Pages : mrk=25601 unc=0 (3) Open the file for read/write ("bash 5<>/warthog/bigfile"). Look in proc again: Pages : mrk=25601 unc=25601 Reported-by: Jeff Layton Signed-off-by: David Howells Reviewed-and-Tested-by: Suresh Jayaraman cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/cifs/fscache.c | 1 + fs/fscache/page.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/fscache.c | 8 +++----- 3 files changed, 48 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index 816696621ec9..42e5363b4102 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c @@ -92,6 +92,7 @@ static void cifs_fscache_disable_inode_cookie(struct inode *inode) if (cifsi->fscache) { cFYI(1, "%s: (0x%p)", __func__, cifsi->fscache); + fscache_uncache_all_inode_pages(cifsi->fscache, inode); fscache_relinquish_cookie(cifsi->fscache, 1); cifsi->fscache = NULL; } diff --git a/fs/fscache/page.c b/fs/fscache/page.c index a2a5d19ece6a..2f343b4d7a7d 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -954,3 +954,47 @@ void fscache_mark_pages_cached(struct fscache_retrieval *op, pagevec_reinit(pagevec); } EXPORT_SYMBOL(fscache_mark_pages_cached); + +/* + * Uncache all the pages in an inode that are marked PG_fscache, assuming them + * to be associated with the given cookie. + */ +void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, + struct inode *inode) +{ + struct address_space *mapping = inode->i_mapping; + struct pagevec pvec; + pgoff_t next; + int i; + + _enter("%p,%p", cookie, inode); + + if (!mapping || mapping->nrpages == 0) { + _leave(" [no pages]"); + return; + } + + pagevec_init(&pvec, 0); + next = 0; + while (next <= (loff_t)-1 && + pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE) + ) { + for (i = 0; i < pagevec_count(&pvec); i++) { + struct page *page = pvec.pages[i]; + pgoff_t page_index = page->index; + + ASSERTCMP(page_index, >=, next); + next = page_index + 1; + + if (PageFsCache(page)) { + __fscache_wait_on_page_write(cookie, page); + __fscache_uncache_page(cookie, page); + } + } + pagevec_release(&pvec); + cond_resched(); + } + + _leave(""); +} +EXPORT_SYMBOL(__fscache_uncache_all_inode_pages); diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index ce153a6b3aec..419119c371bf 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -259,12 +259,10 @@ static void nfs_fscache_disable_inode_cookie(struct inode *inode) dfprintk(FSCACHE, "NFS: nfsi 0x%p turning cache off\n", NFS_I(inode)); - /* Need to invalidate any mapped pages that were read in before - * turning off the cache. + /* Need to uncache any pages attached to this inode that + * fscache knows about before turning off the cache. */ - if (inode->i_mapping && inode->i_mapping->nrpages) - invalidate_inode_pages2(inode->i_mapping); - + fscache_uncache_all_inode_pages(NFS_I(inode)->fscache, inode); nfs_fscache_zap_inode_cookie(inode); } } -- cgit v1.2.1 From 04db79b015dafcb79371fda6b5c32ffdbd31a2ff Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 6 Jul 2011 08:10:38 -0400 Subject: cifs: factor smb_vol allocation out of cifs_setup_volume_info Signed-off-by: Jeff Layton Reviewed-by: Pavel Shilovsky Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 6 +++--- fs/cifs/cifsproto.h | 4 ++-- fs/cifs/connect.c | 56 ++++++++++++++++++++++++++--------------------------- 3 files changed, 33 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index e11b83149833..3e2989976297 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -649,9 +649,9 @@ cifs_do_mount(struct file_system_type *fs_type, cFYI(1, "Devname: %s flags: %d ", dev_name, flags); - rc = cifs_setup_volume_info(&volume_info, (char *)data, dev_name); - if (rc) - return ERR_PTR(rc); + volume_info = cifs_get_volume_info((char *)data, dev_name); + if (IS_ERR(volume_info)) + return ERR_CAST(volume_info); cifs_sb = kzalloc(sizeof(struct cifs_sb_info), GFP_KERNEL); if (cifs_sb == NULL) { diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 53b1b13581c6..8df28e925e5b 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -155,8 +155,8 @@ extern void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, struct cifs_sb_info *cifs_sb); extern int cifs_match_super(struct super_block *, void *); extern void cifs_cleanup_volume_info(struct smb_vol *pvolume_info); -extern int cifs_setup_volume_info(struct smb_vol **pvolume_info, - char *mount_data, const char *devname); +extern struct smb_vol *cifs_get_volume_info(char *mount_data, + const char *devname); extern int cifs_mount(struct cifs_sb_info *, struct smb_vol *); extern void cifs_umount(struct cifs_sb_info *); extern void cifs_dfs_release_automount_timer(void); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index dd064075ddfb..46cc0ad03487 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2931,33 +2931,20 @@ expand_dfs_referral(int xid, struct cifs_ses *pSesInfo, } #endif -int cifs_setup_volume_info(struct smb_vol **pvolume_info, char *mount_data, - const char *devname) +static int +cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data, + const char *devname) { - struct smb_vol *volume_info; int rc = 0; - *pvolume_info = NULL; - - volume_info = kzalloc(sizeof(struct smb_vol), GFP_KERNEL); - if (!volume_info) { - rc = -ENOMEM; - goto out; - } - - if (cifs_parse_mount_options(mount_data, devname, - volume_info)) { - rc = -EINVAL; - goto out; - } + if (cifs_parse_mount_options(mount_data, devname, volume_info)) + return -EINVAL; if (volume_info->nullauth) { cFYI(1, "null user"); volume_info->username = kzalloc(1, GFP_KERNEL); - if (volume_info->username == NULL) { - rc = -ENOMEM; - goto out; - } + if (volume_info->username == NULL) + return -ENOMEM; } else if (volume_info->username) { /* BB fixme parse for domain name here */ cFYI(1, "Username: %s", volume_info->username); @@ -2965,8 +2952,7 @@ int cifs_setup_volume_info(struct smb_vol **pvolume_info, char *mount_data, cifserror("No username specified"); /* In userspace mount helper we can get user name from alternate locations such as env variables and files on disk */ - rc = -EINVAL; - goto out; + return -EINVAL; } /* this is needed for ASCII cp to Unicode converts */ @@ -2978,18 +2964,32 @@ int cifs_setup_volume_info(struct smb_vol **pvolume_info, char *mount_data, if (volume_info->local_nls == NULL) { cERROR(1, "CIFS mount error: iocharset %s not found", volume_info->iocharset); - rc = -ELIBACC; - goto out; + return -ELIBACC; } } - *pvolume_info = volume_info; - return rc; -out: - cifs_cleanup_volume_info(volume_info); return rc; } +struct smb_vol * +cifs_get_volume_info(char *mount_data, const char *devname) +{ + int rc; + struct smb_vol *volume_info; + + volume_info = kzalloc(sizeof(struct smb_vol), GFP_KERNEL); + if (!volume_info) + return ERR_PTR(-ENOMEM); + + rc = cifs_setup_volume_info(volume_info, mount_data, devname); + if (rc) { + cifs_cleanup_volume_info(volume_info); + volume_info = ERR_PTR(rc); + } + + return volume_info; +} + int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) { -- cgit v1.2.1 From 20547490c12b0ee3d32152b85e9f9bd183aa7224 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 9 Jul 2011 12:21:07 -0400 Subject: cifs: move bdi_setup_and_register outside of CONFIG_CIFS_DFS_UPCALL This needs to be done regardless of whether that KConfig option is set or not. Reported-by: Sven-Haegar Koch Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 46cc0ad03487..511176903e57 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3002,6 +3002,7 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) struct tcon_link *tlink; #ifdef CONFIG_CIFS_DFS_UPCALL int referral_walks_count = 0; +#endif rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY); if (rc) @@ -3009,6 +3010,7 @@ cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) cifs_sb->bdi.ra_pages = default_backing_dev_info.ra_pages; +#ifdef CONFIG_CIFS_DFS_UPCALL try_mount_again: /* cleanup activities if we're chasing a referral */ if (referral_walks_count) { -- cgit v1.2.1 From b9bce2e9f9936cfd12fbc62ead11edcdd46dec7e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 6 Jul 2011 08:10:39 -0400 Subject: cifs: fix expand_dfs_referral Regression introduced in commit 724d9f1cfba. Prior to that, expand_dfs_referral would regenerate the mount data string and then call cifs_parse_mount_options to re-parse it (klunky, but it worked). The above commit moved cifs_parse_mount_options out of cifs_mount, so the re-parsing of the new mount options no longer occurred. Fix it by making expand_dfs_referral re-parse the mount options. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 511176903e57..6ec366ff28b3 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -65,6 +65,8 @@ static int ip_connect(struct TCP_Server_Info *server); static int generic_ip_connect(struct TCP_Server_Info *server); static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink); static void cifs_prune_tlinks(struct work_struct *work); +static int cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data, + const char *devname); /* * cifs tcp session reconnection @@ -2830,12 +2832,9 @@ is_path_accessible(int xid, struct cifs_tcon *tcon, return rc; } -void -cifs_cleanup_volume_info(struct smb_vol *volume_info) +static void +cleanup_volume_info_contents(struct smb_vol *volume_info) { - if (!volume_info) - return; - kfree(volume_info->username); kzfree(volume_info->password); kfree(volume_info->UNC); @@ -2843,10 +2842,18 @@ cifs_cleanup_volume_info(struct smb_vol *volume_info) kfree(volume_info->domainname); kfree(volume_info->iocharset); kfree(volume_info->prepath); +} + +void +cifs_cleanup_volume_info(struct smb_vol *volume_info) +{ + if (!volume_info) + return; + cleanup_volume_info_contents(volume_info); kfree(volume_info); - return; } + #ifdef CONFIG_CIFS_DFS_UPCALL /* build_path_to_root returns full path to root when * we do not have an exiting connection (tcon) */ @@ -2915,15 +2922,18 @@ expand_dfs_referral(int xid, struct cifs_ses *pSesInfo, &fake_devname); free_dfs_info_array(referrals, num_referrals); - kfree(fake_devname); - - if (cifs_sb->mountdata != NULL) - kfree(cifs_sb->mountdata); if (IS_ERR(mdata)) { rc = PTR_ERR(mdata); mdata = NULL; + } else { + cleanup_volume_info_contents(volume_info); + memset(volume_info, '\0', sizeof(*volume_info)); + rc = cifs_setup_volume_info(volume_info, mdata, + fake_devname); } + kfree(fake_devname); + kfree(cifs_sb->mountdata); cifs_sb->mountdata = mdata; } kfree(full_path); -- cgit v1.2.1 From f484b5d001a972a42129570e98086a2a6d216ce0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 11 Jul 2011 10:16:34 -0400 Subject: cifs: drop spinlock before calling cifs_put_tlink ...as that function can sleep. Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 6ec366ff28b3..dbd669cc5bc7 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2242,8 +2242,8 @@ cifs_match_super(struct super_block *sb, void *data) rc = compare_mount_options(sb, mnt_data); out: - cifs_put_tlink(tlink); spin_unlock(&cifs_tcp_ses_lock); + cifs_put_tlink(tlink); return rc; } -- cgit v1.2.1 From e5012d1f3861d18c7f3814e757c1c3ab3741dbcd Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 11 Jul 2011 17:17:42 -0400 Subject: NFSv4.1: update nfs4_fattr_bitmap_maxsz Attribute IDs assigned in RFC 5661 now require three bitmaps. Fixes hitting a BUG_ON in xdr_shrink_bufhead when getting ACLs. Signed-off-by: Andy Adamson Cc:stable@kernel.org [2.6.39] Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 6870bc61ceec..e6e8f3b9a1de 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -91,7 +91,7 @@ static int nfs4_stat_to_errno(int); #define encode_getfh_maxsz (op_encode_hdr_maxsz) #define decode_getfh_maxsz (op_decode_hdr_maxsz + 1 + \ ((3+NFS4_FHSIZE) >> 2)) -#define nfs4_fattr_bitmap_maxsz 3 +#define nfs4_fattr_bitmap_maxsz 4 #define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) #define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2)) #define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) -- cgit v1.2.1 From 1ce533686c7d40bf900dc346a7279c17a9ee8e0e Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Mon, 13 Jun 2011 14:27:40 -0500 Subject: GFS2: force a log flush when invalidating the rindex glock Right now, there is nothing that forces the log to get flushed when a node drops its rindex glock so that another node can grow the filesystem. If the log doesn't get flushed, GFS2 can corrupt the sd_log_le_rg list in the following way. A node puts an rgd on the list in rg_lo_add(), and then the rindex glock is dropped so the other node can grow the filesystem. When the node reacquires the rindex glock, that rgd gets deleted in clear_rgrpdi() before ever being removed from the list by gfs2_log_flush(). This code simply forces a log flush when the rindex glock is invalidated, solving the problem. Signed-off-by: Benjamin Marzinski Signed-off-by: Steven Whitehouse --- fs/gfs2/glops.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 8ef70f464731..712b722030a1 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -221,8 +221,10 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) } } - if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) + if (ip == GFS2_I(gl->gl_sbd->sd_rindex)) { + gfs2_log_flush(gl->gl_sbd, NULL); gl->gl_sbd->sd_rindex_uptodate = 0; + } if (ip && S_ISREG(ip->i_inode.i_mode)) truncate_inode_pages(ip->i_inode.i_mapping, 0); } -- cgit v1.2.1 From 3942ae5319640ced5844b75f44884e4bcb8a2f16 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 11 Jul 2011 08:53:30 +0100 Subject: GFS2: Fix race during filesystem mount There is a potential race during filesystem mounting which has recently been reported. It occurs when the userland gfs_controld is able to process requests fast enough that it tries to use the sysfs interface before the lock module is properly initialised. This is a pretty unusual case as normally the lock module initialisation is very quick compared with gfs_controld. This patch adds an interruptible completion which is used to ensure that userland will wait for the initialisation of the lock module to complete. There are other potential solutions to this problem, but this is the quickest at this stage and has been tested both with and without mount.gfs2 present in the system. Signed-off-by: Steven Whitehouse Reported-by: David Booher --- fs/gfs2/incore.h | 2 ++ fs/gfs2/ops_fstype.c | 3 +++ fs/gfs2/sys.c | 7 ++++++- 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 0a064e91ac70..81206e70cbf6 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -17,6 +17,7 @@ #include #include #include +#include #define DIO_WAIT 0x00000010 #define DIO_METADATA 0x00000020 @@ -546,6 +547,7 @@ struct gfs2_sbd { struct gfs2_glock *sd_trans_gl; wait_queue_head_t sd_glock_wait; atomic_t sd_glock_disposal; + struct completion sd_locking_init; /* Inode Stuff */ diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 8ac9ae189b53..2a77071fb7b6 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -72,6 +72,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) init_waitqueue_head(&sdp->sd_glock_wait); atomic_set(&sdp->sd_glock_disposal, 0); + init_completion(&sdp->sd_locking_init); spin_lock_init(&sdp->sd_statfs_spin); spin_lock_init(&sdp->sd_rindex_spin); @@ -1017,11 +1018,13 @@ hostdata_error: fsname++; if (lm->lm_mount == NULL) { fs_info(sdp, "Now mounting FS...\n"); + complete(&sdp->sd_locking_init); return 0; } ret = lm->lm_mount(sdp, fsname); if (ret == 0) fs_info(sdp, "Joined cluster. Now mounting FS...\n"); + complete(&sdp->sd_locking_init); return ret; } diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index e20eab37bc80..443cabcfcd23 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -338,6 +338,9 @@ static ssize_t lkfirst_store(struct gfs2_sbd *sdp, const char *buf, size_t len) rv = sscanf(buf, "%u", &first); if (rv != 1 || first > 1) return -EINVAL; + rv = wait_for_completion_killable(&sdp->sd_locking_init); + if (rv) + return rv; spin_lock(&sdp->sd_jindex_spin); rv = -EBUSY; if (test_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0) @@ -414,7 +417,9 @@ static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len) rv = sscanf(buf, "%d", &jid); if (rv != 1) return -EINVAL; - + rv = wait_for_completion_killable(&sdp->sd_locking_init); + if (rv) + return rv; spin_lock(&sdp->sd_jindex_spin); rv = -EINVAL; if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) -- cgit v1.2.1 From 62411ab2fe5f002dff27417630ddf02cc40ca404 Mon Sep 17 00:00:00 2001 From: Shirish Pargaonkar Date: Sun, 10 Jul 2011 06:55:32 -0500 Subject: cifs: Fix signing failure when server mandates signing for NTLMSSP When using NTLMSSP authentication mechanism, if server mandates signing, keep the flags in type 3 messages of the NTLMSSP exchange same as in type 1 messages (i.e. keep the indicated capabilities same). Some of the servers such as Samba, expect the flags such as Negotiate_Key_Exchange in type 3 message of NTLMSSP exchange as well. Some servers like Windows do not. https://bugzilla.samba.org/show_bug.cgi?id=8212 Signed-off-by: Shirish Pargaonkar Acked-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/sess.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 3892ab817a36..d3e619692ee0 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -428,8 +428,7 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { flags |= NTLMSSP_NEGOTIATE_SIGN; if (!ses->server->session_estab) - flags |= NTLMSSP_NEGOTIATE_KEY_XCH | - NTLMSSP_NEGOTIATE_EXTENDED_SEC; + flags |= NTLMSSP_NEGOTIATE_KEY_XCH; } sec_blob->NegotiateFlags = cpu_to_le32(flags); @@ -465,10 +464,11 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; if (ses->server->sec_mode & - (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) + (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { flags |= NTLMSSP_NEGOTIATE_SIGN; - if (ses->server->sec_mode & SECMODE_SIGN_REQUIRED) - flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN; + if (!ses->server->session_estab) + flags |= NTLMSSP_NEGOTIATE_KEY_XCH; + } tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE); sec_blob->NegotiateFlags = cpu_to_le32(flags); -- cgit v1.2.1 From ea1be1a3c3c4b2bbc32aeb7995e18336c8060e0e Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 12 Jul 2011 18:24:43 +0000 Subject: [CIFS] update limit for snprintf in cifs_construct_tcon In 34c87901e113 "Shrink stack space usage in cifs_construct_tcon" we change the size of the username name buffer from MAX_USERNAME_SIZE (256) to 28. This call to snprintf() needs to be updated as well. Reported by Dan Carpenter. Reviewed-by: Dan Carpenter Signed-off-by: Steve French --- fs/cifs/connect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index dbd669cc5bc7..ccc1afa0bf3b 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3485,7 +3485,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid) goto out; } - snprintf(username, MAX_USERNAME_SIZE, "krb50x%x", fsuid); + snprintf(username, sizeof(username), "krb50x%x", fsuid); vol_info->username = username; vol_info->local_nls = cifs_sb->local_nls; vol_info->linux_uid = fsuid; -- cgit v1.2.1 From c2ec9471b5b1307429aef1cfaa2b3ae453a61d6f Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 12 Jul 2011 19:15:02 +0000 Subject: [CIFS] update cifs to version 1.74 Signed-off-by: Steve French --- fs/cifs/cifsfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 0900e1658c96..036ca83e5f46 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -129,5 +129,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "1.73" +#define CIFS_VERSION "1.74" #endif /* _CIFSFS_H */ -- cgit v1.2.1 From 94c0d4ecbe7f9fe56e052b26b2ab484e246c07b4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 12 Jul 2011 21:40:23 -0400 Subject: Fix ->d_lock locking order in unlazy_walk() Make sure that child is still a child of parent before nested locking of child->d_lock in unlazy_walk(); otherwise we are risking a violation of locking order and deadlocks. Signed-off-by: Al Viro --- fs/namei.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 0223c41fb114..5c867dd1c0b3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -433,6 +433,8 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) goto err_parent; BUG_ON(nd->inode != parent->d_inode); } else { + if (dentry->d_parent != parent) + goto err_parent; spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); if (!__d_rcu_to_refcount(dentry, nd->seq)) goto err_child; -- cgit v1.2.1 From 380f7c65a7eb3288e4b6812acf3474a1de230707 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 14 Jul 2011 08:59:44 +0100 Subject: GFS2: Resolve inode eviction and ail list interaction bug This patch contains a few misc fixes which resolve a recently reported issue. This patch has been a real team effort and has received a lot of testing. The first issue is that the ail lock needs to be held over a few more operations. The lock thats added into gfs2_releasepage() may possibly be a candidate for replacing with RCU at some future point, but at this stage we've gone for the obvious fix. The second issue is that gfs2_write_inode() can end up calling a glock recursively when called from gfs2_evict_inode() via the syncing code, so it needs a guard added. The third issue is that we either need to not truncate the metadata pages of inodes which have zero link count, but which we cannot deallocate due to them still being in use by other nodes, or we need to ensure that those pages have all made it through the journal and ail lists first. This patch takes the former approach, but the latter has also been tested and there is nothing to choose between them performance-wise. So again, we could revise that decision in the future. Also, the inode eviction process is now better documented. Signed-off-by: Steven Whitehouse Tested-by: Bob Peterson Tested-by: Abhijith Das Reported-by: Barry J. Marson Reported-by: David Teigland --- fs/gfs2/aops.c | 3 +++ fs/gfs2/glops.c | 4 ++-- fs/gfs2/log.c | 1 + fs/gfs2/super.c | 36 ++++++++++++++++++++++++++++++------ 4 files changed, 36 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 802ac5eeba28..f9fbbe96c222 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -1069,6 +1069,7 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask) return 0; gfs2_log_lock(sdp); + spin_lock(&sdp->sd_ail_lock); head = bh = page_buffers(page); do { if (atomic_read(&bh->b_count)) @@ -1080,6 +1081,7 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask) goto not_possible; bh = bh->b_this_page; } while(bh != head); + spin_unlock(&sdp->sd_ail_lock); gfs2_log_unlock(sdp); head = bh = page_buffers(page); @@ -1112,6 +1114,7 @@ not_possible: /* Should never happen */ WARN_ON(buffer_dirty(bh)); WARN_ON(buffer_pinned(bh)); cannot_release: + spin_unlock(&sdp->sd_ail_lock); gfs2_log_unlock(sdp); return 0; } diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 712b722030a1..2cca29316bd6 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -47,10 +47,10 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl) bd_ail_gl_list); bh = bd->bd_bh; gfs2_remove_from_ail(bd); - spin_unlock(&sdp->sd_ail_lock); - bd->bd_bh = NULL; bh->b_private = NULL; + spin_unlock(&sdp->sd_ail_lock); + bd->bd_blkno = bh->b_blocknr; gfs2_log_lock(sdp); gfs2_assert_withdraw(sdp, !buffer_busy(bh)); diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 903115f2bb34..85c62923ee29 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -903,6 +903,7 @@ void gfs2_meta_syncfs(struct gfs2_sbd *sdp) if (gfs2_ail1_empty(sdp)) break; } + gfs2_log_flush(sdp, NULL); } static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index ed540e7018be..fb0edf735483 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -757,13 +757,17 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) struct timespec atime; struct gfs2_dinode *di; int ret = -EAGAIN; + int unlock_required = 0; /* Skip timestamp update, if this is from a memalloc */ if (current->flags & PF_MEMALLOC) goto do_flush; - ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); - if (ret) - goto do_flush; + if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { + ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); + if (ret) + goto do_flush; + unlock_required = 1; + } ret = gfs2_trans_begin(sdp, RES_DINODE, 0); if (ret) goto do_unlock; @@ -780,7 +784,8 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc) } gfs2_trans_end(sdp); do_unlock: - gfs2_glock_dq_uninit(&gh); + if (unlock_required) + gfs2_glock_dq_uninit(&gh); do_flush: if (wbc->sync_mode == WB_SYNC_ALL) gfs2_log_flush(GFS2_SB(inode), ip->i_gl); @@ -1427,7 +1432,20 @@ out: return error; } -/* +/** + * gfs2_evict_inode - Remove an inode from cache + * @inode: The inode to evict + * + * There are three cases to consider: + * 1. i_nlink == 0, we are final opener (and must deallocate) + * 2. i_nlink == 0, we are not the final opener (and cannot deallocate) + * 3. i_nlink > 0 + * + * If the fs is read only, then we have to treat all cases as per #3 + * since we are unable to do any deallocation. The inode will be + * deallocated by the next read/write node to attempt an allocation + * in the same resource group + * * We have to (at the moment) hold the inodes main lock to cover * the gap between unlocking the shared lock on the iopen lock and * taking the exclusive lock. I'd rather do a shared -> exclusive @@ -1470,6 +1488,8 @@ static void gfs2_evict_inode(struct inode *inode) if (error) goto out_truncate; + /* Case 1 starts here */ + if (S_ISDIR(inode->i_mode) && (ip->i_diskflags & GFS2_DIF_EXHASH)) { error = gfs2_dir_exhash_dealloc(ip); @@ -1493,13 +1513,16 @@ static void gfs2_evict_inode(struct inode *inode) goto out_unlock; out_truncate: + /* Case 2 starts here */ error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); if (error) goto out_unlock; - gfs2_final_release_pages(ip); + /* Needs to be done before glock release & also in a transaction */ + truncate_inode_pages(&inode->i_data, 0); gfs2_trans_end(sdp); out_unlock: + /* Error path for case 1 */ if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) gfs2_glock_dq(&ip->i_iopen_gh); gfs2_holder_uninit(&ip->i_iopen_gh); @@ -1507,6 +1530,7 @@ out_unlock: if (error && error != GLR_TRYFAILED && error != -EROFS) fs_warn(sdp, "gfs2_evict_inode: %d\n", error); out: + /* Case 3 starts here */ truncate_inode_pages(&inode->i_data, 0); end_writeback(inode); -- cgit v1.2.1 From 1836750115f20b774e55c032a3893e8c5bdf41ed Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 12 Jul 2011 21:42:24 -0400 Subject: fix loop checks in d_materialise_unique() Both __d_unalias() and __d_materialise_dentry() need loop prevention. Grab rename_lock in caller, check for loops there... As a side benefit, we have dentry_lock_for_move() called only under rename_lock, which seriously reduces deadlock potential of the execrable "locking order" used for ->d_lock. Signed-off-by: Al Viro --- fs/dcache.c | 51 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 37f72ee5bf7c..6e4ea6d87774 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2213,14 +2213,15 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry, * The hash value has to match the hash queue that the dentry is on.. */ /* - * d_move - move a dentry + * __d_move - move a dentry * @dentry: entry to move * @target: new dentry * * Update the dcache to reflect the move of a file name. Negative - * dcache entries should not be moved in this way. + * dcache entries should not be moved in this way. Caller hold + * rename_lock. */ -void d_move(struct dentry * dentry, struct dentry * target) +static void __d_move(struct dentry * dentry, struct dentry * target) { if (!dentry->d_inode) printk(KERN_WARNING "VFS: moving negative dcache entry\n"); @@ -2228,8 +2229,6 @@ void d_move(struct dentry * dentry, struct dentry * target) BUG_ON(d_ancestor(dentry, target)); BUG_ON(d_ancestor(target, dentry)); - write_seqlock(&rename_lock); - dentry_lock_for_move(dentry, target); write_seqcount_begin(&dentry->d_seq); @@ -2275,6 +2274,20 @@ void d_move(struct dentry * dentry, struct dentry * target) spin_unlock(&target->d_lock); fsnotify_d_move(dentry); spin_unlock(&dentry->d_lock); +} + +/* + * d_move - move a dentry + * @dentry: entry to move + * @target: new dentry + * + * Update the dcache to reflect the move of a file name. Negative + * dcache entries should not be moved in this way. + */ +void d_move(struct dentry *dentry, struct dentry *target) +{ + write_seqlock(&rename_lock); + __d_move(dentry, target); write_sequnlock(&rename_lock); } EXPORT_SYMBOL(d_move); @@ -2302,7 +2315,7 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) * This helper attempts to cope with remotely renamed directories * * It assumes that the caller is already holding - * dentry->d_parent->d_inode->i_mutex and the inode->i_lock + * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock * * Note: If ever the locking in lock_rename() changes, then please * remember to update this too... @@ -2317,11 +2330,6 @@ static struct dentry *__d_unalias(struct inode *inode, if (alias->d_parent == dentry->d_parent) goto out_unalias; - /* Check for loops */ - ret = ERR_PTR(-ELOOP); - if (d_ancestor(alias, dentry)) - goto out_err; - /* See lock_rename() */ ret = ERR_PTR(-EBUSY); if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) @@ -2331,7 +2339,7 @@ static struct dentry *__d_unalias(struct inode *inode, goto out_err; m2 = &alias->d_parent->d_inode->i_mutex; out_unalias: - d_move(alias, dentry); + __d_move(alias, dentry); ret = alias; out_err: spin_unlock(&inode->i_lock); @@ -2416,15 +2424,24 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) alias = __d_find_alias(inode, 0); if (alias) { actual = alias; - /* Is this an anonymous mountpoint that we could splice - * into our tree? */ - if (IS_ROOT(alias)) { + write_seqlock(&rename_lock); + + if (d_ancestor(alias, dentry)) { + /* Check for loops */ + actual = ERR_PTR(-ELOOP); + } else if (IS_ROOT(alias)) { + /* Is this an anonymous mountpoint that we + * could splice into our tree? */ __d_materialise_dentry(dentry, alias); + write_sequnlock(&rename_lock); __d_drop(alias); goto found; + } else { + /* Nope, but we must(!) avoid directory + * aliasing */ + actual = __d_unalias(inode, dentry, alias); } - /* Nope, but we must(!) avoid directory aliasing */ - actual = __d_unalias(inode, dentry, alias); + write_sequnlock(&rename_lock); if (IS_ERR(actual)) dput(alias); goto out_nolock; -- cgit v1.2.1 From dc137bf553dbb6855bd7efc34fedcd03102455f7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 16 Jul 2011 23:37:20 -0400 Subject: cifs: build_path_from_dentry() race fix deal with d_move() races properly; rename_lock read-retry loop, rcu_read_lock() held while walking to root, d_lock held over subtraction from namelen and copying the component to stabilize ->d_name. Signed-off-by: Al Viro --- fs/cifs/dir.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 81914df47ef1..fa8c21d913bc 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -55,6 +55,7 @@ build_path_from_dentry(struct dentry *direntry) char dirsep; struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + unsigned seq; if (direntry == NULL) return NULL; /* not much we can do if dentry is freed and @@ -68,22 +69,29 @@ build_path_from_dentry(struct dentry *direntry) dfsplen = 0; cifs_bp_rename_retry: namelen = dfsplen; + seq = read_seqbegin(&rename_lock); + rcu_read_lock(); for (temp = direntry; !IS_ROOT(temp);) { namelen += (1 + temp->d_name.len); temp = temp->d_parent; if (temp == NULL) { cERROR(1, "corrupt dentry"); + rcu_read_unlock(); return NULL; } } + rcu_read_unlock(); full_path = kmalloc(namelen+1, GFP_KERNEL); if (full_path == NULL) return full_path; full_path[namelen] = 0; /* trailing null */ + rcu_read_lock(); for (temp = direntry; !IS_ROOT(temp);) { + spin_lock(&temp->d_lock); namelen -= 1 + temp->d_name.len; if (namelen < 0) { + spin_unlock(&temp->d_lock); break; } else { full_path[namelen] = dirsep; @@ -91,14 +99,17 @@ cifs_bp_rename_retry: temp->d_name.len); cFYI(0, "name: %s", full_path + namelen); } + spin_unlock(&temp->d_lock); temp = temp->d_parent; if (temp == NULL) { cERROR(1, "corrupt dentry"); + rcu_read_unlock(); kfree(full_path); return NULL; } } - if (namelen != dfsplen) { + rcu_read_unlock(); + if (namelen != dfsplen || read_seqretry(&rename_lock, seq)) { cERROR(1, "did not end path lookup where expected namelen is %d", namelen); /* presumably this is only possible if racing with a rename -- cgit v1.2.1 From 1b71fe2efa31cd18c865db474a4cd473b6ab5281 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 16 Jul 2011 23:43:58 -0400 Subject: ceph analog of cifs build_path_from_dentry() race fix ... unfortunately, cifs bug got copied. Fix is essentially the same. Signed-off-by: Al Viro --- fs/ceph/mds_client.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 79743d146be6..0c1d91756528 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1438,12 +1438,15 @@ char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, struct dentry *temp; char *path; int len, pos; + unsigned seq; if (dentry == NULL) return ERR_PTR(-EINVAL); retry: len = 0; + seq = read_seqbegin(&rename_lock); + rcu_read_lock(); for (temp = dentry; !IS_ROOT(temp);) { struct inode *inode = temp->d_inode; if (inode && ceph_snap(inode) == CEPH_SNAPDIR) @@ -1455,10 +1458,12 @@ retry: len += 1 + temp->d_name.len; temp = temp->d_parent; if (temp == NULL) { + rcu_read_unlock(); pr_err("build_path corrupt dentry %p\n", dentry); return ERR_PTR(-EINVAL); } } + rcu_read_unlock(); if (len) len--; /* no leading '/' */ @@ -1467,9 +1472,12 @@ retry: return ERR_PTR(-ENOMEM); pos = len; path[pos] = 0; /* trailing null */ + rcu_read_lock(); for (temp = dentry; !IS_ROOT(temp) && pos != 0; ) { - struct inode *inode = temp->d_inode; + struct inode *inode; + spin_lock(&temp->d_lock); + inode = temp->d_inode; if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { dout("build_path path+%d: %p SNAPDIR\n", pos, temp); @@ -1478,21 +1486,26 @@ retry: break; } else { pos -= temp->d_name.len; - if (pos < 0) + if (pos < 0) { + spin_unlock(&temp->d_lock); break; + } strncpy(path + pos, temp->d_name.name, temp->d_name.len); } + spin_unlock(&temp->d_lock); if (pos) path[--pos] = '/'; temp = temp->d_parent; if (temp == NULL) { + rcu_read_unlock(); pr_err("build_path corrupt dentry\n"); kfree(path); return ERR_PTR(-EINVAL); } } - if (pos != 0) { + rcu_read_unlock(); + if (pos != 0 || read_seqretry(&rename_lock, seq)) { pr_err("build_path did not end path lookup where " "expected, namelen is %d, pos is %d\n", len, pos); /* presumably this is only possible if racing with a -- cgit v1.2.1 From a803b8067e317832d6a251c5b0486e36a4f81922 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 8 Jul 2011 20:56:55 -0400 Subject: fix exofs ->get_parent() NULL is not a possible return value for that method, TYVM... Signed-off-by: Al Viro --- fs/exofs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 06065bd37fc3..c57beddcc217 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -913,7 +913,7 @@ struct dentry *exofs_get_parent(struct dentry *child) unsigned long ino = exofs_parent_ino(child); if (!ino) - return NULL; + return ERR_PTR(-ESTALE); return d_obtain_alias(exofs_iget(child->d_inode->i_sb, ino)); } -- cgit v1.2.1 From 642c937b4ed2e51d2f2e4c46ab7cd8b5bddf268b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 17 Jul 2011 10:07:34 -0400 Subject: ufs should use d_splice_alias() it's NFS-exportable, so... Signed-off-by: Al Viro --- fs/ufs/namei.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 29309e25417f..b57aab9a1184 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -56,16 +56,12 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, stru lock_ufs(dir->i_sb); ino = ufs_inode_by_name(dir, &dentry->d_name); - if (ino) { + if (ino) inode = ufs_iget(dir->i_sb, ino); - if (IS_ERR(inode)) { - unlock_ufs(dir->i_sb); - return ERR_CAST(inode); - } - } unlock_ufs(dir->i_sb); - d_add(dentry, inode); - return NULL; + if (IS_ERR(inode)) + return ERR_CAST(inode); + return d_splice_alias(inode, dentry); } /* -- cgit v1.2.1 From 0577d1ba411f9c40693b8b3e4aa7e0892cd03091 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 17 Jul 2011 19:04:14 -0400 Subject: cramfs: get_cramfs_inode() returns ERR_PTR() on failure ... and we want to report these failures in ->lookup() anyway. Signed-off-by: Al Viro --- fs/cramfs/inode.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index e141939080f0..739fb59bcdc2 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -37,7 +37,7 @@ static DEFINE_MUTEX(read_mutex); /* These macros may change in future, to provide better st_ino semantics. */ #define OFFSET(x) ((x)->i_ino) -static unsigned long cramino(struct cramfs_inode *cino, unsigned int offset) +static unsigned long cramino(const struct cramfs_inode *cino, unsigned int offset) { if (!cino->offset) return offset + 1; @@ -61,7 +61,7 @@ static unsigned long cramino(struct cramfs_inode *cino, unsigned int offset) } static struct inode *get_cramfs_inode(struct super_block *sb, - struct cramfs_inode *cramfs_inode, unsigned int offset) + const struct cramfs_inode *cramfs_inode, unsigned int offset) { struct inode *inode; static struct timespec zerotime; @@ -317,7 +317,7 @@ static int cramfs_fill_super(struct super_block *sb, void *data, int silent) /* Set it all up.. */ sb->s_op = &cramfs_ops; root = get_cramfs_inode(sb, &super.root, 0); - if (!root) + if (IS_ERR(root)) goto out; sb->s_root = d_alloc_root(root); if (!sb->s_root) { @@ -423,6 +423,7 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir) static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { unsigned int offset = 0; + struct inode *inode = NULL; int sorted; mutex_lock(&read_mutex); @@ -449,8 +450,8 @@ static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, s for (;;) { if (!namelen) { - mutex_unlock(&read_mutex); - return ERR_PTR(-EIO); + inode = ERR_PTR(-EIO); + goto out; } if (name[namelen-1]) break; @@ -462,17 +463,18 @@ static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, s if (retval > 0) continue; if (!retval) { - struct cramfs_inode entry = *de; - mutex_unlock(&read_mutex); - d_add(dentry, get_cramfs_inode(dir->i_sb, &entry, dir_off)); - return NULL; + inode = get_cramfs_inode(dir->i_sb, de, dir_off); + break; } /* else (retval < 0) */ if (sorted) break; } +out: mutex_unlock(&read_mutex); - d_add(dentry, NULL); + if (IS_ERR(inode)) + return ERR_CAST(inode); + d_add(dentry, inode); return NULL; } -- cgit v1.2.1 From 3cc0658e35124ace881f6942839dcae877c3eaed Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 17 Jul 2011 22:24:15 -0400 Subject: hppfs: fix dentry leak Signed-off-by: Al Viro --- fs/hppfs/hppfs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 87ed48e0343d..7d6a0e92bcf0 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -174,13 +174,11 @@ static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, err = -ENOMEM; inode = get_inode(ino->i_sb, proc_dentry); if (!inode) - goto out_dput; + goto out; d_add(dentry, inode); return NULL; - out_dput: - dput(proc_dentry); out: return ERR_PTR(err); } @@ -690,8 +688,10 @@ static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) struct inode *proc_ino = dentry->d_inode; struct inode *inode = new_inode(sb); - if (!inode) + if (!inode) { + dput(dentry); return ERR_PTR(-ENOMEM); + } if (S_ISDIR(dentry->d_inode->i_mode)) { inode->i_op = &hppfs_dir_iops; @@ -704,7 +704,7 @@ static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) inode->i_fop = &hppfs_file_fops; } - HPPFS_I(inode)->proc_dentry = dget(dentry); + HPPFS_I(inode)->proc_dentry = dentry; inode->i_uid = proc_ino->i_uid; inode->i_gid = proc_ino->i_gid; @@ -737,7 +737,7 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent) sb->s_fs_info = proc_mnt; err = -ENOMEM; - root_inode = get_inode(sb, proc_mnt->mnt_sb->s_root); + root_inode = get_inode(sb, dget(proc_mnt->mnt_sb->s_root)); if (!root_inode) goto out_mntput; -- cgit v1.2.1 From 0916a5e45fbd2604a303c8cc18e6b2b7c815e4c9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 17 Jul 2011 22:27:22 -0400 Subject: hppfs_lookup(): don't open-code lookup_one_len() ... and it's getting it wrong, too - missing ->d_revalidate() calls when it's dealing with filesystem (procfs) that has non-trivial ->d_revalidate()... Signed-off-by: Al Viro --- fs/hppfs/hppfs.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) (limited to 'fs') diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 7d6a0e92bcf0..85c098a499f3 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -139,7 +139,8 @@ static int file_removed(struct dentry *dentry, const char *file) static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, struct nameidata *nd) { - struct dentry *proc_dentry, *new, *parent; + struct dentry *proc_dentry, *parent; + struct qstr *name = &dentry->d_name; struct inode *inode; int err, deleted; @@ -149,23 +150,9 @@ static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, else if (deleted) return ERR_PTR(-ENOENT); - err = -ENOMEM; parent = HPPFS_I(ino)->proc_dentry; mutex_lock(&parent->d_inode->i_mutex); - proc_dentry = d_lookup(parent, &dentry->d_name); - if (proc_dentry == NULL) { - proc_dentry = d_alloc(parent, &dentry->d_name); - if (proc_dentry == NULL) { - mutex_unlock(&parent->d_inode->i_mutex); - goto out; - } - new = (*parent->d_inode->i_op->lookup)(parent->d_inode, - proc_dentry, NULL); - if (new) { - dput(proc_dentry); - proc_dentry = new; - } - } + proc_dentry = lookup_one_len(name->name, parent, name->len); mutex_unlock(&parent->d_inode->i_mutex); if (IS_ERR(proc_dentry)) -- cgit v1.2.1 From fec11dd9a0109fe52fd631e5c510778d6cbff6cc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 18 Jul 2011 13:50:40 -0400 Subject: Fix cifs_get_root() Add missing ->i_mutex, convert to lookup_one_len() instead of (broken) open-coded analog, cope with getting something like a//b as relative pathname. Simplify the hell out of it, while we are there... Signed-off-by: Al Viro Reviewed-by: Jeff Layton --- fs/cifs/cifsfs.c | 100 ++++++++++++++++--------------------------------------- 1 file changed, 29 insertions(+), 71 deletions(-) (limited to 'fs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 3e2989976297..bc4b12ca537b 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include "cifsfs.h" #include "cifspdu.h" @@ -542,14 +543,12 @@ static const struct super_operations cifs_super_ops = { static struct dentry * cifs_get_root(struct smb_vol *vol, struct super_block *sb) { - int xid, rc; - struct inode *inode; - struct qstr name; - struct dentry *dparent = NULL, *dchild = NULL, *alias; + struct dentry *dentry; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); - unsigned int i, full_len, len; - char *full_path = NULL, *pstart; + char *full_path = NULL; + char *s, *p; char sep; + int xid; full_path = cifs_build_path_to_root(vol, cifs_sb, cifs_sb_master_tcon(cifs_sb)); @@ -560,73 +559,32 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb) xid = GetXid(); sep = CIFS_DIR_SEP(cifs_sb); - dparent = dget(sb->s_root); - full_len = strlen(full_path); - full_path[full_len] = sep; - pstart = full_path + 1; - - for (i = 1, len = 0; i <= full_len; i++) { - if (full_path[i] != sep || !len) { - len++; - continue; - } - - full_path[i] = 0; - cFYI(1, "get dentry for %s", pstart); - - name.name = pstart; - name.len = len; - name.hash = full_name_hash(pstart, len); - dchild = d_lookup(dparent, &name); - if (dchild == NULL) { - cFYI(1, "not exists"); - dchild = d_alloc(dparent, &name); - if (dchild == NULL) { - dput(dparent); - dparent = ERR_PTR(-ENOMEM); - goto out; - } - } - - cFYI(1, "get inode"); - if (dchild->d_inode == NULL) { - cFYI(1, "not exists"); - inode = NULL; - if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext) - rc = cifs_get_inode_info_unix(&inode, full_path, - sb, xid); - else - rc = cifs_get_inode_info(&inode, full_path, - NULL, sb, xid, NULL); - if (rc) { - dput(dchild); - dput(dparent); - dparent = ERR_PTR(rc); - goto out; - } - alias = d_materialise_unique(dchild, inode); - if (alias != NULL) { - dput(dchild); - if (IS_ERR(alias)) { - dput(dparent); - dparent = ERR_PTR(-EINVAL); /* XXX */ - goto out; - } - dchild = alias; - } - } - cFYI(1, "parent %p, child %p", dparent, dchild); - - dput(dparent); - dparent = dchild; - len = 0; - pstart = full_path + i + 1; - full_path[i] = sep; - } -out: + dentry = dget(sb->s_root); + p = s = full_path; + + do { + struct inode *dir = dentry->d_inode; + struct dentry *child; + + /* skip separators */ + while (*s == sep) + s++; + if (!*s) + break; + p = s++; + /* next separator */ + while (*s && *s != sep) + s++; + + mutex_lock(&dir->i_mutex); + child = lookup_one_len(p, dentry, s - p); + mutex_unlock(&dir->i_mutex); + dput(dentry); + dentry = child; + } while (!IS_ERR(dentry)); _FreeXid(xid); kfree(full_path); - return dparent; + return dentry; } static int cifs_set_super(struct super_block *sb, void *data) -- cgit v1.2.1 From 59430262401bec02d415179c43dbe5b8819c09ce Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 18 Jul 2011 15:43:29 -0700 Subject: vfs: fix race in rcu lookup of pruned dentry Don't update *inode in __follow_mount_rcu() until we'd verified that there is mountpoint there. Kudos to Hugh Dickins for catching that one in the first place and eventually figuring out the solution (and catching a braino in the earlier version of patch). Signed-off-by: Linus Torvalds Signed-off-by: Al Viro --- fs/namei.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index 5c867dd1c0b3..14ab8d3f2f0c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -942,7 +942,6 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, * Don't forget we might have a non-mountpoint managed dentry * that wants to block transit. */ - *inode = path->dentry->d_inode; if (unlikely(managed_dentry_might_block(path->dentry))) return false; @@ -955,6 +954,12 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, path->mnt = mounted; path->dentry = mounted->mnt_root; nd->seq = read_seqcount_begin(&path->dentry->d_seq); + /* + * Update the inode too. We don't need to re-check the + * dentry sequence number here after this d_inode read, + * because a mount-point is always pinned. + */ + *inode = path->dentry->d_inode; } return true; } -- cgit v1.2.1 From f7b88631a89757d70192044c9d9f2e8d2fc02f2c Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Tue, 19 Jul 2011 08:49:25 -0700 Subject: fs/libfs.c: fix simple_attr_write() on 32bit machines Assume that /sys/kernel/debug/dummy64 is debugfs file created by debugfs_create_x64(). # cd /sys/kernel/debug # echo 0x1234567812345678 > dummy64 # cat dummy64 0x0000000012345678 # echo 0x80000000 > dummy64 # cat dummy64 0xffffffff80000000 A value larger than INT_MAX cannot be written to the debugfs file created by debugfs_create_u64 or debugfs_create_x64 on 32bit machine. Because simple_attr_write() uses simple_strtol() for the conversion. To fix this, use simple_strtoll() instead. Signed-off-by: Akinobu Mita Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/libfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/libfs.c b/fs/libfs.c index c88eab55aec9..275ca4749a2e 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -822,7 +822,7 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, goto out; attr->set_buf[size] = '\0'; - val = simple_strtol(attr->set_buf, NULL, 0); + val = simple_strtoll(attr->set_buf, NULL, 0); ret = attr->set(attr->data, val); if (ret == 0) ret = len; /* on success, claim we got the whole input */ -- cgit v1.2.1 From b307d4655a71749ac3f91c6dbe33d28cc026ceeb Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 21 Jul 2011 15:02:43 +0100 Subject: FS-Cache: Fix __fscache_uncache_all_inode_pages()'s outer loop The compiler, at least for ix86 and m68k, validly warns that the comparison: next <= (loff_t)-1 is always true (and it's always true also for x86-64 and probably all other arches - as long as pgoff_t isn't wider than loff_t). The intention appears to be to avoid wrapping of "next", so rather than eliminating the pointless comparison, fix the loop to indeed get exited when "next" would otherwise wrap. On m68k the following warning is observed: fs/fscache/page.c: In function '__fscache_uncache_all_inode_pages': fs/fscache/page.c:979: warning: comparison is always false due to limited range of data type Reported-by: Geert Uytterhoeven Reported-by: Jan Beulich Signed-off-by: Jan Beulich Signed-off-by: David Howells Cc: Suresh Jayaraman Cc: Geert Uytterhoeven Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/fscache/page.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 2f343b4d7a7d..3f7a59bfa7ad 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -976,16 +976,12 @@ void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, pagevec_init(&pvec, 0); next = 0; - while (next <= (loff_t)-1 && - pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE) - ) { + do { + if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) + break; for (i = 0; i < pagevec_count(&pvec); i++) { struct page *page = pvec.pages[i]; - pgoff_t page_index = page->index; - - ASSERTCMP(page_index, >=, next); - next = page_index + 1; - + next = page->index; if (PageFsCache(page)) { __fscache_wait_on_page_write(cookie, page); __fscache_uncache_page(cookie, page); @@ -993,7 +989,7 @@ void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie, } pagevec_release(&pvec); cond_resched(); - } + } while (++next); _leave(""); } -- cgit v1.2.1 From b91da88fed84843313a1b6fd1b1c834a24bbcf9e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 21 Jul 2011 11:01:42 -0700 Subject: vfs: drop conditional inode prefetch in __do_lookup_rcu It seems to hurt performance in real life. Yes, the inode will be used later, but the conditional doesn't seem to predict all that well (negative dentries are not uncommon) and it looks like the cost of prefetching is simply higher than depending on the cache doing the right thing. As usual. Signed-off-by: Linus Torvalds --- fs/dcache.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/dcache.c b/fs/dcache.c index 6e4ea6d87774..fbdcbca40725 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1813,8 +1813,6 @@ seqretry: tname = dentry->d_name.name; i = dentry->d_inode; prefetch(tname); - if (i) - prefetch(i); /* * This seqcount check is required to ensure name and * len are loaded atomically, so as not to walk off the -- cgit v1.2.1