From 60545d0d4610b02e55f65d141c95b18ccf855b6e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 7 Jun 2013 01:20:27 -0400 Subject: [O_TMPFILE] it's still short a few helpers, but infrastructure should be OK now... Signed-off-by: Al Viro --- include/linux/dcache.h | 2 ++ include/linux/fs.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 1a6bb81f0fe5..86da7595ba31 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -246,6 +246,8 @@ extern struct dentry * d_make_root(struct inode *); /* - the ramfs-type tree */ extern void d_genocide(struct dentry *); +extern void d_tmpfile(struct dentry *, struct inode *); + extern struct dentry *d_find_alias(struct inode *); extern void d_prune_aliases(struct inode *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 7c30e3a62baf..dd6615f0fd13 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1580,6 +1580,7 @@ struct inode_operations { int (*atomic_open)(struct inode *, struct dentry *, struct file *, unsigned open_flag, umode_t create_mode, int *opened); + int (*tmpfile) (struct inode *, struct dentry *, umode_t); } ____cacheline_aligned; ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, -- cgit v1.2.1 From f4e0c30c191f87851c4a53454abb55ee276f4a7e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 11 Jun 2013 08:34:36 +0400 Subject: allow the temp files created by open() to be linked to O_TMPFILE | O_CREAT => linkat() with AT_SYMLINK_FOLLOW and /proc/self/fd/ as oldpath (i.e. flink()) will create a link O_TMPFILE | O_CREAT | O_EXCL => ENOENT on attempt to link those guys Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index dd6615f0fd13..ab11c44b0697 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1744,6 +1744,7 @@ struct super_operations { #define I_REFERENCED (1 << 8) #define __I_DIO_WAKEUP 9 #define I_DIO_WAKEUP (1 << I_DIO_WAKEUP) +#define I_LINKABLE (1 << 10) #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) -- cgit v1.2.1 From c77cecee52e9b599da1f8ffd9170d4374c99a345 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 13 Jun 2013 23:37:49 +0100 Subject: Replace a bunch of file->dentry->d_inode refs with file_inode() Replace a bunch of file->dentry->d_inode refs with file_inode(). In __fput(), use file->f_inode instead so as not to be affected by any tricks that file_inode() might grow. Signed-off-by: David Howells Signed-off-by: Al Viro --- include/linux/fsnotify.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index a78680a92dba..1c804b057fb1 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -38,7 +38,7 @@ static inline int fsnotify_parent(struct path *path, struct dentry *dentry, __u3 static inline int fsnotify_perm(struct file *file, int mask) { struct path *path = &file->f_path; - struct inode *inode = path->dentry->d_inode; + struct inode *inode = file_inode(file); __u32 fsnotify_mask = 0; int ret; @@ -192,7 +192,7 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) static inline void fsnotify_access(struct file *file) { struct path *path = &file->f_path; - struct inode *inode = path->dentry->d_inode; + struct inode *inode = file_inode(file); __u32 mask = FS_ACCESS; if (S_ISDIR(inode->i_mode)) @@ -210,7 +210,7 @@ static inline void fsnotify_access(struct file *file) static inline void fsnotify_modify(struct file *file) { struct path *path = &file->f_path; - struct inode *inode = path->dentry->d_inode; + struct inode *inode = file_inode(file); __u32 mask = FS_MODIFY; if (S_ISDIR(inode->i_mode)) @@ -228,7 +228,7 @@ static inline void fsnotify_modify(struct file *file) static inline void fsnotify_open(struct file *file) { struct path *path = &file->f_path; - struct inode *inode = path->dentry->d_inode; + struct inode *inode = file_inode(file); __u32 mask = FS_OPEN; if (S_ISDIR(inode->i_mode)) -- cgit v1.2.1 From 0b3fca1fd1499f0f5a7486d494f96538f2b7e5b9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 15 Jun 2013 11:37:47 +0400 Subject: kill find_inode_number() the only remaining caller (in ncpfs) is guaranteed to return 0 - we only hit it if we'd just checked that there's no dentry with such name. Signed-off-by: Al Viro --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index ab11c44b0697..1db01c13ddce 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2311,7 +2311,6 @@ extern struct file * open_exec(const char *); /* fs/dcache.c -- generic fs support functions */ extern int is_subdir(struct dentry *, struct dentry *); extern int path_is_under(struct path *, struct path *); -extern ino_t find_inode_number(struct dentry *, struct qstr *); #include -- cgit v1.2.1 From 1bf9d14dff4a2c4de6152c6f751bdaf6896b68bb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 16 Jun 2013 20:27:42 +0400 Subject: new helper: fixed_size_llseek() Signed-off-by: Al Viro --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 1db01c13ddce..803b7fa2520a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2428,6 +2428,8 @@ extern loff_t no_llseek(struct file *file, loff_t offset, int whence); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence); extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, int whence, loff_t maxsize, loff_t eof); +extern loff_t fixed_size_llseek(struct file *file, loff_t offset, + int whence, loff_t size); extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); -- cgit v1.2.1 From 68d70d03f8f5bd10a0e7337210b13f536fd4aeb9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 19 Jun 2013 15:26:04 +0400 Subject: constify rw_verify_area() Signed-off-by: Al Viro --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 803b7fa2520a..68f10204ab29 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1898,7 +1898,6 @@ extern int current_umask(void); extern struct kobject *fs_kobj; #define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK) -extern int rw_verify_area(int, struct file *, loff_t *, size_t); #define FLOCK_VERIFY_READ 1 #define FLOCK_VERIFY_WRITE 2 -- cgit v1.2.1 From da53be12bbb4fabbe2e9f6f908de0cf478b5161d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 21 May 2013 15:22:44 -0700 Subject: Don't pass inode to ->d_hash() and ->d_compare() Instances either don't look at it at all (the majority of cases) or only want it to find the superblock (which can be had as dentry->d_sb). A few cases that want more are actually safe with dentry->d_inode - the only precaution needed is the check that it hadn't been replaced with NULL by rmdir() or by overwriting rename(), which case should be simply treated as cache miss. Signed-off-by: Linus Torvalds Signed-off-by: Al Viro --- include/linux/dcache.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 86da7595ba31..f42dbe145479 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -146,10 +146,8 @@ enum dentry_d_lock_class struct dentry_operations { int (*d_revalidate)(struct dentry *, unsigned int); int (*d_weak_revalidate)(struct dentry *, unsigned int); - int (*d_hash)(const struct dentry *, const struct inode *, - struct qstr *); - int (*d_compare)(const struct dentry *, const struct inode *, - const struct dentry *, const struct inode *, + int (*d_hash)(const struct dentry *, struct qstr *); + int (*d_compare)(const struct dentry *, const struct dentry *, unsigned int, const char *, const struct qstr *); int (*d_delete)(const struct dentry *); void (*d_release)(struct dentry *); @@ -302,8 +300,7 @@ extern struct dentry *d_lookup(const struct dentry *, const struct qstr *); extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *); extern struct dentry *__d_lookup(const struct dentry *, const struct qstr *); extern struct dentry *__d_lookup_rcu(const struct dentry *parent, - const struct qstr *name, - unsigned *seq, struct inode *inode); + const struct qstr *name, unsigned *seq); /** * __d_rcu_to_refcount - take a refcount on dentry if sequence check is ok -- cgit v1.2.1 From f891a29f46553a384edbaa0f6ac446b1d03bccac Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 21 Jun 2013 08:58:09 -0400 Subject: locks: drop the unused filp argument to posix_unblock_lock Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- include/linux/fs.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 68f10204ab29..172303655702 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -994,7 +994,7 @@ extern void locks_release_private(struct file_lock *); extern void posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); extern int posix_lock_file_wait(struct file *, struct file_lock *); -extern int posix_unblock_lock(struct file *, struct file_lock *); +extern int posix_unblock_lock(struct file_lock *); extern int vfs_test_lock(struct file *, struct file_lock *); extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); @@ -1084,8 +1084,7 @@ static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) return -ENOLCK; } -static inline int posix_unblock_lock(struct file *filp, - struct file_lock *waiter) +static inline int posix_unblock_lock(struct file_lock *waiter) { return -ENOENT; } -- cgit v1.2.1 From 1a9e64a7118c5ad13dd5119da18375a5bd45b330 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 21 Jun 2013 08:58:10 -0400 Subject: cifs: use posix_unblock_lock instead of locks_delete_block commit 66189be74 (CIFS: Fix VFS lock usage for oplocked files) exported the locks_delete_block symbol. There's already an exported helper function that provides this capability however, so make cifs use that instead and turn locks_delete_block back into a static function. Note that if fl->fl_next == NULL then this lock has already been through locks_delete_block(), so we should be OK to ignore an ENOENT error here and simply not retry the lock. Cc: Pavel Shilovsky Signed-off-by: Jeff Layton Acked-by: J. Bruce Fields Signed-off-by: Al Viro --- include/linux/fs.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 172303655702..6cfc9a29a783 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1006,7 +1006,6 @@ extern int vfs_setlease(struct file *, long, struct file_lock **); extern int lease_modify(struct file_lock **, int); extern int lock_may_read(struct inode *, loff_t start, unsigned long count); extern int lock_may_write(struct inode *, loff_t start, unsigned long count); -extern void locks_delete_block(struct file_lock *waiter); extern void lock_flocks(void); extern void unlock_flocks(void); #else /* !CONFIG_FILE_LOCKING */ @@ -1150,10 +1149,6 @@ static inline int lock_may_write(struct inode *inode, loff_t start, return 1; } -static inline void locks_delete_block(struct file_lock *waiter) -{ -} - static inline void lock_flocks(void) { } -- cgit v1.2.1 From 1cb360125966cb6cb594e414ea80a0154617b846 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 21 Jun 2013 08:58:12 -0400 Subject: locks: comment cleanups and clarifications Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- include/linux/fs.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6cfc9a29a783..ed9fdaaf3223 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -926,6 +926,24 @@ int locks_in_grace(struct net *); /* that will die - we need it for nfs_lock_info */ #include +/* + * struct file_lock represents a generic "file lock". It's used to represent + * POSIX byte range locks, BSD (flock) locks, and leases. It's important to + * note that the same struct is used to represent both a request for a lock and + * the lock itself, but the same object is never used for both. + * + * FIXME: should we create a separate "struct lock_request" to help distinguish + * these two uses? + * + * The i_flock list is ordered by: + * + * 1) lock type -- FL_LEASEs first, then FL_FLOCK, and finally FL_POSIX + * 2) lock owner + * 3) lock range start + * 4) lock range end + * + * Obviously, the last two criteria only matter for POSIX locks. + */ struct file_lock { struct file_lock *fl_next; /* singly linked list for this inode */ struct list_head fl_link; /* doubly linked list of all locks */ -- cgit v1.2.1 From 1c8c601a8c0dc59fe64907dcd9d512a3d181ddc7 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 21 Jun 2013 08:58:15 -0400 Subject: locks: protect most of the file_lock handling with i_lock Having a global lock that protects all of this code is a clear scalability problem. Instead of doing that, move most of the code to be protected by the i_lock instead. The exceptions are the global lists that the ->fl_link sits on, and the ->fl_block list. ->fl_link is what connects these structures to the global lists, so we must ensure that we hold those locks when iterating over or updating these lists. Furthermore, sound deadlock detection requires that we hold the blocked_list state steady while checking for loops. We also must ensure that the search and update to the list are atomic. For the checking and insertion side of the blocked_list, push the acquisition of the global lock into __posix_lock_file and ensure that checking and update of the blocked_list is done without dropping the lock in between. On the removal side, when waking up blocked lock waiters, take the global lock before walking the blocked list and dequeue the waiters from the global list prior to removal from the fl_block list. With this, deadlock detection should be race free while we minimize excessive file_lock_lock thrashing. Finally, in order to avoid a lock inversion problem when handling /proc/locks output we must ensure that manipulations of the fl_block list are also protected by the file_lock_lock. Signed-off-by: Jeff Layton Signed-off-by: Al Viro --- include/linux/fs.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index ed9fdaaf3223..24fe998795e1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1024,8 +1024,6 @@ extern int vfs_setlease(struct file *, long, struct file_lock **); extern int lease_modify(struct file_lock **, int); extern int lock_may_read(struct inode *, loff_t start, unsigned long count); extern int lock_may_write(struct inode *, loff_t start, unsigned long count); -extern void lock_flocks(void); -extern void unlock_flocks(void); #else /* !CONFIG_FILE_LOCKING */ static inline int fcntl_getlk(struct file *file, struct flock __user *user) { @@ -1166,15 +1164,6 @@ static inline int lock_may_write(struct inode *inode, loff_t start, { return 1; } - -static inline void lock_flocks(void) -{ -} - -static inline void unlock_flocks(void) -{ -} - #endif /* !CONFIG_FILE_LOCKING */ -- cgit v1.2.1 From 139ca04ee572fea6c0c105e88aba3a534efcd7c4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 21 Jun 2013 08:58:17 -0400 Subject: locks: convert fl_link to a hlist_node Testing has shown that iterating over the blocked_list for deadlock detection turns out to be a bottleneck. In order to alleviate that, begin the process of turning it into a hashtable. We start by turning the fl_link into a hlist_node and the global lists into hlists. A later patch will do the conversion of the blocked_list to a hashtable. Signed-off-by: Jeff Layton Acked-by: J. Bruce Fields Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 24fe998795e1..fab064a3b65f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -946,7 +946,7 @@ int locks_in_grace(struct net *); */ struct file_lock { struct file_lock *fl_next; /* singly linked list for this inode */ - struct list_head fl_link; /* doubly linked list of all locks */ + struct hlist_node fl_link; /* node in global lists */ struct list_head fl_block; /* circular list of blocked processes */ fl_owner_t fl_owner; unsigned int fl_flags; -- cgit v1.2.1 From 3999e49364193f7dbbba66e2be655fe91ba1fced Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 21 Jun 2013 08:58:19 -0400 Subject: locks: add a new "lm_owner_key" lock operation Currently, the hashing that the locking code uses to add these values to the blocked_hash is simply calculated using fl_owner field. That's valid in most cases except for server-side lockd, which validates the owner of a lock based on fl_owner and fl_pid. In the case where you have a small number of NFS clients doing a lot of locking between different processes, you could end up with all the blocked requests sitting in a very small number of hash buckets. Add a new lm_owner_key operation to the lock_manager_operations that will generate an unsigned long to use as the key in the hashtable. That function is only implemented for server-side lockd, and simply XORs the fl_owner and fl_pid. Signed-off-by: Jeff Layton Acked-by: J. Bruce Fields Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index fab064a3b65f..a137a73fc1fe 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -908,6 +908,7 @@ struct file_lock_operations { struct lock_manager_operations { int (*lm_compare_owner)(struct file_lock *, struct file_lock *); + unsigned long (*lm_owner_key)(struct file_lock *); void (*lm_notify)(struct file_lock *); /* unblock callback */ int (*lm_grant)(struct file_lock *, struct file_lock *, int); void (*lm_break)(struct file_lock *); -- cgit v1.2.1 From 46a1c2c7ae53de2a5676754b54a73c591a3951d2 Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Tue, 25 Jun 2013 12:02:13 +0800 Subject: vfs: export lseek_execute() to modules For those file systems(btrfs/ext4/ocfs2/tmpfs) that support SEEK_DATA/SEEK_HOLE functions, we end up handling the similar matter in lseek_execute() to update the current file offset to the desired offset if it is valid, ceph also does the simliar things at ceph_llseek(). To reduce the duplications, this patch make lseek_execute() public accessible so that we can call it directly from the underlying file systems. Thanks Dave Chinner for this suggestion. [AV: call it vfs_setpos(), don't bring the removed 'inode' argument back] v2->v1: - Add kernel-doc comments for lseek_execute() - Call lseek_execute() in ceph->llseek() Signed-off-by: Jie Liu Cc: Dave Chinner Cc: Al Viro Cc: Andi Kleen Cc: Andrew Morton Cc: Christoph Hellwig Cc: Chris Mason Cc: Josef Bacik Cc: Ben Myers Cc: Ted Tso Cc: Hugh Dickins Cc: Mark Fasheh Cc: Joel Becker Cc: Sage Weil Signed-off-by: Al Viro --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index a137a73fc1fe..bccb1924ec93 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2426,6 +2426,7 @@ extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); extern loff_t noop_llseek(struct file *file, loff_t offset, int whence); extern loff_t no_llseek(struct file *file, loff_t offset, int whence); +extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence); extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, int whence, loff_t maxsize, loff_t eof); -- cgit v1.2.1