From d7065da038227a4d09a244e6014e0186a6bd21d0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 26 May 2010 15:13:55 -0400 Subject: get rid of the magic around f_count in aio __aio_put_req() plays sick games with file refcount. What it wants is fput() from atomic context; it's almost always done with f_count > 1, so they only have to deal with delayed work in rare cases when their reference happens to be the last one. Current code decrements f_count and if it hasn't hit 0, everything is fine. Otherwise it keeps a pointer to struct file (with zero f_count!) around and has delayed work do __fput() on it. Better way to do it: use atomic_long_add_unless( , -1, 1) instead of !atomic_long_dec_and_test(). IOW, decrement it only if it's not the last reference, leave refcount alone if it was. And use normal fput() in delayed work. I've made that atomic_long_add_unless call a new helper - fput_atomic(). Drops a reference to file if it's safe to do in atomic (i.e. if that's not the last one), tells if it had been able to do that. aio.c converted to it, __fput() use is gone. req->ki_file *always* contributes to refcount now. And __fput() became static. Signed-off-by: Al Viro --- include/linux/file.h | 1 - include/linux/fs.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/file.h b/include/linux/file.h index 5555508fd517..b1e12970f617 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -11,7 +11,6 @@ struct file; -extern void __fput(struct file *); extern void fput(struct file *); extern void drop_file_write_access(struct file *file); diff --git a/include/linux/fs.h b/include/linux/fs.h index 85e823adcd4a..3d9ed8302402 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -954,6 +954,7 @@ extern spinlock_t files_lock; #define file_list_unlock() spin_unlock(&files_lock); #define get_file(x) atomic_long_inc(&(x)->f_count) +#define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) #define file_count(x) atomic_long_read(&(x)->f_count) #ifdef CONFIG_DEBUG_WRITECOUNT -- cgit v1.2.1 From 7ea8085910ef3dd4f3cad6845aaa2b580d39b115 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 May 2010 17:53:25 +0200 Subject: drop unused dentry argument to ->fsync Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/buffer_head.h | 2 +- include/linux/ext3_fs.h | 2 +- include/linux/fb.h | 5 +---- include/linux/fs.h | 8 ++++---- 4 files changed, 7 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 16ed0284d780..05e5f5996216 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -224,7 +224,7 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, void block_sync_page(struct page *); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int block_truncate_page(struct address_space *, loff_t, get_block_t *); -int file_fsync(struct file *, struct dentry *, int); +int file_fsync(struct file *, int); int nobh_write_begin(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page **, void **, get_block_t*); diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 5f494b465097..7fc62d4550b2 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -868,7 +868,7 @@ extern int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, extern void ext3_htree_free_dir_info(struct dir_private_info *p); /* fsync.c */ -extern int ext3_sync_file (struct file *, struct dentry *, int); +extern int ext3_sync_file(struct file *, int); /* hash.c */ extern int ext3fs_dirhash(const char *name, int len, struct diff --git a/include/linux/fb.h b/include/linux/fb.h index f3793ebc241c..907ace3a64c8 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -4,8 +4,6 @@ #include #include -struct dentry; - /* Definitions of frame buffers */ #define FB_MAX 32 /* sufficient for now */ @@ -1017,8 +1015,7 @@ extern void fb_deferred_io_open(struct fb_info *info, struct inode *inode, struct file *file); extern void fb_deferred_io_cleanup(struct fb_info *info); -extern int fb_deferred_io_fsync(struct file *file, struct dentry *dentry, - int datasync); +extern int fb_deferred_io_fsync(struct file *file, int datasync); static inline bool fb_be_math(struct fb_info *info) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 3d9ed8302402..eb39e5eb77f5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1498,7 +1498,7 @@ struct file_operations { int (*open) (struct inode *, struct file *); int (*flush) (struct file *, fl_owner_t id); int (*release) (struct inode *, struct file *); - int (*fsync) (struct file *, struct dentry *, int datasync); + int (*fsync) (struct file *, int datasync); int (*aio_fsync) (struct kiocb *, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); @@ -2213,7 +2213,7 @@ extern int generic_segment_checks(const struct iovec *iov, /* fs/block_dev.c */ extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos); -extern int blkdev_fsync(struct file *filp, struct dentry *dentry, int datasync); +extern int blkdev_fsync(struct file *filp, int datasync); /* fs/splice.c */ extern ssize_t generic_file_splice_read(struct file *, loff_t *, @@ -2348,7 +2348,7 @@ extern int simple_link(struct dentry *, struct inode *, struct dentry *); extern int simple_unlink(struct inode *, struct dentry *); extern int simple_rmdir(struct inode *, struct dentry *); extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); -extern int simple_sync_file(struct file *, struct dentry *, int); +extern int simple_sync_file(struct file *, int); extern int simple_empty(struct dentry *); extern int simple_readpage(struct file *file, struct page *page); extern int simple_write_begin(struct file *file, struct address_space *mapping, @@ -2373,7 +2373,7 @@ extern ssize_t simple_read_from_buffer(void __user *to, size_t count, extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, const void __user *from, size_t count); -extern int simple_fsync(struct file *, struct dentry *, int); +extern int simple_fsync(struct file *, int); #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, -- cgit v1.2.1 From 1b061d9247f71cd15edc4c4c4600191a903642c0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 May 2010 17:53:41 +0200 Subject: rename the generic fsync implementations We don't name our generic fsync implementations very well currently. The no-op implementation for in-memory filesystems currently is called simple_sync_file which doesn't make too much sense to start with, the the generic one for simple filesystems is called simple_fsync which can lead to some confusion. This patch renames the generic file fsync method to generic_file_fsync to match the other generic_file_* routines it is supposed to be used with, and the no-op implementation to noop_fsync to make it obvious what to expect. In addition add some documentation for both methods. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index eb39e5eb77f5..acf6c52a50dd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2348,7 +2348,7 @@ extern int simple_link(struct dentry *, struct inode *, struct dentry *); extern int simple_unlink(struct inode *, struct dentry *); extern int simple_rmdir(struct inode *, struct dentry *); extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); -extern int simple_sync_file(struct file *, int); +extern int noop_fsync(struct file *, int); extern int simple_empty(struct dentry *); extern int simple_readpage(struct file *file, struct page *page); extern int simple_write_begin(struct file *file, struct address_space *mapping, @@ -2373,7 +2373,7 @@ extern ssize_t simple_read_from_buffer(void __user *to, size_t count, extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, const void __user *from, size_t count); -extern int simple_fsync(struct file *, int); +extern int generic_file_fsync(struct file *, int); #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, -- cgit v1.2.1 From 7bb46a6734a7e1ad4beaecc11cae7ed3ff81d30f Mon Sep 17 00:00:00 2001 From: "npiggin@suse.de" Date: Thu, 27 May 2010 01:05:33 +1000 Subject: fs: introduce new truncate sequence Introduce a new truncate calling sequence into fs/mm subsystems. Rather than setattr > vmtruncate > truncate, have filesystems call their truncate sequence from ->setattr if filesystem specific operations are required. vmtruncate is deprecated, and truncate_pagecache and inode_newsize_ok helpers introduced previously should be used. simple_setattr is introduced for simple in-ram filesystems to implement the new truncate sequence. Eventually all filesystems should be converted to implement a setattr, and the default code in notify_change should go away. simple_setsize is also introduced to perform just the ATTR_SIZE portion of simple_setattr (ie. changing i_size and trimming pagecache). To implement the new truncate sequence: - filesystem specific manipulations (eg freeing blocks) must be done in the setattr method rather than ->truncate. - vmtruncate can not be used by core code to trim blocks past i_size in the event of write failure after allocation, so this must be performed in the fs code. - convert usage of helpers block_write_begin, nobh_write_begin, cont_write_begin, and *blockdev_direct_IO* to use _newtrunc postfixed variants. These avoid calling vmtruncate to trim blocks (see previous). - inode_setattr should not be used. generic_setattr is a new function to be used to copy simple attributes into the generic inode. - make use of the better opportunity to handle errors with the new sequence. Big problem with the previous calling sequence: the filesystem is not called until i_size has already changed. This means it is not allowed to fail the call, and also it does not know what the previous i_size was. Also, generic code calling vmtruncate to truncate allocated blocks in case of error had no good way to return a meaningful error (or, for example, atomically handle block deallocation). Cc: Christoph Hellwig Acked-by: Jan Kara Signed-off-by: Nick Piggin Signed-off-by: Al Viro --- include/linux/buffer_head.h | 9 +++++++++ include/linux/fs.h | 27 ++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 05e5f5996216..1b9ba193b789 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -203,6 +203,9 @@ int block_write_full_page_endio(struct page *page, get_block_t *get_block, int block_read_full_page(struct page*, get_block_t*); int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, unsigned long from); +int block_write_begin_newtrunc(struct file *, struct address_space *, + loff_t, unsigned, unsigned, + struct page **, void **, get_block_t*); int block_write_begin(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page **, void **, get_block_t*); @@ -214,6 +217,9 @@ int generic_write_end(struct file *, struct address_space *, struct page *, void *); void page_zero_new_buffers(struct page *page, unsigned from, unsigned to); int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*); +int cont_write_begin_newtrunc(struct file *, struct address_space *, loff_t, + unsigned, unsigned, struct page **, void **, + get_block_t *, loff_t *); int cont_write_begin(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page **, void **, get_block_t *, loff_t *); @@ -225,6 +231,9 @@ void block_sync_page(struct page *); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int block_truncate_page(struct address_space *, loff_t, get_block_t *); int file_fsync(struct file *, int); +int nobh_write_begin_newtrunc(struct file *, struct address_space *, + loff_t, unsigned, unsigned, + struct page **, void **, get_block_t*); int nobh_write_begin(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page **, void **, get_block_t*); diff --git a/include/linux/fs.h b/include/linux/fs.h index acf6c52a50dd..3428393942a6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2257,6 +2257,10 @@ typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, loff_t file_offset); void dio_end_io(struct bio *bio, int error); +ssize_t __blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, struct inode *inode, + struct block_device *bdev, const struct iovec *iov, loff_t offset, + unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, + dio_submit_t submit_io, int lock_type); ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, loff_t offset, unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, @@ -2270,6 +2274,24 @@ enum { DIO_SKIP_HOLES = 0x02, }; +static inline ssize_t blockdev_direct_IO_newtrunc(int rw, struct kiocb *iocb, + struct inode *inode, struct block_device *bdev, const struct iovec *iov, + loff_t offset, unsigned long nr_segs, get_block_t get_block, + dio_iodone_t end_io) +{ + return __blockdev_direct_IO_newtrunc(rw, iocb, inode, bdev, iov, offset, + nr_segs, get_block, end_io, NULL, + DIO_LOCKING | DIO_SKIP_HOLES); +} + +static inline ssize_t blockdev_direct_IO_no_locking_newtrunc(int rw, struct kiocb *iocb, + struct inode *inode, struct block_device *bdev, const struct iovec *iov, + loff_t offset, unsigned long nr_segs, get_block_t get_block, + dio_iodone_t end_io) +{ + return __blockdev_direct_IO_newtrunc(rw, iocb, inode, bdev, iov, offset, + nr_segs, get_block, end_io, NULL, 0); +} static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, loff_t offset, unsigned long nr_segs, get_block_t get_block, @@ -2342,12 +2364,14 @@ extern int dcache_dir_open(struct inode *, struct file *); extern int dcache_dir_close(struct inode *, struct file *); extern loff_t dcache_dir_lseek(struct file *, loff_t, int); extern int dcache_readdir(struct file *, void *, filldir_t); +extern int simple_setattr(struct dentry *, struct iattr *); extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int simple_statfs(struct dentry *, struct kstatfs *); extern int simple_link(struct dentry *, struct inode *, struct dentry *); extern int simple_unlink(struct inode *, struct dentry *); extern int simple_rmdir(struct inode *, struct dentry *); extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); +extern int simple_setsize(struct inode *, loff_t); extern int noop_fsync(struct file *, int); extern int simple_empty(struct dentry *); extern int simple_readpage(struct file *file, struct page *page); @@ -2384,7 +2408,8 @@ extern int buffer_migrate_page(struct address_space *, extern int inode_change_ok(const struct inode *, struct iattr *); extern int inode_newsize_ok(const struct inode *, loff_t offset); -extern int __must_check inode_setattr(struct inode *, struct iattr *); +extern int __must_check inode_setattr(struct inode *, const struct iattr *); +extern void generic_setattr(struct inode *inode, const struct iattr *attr); extern void file_update_time(struct file *file); -- cgit v1.2.1