diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-12-04 09:07:19 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-12-04 09:07:19 +0100 |
commit | b8307db2477f9c551e54e0c7b643ea349a3349cd (patch) | |
tree | 88654f8bd73857bbd40f75013ce41d8882d16ce6 /fs | |
parent | f0461d0146ee30927bc7efa2ae24ea8c6693b725 (diff) | |
parent | 061e41fdb5047b1fb161e89664057835935ca1d2 (diff) | |
download | talos-obmc-linux-b8307db2477f9c551e54e0c7b643ea349a3349cd.tar.gz talos-obmc-linux-b8307db2477f9c551e54e0c7b643ea349a3349cd.zip |
Merge commit 'v2.6.28-rc7' into tracing/core
Diffstat (limited to 'fs')
-rw-r--r-- | fs/buffer.c | 1 | ||||
-rw-r--r-- | fs/cifs/file.c | 77 | ||||
-rw-r--r-- | fs/eventpoll.c | 85 | ||||
-rw-r--r-- | fs/ntfs/debug.h | 8 | ||||
-rw-r--r-- | fs/ocfs2/buffer_head_io.c | 15 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmfs.c | 4 | ||||
-rw-r--r-- | fs/ocfs2/dlm/userdlm.h | 2 | ||||
-rw-r--r-- | fs/ocfs2/dlmglue.c | 3 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2.h | 2 | ||||
-rw-r--r-- | fs/ocfs2/stack_user.c | 3 | ||||
-rw-r--r-- | fs/udf/inode.c | 1 |
11 files changed, 150 insertions, 51 deletions
diff --git a/fs/buffer.c b/fs/buffer.c index 6569fda5cfed..10179cfa1152 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -878,6 +878,7 @@ void invalidate_inode_buffers(struct inode *inode) spin_unlock(&buffer_mapping->private_lock); } } +EXPORT_SYMBOL(invalidate_inode_buffers); /* * Remove any clean buffers from the inode's buffer list. This is called diff --git a/fs/cifs/file.c b/fs/cifs/file.c index b691b893a848..f0a81e631ae6 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1475,7 +1475,11 @@ static int cifs_write_end(struct file *file, struct address_space *mapping, cFYI(1, ("write_end for page %p from pos %lld with %d bytes", page, pos, copied)); - if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE) + if (PageChecked(page)) { + if (copied == len) + SetPageUptodate(page); + ClearPageChecked(page); + } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE) SetPageUptodate(page); if (!PageUptodate(page)) { @@ -2062,39 +2066,70 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping, { pgoff_t index = pos >> PAGE_CACHE_SHIFT; loff_t offset = pos & (PAGE_CACHE_SIZE - 1); + loff_t page_start = pos & PAGE_MASK; + loff_t i_size; + struct page *page; + int rc = 0; cFYI(1, ("write_begin from %lld len %d", (long long)pos, len)); - *pagep = __grab_cache_page(mapping, index); - if (!*pagep) - return -ENOMEM; - - if (PageUptodate(*pagep)) - return 0; + page = __grab_cache_page(mapping, index); + if (!page) { + rc = -ENOMEM; + goto out; + } - /* If we are writing a full page it will be up to date, - no need to read from the server */ - if (len == PAGE_CACHE_SIZE && flags & AOP_FLAG_UNINTERRUPTIBLE) - return 0; + if (PageUptodate(page)) + goto out; - if ((file->f_flags & O_ACCMODE) != O_WRONLY) { - int rc; + /* + * If we write a full page it will be up to date, no need to read from + * the server. If the write is short, we'll end up doing a sync write + * instead. + */ + if (len == PAGE_CACHE_SIZE) + goto out; - /* might as well read a page, it is fast enough */ - rc = cifs_readpage_worker(file, *pagep, &offset); + /* + * optimize away the read when we have an oplock, and we're not + * expecting to use any of the data we'd be reading in. That + * is, when the page lies beyond the EOF, or straddles the EOF + * and the write will cover all of the existing data. + */ + if (CIFS_I(mapping->host)->clientCanCacheRead) { + i_size = i_size_read(mapping->host); + if (page_start >= i_size || + (offset == 0 && (pos + len) >= i_size)) { + zero_user_segments(page, 0, offset, + offset + len, + PAGE_CACHE_SIZE); + /* + * PageChecked means that the parts of the page + * to which we're not writing are considered up + * to date. Once the data is copied to the + * page, it can be set uptodate. + */ + SetPageChecked(page); + goto out; + } + } - /* we do not need to pass errors back - e.g. if we do not have read access to the file - because cifs_write_end will attempt synchronous writes - -- shaggy */ + if ((file->f_flags & O_ACCMODE) != O_WRONLY) { + /* + * might as well read a page, it is fast enough. If we get + * an error, we don't need to return it. cifs_write_end will + * do a sync write instead since PG_uptodate isn't set. + */ + cifs_readpage_worker(file, page, &page_start); } else { /* we could try using another file handle if there is one - but how would we lock it to prevent close of that handle racing with this read? In any case this will be written out by write_end so is fine */ } - - return 0; +out: + *pagep = page; + return rc; } const struct address_space_operations cifs_addr_ops = { diff --git a/fs/eventpoll.c b/fs/eventpoll.c index aec5c13f6341..96355d505347 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -102,6 +102,8 @@ #define EP_UNACTIVE_PTR ((void *) -1L) +#define EP_ITEM_COST (sizeof(struct epitem) + sizeof(struct eppoll_entry)) + struct epoll_filefd { struct file *file; int fd; @@ -200,6 +202,9 @@ struct eventpoll { * holding ->lock. */ struct epitem *ovflist; + + /* The user that created the eventpoll descriptor */ + struct user_struct *user; }; /* Wait structure used by the poll hooks */ @@ -227,9 +232,17 @@ struct ep_pqueue { }; /* + * Configuration options available inside /proc/sys/fs/epoll/ + */ +/* Maximum number of epoll devices, per user */ +static int max_user_instances __read_mostly; +/* Maximum number of epoll watched descriptors, per user */ +static int max_user_watches __read_mostly; + +/* * This mutex is used to serialize ep_free() and eventpoll_release_file(). */ -static struct mutex epmutex; +static DEFINE_MUTEX(epmutex); /* Safe wake up implementation */ static struct poll_safewake psw; @@ -240,6 +253,33 @@ static struct kmem_cache *epi_cache __read_mostly; /* Slab cache used to allocate "struct eppoll_entry" */ static struct kmem_cache *pwq_cache __read_mostly; +#ifdef CONFIG_SYSCTL + +#include <linux/sysctl.h> + +static int zero; + +ctl_table epoll_table[] = { + { + .procname = "max_user_instances", + .data = &max_user_instances, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .extra1 = &zero, + }, + { + .procname = "max_user_watches", + .data = &max_user_watches, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .extra1 = &zero, + }, + { .ctl_name = 0 } +}; +#endif /* CONFIG_SYSCTL */ + /* Setup the structure that is used as key for the RB tree */ static inline void ep_set_ffd(struct epoll_filefd *ffd, @@ -402,6 +442,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) /* At this point it is safe to free the eventpoll item */ kmem_cache_free(epi_cache, epi); + atomic_dec(&ep->user->epoll_watches); + DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p)\n", current, ep, file)); @@ -449,6 +491,8 @@ static void ep_free(struct eventpoll *ep) mutex_unlock(&epmutex); mutex_destroy(&ep->mtx); + atomic_dec(&ep->user->epoll_devs); + free_uid(ep->user); kfree(ep); } @@ -532,10 +576,19 @@ void eventpoll_release_file(struct file *file) static int ep_alloc(struct eventpoll **pep) { - struct eventpoll *ep = kzalloc(sizeof(*ep), GFP_KERNEL); + int error; + struct user_struct *user; + struct eventpoll *ep; - if (!ep) - return -ENOMEM; + user = get_current_user(); + error = -EMFILE; + if (unlikely(atomic_read(&user->epoll_devs) >= + max_user_instances)) + goto free_uid; + error = -ENOMEM; + ep = kzalloc(sizeof(*ep), GFP_KERNEL); + if (unlikely(!ep)) + goto free_uid; spin_lock_init(&ep->lock); mutex_init(&ep->mtx); @@ -544,12 +597,17 @@ static int ep_alloc(struct eventpoll **pep) INIT_LIST_HEAD(&ep->rdllist); ep->rbr = RB_ROOT; ep->ovflist = EP_UNACTIVE_PTR; + ep->user = user; *pep = ep; DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_alloc() ep=%p\n", current, ep)); return 0; + +free_uid: + free_uid(user); + return error; } /* @@ -703,9 +761,11 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, struct epitem *epi; struct ep_pqueue epq; - error = -ENOMEM; + if (unlikely(atomic_read(&ep->user->epoll_watches) >= + max_user_watches)) + return -ENOSPC; if (!(epi = kmem_cache_alloc(epi_cache, GFP_KERNEL))) - goto error_return; + return -ENOMEM; /* Item initialization follow here ... */ INIT_LIST_HEAD(&epi->rdllink); @@ -735,6 +795,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, * install process. Namely an allocation for a wait queue failed due * high memory pressure. */ + error = -ENOMEM; if (epi->nwait < 0) goto error_unregister; @@ -765,6 +826,8 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event, spin_unlock_irqrestore(&ep->lock, flags); + atomic_inc(&ep->user->epoll_watches); + /* We have to call this outside the lock */ if (pwake) ep_poll_safewake(&psw, &ep->poll_wait); @@ -789,7 +852,7 @@ error_unregister: spin_unlock_irqrestore(&ep->lock, flags); kmem_cache_free(epi_cache, epi); -error_return: + return error; } @@ -1078,6 +1141,7 @@ asmlinkage long sys_epoll_create1(int flags) flags & O_CLOEXEC); if (fd < 0) ep_free(ep); + atomic_inc(&ep->user->epoll_devs); error_return: DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", @@ -1299,7 +1363,12 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, static int __init eventpoll_init(void) { - mutex_init(&epmutex); + struct sysinfo si; + + si_meminfo(&si); + max_user_instances = 128; + max_user_watches = (((si.totalram - si.totalhigh) / 32) << PAGE_SHIFT) / + EP_ITEM_COST; /* Initialize the structure used to perform safe poll wait head wake ups */ ep_poll_safewake_init(&psw); diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h index 5e6724c1afd1..2142b1c68b61 100644 --- a/fs/ntfs/debug.h +++ b/fs/ntfs/debug.h @@ -30,7 +30,8 @@ extern int debug_msgs; -#if 0 /* Fool kernel-doc since it doesn't do macros yet */ +extern void __ntfs_debug(const char *file, int line, const char *function, + const char *format, ...) __attribute__ ((format (printf, 4, 5))); /** * ntfs_debug - write a debug level message to syslog * @f: a printf format string containing the message @@ -39,11 +40,6 @@ extern int debug_msgs; * ntfs_debug() writes a DEBUG level message to the syslog but only if the * driver was compiled with -DDEBUG. Otherwise, the call turns into a NOP. */ -static void ntfs_debug(const char *f, ...); -#endif - -extern void __ntfs_debug (const char *file, int line, const char *function, - const char *format, ...) __attribute__ ((format (printf, 4, 5))); #define ntfs_debug(f, a...) \ __ntfs_debug(__FILE__, __LINE__, __func__, f, ##a) diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 7e947c672469..3a178ec48d7c 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -112,7 +112,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, bh = bhs[i]; if (buffer_jbd(bh)) { - mlog(ML_ERROR, + mlog(ML_BH_IO, "trying to sync read a jbd " "managed bh (blocknr = %llu), skipping\n", (unsigned long long)bh->b_blocknr); @@ -147,15 +147,10 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, for (i = nr; i > 0; i--) { bh = bhs[i - 1]; - if (buffer_jbd(bh)) { - mlog(ML_ERROR, - "the journal got the buffer while it was " - "locked for io! (blocknr = %llu)\n", - (unsigned long long)bh->b_blocknr); - BUG(); - } + /* No need to wait on the buffer if it's managed by JBD. */ + if (!buffer_jbd(bh)) + wait_on_buffer(bh); - wait_on_buffer(bh); if (!buffer_uptodate(bh)) { /* Status won't be cleared from here on out, * so we can safely record this and loop back @@ -251,8 +246,6 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, ignore_cache = 1; } - /* XXX: Can we ever get this and *not* have the cached - * flag set? */ if (buffer_jbd(bh)) { if (ignore_cache) mlog(ML_BH_IO, "trying to sync read a jbd " diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index 533a789c3ef8..ba962d71b34d 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -608,8 +608,10 @@ static int __init init_dlmfs_fs(void) 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), dlmfs_init_once); - if (!dlmfs_inode_cache) + if (!dlmfs_inode_cache) { + status = -ENOMEM; goto bail; + } cleanup_inode = 1; user_dlm_worker = create_singlethread_workqueue("user_dlm"); diff --git a/fs/ocfs2/dlm/userdlm.h b/fs/ocfs2/dlm/userdlm.h index 39ec27738499..0c3cc03c61fa 100644 --- a/fs/ocfs2/dlm/userdlm.h +++ b/fs/ocfs2/dlm/userdlm.h @@ -33,7 +33,7 @@ #include <linux/workqueue.h> /* user_lock_res->l_flags flags. */ -#define USER_LOCK_ATTACHED (0x00000001) /* have we initialized +#define USER_LOCK_ATTACHED (0x00000001) /* we have initialized * the lvb */ #define USER_LOCK_BUSY (0x00000002) /* we are currently in * dlm_lock */ diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index ec684426034b..6e6cc0a2e5f7 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -2841,9 +2841,8 @@ static void ocfs2_unlock_ast(void *opaque, int error) lockres_clear_flags(lockres, OCFS2_LOCK_BUSY); lockres->l_unlock_action = OCFS2_UNLOCK_INVALID; - spin_unlock_irqrestore(&lockres->l_lock, flags); - wake_up(&lockres->l_event); + spin_unlock_irqrestore(&lockres->l_lock, flags); mlog_exit_void(); } diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index fef7ece32376..3fed9e3d8992 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -85,7 +85,7 @@ enum ocfs2_unlock_action { }; /* ocfs2_lock_res->l_flags flags. */ -#define OCFS2_LOCK_ATTACHED (0x00000001) /* have we initialized +#define OCFS2_LOCK_ATTACHED (0x00000001) /* we have initialized * the lvb */ #define OCFS2_LOCK_BUSY (0x00000002) /* we are currently in * dlm_lock */ diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index faec2d879357..9b76d41a8ac6 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -740,6 +740,9 @@ static int user_dlm_lock_status(union ocfs2_dlm_lksb *lksb) static void *user_dlm_lvb(union ocfs2_dlm_lksb *lksb) { + if (!lksb->lksb_fsdlm.sb_lvbptr) + lksb->lksb_fsdlm.sb_lvbptr = (char *)lksb + + sizeof(struct dlm_lksb); return (void *)(lksb->lksb_fsdlm.sb_lvbptr); } diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 6e74b117aaf0..30ebde490f7f 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -106,6 +106,7 @@ void udf_clear_inode(struct inode *inode) udf_truncate_tail_extent(inode); unlock_kernel(); write_inode_now(inode, 0); + invalidate_inode_buffers(inode); } iinfo = UDF_I(inode); kfree(iinfo->i_ext.i_data); |