diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ecryptfs/crypto.c | 2 | ||||
-rw-r--r-- | fs/ecryptfs/kthread.c | 6 | ||||
-rw-r--r-- | fs/ecryptfs/mmap.c | 12 | ||||
-rw-r--r-- | fs/eventpoll.c | 22 | ||||
-rw-r--r-- | fs/ext4/extents.c | 22 | ||||
-rw-r--r-- | fs/ext4/file.c | 8 | ||||
-rw-r--r-- | fs/ext4/fsync.c | 2 | ||||
-rw-r--r-- | fs/ext4/inode.c | 99 | ||||
-rw-r--r-- | fs/ext4/namei.c | 3 | ||||
-rw-r--r-- | fs/ext4/super.c | 30 | ||||
-rw-r--r-- | fs/f2fs/acl.c | 1 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 30 | ||||
-rw-r--r-- | fs/proc/generic.c | 13 | ||||
-rw-r--r-- | fs/proc/task_mmu.c | 2 |
14 files changed, 181 insertions, 71 deletions
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index ea9931281557..a7b0c2dfb3db 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1935,7 +1935,7 @@ static const unsigned char filename_rev_map[256] = { * @src: Source location for the filename to encode * @src_size: Size of the source in bytes */ -void ecryptfs_encode_for_filename(unsigned char *dst, size_t *dst_size, +static void ecryptfs_encode_for_filename(unsigned char *dst, size_t *dst_size, unsigned char *src, size_t src_size) { size_t num_blocks; diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c index 809e67d05ca3..f1ea610362c6 100644 --- a/fs/ecryptfs/kthread.c +++ b/fs/ecryptfs/kthread.c @@ -102,12 +102,12 @@ int __init ecryptfs_init_kthread(void) void ecryptfs_destroy_kthread(void) { - struct ecryptfs_open_req *req; + struct ecryptfs_open_req *req, *tmp; mutex_lock(&ecryptfs_kthread_ctl.mux); ecryptfs_kthread_ctl.flags |= ECRYPTFS_KTHREAD_ZOMBIE; - list_for_each_entry(req, &ecryptfs_kthread_ctl.req_list, - kthread_ctl_list) { + list_for_each_entry_safe(req, tmp, &ecryptfs_kthread_ctl.req_list, + kthread_ctl_list) { list_del(&req->kthread_ctl_list); *req->lower_file = ERR_PTR(-EIO); complete(&req->done); diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index bd1d57f98f74..564a1fa34b99 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -338,7 +338,8 @@ static int ecryptfs_write_begin(struct file *file, if (prev_page_end_size >= i_size_read(page->mapping->host)) { zero_user(page, 0, PAGE_CACHE_SIZE); - } else { + SetPageUptodate(page); + } else if (len < PAGE_CACHE_SIZE) { rc = ecryptfs_decrypt_page(page); if (rc) { printk(KERN_ERR "%s: Error decrypting " @@ -348,8 +349,8 @@ static int ecryptfs_write_begin(struct file *file, ClearPageUptodate(page); goto out; } + SetPageUptodate(page); } - SetPageUptodate(page); } } /* If creating a page or more of holes, zero them out via truncate. @@ -499,6 +500,13 @@ static int ecryptfs_write_end(struct file *file, } goto out; } + if (!PageUptodate(page)) { + if (copied < PAGE_CACHE_SIZE) { + rc = 0; + goto out; + } + SetPageUptodate(page); + } /* Fills in zeros if 'to' goes beyond inode size */ rc = fill_zeros_to_end_of_page(page, to); if (rc) { diff --git a/fs/eventpoll.c b/fs/eventpoll.c index be56b21435f8..9fec1836057a 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1313,7 +1313,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even * otherwise we might miss an event that happens between the * f_op->poll() call and the new event set registering. */ - epi->event.events = event->events; + epi->event.events = event->events; /* need barrier below */ pt._key = event->events; epi->event.data = event->data; /* protected by mtx */ if (epi->event.events & EPOLLWAKEUP) { @@ -1324,6 +1324,26 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even } /* + * The following barrier has two effects: + * + * 1) Flush epi changes above to other CPUs. This ensures + * we do not miss events from ep_poll_callback if an + * event occurs immediately after we call f_op->poll(). + * We need this because we did not take ep->lock while + * changing epi above (but ep_poll_callback does take + * ep->lock). + * + * 2) We also need to ensure we do not miss _past_ events + * when calling f_op->poll(). This barrier also + * pairs with the barrier in wq_has_sleeper (see + * comments for wq_has_sleeper). + * + * This barrier will now guarantee ep_poll_callback or f_op->poll + * (or both) will notice the readiness of an item. + */ + smp_mb(); + + /* * Get current event bits. We can safely use the file* here because * its usage count has been increased by the caller of this function. */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 26af22832a84..5ae1674ec12f 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2226,13 +2226,14 @@ errout: * removes index from the index block. */ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path) + struct ext4_ext_path *path, int depth) { int err; ext4_fsblk_t leaf; /* free index block */ - path--; + depth--; + path = path + depth; leaf = ext4_idx_pblock(path->p_idx); if (unlikely(path->p_hdr->eh_entries == 0)) { EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0"); @@ -2257,6 +2258,19 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, ext4_free_blocks(handle, inode, NULL, leaf, 1, EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); + + while (--depth >= 0) { + if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr)) + break; + path--; + err = ext4_ext_get_access(handle, inode, path); + if (err) + break; + path->p_idx->ei_block = (path+1)->p_idx->ei_block; + err = ext4_ext_dirty(handle, inode, path); + if (err) + break; + } return err; } @@ -2599,7 +2613,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, /* if this leaf is free, then we should * remove it from index block above */ if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) - err = ext4_ext_rm_idx(handle, inode, path + depth); + err = ext4_ext_rm_idx(handle, inode, path, depth); out: return err; @@ -2802,7 +2816,7 @@ again: /* index is empty, remove it; * handle must be already prepared by the * truncatei_leaf() */ - err = ext4_ext_rm_idx(handle, inode, path + i); + err = ext4_ext_rm_idx(handle, inode, path, i); } /* root level has p_bh == NULL, brelse() eats this */ brelse(path[i].p_bh); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index d07c27ca594a..405565a62277 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -108,14 +108,6 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, /* Unaligned direct AIO must be serialized; see comment above */ if (unaligned_aio) { - static unsigned long unaligned_warn_time; - - /* Warn about this once per day */ - if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ)) - ext4_msg(inode->i_sb, KERN_WARNING, - "Unaligned AIO/DIO on inode %ld by %s; " - "performance will be poor.", - inode->i_ino, current->comm); mutex_lock(ext4_aio_mutex(inode)); ext4_unwritten_wait(inode); } diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index dfbc1fe96674..3278e64e57b6 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -109,8 +109,6 @@ static int __sync_inode(struct inode *inode, int datasync) * * What we do is just kick off a commit and wait on it. This will snapshot the * inode to disk. - * - * i_mutex lock is held when entering and exiting this function */ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index cb1c1ab2720b..cbfe13bf5b2a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2880,8 +2880,6 @@ static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offs static void ext4_invalidatepage(struct page *page, unsigned long offset) { - journal_t *journal = EXT4_JOURNAL(page->mapping->host); - trace_ext4_invalidatepage(page, offset); /* @@ -2889,16 +2887,34 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset) */ if (ext4_should_dioread_nolock(page->mapping->host)) ext4_invalidatepage_free_endio(page, offset); + + /* No journalling happens on data buffers when this function is used */ + WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page))); + + block_invalidatepage(page, offset); +} + +static int __ext4_journalled_invalidatepage(struct page *page, + unsigned long offset) +{ + journal_t *journal = EXT4_JOURNAL(page->mapping->host); + + trace_ext4_journalled_invalidatepage(page, offset); + /* * If it's a full truncate we just forget about the pending dirtying */ if (offset == 0) ClearPageChecked(page); - if (journal) - jbd2_journal_invalidatepage(journal, page, offset); - else - block_invalidatepage(page, offset); + return jbd2_journal_invalidatepage(journal, page, offset); +} + +/* Wrapper for aops... */ +static void ext4_journalled_invalidatepage(struct page *page, + unsigned long offset) +{ + WARN_ON(__ext4_journalled_invalidatepage(page, offset) < 0); } static int ext4_releasepage(struct page *page, gfp_t wait) @@ -3264,7 +3280,7 @@ static const struct address_space_operations ext4_journalled_aops = { .write_end = ext4_journalled_write_end, .set_page_dirty = ext4_journalled_set_page_dirty, .bmap = ext4_bmap, - .invalidatepage = ext4_invalidatepage, + .invalidatepage = ext4_journalled_invalidatepage, .releasepage = ext4_releasepage, .direct_IO = ext4_direct_IO, .is_partially_uptodate = block_is_partially_uptodate, @@ -4305,6 +4321,47 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) } /* + * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate + * buffers that are attached to a page stradding i_size and are undergoing + * commit. In that case we have to wait for commit to finish and try again. + */ +static void ext4_wait_for_tail_page_commit(struct inode *inode) +{ + struct page *page; + unsigned offset; + journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; + tid_t commit_tid = 0; + int ret; + + offset = inode->i_size & (PAGE_CACHE_SIZE - 1); + /* + * All buffers in the last page remain valid? Then there's nothing to + * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE == + * blocksize case + */ + if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits)) + return; + while (1) { + page = find_lock_page(inode->i_mapping, + inode->i_size >> PAGE_CACHE_SHIFT); + if (!page) + return; + ret = __ext4_journalled_invalidatepage(page, offset); + unlock_page(page); + page_cache_release(page); + if (ret != -EBUSY) + return; + commit_tid = 0; + read_lock(&journal->j_state_lock); + if (journal->j_committing_transaction) + commit_tid = journal->j_committing_transaction->t_tid; + read_unlock(&journal->j_state_lock); + if (commit_tid) + jbd2_log_wait_commit(journal, commit_tid); + } +} + +/* * ext4_setattr() * * Called from notify_change. @@ -4417,16 +4474,28 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) } if (attr->ia_valid & ATTR_SIZE) { - if (attr->ia_size != i_size_read(inode)) { - truncate_setsize(inode, attr->ia_size); - /* Inode size will be reduced, wait for dio in flight. - * Temporarily disable dioread_nolock to prevent - * livelock. */ + if (attr->ia_size != inode->i_size) { + loff_t oldsize = inode->i_size; + + i_size_write(inode, attr->ia_size); + /* + * Blocks are going to be removed from the inode. Wait + * for dio in flight. Temporarily disable + * dioread_nolock to prevent livelock. + */ if (orphan) { - ext4_inode_block_unlocked_dio(inode); - inode_dio_wait(inode); - ext4_inode_resume_unlocked_dio(inode); + if (!ext4_should_journal_data(inode)) { + ext4_inode_block_unlocked_dio(inode); + inode_dio_wait(inode); + ext4_inode_resume_unlocked_dio(inode); + } else + ext4_wait_for_tail_page_commit(inode); } + /* + * Truncate pagecache after we've waited for commit + * in data=journal mode to make pages freeable. + */ + truncate_pagecache(inode, oldsize, inode->i_size); } ext4_truncate(inode); } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index cac448282331..8990165346ee 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2648,7 +2648,8 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) struct ext4_iloc iloc; int err = 0; - if (!EXT4_SB(inode->i_sb)->s_journal) + if ((!EXT4_SB(inode->i_sb)->s_journal) && + !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) return 0; mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3cdb0a2fc648..3d4fb81bacd5 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1645,9 +1645,7 @@ static int parse_options(char *options, struct super_block *sb, unsigned int *journal_ioprio, int is_remount) { -#ifdef CONFIG_QUOTA struct ext4_sb_info *sbi = EXT4_SB(sb); -#endif char *p; substring_t args[MAX_OPT_ARGS]; int token; @@ -1696,6 +1694,16 @@ static int parse_options(char *options, struct super_block *sb, } } #endif + if (test_opt(sb, DIOREAD_NOLOCK)) { + int blocksize = + BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); + + if (blocksize < PAGE_CACHE_SIZE) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "dioread_nolock if block size != PAGE_SIZE"); + return 0; + } + } return 1; } @@ -2212,7 +2220,9 @@ static void ext4_orphan_cleanup(struct super_block *sb, __func__, inode->i_ino, inode->i_size); jbd_debug(2, "truncating inode %lu to %lld bytes\n", inode->i_ino, inode->i_size); + mutex_lock(&inode->i_mutex); ext4_truncate(inode); + mutex_unlock(&inode->i_mutex); nr_truncates++; } else { ext4_msg(sb, KERN_DEBUG, @@ -3223,6 +3233,10 @@ int ext4_calculate_overhead(struct super_block *sb) memset(buf, 0, PAGE_SIZE); cond_resched(); } + /* Add the journal blocks as well */ + if (sbi->s_journal) + overhead += EXT4_B2C(sbi, sbi->s_journal->j_maxlen); + sbi->s_overhead = overhead; smp_wmb(); free_page((unsigned long) buf); @@ -3436,15 +3450,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) clear_opt(sb, DELALLOC); } - blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); - if (test_opt(sb, DIOREAD_NOLOCK)) { - if (blocksize < PAGE_SIZE) { - ext4_msg(sb, KERN_ERR, "can't mount with " - "dioread_nolock if block size != PAGE_SIZE"); - goto failed_mount; - } - } - sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); @@ -3486,6 +3491,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY))) goto failed_mount; + blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); if (blocksize < EXT4_MIN_BLOCK_SIZE || blocksize > EXT4_MAX_BLOCK_SIZE) { ext4_msg(sb, KERN_ERR, @@ -4725,7 +4731,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) } ext4_setup_system_zone(sb); - if (sbi->s_journal == NULL) + if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY)) ext4_commit_super(sb, 1); #ifdef CONFIG_QUOTA diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index fed74d193ffb..e95b94945d5f 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -82,7 +82,6 @@ static struct posix_acl *f2fs_acl_from_disk(const char *value, size_t size) case ACL_GROUP_OBJ: case ACL_MASK: case ACL_OTHER: - acl->a_entries[i].e_id = ACL_UNDEFINED_ID; entry = (struct f2fs_acl_entry *)((char *)entry + sizeof(struct f2fs_acl_entry_short)); break; diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 42f6615af0ac..df9f29760efa 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -209,7 +209,8 @@ repeat: if (!new_transaction) goto alloc_transaction; write_lock(&journal->j_state_lock); - if (!journal->j_running_transaction) { + if (!journal->j_running_transaction && + !journal->j_barrier_count) { jbd2_get_transaction(journal, new_transaction); new_transaction = NULL; } @@ -1839,7 +1840,6 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, BUFFER_TRACE(bh, "entry"); -retry: /* * It is safe to proceed here without the j_list_lock because the * buffers cannot be stolen by try_to_free_buffers as long as we are @@ -1934,14 +1934,11 @@ retry: * for commit and try again. */ if (partial_page) { - tid_t tid = journal->j_committing_transaction->t_tid; - jbd2_journal_put_journal_head(jh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); write_unlock(&journal->j_state_lock); - jbd2_log_wait_commit(journal, tid); - goto retry; + return -EBUSY; } /* * OK, buffer won't be reachable after truncate. We just set @@ -2002,21 +1999,23 @@ zap_buffer_unlocked: * @page: page to flush * @offset: length of page to invalidate. * - * Reap page buffers containing data after offset in page. - * + * Reap page buffers containing data after offset in page. Can return -EBUSY + * if buffers are part of the committing transaction and the page is straddling + * i_size. Caller then has to wait for current commit and try again. */ -void jbd2_journal_invalidatepage(journal_t *journal, - struct page *page, - unsigned long offset) +int jbd2_journal_invalidatepage(journal_t *journal, + struct page *page, + unsigned long offset) { struct buffer_head *head, *bh, *next; unsigned int curr_off = 0; int may_free = 1; + int ret = 0; if (!PageLocked(page)) BUG(); if (!page_has_buffers(page)) - return; + return 0; /* We will potentially be playing with lists other than just the * data lists (especially for journaled data mode), so be @@ -2030,9 +2029,11 @@ void jbd2_journal_invalidatepage(journal_t *journal, if (offset <= curr_off) { /* This block is wholly outside the truncation point */ lock_buffer(bh); - may_free &= journal_unmap_buffer(journal, bh, - offset > 0); + ret = journal_unmap_buffer(journal, bh, offset > 0); unlock_buffer(bh); + if (ret < 0) + return ret; + may_free &= ret; } curr_off = next_off; bh = next; @@ -2043,6 +2044,7 @@ void jbd2_journal_invalidatepage(journal_t *journal, if (may_free && try_to_free_buffers(page)) J_ASSERT(!page_has_buffers(page)); } + return 0; } /* diff --git a/fs/proc/generic.c b/fs/proc/generic.c index e064f562b1f7..76ddae83daa5 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -352,18 +352,18 @@ retry: if (!ida_pre_get(&proc_inum_ida, GFP_KERNEL)) return -ENOMEM; - spin_lock_bh(&proc_inum_lock); + spin_lock_irq(&proc_inum_lock); error = ida_get_new(&proc_inum_ida, &i); - spin_unlock_bh(&proc_inum_lock); + spin_unlock_irq(&proc_inum_lock); if (error == -EAGAIN) goto retry; else if (error) return error; if (i > UINT_MAX - PROC_DYNAMIC_FIRST) { - spin_lock_bh(&proc_inum_lock); + spin_lock_irq(&proc_inum_lock); ida_remove(&proc_inum_ida, i); - spin_unlock_bh(&proc_inum_lock); + spin_unlock_irq(&proc_inum_lock); return -ENOSPC; } *inum = PROC_DYNAMIC_FIRST + i; @@ -372,9 +372,10 @@ retry: void proc_free_inum(unsigned int inum) { - spin_lock_bh(&proc_inum_lock); + unsigned long flags; + spin_lock_irqsave(&proc_inum_lock, flags); ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); - spin_unlock_bh(&proc_inum_lock); + spin_unlock_irqrestore(&proc_inum_lock, flags); } static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 448455b7fd91..ca5ce7f9f800 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1278,7 +1278,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) walk.mm = mm; pol = get_vma_policy(task, vma, vma->vm_start); - mpol_to_str(buffer, sizeof(buffer), pol, 0); + mpol_to_str(buffer, sizeof(buffer), pol); mpol_cond_put(pol); seq_printf(m, "%08lx %s", vma->vm_start, buffer); |