diff options
Diffstat (limited to 'fs/inode.c')
-rw-r--r-- | fs/inode.c | 136 |
1 files changed, 48 insertions, 88 deletions
diff --git a/fs/inode.c b/fs/inode.c index 4be128cbc754..a48fa5355fb4 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -33,8 +33,8 @@ * * inode->i_lock protects: * inode->i_state, inode->i_hash, __iget() - * inode_lru_lock protects: - * inode_lru, inode->i_lru + * inode->i_sb->s_inode_lru_lock protects: + * inode->i_sb->s_inode_lru, inode->i_lru * inode_sb_list_lock protects: * sb->s_inodes, inode->i_sb_list * bdi->wb.list_lock protects: @@ -46,7 +46,7 @@ * * inode_sb_list_lock * inode->i_lock - * inode_lru_lock + * inode->i_sb->s_inode_lru_lock * * bdi->wb.list_lock * inode->i_lock @@ -64,23 +64,9 @@ static unsigned int i_hash_shift __read_mostly; static struct hlist_head *inode_hashtable __read_mostly; static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); -static LIST_HEAD(inode_lru); -static DEFINE_SPINLOCK(inode_lru_lock); - __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock); /* - * iprune_sem provides exclusion between the icache shrinking and the - * umount path. - * - * We don't actually need it to protect anything in the umount path, - * but only need to cycle through it to make sure any inode that - * prune_icache took off the LRU list has been fully torn down by the - * time we are past evict_inodes. - */ -static DECLARE_RWSEM(iprune_sem); - -/* * Empty aops. Can be used for the cases where the user does not * define any of the address_space operations. */ @@ -94,6 +80,7 @@ EXPORT_SYMBOL(empty_aops); struct inodes_stat_t inodes_stat; static DEFINE_PER_CPU(unsigned int, nr_inodes); +static DEFINE_PER_CPU(unsigned int, nr_unused); static struct kmem_cache *inode_cachep __read_mostly; @@ -108,7 +95,11 @@ static int get_nr_inodes(void) static inline int get_nr_inodes_unused(void) { - return inodes_stat.nr_unused; + int i; + int sum = 0; + for_each_possible_cpu(i) + sum += per_cpu(nr_unused, i); + return sum < 0 ? 0 : sum; } int get_nr_dirty_inodes(void) @@ -126,6 +117,7 @@ int proc_nr_inodes(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { inodes_stat.nr_inodes = get_nr_inodes(); + inodes_stat.nr_unused = get_nr_inodes_unused(); return proc_dointvec(table, write, buffer, lenp, ppos); } #endif @@ -175,8 +167,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) mutex_init(&inode->i_mutex); lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key); - init_rwsem(&inode->i_alloc_sem); - lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key); + atomic_set(&inode->i_dio_count, 0); mapping->a_ops = &empty_aops; mapping->host = inode; @@ -336,22 +327,24 @@ EXPORT_SYMBOL(ihold); static void inode_lru_list_add(struct inode *inode) { - spin_lock(&inode_lru_lock); + spin_lock(&inode->i_sb->s_inode_lru_lock); if (list_empty(&inode->i_lru)) { - list_add(&inode->i_lru, &inode_lru); - inodes_stat.nr_unused++; + list_add(&inode->i_lru, &inode->i_sb->s_inode_lru); + inode->i_sb->s_nr_inodes_unused++; + this_cpu_inc(nr_unused); } - spin_unlock(&inode_lru_lock); + spin_unlock(&inode->i_sb->s_inode_lru_lock); } static void inode_lru_list_del(struct inode *inode) { - spin_lock(&inode_lru_lock); + spin_lock(&inode->i_sb->s_inode_lru_lock); if (!list_empty(&inode->i_lru)) { list_del_init(&inode->i_lru); - inodes_stat.nr_unused--; + inode->i_sb->s_nr_inodes_unused--; + this_cpu_dec(nr_unused); } - spin_unlock(&inode_lru_lock); + spin_unlock(&inode->i_sb->s_inode_lru_lock); } /** @@ -422,7 +415,14 @@ EXPORT_SYMBOL(remove_inode_hash); void end_writeback(struct inode *inode) { might_sleep(); + /* + * We have to cycle tree_lock here because reclaim can be still in the + * process of removing the last page (in __delete_from_page_cache()) + * and we must not free mapping under it. + */ + spin_lock_irq(&inode->i_data.tree_lock); BUG_ON(inode->i_data.nrpages); + spin_unlock_irq(&inode->i_data.tree_lock); BUG_ON(!list_empty(&inode->i_data.private_list)); BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(inode->i_state & I_CLEAR); @@ -529,14 +529,6 @@ void evict_inodes(struct super_block *sb) spin_unlock(&inode_sb_list_lock); dispose_list(&dispose); - - /* - * Cycle through iprune_sem to make sure any inode that prune_icache - * moved off the list before we took the lock has been fully torn - * down. - */ - down_write(&iprune_sem); - up_write(&iprune_sem); } /** @@ -599,8 +591,10 @@ static int can_unuse(struct inode *inode) } /* - * Scan `goal' inodes on the unused list for freeable ones. They are moved to a - * temporary list and then are freed outside inode_lru_lock by dispose_list(). + * Walk the superblock inode LRU for freeable inodes and attempt to free them. + * This is called from the superblock shrinker function with a number of inodes + * to trim from the LRU. Inodes to be freed are moved to a temporary list and + * then are freed outside inode_lock by dispose_list(). * * Any inodes which are pinned purely because of attached pagecache have their * pagecache removed. If the inode has metadata buffers attached to @@ -614,29 +608,28 @@ static int can_unuse(struct inode *inode) * LRU does not have strict ordering. Hence we don't want to reclaim inodes * with this flag set because they are the inodes that are out of order. */ -static void prune_icache(int nr_to_scan) +void prune_icache_sb(struct super_block *sb, int nr_to_scan) { LIST_HEAD(freeable); int nr_scanned; unsigned long reap = 0; - down_read(&iprune_sem); - spin_lock(&inode_lru_lock); - for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { + spin_lock(&sb->s_inode_lru_lock); + for (nr_scanned = nr_to_scan; nr_scanned >= 0; nr_scanned--) { struct inode *inode; - if (list_empty(&inode_lru)) + if (list_empty(&sb->s_inode_lru)) break; - inode = list_entry(inode_lru.prev, struct inode, i_lru); + inode = list_entry(sb->s_inode_lru.prev, struct inode, i_lru); /* - * we are inverting the inode_lru_lock/inode->i_lock here, + * we are inverting the sb->s_inode_lru_lock/inode->i_lock here, * so use a trylock. If we fail to get the lock, just move the * inode to the back of the list so we don't spin on it. */ if (!spin_trylock(&inode->i_lock)) { - list_move(&inode->i_lru, &inode_lru); + list_move(&inode->i_lru, &sb->s_inode_lru); continue; } @@ -648,28 +641,29 @@ static void prune_icache(int nr_to_scan) (inode->i_state & ~I_REFERENCED)) { list_del_init(&inode->i_lru); spin_unlock(&inode->i_lock); - inodes_stat.nr_unused--; + sb->s_nr_inodes_unused--; + this_cpu_dec(nr_unused); continue; } /* recently referenced inodes get one more pass */ if (inode->i_state & I_REFERENCED) { inode->i_state &= ~I_REFERENCED; - list_move(&inode->i_lru, &inode_lru); + list_move(&inode->i_lru, &sb->s_inode_lru); spin_unlock(&inode->i_lock); continue; } if (inode_has_buffers(inode) || inode->i_data.nrpages) { __iget(inode); spin_unlock(&inode->i_lock); - spin_unlock(&inode_lru_lock); + spin_unlock(&sb->s_inode_lru_lock); if (remove_inode_buffers(inode)) reap += invalidate_mapping_pages(&inode->i_data, 0, -1); iput(inode); - spin_lock(&inode_lru_lock); + spin_lock(&sb->s_inode_lru_lock); - if (inode != list_entry(inode_lru.next, + if (inode != list_entry(sb->s_inode_lru.next, struct inode, i_lru)) continue; /* wrong inode or list_empty */ /* avoid lock inversions with trylock */ @@ -685,51 +679,18 @@ static void prune_icache(int nr_to_scan) spin_unlock(&inode->i_lock); list_move(&inode->i_lru, &freeable); - inodes_stat.nr_unused--; + sb->s_nr_inodes_unused--; + this_cpu_dec(nr_unused); } if (current_is_kswapd()) __count_vm_events(KSWAPD_INODESTEAL, reap); else __count_vm_events(PGINODESTEAL, reap); - spin_unlock(&inode_lru_lock); + spin_unlock(&sb->s_inode_lru_lock); dispose_list(&freeable); - up_read(&iprune_sem); } -/* - * shrink_icache_memory() will attempt to reclaim some unused inodes. Here, - * "unused" means that no dentries are referring to the inodes: the files are - * not open and the dcache references to those inodes have already been - * reclaimed. - * - * This function is passed the number of inodes to scan, and it returns the - * total number of remaining possibly-reclaimable inodes. - */ -static int shrink_icache_memory(struct shrinker *shrink, - struct shrink_control *sc) -{ - int nr = sc->nr_to_scan; - gfp_t gfp_mask = sc->gfp_mask; - - if (nr) { - /* - * Nasty deadlock avoidance. We may hold various FS locks, - * and we don't want to recurse into the FS that called us - * in clear_inode() and friends.. - */ - if (!(gfp_mask & __GFP_FS)) - return -1; - prune_icache(nr); - } - return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure; -} - -static struct shrinker icache_shrinker = { - .shrink = shrink_icache_memory, - .seeks = DEFAULT_SEEKS, -}; - static void __wait_on_freeing_inode(struct inode *inode); /* * Called with the inode lock held. @@ -1323,7 +1284,7 @@ static void iput_final(struct inode *inode) WARN_ON(inode->i_state & I_NEW); - if (op && op->drop_inode) + if (op->drop_inode) drop = op->drop_inode(inode); else drop = generic_drop_inode(inode); @@ -1609,7 +1570,6 @@ void __init inode_init(void) (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| SLAB_MEM_SPREAD), init_once); - register_shrinker(&icache_shrinker); /* Hash may have been set up in inode_init_early */ if (!hashdist) |