From 44396f4b5cb8566f7118aec55eeac99be7ad94cb Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 31 May 2011 11:58:49 -0400 Subject: fs: add a DCACHE_NEED_LOOKUP flag for d_flags Btrfs (and I'd venture most other fs's) stores its indexes in nice disk order for readdir, but unfortunately in the case of anything that stats the files in order that readdir spits back (like oh say ls) that means we still have to do the normal lookup of the file, which means looking up our other index and then looking up the inode. What I want is a way to create dummy dentries when we find them in readdir so that when ls or anything else subsequently does a stat(), we already have the location information in the dentry and can go straight to the inode itself. The lookup stuff just assumes that if it finds a dentry it is done, it doesn't perform a lookup. So add a DCACHE_NEED_LOOKUP flag so that the lookup code knows it still needs to run i_op->lookup() on the parent to get the inode for the dentry. I have tested this with btrfs and I went from something that looks like this http://people.redhat.com/jwhiter/ls-noreada.png To this http://people.redhat.com/jwhiter/ls-good.png That's a savings of 1300 seconds, or 22 minutes. That is a significant savings. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Al Viro --- fs/dcache.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 6e4ea6d87774..d3902139b533 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -343,6 +343,24 @@ void d_drop(struct dentry *dentry) } EXPORT_SYMBOL(d_drop); +/* + * d_clear_need_lookup - drop a dentry from cache and clear the need lookup flag + * @dentry: dentry to drop + * + * This is called when we do a lookup on a placeholder dentry that needed to be + * looked up. 
The dentry should have been hashed in order for it to be found by + * the lookup code, but now needs to be unhashed while we do the actual lookup + * and clear the DCACHE_NEED_LOOKUP flag. + */ +void d_clear_need_lookup(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + __d_drop(dentry); + dentry->d_flags &= ~DCACHE_NEED_LOOKUP; + spin_unlock(&dentry->d_lock); +} +EXPORT_SYMBOL(d_clear_need_lookup); + /* * Finish off a dentry we've decided to kill. * dentry->d_lock must be held, returns with it unlocked. @@ -432,8 +450,13 @@ repeat: if (d_unhashed(dentry)) goto kill_it; - /* Otherwise leave it cached and ensure it's on the LRU */ - dentry->d_flags |= DCACHE_REFERENCED; + /* + * If this dentry needs lookup, don't set the referenced flag so that it + * is more likely to be cleaned up by the dcache shrinker in case of + * memory pressure. + */ + if (!d_need_lookup(dentry)) + dentry->d_flags |= DCACHE_REFERENCED; dentry_lru_add(dentry); dentry->d_count--; @@ -1707,6 +1730,13 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, return found; } + /* + * We are going to instantiate this dentry, unhash it and clear the + * lookup flag so we can do that. + */ + if (unlikely(d_need_lookup(found))) + d_clear_need_lookup(found); + /* * Negative dentry: instantiate it unless the inode is a directory and * already has a dentry. -- cgit v1.2.1 From a4464dbc0ca6a3ab8e9d1206bc05059dae2a559d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 7 Jul 2011 15:03:58 -0400 Subject: Make ->d_sb assign-once and always non-NULL New helper (non-exported, fs/internal.h-only): __d_alloc(sb, name). Allocates dentry, sets its ->d_sb to given superblock and sets ->d_op accordingly. Old d_alloc(NULL, name) callers are converted to that (all of them know what superblock they want). d_alloc() itself is left only for parent != NULL case; uses __d_alloc(), inserts result into the list of parent's children. 
Note that now ->d_sb is assign-once and never NULL *and* ->d_parent is never NULL either. Signed-off-by: Al Viro --- fs/dcache.c | 75 ++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 39 insertions(+), 36 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index d3902139b533..c61edd0318c3 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1275,8 +1275,8 @@ static struct shrinker dcache_shrinker = { }; /** - * d_alloc - allocate a dcache entry - * @parent: parent of entry to allocate + * __d_alloc - allocate a dcache entry + * @sb: filesystem it will belong to * @name: qstr of the name * * Allocates a dentry. It returns %NULL if there is insufficient memory @@ -1284,7 +1284,7 @@ static struct shrinker dcache_shrinker = { * copied and the copy passed in may be reused after this call. */ -struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) +struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) { struct dentry *dentry; char *dname; @@ -1314,8 +1314,8 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) spin_lock_init(&dentry->d_lock); seqcount_init(&dentry->d_seq); dentry->d_inode = NULL; - dentry->d_parent = NULL; - dentry->d_sb = NULL; + dentry->d_parent = dentry; + dentry->d_sb = sb; dentry->d_op = NULL; dentry->d_fsdata = NULL; INIT_HLIST_BL_NODE(&dentry->d_hash); @@ -1323,36 +1323,47 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) INIT_LIST_HEAD(&dentry->d_subdirs); INIT_LIST_HEAD(&dentry->d_alias); INIT_LIST_HEAD(&dentry->d_u.d_child); - - if (parent) { - spin_lock(&parent->d_lock); - /* - * don't need child lock because it is not subject - * to concurrency here - */ - __dget_dlock(parent); - dentry->d_parent = parent; - dentry->d_sb = parent->d_sb; - d_set_d_op(dentry, dentry->d_sb->s_d_op); - list_add(&dentry->d_u.d_child, &parent->d_subdirs); - spin_unlock(&parent->d_lock); - } + d_set_d_op(dentry, dentry->d_sb->s_d_op); 
this_cpu_inc(nr_dentry); return dentry; } + +/** + * d_alloc - allocate a dcache entry + * @parent: parent of entry to allocate + * @name: qstr of the name + * + * Allocates a dentry. It returns %NULL if there is insufficient memory + * available. On a success the dentry is returned. The name passed in is + * copied and the copy passed in may be reused after this call. + */ +struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) +{ + struct dentry *dentry = __d_alloc(parent->d_sb, name); + if (!dentry) + return NULL; + + spin_lock(&parent->d_lock); + /* + * don't need child lock because it is not subject + * to concurrency here + */ + __dget_dlock(parent); + dentry->d_parent = parent; + list_add(&dentry->d_u.d_child, &parent->d_subdirs); + spin_unlock(&parent->d_lock); + + return dentry; +} EXPORT_SYMBOL(d_alloc); struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name) { - struct dentry *dentry = d_alloc(NULL, name); - if (dentry) { - dentry->d_sb = sb; - d_set_d_op(dentry, dentry->d_sb->s_d_op); - dentry->d_parent = dentry; + struct dentry *dentry = __d_alloc(sb, name); + if (dentry) dentry->d_flags |= DCACHE_DISCONNECTED; - } return dentry; } EXPORT_SYMBOL(d_alloc_pseudo); @@ -1522,13 +1533,9 @@ struct dentry * d_alloc_root(struct inode * root_inode) if (root_inode) { static const struct qstr name = { .name = "/", .len = 1 }; - res = d_alloc(NULL, &name); - if (res) { - res->d_sb = root_inode->i_sb; - d_set_d_op(res, res->d_sb->s_d_op); - res->d_parent = res; + res = __d_alloc(root_inode->i_sb, &name); + if (res) d_instantiate(res, root_inode); - } } return res; } @@ -1589,13 +1596,11 @@ struct dentry *d_obtain_alias(struct inode *inode) if (res) goto out_iput; - tmp = d_alloc(NULL, &anonstring); + tmp = __d_alloc(inode->i_sb, &anonstring); if (!tmp) { res = ERR_PTR(-ENOMEM); goto out_iput; } - tmp->d_parent = tmp; /* make sure dput doesn't croak */ - spin_lock(&inode->i_lock); res = __d_find_any_alias(inode); @@ -1607,8 
+1612,6 @@ struct dentry *d_obtain_alias(struct inode *inode) /* attach a disconnected dentry */ spin_lock(&tmp->d_lock); - tmp->d_sb = inode->i_sb; - d_set_d_op(tmp, tmp->d_sb->s_d_op); tmp->d_inode = inode; tmp->d_flags |= DCACHE_DISCONNECTED; list_add(&tmp->d_alias, &inode->i_dentry); -- cgit v1.2.1 From a9049376ee05bf966bfe2b081b5071326856890a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 8 Jul 2011 21:20:11 -0400 Subject: make d_splice_alias(ERR_PTR(err), dentry) = ERR_PTR(err) ... and simplify the living hell out of callers Signed-off-by: Al Viro --- fs/dcache.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index c61edd0318c3..41e2085d430b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1652,6 +1652,9 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) { struct dentry *new = NULL; + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (inode && S_ISDIR(inode->i_mode)) { spin_lock(&inode->i_lock); new = __d_find_alias(inode, 1); -- cgit v1.2.1 From b0d40c92adafde7c2d81203ce7c1c69275f41140 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Fri, 8 Jul 2011 14:14:42 +1000 Subject: superblock: introduce per-sb cache shrinker infrastructure With context based shrinkers, we can implement a per-superblock shrinker that shrinks the caches attached to the superblock. We currently have global shrinkers for the inode and dentry caches that split up into per-superblock operations via a coarse proportioning method that does not batch very well. The global shrinkers also have a dependency - dentries pin inodes - so we have to be very careful about how we register the global shrinkers so that the implicit call order is always correct. With a per-sb shrinker callout, we can encode this dependency directly into the per-sb shrinker, hence avoiding the need for strictly ordering shrinker registrations. 
We also have no need for any proportioning code for the shrinker subsystem already provides this functionality across all shrinkers. Allowing the shrinker to operate on a single superblock at a time means that we do less superblock list traversals and locking and reclaim should batch more effectively. This should result in less CPU overhead for reclaim and potentially faster reclaim of items from each filesystem. Signed-off-by: Dave Chinner Signed-off-by: Al Viro --- fs/dcache.c | 121 ++++++------------------------------------------------------ 1 file changed, 12 insertions(+), 109 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 41e2085d430b..2762804a140d 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -743,13 +743,11 @@ static void shrink_dentry_list(struct list_head *list) * * If flags contains DCACHE_REFERENCED reference dentries will not be pruned. */ -static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags) +static void __shrink_dcache_sb(struct super_block *sb, int count, int flags) { - /* called from prune_dcache() and shrink_dcache_parent() */ struct dentry *dentry; LIST_HEAD(referenced); LIST_HEAD(tmp); - int cnt = *count; relock: spin_lock(&dcache_lru_lock); @@ -777,7 +775,7 @@ relock: } else { list_move_tail(&dentry->d_lru, &tmp); spin_unlock(&dentry->d_lock); - if (!--cnt) + if (!--count) break; } cond_resched_lock(&dcache_lru_lock); @@ -787,83 +785,22 @@ relock: spin_unlock(&dcache_lru_lock); shrink_dentry_list(&tmp); - - *count = cnt; } /** - * prune_dcache - shrink the dcache - * @count: number of entries to try to free + * prune_dcache_sb - shrink the dcache + * @nr_to_scan: number of entries to try to free * - * Shrink the dcache. This is done when we need more memory, or simply when we - * need to unmount something (at which point we need to unuse all dentries). + * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. 
This is + * done when we need more memory and called from the superblock shrinker + * function. * - * This function may fail to free any resources if all the dentries are in use. + * This function may fail to free any resources if all the dentries are in + * use. */ -static void prune_dcache(int count) +void prune_dcache_sb(struct super_block *sb, int nr_to_scan) { - struct super_block *sb, *p = NULL; - int w_count; - int unused = dentry_stat.nr_unused; - int prune_ratio; - int pruned; - - if (unused == 0 || count == 0) - return; - if (count >= unused) - prune_ratio = 1; - else - prune_ratio = unused / count; - spin_lock(&sb_lock); - list_for_each_entry(sb, &super_blocks, s_list) { - if (list_empty(&sb->s_instances)) - continue; - if (sb->s_nr_dentry_unused == 0) - continue; - sb->s_count++; - /* Now, we reclaim unused dentrins with fairness. - * We reclaim them same percentage from each superblock. - * We calculate number of dentries to scan on this sb - * as follows, but the implementation is arranged to avoid - * overflows: - * number of dentries to scan on this sb = - * count * (number of dentries on this sb / - * number of dentries in the machine) - */ - spin_unlock(&sb_lock); - if (prune_ratio != 1) - w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1; - else - w_count = sb->s_nr_dentry_unused; - pruned = w_count; - /* - * We need to be sure this filesystem isn't being unmounted, - * otherwise we could race with generic_shutdown_super(), and - * end up holding a reference to an inode while the filesystem - * is unmounted. So we try to get s_umount, and make sure - * s_root isn't NULL. - */ - if (down_read_trylock(&sb->s_umount)) { - if ((sb->s_root != NULL) && - (!list_empty(&sb->s_dentry_lru))) { - __shrink_dcache_sb(sb, &w_count, - DCACHE_REFERENCED); - pruned -= w_count; - } - up_read(&sb->s_umount); - } - spin_lock(&sb_lock); - if (p) - __put_super(p); - count -= pruned; - p = sb; - /* more work left to do? 
*/ - if (count <= 0) - break; - } - if (p) - __put_super(p); - spin_unlock(&sb_lock); + __shrink_dcache_sb(sb, nr_to_scan, DCACHE_REFERENCED); } /** @@ -1238,42 +1175,10 @@ void shrink_dcache_parent(struct dentry * parent) int found; while ((found = select_parent(parent)) != 0) - __shrink_dcache_sb(sb, &found, 0); + __shrink_dcache_sb(sb, found, 0); } EXPORT_SYMBOL(shrink_dcache_parent); -/* - * Scan `sc->nr_slab_to_reclaim' dentries and return the number which remain. - * - * We need to avoid reentering the filesystem if the caller is performing a - * GFP_NOFS allocation attempt. One example deadlock is: - * - * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache-> - * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode-> - * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK. - * - * In this case we return -1 to tell the caller that we baled. - */ -static int shrink_dcache_memory(struct shrinker *shrink, - struct shrink_control *sc) -{ - int nr = sc->nr_to_scan; - gfp_t gfp_mask = sc->gfp_mask; - - if (nr) { - if (!(gfp_mask & __GFP_FS)) - return -1; - prune_dcache(nr); - } - - return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; -} - -static struct shrinker dcache_shrinker = { - .shrink = shrink_dcache_memory, - .seeks = DEFAULT_SEEKS, -}; - /** * __d_alloc - allocate a dcache entry * @sb: filesystem it will belong to @@ -3083,8 +2988,6 @@ static void __init dcache_init(void) */ dentry_cache = KMEM_CACHE(dentry, SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD); - - register_shrinker(&dcache_shrinker); /* Hash may have been set up in dcache_init_early */ if (!hashdist) -- cgit v1.2.1 From 4513d899c418ff69052420e29e354e4c64b3ef76 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 17 Jul 2011 10:52:14 -0400 Subject: switch d_add_ci() to d_splice_alias() in "found negative" case as well Signed-off-by: Al Viro --- fs/dcache.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) (limited 
to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index 2762804a140d..d1d6b3349ec7 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1652,26 +1652,12 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, * Negative dentry: instantiate it unless the inode is a directory and * already has a dentry. */ - spin_lock(&inode->i_lock); - if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) { - __d_instantiate(found, inode); - spin_unlock(&inode->i_lock); - security_d_instantiate(found, inode); - return found; + new = d_splice_alias(inode, found); + if (new) { + dput(found); + found = new; } - - /* - * In case a directory already has a (disconnected) entry grab a - * reference to it, move it in place and use it. - */ - new = list_entry(inode->i_dentry.next, struct dentry, d_alias); - __dget(new); - spin_unlock(&inode->i_lock); - security_d_instantiate(found, inode); - d_move(new, found); - iput(inode); - dput(found); - return new; + return found; err_out: iput(inode); -- cgit v1.2.1 From 86c98e8cdb21ff4628f4d48559ab6e006380fa4b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 18 Jul 2011 23:39:07 -0400 Subject: Remove dead code in dget_parent() ->d_parent is never NULL... Signed-off-by: Al Viro --- fs/dcache.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'fs/dcache.c') diff --git a/fs/dcache.c b/fs/dcache.c index d1d6b3349ec7..3c34ac0e9a1b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -549,10 +549,6 @@ repeat: */ rcu_read_lock(); ret = dentry->d_parent; - if (!ret) { - rcu_read_unlock(); - goto out; - } spin_lock(&ret->d_lock); if (unlikely(ret != dentry->d_parent)) { spin_unlock(&ret->d_lock); @@ -563,7 +559,6 @@ repeat: BUG_ON(!ret->d_count); ret->d_count++; spin_unlock(&ret->d_lock); -out: return ret; } EXPORT_SYMBOL(dget_parent); -- cgit v1.2.1