tcp: Skip empty hash buckets faster in /proc/net/tcp

On most systems, most of the TCP established/time-wait hash buckets are empty. When walking the hash table for /proc/net/tcp, their read locks would always be acquired just to find out that they are empty. This patch changes the code to first check whether a bucket has any entries before taking its lock; that check is much cheaper than taking the lock. Since the hash tables are large, this makes a measurable difference when processing /proc/net/tcp, especially on architectures with a slow read_lock (e.g. PPC). On a 2GB Core2 system, "time cat /proc/net/tcp > /dev/null" (with a mostly empty hash table) goes from 0.046s to 0.005s. On systems with slower atomics (like P4 or POWER4) or larger hash tables (more RAM), the difference is much higher. This can be noticeable because there are some daemons around that regularly scan /proc/net/tcp. Original idea for this patch from Marcus Meissner, but redone by me. Signed-off-by: Andi Kleen <ak@suse.de> Signed-off-by: David S. Miller <davem@davemloft.net>
author: Andi Kleen <ak@suse.de> 2008-08-28 01:08:02 -0700
committer: David S. Miller <davem@davemloft.net> 2008-08-28 01:08:02 -0700
commit: 6eac56040787c3ff604fe7d48bbbb7897cd1387c (patch)
tree: 1d3271c33d8d65bfea4aaf5d770f73ccd6da5825 /net
parent: 4d40555250320520c5398569457962b3984fc75e (diff)
download: blackbird-op-linux-6eac56040787c3ff604fe7d48bbbb7897cd1387c.tar.gz
blackbird-op-linux-6eac56040787c3ff604fe7d48bbbb7897cd1387c.zip
1 files changed, 19 insertions, 7 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 44c1e934824b..37ca3843c40b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1946,6 +1946,12 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
 	return rc;
 }
 
+static inline int empty_bucket(struct tcp_iter_state *st)
+{
+	return hlist_empty(&tcp_hashinfo.ehash[st->bucket].chain) &&
+		hlist_empty(&tcp_hashinfo.ehash[st->bucket].twchain);
+}
+
 static void *established_get_first(struct seq_file *seq)
 {
 	struct tcp_iter_state* st = seq->private;
@@ -1958,6 +1964,10 @@ static void *established_get_first(struct seq_file *seq)
 		struct inet_timewait_sock *tw;
 		rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
 
+		/* Lockless fast path for the common case of empty buckets */
+		if (empty_bucket(st))
+			continue;
+
 		read_lock_bh(lock);
 		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
 			if (sk->sk_family != st->family ||
@@ -2008,13 +2018,15 @@ get_tw:
 		read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
 		st->state = TCP_SEQ_STATE_ESTABLISHED;
 
-		if (++st->bucket < tcp_hashinfo.ehash_size) {
-			read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
-			sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
-		} else {
-			cur = NULL;
-			goto out;
-		}
+		/* Look for next non empty bucket */
+		while (++st->bucket < tcp_hashinfo.ehash_size &&
+				empty_bucket(st))
+			;
+		if (st->bucket >= tcp_hashinfo.ehash_size)
+			return NULL;
+
+		read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+		sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
 	} else
 		sk = sk_next(sk);
author	Andi Kleen <ak@suse.de>	2008-08-28 01:08:02 -0700
committer	David S. Miller <davem@davemloft.net>	2008-08-28 01:08:02 -0700
commit	6eac56040787c3ff604fe7d48bbbb7897cd1387c (patch)
tree	1d3271c33d8d65bfea4aaf5d770f73ccd6da5825 /net
parent	4d40555250320520c5398569457962b3984fc75e (diff)
download	blackbird-op-linux-6eac56040787c3ff604fe7d48bbbb7897cd1387c.tar.gz blackbird-op-linux-6eac56040787c3ff604fe7d48bbbb7897cd1387c.zip