From 81c3d5470ecc70564eb9209946730fe2be93ad06 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 3 Oct 2005 14:13:38 -0700 Subject: [INET]: speedup inet (tcp/dccp) lookups Arnaldo and I agreed it could be applied now, because I have other pending patches depending on this one (Thank you Arnaldo) (The other important patch moves skc_refcnt in a separate cache line, so that the SMP/NUMA performance doesnt suffer from cache line ping pongs) 1) First some performance data : -------------------------------- tcp_v4_rcv() wastes a *lot* of time in __inet_lookup_established() The most time critical code is : sk_for_each(sk, node, &head->chain) { if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } The sk_for_each() does use prefetch() hints but only the begining of "struct sock" is prefetched. As INET_MATCH first comparison uses inet_sk(__sk)->daddr, wich is far away from the begining of "struct sock", it has to bring into CPU cache cold cache line. Each iteration has to use at least 2 cache lines. This can be problematic if some chains are very long. 2) The goal ----------- The idea I had is to change things so that INET_MATCH() may return FALSE in 99% of cases only using the data already in the CPU cache, using one cache line per iteration. 3) Description of the patch --------------------------- Adds a new 'unsigned int skc_hash' field in 'struct sock_common', filling a 32 bits hole on 64 bits platform. struct sock_common { unsigned short skc_family; volatile unsigned char skc_state; unsigned char skc_reuse; int skc_bound_dev_if; struct hlist_node skc_node; struct hlist_node skc_bind_node; atomic_t skc_refcnt; + unsigned int skc_hash; struct proto *skc_prot; }; Store in this 32 bits field the full hash, not masked by (ehash_size - 1) Using this full hash as the first comparison done in INET_MATCH permits us immediatly skip the element without touching a second cache line in case of a miss. Suppress the sk_hashent/tw_hashent fields since skc_hash (aliased to sk_hash and tw_hash) already contains the slot number if we mask with (ehash_size - 1) File include/net/inet_hashtables.h 64 bits platforms : #define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) ((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \ ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) 32bits platforms: #define TCP_IPV4_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) && \ (inet_sk(__sk)->daddr == (__saddr)) && \ (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) - Adds a prefetch(head->chain.first) in __inet_lookup_established()/__tcp_v4_check_established() and __inet6_lookup_established()/__tcp_v6_check_established() and __dccp_v4_check_established() to bring into cache the first element of the list, before the {read|write}_lock(&head->lock); Signed-off-by: Eric Dumazet Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/sock.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index 8c48fbecb7cf..b6440805c420 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -99,6 +99,7 @@ struct proto; * @skc_node: main hash linkage for various protocol lookup tables * @skc_bind_node: bind hash linkage for various protocol lookup tables * @skc_refcnt: reference count + * @skc_hash: hash value used with various protocol lookup tables * @skc_prot: protocol handlers inside a network family * * This is the minimal network layer representation of sockets, the header @@ -112,6 +113,7 @@ struct sock_common { struct hlist_node skc_node; struct hlist_node skc_bind_node; atomic_t skc_refcnt; + unsigned int skc_hash; struct proto *skc_prot; }; @@ -139,7 +141,6 @@ struct sock_common { * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) * @sk_lingertime: %SO_LINGER l_linger setting - * @sk_hashent: hash entry in several tables (e.g. inet_hashinfo.ehash) * @sk_backlog: always used with the per-socket spinlock held * @sk_callback_lock: used with the callbacks in the end of this struct * @sk_error_queue: rarely used @@ -186,6 +187,7 @@ struct sock { #define sk_node __sk_common.skc_node #define sk_bind_node __sk_common.skc_bind_node #define sk_refcnt __sk_common.skc_refcnt +#define sk_hash __sk_common.skc_hash #define sk_prot __sk_common.skc_prot unsigned char sk_shutdown : 2, sk_no_check : 2, @@ -208,7 +210,6 @@ struct sock { unsigned int sk_allocation; int sk_sndbuf; int sk_route_caps; - int sk_hashent; unsigned long sk_flags; unsigned long sk_lingertime; /* -- cgit v1.2.1 From dd0fc66fb33cd610bc1a5db8a5e232d34879b4d7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 7 Oct 2005 07:46:04 +0100 Subject: [PATCH] gfp flags annotations - part 1 - added typedef unsigned int __nocast gfp_t; - replaced __nocast uses for gfp flags with gfp_t - it gives exactly the same warnings as far as sparse is concerned, doesn't change generated code (from gcc point of view we replaced unsigned int with typedef) and documents what's going on far better. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/net/sock.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/net/sock.h') diff --git a/include/net/sock.h b/include/net/sock.h index b6440805c420..ecb75526cba0 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -739,18 +739,18 @@ extern void FASTCALL(release_sock(struct sock *sk)); #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) extern struct sock *sk_alloc(int family, - unsigned int __nocast priority, + gfp_t priority, struct proto *prot, int zero_it); extern void sk_free(struct sock *sk); extern struct sock *sk_clone(const struct sock *sk, - const unsigned int __nocast priority); + const gfp_t priority); extern struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, - unsigned int __nocast priority); + gfp_t priority); extern struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, - unsigned int __nocast priority); + gfp_t priority); extern void sock_wfree(struct sk_buff *skb); extern void sock_rfree(struct sk_buff *skb); @@ -766,7 +766,7 @@ extern struct sk_buff *sock_alloc_send_skb(struct sock *sk, int noblock, int *errcode); extern void *sock_kmalloc(struct sock *sk, int size, - unsigned int __nocast priority); + gfp_t priority); extern void sock_kfree_s(struct sock *sk, void *mem, int size); extern void sk_send_sigurg(struct sock *sk); @@ -1201,7 +1201,7 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk) static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk, int size, int mem, - unsigned int __nocast gfp) + gfp_t gfp) { struct sk_buff *skb; int hdr_len; @@ -1224,7 +1224,7 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk, static inline struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, - unsigned int __nocast gfp) + gfp_t gfp) { return sk_stream_alloc_pskb(sk, size, 0, gfp); } @@ -1255,7 +1255,7 @@ static inline int sock_writeable(const struct sock *sk) return atomic_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf / 2); } -static inline unsigned int __nocast gfp_any(void) +static inline gfp_t gfp_any(void) { return in_softirq() ? GFP_ATOMIC : GFP_KERNEL; } -- cgit v1.2.1