diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-24 10:01:50 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-24 10:01:50 -0700 |
commit | 3c4cfadef6a1665d9cd02a543782d03d3e6740c6 (patch) | |
tree | 3df72faaacd494d5ac8c9668df4f529b1b5e4457 /include/net/dst.h | |
parent | e017507f37d5cb8b541df165a824958bc333bec3 (diff) | |
parent | 320f5ea0cedc08ef65d67e056bcb9d181386ef2c (diff) | |
download | blackbird-op-linux-3c4cfadef6a1665d9cd02a543782d03d3e6740c6.tar.gz blackbird-op-linux-3c4cfadef6a1665d9cd02a543782d03d3e6740c6.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking changes from David S Miller:
1) Remove the ipv4 routing cache. Now lookups go directly into the FIB
trie and use prebuilt routes cached there.
No more garbage collection, no more rDOS attacks on the routing
cache. Instead we now get predictable and consistent performance,
no matter what the pattern of traffic we service.
This has been almost 2 years in the making. Special thanks to
Julian Anastasov, Eric Dumazet, Steffen Klassert, and others who
have helped along the way.
I'm sure that with a change of this magnitude there will be some
kind of fallout, but such things ought the be simple to fix at this
point. Luckily I'm not European so I'll be around all of August to
fix things :-)
The major stages of this work here are each fronted by a forced
merge commit whose commit message contains a top-level description
of the motivations and implementation issues.
2) Pre-demux of established ipv4 TCP sockets, saves a route demux on
input.
3) TCP SYN/ACK performance tweaks from Eric Dumazet.
4) Add namespace support for netfilter L4 conntrack helpers, from Gao
Feng.
5) Add config mechanism for Energy Efficient Ethernet to ethtool, from
Yuval Mintz.
6) Remove quadratic behavior from /proc/net/unix, from Eric Dumazet.
7) Support for connection tracker helpers in userspace, from Pablo
Neira Ayuso.
8) Allow userspace driven TX load balancing functions in TEAM driver,
from Jiri Pirko.
9) Kill off NLMSG_PUT and RTA_PUT macros, more gross stuff with
embedded gotos.
10) TCP Small Queues, essentially minimize the amount of TCP data queued
up in the packet scheduler layer. Whereas the existing BQL (Byte
Queue Limits) limits the pkt_sched --> netdevice queuing levels,
this controls the TCP --> pkt_sched queueing levels.
From Eric Dumazet.
11) Reduce the number of get_page/put_page ops done on SKB fragments,
from Alexander Duyck.
12) Implement protection against blind resets in TCP (RFC 5961), from
Eric Dumazet.
13) Support the client side of TCP Fast Open, basically the ability to
send data in the SYN exchange, from Yuchung Cheng.
Basically, the sender queues up data with a sendmsg() call using
MSG_FASTOPEN, then they do the connect() which emits the queued up
fastopen data.
14) Avoid all the problems we get into in TCP when timers or PMTU events
hit a locked socket. The TCP Small Queues changes added a
tcp_release_cb() that allows us to queue work up to the
release_sock() caller, and that's what we use here too. From Eric
Dumazet.
15) Zero copy on TX support for TUN driver, from Michael S. Tsirkin.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1870 commits)
genetlink: define lockdep_genl_is_held() when CONFIG_LOCKDEP
r8169: revert "add byte queue limit support".
ipv4: Change rt->rt_iif encoding.
net: Make skb->skb_iif always track skb->dev
ipv4: Prepare for change of rt->rt_iif encoding.
ipv4: Remove all RTCF_DIRECTSRC handliing.
ipv4: Really ignore ICMP address requests/replies.
decnet: Don't set RTCF_DIRECTSRC.
net/ipv4/ip_vti.c: Fix __rcu warnings detected by sparse.
ipv4: Remove redundant assignment
rds: set correct msg_namelen
openvswitch: potential NULL deref in sample()
tcp: dont drop MTU reduction indications
bnx2x: Add new 57840 device IDs
tcp: avoid oops in tcp_metrics and reset tcpm_stamp
niu: Change niu_rbr_fill() to use unlikely() to check niu_rbr_add_page() return value
niu: Fix to check for dma mapping errors.
net: Fix references to out-of-scope variables in put_cmsg_compat()
net: ethernet: davinci_emac: add pm_runtime support
net: ethernet: davinci_emac: Remove unnecessary #include
...
Diffstat (limited to 'include/net/dst.h')
-rw-r--r-- | include/net/dst.h | 92 |
1 files changed, 51 insertions, 41 deletions
diff --git a/include/net/dst.h b/include/net/dst.h index 8197eadca819..baf597890064 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -42,16 +42,16 @@ struct dst_entry { struct dst_entry *from; }; struct dst_entry *path; - struct neighbour __rcu *_neighbour; + void *__pad0; #ifdef CONFIG_XFRM struct xfrm_state *xfrm; #else void *__pad1; #endif - int (*input)(struct sk_buff*); - int (*output)(struct sk_buff*); + int (*input)(struct sk_buff *); + int (*output)(struct sk_buff *); - int flags; + unsigned short flags; #define DST_HOST 0x0001 #define DST_NOXFRM 0x0002 #define DST_NOPOLICY 0x0004 @@ -62,8 +62,23 @@ struct dst_entry { #define DST_FAKE_RTABLE 0x0080 #define DST_XFRM_TUNNEL 0x0100 + unsigned short pending_confirm; + short error; + + /* A non-zero value of dst->obsolete forces by-hand validation + * of the route entry. Positive values are set by the generic + * dst layer to indicate that the entry has been forcefully + * destroyed. + * + * Negative values are used by the implementation layer code to + * force invocation of the dst_ops->check() method. + */ short obsolete; +#define DST_OBSOLETE_NONE 0 +#define DST_OBSOLETE_DEAD 2 +#define DST_OBSOLETE_FORCE_CHK -1 +#define DST_OBSOLETE_KILL -2 unsigned short header_len; /* more space at head required */ unsigned short trailer_len; /* space to reserve at tail */ #ifdef CONFIG_IP_ROUTE_CLASSID @@ -94,21 +109,6 @@ struct dst_entry { }; }; -static inline struct neighbour *dst_get_neighbour_noref(struct dst_entry *dst) -{ - return rcu_dereference(dst->_neighbour); -} - -static inline struct neighbour *dst_get_neighbour_noref_raw(struct dst_entry *dst) -{ - return rcu_dereference_raw(dst->_neighbour); -} - -static inline void dst_set_neighbour(struct dst_entry *dst, struct neighbour *neigh) -{ - rcu_assign_pointer(dst->_neighbour, neigh); -} - extern u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); extern const u32 dst_default_metrics[RTAX_MAX]; @@ -222,12 +222,6 @@ static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metr return msecs_to_jiffies(dst_metric(dst, metric)); } -static inline void set_dst_metric_rtt(struct dst_entry *dst, int metric, - unsigned long rtt) -{ - dst_metric_set(dst, metric, jiffies_to_msecs(rtt)); -} - static inline u32 dst_allfrag(const struct dst_entry *dst) { @@ -241,7 +235,7 @@ dst_metric_locked(const struct dst_entry *dst, int metric) return dst_metric(dst, RTAX_LOCK) & (1<<metric); } -static inline void dst_hold(struct dst_entry * dst) +static inline void dst_hold(struct dst_entry *dst) { /* * If your kernel compilation stops here, please check @@ -264,8 +258,7 @@ static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) dst->lastuse = time; } -static inline -struct dst_entry * dst_clone(struct dst_entry * dst) +static inline struct dst_entry *dst_clone(struct dst_entry *dst) { if (dst) atomic_inc(&dst->__refcnt); @@ -371,14 +364,15 @@ static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb) } extern int dst_discard(struct sk_buff *skb); -extern void *dst_alloc(struct dst_ops * ops, struct net_device *dev, - int initial_ref, int initial_obsolete, int flags); -extern void __dst_free(struct dst_entry * dst); -extern struct dst_entry *dst_destroy(struct dst_entry * dst); +extern void *dst_alloc(struct dst_ops *ops, struct net_device *dev, + int initial_ref, int initial_obsolete, + unsigned short flags); +extern void __dst_free(struct dst_entry *dst); +extern struct dst_entry *dst_destroy(struct dst_entry *dst); -static inline void dst_free(struct dst_entry * dst) +static inline void dst_free(struct dst_entry *dst) { - if (dst->obsolete > 1) + if (dst->obsolete > 0) return; if (!atomic_read(&dst->__refcnt)) { dst = dst_destroy(dst); @@ -396,19 +390,35 @@ static inline void dst_rcu_free(struct rcu_head *head) static inline void dst_confirm(struct dst_entry *dst) { - if (dst) { - struct neighbour *n; + dst->pending_confirm = 1; +} - rcu_read_lock(); - n = dst_get_neighbour_noref(dst); - neigh_confirm(n); - rcu_read_unlock(); +static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n, + struct sk_buff *skb) +{ + struct hh_cache *hh; + + if (unlikely(dst->pending_confirm)) { + n->confirmed = jiffies; + dst->pending_confirm = 0; } + + hh = &n->hh; + if ((n->nud_state & NUD_CONNECTED) && hh->hh_len) + return neigh_hh_output(hh, skb); + else + return n->output(n, skb); } static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, const void *daddr) { - return dst->ops->neigh_lookup(dst, daddr); + return dst->ops->neigh_lookup(dst, NULL, daddr); +} + +static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst, + struct sk_buff *skb) +{ + return dst->ops->neigh_lookup(dst, skb, NULL); } static inline void dst_link_failure(struct sk_buff *skb) |