summaryrefslogtreecommitdiffstats
path: root/include/net/route.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-07-24 10:01:50 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-07-24 10:01:50 -0700
commit3c4cfadef6a1665d9cd02a543782d03d3e6740c6 (patch)
tree3df72faaacd494d5ac8c9668df4f529b1b5e4457 /include/net/route.h
parente017507f37d5cb8b541df165a824958bc333bec3 (diff)
parent320f5ea0cedc08ef65d67e056bcb9d181386ef2c (diff)
downloadblackbird-op-linux-3c4cfadef6a1665d9cd02a543782d03d3e6740c6.tar.gz
blackbird-op-linux-3c4cfadef6a1665d9cd02a543782d03d3e6740c6.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking changes from David S Miller: 1) Remove the ipv4 routing cache. Now lookups go directly into the FIB trie and use prebuilt routes cached there. No more garbage collection, no more rDOS attacks on the routing cache. Instead we now get predictable and consistent performance, no matter what the pattern of traffic we service. This has been almost 2 years in the making. Special thanks to Julian Anastasov, Eric Dumazet, Steffen Klassert, and others who have helped along the way. I'm sure that with a change of this magnitude there will be some kind of fallout, but such things ought the be simple to fix at this point. Luckily I'm not European so I'll be around all of August to fix things :-) The major stages of this work here are each fronted by a forced merge commit whose commit message contains a top-level description of the motivations and implementation issues. 2) Pre-demux of established ipv4 TCP sockets, saves a route demux on input. 3) TCP SYN/ACK performance tweaks from Eric Dumazet. 4) Add namespace support for netfilter L4 conntrack helpers, from Gao Feng. 5) Add config mechanism for Energy Efficient Ethernet to ethtool, from Yuval Mintz. 6) Remove quadratic behavior from /proc/net/unix, from Eric Dumazet. 7) Support for connection tracker helpers in userspace, from Pablo Neira Ayuso. 8) Allow userspace driven TX load balancing functions in TEAM driver, from Jiri Pirko. 9) Kill off NLMSG_PUT and RTA_PUT macros, more gross stuff with embedded gotos. 10) TCP Small Queues, essentially minimize the amount of TCP data queued up in the packet scheduler layer. Whereas the existing BQL (Byte Queue Limits) limits the pkt_sched --> netdevice queuing levels, this controls the TCP --> pkt_sched queueing levels. From Eric Dumazet. 11) Reduce the number of get_page/put_page ops done on SKB fragments, from Alexander Duyck. 12) Implement protection against blind resets in TCP (RFC 5961), from Eric Dumazet. 13) Support the client side of TCP Fast Open, basically the ability to send data in the SYN exchange, from Yuchung Cheng. Basically, the sender queues up data with a sendmsg() call using MSG_FASTOPEN, then they do the connect() which emits the queued up fastopen data. 14) Avoid all the problems we get into in TCP when timers or PMTU events hit a locked socket. The TCP Small Queues changes added a tcp_release_cb() that allows us to queue work up to the release_sock() caller, and that's what we use here too. From Eric Dumazet. 15) Zero copy on TX support for TUN driver, from Michael S. Tsirkin. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1870 commits) genetlink: define lockdep_genl_is_held() when CONFIG_LOCKDEP r8169: revert "add byte queue limit support". ipv4: Change rt->rt_iif encoding. net: Make skb->skb_iif always track skb->dev ipv4: Prepare for change of rt->rt_iif encoding. ipv4: Remove all RTCF_DIRECTSRC handliing. ipv4: Really ignore ICMP address requests/replies. decnet: Don't set RTCF_DIRECTSRC. net/ipv4/ip_vti.c: Fix __rcu warnings detected by sparse. ipv4: Remove redundant assignment rds: set correct msg_namelen openvswitch: potential NULL deref in sample() tcp: dont drop MTU reduction indications bnx2x: Add new 57840 device IDs tcp: avoid oops in tcp_metrics and reset tcpm_stamp niu: Change niu_rbr_fill() to use unlikely() to check niu_rbr_add_page() return value niu: Fix to check for dma mapping errors. net: Fix references to out-of-scope variables in put_cmsg_compat() net: ethernet: davinci_emac: add pm_runtime support net: ethernet: davinci_emac: Remove unnecessary #include ...
Diffstat (limited to 'include/net/route.h')
-rw-r--r--include/net/route.h76
1 files changed, 25 insertions, 51 deletions
diff --git a/include/net/route.h b/include/net/route.h
index 98705468ac03..c29ef2733f2d 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -40,45 +40,39 @@
#define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE))
struct fib_nh;
-struct inet_peer;
struct fib_info;
struct rtable {
struct dst_entry dst;
- /* Lookup key. */
- __be32 rt_key_dst;
- __be32 rt_key_src;
-
int rt_genid;
unsigned int rt_flags;
__u16 rt_type;
- __u8 rt_key_tos;
+ __u16 rt_is_input;
- __be32 rt_dst; /* Path destination */
- __be32 rt_src; /* Path source */
- int rt_route_iif;
int rt_iif;
- int rt_oif;
- __u32 rt_mark;
/* Info on neighbour */
__be32 rt_gateway;
/* Miscellaneous cached information */
- __be32 rt_spec_dst; /* RFC1122 specific destination */
- u32 rt_peer_genid;
- struct inet_peer *peer; /* long-living peer info */
- struct fib_info *fi; /* for client ref to shared metrics */
+ u32 rt_pmtu;
};
static inline bool rt_is_input_route(const struct rtable *rt)
{
- return rt->rt_route_iif != 0;
+ return rt->rt_is_input != 0;
}
static inline bool rt_is_output_route(const struct rtable *rt)
{
- return rt->rt_route_iif == 0;
+ return rt->rt_is_input == 0;
+}
+
+static inline __be32 rt_nexthop(const struct rtable *rt, __be32 daddr)
+{
+ if (rt->rt_gateway)
+ return rt->rt_gateway;
+ return daddr;
}
struct ip_rt_acct {
@@ -111,10 +105,7 @@ extern struct ip_rt_acct __percpu *ip_rt_acct;
struct in_device;
extern int ip_rt_init(void);
-extern void ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw,
- __be32 src, struct net_device *dev);
extern void rt_cache_flush(struct net *net, int how);
-extern void rt_cache_flush_batch(struct net *net);
extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp);
extern struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
struct sock *sk);
@@ -166,24 +157,16 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4
return ip_route_output_key(net, fl4);
}
-extern int ip_route_input_common(struct sk_buff *skb, __be32 dst, __be32 src,
- u8 tos, struct net_device *devin, bool noref);
-
-static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
- u8 tos, struct net_device *devin)
-{
- return ip_route_input_common(skb, dst, src, tos, devin, false);
-}
-
-static inline int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src,
- u8 tos, struct net_device *devin)
-{
- return ip_route_input_common(skb, dst, src, tos, devin, true);
-}
+extern int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
+ u8 tos, struct net_device *devin);
-extern unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph,
- unsigned short new_mtu, struct net_device *dev);
-extern void ip_rt_send_redirect(struct sk_buff *skb);
+extern void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
+ int oif, u32 mark, u8 protocol, int flow_flags);
+extern void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu);
+extern void ipv4_redirect(struct sk_buff *skb, struct net *net,
+ int oif, u32 mark, u8 protocol, int flow_flags);
+extern void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk);
+extern void ip_rt_send_redirect(struct sk_buff *skb);
extern unsigned int inet_addr_type(struct net *net, __be32 addr);
extern unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr);
@@ -244,8 +227,6 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32
if (inet_sk(sk)->transparent)
flow_flags |= FLOWI_FLAG_ANYSRC;
- if (protocol == IPPROTO_TCP)
- flow_flags |= FLOWI_FLAG_PRECOW_METRICS;
if (can_sleep)
flow_flags |= FLOWI_FLAG_CAN_SLEEP;
@@ -294,20 +275,13 @@ static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable
return rt;
}
-extern void rt_bind_peer(struct rtable *rt, __be32 daddr, int create);
-
-static inline struct inet_peer *rt_get_peer(struct rtable *rt, __be32 daddr)
-{
- if (rt->peer)
- return rt->peer;
-
- rt_bind_peer(rt, daddr, 0);
- return rt->peer;
-}
-
static inline int inet_iif(const struct sk_buff *skb)
{
- return skb_rtable(skb)->rt_iif;
+ int iif = skb_rtable(skb)->rt_iif;
+
+ if (iif)
+ return iif;
+ return skb->skb_iif;
}
extern int sysctl_ip_default_ttl;
OpenPOWER on IntegriCloud