diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/Kconfig | 6 | ||||
-rw-r--r-- | net/ipv4/arp.c | 52 | ||||
-rw-r--r-- | net/ipv4/devinet.c | 1 | ||||
-rw-r--r-- | net/ipv4/ipconfig.c | 2 | ||||
-rw-r--r-- | net/ipv4/route.c | 7 | ||||
-rw-r--r-- | net/ipv4/syncookies.c | 3 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 30 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 22 |
8 files changed, 89 insertions, 34 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 70491d9035eb..0c94a1ac2946 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -166,7 +166,7 @@ config IP_PNP_DHCP If unsure, say Y. Note that if you want to use DHCP, a DHCP server must be operating on your network. Read - <file:Documentation/filesystems/nfsroot.txt> for details. + <file:Documentation/filesystems/nfs/nfsroot.txt> for details. config IP_PNP_BOOTP bool "IP: BOOTP support" @@ -181,7 +181,7 @@ config IP_PNP_BOOTP does BOOTP itself, providing all necessary information on the kernel command line, you can say N here. If unsure, say Y. Note that if you want to use BOOTP, a BOOTP server must be operating on your network. - Read <file:Documentation/filesystems/nfsroot.txt> for details. + Read <file:Documentation/filesystems/nfs/nfsroot.txt> for details. config IP_PNP_RARP bool "IP: RARP support" @@ -194,7 +194,7 @@ config IP_PNP_RARP older protocol which is being obsoleted by BOOTP and DHCP), say Y here. Note that if you want to use RARP, a RARP server must be operating on your network. Read - <file:Documentation/filesystems/nfsroot.txt> for details. + <file:Documentation/filesystems/nfs/nfsroot.txt> for details. # not yet ready.. # bool ' IP: ARP support' CONFIG_IP_PNP_ARP diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index c95cd93acf29..078709233bc4 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -70,6 +70,7 @@ * bonding can change the skb before * sending (e.g. insert 8021q tag). * Harald Welte : convert to make use of jenkins hash + * Jesper D. Brouer: Proxy ARP PVLAN RFC 3069 support. */ #include <linux/module.h> @@ -524,12 +525,15 @@ int arp_bind_neighbour(struct dst_entry *dst) /* * Check if we can use proxy ARP for this path */ - -static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt) +static inline int arp_fwd_proxy(struct in_device *in_dev, + struct net_device *dev, struct rtable *rt) { struct in_device *out_dev; int imi, omi = -1; + if (rt->u.dst.dev == dev) + return 0; + if (!IN_DEV_PROXY_ARP(in_dev)) return 0; @@ -548,6 +552,43 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt) } /* + * Check for RFC3069 proxy arp private VLAN (allow to send back to same dev) + * + * RFC3069 supports proxy arp replies back to the same interface. This + * is done to support (ethernet) switch features, like RFC 3069, where + * the individual ports are not allowed to communicate with each + * other, BUT they are allowed to talk to the upstream router. As + * described in RFC 3069, it is possible to allow these hosts to + * communicate through the upstream router, by proxy_arp'ing. + * + * RFC 3069: "VLAN Aggregation for Efficient IP Address Allocation" + * + * This technology is known by different names: + * In RFC 3069 it is called VLAN Aggregation. + * Cisco and Allied Telesyn call it Private VLAN. + * Hewlett-Packard call it Source-Port filtering or port-isolation. + * Ericsson call it MAC-Forced Forwarding (RFC Draft). + * + */ +static inline int arp_fwd_pvlan(struct in_device *in_dev, + struct net_device *dev, struct rtable *rt, + __be32 sip, __be32 tip) +{ + /* Private VLAN is only concerned about the same ethernet segment */ + if (rt->u.dst.dev != dev) + return 0; + + /* Don't reply on self probes (often done by windowz boxes)*/ + if (sip == tip) + return 0; + + if (IN_DEV_PROXY_ARP_PVLAN(in_dev)) + return 1; + else + return 0; +} + +/* * Interface to link layer: send routine and receive handler. */ @@ -833,8 +874,11 @@ static int arp_process(struct sk_buff *skb) } goto out; } else if (IN_DEV_FORWARD(in_dev)) { - if (addr_type == RTN_UNICAST && rt->u.dst.dev != dev && - (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) { + if (addr_type == RTN_UNICAST && + (arp_fwd_proxy(in_dev, dev, rt) || + arp_fwd_pvlan(in_dev, dev, rt, sip, tip) || + pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) + { n = neigh_event_ns(&arp_tbl, sha, &sip, dev); if (n) neigh_release(n); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 040c4f05b653..cd71a3908391 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1408,6 +1408,7 @@ static struct devinet_sysctl_table { DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), + DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 4e08b7f2331c..10a6a604bf32 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1446,7 +1446,7 @@ late_initcall(ip_auto_config); /* * Decode any IP configuration options in the "ip=" or "nfsaddrs=" kernel - * command line parameter. See Documentation/filesystems/nfsroot.txt. + * command line parameter. See Documentation/filesystems/nfs/nfsroot.txt. */ static int __init ic_proto_name(char *name) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e446496f564f..1cc339441e7d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1988,8 +1988,13 @@ static int __mkroute_input(struct sk_buff *skb, if (skb->protocol != htons(ETH_P_IP)) { /* Not IP (i.e. ARP). Do not create route, if it is * invalid for proxy arp. DNAT routes are always valid. + * + * Proxy arp feature have been extended to allow, ARP + * replies back to the same interface, to support + * Private VLAN switch technologies. See arp.c. */ - if (out_dev == in_dev) { + if (out_dev == in_dev && + IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) { err = -EINVAL; goto cleanup; } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 66fd80ef2473..5c24db4a3c91 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -358,7 +358,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, tcp_select_initial_window(tcp_full_space(sk), req->mss, &req->rcv_wnd, &req->window_clamp, - ireq->wscale_ok, &rcv_wscale); + ireq->wscale_ok, &rcv_wscale, + dst_metric(&rt->u.dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b0a26bb25e2e..d5d69ea8f249 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -536,8 +536,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) tp->nonagle &= ~TCP_NAGLE_PUSH; } -static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, - struct sk_buff *skb) +static inline void tcp_mark_urg(struct tcp_sock *tp, int flags) { if (flags & MSG_OOB) tp->snd_up = tp->write_seq; @@ -546,13 +545,13 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, static inline void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle) { - struct tcp_sock *tp = tcp_sk(sk); - if (tcp_send_head(sk)) { - struct sk_buff *skb = tcp_write_queue_tail(sk); + struct tcp_sock *tp = tcp_sk(sk); + if (!(flags & MSG_MORE) || forced_push(tp)) - tcp_mark_push(tp, skb); - tcp_mark_urg(tp, flags, skb); + tcp_mark_push(tp, tcp_write_queue_tail(sk)); + + tcp_mark_urg(tp, flags); __tcp_push_pending_frames(sk, mss_now, (flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle); } @@ -877,12 +876,12 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, #define TCP_PAGE(sk) (sk->sk_sndmsg_page) #define TCP_OFF(sk) (sk->sk_sndmsg_off) -static inline int select_size(struct sock *sk) +static inline int select_size(struct sock *sk, int sg) { struct tcp_sock *tp = tcp_sk(sk); int tmp = tp->mss_cache; - if (sk->sk_route_caps & NETIF_F_SG) { + if (sg) { if (sk_can_gso(sk)) tmp = 0; else { @@ -906,7 +905,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, struct sk_buff *skb; int iovlen, flags; int mss_now, size_goal; - int err, copied; + int sg, err, copied; long timeo; lock_sock(sk); @@ -934,6 +933,8 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto out_err; + sg = sk->sk_route_caps & NETIF_F_SG; + while (--iovlen >= 0) { int seglen = iov->iov_len; unsigned char __user *from = iov->iov_base; @@ -959,8 +960,9 @@ new_segment: if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; - skb = sk_stream_alloc_skb(sk, select_size(sk), - sk->sk_allocation); + skb = sk_stream_alloc_skb(sk, + select_size(sk, sg), + sk->sk_allocation); if (!skb) goto wait_for_memory; @@ -997,9 +999,7 @@ new_segment: /* We can extend the last page * fragment. */ merge = 1; - } else if (i == MAX_SKB_FRAGS || - (!i && - !(sk->sk_route_caps & NETIF_F_SG))) { + } else if (i == MAX_SKB_FRAGS || !sg) { /* Need to add new fragment and cannot * do this because interface is non-SG, * or because all the page slots are diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 383ce237640f..4a1605d3f909 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -183,7 +183,8 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) */ void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, - int wscale_ok, __u8 *rcv_wscale) + int wscale_ok, __u8 *rcv_wscale, + __u32 init_rcv_wnd) { unsigned int space = (__space < 0 ? 0 : __space); @@ -232,7 +233,13 @@ void tcp_select_initial_window(int __space, __u32 mss, init_cwnd = 2; else if (mss > 1460) init_cwnd = 3; - if (*rcv_wnd > init_cwnd * mss) + /* when initializing use the value from init_rcv_wnd + * rather than the default from above + */ + if (init_rcv_wnd && + (*rcv_wnd > init_rcv_wnd * mss)) + *rcv_wnd = init_rcv_wnd * mss; + else if (*rcv_wnd > init_cwnd * mss) *rcv_wnd = init_cwnd * mss; } @@ -1794,11 +1801,6 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle) { - struct sk_buff *skb = tcp_send_head(sk); - - if (!skb) - return; - /* If we are closed, the bytes will have to remain here. * In time closedown will finish, we empty the write queue and * all will be happy. @@ -2422,7 +2424,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, &req->rcv_wnd, &req->window_clamp, ireq->wscale_ok, - &rcv_wscale); + &rcv_wscale, + dst_metric(dst, RTAX_INITRWND)); ireq->rcv_wscale = rcv_wscale; } @@ -2549,7 +2552,8 @@ static void tcp_connect_init(struct sock *sk) &tp->rcv_wnd, &tp->window_clamp, sysctl_tcp_window_scaling, - &rcv_wscale); + &rcv_wscale, + dst_metric(dst, RTAX_INITRWND)); tp->rx_opt.rcv_wscale = rcv_wscale; tp->rcv_ssthresh = tp->rcv_wnd; |