summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Kconfig6
-rw-r--r--net/ipv4/arp.c52
-rw-r--r--net/ipv4/devinet.c1
-rw-r--r--net/ipv4/ipconfig.c2
-rw-r--r--net/ipv4/route.c7
-rw-r--r--net/ipv4/syncookies.c3
-rw-r--r--net/ipv4/tcp.c30
-rw-r--r--net/ipv4/tcp_output.c22
8 files changed, 89 insertions, 34 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 70491d9035eb..0c94a1ac2946 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -166,7 +166,7 @@ config IP_PNP_DHCP
If unsure, say Y. Note that if you want to use DHCP, a DHCP server
must be operating on your network. Read
- <file:Documentation/filesystems/nfsroot.txt> for details.
+ <file:Documentation/filesystems/nfs/nfsroot.txt> for details.
config IP_PNP_BOOTP
bool "IP: BOOTP support"
@@ -181,7 +181,7 @@ config IP_PNP_BOOTP
does BOOTP itself, providing all necessary information on the kernel
command line, you can say N here. If unsure, say Y. Note that if you
want to use BOOTP, a BOOTP server must be operating on your network.
- Read <file:Documentation/filesystems/nfsroot.txt> for details.
+ Read <file:Documentation/filesystems/nfs/nfsroot.txt> for details.
config IP_PNP_RARP
bool "IP: RARP support"
@@ -194,7 +194,7 @@ config IP_PNP_RARP
older protocol which is being obsoleted by BOOTP and DHCP), say Y
here. Note that if you want to use RARP, a RARP server must be
operating on your network. Read
- <file:Documentation/filesystems/nfsroot.txt> for details.
+ <file:Documentation/filesystems/nfs/nfsroot.txt> for details.
# not yet ready..
# bool ' IP: ARP support' CONFIG_IP_PNP_ARP
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index c95cd93acf29..078709233bc4 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -70,6 +70,7 @@
* bonding can change the skb before
* sending (e.g. insert 8021q tag).
* Harald Welte : convert to make use of jenkins hash
+ * Jesper D. Brouer: Proxy ARP PVLAN RFC 3069 support.
*/
#include <linux/module.h>
@@ -524,12 +525,15 @@ int arp_bind_neighbour(struct dst_entry *dst)
/*
* Check if we can use proxy ARP for this path
*/
-
-static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt)
+static inline int arp_fwd_proxy(struct in_device *in_dev,
+ struct net_device *dev, struct rtable *rt)
{
struct in_device *out_dev;
int imi, omi = -1;
+ if (rt->u.dst.dev == dev)
+ return 0;
+
if (!IN_DEV_PROXY_ARP(in_dev))
return 0;
@@ -548,6 +552,43 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt)
}
/*
+ * Check for RFC3069 proxy arp private VLAN (allow to send back to same dev)
+ *
+ * RFC3069 supports proxy arp replies back to the same interface. This
+ * is done to support (ethernet) switch features, like RFC 3069, where
+ * the individual ports are not allowed to communicate with each
+ * other, BUT they are allowed to talk to the upstream router. As
+ * described in RFC 3069, it is possible to allow these hosts to
+ * communicate through the upstream router, by proxy_arp'ing.
+ *
+ * RFC 3069: "VLAN Aggregation for Efficient IP Address Allocation"
+ *
+ * This technology is known by different names:
+ * In RFC 3069 it is called VLAN Aggregation.
+ * Cisco and Allied Telesyn call it Private VLAN.
+ * Hewlett-Packard call it Source-Port filtering or port-isolation.
+ * Ericsson call it MAC-Forced Forwarding (RFC Draft).
+ *
+ */
+static inline int arp_fwd_pvlan(struct in_device *in_dev,
+ struct net_device *dev, struct rtable *rt,
+ __be32 sip, __be32 tip)
+{
+ /* Private VLAN is only concerned about the same ethernet segment */
+ if (rt->u.dst.dev != dev)
+ return 0;
+
+ /* Don't reply on self probes (often done by windowz boxes)*/
+ if (sip == tip)
+ return 0;
+
+ if (IN_DEV_PROXY_ARP_PVLAN(in_dev))
+ return 1;
+ else
+ return 0;
+}
+
+/*
* Interface to link layer: send routine and receive handler.
*/
@@ -833,8 +874,11 @@ static int arp_process(struct sk_buff *skb)
}
goto out;
} else if (IN_DEV_FORWARD(in_dev)) {
- if (addr_type == RTN_UNICAST && rt->u.dst.dev != dev &&
- (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) {
+ if (addr_type == RTN_UNICAST &&
+ (arp_fwd_proxy(in_dev, dev, rt) ||
+ arp_fwd_pvlan(in_dev, dev, rt, sip, tip) ||
+ pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))
+ {
n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
if (n)
neigh_release(n);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 040c4f05b653..cd71a3908391 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1408,6 +1408,7 @@ static struct devinet_sysctl_table {
DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
+ DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 4e08b7f2331c..10a6a604bf32 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1446,7 +1446,7 @@ late_initcall(ip_auto_config);
/*
* Decode any IP configuration options in the "ip=" or "nfsaddrs=" kernel
- * command line parameter. See Documentation/filesystems/nfsroot.txt.
+ * command line parameter. See Documentation/filesystems/nfs/nfsroot.txt.
*/
static int __init ic_proto_name(char *name)
{
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index e446496f564f..1cc339441e7d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1988,8 +1988,13 @@ static int __mkroute_input(struct sk_buff *skb,
if (skb->protocol != htons(ETH_P_IP)) {
/* Not IP (i.e. ARP). Do not create route, if it is
* invalid for proxy arp. DNAT routes are always valid.
+ *
+ * Proxy arp feature have been extended to allow, ARP
+ * replies back to the same interface, to support
+ * Private VLAN switch technologies. See arp.c.
*/
- if (out_dev == in_dev) {
+ if (out_dev == in_dev &&
+ IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
err = -EINVAL;
goto cleanup;
}
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 66fd80ef2473..5c24db4a3c91 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -358,7 +358,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
tcp_select_initial_window(tcp_full_space(sk), req->mss,
&req->rcv_wnd, &req->window_clamp,
- ireq->wscale_ok, &rcv_wscale);
+ ireq->wscale_ok, &rcv_wscale,
+ dst_metric(&rt->u.dst, RTAX_INITRWND));
ireq->rcv_wscale = rcv_wscale;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b0a26bb25e2e..d5d69ea8f249 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -536,8 +536,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
tp->nonagle &= ~TCP_NAGLE_PUSH;
}
-static inline void tcp_mark_urg(struct tcp_sock *tp, int flags,
- struct sk_buff *skb)
+static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
{
if (flags & MSG_OOB)
tp->snd_up = tp->write_seq;
@@ -546,13 +545,13 @@ static inline void tcp_mark_urg(struct tcp_sock *tp, int flags,
static inline void tcp_push(struct sock *sk, int flags, int mss_now,
int nonagle)
{
- struct tcp_sock *tp = tcp_sk(sk);
-
if (tcp_send_head(sk)) {
- struct sk_buff *skb = tcp_write_queue_tail(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+
if (!(flags & MSG_MORE) || forced_push(tp))
- tcp_mark_push(tp, skb);
- tcp_mark_urg(tp, flags, skb);
+ tcp_mark_push(tp, tcp_write_queue_tail(sk));
+
+ tcp_mark_urg(tp, flags);
__tcp_push_pending_frames(sk, mss_now,
(flags & MSG_MORE) ? TCP_NAGLE_CORK : nonagle);
}
@@ -877,12 +876,12 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
#define TCP_PAGE(sk) (sk->sk_sndmsg_page)
#define TCP_OFF(sk) (sk->sk_sndmsg_off)
-static inline int select_size(struct sock *sk)
+static inline int select_size(struct sock *sk, int sg)
{
struct tcp_sock *tp = tcp_sk(sk);
int tmp = tp->mss_cache;
- if (sk->sk_route_caps & NETIF_F_SG) {
+ if (sg) {
if (sk_can_gso(sk))
tmp = 0;
else {
@@ -906,7 +905,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
struct sk_buff *skb;
int iovlen, flags;
int mss_now, size_goal;
- int err, copied;
+ int sg, err, copied;
long timeo;
lock_sock(sk);
@@ -934,6 +933,8 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto out_err;
+ sg = sk->sk_route_caps & NETIF_F_SG;
+
while (--iovlen >= 0) {
int seglen = iov->iov_len;
unsigned char __user *from = iov->iov_base;
@@ -959,8 +960,9 @@ new_segment:
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
- skb = sk_stream_alloc_skb(sk, select_size(sk),
- sk->sk_allocation);
+ skb = sk_stream_alloc_skb(sk,
+ select_size(sk, sg),
+ sk->sk_allocation);
if (!skb)
goto wait_for_memory;
@@ -997,9 +999,7 @@ new_segment:
/* We can extend the last page
* fragment. */
merge = 1;
- } else if (i == MAX_SKB_FRAGS ||
- (!i &&
- !(sk->sk_route_caps & NETIF_F_SG))) {
+ } else if (i == MAX_SKB_FRAGS || !sg) {
/* Need to add new fragment and cannot
* do this because interface is non-SG,
* or because all the page slots are
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 383ce237640f..4a1605d3f909 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -183,7 +183,8 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
*/
void tcp_select_initial_window(int __space, __u32 mss,
__u32 *rcv_wnd, __u32 *window_clamp,
- int wscale_ok, __u8 *rcv_wscale)
+ int wscale_ok, __u8 *rcv_wscale,
+ __u32 init_rcv_wnd)
{
unsigned int space = (__space < 0 ? 0 : __space);
@@ -232,7 +233,13 @@ void tcp_select_initial_window(int __space, __u32 mss,
init_cwnd = 2;
else if (mss > 1460)
init_cwnd = 3;
- if (*rcv_wnd > init_cwnd * mss)
+ /* when initializing use the value from init_rcv_wnd
+ * rather than the default from above
+ */
+ if (init_rcv_wnd &&
+ (*rcv_wnd > init_rcv_wnd * mss))
+ *rcv_wnd = init_rcv_wnd * mss;
+ else if (*rcv_wnd > init_cwnd * mss)
*rcv_wnd = init_cwnd * mss;
}
@@ -1794,11 +1801,6 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
int nonagle)
{
- struct sk_buff *skb = tcp_send_head(sk);
-
- if (!skb)
- return;
-
/* If we are closed, the bytes will have to remain here.
* In time closedown will finish, we empty the write queue and
* all will be happy.
@@ -2422,7 +2424,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
&req->rcv_wnd,
&req->window_clamp,
ireq->wscale_ok,
- &rcv_wscale);
+ &rcv_wscale,
+ dst_metric(dst, RTAX_INITRWND));
ireq->rcv_wscale = rcv_wscale;
}
@@ -2549,7 +2552,8 @@ static void tcp_connect_init(struct sock *sk)
&tp->rcv_wnd,
&tp->window_clamp,
sysctl_tcp_window_scaling,
- &rcv_wscale);
+ &rcv_wscale,
+ dst_metric(dst, RTAX_INITRWND));
tp->rx_opt.rcv_wscale = rcv_wscale;
tp->rcv_ssthresh = tp->rcv_wnd;
OpenPOWER on IntegriCloud