From dcf1a3573eae69937fb14462369c4d3e6f4a37f1 Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Wed, 22 Apr 2009 20:18:19 -0400 Subject: net/sunrpc/svc_xprt.c: fix sparse warnings Fix the following sparse warnings in net/sunrpc/svc_xprt.c. warning: symbol 'svc_recv' was not declared. Should it be static? warning: symbol 'svc_drop' was not declared. Should it be static? warning: symbol 'svc_send' was not declared. Should it be static? warning: symbol 'svc_close_all' was not declared. Should it be static? Signed-off-by: H Hartley Sweeten Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c200d92e57e4..f200393ac877 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -11,6 +11,7 @@ #include #include #include +#include #define RPCDBG_FACILITY RPCDBG_SVCXPRT -- cgit v1.2.1 From abc5c44d6284fab8fb21bcfc52c0f16f980637df Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 23 Apr 2009 19:31:25 -0400 Subject: SUNRPC: Fix error return value of svc_addr_len() The svc_addr_len() helper function returns -EAFNOSUPPORT if it doesn't recognize the address family of the passed-in socket address. However, the return type of this function is size_t, which means -EAFNOSUPPORT is turned into a very large positive value in this case. The check in svc_udp_recvfrom() to see if the return value is less than zero therefore won't work at all. Additionally, handle_connect_req() passes this value directly to memset(). This could cause memset() to clobber a large chunk of memory if svc_addr_len() has returned an error. Currently the address family of these addresses, however, is known to be supported long before handle_connect_req() is called, so this isn't a real risk. Change the error return value of svc_addr_len() to zero, which fits in the range of size_t, and is safer to pass to memset() directly. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index af3198814c15..8b0832834135 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -426,13 +426,14 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) long all[SVC_PKTINFO_SPACE / sizeof(long)]; } buffer; struct cmsghdr *cmh = &buffer.hdr; - int err, len; struct msghdr msg = { .msg_name = svc_addr(rqstp), .msg_control = cmh, .msg_controllen = sizeof(buffer), .msg_flags = MSG_DONTWAIT, }; + size_t len; + int err; if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) /* udp sockets need large rcvbuf as all pending @@ -464,8 +465,8 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) return -EAGAIN; } len = svc_addr_len(svc_addr(rqstp)); - if (len < 0) - return len; + if (len == 0) + return -EAFNOSUPPORT; rqstp->rq_addrlen = len; if (skb->tstamp.tv64 == 0) { skb->tstamp = ktime_get_real(); -- cgit v1.2.1 From 335c54bdc4d3bacdbd619ec95cd0b352435bd37f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 23 Apr 2009 19:32:25 -0400 Subject: NFSD: Prevent a buffer overflow in svc_xprt_names() The svc_xprt_names() function can overflow its buffer if it's so near the end of the passed in buffer that the "name too long" string still doesn't fit. Of course, it could never tell if it was near the end of the passed in buffer, since its only caller passes in zero as the buffer length. Let's make this API a little safer. Change svc_xprt_names() so it *always* checks for a buffer overflow, and change its only caller to pass in the correct buffer length. If svc_xprt_names() does overflow its buffer, it now fails with an ENAMETOOLONG errno, instead of trying to write a message at the end of the buffer. I don't like this much, but I can't figure out a clean way that's always safe to return some of the names, *and* an indication that the buffer was not long enough. The displayed error when doing a 'cat /proc/fs/nfsd/portlist' is "File name too long". Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/svc_xprt.c | 56 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index f200393ac877..6f33d33cc064 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1098,36 +1098,58 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, } EXPORT_SYMBOL_GPL(svc_find_xprt); -/* - * Format a buffer with a list of the active transports. A zero for - * the buflen parameter disables target buffer overflow checking. +static int svc_one_xprt_name(const struct svc_xprt *xprt, + char *pos, int remaining) +{ + int len; + + len = snprintf(pos, remaining, "%s %u\n", + xprt->xpt_class->xcl_name, + svc_xprt_local_port(xprt)); + if (len >= remaining) + return -ENAMETOOLONG; + return len; +} + +/** + * svc_xprt_names - format a buffer with a list of transport names + * @serv: pointer to an RPC service + * @buf: pointer to a buffer to be filled in + * @buflen: length of buffer to be filled in + * + * Fills in @buf with a string containing a list of transport names, + * each name terminated with '\n'. + * + * Returns positive length of the filled-in string on success; otherwise + * a negative errno value is returned if an error occurs. */ -int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen) +int svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen) { struct svc_xprt *xprt; - char xprt_str[64]; - int totlen = 0; - int len; + int len, totlen; + char *pos; /* Sanity check args */ if (!serv) return 0; spin_lock_bh(&serv->sv_lock); + + pos = buf; + totlen = 0; list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) { - len = snprintf(xprt_str, sizeof(xprt_str), - "%s %d\n", xprt->xpt_class->xcl_name, - svc_xprt_local_port(xprt)); - /* If the string was truncated, replace with error string */ - if (len >= sizeof(xprt_str)) - strcpy(xprt_str, "name-too-long\n"); - /* Don't overflow buffer */ - len = strlen(xprt_str); - if (buflen && (len + totlen >= buflen)) + len = svc_one_xprt_name(xprt, pos, buflen - totlen); + if (len < 0) { + *buf = '\0'; + totlen = len; + } + if (len <= 0) break; - strcpy(buf+totlen, xprt_str); + + pos += len; totlen += len; } + spin_unlock_bh(&serv->sv_lock); return totlen; } -- cgit v1.2.1 From bfba9ab4c64f0e5c33930711e6c073c285e01fcf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 23 Apr 2009 19:32:33 -0400 Subject: SUNRPC: pass buffer size to svc_addsock() Adjust the synopsis of svc_addsock() to pass in the size of the output buffer. Add a documenting comment. This is a cosmetic change for now. A subsequent patch will make sure the buffer length is passed to one_sock_name(), where the length will actually be useful. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 8b0832834135..6bec1e25b542 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1128,9 +1128,19 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, return svsk; } -int svc_addsock(struct svc_serv *serv, - int fd, - char *name_return) +/** + * svc_addsock - add a listener socket to an RPC service + * @serv: pointer to RPC service to which to add a new listener + * @fd: file descriptor of the new listener + * @name_return: pointer to buffer to fill in with name of listener + * @len: size of the buffer + * + * Fills in socket name and returns positive length of name if successful. + * Name is terminated with '\n'. On error, returns a negative errno + * value. + */ +int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, + const size_t len) { int err = 0; struct socket *so = sockfd_lookup(fd, &err); -- cgit v1.2.1 From 8435d34dbbe75678c3cdad3d53b1e7996a79b3bf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 23 Apr 2009 19:32:40 -0400 Subject: SUNRPC: pass buffer size to svc_sock_names() Adjust the synopsis of svc_sock_names() to pass in the size of the output buffer. Add a documenting comment. This is a cosmetic change for now. A subsequent patch will make sure the buffer length is passed to one_sock_name(), where the length will actually be useful. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 6bec1e25b542..032b52ea9541 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -259,8 +259,23 @@ static int one_sock_name(char *buf, struct svc_sock *svsk) return len; } -int -svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) +/** + * svc_sock_names - construct a list of listener names in a string + * @serv: pointer to RPC service + * @buf: pointer to a buffer to fill in with socket names + * @buflen: size of the buffer to be filled + * @toclose: pointer to '\0'-terminated C string containing the name + * of a listener to be closed + * + * Fills in @buf with a '\n'-separated list of names of listener + * sockets. If @toclose is not NULL, the socket named by @toclose + * is closed, and is not included in the output list. + * + * Returns positive length of the socket name string, or a negative + * errno value on error. + */ +int svc_sock_names(struct svc_serv *serv, char *buf, const size_t buflen, + const char *toclose) { struct svc_sock *svsk, *closesk = NULL; int len = 0; -- cgit v1.2.1 From e7942b9f2586fb15e1b898acc7c198ffd60aee4e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 23 Apr 2009 19:32:48 -0400 Subject: SUNRPC: Switch one_sock_name() to use snprintf() Use snprintf() in one_sock_name() to prevent overflowing the output buffer. If the name doesn't fit in the buffer, the buffer is filled in with an empty string, and -ENAMETOOLONG is returned. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 032b52ea9541..61d4a3281f94 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -240,22 +240,27 @@ out: /* * Report socket names for nfsdfs */ -static int one_sock_name(char *buf, struct svc_sock *svsk) +static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) { int len; switch(svsk->sk_sk->sk_family) { - case AF_INET: - len = sprintf(buf, "ipv4 %s %pI4 %d\n", + case PF_INET: + len = snprintf(buf, remaining, "ipv4 %s %pI4 %d\n", svsk->sk_sk->sk_protocol == IPPROTO_UDP ? "udp" : "tcp", &inet_sk(svsk->sk_sk)->rcv_saddr, inet_sk(svsk->sk_sk)->num); break; default: - len = sprintf(buf, "*unknown-%d*\n", + len = snprintf(buf, remaining, "*unknown-%d*\n", svsk->sk_sk->sk_family); } + + if (len >= remaining) { + *buf = '\0'; + return -ENAMETOOLONG; + } return len; } @@ -282,15 +287,21 @@ int svc_sock_names(struct svc_serv *serv, char *buf, const size_t buflen, if (!serv) return 0; + spin_lock_bh(&serv->sv_lock); list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list) { - int onelen = one_sock_name(buf+len, svsk); - if (toclose && strcmp(toclose, buf+len) == 0) + int onelen = svc_one_sock_name(svsk, buf + len, buflen - len); + if (onelen < 0) { + len = onelen; + break; + } + if (toclose && strcmp(toclose, buf + len) == 0) closesk = svsk; else len += onelen; } spin_unlock_bh(&serv->sv_lock); + if (closesk) /* Should unregister with portmap, but you cannot * unregister just one protocol... @@ -1195,7 +1206,7 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, sockfd_put(so); return err; } - return one_sock_name(name_return, svsk); + return svc_one_sock_name(svsk, name_return, len); } EXPORT_SYMBOL_GPL(svc_addsock); -- cgit v1.2.1 From 58de2f86585dd8fc785aca6625adee32af84b8d7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 23 Apr 2009 19:32:55 -0400 Subject: SUNRPC: Support PF_INET6 in one_sock_name() Add an arm to the switch statement in svc_one_sock_name() so it can construct the name of PF_INET6 sockets properly. Signed-off-by: Chuck Lever Cc: Aime Le Rouzic Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 61d4a3281f94..983bfa9f3102 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -252,6 +252,13 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) &inet_sk(svsk->sk_sk)->rcv_saddr, inet_sk(svsk->sk_sk)->num); break; + case PF_INET6: + len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n", + svsk->sk_sk->sk_protocol == IPPROTO_UDP ? + "udp" : "tcp", + &inet6_sk(svsk->sk_sk)->rcv_saddr, + inet_sk(svsk->sk_sk)->num); + break; default: len = snprintf(buf, remaining, "*unknown-%d*\n", svsk->sk_sk->sk_family); -- cgit v1.2.1 From 017cb47f46722f29d101a709a2094d151111ed6d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 23 Apr 2009 19:33:03 -0400 Subject: SUNRPC: Clean up one_sock_name() Clean up svc_one_sock_name() by setting up automatic variables for frequently used expressions. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 983bfa9f3102..4e6d406264a0 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -242,26 +242,27 @@ out: */ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) { + const struct sock *sk = svsk->sk_sk; + const char *proto_name = sk->sk_protocol == IPPROTO_UDP ? + "udp" : "tcp"; int len; - switch(svsk->sk_sk->sk_family) { + switch (sk->sk_family) { case PF_INET: len = snprintf(buf, remaining, "ipv4 %s %pI4 %d\n", - svsk->sk_sk->sk_protocol == IPPROTO_UDP ? - "udp" : "tcp", - &inet_sk(svsk->sk_sk)->rcv_saddr, - inet_sk(svsk->sk_sk)->num); + proto_name, + &inet_sk(sk)->rcv_saddr, + inet_sk(sk)->num); break; case PF_INET6: len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n", - svsk->sk_sk->sk_protocol == IPPROTO_UDP ? - "udp" : "tcp", - &inet6_sk(svsk->sk_sk)->rcv_saddr, - inet_sk(svsk->sk_sk)->num); + proto_name, + &inet6_sk(sk)->rcv_saddr, + inet_sk(sk)->num); break; default: len = snprintf(buf, remaining, "*unknown-%d*\n", - svsk->sk_sk->sk_family); + sk->sk_family); } if (len >= remaining) { -- cgit v1.2.1 From 6aad89c8376e432231b78d1bdbcc10ef7708b011 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 10 Jun 2009 12:52:21 -0700 Subject: sunrpc: align cache_clean work's timer Align cache_clean work. Signed-off-by: Anton Blanchard Cc: Neil Brown Cc: Trond Myklebust Cc: "David S. Miller" Signed-off-by: Andrew Morton Acked-by: David S. Miller Signed-off-by: J. Bruce Fields --- net/sunrpc/cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 20029a79a5de..ff0c23053d2f 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -488,7 +488,7 @@ static void do_cache_clean(struct work_struct *work) { int delay = 5; if (cache_clean() == -1) - delay = 30*HZ; + delay = round_jiffies_relative(30*HZ); if (list_empty(&cache_list)) delay = 0; -- cgit v1.2.1 From 59fb30660be3f3ead54b3850c401d462449caaaa Mon Sep 17 00:00:00 2001 From: Christian Engelmayer Date: Sun, 14 Jun 2009 00:05:26 +0200 Subject: sunrpc: potential memory leak in function rdma_read_xdr In case the check on ch_count fails the cleanup path is skipped and the previously allocated memory 'rpl_map', 'chl_map' is not freed. Reported by Coverity. Signed-off-by: Christian Engelmayer Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 42a6f9f20285..9e884383134f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -397,14 +397,14 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, if (!ch) return 0; - /* Allocate temporary reply and chunk maps */ - rpl_map = svc_rdma_get_req_map(); - chl_map = svc_rdma_get_req_map(); - svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); if (ch_count > RPCSVC_MAXPAGES) return -EINVAL; + /* Allocate temporary reply and chunk maps */ + rpl_map = svc_rdma_get_req_map(); + chl_map = svc_rdma_get_req_map(); + if (!xprt->sc_frmr_pg_list_len) sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, rpl_map, chl_map, ch_count, -- cgit v1.2.1 From aae2006e9b0c294114915c13022fa348e1a88023 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 1 Apr 2009 09:22:40 -0400 Subject: nfs41: sunrpc: Export the call prepare state for session reset Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 13 +++++++++++++ net/sunrpc/sched.c | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 5abab094441f..d00e8135f866 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -694,6 +694,19 @@ void rpc_force_rebind(struct rpc_clnt *clnt) } EXPORT_SYMBOL_GPL(rpc_force_rebind); +/* + * Restart an (async) RPC call from the call_prepare state. + * Usually called from within the exit handler. + */ +void +rpc_restart_call_prepare(struct rpc_task *task) +{ + if (RPC_ASSASSINATED(task)) + return; + task->tk_action = rpc_prepare_task; +} +EXPORT_SYMBOL_GPL(rpc_restart_call_prepare); + /* * Restart an (async) RPC call. Usually called from within the * exit handler. diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index ff50a0546865..1102ce1251f7 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -569,7 +569,7 @@ EXPORT_SYMBOL_GPL(rpc_delay); /* * Helper to call task->tk_ops->rpc_call_prepare */ -static void rpc_prepare_task(struct rpc_task *task) +void rpc_prepare_task(struct rpc_task *task) { task->tk_ops->rpc_call_prepare(task, task->tk_calldata); } -- cgit v1.2.1 From 18dca02aeb3c49dfce87c76be643b139d05cf647 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Wed, 1 Apr 2009 09:22:53 -0400 Subject: nfs41: Add ability to read RPC call direction on TCP stream. NFSv4.1 callbacks can arrive over an existing connection. This patch adds the logic to read the RPC call direction (call or reply). It does this by updating the state machine to look for the call direction invoking xs_tcp_read_calldir(...) after reading the XID. [nfs41: Keep track of RPC call/reply direction with a flag] As per 11/14/08 review of RFC 53/85. Add a new flag to track whether the incoming message is an RPC call or an RPC reply. TCP_RPC_REPLY is set in the 'struct sock_xprt' tcp_flags in xs_tcp_read_calldir() if the message is an RPC reply sent on the forechannel. It is cleared if the message is an RPC request sent on the back channel. Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy --- net/sunrpc/xprtsock.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e18596146013..8975c10591c3 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -270,6 +270,12 @@ struct sock_xprt { #define TCP_RCV_COPY_FRAGHDR (1UL << 1) #define TCP_RCV_COPY_XID (1UL << 2) #define TCP_RCV_COPY_DATA (1UL << 3) +#define TCP_RCV_COPY_CALLDIR (1UL << 4) + +/* + * TCP RPC flags + */ +#define TCP_RPC_REPLY (1UL << 5) static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt) { @@ -956,7 +962,7 @@ static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_rea transport->tcp_offset = 0; /* Sanity check of the record length */ - if (unlikely(transport->tcp_reclen < 4)) { + if (unlikely(transport->tcp_reclen < 8)) { dprintk("RPC: invalid TCP record fragment length\n"); xprt_force_disconnect(xprt); return; @@ -991,13 +997,48 @@ static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_r if (used != len) return; transport->tcp_flags &= ~TCP_RCV_COPY_XID; - transport->tcp_flags |= TCP_RCV_COPY_DATA; + transport->tcp_flags |= TCP_RCV_COPY_CALLDIR; transport->tcp_copied = 4; - dprintk("RPC: reading reply for XID %08x\n", + dprintk("RPC: reading %s XID %08x\n", + (transport->tcp_flags & TCP_RPC_REPLY) ? "reply for" + : "request with", ntohl(transport->tcp_xid)); xs_tcp_check_fraghdr(transport); } +static inline void xs_tcp_read_calldir(struct sock_xprt *transport, + struct xdr_skb_reader *desc) +{ + size_t len, used; + u32 offset; + __be32 calldir; + + /* + * We want transport->tcp_offset to be 8 at the end of this routine + * (4 bytes for the xid and 4 bytes for the call/reply flag). + * When this function is called for the first time, + * transport->tcp_offset is 4 (after having already read the xid). + */ + offset = transport->tcp_offset - sizeof(transport->tcp_xid); + len = sizeof(calldir) - offset; + dprintk("RPC: reading CALL/REPLY flag (%Zu bytes)\n", len); + used = xdr_skb_read_bits(desc, &calldir, len); + transport->tcp_offset += used; + if (used != len) + return; + transport->tcp_flags &= ~TCP_RCV_COPY_CALLDIR; + transport->tcp_flags |= TCP_RCV_COPY_DATA; + transport->tcp_copied += 4; + if (ntohl(calldir) == RPC_REPLY) + transport->tcp_flags |= TCP_RPC_REPLY; + else + transport->tcp_flags &= ~TCP_RPC_REPLY; + dprintk("RPC: reading %s CALL/REPLY flag %08x\n", + (transport->tcp_flags & TCP_RPC_REPLY) ? + "reply for" : "request with", calldir); + xs_tcp_check_fraghdr(transport); +} + static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -1114,6 +1155,11 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns xs_tcp_read_xid(transport, &desc); continue; } + /* Read in the call/reply flag */ + if (transport->tcp_flags & TCP_RCV_COPY_CALLDIR) { + xs_tcp_read_calldir(transport, &desc); + continue; + } /* Read in the request data */ if (transport->tcp_flags & TCP_RCV_COPY_DATA) { xs_tcp_read_request(xprt, &desc); -- cgit v1.2.1 From f4a2e418bfd03a1f25f515e8a92ecd584d96cfc1 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Wed, 1 Apr 2009 09:22:54 -0400 Subject: nfs41: Process the RPC call direction Reading and storing the RPC direction is a three step process. 1. xs_tcp_read_calldir() reads the RPC direction, but it will not store it in the XDR buffer since the 'struct rpc_rqst' is not yet available. 2. The 'struct rpc_rqst' is obtained during the TCP_RCV_COPY_DATA state. This state need not necessarily be preceeded by the TCP_RCV_READ_CALLDIR. For example, we may be reading a continuation packet to a large reply. Therefore, we can't simply obtain the 'struct rpc_rqst' during the TCP_RCV_READ_CALLDIR state and assume it's available during TCP_RCV_COPY_DATA. This patch adds a new TCP_RCV_READ_CALLDIR flag to indicate the need to read the RPC direction. It then uses TCP_RCV_COPY_CALLDIR to indicate the RPC direction needs to be saved after the 'struct rpc_rqst' has been allocated. 3. The 'struct rpc_rqst' is obtained by the xs_tcp_read_data() helper functions. xs_tcp_read_common() then saves the RPC direction in the XDR buffer if TCP_RCV_COPY_CALLDIR is set. This will happen when we're reading the data immediately after the direction was read. xs_tcp_read_common() then clears this flag. [was nfs41: Skip past the RPC call direction] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy [nfs41: sunrpc: Add RPC direction back into the XDR buffer] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy [nfs41: sunrpc: Don't skip past the RPC call direction] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy --- net/sunrpc/clnt.c | 5 +++-- net/sunrpc/xprtsock.c | 31 +++++++++++++++++++++++++------ 2 files changed, 28 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d00e8135f866..aca3ab6fc140 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1390,13 +1390,14 @@ rpc_verify_header(struct rpc_task *task) } if ((len -= 3) < 0) goto out_overflow; - p += 1; /* skip XID */ + p += 1; /* skip XID */ if ((n = ntohl(*p++)) != RPC_REPLY) { dprintk("RPC: %5u %s: not an RPC reply: %x\n", - task->tk_pid, __func__, n); + task->tk_pid, __func__, n); goto out_garbage; } + if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) { if (--len < 0) goto out_overflow; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 8975c10591c3..a48df1449ece 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -270,12 +270,13 @@ struct sock_xprt { #define TCP_RCV_COPY_FRAGHDR (1UL << 1) #define TCP_RCV_COPY_XID (1UL << 2) #define TCP_RCV_COPY_DATA (1UL << 3) -#define TCP_RCV_COPY_CALLDIR (1UL << 4) +#define TCP_RCV_READ_CALLDIR (1UL << 4) +#define TCP_RCV_COPY_CALLDIR (1UL << 5) /* * TCP RPC flags */ -#define TCP_RPC_REPLY (1UL << 5) +#define TCP_RPC_REPLY (1UL << 6) static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt) { @@ -997,7 +998,7 @@ static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_r if (used != len) return; transport->tcp_flags &= ~TCP_RCV_COPY_XID; - transport->tcp_flags |= TCP_RCV_COPY_CALLDIR; + transport->tcp_flags |= TCP_RCV_READ_CALLDIR; transport->tcp_copied = 4; dprintk("RPC: reading %s XID %08x\n", (transport->tcp_flags & TCP_RPC_REPLY) ? "reply for" @@ -1026,9 +1027,13 @@ static inline void xs_tcp_read_calldir(struct sock_xprt *transport, transport->tcp_offset += used; if (used != len) return; - transport->tcp_flags &= ~TCP_RCV_COPY_CALLDIR; + transport->tcp_flags &= ~TCP_RCV_READ_CALLDIR; + transport->tcp_flags |= TCP_RCV_COPY_CALLDIR; transport->tcp_flags |= TCP_RCV_COPY_DATA; - transport->tcp_copied += 4; + /* + * We don't yet have the XDR buffer, so we will write the calldir + * out after we get the buffer from the 'struct rpc_rqst' + */ if (ntohl(calldir) == RPC_REPLY) transport->tcp_flags |= TCP_RPC_REPLY; else @@ -1059,6 +1064,20 @@ static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_rea } rcvbuf = &req->rq_private_buf; + + if (transport->tcp_flags & TCP_RCV_COPY_CALLDIR) { + /* + * Save the RPC direction in the XDR buffer + */ + __be32 calldir = transport->tcp_flags & TCP_RPC_REPLY ? + htonl(RPC_REPLY) : 0; + + memcpy(rcvbuf->head[0].iov_base + transport->tcp_copied, + &calldir, sizeof(calldir)); + transport->tcp_copied += sizeof(calldir); + transport->tcp_flags &= ~TCP_RCV_COPY_CALLDIR; + } + len = desc->count; if (len > transport->tcp_reclen - transport->tcp_offset) { struct xdr_skb_reader my_desc; @@ -1156,7 +1175,7 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns continue; } /* Read in the call/reply flag */ - if (transport->tcp_flags & TCP_RCV_COPY_CALLDIR) { + if (transport->tcp_flags & TCP_RCV_READ_CALLDIR) { xs_tcp_read_calldir(transport, &desc); continue; } -- cgit v1.2.1 From f9acac1a4710ce88871f1ae323fc91c1cb6e9d52 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Wed, 1 Apr 2009 09:22:59 -0400 Subject: nfs41: Initialize new rpc_xprt callback related fields Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy --- net/sunrpc/xprt.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 06ca058572f2..52739f82df1e 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1049,6 +1049,11 @@ found: INIT_LIST_HEAD(&xprt->free); INIT_LIST_HEAD(&xprt->recv); +#if defined(CONFIG_NFS_V4_1) + spin_lock_init(&xprt->bc_pa_lock); + INIT_LIST_HEAD(&xprt->bc_pa_list); +#endif /* CONFIG_NFS_V4_1 */ + INIT_WORK(&xprt->task_cleanup, xprt_autoclose); setup_timer(&xprt->timer, xprt_init_autodisconnect, (unsigned long)xprt); -- cgit v1.2.1 From fb7a0b9addbdbbb13b7bc02abf55ee524ea19ce1 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Wed, 1 Apr 2009 09:23:00 -0400 Subject: nfs41: New backchannel helper routines This patch introduces support to setup the callback xprt on the client side. It allocates/ destroys the preallocated memory structures used to process backchannel requests. At setup time, xprt_setup_backchannel() is invoked to allocate one or more rpc_rqst structures and substructures. This ensures that they are available when an RPC callback arrives. The rpc_rqst structures are maintained in a linked list attached to the rpc_xprt structure. We keep track of the number of allocations so that they can be correctly removed when the channel is destroyed. When an RPC callback arrives, xprt_alloc_bc_request() is invoked to obtain a preallocated rpc_rqst structure. An rpc_xprt structure is returned, and its RPC_BC_PREALLOC_IN_USE bit is set in rpc_xprt->bc_flags. The structure is removed from the the list since it is now in use, and it will be later added back when its user is done with it. After the RPC callback replies, the rpc_rqst structure is returned by invoking xprt_free_bc_request(). This clears the RPC_BC_PREALLOC_IN_USE bit and adds it back to the list, allowing it to be reused by a subsequent RPC callback request. To be consistent with the reception of RPC messages, the backchannel requests should be placed into the 'struct rpc_rqst' rq_rcv_buf, which is then in turn copied to the 'struct rpc_rqst' rq_private_buf. [nfs41: Preallocate rpc_rqst receive buffer for handling callbacks] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy [Update copyright notice and explain page allocation] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy --- net/sunrpc/Makefile | 1 + net/sunrpc/backchannel_rqst.c | 278 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 279 insertions(+) create mode 100644 net/sunrpc/backchannel_rqst.c (limited to 'net') diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 5369aa369b35..4a01f9684b85 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -13,5 +13,6 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o \ svc_xprt.o +sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o sunrpc-$(CONFIG_PROC_FS) += stats.o sunrpc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c new file mode 100644 index 000000000000..f56e18a23498 --- /dev/null +++ b/net/sunrpc/backchannel_rqst.c @@ -0,0 +1,278 @@ +/****************************************************************************** + +(c) 2007 Network Appliance, Inc. All Rights Reserved. +(c) 2009 NetApp. All Rights Reserved. + +NetApp provides this source code under the GPL v2 License. +The GPL v2 license is available at +http://opensource.org/licenses/gpl-license.php. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ + +#include +#include + +#ifdef RPC_DEBUG +#define RPCDBG_FACILITY RPCDBG_TRANS +#endif + +#if defined(CONFIG_NFS_V4_1) + +/* + * Helper routines that track the number of preallocation elements + * on the transport. + */ +static inline int xprt_need_to_requeue(struct rpc_xprt *xprt) +{ + return xprt->bc_alloc_count > 0; +} + +static inline void xprt_inc_alloc_count(struct rpc_xprt *xprt, unsigned int n) +{ + xprt->bc_alloc_count += n; +} + +static inline int xprt_dec_alloc_count(struct rpc_xprt *xprt, unsigned int n) +{ + return xprt->bc_alloc_count -= n; +} + +/* + * Free the preallocated rpc_rqst structure and the memory + * buffers hanging off of it. + */ +static void xprt_free_allocation(struct rpc_rqst *req) +{ + struct xdr_buf *xbufp; + + dprintk("RPC: free allocations for req= %p\n", req); + BUG_ON(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); + xbufp = &req->rq_private_buf; + free_page((unsigned long)xbufp->head[0].iov_base); + xbufp = &req->rq_snd_buf; + free_page((unsigned long)xbufp->head[0].iov_base); + list_del(&req->rq_bc_pa_list); + kfree(req); +} + +/* + * Preallocate up to min_reqs structures and related buffers for use + * by the backchannel. This function can be called multiple times + * when creating new sessions that use the same rpc_xprt. The + * preallocated buffers are added to the pool of resources used by + * the rpc_xprt. Anyone of these resources may be used used by an + * incoming callback request. It's up to the higher levels in the + * stack to enforce that the maximum number of session slots is not + * being exceeded. + * + * Some callback arguments can be large. For example, a pNFS server + * using multiple deviceids. The list can be unbound, but the client + * has the ability to tell the server the maximum size of the callback + * requests. Each deviceID is 16 bytes, so allocate one page + * for the arguments to have enough room to receive a number of these + * deviceIDs. The NFS client indicates to the pNFS server that its + * callback requests can be up to 4096 bytes in size. + */ +int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) +{ + struct page *page_rcv = NULL, *page_snd = NULL; + struct xdr_buf *xbufp = NULL; + struct rpc_rqst *req, *tmp; + struct list_head tmp_list; + int i; + + dprintk("RPC: setup backchannel transport\n"); + + /* + * We use a temporary list to keep track of the preallocated + * buffers. Once we're done building the list we splice it + * into the backchannel preallocation list off of the rpc_xprt + * struct. This helps minimize the amount of time the list + * lock is held on the rpc_xprt struct. It also makes cleanup + * easier in case of memory allocation errors. + */ + INIT_LIST_HEAD(&tmp_list); + for (i = 0; i < min_reqs; i++) { + /* Pre-allocate one backchannel rpc_rqst */ + req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL); + if (req == NULL) { + printk(KERN_ERR "Failed to create bc rpc_rqst\n"); + goto out_free; + } + + /* Add the allocated buffer to the tmp list */ + dprintk("RPC: adding req= %p\n", req); + list_add(&req->rq_bc_pa_list, &tmp_list); + + req->rq_xprt = xprt; + INIT_LIST_HEAD(&req->rq_list); + INIT_LIST_HEAD(&req->rq_bc_list); + + /* Preallocate one XDR receive buffer */ + page_rcv = alloc_page(GFP_KERNEL); + if (page_rcv == NULL) { + printk(KERN_ERR "Failed to create bc receive xbuf\n"); + goto out_free; + } + xbufp = &req->rq_rcv_buf; + xbufp->head[0].iov_base = page_address(page_rcv); + xbufp->head[0].iov_len = PAGE_SIZE; + xbufp->tail[0].iov_base = NULL; + xbufp->tail[0].iov_len = 0; + xbufp->page_len = 0; + xbufp->len = PAGE_SIZE; + xbufp->buflen = PAGE_SIZE; + + /* Preallocate one XDR send buffer */ + page_snd = alloc_page(GFP_KERNEL); + if (page_snd == NULL) { + printk(KERN_ERR "Failed to create bc snd xbuf\n"); + goto out_free; + } + + xbufp = &req->rq_snd_buf; + xbufp->head[0].iov_base = page_address(page_snd); + xbufp->head[0].iov_len = 0; + xbufp->tail[0].iov_base = NULL; + xbufp->tail[0].iov_len = 0; + xbufp->page_len = 0; + xbufp->len = 0; + xbufp->buflen = PAGE_SIZE; + } + + /* + * Add the temporary list to the backchannel preallocation list + */ + spin_lock_bh(&xprt->bc_pa_lock); + list_splice(&tmp_list, &xprt->bc_pa_list); + xprt_inc_alloc_count(xprt, min_reqs); + spin_unlock_bh(&xprt->bc_pa_lock); + + dprintk("RPC: setup backchannel transport done\n"); + return 0; + +out_free: + /* + * Memory allocation failed, free the temporary list + */ + list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list) + xprt_free_allocation(req); + + dprintk("RPC: setup backchannel transport failed\n"); + return -1; +} +EXPORT_SYMBOL(xprt_setup_backchannel); + +/* + * Destroys the backchannel preallocated structures. + * Since these structures may have been allocated by multiple calls + * to xprt_setup_backchannel, we only destroy up to the maximum number + * of reqs specified by the caller. + * @xprt: the transport holding the preallocated strucures + * @max_reqs the maximum number of preallocated structures to destroy + */ +void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs) +{ + struct rpc_rqst *req = NULL, *tmp = NULL; + + dprintk("RPC: destroy backchannel transport\n"); + + BUG_ON(max_reqs == 0); + spin_lock_bh(&xprt->bc_pa_lock); + xprt_dec_alloc_count(xprt, max_reqs); + list_for_each_entry_safe(req, tmp, &xprt->bc_pa_list, rq_bc_pa_list) { + dprintk("RPC: req=%p\n", req); + xprt_free_allocation(req); + if (--max_reqs == 0) + break; + } + spin_unlock_bh(&xprt->bc_pa_lock); + + dprintk("RPC: backchannel list empty= %s\n", + list_empty(&xprt->bc_pa_list) ? "true" : "false"); +} +EXPORT_SYMBOL(xprt_destroy_backchannel); + +/* + * One or more rpc_rqst structure have been preallocated during the + * backchannel setup. Buffer space for the send and private XDR buffers + * has been preallocated as well. Use xprt_alloc_bc_request to allocate + * to this request. Use xprt_free_bc_request to return it. + * + * Return an available rpc_rqst, otherwise NULL if non are available. + */ +struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt) +{ + struct rpc_rqst *req; + + dprintk("RPC: allocate a backchannel request\n"); + spin_lock_bh(&xprt->bc_pa_lock); + if (!list_empty(&xprt->bc_pa_list)) { + req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, + rq_bc_pa_list); + list_del(&req->rq_bc_pa_list); + } else { + req = NULL; + } + spin_unlock_bh(&xprt->bc_pa_lock); + + if (req != NULL) { + set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); + req->rq_received = 0; + req->rq_bytes_sent = 0; + memcpy(&req->rq_private_buf, &req->rq_rcv_buf, + sizeof(req->rq_private_buf)); + } + dprintk("RPC: backchannel req=%p\n", req); + return req; +} + +/* + * Return the preallocated rpc_rqst structure and XDR buffers + * associated with this rpc_task. + */ +void xprt_free_bc_request(struct rpc_rqst *req) +{ + struct rpc_xprt *xprt = req->rq_xprt; + + dprintk("RPC: free backchannel req=%p\n", req); + + smp_mb__before_clear_bit(); + BUG_ON(!test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state)); + clear_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); + smp_mb__after_clear_bit(); + + if (!xprt_need_to_requeue(xprt)) { + /* + * The last remaining session was destroyed while this + * entry was in use. Free the entry and don't attempt + * to add back to the list because there is no need to + * have anymore preallocated entries. + */ + dprintk("RPC: Last session removed req=%p\n", req); + xprt_free_allocation(req); + return; + } + + /* + * Return it to the list of preallocations so that it + * may be reused by a new callback request. + */ + spin_lock_bh(&xprt->bc_pa_lock); + list_add(&req->rq_bc_pa_list, &xprt->bc_pa_list); + spin_unlock_bh(&xprt->bc_pa_lock); +} + +#endif /* CONFIG_NFS_V4_1 */ -- cgit v1.2.1 From 44b98efdd0a205bdca2cb63493350d06ff6804b1 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Wed, 1 Apr 2009 09:23:02 -0400 Subject: nfs41: New xs_tcp_read_data() Handles RPC replies and backchannel callbacks. Traditionally the NFS client has expected only RPC replies on its open connections. With NFSv4.1, callbacks can arrive over an existing open connection. This patch refactors the old xs_tcp_read_request() into an RPC reply handler: xs_tcp_read_reply(), a new backchannel callback handler: xs_tcp_read_callback(), and a common routine to read the data off the transport: xs_tcp_read_common(). The new xs_tcp_read_callback() queues callback requests onto a queue where the callback service (a separate thread) is listening for the processing. This patch incorporates work and suggestions from Rahul Iyer (iyer@netapp.com) and Benny Halevy (bhalevy@panasas.com). xs_tcp_read_callback() drops the connection when the number of expected callbacks is exceeded. Use xprt_force_disconnect(), ensuring tasks on the pending queue are awaken on disconnect. [nfs41: Keep track of RPC call/reply direction with a flag] [nfs41: Preallocate rpc_rqst receive buffer for handling callbacks] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy [nfs41: sunrpc: xs_tcp_read_callback() should use xprt_force_disconnect()] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy [Moves embedded #ifdefs into #ifdef function blocks] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy --- net/sunrpc/xprtsock.c | 144 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 126 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index a48df1449ece..e3e3a57116fb 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -34,6 +34,9 @@ #include #include #include +#ifdef CONFIG_NFS_V4_1 +#include +#endif #include #include @@ -1044,25 +1047,16 @@ static inline void xs_tcp_read_calldir(struct sock_xprt *transport, xs_tcp_check_fraghdr(transport); } -static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) +static inline void xs_tcp_read_common(struct rpc_xprt *xprt, + struct xdr_skb_reader *desc, + struct rpc_rqst *req) { - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - struct rpc_rqst *req; + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *rcvbuf; size_t len; ssize_t r; - /* Find and lock the request corresponding to this xid */ - spin_lock(&xprt->transport_lock); - req = xprt_lookup_rqst(xprt, transport->tcp_xid); - if (!req) { - transport->tcp_flags &= ~TCP_RCV_COPY_DATA; - dprintk("RPC: XID %08x request not found!\n", - ntohl(transport->tcp_xid)); - spin_unlock(&xprt->transport_lock); - return; - } - rcvbuf = &req->rq_private_buf; if (transport->tcp_flags & TCP_RCV_COPY_CALLDIR) { @@ -1114,7 +1108,7 @@ static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_rea "tcp_offset = %u, tcp_reclen = %u\n", xprt, transport->tcp_copied, transport->tcp_offset, transport->tcp_reclen); - goto out; + return; } dprintk("RPC: XID %08x read %Zd bytes\n", @@ -1130,11 +1124,125 @@ static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_rea transport->tcp_flags &= ~TCP_RCV_COPY_DATA; } -out: + return; +} + +/* + * Finds the request corresponding to the RPC xid and invokes the common + * tcp read code to read the data. + */ +static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, + struct xdr_skb_reader *desc) +{ + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + struct rpc_rqst *req; + + dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid)); + + /* Find and lock the request corresponding to this xid */ + spin_lock(&xprt->transport_lock); + req = xprt_lookup_rqst(xprt, transport->tcp_xid); + if (!req) { + dprintk("RPC: XID %08x request not found!\n", + ntohl(transport->tcp_xid)); + spin_unlock(&xprt->transport_lock); + return -1; + } + + xs_tcp_read_common(xprt, desc, req); + if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) xprt_complete_rqst(req->rq_task, transport->tcp_copied); + spin_unlock(&xprt->transport_lock); - xs_tcp_check_fraghdr(transport); + return 0; +} + +#if defined(CONFIG_NFS_V4_1) +/* + * Obtains an rpc_rqst previously allocated and invokes the common + * tcp read code to read the data. The result is placed in the callback + * queue. + * If we're unable to obtain the rpc_rqst we schedule the closing of the + * connection and return -1. + */ +static inline int xs_tcp_read_callback(struct rpc_xprt *xprt, + struct xdr_skb_reader *desc) +{ + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + struct rpc_rqst *req; + + req = xprt_alloc_bc_request(xprt); + if (req == NULL) { + printk(KERN_WARNING "Callback slot table overflowed\n"); + xprt_force_disconnect(xprt); + return -1; + } + + req->rq_xid = transport->tcp_xid; + dprintk("RPC: read callback XID %08x\n", ntohl(req->rq_xid)); + xs_tcp_read_common(xprt, desc, req); + + if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) { + struct svc_serv *bc_serv = xprt->bc_serv; + + /* + * Add callback request to callback list. The callback + * service sleeps on the sv_cb_waitq waiting for new + * requests. Wake it up after adding enqueing the + * request. + */ + dprintk("RPC: add callback request to list\n"); + spin_lock(&bc_serv->sv_cb_lock); + list_add(&req->rq_bc_list, &bc_serv->sv_cb_list); + spin_unlock(&bc_serv->sv_cb_lock); + wake_up(&bc_serv->sv_cb_waitq); + } + + req->rq_private_buf.len = transport->tcp_copied; + + return 0; +} + +static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, + struct xdr_skb_reader *desc) +{ + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + + return (transport->tcp_flags & TCP_RPC_REPLY) ? + xs_tcp_read_reply(xprt, desc) : + xs_tcp_read_callback(xprt, desc); +} +#else +static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, + struct xdr_skb_reader *desc) +{ + return xs_tcp_read_reply(xprt, desc); +} +#endif /* CONFIG_NFS_V4_1 */ + +/* + * Read data off the transport. This can be either an RPC_CALL or an + * RPC_REPLY. Relay the processing to helper functions. + */ +static void xs_tcp_read_data(struct rpc_xprt *xprt, + struct xdr_skb_reader *desc) +{ + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + + if (_xs_tcp_read_data(xprt, desc) == 0) + xs_tcp_check_fraghdr(transport); + else { + /* + * The transport_lock protects the request handling. + * There's no need to hold it to update the tcp_flags. + */ + transport->tcp_flags &= ~TCP_RCV_COPY_DATA; + } } static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_skb_reader *desc) @@ -1181,7 +1289,7 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns } /* Read in the request data */ if (transport->tcp_flags & TCP_RCV_COPY_DATA) { - xs_tcp_read_request(xprt, &desc); + xs_tcp_read_data(xprt, &desc); continue; } /* Skip over any trailing bytes on short reads */ -- cgit v1.2.1 From 88b5ed73bcd0f21e008b6e303a02c8b7cb1199f4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 17 Jun 2009 13:22:57 -0700 Subject: SUNRPC: Fix a missing "break" option in xs_tcp_setup_socket() In the case of -EADDRNOTAVAIL and/or unhandled connection errors, we want to get rid of the existing socket and retry immediately, just as the comment says. Currently we end up sleeping for a minute, due to the missing "break" statement. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 6c2d61586551..f7f3dfd211ea 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1792,6 +1792,7 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, */ set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); xprt_force_disconnect(xprt); + break; case -ECONNREFUSED: case -ECONNRESET: case -ENETUNREACH: -- cgit v1.2.1 From 55ae1aabfb108106dd095de2578ceef1c755a8b8 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Wed, 1 Apr 2009 09:23:03 -0400 Subject: nfs41: Add backchannel processing support to RPC state machine Adds rpc_run_bc_task() which is called by the NFS callback service to process backchannel requests. It performs similar work to rpc_run_task() though "schedules" the backchannel task to be executed starting at the call_trasmit state in the RPC state machine. It also introduces some miscellaneous updates to the argument validation, call_transmit, and transport cleanup functions to take into account that there are now forechannel and backchannel tasks. Backchannel requests do not carry an RPC message structure, since the payload has already been XDR encoded using the existing NFSv4 callback mechanism. Introduce a new transmit state for the client to reply on to backchannel requests. This new state simply reserves the transport and issues the reply. In case of a connection related error, disconnects the transport and drops the reply. It requires the forechannel to re-establish the connection and the server to retransmit the request, as stated in NFSv4.1 section 2.9.2 "Client and Server Transport Behavior". Note: There is no need to loop attempting to reserve the transport. If EAGAIN is returned by xprt_prepare_transmit(), return with tk_status == 0, setting tk_action to call_bc_transmit. rpc_execute() will invoke it again after the task is taken off the sleep queue. [nfs41: rpc_run_bc_task() need not be exported outside RPC module] [nfs41: New call_bc_transmit RPC state] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy [nfs41: Backchannel: No need to loop in call_bc_transmit()] Signed-off-by: Andy Adamson Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy [rpc_count_iostats incorrectly exits early] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy [Convert rpc_reply_expected() to inline function] [Remove unnecessary BUG_ON()] [Rename variable] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy --- net/sunrpc/clnt.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++++- net/sunrpc/stats.c | 6 ++- net/sunrpc/sunrpc.h | 37 +++++++++++++++++ net/sunrpc/xprt.c | 38 ++++++++++++++--- 4 files changed, 189 insertions(+), 9 deletions(-) create mode 100644 net/sunrpc/sunrpc.h (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index aca3ab6fc140..f3e93b8eb90f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -36,7 +36,9 @@ #include #include #include +#include +#include "sunrpc.h" #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_CALL @@ -63,6 +65,9 @@ static void call_decode(struct rpc_task *task); static void call_bind(struct rpc_task *task); static void call_bind_status(struct rpc_task *task); static void call_transmit(struct rpc_task *task); +#if defined(CONFIG_NFS_V4_1) +static void call_bc_transmit(struct rpc_task *task); +#endif /* CONFIG_NFS_V4_1 */ static void call_status(struct rpc_task *task); static void call_transmit_status(struct rpc_task *task); static void call_refresh(struct rpc_task *task); @@ -613,6 +618,50 @@ rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags, } EXPORT_SYMBOL_GPL(rpc_call_async); +#if defined(CONFIG_NFS_V4_1) +/** + * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run + * rpc_execute against it + * @ops: RPC call ops + */ +struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req, + const struct rpc_call_ops *tk_ops) +{ + struct rpc_task *task; + struct xdr_buf *xbufp = &req->rq_snd_buf; + struct rpc_task_setup task_setup_data = { + .callback_ops = tk_ops, + }; + + dprintk("RPC: rpc_run_bc_task req= %p\n", req); + /* + * Create an rpc_task to send the data + */ + task = rpc_new_task(&task_setup_data); + if (!task) { + xprt_free_bc_request(req); + goto out; + } + task->tk_rqstp = req; + + /* + * Set up the xdr_buf length. + * This also indicates that the buffer is XDR encoded already. + */ + xbufp->len = xbufp->head[0].iov_len + xbufp->page_len + + xbufp->tail[0].iov_len; + + task->tk_action = call_bc_transmit; + atomic_inc(&task->tk_count); + BUG_ON(atomic_read(&task->tk_count) != 2); + rpc_execute(task); + +out: + dprintk("RPC: rpc_run_bc_task: task= %p\n", task); + return task; +} +#endif /* CONFIG_NFS_V4_1 */ + void rpc_call_start(struct rpc_task *task) { @@ -1098,7 +1147,7 @@ call_transmit(struct rpc_task *task) * in order to allow access to the socket to other RPC requests. */ call_transmit_status(task); - if (task->tk_msg.rpc_proc->p_decode != NULL) + if (rpc_reply_expected(task)) return; task->tk_action = rpc_exit_task; rpc_wake_up_queued_task(&task->tk_xprt->pending, task); @@ -1133,6 +1182,72 @@ call_transmit_status(struct rpc_task *task) } } +#if defined(CONFIG_NFS_V4_1) +/* + * 5b. Send the backchannel RPC reply. On error, drop the reply. In + * addition, disconnect on connectivity errors. + */ +static void +call_bc_transmit(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + + BUG_ON(task->tk_status != 0); + task->tk_status = xprt_prepare_transmit(task); + if (task->tk_status == -EAGAIN) { + /* + * Could not reserve the transport. Try again after the + * transport is released. + */ + task->tk_status = 0; + task->tk_action = call_bc_transmit; + return; + } + + task->tk_action = rpc_exit_task; + if (task->tk_status < 0) { + printk(KERN_NOTICE "RPC: Could not send backchannel reply " + "error: %d\n", task->tk_status); + return; + } + + xprt_transmit(task); + xprt_end_transmit(task); + dprint_status(task); + switch (task->tk_status) { + case 0: + /* Success */ + break; + case -EHOSTDOWN: + case -EHOSTUNREACH: + case -ENETUNREACH: + case -ETIMEDOUT: + /* + * Problem reaching the server. Disconnect and let the + * forechannel reestablish the connection. The server will + * have to retransmit the backchannel request and we'll + * reprocess it. Since these ops are idempotent, there's no + * need to cache our reply at this time. + */ + printk(KERN_NOTICE "RPC: Could not send backchannel reply " + "error: %d\n", task->tk_status); + xprt_conditional_disconnect(task->tk_xprt, + req->rq_connect_cookie); + break; + default: + /* + * We were unable to reply and will have to drop the + * request. The server should reconnect and retransmit. + */ + BUG_ON(task->tk_status == -EAGAIN); + printk(KERN_NOTICE "RPC: Could not send backchannel reply " + "error: %d\n", task->tk_status); + break; + } + rpc_wake_up_queued_task(&req->rq_xprt->pending, task); +} +#endif /* CONFIG_NFS_V4_1 */ + /* * 6. Sort out the RPC call status */ diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 1ef6e46d9da2..8487aa0f1f5a 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -141,12 +141,14 @@ EXPORT_SYMBOL_GPL(rpc_free_iostats); void rpc_count_iostats(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - struct rpc_iostats *stats = task->tk_client->cl_metrics; + struct rpc_iostats *stats; struct rpc_iostats *op_metrics; long rtt, execute, queue; - if (!stats || !req) + if (!task->tk_client || !task->tk_client->cl_metrics || !req) return; + + stats = task->tk_client->cl_metrics; op_metrics = &stats[task->tk_msg.rpc_proc->p_statidx]; op_metrics->om_ops++; diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h new file mode 100644 index 000000000000..5d9dd742264b --- /dev/null +++ b/net/sunrpc/sunrpc.h @@ -0,0 +1,37 @@ +/****************************************************************************** + +(c) 2008 NetApp. All Rights Reserved. + +NetApp provides this source code under the GPL v2 License. +The GPL v2 license is available at +http://opensource.org/licenses/gpl-license.php. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ + +/* + * Functions and macros used internally by RPC + */ + +#ifndef _NET_SUNRPC_SUNRPC_H +#define _NET_SUNRPC_SUNRPC_H + +static inline int rpc_reply_expected(struct rpc_task *task) +{ + return (task->tk_msg.rpc_proc != NULL) && + (task->tk_msg.rpc_proc->p_decode != NULL); +} + +#endif /* _NET_SUNRPC_SUNRPC_H */ + diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 52739f82df1e..0eea2bfe111b 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -12,8 +12,9 @@ * - Next, the caller puts together the RPC message, stuffs it into * the request struct, and calls xprt_transmit(). * - xprt_transmit sends the message and installs the caller on the - * transport's wait list. At the same time, it installs a timer that - * is run after the packet's timeout has expired. + * transport's wait list. At the same time, if a reply is expected, + * it installs a timer that is run after the packet's timeout has + * expired. * - When a packet arrives, the data_ready handler walks the list of * pending requests for that transport. If a matching XID is found, the * caller is woken up, and the timer removed. @@ -46,6 +47,8 @@ #include #include +#include "sunrpc.h" + /* * Local variables */ @@ -873,7 +876,10 @@ void xprt_transmit(struct rpc_task *task) dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); if (!req->rq_received) { - if (list_empty(&req->rq_list)) { + if (list_empty(&req->rq_list) && rpc_reply_expected(task)) { + /* + * Add to the list only if we're expecting a reply + */ spin_lock_bh(&xprt->transport_lock); /* Update the softirq receive buffer */ memcpy(&req->rq_private_buf, &req->rq_rcv_buf, @@ -908,8 +914,13 @@ void xprt_transmit(struct rpc_task *task) /* Don't race with disconnect */ if (!xprt_connected(xprt)) task->tk_status = -ENOTCONN; - else if (!req->rq_received) + else if (!req->rq_received && rpc_reply_expected(task)) { + /* + * Sleep on the pending queue since + * we're expecting a reply. + */ rpc_sleep_on(&xprt->pending, task, xprt_timer); + } spin_unlock_bh(&xprt->transport_lock); } @@ -982,11 +993,17 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) */ void xprt_release(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_xprt *xprt; struct rpc_rqst *req; + int is_bc_request; if (!(req = task->tk_rqstp)) return; + + /* Preallocated backchannel request? */ + is_bc_request = bc_prealloc(req); + + xprt = req->rq_xprt; rpc_count_iostats(task); spin_lock_bh(&xprt->transport_lock); xprt->ops->release_xprt(xprt, task); @@ -999,10 +1016,19 @@ void xprt_release(struct rpc_task *task) mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout); spin_unlock_bh(&xprt->transport_lock); - xprt->ops->buf_free(req->rq_buffer); + if (!bc_prealloc(req)) + xprt->ops->buf_free(req->rq_buffer); task->tk_rqstp = NULL; if (req->rq_release_snd_buf) req->rq_release_snd_buf(req); + + /* + * Early exit if this is a backchannel preallocated request. + * There is no need to have it added to the RPC slot list. + */ + if (is_bc_request) + return; + memset(req, 0, sizeof(*req)); /* mark unused */ dprintk("RPC: %5u release request %p\n", task->tk_pid, req); -- cgit v1.2.1 From 0d90ba1cd416525c4825c111db862d8b15a02e9b Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Wed, 1 Apr 2009 09:23:04 -0400 Subject: nfs41: Backchannel callback service helper routines Executes the backchannel task on the RPC state machine using the existing open connection previously established by the client. Signed-off-by: Ricardo Labiaga nfs41: Add bc_svc.o to sunrpc Makefile. [nfs41: bc_send() does not need to be exported outside RPC module] [nfs41: xprt_free_bc_request() need not be exported outside RPC module] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy [Update copyright] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy --- net/sunrpc/Makefile | 2 +- net/sunrpc/bc_svc.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++ net/sunrpc/xprtsock.c | 3 ++ 3 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 net/sunrpc/bc_svc.c (limited to 'net') diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 4a01f9684b85..db73fd2a3f0e 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -13,6 +13,6 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o \ svc_xprt.o -sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o +sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o sunrpc-$(CONFIG_PROC_FS) += stats.o sunrpc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c new file mode 100644 index 000000000000..13f214f53120 --- /dev/null +++ b/net/sunrpc/bc_svc.c @@ -0,0 +1,81 @@ +/****************************************************************************** + +(c) 2007 Network Appliance, Inc. All Rights Reserved. +(c) 2009 NetApp. All Rights Reserved. + +NetApp provides this source code under the GPL v2 License. +The GPL v2 license is available at +http://opensource.org/licenses/gpl-license.php. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +******************************************************************************/ + +/* + * The NFSv4.1 callback service helper routines. + * They implement the transport level processing required to send the + * reply over an existing open connection previously established by the client. + */ + +#if defined(CONFIG_NFS_V4_1) + +#include + +#include +#include +#include + +#define RPCDBG_FACILITY RPCDBG_SVCDSP + +void bc_release_request(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + + dprintk("RPC: bc_release_request: task= %p\n", task); + + /* + * Release this request only if it's a backchannel + * preallocated request + */ + if (!bc_prealloc(req)) + return; + xprt_free_bc_request(req); +} + +/* Empty callback ops */ +static const struct rpc_call_ops nfs41_callback_ops = { +}; + + +/* + * Send the callback reply + */ +int bc_send(struct rpc_rqst *req) +{ + struct rpc_task *task; + int ret; + + dprintk("RPC: bc_send req= %p\n", req); + task = rpc_run_bc_task(req, &nfs41_callback_ops); + if (IS_ERR(task)) + ret = PTR_ERR(task); + else { + BUG_ON(atomic_read(&task->tk_count) != 1); + ret = task->tk_status; + rpc_put_task(task); + } + return ret; + dprintk("RPC: bc_send ret= %d \n", ret); +} + +#endif /* CONFIG_NFS_V4_1 */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e3e3a57116fb..8a721867b601 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2183,6 +2183,9 @@ static struct rpc_xprt_ops xs_tcp_ops = { .buf_free = rpc_free, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, +#if defined(CONFIG_NFS_V4_1) + .release_request = bc_release_request, +#endif /* CONFIG_NFS_V4_1 */ .close = xs_tcp_close, .destroy = xs_destroy, .print_stats = xs_tcp_print_stats, -- cgit v1.2.1 From 1cad7ea6fe98dc414bd3df55275c147bd15ebf97 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Wed, 1 Apr 2009 09:23:06 -0400 Subject: nfs41: Refactor svc_process() net/sunrpc/svc.c:svc_process() is used by the NFSv4 callback service to process RPC requests arriving over connections initiated by the server. NFSv4.1 supports callbacks over the backchannel on connections initiated by the client. This patch refactors svc_process() so that common code can also be used by the backchannel. Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy --- net/sunrpc/svc.c | 80 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 50 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 8847add6ca16..bfda66db2f4f 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -970,20 +970,18 @@ svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) } /* - * Process the RPC request. + * Common routine for processing the RPC request. */ -int -svc_process(struct svc_rqst *rqstp) +static int +svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) { struct svc_program *progp; struct svc_version *versp = NULL; /* compiler food */ struct svc_procedure *procp = NULL; - struct kvec * argv = &rqstp->rq_arg.head[0]; - struct kvec * resv = &rqstp->rq_res.head[0]; struct svc_serv *serv = rqstp->rq_server; kxdrproc_t xdr; __be32 *statp; - u32 dir, prog, vers, proc; + u32 prog, vers, proc; __be32 auth_stat, rpc_stat; int auth_res; __be32 *reply_statp; @@ -993,19 +991,6 @@ svc_process(struct svc_rqst *rqstp) if (argv->iov_len < 6*4) goto err_short_len; - /* setup response xdr_buf. - * Initially it has just one page - */ - rqstp->rq_resused = 1; - resv->iov_base = page_address(rqstp->rq_respages[0]); - resv->iov_len = 0; - rqstp->rq_res.pages = rqstp->rq_respages + 1; - rqstp->rq_res.len = 0; - rqstp->rq_res.page_base = 0; - rqstp->rq_res.page_len = 0; - rqstp->rq_res.buflen = PAGE_SIZE; - rqstp->rq_res.tail[0].iov_base = NULL; - rqstp->rq_res.tail[0].iov_len = 0; /* Will be turned off only in gss privacy case: */ rqstp->rq_splice_ok = 1; /* Will be turned off only when NFSv4 Sessions are used */ @@ -1014,17 +999,13 @@ svc_process(struct svc_rqst *rqstp) /* Setup reply header */ rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); - rqstp->rq_xid = svc_getu32(argv); svc_putu32(resv, rqstp->rq_xid); - dir = svc_getnl(argv); vers = svc_getnl(argv); /* First words of reply: */ svc_putnl(resv, 1); /* REPLY */ - if (dir != 0) /* direction != CALL */ - goto err_bad_dir; if (vers != 2) /* RPC version number */ goto err_bad_rpc; @@ -1147,7 +1128,7 @@ svc_process(struct svc_rqst *rqstp) sendit: if (svc_authorise(rqstp)) goto dropit; - return svc_send(rqstp); + return 1; /* Caller can now send it */ dropit: svc_authorise(rqstp); /* doesn't hurt to call this twice */ @@ -1161,12 +1142,6 @@ err_short_len: goto dropit; /* drop request */ -err_bad_dir: - svc_printk(rqstp, "bad direction %d, dropping request\n", dir); - - serv->sv_stats->rpcbadfmt++; - goto dropit; /* drop request */ - err_bad_rpc: serv->sv_stats->rpcbadfmt++; svc_putnl(resv, 1); /* REJECT */ @@ -1219,6 +1194,51 @@ err_bad: } EXPORT_SYMBOL_GPL(svc_process); +/* + * Process the RPC request. + */ +int +svc_process(struct svc_rqst *rqstp) +{ + struct kvec *argv = &rqstp->rq_arg.head[0]; + struct kvec *resv = &rqstp->rq_res.head[0]; + struct svc_serv *serv = rqstp->rq_server; + u32 dir; + int error; + + /* + * Setup response xdr_buf. + * Initially it has just one page + */ + rqstp->rq_resused = 1; + resv->iov_base = page_address(rqstp->rq_respages[0]); + resv->iov_len = 0; + rqstp->rq_res.pages = rqstp->rq_respages + 1; + rqstp->rq_res.len = 0; + rqstp->rq_res.page_base = 0; + rqstp->rq_res.page_len = 0; + rqstp->rq_res.buflen = PAGE_SIZE; + rqstp->rq_res.tail[0].iov_base = NULL; + rqstp->rq_res.tail[0].iov_len = 0; + + rqstp->rq_xid = svc_getu32(argv); + + dir = svc_getnl(argv); + if (dir != 0) { + /* direction != CALL */ + svc_printk(rqstp, "bad direction %d, dropping request\n", dir); + serv->sv_stats->rpcbadfmt++; + svc_drop(rqstp); + return 0; + } + + error = svc_process_common(rqstp, argv, resv); + if (error <= 0) + return error; + + return svc_send(rqstp); +} + /* * Return (transport-specific) limit on the rpc payload. */ -- cgit v1.2.1 From 4d6bbb6233c9cf23822a2f66f8470c9f40854b77 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Wed, 1 Apr 2009 09:23:07 -0400 Subject: nfs41: Backchannel bc_svc_process() Implement the NFSv4.1 backchannel service. Invokes the common callback processing logic svc_process_common() to authenticate the call and dispatch the appropriate NFSv4.1 XDR decoder and operation procedure. It then invokes bc_send() to send the reply over the same connection. bc_send() is implemented in a separate patch. At this time there is no slot validation or reply cache handling. [nfs41: Preallocate rpc_rqst receive buffer for handling callbacks] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy [Move bc_svc_process() declaration to correct patch] Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy --- net/sunrpc/svc.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'net') diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index bfda66db2f4f..06b52e465f47 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -25,6 +25,7 @@ #include #include #include +#include #define RPCDBG_FACILITY RPCDBG_SVCDSP @@ -1239,6 +1240,54 @@ svc_process(struct svc_rqst *rqstp) return svc_send(rqstp); } +#if defined(CONFIG_NFS_V4_1) +/* + * Process a backchannel RPC request that arrived over an existing + * outbound connection + */ +int +bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, + struct svc_rqst *rqstp) +{ + struct kvec *argv = &rqstp->rq_arg.head[0]; + struct kvec *resv = &rqstp->rq_res.head[0]; + int error; + + /* Build the svc_rqst used by the common processing routine */ + rqstp->rq_xid = req->rq_xid; + rqstp->rq_prot = req->rq_xprt->prot; + rqstp->rq_server = serv; + + rqstp->rq_addrlen = sizeof(req->rq_xprt->addr); + memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen); + memcpy(&rqstp->rq_arg, &req->rq_rcv_buf, sizeof(rqstp->rq_arg)); + memcpy(&rqstp->rq_res, &req->rq_snd_buf, sizeof(rqstp->rq_res)); + + /* reset result send buffer "put" position */ + resv->iov_len = 0; + + if (rqstp->rq_prot != IPPROTO_TCP) { + printk(KERN_ERR "No support for Non-TCP transports!\n"); + BUG(); + } + + /* + * Skip the next two words because they've already been + * processed in the trasport + */ + svc_getu32(argv); /* XID */ + svc_getnl(argv); /* CALLDIR */ + + error = svc_process_common(rqstp, argv, resv); + if (error <= 0) + return error; + + memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf)); + return bc_send(req); +} +EXPORT_SYMBOL(bc_svc_process); +#endif /* CONFIG_NFS_V4_1 */ + /* * Return (transport-specific) limit on the rpc payload. */ -- cgit v1.2.1 From 7652e5a09ba319241607b22d9055ce93fd5b8039 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 1 Apr 2009 09:23:09 -0400 Subject: nfs41: sunrpc: provide functions to create and destroy a svc_xprt for backchannel use For nfs41 callbacks we need an svc_xprt to process requests coming up the backchannel socket as rpc_rqst's that are transformed into svc_rqst's that need a rq_xprt to be processed. The svc_{udp,tcp}_create methods are too heavy for this job as svc_create_socket creates an actual socket to listen on while for nfs41 we're "reusing" the fore channel's socket. Signed-off-by: Benny Halevy --- net/sunrpc/svcsock.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 9d504234af4a..a2a03e500533 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1327,3 +1327,42 @@ static void svc_sock_free(struct svc_xprt *xprt) sock_release(svsk->sk_sock); kfree(svsk); } + +/* + * Create a svc_xprt. + * + * For internal use only (e.g. nfsv4.1 backchannel). + * Callers should typically use the xpo_create() method. + */ +struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot) +{ + struct svc_sock *svsk; + struct svc_xprt *xprt = NULL; + + dprintk("svc: %s\n", __func__); + svsk = kzalloc(sizeof(*svsk), GFP_KERNEL); + if (!svsk) + goto out; + + xprt = &svsk->sk_xprt; + if (prot == IPPROTO_TCP) + svc_xprt_init(&svc_tcp_class, xprt, serv); + else if (prot == IPPROTO_UDP) + svc_xprt_init(&svc_udp_class, xprt, serv); + else + BUG(); +out: + dprintk("svc: %s return %p\n", __func__, xprt); + return xprt; +} +EXPORT_SYMBOL_GPL(svc_sock_create); + +/* + * Destroy a svc_sock. + */ +void svc_sock_destroy(struct svc_xprt *xprt) +{ + if (xprt) + kfree(container_of(xprt, struct svc_sock, sk_xprt)); +} +EXPORT_SYMBOL_GPL(svc_sock_destroy); -- cgit v1.2.1 From 9c9f3f5fa62cc4959e4d4d1cf1ec74f2d6ac1197 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 1 Apr 2009 09:23:10 -0400 Subject: nfs41: sunrpc: add a struct svc_xprt pointer to struct svc_serv for backchannel use This svc_xprt is passed on to the callback service thread to be later used to processes incoming svc_rqst's Signed-off-by: Benny Halevy --- net/sunrpc/svc.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 06b52e465f47..b35048fabe22 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -487,6 +487,10 @@ svc_destroy(struct svc_serv *serv) if (svc_serv_is_pooled(serv)) svc_pool_map_put(); +#if defined(CONFIG_NFS_V4_1) + svc_sock_destroy(serv->bc_xprt); +#endif /* CONFIG_NFS_V4_1 */ + svc_unregister(serv); kfree(serv->sv_pools); kfree(serv); -- cgit v1.2.1 From 8f975242352e92898dc641ebff0d24808f39848a Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Wed, 1 Apr 2009 09:23:11 -0400 Subject: nfs41: create a svc_xprt for nfs41 callback thread and use for incoming callbacks Signed-off-by: Benny Halevy --- net/sunrpc/svc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index b35048fabe22..6b90ce439c00 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1258,6 +1258,7 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, int error; /* Build the svc_rqst used by the common processing routine */ + rqstp->rq_xprt = serv->bc_xprt; rqstp->rq_xid = req->rq_xid; rqstp->rq_prot = req->rq_xprt->prot; rqstp->rq_server = serv; -- cgit v1.2.1 From 343952fa5aac888934ffc203abed26a823400eb6 Mon Sep 17 00:00:00 2001 From: Rahul Iyer Date: Wed, 1 Apr 2009 09:23:17 -0400 Subject: nfs41: Get the rpc_xprt * from the rpc_rqst instead of the rpc_clnt. Obtain the rpc_xprt from the rpc_rqst so that calls and callback replies can both use the same code path. A client needs the rpc_xprt in order to reply to a callback. Signed-off-by: Rahul Iyer Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy --- net/sunrpc/xprt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 0eea2bfe111b..c144611223fc 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -195,8 +195,8 @@ EXPORT_SYMBOL_GPL(xprt_load_transport); */ int xprt_reserve_xprt(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { if (task == xprt->snd_task) @@ -858,7 +858,7 @@ out_unlock: void xprt_end_transmit(struct rpc_task *task) { - xprt_release_write(task->tk_xprt, task); + xprt_release_write(task->tk_rqstp->rq_xprt, task); } /** -- cgit v1.2.1 From dd2b63d049480979016b959abc2d141cdddb1389 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Wed, 1 Apr 2009 09:23:28 -0400 Subject: nfs41: Rename rq_received to rq_reply_bytes_recvd The 'rq_received' member of 'struct rpc_rqst' is used to track when we have received a reply to our request. With v4.1, the backchannel can now accept callback requests over the existing connection. Rename this field to make it clear that it is only used for tracking reply bytes and not all bytes received on the connection. Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy --- net/sunrpc/backchannel_rqst.c | 2 +- net/sunrpc/clnt.c | 8 ++++---- net/sunrpc/stats.c | 2 +- net/sunrpc/xprt.c | 15 ++++++++------- 4 files changed, 14 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index f56e18a23498..5a7d342e3087 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -230,7 +230,7 @@ struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt) if (req != NULL) { set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); - req->rq_received = 0; + req->rq_reply_bytes_recvd = 0; req->rq_bytes_sent = 0; memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(req->rq_private_buf)); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index f3e93b8eb90f..5bc2f45bddf0 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1258,8 +1258,8 @@ call_status(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; int status; - if (req->rq_received > 0 && !req->rq_bytes_sent) - task->tk_status = req->rq_received; + if (req->rq_reply_bytes_recvd > 0 && !req->rq_bytes_sent) + task->tk_status = req->rq_reply_bytes_recvd; dprint_status(task); @@ -1376,7 +1376,7 @@ call_decode(struct rpc_task *task) /* * Ensure that we see all writes made by xprt_complete_rqst() - * before it changed req->rq_received. + * before it changed req->rq_reply_bytes_recvd. */ smp_rmb(); req->rq_rcv_buf.len = req->rq_private_buf.len; @@ -1417,7 +1417,7 @@ out_retry: task->tk_status = 0; /* Note: rpc_verify_header() may have freed the RPC slot */ if (task->tk_rqstp == req) { - req->rq_received = req->rq_rcv_buf.len = 0; + req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0; if (task->tk_client->cl_discrtry) xprt_conditional_disconnect(task->tk_xprt, req->rq_connect_cookie); diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 8487aa0f1f5a..1b4e6791ecf3 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -156,7 +156,7 @@ void rpc_count_iostats(struct rpc_task *task) op_metrics->om_timeouts += task->tk_timeouts; op_metrics->om_bytes_sent += task->tk_bytes_sent; - op_metrics->om_bytes_recv += req->rq_received; + op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd; queue = (long)req->rq_xtime - task->tk_start; if (queue < 0) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index c144611223fc..f412a852bc73 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -806,9 +806,10 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) list_del_init(&req->rq_list); req->rq_private_buf.len = copied; - /* Ensure all writes are done before we update req->rq_received */ + /* Ensure all writes are done before we update */ + /* req->rq_reply_bytes_recvd */ smp_wmb(); - req->rq_received = copied; + req->rq_reply_bytes_recvd = copied; rpc_wake_up_queued_task(&xprt->pending, task); } EXPORT_SYMBOL_GPL(xprt_complete_rqst); @@ -823,7 +824,7 @@ static void xprt_timer(struct rpc_task *task) dprintk("RPC: %5u xprt_timer\n", task->tk_pid); spin_lock_bh(&xprt->transport_lock); - if (!req->rq_received) { + if (!req->rq_reply_bytes_recvd) { if (xprt->ops->timer) xprt->ops->timer(task); } else @@ -845,8 +846,8 @@ int xprt_prepare_transmit(struct rpc_task *task) dprintk("RPC: %5u xprt_prepare_transmit\n", task->tk_pid); spin_lock_bh(&xprt->transport_lock); - if (req->rq_received && !req->rq_bytes_sent) { - err = req->rq_received; + if (req->rq_reply_bytes_recvd && !req->rq_bytes_sent) { + err = req->rq_reply_bytes_recvd; goto out_unlock; } if (!xprt->ops->reserve_xprt(task)) @@ -875,7 +876,7 @@ void xprt_transmit(struct rpc_task *task) dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen); - if (!req->rq_received) { + if (!req->rq_reply_bytes_recvd) { if (list_empty(&req->rq_list) && rpc_reply_expected(task)) { /* * Add to the list only if we're expecting a reply @@ -914,7 +915,7 @@ void xprt_transmit(struct rpc_task *task) /* Don't race with disconnect */ if (!xprt_connected(xprt)) task->tk_status = -ENOTCONN; - else if (!req->rq_received && rpc_reply_expected(task)) { + else if (!req->rq_reply_bytes_recvd && rpc_reply_expected(task)) { /* * Sleep on the pending queue since * we're expecting a reply. -- cgit v1.2.1 From 47fcb03fefee2501e79176932a4184fc24d6f8ec Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 May 2009 17:47:56 -0400 Subject: SUNRPC: Fix the TCP server's send buffer accounting Currently, the sunrpc server is refusing to allow us to process new RPC calls if the TCP send buffer is 2/3 full, even if we do actually have enough free space to guarantee that we can send another request. The following patch fixes svc_tcp_has_wspace() so that we only stop processing requests if we know that the socket buffer cannot possibly fit another reply. It also fixes the tcp write_space() callback so that we only clear the SOCK_NOSPACE flag when the TCP send buffer is less than 2/3 full. This should ensure that the send window will grow as per the standard TCP socket code. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 004a2f9dc432..b09c80c56ee3 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -380,6 +380,7 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, sock->sk->sk_sndbuf = snd * 2; sock->sk->sk_rcvbuf = rcv * 2; sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; + sock->sk->sk_write_space(sock->sk); release_sock(sock->sk); #endif } @@ -421,6 +422,15 @@ static void svc_write_space(struct sock *sk) } } +static void svc_tcp_write_space(struct sock *sk) +{ + struct socket *sock = sk->sk_socket; + + if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) + clear_bit(SOCK_NOSPACE, &sock->flags); + svc_write_space(sk); +} + /* * Copy the UDP datagram's destination address to the rqstp structure. * The 'destination' address in this case is the address to which the @@ -1015,25 +1025,16 @@ static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp) static int svc_tcp_has_wspace(struct svc_xprt *xprt) { struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); - struct svc_serv *serv = svsk->sk_xprt.xpt_server; + struct svc_serv *serv = svsk->sk_xprt.xpt_server; int required; - int wspace; - /* - * Set the SOCK_NOSPACE flag before checking the available - * sock space. - */ + if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) + return 1; + required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg; + if (sk_stream_wspace(svsk->sk_sk) >= required) + return 1; set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); - required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg; - wspace = sk_stream_wspace(svsk->sk_sk); - - if (wspace < sk_stream_min_wspace(svsk->sk_sk)) - return 0; - if (required * 2 > wspace) - return 0; - - clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); - return 1; + return 0; } static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, @@ -1089,7 +1090,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) dprintk("setting up TCP socket for reading\n"); sk->sk_state_change = svc_tcp_state_change; sk->sk_data_ready = svc_tcp_data_ready; - sk->sk_write_space = svc_write_space; + sk->sk_write_space = svc_tcp_write_space; svsk->sk_reclen = 0; svsk->sk_tcplen = 0; -- cgit v1.2.1 From bb664f49f8be17d7b8bf9821144e8a53d7fcfe8a Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 17 Jun 2009 21:54:47 +0000 Subject: af_iucv: Change if condition in sendmsg() for more readability Change the if condition to exit sendmsg() if the socket in not connected. Signed-off-by: Hendrik Brueckner Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 163 ++++++++++++++++++++++++++--------------------------- 1 file changed, 81 insertions(+), 82 deletions(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index a9b3a6f9ea95..42b7198a6883 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -747,108 +747,107 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto out; } - if (sk->sk_state == IUCV_CONNECTED) { - /* initialize defaults */ - cmsg_done = 0; /* check for duplicate headers */ - txmsg.class = 0; - - /* iterate over control messages */ - for (cmsg = CMSG_FIRSTHDR(msg); cmsg; - cmsg = CMSG_NXTHDR(msg, cmsg)) { - - if (!CMSG_OK(msg, cmsg)) { - err = -EINVAL; - goto out; - } + /* Return if the socket is not in connected state */ + if (sk->sk_state != IUCV_CONNECTED) { + err = -ENOTCONN; + goto out; + } - if (cmsg->cmsg_level != SOL_IUCV) - continue; + /* initialize defaults */ + cmsg_done = 0; /* check for duplicate headers */ + txmsg.class = 0; - if (cmsg->cmsg_type & cmsg_done) { - err = -EINVAL; - goto out; - } - cmsg_done |= cmsg->cmsg_type; + /* iterate over control messages */ + for (cmsg = CMSG_FIRSTHDR(msg); cmsg; + cmsg = CMSG_NXTHDR(msg, cmsg)) { - switch (cmsg->cmsg_type) { - case SCM_IUCV_TRGCLS: - if (cmsg->cmsg_len != CMSG_LEN(TRGCLS_SIZE)) { - err = -EINVAL; - goto out; - } + if (!CMSG_OK(msg, cmsg)) { + err = -EINVAL; + goto out; + } - /* set iucv message target class */ - memcpy(&txmsg.class, - (void *) CMSG_DATA(cmsg), TRGCLS_SIZE); + if (cmsg->cmsg_level != SOL_IUCV) + continue; - break; + if (cmsg->cmsg_type & cmsg_done) { + err = -EINVAL; + goto out; + } + cmsg_done |= cmsg->cmsg_type; - default: + switch (cmsg->cmsg_type) { + case SCM_IUCV_TRGCLS: + if (cmsg->cmsg_len != CMSG_LEN(TRGCLS_SIZE)) { err = -EINVAL; goto out; - break; } - } - /* allocate one skb for each iucv message: - * this is fine for SOCK_SEQPACKET (unless we want to support - * segmented records using the MSG_EOR flag), but - * for SOCK_STREAM we might want to improve it in future */ - if (!(skb = sock_alloc_send_skb(sk, len, - msg->msg_flags & MSG_DONTWAIT, - &err))) - goto out; + /* set iucv message target class */ + memcpy(&txmsg.class, + (void *) CMSG_DATA(cmsg), TRGCLS_SIZE); - if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { - err = -EFAULT; - goto fail; + break; + + default: + err = -EINVAL; + goto out; + break; } + } - /* increment and save iucv message tag for msg_completion cbk */ - txmsg.tag = iucv->send_tag++; - memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN); - skb_queue_tail(&iucv->send_skb_q, skb); + /* allocate one skb for each iucv message: + * this is fine for SOCK_SEQPACKET (unless we want to support + * segmented records using the MSG_EOR flag), but + * for SOCK_STREAM we might want to improve it in future */ + skb = sock_alloc_send_skb(sk, len, msg->msg_flags & MSG_DONTWAIT, + &err); + if (!skb) + goto out; + if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { + err = -EFAULT; + goto fail; + } - if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags) - && skb->len <= 7) { - err = iucv_send_iprm(iucv->path, &txmsg, skb); + /* increment and save iucv message tag for msg_completion cbk */ + txmsg.tag = iucv->send_tag++; + memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN); + skb_queue_tail(&iucv->send_skb_q, skb); - /* on success: there is no message_complete callback - * for an IPRMDATA msg; remove skb from send queue */ - if (err == 0) { - skb_unlink(skb, &iucv->send_skb_q); - kfree_skb(skb); - } + if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags) + && skb->len <= 7) { + err = iucv_send_iprm(iucv->path, &txmsg, skb); - /* this error should never happen since the - * IUCV_IPRMDATA path flag is set... sever path */ - if (err == 0x15) { - iucv_path_sever(iucv->path, NULL); - skb_unlink(skb, &iucv->send_skb_q); - err = -EPIPE; - goto fail; - } - } else - err = iucv_message_send(iucv->path, &txmsg, 0, 0, - (void *) skb->data, skb->len); - if (err) { - if (err == 3) { - user_id[8] = 0; - memcpy(user_id, iucv->dst_user_id, 8); - appl_id[8] = 0; - memcpy(appl_id, iucv->dst_name, 8); - pr_err("Application %s on z/VM guest %s" - " exceeds message limit\n", - user_id, appl_id); - } + /* on success: there is no message_complete callback + * for an IPRMDATA msg; remove skb from send queue */ + if (err == 0) { + skb_unlink(skb, &iucv->send_skb_q); + kfree_skb(skb); + } + + /* this error should never happen since the + * IUCV_IPRMDATA path flag is set... sever path */ + if (err == 0x15) { + iucv_path_sever(iucv->path, NULL); skb_unlink(skb, &iucv->send_skb_q); err = -EPIPE; goto fail; } - - } else { - err = -ENOTCONN; - goto out; + } else + err = iucv_message_send(iucv->path, &txmsg, 0, 0, + (void *) skb->data, skb->len); + if (err) { + if (err == 3) { + user_id[8] = 0; + memcpy(user_id, iucv->dst_user_id, 8); + appl_id[8] = 0; + memcpy(appl_id, iucv->dst_name, 8); + pr_err("Application %s on z/VM guest %s" + " exceeds message limit\n", + appl_id, user_id); + } + skb_unlink(skb, &iucv->send_skb_q); + err = -EPIPE; + goto fail; } release_sock(sk); -- cgit v1.2.1 From 0ea920d211e0a870871965418923b08da2025b4a Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 17 Jun 2009 21:54:48 +0000 Subject: af_iucv: Return -EAGAIN if iucv msg limit is exceeded If the iucv message limit for a communication path is exceeded, sendmsg() returns -EAGAIN instead of -EPIPE. The calling application can then handle this error situtation, e.g. to try again after waiting some time. For blocking sockets, sendmsg() waits up to the socket timeout before returning -EAGAIN. For the new wait condition, a macro has been introduced and the iucv_sock_wait_state() has been refactored to this macro. Signed-off-by: Hendrik Brueckner Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 144 ++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 103 insertions(+), 41 deletions(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 42b7198a6883..abadb4a846cf 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -53,6 +53,38 @@ static const u8 iprm_shutdown[8] = #define CB_TRGCLS(skb) ((skb)->cb + CB_TAG_LEN) /* iucv msg target class */ #define CB_TRGCLS_LEN (TRGCLS_SIZE) +#define __iucv_sock_wait(sk, condition, timeo, ret) \ +do { \ + DEFINE_WAIT(__wait); \ + long __timeo = timeo; \ + ret = 0; \ + while (!(condition)) { \ + prepare_to_wait(sk->sk_sleep, &__wait, TASK_INTERRUPTIBLE); \ + if (!__timeo) { \ + ret = -EAGAIN; \ + break; \ + } \ + if (signal_pending(current)) { \ + ret = sock_intr_errno(__timeo); \ + break; \ + } \ + release_sock(sk); \ + __timeo = schedule_timeout(__timeo); \ + lock_sock(sk); \ + ret = sock_error(sk); \ + if (ret) \ + break; \ + } \ + finish_wait(sk->sk_sleep, &__wait); \ +} while (0) + +#define iucv_sock_wait(sk, condition, timeo) \ +({ \ + int __ret = 0; \ + if (!(condition)) \ + __iucv_sock_wait(sk, condition, timeo, __ret); \ + __ret; \ +}) static void iucv_sock_kill(struct sock *sk); static void iucv_sock_close(struct sock *sk); @@ -121,6 +153,48 @@ static inline size_t iucv_msg_length(struct iucv_message *msg) return msg->length; } +/** + * iucv_sock_in_state() - check for specific states + * @sk: sock structure + * @state: first iucv sk state + * @state: second iucv sk state + * + * Returns true if the socket in either in the first or second state. + */ +static int iucv_sock_in_state(struct sock *sk, int state, int state2) +{ + return (sk->sk_state == state || sk->sk_state == state2); +} + +/** + * iucv_below_msglim() - function to check if messages can be sent + * @sk: sock structure + * + * Returns true if the send queue length is lower than the message limit. + * Always returns true if the socket is not connected (no iucv path for + * checking the message limit). + */ +static inline int iucv_below_msglim(struct sock *sk) +{ + struct iucv_sock *iucv = iucv_sk(sk); + + if (sk->sk_state != IUCV_CONNECTED) + return 1; + return (skb_queue_len(&iucv->send_skb_q) < iucv->path->msglim); +} + +/** + * iucv_sock_wake_msglim() - Wake up thread waiting on msg limit + */ +static void iucv_sock_wake_msglim(struct sock *sk) +{ + read_lock(&sk->sk_callback_lock); + if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + wake_up_interruptible_all(sk->sk_sleep); + sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); + read_unlock(&sk->sk_callback_lock); +} + /* Timers */ static void iucv_sock_timeout(unsigned long arg) { @@ -212,7 +286,9 @@ static void iucv_sock_close(struct sock *sk) timeo = sk->sk_lingertime; else timeo = IUCV_DISCONN_TIMEOUT; - err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0, timeo); + err = iucv_sock_wait(sk, + iucv_sock_in_state(sk, IUCV_CLOSED, 0), + timeo); } case IUCV_CLOSING: /* fall through */ @@ -393,39 +469,6 @@ struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock) return NULL; } -int iucv_sock_wait_state(struct sock *sk, int state, int state2, - unsigned long timeo) -{ - DECLARE_WAITQUEUE(wait, current); - int err = 0; - - add_wait_queue(sk->sk_sleep, &wait); - while (sk->sk_state != state && sk->sk_state != state2) { - set_current_state(TASK_INTERRUPTIBLE); - - if (!timeo) { - err = -EAGAIN; - break; - } - - if (signal_pending(current)) { - err = sock_intr_errno(timeo); - break; - } - - release_sock(sk); - timeo = schedule_timeout(timeo); - lock_sock(sk); - - err = sock_error(sk); - if (err) - break; - } - set_current_state(TASK_RUNNING); - remove_wait_queue(sk->sk_sleep, &wait); - return err; -} - /* Bind an unbound socket */ static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) @@ -570,8 +613,9 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr, } if (sk->sk_state != IUCV_CONNECTED) { - err = iucv_sock_wait_state(sk, IUCV_CONNECTED, IUCV_DISCONN, - sock_sndtimeo(sk, flags & O_NONBLOCK)); + err = iucv_sock_wait(sk, iucv_sock_in_state(sk, IUCV_CONNECTED, + IUCV_DISCONN), + sock_sndtimeo(sk, flags & O_NONBLOCK)); } if (sk->sk_state == IUCV_DISCONN) { @@ -725,9 +769,11 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct iucv_message txmsg; struct cmsghdr *cmsg; int cmsg_done; + long timeo; char user_id[9]; char appl_id[9]; int err; + int noblock = msg->msg_flags & MSG_DONTWAIT; err = sock_error(sk); if (err) @@ -799,8 +845,7 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, * this is fine for SOCK_SEQPACKET (unless we want to support * segmented records using the MSG_EOR flag), but * for SOCK_STREAM we might want to improve it in future */ - skb = sock_alloc_send_skb(sk, len, msg->msg_flags & MSG_DONTWAIT, - &err); + skb = sock_alloc_send_skb(sk, len, noblock, &err); if (!skb) goto out; if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { @@ -808,6 +853,18 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto fail; } + /* wait if outstanding messages for iucv path has reached */ + timeo = sock_sndtimeo(sk, noblock); + err = iucv_sock_wait(sk, iucv_below_msglim(sk), timeo); + if (err) + goto fail; + + /* return -ECONNRESET if the socket is no longer connected */ + if (sk->sk_state != IUCV_CONNECTED) { + err = -ECONNRESET; + goto fail; + } + /* increment and save iucv message tag for msg_completion cbk */ txmsg.tag = iucv->send_tag++; memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN); @@ -844,9 +901,10 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, pr_err("Application %s on z/VM guest %s" " exceeds message limit\n", appl_id, user_id); - } + err = -EAGAIN; + } else + err = -EPIPE; skb_unlink(skb, &iucv->send_skb_q); - err = -EPIPE; goto fail; } @@ -1463,7 +1521,11 @@ static void iucv_callback_txdone(struct iucv_path *path, spin_unlock_irqrestore(&list->lock, flags); - kfree_skb(this); + if (this) { + kfree_skb(this); + /* wake up any process waiting for sending */ + iucv_sock_wake_msglim(sk); + } } BUG_ON(!this); -- cgit v1.2.1 From 25502bda07b4cd4f1d9942cca2df446c4a0f167c Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 18 Jun 2009 04:16:46 +0000 Subject: ieee802154: use standard routine for printing dumps Use print_hex_dump_bytes instead of self-written dumping function for outputting packet dumps. Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: David S. Miller --- net/ieee802154/af_ieee802154.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index 882a927cefae..3bb6bdb1dac1 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -39,14 +39,6 @@ #include "af802154.h" -#define DBG_DUMP(data, len) { \ - int i; \ - pr_debug("function: %s: data: len %d:\n", __func__, len); \ - for (i = 0; i < len; i++) {\ - pr_debug("%02x: %02x\n", i, (data)[i]); \ - } \ -} - /* * Utility function for families */ @@ -302,10 +294,12 @@ static struct net_proto_family ieee802154_family_ops = { static int ieee802154_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - DBG_DUMP(skb->data, skb->len); if (!netif_running(dev)) return -ENODEV; pr_debug("got frame, type %d, dev %p\n", dev->type, dev); +#ifdef DEBUG + print_hex_dump_bytes("ieee802154_rcv ", DUMP_PREFIX_NONE, skb->data, skb->len); +#endif if (!net_eq(dev_net(dev), &init_net)) goto drop; -- cgit v1.2.1 From 7fa20a7f60df0afceafbb8197b5d110507f42c72 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Tue, 16 Jun 2009 14:53:24 +0100 Subject: rfkill: rfkill_set_block() when suspended nitpick If we return after fiddling with the state, userspace will see the wrong state and rfkill_set_sw_state() won't work until the next call to rfkill_set_block(). At the moment rfkill_set_block() will always be called from rfkill_resume(), but this will change in future. Also, presumably the point of this test is to avoid bothering devices which may be suspended. If we don't want to call set_block(), we probably don't want to call query() either :-). Signed-off-by: Alan Jenkins Signed-off-by: John W. Linville --- net/rfkill/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 4e68ab439d5d..868d79f8ac1d 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -270,6 +270,9 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked) unsigned long flags; int err; + if (unlikely(rfkill->dev.power.power_state.event & PM_EVENT_SLEEP)) + return; + /* * Some platforms (...!) generate input events which affect the * _hard_ kill state -- whenever something tries to change the @@ -292,9 +295,6 @@ static void rfkill_set_block(struct rfkill *rfkill, bool blocked) rfkill->state |= RFKILL_BLOCK_SW_SETCALL; spin_unlock_irqrestore(&rfkill->lock, flags); - if (unlikely(rfkill->dev.power.power_state.event & PM_EVENT_SLEEP)) - return; - err = rfkill->ops->set_block(rfkill->data, blocked); spin_lock_irqsave(&rfkill->lock, flags); -- cgit v1.2.1 From 06d5caf47ef4fbd9efdceae33293c42778cb7b0c Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Tue, 16 Jun 2009 15:39:51 +0100 Subject: rfkill: don't restore software blocked state on persistent devices The setting of the "persistent" flag is also made more explicit using a new rfkill_init_sw_state() function, instead of special-casing rfkill_set_sw_state() when it is called before registration. Suspend is a bit of a corner case so we try to get away without adding another hack to rfkill-input - it's going to be removed soon. If the state does change over suspend, users will simply have to prod rfkill-input twice in order to toggle the state. Userspace policy agents will be able to implement a more consistent user experience. For example, they can avoid the above problem if they toggle devices individually. Then there would be no "global state" to get out of sync. Currently there are only two rfkill drivers with persistent soft-blocked state. thinkpad-acpi already checks the software state on resume. eeepc-laptop will require modification. Signed-off-by: Alan Jenkins CC: Marcel Holtmann Acked-by: Henrique de Moraes Holschuh Signed-off-by: John W. Linville --- net/rfkill/core.c | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 868d79f8ac1d..dcf8df7c573c 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -56,7 +56,6 @@ struct rfkill { u32 idx; bool registered; - bool suspended; bool persistent; const struct rfkill_ops *ops; @@ -224,7 +223,7 @@ static void rfkill_send_events(struct rfkill *rfkill, enum rfkill_operation op) static void rfkill_event(struct rfkill *rfkill) { - if (!rfkill->registered || rfkill->suspended) + if (!rfkill->registered) return; kobject_uevent(&rfkill->dev.kobj, KOBJ_CHANGE); @@ -508,19 +507,32 @@ bool rfkill_set_sw_state(struct rfkill *rfkill, bool blocked) blocked = blocked || hwblock; spin_unlock_irqrestore(&rfkill->lock, flags); - if (!rfkill->registered) { - rfkill->persistent = true; - } else { - if (prev != blocked && !hwblock) - schedule_work(&rfkill->uevent_work); + if (!rfkill->registered) + return blocked; - rfkill_led_trigger_event(rfkill); - } + if (prev != blocked && !hwblock) + schedule_work(&rfkill->uevent_work); + + rfkill_led_trigger_event(rfkill); return blocked; } EXPORT_SYMBOL(rfkill_set_sw_state); +void rfkill_init_sw_state(struct rfkill *rfkill, bool blocked) +{ + unsigned long flags; + + BUG_ON(!rfkill); + BUG_ON(rfkill->registered); + + spin_lock_irqsave(&rfkill->lock, flags); + __rfkill_set_sw_state(rfkill, blocked); + rfkill->persistent = true; + spin_unlock_irqrestore(&rfkill->lock, flags); +} +EXPORT_SYMBOL(rfkill_init_sw_state); + void rfkill_set_states(struct rfkill *rfkill, bool sw, bool hw) { unsigned long flags; @@ -718,8 +730,6 @@ static int rfkill_suspend(struct device *dev, pm_message_t state) rfkill_pause_polling(rfkill); - rfkill->suspended = true; - return 0; } @@ -728,10 +738,10 @@ static int rfkill_resume(struct device *dev) struct rfkill *rfkill = to_rfkill(dev); bool cur; - cur = !!(rfkill->state & RFKILL_BLOCK_SW); - rfkill_set_block(rfkill, cur); - - rfkill->suspended = false; + if (!rfkill->persistent) { + cur = !!(rfkill->state & RFKILL_BLOCK_SW); + rfkill_set_block(rfkill, cur); + } rfkill_resume_polling(rfkill); -- cgit v1.2.1 From 464902e812025792c9e33e19e1555c343672d5cf Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Tue, 16 Jun 2009 14:54:04 +0100 Subject: rfkill: export persistent attribute in sysfs This information allows userspace to implement a hybrid policy where it can store the rfkill soft-blocked state in platform non-volatile storage if available, and if not then file-based storage can be used. Some users prefer platform non-volatile storage because of the behaviour when dual-booting multiple versions of Linux, or if the rfkill setting is changed in the BIOS setting screens, or if the BIOS responds to wireless-toggle hotkeys itself before the relevant platform driver has been loaded. Signed-off-by: Alan Jenkins Acked-by: Henrique de Moraes Holschuh Signed-off-by: John W. Linville --- net/rfkill/core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/rfkill/core.c b/net/rfkill/core.c index dcf8df7c573c..79693fe2001e 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -610,6 +610,15 @@ static ssize_t rfkill_idx_show(struct device *dev, return sprintf(buf, "%d\n", rfkill->idx); } +static ssize_t rfkill_persistent_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct rfkill *rfkill = to_rfkill(dev); + + return sprintf(buf, "%d\n", rfkill->persistent); +} + static u8 user_state_from_blocked(unsigned long state) { if (state & RFKILL_BLOCK_HW) @@ -668,6 +677,7 @@ static struct device_attribute rfkill_dev_attrs[] = { __ATTR(name, S_IRUGO, rfkill_name_show, NULL), __ATTR(type, S_IRUGO, rfkill_type_show, NULL), __ATTR(index, S_IRUGO, rfkill_idx_show, NULL), + __ATTR(persistent, S_IRUGO, rfkill_persistent_show, NULL), __ATTR(state, S_IRUGO|S_IWUSR, rfkill_state_show, rfkill_state_store), __ATTR(claim, S_IRUGO|S_IWUSR, rfkill_claim_show, rfkill_claim_store), __ATTR_NULL -- cgit v1.2.1 From 155cc9e4b1d60161ee53ffaf2c15b9411f086fa7 Mon Sep 17 00:00:00 2001 From: Andrey Yurovsky Date: Tue, 16 Jun 2009 11:31:04 -0700 Subject: cfg80211: allow adding/deleting stations on mesh Commit b2a151a288 added a check that prevents adding or deleting stations on non-AP interfaces. Adding and deleting stations is supported for Mesh Point interfaces, so add Mesh Point to that check as well. Signed-off-by: Andrey Yurovsky Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 24168560ebae..2c55d25ed34d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1763,7 +1763,8 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) goto out_rtnl; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN) { + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { err = -EINVAL; goto out; } @@ -1812,7 +1813,8 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) goto out_rtnl; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN) { + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { err = -EINVAL; goto out; } -- cgit v1.2.1 From 9a5e8bbc8fece7851a2a69a8676a6fd0507bc550 Mon Sep 17 00:00:00 2001 From: Andrey Yurovsky Date: Tue, 16 Jun 2009 16:09:37 -0700 Subject: cfg80211: allow setting station parameters in mesh Mesh Point interfaces can also set parameters, for example plink_open is used to manually establish peer links from user-space (currently via iw). Add Mesh Point to the check in nl80211_set_station. Signed-off-by: Andrey Yurovsky Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2c55d25ed34d..304b3d568e07 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1688,7 +1688,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) goto out_rtnl; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN) { + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { err = -EINVAL; goto out; } -- cgit v1.2.1 From a97f4424fb4cddecf9b13c9b0e3f79924b624a7f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Jun 2009 17:23:43 +0200 Subject: cfg80211: validate station settings When I disallowed interfering with stations on non-AP interfaces, I not only forget mesh but also managed interfaces which need this for the authorized flag. Let's actually validate everything properly. This fixes an nl80211 regression introduced by the interfering, under which wpa_supplicant -Dnl80211 could not properly connect. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 94 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 78 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 304b3d568e07..241bddd0b4f1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1687,14 +1687,52 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) if (err) goto out_rtnl; - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { - err = -EINVAL; + err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, ¶ms.vlan); + if (err) goto out; + + /* validate settings */ + err = 0; + + switch (dev->ieee80211_ptr->iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + /* disallow mesh-specific things */ + if (params.plink_action) + err = -EINVAL; + break; + case NL80211_IFTYPE_STATION: + /* disallow everything but AUTHORIZED flag */ + if (params.plink_action) + err = -EINVAL; + if (params.vlan) + err = -EINVAL; + if (params.supported_rates) + err = -EINVAL; + if (params.ht_capa) + err = -EINVAL; + if (params.listen_interval >= 0) + err = -EINVAL; + if (params.sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED)) + err = -EINVAL; + break; + case NL80211_IFTYPE_MESH_POINT: + /* disallow things mesh doesn't support */ + if (params.vlan) + err = -EINVAL; + if (params.ht_capa) + err = -EINVAL; + if (params.listen_interval >= 0) + err = -EINVAL; + if (params.supported_rates) + err = -EINVAL; + if (params.sta_flags_mask) + err = -EINVAL; + break; + default: + err = -EINVAL; } - err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, ¶ms.vlan); if (err) goto out; @@ -1729,9 +1767,6 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; - if (!info->attrs[NL80211_ATTR_STA_AID]) - return -EINVAL; - if (!info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) return -EINVAL; @@ -1746,9 +1781,11 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); - params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); - if (!params.aid || params.aid > IEEE80211_MAX_AID) - return -EINVAL; + if (info->attrs[NL80211_ATTR_STA_AID]) { + params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); + if (!params.aid || params.aid > IEEE80211_MAX_AID) + return -EINVAL; + } if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) params.ht_capa = @@ -1763,14 +1800,39 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (err) goto out_rtnl; - if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && - dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { - err = -EINVAL; + err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, ¶ms.vlan); + if (err) goto out; + + /* validate settings */ + err = 0; + + switch (dev->ieee80211_ptr->iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + /* all ok but must have AID */ + if (!params.aid) + err = -EINVAL; + break; + case NL80211_IFTYPE_MESH_POINT: + /* disallow things mesh doesn't support */ + if (params.vlan) + err = -EINVAL; + if (params.aid) + err = -EINVAL; + if (params.ht_capa) + err = -EINVAL; + if (params.listen_interval >= 0) + err = -EINVAL; + if (params.supported_rates) + err = -EINVAL; + if (params.sta_flags_mask) + err = -EINVAL; + break; + default: + err = -EINVAL; } - err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, ¶ms.vlan); if (err) goto out; -- cgit v1.2.1 From 73e42897e8e5619eacb787d2ce69be12f47cfc21 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Sat, 20 Jun 2009 01:15:16 -0700 Subject: ipv4: fix NULL pointer + success return in route lookup path Don't drop route if we're not caching I recently got a report of an oops on a route lookup. Maxime was testing what would happen if route caching was turned off (doing so by setting making rt_caching always return 0), and found that it triggered an oops. I looked at it and found that the problem stemmed from the fact that the route lookup routines were returning success from their lookup paths (which is good), but never set the **rp pointer to anything (which is bad). This happens because in rt_intern_hash, if rt_caching returns false, we call rt_drop and return 0. This almost emulates slient success. What we should be doing is assigning *rp = rt and _not_ dropping the route. This way, during slow path lookups, when we create a new route cache entry, we don't immediately discard it, rather we just don't add it into the cache hash table, but we let this one lookup use it for the purpose of this route request. Maxime has tested and reports it prevents the oops. There is still a subsequent routing issue that I'm looking into further, but I'm confident that, even if its related to this same path, this patch makes sense to take. Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- net/ipv4/route.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index cd76b3cb7092..65b3a8b11a6c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1085,8 +1085,16 @@ restart: now = jiffies; if (!rt_caching(dev_net(rt->u.dst.dev))) { - rt_drop(rt); - return 0; + /* + * If we're not caching, just tell the caller we + * were successful and don't touch the route. The + * caller hold the sole reference to the cache entry, and + * it will be released when the caller is done with it. + * If we drop it here, the callers have no way to resolve routes + * when we're not caching. Instead, just point *rp at rt, so + * the caller gets a single use out of the route + */ + goto report_and_exit; } rthp = &rt_hash_table[hash].chain; @@ -1217,6 +1225,8 @@ restart: rcu_assign_pointer(rt_hash_table[hash].chain, rt); spin_unlock_bh(rt_hash_lock_addr(hash)); + +report_and_exit: if (rp) *rp = rt; else -- cgit v1.2.1 From e9f029855865e917821ef6034b31e340a4cfc815 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Thu, 18 Jun 2009 22:01:24 -0400 Subject: nfs41: sunrpc: xprt_alloc_bc_request() should not use spin_lock_bh() xprt_alloc_bc_request() is always called in soft interrupt context. Grab the spin_lock instead of the bottom half spin_lock. Softirqs do not preempt other softirqs running on the same processor, so there is no need to disable bottom halves. Signed-off-by: Ricardo Labiaga Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- net/sunrpc/backchannel_rqst.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 5a7d342e3087..553621fb2c41 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -211,6 +211,9 @@ EXPORT_SYMBOL(xprt_destroy_backchannel); * has been preallocated as well. Use xprt_alloc_bc_request to allocate * to this request. Use xprt_free_bc_request to return it. * + * We know that we're called in soft interrupt context, grab the spin_lock + * since there is no need to grab the bottom half spin_lock. + * * Return an available rpc_rqst, otherwise NULL if non are available. */ struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt) @@ -218,7 +221,7 @@ struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt) struct rpc_rqst *req; dprintk("RPC: allocate a backchannel request\n"); - spin_lock_bh(&xprt->bc_pa_lock); + spin_lock(&xprt->bc_pa_lock); if (!list_empty(&xprt->bc_pa_list)) { req = list_first_entry(&xprt->bc_pa_list, struct rpc_rqst, rq_bc_pa_list); @@ -226,7 +229,7 @@ struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt) } else { req = NULL; } - spin_unlock_bh(&xprt->bc_pa_lock); + spin_unlock(&xprt->bc_pa_lock); if (req != NULL) { set_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); -- cgit v1.2.1 From 8cc20198cfccd06cef705c14fd50bde603e2e306 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Jun 2009 14:13:55 +0200 Subject: netfilter: nf_conntrack: death_by_timeout() fix death_by_timeout() might delete a conntrack from hash list and insert it in dying list. nf_ct_delete_from_lists(ct); nf_ct_insert_dying_list(ct); I believe a (lockless) reader could *catch* ct while doing a lookup and miss the end of its chain. (nulls lookup algo must check the null value at the end of lookup and should restart if the null value is not the expected one. cf Documentation/RCU/rculist_nulls.txt for details) We need to change nf_conntrack_init_net() and use a different "null" value, guaranteed not being used in regular lists. Choose very large values, since hash table uses [0..size-1] null values. Signed-off-by: Eric Dumazet Acked-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5f72b94b4918..5276a2dd56fe 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1267,13 +1267,19 @@ err_cache: return ret; } +/* + * We need to use special "null" values, not used in hash table + */ +#define UNCONFIRMED_NULLS_VAL ((1<<30)+0) +#define DYING_NULLS_VAL ((1<<30)+1) + static int nf_conntrack_init_net(struct net *net) { int ret; atomic_set(&net->ct.count, 0); - INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, 0); - INIT_HLIST_NULLS_HEAD(&net->ct.dying, 0); + INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, UNCONFIRMED_NULLS_VAL); + INIT_HLIST_NULLS_HEAD(&net->ct.dying, DYING_NULLS_VAL); net->ct.stat = alloc_percpu(struct ip_conntrack_stat); if (!net->ct.stat) { ret = -ENOMEM; -- cgit v1.2.1 From 5c8ec910e789a92229978d8fd1fce7b62e8ac711 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 22 Jun 2009 14:14:16 +0200 Subject: netfilter: nf_conntrack: fix confirmation race condition New connection tracking entries are inserted into the hash before they are fully set up, namely the CONFIRMED bit is not set and the timer not started yet. This can theoretically lead to a race with timer, which would set the timeout value to a relative value, most likely already in the past. Perform hash insertion as the final step to fix this. Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 5276a2dd56fe..b0b06c7a9483 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -425,7 +425,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) /* Remove from unconfirmed list */ hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); - __nf_conntrack_hash_insert(ct, hash, repl_hash); /* Timer relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */ @@ -433,8 +432,16 @@ __nf_conntrack_confirm(struct sk_buff *skb) add_timer(&ct->timeout); atomic_inc(&ct->ct_general.use); set_bit(IPS_CONFIRMED_BIT, &ct->status); + + /* Since the lookup is lockless, hash insertion must be done after + * starting the timer and setting the CONFIRMED bit. The RCU barriers + * guarantee that no other CPU can find the conntrack before the above + * stores are visible. + */ + __nf_conntrack_hash_insert(ct, hash, repl_hash); NF_CT_STAT_INC(net, insert); spin_unlock_bh(&nf_conntrack_lock); + help = nfct_help(ct); if (help && help->helper) nf_conntrack_event_cache(IPCT_HELPER, ct); -- cgit v1.2.1 From 8d8890b7751387f58ce0a6428773de2fbc0fd596 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 22 Jun 2009 14:14:41 +0200 Subject: netfilter: nf_conntrack: fix conntrack lookup race The RCU protected conntrack hash lookup only checks whether the entry has a refcount of zero to decide whether it is stale. This is not sufficient, entries are explicitly removed while there is at least one reference left, possibly more. Explicitly check whether the entry has been marked as dying to fix this. Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index b0b06c7a9483..7508f11c5b39 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -335,7 +335,8 @@ begin: h = __nf_conntrack_find(net, tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); - if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) + if (unlikely(nf_ct_is_dying(ct) || + !atomic_inc_not_zero(&ct->ct_general.use))) h = NULL; else { if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple))) { @@ -510,7 +511,8 @@ static noinline int early_drop(struct net *net, unsigned int hash) cnt++; } - if (ct && unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) + if (ct && unlikely(nf_ct_is_dying(ct) || + !atomic_inc_not_zero(&ct->ct_general.use))) ct = NULL; if (ct || cnt >= NF_CT_EVICTION_RANGE) break; -- cgit v1.2.1 From f9ffc31251c2caa11962c9b74ce650e2167fa8d1 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 22 Jun 2009 14:15:02 +0200 Subject: netfilter: fix some sparse endianess warnings net/netfilter/xt_NFQUEUE.c:46:9: warning: incorrect type in assignment (different base types) net/netfilter/xt_NFQUEUE.c:46:9: expected unsigned int [unsigned] [usertype] ipaddr net/netfilter/xt_NFQUEUE.c:46:9: got restricted unsigned int net/netfilter/xt_NFQUEUE.c:68:10: warning: incorrect type in assignment (different base types) net/netfilter/xt_NFQUEUE.c:68:10: expected unsigned int [unsigned] net/netfilter/xt_NFQUEUE.c:68:10: got restricted unsigned int net/netfilter/xt_NFQUEUE.c:69:10: warning: incorrect type in assignment (different base types) net/netfilter/xt_NFQUEUE.c:69:10: expected unsigned int [unsigned] net/netfilter/xt_NFQUEUE.c:69:10: got restricted unsigned int net/netfilter/xt_NFQUEUE.c:70:10: warning: incorrect type in assignment (different base types) net/netfilter/xt_NFQUEUE.c:70:10: expected unsigned int [unsigned] net/netfilter/xt_NFQUEUE.c:70:10: got restricted unsigned int net/netfilter/xt_NFQUEUE.c:71:10: warning: incorrect type in assignment (different base types) net/netfilter/xt_NFQUEUE.c:71:10: expected unsigned int [unsigned] net/netfilter/xt_NFQUEUE.c:71:10: got restricted unsigned int net/netfilter/xt_cluster.c:20:55: warning: incorrect type in return expression (different base types) net/netfilter/xt_cluster.c:20:55: expected unsigned int net/netfilter/xt_cluster.c:20:55: got restricted unsigned int const [usertype] ip net/netfilter/xt_cluster.c:20:55: warning: incorrect type in return expression (different base types) net/netfilter/xt_cluster.c:20:55: expected unsigned int net/netfilter/xt_cluster.c:20:55: got restricted unsigned int const [usertype] ip Signed-off-by: Patrick McHardy --- net/netfilter/xt_NFQUEUE.c | 8 ++++---- net/netfilter/xt_cluster.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index 498b45101df7..f28f6a5fc02d 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -40,12 +40,12 @@ nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par) static u32 hash_v4(const struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); - u32 ipaddr; + __be32 ipaddr; /* packets in either direction go into same queue */ ipaddr = iph->saddr ^ iph->daddr; - return jhash_2words(ipaddr, iph->protocol, jhash_initval); + return jhash_2words((__force u32)ipaddr, iph->protocol, jhash_initval); } static unsigned int @@ -63,14 +63,14 @@ nfqueue_tg4_v1(struct sk_buff *skb, const struct xt_target_param *par) static u32 hash_v6(const struct sk_buff *skb) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); - u32 addr[4]; + __be32 addr[4]; addr[0] = ip6h->saddr.s6_addr32[0] ^ ip6h->daddr.s6_addr32[0]; addr[1] = ip6h->saddr.s6_addr32[1] ^ ip6h->daddr.s6_addr32[1]; addr[2] = ip6h->saddr.s6_addr32[2] ^ ip6h->daddr.s6_addr32[2]; addr[3] = ip6h->saddr.s6_addr32[3] ^ ip6h->daddr.s6_addr32[3]; - return jhash2(addr, ARRAY_SIZE(addr), jhash_initval); + return jhash2((__force u32 *)addr, ARRAY_SIZE(addr), jhash_initval); } static unsigned int diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 69a639f35403..225ee3ecd69d 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -15,14 +15,14 @@ #include #include -static inline u_int32_t nf_ct_orig_ipv4_src(const struct nf_conn *ct) +static inline u32 nf_ct_orig_ipv4_src(const struct nf_conn *ct) { - return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; + return (__force u32)ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; } -static inline const void *nf_ct_orig_ipv6_src(const struct nf_conn *ct) +static inline const u32 *nf_ct_orig_ipv6_src(const struct nf_conn *ct) { - return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6; + return (__force u32 *)ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6; } static inline u_int32_t -- cgit v1.2.1 From 249556192859490b6280552d4b877064f9f5ee48 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 22 Jun 2009 14:15:30 +0200 Subject: netfilter: nf_log: fix direct userspace memory access in proc handler Signed-off-by: Patrick McHardy --- net/netfilter/nf_log.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 2fefe147750a..4e620305f28c 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -47,7 +47,6 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger) mutex_lock(&nf_log_mutex); if (pf == NFPROTO_UNSPEC) { - int i; for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) list_add_tail(&(logger->list[i]), &(nf_loggers_l[i])); } else { @@ -216,7 +215,7 @@ static const struct file_operations nflog_file_ops = { #endif /* PROC_FS */ #ifdef CONFIG_SYSCTL -struct ctl_path nf_log_sysctl_path[] = { +static struct ctl_path nf_log_sysctl_path[] = { { .procname = "net", .ctl_name = CTL_NET, }, { .procname = "netfilter", .ctl_name = NET_NETFILTER, }, { .procname = "nf_log", .ctl_name = CTL_UNNUMBERED, }, @@ -228,19 +227,26 @@ static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; static struct ctl_table_header *nf_log_dir_header; static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp, - void *buffer, size_t *lenp, loff_t *ppos) + void __user *buffer, size_t *lenp, loff_t *ppos) { const struct nf_logger *logger; + char buf[NFLOGGER_NAME_LEN]; + size_t size = *lenp; int r = 0; int tindex = (unsigned long)table->extra1; if (write) { - if (!strcmp(buffer, "NONE")) { + if (size > sizeof(buf)) + size = sizeof(buf); + if (copy_from_user(buf, buffer, size)) + return -EFAULT; + + if (!strcmp(buf, "NONE")) { nf_log_unbind_pf(tindex); return 0; } mutex_lock(&nf_log_mutex); - logger = __find_logger(tindex, buffer); + logger = __find_logger(tindex, buf); if (logger == NULL) { mutex_unlock(&nf_log_mutex); return -ENOENT; -- cgit v1.2.1 From 6d62182fea6cc6bbc8d82a691ad0608d68a54aeb Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 22 Jun 2009 14:16:45 +0200 Subject: netfilter: xt_quota: fix incomplete initialization Commit v2.6.29-rc5-872-gacc738f ("xtables: avoid pointer to self") forgot to copy the initial quota value supplied by iptables into the private structure, thus counting from whatever was in the memory kmalloc returned. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/netfilter/xt_quota.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index 01dd07b764ec..98fc190e8f0e 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -54,6 +54,7 @@ static bool quota_mt_check(const struct xt_mtchk_param *par) if (q->master == NULL) return -ENOMEM; + q->master->quota = q->quota; return true; } -- cgit v1.2.1 From 4d900f9df5f0569c2dc536701e2c11b6d50ebebf Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 22 Jun 2009 14:17:12 +0200 Subject: netfilter: xt_rateest: fix comparison with self MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As noticed by Török Edwin : Compiling the kernel with clang has shown this warning: net/netfilter/xt_rateest.c:69:16: warning: self-comparison always results in a constant value ret &= pps2 == pps2; ^ Looking at the code: if (info->flags & XT_RATEEST_MATCH_BPS) ret &= bps1 == bps2; if (info->flags & XT_RATEEST_MATCH_PPS) ret &= pps2 == pps2; Judging from the MATCH_BPS case it seems to be a typo, with the intention of comparing pps1 with pps2. http://bugzilla.kernel.org/show_bug.cgi?id=13535 Signed-off-by: Patrick McHardy --- net/netfilter/xt_rateest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index 220a1d588ee0..4fc6a917f6de 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -66,7 +66,7 @@ xt_rateest_mt(const struct sk_buff *skb, const struct xt_match_param *par) if (info->flags & XT_RATEEST_MATCH_BPS) ret &= bps1 == bps2; if (info->flags & XT_RATEEST_MATCH_PPS) - ret &= pps2 == pps2; + ret &= pps1 == pps2; break; } -- cgit v1.2.1 From d5fdd6babcfc2b0e6a8da1acf492a69fb54b4c47 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Tue, 23 Jun 2009 04:31:07 -0700 Subject: ipv6: Use correct data types for ICMPv6 type and code Change all the code that deals directly with ICMPv6 type and code values to use u8 instead of a signed int as that's the actual data type. Signed-off-by: Brian Haley Signed-off-by: David S. Miller --- net/dccp/ipv6.c | 2 +- net/ipv6/ah6.c | 2 +- net/ipv6/esp6.c | 2 +- net/ipv6/icmp.c | 12 ++++++------ net/ipv6/ip6_tunnel.c | 18 +++++++++--------- net/ipv6/ipcomp6.c | 2 +- net/ipv6/mip6.c | 2 +- net/ipv6/raw.c | 4 ++-- net/ipv6/route.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/tunnel6.c | 2 +- net/ipv6/udp.c | 6 +++--- net/ipv6/udp_impl.h | 2 +- net/ipv6/udplite.c | 2 +- net/ipv6/xfrm6_tunnel.c | 2 +- net/sctp/ipv6.c | 2 +- 16 files changed, 32 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 05ea7440d9e5..3e70faab2989 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -85,7 +85,7 @@ static inline __u32 dccp_v6_init_sequence(struct sk_buff *skb) } static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data; const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 52449f7a1b71..86f42a288c4b 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -405,7 +405,7 @@ out: } static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index c2f250150db1..678bb95b1525 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -354,7 +354,7 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu) } static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 36dff8807183..eab62a7a8f06 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -117,7 +117,7 @@ static __inline__ void icmpv6_xmit_unlock(struct sock *sk) /* * Slightly more convenient version of icmpv6_send. */ -void icmpv6_param_prob(struct sk_buff *skb, int code, int pos) +void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) { icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev); kfree_skb(skb); @@ -161,7 +161,7 @@ static int is_ineligible(struct sk_buff *skb) /* * Check the ICMP output rate limit */ -static inline int icmpv6_xrlim_allow(struct sock *sk, int type, +static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type, struct flowi *fl) { struct dst_entry *dst; @@ -305,7 +305,7 @@ static inline void mip6_addr_swap(struct sk_buff *skb) {} /* * Send an ICMP message in response to a packet in error */ -void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, +void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, struct net_device *dev) { struct net *net = dev_net(skb->dev); @@ -590,7 +590,7 @@ out: icmpv6_xmit_unlock(sk); } -static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info) +static void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) { struct inet6_protocol *ipprot; int inner_offset; @@ -643,7 +643,7 @@ static int icmpv6_rcv(struct sk_buff *skb) struct in6_addr *saddr, *daddr; struct ipv6hdr *orig_hdr; struct icmp6hdr *hdr; - int type; + u8 type; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { struct sec_path *sp = skb_sec_path(skb); @@ -914,7 +914,7 @@ static const struct icmp6_err { }, }; -int icmpv6_err_convert(int type, int code, int *err) +int icmpv6_err_convert(u8 type, u8 code, int *err) { int fatal = 0; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 404d16a97d5c..51f410e7775a 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -394,13 +394,13 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw) static int ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, - int *type, int *code, int *msg, __u32 *info, int offset) + u8 *type, u8 *code, int *msg, __u32 *info, int offset) { struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data; struct ip6_tnl *t; int rel_msg = 0; - int rel_type = ICMPV6_DEST_UNREACH; - int rel_code = ICMPV6_ADDR_UNREACH; + u8 rel_type = ICMPV6_DEST_UNREACH; + u8 rel_code = ICMPV6_ADDR_UNREACH; __u32 rel_info = 0; __u16 len; int err = -ENOENT; @@ -488,11 +488,11 @@ out: static int ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { int rel_msg = 0; - int rel_type = type; - int rel_code = code; + u8 rel_type = type; + u8 rel_code = code; __u32 rel_info = ntohl(info); int err; struct sk_buff *skb2; @@ -586,11 +586,11 @@ out: static int ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { int rel_msg = 0; - int rel_type = type; - int rel_code = code; + u8 rel_type = type; + u8 rel_code = code; __u32 rel_info = ntohl(info); int err; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 3a0b3be7ece5..79c172f1ff01 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -51,7 +51,7 @@ #include static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { __be32 spi; struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index f995e19c87a9..f797e8c6f3b3 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -54,7 +54,7 @@ static inline void *mip6_padn(__u8 *data, __u8 padlen) return data + padlen; } -static inline void mip6_param_prob(struct sk_buff *skb, int code, int pos) +static inline void mip6_param_prob(struct sk_buff *skb, u8 code, int pos) { icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev); } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 8b0b6f948063..d6c3c1c34b2d 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -310,7 +310,7 @@ out: static void rawv6_err(struct sock *sk, struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -343,7 +343,7 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, } void raw6_icmp_error(struct sk_buff *skb, int nexthdr, - int type, int code, int inner_offset, __be32 info) + u8 type, u8 code, int inner_offset, __be32 info) { struct sock *sk; int hash; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 658293ea05ba..1473ee0a1f51 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1865,7 +1865,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) * Drop the packet on the floor */ -static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes) +static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) { int type; struct dst_entry *dst = skb_dst(skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 53b6a4192b16..58810c65b635 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -317,7 +317,7 @@ failure: } static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 669f280989c3..633ad789effc 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -124,7 +124,7 @@ drop: } static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { struct xfrm6_tunnel *handler; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 023beda6b224..33b59bd92c4d 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -312,7 +312,7 @@ csum_copy_err: } void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info, + u8 type, u8 code, int offset, __be32 info, struct udp_table *udptable) { struct ipv6_pinfo *np; @@ -346,8 +346,8 @@ out: } static __inline__ void udpv6_err(struct sk_buff *skb, - struct inet6_skb_parm *opt, int type, - int code, int offset, __be32 info ) + struct inet6_skb_parm *opt, u8 type, + u8 code, int offset, __be32 info ) { __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table); } diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 23779208c334..6bb303471e20 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -9,7 +9,7 @@ extern int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int ); extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, - int , int , int , __be32 , struct udp_table *); + u8 , u8 , int , __be32 , struct udp_table *); extern int udp_v6_get_port(struct sock *sk, unsigned short snum); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index ba162a824585..4818c48688f2 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -20,7 +20,7 @@ static int udplitev6_rcv(struct sk_buff *skb) static void udplitev6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { __udp6_lib_err(skb, opt, type, code, offset, info, &udplite_table); } diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 80193db224d9..81a95c00e503 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -262,7 +262,7 @@ static int xfrm6_tunnel_rcv(struct sk_buff *skb) } static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { /* xfrm6_tunnel native err handling */ switch (type) { diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index a63de3f7f185..6a4b19094143 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -133,7 +133,7 @@ static struct notifier_block sctp_inet6addr_notifier = { /* ICMP error handler. */ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - int type, int code, int offset, __be32 info) + u8 type, u8 code, int offset, __be32 info) { struct inet6_dev *idev; struct sock *sk; -- cgit v1.2.1 From d55d87fdff8252d0e2f7c28c2d443aee17e9d70f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 22 Jun 2009 02:25:25 +0000 Subject: net: Move rx skb_orphan call to where needed In order to get the tun driver to account packets, we need to be able to receive packets with destructors set. To be on the safe side, I added an skb_orphan call for all protocols by default since some of them (IP in particular) cannot handle receiving packets destructors properly. Now it seems that at least one protocol (CAN) expects to be able to pass skb->sk through the rx path without getting clobbered. So this patch attempts to fix this properly by moving the skb_orphan call to where it's actually needed. In particular, I've added it to skb_set_owner_[rw] which is what most users of skb->destructor call. This is actually an improvement for tun too since it means that we only give back the amount charged to the socket when the skb is passed to another socket that will also be charged accordingly. Signed-off-by: Herbert Xu Tested-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/ax25/ax25_in.c | 3 +-- net/core/dev.c | 2 -- net/irda/af_irda.c | 3 --- net/irda/ircomm/ircomm_lmp.c | 1 + 4 files changed, 2 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 5f1d2107a1dd..de56d3983de0 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -437,8 +437,7 @@ free: int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { - skb->sk = NULL; /* Initially we don't know who it's for */ - skb->destructor = NULL; /* Who initializes this, dammit?! */ + skb_orphan(skb); if (!net_eq(dev_net(dev), &init_net)) { kfree_skb(skb); diff --git a/net/core/dev.c b/net/core/dev.c index baf2dc13a34a..60b572812278 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2310,8 +2310,6 @@ ncls: if (!skb) goto out; - skb_orphan(skb); - type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 5922febe25c4..cb762c8723ea 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -913,9 +913,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) /* Clean up the original one to keep it in listen state */ irttp_listen(self->tsap); - /* Wow ! What is that ? Jean II */ - skb->sk = NULL; - skb->destructor = NULL; kfree_skb(skb); sk->sk_ack_backlog--; diff --git a/net/irda/ircomm/ircomm_lmp.c b/net/irda/ircomm/ircomm_lmp.c index 67c99d20857f..7ba96618660e 100644 --- a/net/irda/ircomm/ircomm_lmp.c +++ b/net/irda/ircomm/ircomm_lmp.c @@ -196,6 +196,7 @@ static int ircomm_lmp_data_request(struct ircomm_cb *self, /* Don't forget to refcount it - see ircomm_tty_do_softint() */ skb_get(skb); + skb_orphan(skb); skb->destructor = ircomm_lmp_flow_control; if ((self->pkt_count++ > 7) && (self->flow_status == FLOW_START)) { -- cgit v1.2.1 From b6280b47a7a42970d098a3059f4ebe7e55e90d8d Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 22 Jun 2009 10:18:53 +0000 Subject: ipv4 routing: Ensure that route cache entries are usable and reclaimable with caching is off When route caching is disabled (rt_caching returns false), We still use route cache entries that are created and passed into rt_intern_hash once. These routes need to be made usable for the one call path that holds a reference to them, and they need to be reclaimed when they're finished with their use. To be made usable, they need to be associated with a neighbor table entry (which they currently are not), otherwise iproute_finish2 just discards the packet, since we don't know which L2 peer to send the packet to. To do this binding, we need to follow the path a bit higher up in rt_intern_hash, which calls arp_bind_neighbour, but not assign the route entry to the hash table. Currently, if caching is off, we simply assign the route to the rp pointer and are reutrn success. This patch associates us with a neighbor entry first. Secondly, we need to make sure that any single use routes like this are known to the garbage collector when caching is off. If caching is off, and we try to hash in a route, it will leak when its refcount reaches zero. To avoid this, this patch calls rt_free on the route cache entry passed into rt_intern_hash. This places us on the gc list for the route cache garbage collector, so that when its refcount reaches zero, it will be reclaimed (Thanks to Alexey for this suggestion). I've tested this on a local system here, and with these patches in place, I'm able to maintain routed connectivity to remote systems, even if I set /proc/sys/net/ipv4/rt_cache_rebuild_count to -1, which forces rt_caching to return false. Signed-off-by: Neil Horman Reported-by: Jarek Poplawski Reported-by: Maxime Bizon Signed-off-by: David S. Miller --- net/ipv4/route.c | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 65b3a8b11a6c..278f46f5011b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1093,8 +1093,27 @@ restart: * If we drop it here, the callers have no way to resolve routes * when we're not caching. Instead, just point *rp at rt, so * the caller gets a single use out of the route + * Note that we do rt_free on this new route entry, so that + * once its refcount hits zero, we are still able to reap it + * (Thanks Alexey) + * Note also the rt_free uses call_rcu. We don't actually + * need rcu protection here, this is just our path to get + * on the route gc list. */ - goto report_and_exit; + + if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { + int err = arp_bind_neighbour(&rt->u.dst); + if (err) { + if (net_ratelimit()) + printk(KERN_WARNING + "Neighbour table failure & not caching routes.\n"); + rt_drop(rt); + return err; + } + } + + rt_free(rt); + goto skip_hashing; } rthp = &rt_hash_table[hash].chain; @@ -1211,7 +1230,8 @@ restart: #if RT_CACHE_DEBUG >= 2 if (rt->u.dst.rt_next) { struct rtable *trt; - printk(KERN_DEBUG "rt_cache @%02x: %pI4", hash, &rt->rt_dst); + printk(KERN_DEBUG "rt_cache @%02x: %pI4", + hash, &rt->rt_dst); for (trt = rt->u.dst.rt_next; trt; trt = trt->u.dst.rt_next) printk(" . %pI4", &trt->rt_dst); printk("\n"); @@ -1226,7 +1246,7 @@ restart: spin_unlock_bh(rt_hash_lock_addr(hash)); -report_and_exit: +skip_hashing: if (rp) *rp = rt; else -- cgit v1.2.1 From 245acb87729bc76ba65c7476665c01837e0cdccb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 24 Jun 2009 03:55:41 -0700 Subject: ipsec: Fix name of CAST algorithm Our CAST algorithm is called cast5, not cast128. Clearly nobody has ever used it :) Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_algo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index d31ccb487730..faf54c6bf96b 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -292,8 +292,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "cbc(cast128)", - .compat = "cast128", + .name = "cbc(cast5)", + .compat = "cast5", .uinfo = { .encr = { -- cgit v1.2.1 From c7a1a4c80f873d5d6ecd173035bb80eba489f380 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Wed, 24 Jun 2009 01:07:44 +0000 Subject: Phonet: publicize the Netlink notification function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pn_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index cec4e5951681..f8b4cee434c2 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -32,7 +32,7 @@ static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, u32 pid, u32 seq, int event); -static void rtmsg_notify(int event, struct net_device *dev, u8 addr) +void phonet_address_notify(int event, struct net_device *dev, u8 addr) { struct sk_buff *skb; int err = -ENOBUFS; @@ -94,7 +94,7 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) else err = phonet_address_del(dev, pnaddr); if (!err) - rtmsg_notify(nlh->nlmsg_type, dev, pnaddr); + phonet_address_notify(nlh->nlmsg_type, dev, pnaddr); return err; } -- cgit v1.2.1 From 2be6fa4c7e5731375cc5e70843a3444293c27514 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Denis-Courmont?= Date: Wed, 24 Jun 2009 01:07:45 +0000 Subject: Phonet: generate Netlink RTM_DELADDR when destroying a device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Netlink address deletion events were not sent when a network device vanished neither when Phonet was unloaded. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pn_dev.c | 52 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 80a322d77909..b0d6ddd82a9d 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -69,10 +69,27 @@ static struct phonet_device *__phonet_get(struct net_device *dev) return NULL; } -static void __phonet_device_free(struct phonet_device *pnd) +static void phonet_device_destroy(struct net_device *dev) { - list_del(&pnd->list); - kfree(pnd); + struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); + struct phonet_device *pnd; + + ASSERT_RTNL(); + + spin_lock_bh(&pndevs->lock); + pnd = __phonet_get(dev); + if (pnd) + list_del(&pnd->list); + spin_unlock_bh(&pndevs->lock); + + if (pnd) { + u8 addr; + + for (addr = find_first_bit(pnd->addrs, 64); addr < 64; + addr = find_next_bit(pnd->addrs, 64, 1+addr)) + phonet_address_notify(RTM_DELADDR, dev, addr); + kfree(pnd); + } } struct net_device *phonet_device_get(struct net *net) @@ -126,8 +143,10 @@ int phonet_address_del(struct net_device *dev, u8 addr) pnd = __phonet_get(dev); if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs)) err = -EADDRNOTAVAIL; - else if (bitmap_empty(pnd->addrs, 64)) - __phonet_device_free(pnd); + else if (bitmap_empty(pnd->addrs, 64)) { + list_del(&pnd->list); + kfree(pnd); + } spin_unlock_bh(&pndevs->lock); return err; } @@ -181,18 +200,8 @@ static int phonet_device_notify(struct notifier_block *me, unsigned long what, { struct net_device *dev = arg; - if (what == NETDEV_UNREGISTER) { - struct phonet_device_list *pndevs; - struct phonet_device *pnd; - - /* Destroy phonet-specific device data */ - pndevs = phonet_device_list(dev_net(dev)); - spin_lock_bh(&pndevs->lock); - pnd = __phonet_get(dev); - if (pnd) - __phonet_device_free(pnd); - spin_unlock_bh(&pndevs->lock); - } + if (what == NETDEV_UNREGISTER) + phonet_device_destroy(dev); return 0; } @@ -218,11 +227,12 @@ static int phonet_init_net(struct net *net) static void phonet_exit_net(struct net *net) { struct phonet_net *pnn = net_generic(net, phonet_net_id); - struct phonet_device *pnd, *n; - - list_for_each_entry_safe(pnd, n, &pnn->pndevs.list, list) - __phonet_device_free(pnd); + struct net_device *dev; + rtnl_lock(); + for_each_netdev(net, dev) + phonet_device_destroy(dev); + rtnl_unlock(); kfree(pnn); } -- cgit v1.2.1 From 308ff823ebd749a94d3b6ac26b95bc0eb114c39e Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 25 Jun 2009 16:32:52 +0200 Subject: nf_conntrack: Use rcu_barrier() RCU barriers, rcu_barrier(), is inserted two places. In nf_conntrack_expect.c nf_conntrack_expect_fini() before the kmem_cache_destroy(). Firstly to make sure the callback to the nf_ct_expect_free_rcu() code is still around. Secondly because I'm unsure about the consequence of having in flight nf_ct_expect_free_rcu/kmem_cache_free() calls while doing a kmem_cache_destroy() slab destroy. And in nf_conntrack_extend.c nf_ct_extend_unregister(), inorder to wait for completion of callbacks to __nf_ct_ext_free_rcu(), which is invoked by __nf_ct_ext_add(). It might be more efficient to call rcu_barrier() in nf_conntrack_core.c nf_conntrack_cleanup_net(), but thats make it more difficult to read the code (as the callback code in located in nf_conntrack_extend.c). Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_expect.c | 4 +++- net/netfilter/nf_conntrack_extend.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index afde8f991646..2032dfe25ca8 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -617,8 +617,10 @@ err1: void nf_conntrack_expect_fini(struct net *net) { exp_proc_remove(net); - if (net_eq(net, &init_net)) + if (net_eq(net, &init_net)) { + rcu_barrier(); /* Wait for call_rcu() before destroy */ kmem_cache_destroy(nf_ct_expect_cachep); + } nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, nf_ct_expect_hsize); } diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 4b2c769d555f..fef95be334bd 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -186,6 +186,6 @@ void nf_ct_extend_unregister(struct nf_ct_ext_type *type) rcu_assign_pointer(nf_ct_ext_types[type->id], NULL); update_alloc_size(type); mutex_unlock(&nf_ct_ext_type_mutex); - synchronize_rcu(); + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } EXPORT_SYMBOL_GPL(nf_ct_extend_unregister); -- cgit v1.2.1 From 1ac530b3553e0b4dc1e18a32bed57cfa84cd57cb Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 24 Jun 2009 22:29:31 +0000 Subject: tcp: missing check ACK flag of received segment in FIN-WAIT-2 state RFC0793 defined that in FIN-WAIT-2 state if the ACK bit is off drop the segment and return[Page 72]. But this check is missing in function tcp_timewait_state_process(). This cause the segment with FIN flag but no ACK has two diffent action: Case 1: Node A Node B <------------- FIN,ACK (enter FIN-WAIT-1) ACK -------------> (enter FIN-WAIT-2) FIN -------------> discard (move sk to tw list) Case 2: Node A Node B <------------- FIN,ACK (enter FIN-WAIT-1) ACK -------------> (enter FIN-WAIT-2) (move sk to tw list) FIN -------------> <------------- ACK This patch fixed the problem. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/ipv4/tcp_minisocks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 43bbba7926ee..f8d67ccc64f3 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -128,7 +128,8 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, goto kill_with_rst; /* Dup ACK? */ - if (!after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) || + if (!th->ack || + !after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) || TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) { inet_twsk_put(tw); return TCP_TW_SUCCESS; -- cgit v1.2.1 From a1faa69810b2af562b70b2a71c116c7d03575dd3 Mon Sep 17 00:00:00 2001 From: Jens Rosenboom Date: Thu, 25 Jun 2009 04:55:50 +0000 Subject: ipv6: avoid wraparound for expired preferred lifetime Avoid showing wrong high values when the preferred lifetime of an address is expired. Signed-off-by: Jens Rosenboom Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8c1e86afbbf5..3883b4036a74 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3362,7 +3362,10 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, valid = ifa->valid_lft; if (preferred != INFINITY_LIFE_TIME) { long tval = (jiffies - ifa->tstamp)/HZ; - preferred -= tval; + if (preferred > tval) + preferred -= tval; + else + preferred = 0; if (valid != INFINITY_LIFE_TIME) valid -= tval; } -- cgit v1.2.1 From 10e85448019097e4fcfa535f612f51d0d31a34f4 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 26 Jun 2009 10:46:08 +0000 Subject: decnet: Use rcu_barrier() on module unload. The decnet module unloading as been disabled with a '#if 0' statement, because it have had issues. We add a rcu_barrier() anyhow for correctness. The maintainer (Chrissie Caulfield) will look into the unload issue when time permits. Acked-by: Paul E. McKenney Acked-by: Chrissie Caulfield Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/decnet/af_decnet.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index d351b8db0df5..77d40289653c 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2413,6 +2413,8 @@ static void __exit decnet_exit(void) proc_net_remove(&init_net, "decnet"); proto_unregister(&dn_proto); + + rcu_barrier_bh(); /* Wait for completion of call_rcu_bh()'s */ } module_exit(decnet_exit); #endif -- cgit v1.2.1 From 1f2ccd00f224a4e2d6d26f590f3e6851f3deef99 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 26 Jun 2009 10:46:03 +0000 Subject: ipv6: Use rcu_barrier() on module unload. The ipv6 module uses rcu_call() thus it should use rcu_barrier() on module unload. Acked-by: Paul E. McKenney Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 85b3d0036afd..caa0278d30a9 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1284,6 +1284,8 @@ static void __exit inet6_exit(void) proto_unregister(&udplitev6_prot); proto_unregister(&udpv6_prot); proto_unregister(&tcpv6_prot); + + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } module_exit(inet6_exit); -- cgit v1.2.1 From 473c22d759e73cbbe604f41105b497817cc2ee8e Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 26 Jun 2009 10:45:48 +0000 Subject: bridge: Use rcu_barrier() instead of syncronize_net() on unload. When unloading modules that uses call_rcu() callbacks, then we must use rcu_barrier(). This module uses syncronize_net() which is not enough to be sure that all callback has been completed. Acked-by: Paul E. McKenney Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/bridge/br.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br.c b/net/bridge/br.c index 9aac5213105a..e1241c76239a 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -93,7 +93,7 @@ static void __exit br_deinit(void) unregister_pernet_subsys(&br_net_ops); - synchronize_net(); + rcu_barrier(); /* Wait for completion of call_rcu()'s */ br_netfilter_fini(); #if defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE) -- cgit v1.2.1 From 75de874f5c35f679c6370fccc2bf4930e638ef3b Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 26 Jun 2009 10:45:58 +0000 Subject: sunrpc: Use rcu_barrier() on unload. The sunrpc module uses rcu_call() thus it should use rcu_barrier() on module unload. Have not verified that the possibility for new call_rcu() callbacks has been disabled. As a hint for checking, the functions calling call_rcu() (unx_destroy_cred and generic_destroy_cred) are registered as crdestroy function pointer in struct rpc_credops. Acked-by: Paul E. McKenney Acked-by: Trond Myklebust Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/sunrpc/sunrpc_syms.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 843629f55763..adaa81982f74 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -66,6 +66,7 @@ cleanup_sunrpc(void) #ifdef CONFIG_PROC_FS rpc_proc_exit(); #endif + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } MODULE_LICENSE("GPL"); module_init(init_sunrpc); -- cgit v1.2.1 From 4a27096bbe2cad4c6e78802a0d9dfe0e598a1129 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 26 Jun 2009 10:45:53 +0000 Subject: mac80211: Use rcu_barrier() on unload. The mac80211 module uses rcu_call() thus it should use rcu_barrier() on module unload. The rcu_barrier() is placed in mech.c ieee80211_stop_mesh() which is invoked from ieee80211_stop() in case vif.type == NL80211_IFTYPE_MESH_POINT. Acked-by: Paul E. McKenney Acked-by: Johannes Berg Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/mac80211/mesh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index fc712e60705d..11cf45bce38a 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -494,7 +494,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) * should it be using the interface and enqueuing * frames at this very time on another CPU. */ - synchronize_rcu(); + rcu_barrier(); /* Wait for RX path and call_rcu()'s */ skb_queue_purge(&sdata->u.mesh.skb_queue); } -- cgit v1.2.1 From 71f9dacd2e4d233029e9e956ca3f79531f411827 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 26 Jun 2009 19:22:37 -0700 Subject: inet: Call skb_orphan before tproxy activates As transparent proxying looks up the socket early and assigns it to the skb for later processing, we must drop any existing socket ownership prior to that in order to distinguish between the case where tproxy is active and where it is not. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/ip_input.c | 3 +++ net/ipv6/ip6_input.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'net') diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 490ce20faf38..db46b4b5b2b9 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -440,6 +440,9 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, /* Remove any debris in the socket control block */ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + /* Must drop socket now because of tproxy. */ + skb_orphan(skb); + return NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, dev, NULL, ip_rcv_finish); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index c3a07d75b5f5..6d6a4277c677 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -139,6 +139,9 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt rcu_read_unlock(); + /* Must drop socket now because of tproxy. */ + skb_orphan(skb); + return NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, dev, NULL, ip6_rcv_finish); err: -- cgit v1.2.1 From ff780cd8f2fa928b193554f593b36d1243554212 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 26 Jun 2009 19:27:04 -0700 Subject: gro: Flush GRO packets in napi_disable_pending path When NAPI is disabled while we're in net_rx_action, we end up calling __napi_complete without flushing GRO packets. This is a bug as it would cause the GRO packets to linger, of course it also literally BUGs to catch error like this :) This patch changes it to napi_complete, with the obligatory IRQ reenabling. This should be safe because we've only just disabled IRQs and it does not materially affect the test conditions in between. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 60b572812278..70c27e0c7c32 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2823,9 +2823,11 @@ static void net_rx_action(struct softirq_action *h) * move the instance around on the list at-will. */ if (unlikely(work == weight)) { - if (unlikely(napi_disable_pending(n))) - __napi_complete(n); - else + if (unlikely(napi_disable_pending(n))) { + local_irq_enable(); + napi_complete(n); + local_irq_disable(); + } else list_move_tail(&n->poll_list, list); } -- cgit v1.2.1 From a3a9f79e361e864f0e9d75ebe2a0cb43d17c4272 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 29 Jun 2009 14:07:56 +0200 Subject: netfilter: tcp conntrack: fix unacknowledged data detection with NAT When NAT helpers change the TCP packet size, the highest seen sequence number needs to be corrected. This is currently only done upwards, when the packet size is reduced the sequence number is unchanged. This causes TCP conntrack to falsely detect unacknowledged data and decrease the timeout. Fix by updating the highest seen sequence number in both directions after packet mangling. Tested-by: Krzysztof Piotr Oledzki Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_nat_helper.c | 17 +++++++++++------ net/netfilter/nf_conntrack_proto_tcp.c | 6 +++--- 2 files changed, 14 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 155c008626c8..09172a65d9b6 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -191,7 +191,8 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, ct, ctinfo); /* Tell TCP window tracking about seq change */ nf_conntrack_tcp_update(skb, ip_hdrlen(skb), - ct, CTINFO2DIR(ctinfo)); + ct, CTINFO2DIR(ctinfo), + (int)rep_len - (int)match_len); nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); } @@ -377,6 +378,7 @@ nf_nat_seq_adjust(struct sk_buff *skb, struct tcphdr *tcph; int dir; __be32 newseq, newack; + s16 seqoff, ackoff; struct nf_conn_nat *nat = nfct_nat(ct); struct nf_nat_seq *this_way, *other_way; @@ -390,15 +392,18 @@ nf_nat_seq_adjust(struct sk_buff *skb, tcph = (void *)skb->data + ip_hdrlen(skb); if (after(ntohl(tcph->seq), this_way->correction_pos)) - newseq = htonl(ntohl(tcph->seq) + this_way->offset_after); + seqoff = this_way->offset_after; else - newseq = htonl(ntohl(tcph->seq) + this_way->offset_before); + seqoff = this_way->offset_before; if (after(ntohl(tcph->ack_seq) - other_way->offset_before, other_way->correction_pos)) - newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_after); + ackoff = other_way->offset_after; else - newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before); + ackoff = other_way->offset_before; + + newseq = htonl(ntohl(tcph->seq) + seqoff); + newack = htonl(ntohl(tcph->ack_seq) - ackoff); inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); @@ -413,7 +418,7 @@ nf_nat_seq_adjust(struct sk_buff *skb, if (!nf_nat_sack_adjust(skb, tcph, ct, ctinfo)) return 0; - nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir); + nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir, seqoff); return 1; } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 33fc0a443f3d..97a82ba75376 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -720,8 +720,8 @@ static bool tcp_in_window(const struct nf_conn *ct, /* Caller must linearize skb at tcp header. */ void nf_conntrack_tcp_update(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conn *ct, - int dir) + struct nf_conn *ct, int dir, + s16 offset) { const struct tcphdr *tcph = (const void *)skb->data + dataoff; const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[dir]; @@ -734,7 +734,7 @@ void nf_conntrack_tcp_update(const struct sk_buff *skb, /* * We have to worry for the ack in the reply packet only... */ - if (after(end, ct->proto.tcp.seen[dir].td_end)) + if (ct->proto.tcp.seen[dir].td_end + offset == end) ct->proto.tcp.seen[dir].td_end = end; ct->proto.tcp.last_end = end; spin_unlock_bh(&ct->lock); -- cgit v1.2.1 From d6d3f08b0fd998b647a05540cedd11a067b72867 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 29 Jun 2009 14:31:46 +0200 Subject: netfilter: xtables: conntrack match revision 2 As reported by Philip, the UNTRACKED state bit does not fit within the 8-bit state_mask member. Enlarge state_mask and give status_mask a few more bits too. Reported-by: Philip Craig References: http://markmail.org/thread/b7eg6aovfh4agyz7 Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/netfilter/xt_conntrack.c | 66 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 0b7139f3dd78..fc581800698e 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -129,7 +129,7 @@ conntrack_addrcmp(const union nf_inet_addr *kaddr, static inline bool conntrack_mt_origsrc(const struct nf_conn *ct, - const struct xt_conntrack_mtinfo1 *info, + const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3, @@ -138,7 +138,7 @@ conntrack_mt_origsrc(const struct nf_conn *ct, static inline bool conntrack_mt_origdst(const struct nf_conn *ct, - const struct xt_conntrack_mtinfo1 *info, + const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3, @@ -147,7 +147,7 @@ conntrack_mt_origdst(const struct nf_conn *ct, static inline bool conntrack_mt_replsrc(const struct nf_conn *ct, - const struct xt_conntrack_mtinfo1 *info, + const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3, @@ -156,7 +156,7 @@ conntrack_mt_replsrc(const struct nf_conn *ct, static inline bool conntrack_mt_repldst(const struct nf_conn *ct, - const struct xt_conntrack_mtinfo1 *info, + const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3, @@ -164,7 +164,7 @@ conntrack_mt_repldst(const struct nf_conn *ct, } static inline bool -ct_proto_port_check(const struct xt_conntrack_mtinfo1 *info, +ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info, const struct nf_conn *ct) { const struct nf_conntrack_tuple *tuple; @@ -204,7 +204,7 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo1 *info, static bool conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_conntrack_mtinfo1 *info = par->matchinfo; + const struct xt_conntrack_mtinfo2 *info = par->matchinfo; enum ip_conntrack_info ctinfo; const struct nf_conn *ct; unsigned int statebit; @@ -278,6 +278,16 @@ conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } +static bool +conntrack_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) +{ + const struct xt_conntrack_mtinfo2 *const *info = par->matchinfo; + struct xt_match_param newpar = *par; + + newpar.matchinfo = *info; + return conntrack_mt(skb, &newpar); +} + static bool conntrack_mt_check(const struct xt_mtchk_param *par) { if (nf_ct_l3proto_try_module_get(par->family) < 0) { @@ -288,11 +298,45 @@ static bool conntrack_mt_check(const struct xt_mtchk_param *par) return true; } +static bool conntrack_mt_check_v1(const struct xt_mtchk_param *par) +{ + struct xt_conntrack_mtinfo1 *info = par->matchinfo; + struct xt_conntrack_mtinfo2 *up; + int ret = conntrack_mt_check(par); + + if (ret < 0) + return ret; + + up = kmalloc(sizeof(*up), GFP_KERNEL); + if (up == NULL) { + nf_ct_l3proto_module_put(par->family); + return -ENOMEM; + } + + /* + * The strategy here is to minimize the overhead of v1 matching, + * by prebuilding a v2 struct and putting the pointer into the + * v1 dataspace. + */ + memcpy(up, info, offsetof(typeof(*info), state_mask)); + up->state_mask = info->state_mask; + up->status_mask = info->status_mask; + *(void **)info = up; + return true; +} + static void conntrack_mt_destroy(const struct xt_mtdtor_param *par) { nf_ct_l3proto_module_put(par->family); } +static void conntrack_mt_destroy_v1(const struct xt_mtdtor_param *par) +{ + struct xt_conntrack_mtinfo2 **info = par->matchinfo; + kfree(*info); + conntrack_mt_destroy(par); +} + #ifdef CONFIG_COMPAT struct compat_xt_conntrack_info { @@ -363,6 +407,16 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = { .revision = 1, .family = NFPROTO_UNSPEC, .matchsize = sizeof(struct xt_conntrack_mtinfo1), + .match = conntrack_mt_v1, + .checkentry = conntrack_mt_check_v1, + .destroy = conntrack_mt_destroy_v1, + .me = THIS_MODULE, + }, + { + .name = "conntrack", + .revision = 2, + .family = NFPROTO_UNSPEC, + .matchsize = sizeof(struct xt_conntrack_mtinfo2), .match = conntrack_mt, .checkentry = conntrack_mt_check, .destroy = conntrack_mt_destroy, -- cgit v1.2.1 From 932c1329acebc03ef5efa3647c9c3a967b59d0c4 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Fri, 19 Jun 2009 17:00:08 +0400 Subject: nl802154: fix Oops in ieee802154_nl_get_dev ieee802154_nl_get_dev() lacks check for the existance of the device that was returned by dev_get_XXX, thus resulting in Oops for non-existing devices. Fix it. Signed-off-by: Dmitry Eremin-Solenikov --- net/ieee802154/netlink.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 105ad10876af..332b947ae812 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -276,6 +276,9 @@ static struct net_device *ieee802154_nl_get_dev(struct genl_info *info) else return NULL; + if (!dev) + return NULL; + if (dev->type != ARPHRD_IEEE802154) { dev_put(dev); return NULL; -- cgit v1.2.1 From dfd06fe8246c0425f8d6850b8e2c872b0d691ec3 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Fri, 19 Jun 2009 17:02:09 +0400 Subject: nl802154: add module license and description Signed-off-by: Dmitry Eremin-Solenikov --- net/ieee802154/netlink.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 332b947ae812..27eda9fdf3c2 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -524,3 +524,6 @@ static void __exit ieee802154_nl_exit(void) } module_exit(ieee802154_nl_exit); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("ieee 802.15.4 configuration interface"); + -- cgit v1.2.1 From 8e5b9dda99cc86bdbd822935fcc37c5808e271b3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 28 Jun 2009 18:03:30 +0000 Subject: tcp: Stop non-TSO packets morphing into TSO If a socket starts out on a non-TSO route, and then switches to a TSO route, then the tail on the tx queue can morph into a TSO packet, causing mischief because the rest of the stack does not expect a partially linear TSO packet. This patch fixes this by ensuring that skb->ip_summed is set to CHECKSUM_PARTIAL before declaring a packet as TSO. Reported-by: Johannes Berg Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 416fc4c2e7eb..5bdf08d312d9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -725,7 +725,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { - if (skb->len <= mss_now || !sk_can_gso(sk)) { + if (skb->len <= mss_now || !sk_can_gso(sk) || + skb->ip_summed == CHECKSUM_NONE) { /* Avoid the costly divide in the normal * non-TSO case. */ -- cgit v1.2.1 From 6828b92bd21acd65113dfe0541f19f5df0d9668f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 28 Jun 2009 18:06:41 +0000 Subject: tcp: Do not tack on TSO data to non-TSO packet If a socket starts out on a non-TSO route, and then switches to a TSO route, then we will tack on data to the tail of the tx queue even if it started out life as non-TSO. This is suboptimal because all of it will then be copied and checksummed unnecessarily. This patch fixes this by ensuring that skb->ip_summed is set to CHECKSUM_PARTIAL before appending extra data beyond the MSS. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 17b89c523f9d..7870a535dac6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -903,13 +903,17 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, iov++; while (seglen > 0) { - int copy; + int copy = 0; + int max = size_goal; skb = tcp_write_queue_tail(sk); + if (tcp_send_head(sk)) { + if (skb->ip_summed == CHECKSUM_NONE) + max = mss_now; + copy = max - skb->len; + } - if (!tcp_send_head(sk) || - (copy = size_goal - skb->len) <= 0) { - + if (copy <= 0) { new_segment: /* Allocate new segment. If the interface is SG, * allocate skb fitting to single page. @@ -930,6 +934,7 @@ new_segment: skb_entail(sk, skb); copy = size_goal; + max = size_goal; } /* Try to append data to the end of skb. */ @@ -1028,7 +1033,7 @@ new_segment: if ((seglen -= copy) == 0 && iovlen == 0) goto out; - if (skb->len < size_goal || (flags & MSG_OOB)) + if (skb->len < max || (flags & MSG_OOB)) continue; if (forced_push(tp)) { -- cgit v1.2.1 From 1802571b9865c0fc1d8d0fa39cf73275f3a75af3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 28 Jun 2009 18:42:53 +0000 Subject: xfrm: use xfrm_addr_cmp() instead of compare addresses directly Clean up to use xfrm_addr_cmp() instead of compare addresses directly. Signed-off-by: Wei Yongjun Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 57 ++++++++------------------------------------------- 1 file changed, 8 insertions(+), 49 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5f1f86565f16..f2f7c638083e 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -668,22 +668,10 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, xfrm_address_t *d hlist_for_each_entry(x, entry, net->xfrm.state_byspi+h, byspi) { if (x->props.family != family || x->id.spi != spi || - x->id.proto != proto) + x->id.proto != proto || + xfrm_addr_cmp(&x->id.daddr, daddr, family)) continue; - switch (family) { - case AF_INET: - if (x->id.daddr.a4 != daddr->a4) - continue; - break; - case AF_INET6: - if (!ipv6_addr_equal((struct in6_addr *)daddr, - (struct in6_addr *) - x->id.daddr.a6)) - continue; - break; - } - xfrm_state_hold(x); return x; } @@ -699,26 +687,11 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, xfrm_addre hlist_for_each_entry(x, entry, net->xfrm.state_bysrc+h, bysrc) { if (x->props.family != family || - x->id.proto != proto) + x->id.proto != proto || + xfrm_addr_cmp(&x->id.daddr, daddr, family) || + xfrm_addr_cmp(&x->props.saddr, saddr, family)) continue; - switch (family) { - case AF_INET: - if (x->id.daddr.a4 != daddr->a4 || - x->props.saddr.a4 != saddr->a4) - continue; - break; - case AF_INET6: - if (!ipv6_addr_equal((struct in6_addr *)daddr, - (struct in6_addr *) - x->id.daddr.a6) || - !ipv6_addr_equal((struct in6_addr *)saddr, - (struct in6_addr *) - x->props.saddr.a6)) - continue; - break; - } - xfrm_state_hold(x); return x; } @@ -1001,25 +974,11 @@ static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family x->props.family != family || x->km.state != XFRM_STATE_ACQ || x->id.spi != 0 || - x->id.proto != proto) + x->id.proto != proto || + xfrm_addr_cmp(&x->id.daddr, daddr, family) || + xfrm_addr_cmp(&x->props.saddr, saddr, family)) continue; - switch (family) { - case AF_INET: - if (x->id.daddr.a4 != daddr->a4 || - x->props.saddr.a4 != saddr->a4) - continue; - break; - case AF_INET6: - if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6, - (struct in6_addr *)daddr) || - !ipv6_addr_equal((struct in6_addr *) - x->props.saddr.a6, - (struct in6_addr *)saddr)) - continue; - break; - } - xfrm_state_hold(x); return x; } -- cgit v1.2.1 From ff0ac74afb5b9916641723a78796d4ee7937c2ea Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 28 Jun 2009 22:49:37 +0000 Subject: sctp: xmit sctp packet always return no route error Commit 'net: skb->dst accessors'(adf30907d63893e4208dfe3f5c88ae12bc2f25d5) broken the sctp protocol stack, the sctp packet can never be sent out after Eric Dumazet's patch, which have typo in the sctp code. Signed-off-by: Wei Yongjun Acked-by: Eric Dumazet Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/output.c b/net/sctp/output.c index b76411444515..b94c21190566 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -407,7 +407,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) } dst = dst_clone(tp->dst); skb_dst_set(nskb, dst); - if (dst) + if (!dst) goto no_route; /* Build the SCTP header. */ -- cgit v1.2.1 From 008440e3ad4b72f5048d1b1f6f5ed894fdc5ad08 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Tue, 30 Jun 2009 12:47:19 -0700 Subject: ipv4: Fix fib_trie rebalancing, part 3 Alas current delaying of freeing old tnodes by RCU in trie_rebalance is still not enough because we can free a top tnode before updating a t->trie pointer. Reported-by: Pawel Staszewski Tested-by: Pawel Staszewski Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 012cf5a68581..00a54b246dfe 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1021,6 +1021,9 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) (struct node *)tn, wasfull); tp = node_parent((struct node *) tn); + if (!tp) + rcu_assign_pointer(t->trie, (struct node *)tn); + tnode_free_flush(); if (!tp) break; -- cgit v1.2.1 From f8a68e752bc4e39644843403168137663c984524 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 30 Jun 2009 16:27:17 +0000 Subject: Revert "ipv4: arp announce, arp_proxy and windows ip conflict verification" This reverts commit 73ce7b01b4496a5fbf9caf63033c874be692333f. After discovering that we don't listen to gratuitious arps in 2.6.30 I tracked the failure down to this commit. The patch makes absolutely no sense. RFC2131 RFC3927 and RFC5227. are all in agreement that an arp request with sip == 0 should be used for the probe (to prevent learning) and an arp request with sip == tip should be used for the gratitous announcement that people can learn from. It appears the author of the broken patch got those two cases confused and modified the code to drop all gratuitous arp traffic. Ouch! Cc: stable@kernel.org Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/ipv4/arp.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 8a3881e28aca..c29d75d8f1b1 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -801,11 +801,8 @@ static int arp_process(struct sk_buff *skb) * cache. */ - /* - * Special case: IPv4 duplicate address detection packet (RFC2131) - * and Gratuitous ARP/ARP Announce. (RFC3927, Section 2.4) - */ - if (sip == 0 || tip == sip) { + /* Special case: IPv4 duplicate address detection packet (RFC2131) */ + if (sip == 0) { if (arp->ar_op == htons(ARPOP_REQUEST) && inet_addr_type(net, tip) == RTN_LOCAL && !arp_ignore(in_dev, sip, tip)) -- cgit v1.2.1