From c164a9ba0a8870c5c9d353f63085319931d69f23 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Tue, 22 Aug 2006 11:50:39 -0700 Subject: Fix sctp privilege elevation (CVE-2006-3745) sctp_make_abort_user() now takes the msg_len along with the msg so that we don't have to recalculate the bytes in iovec. It also uses memcpy_fromiovec() so that we don't go beyond the length allocated. It is good to have this fix even if verify_iovec() is fixed to return error on overflow. Signed-off-by: Sridhar Samudrala Signed-off-by: Greg Kroah-Hartman --- net/sctp/sm_make_chunk.c | 30 +++++++++--------------------- net/sctp/sm_statefuns.c | 20 ++++---------------- net/sctp/socket.c | 10 +++++++++- 3 files changed, 22 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 4f11f5858209..17b509282cf2 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -806,38 +806,26 @@ no_mem: /* Helper to create ABORT with a SCTP_ERROR_USER_ABORT error. */ struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *asoc, - const struct sctp_chunk *chunk, - const struct msghdr *msg) + const struct msghdr *msg, + size_t paylen) { struct sctp_chunk *retval; - void *payload = NULL, *payoff; - size_t paylen = 0; - struct iovec *iov = NULL; - int iovlen = 0; - - if (msg) { - iov = msg->msg_iov; - iovlen = msg->msg_iovlen; - paylen = get_user_iov_size(iov, iovlen); - } + void *payload = NULL; + int err; - retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen); + retval = sctp_make_abort(asoc, NULL, sizeof(sctp_errhdr_t) + paylen); if (!retval) goto err_chunk; if (paylen) { /* Put the msg_iov together into payload. */ - payload = kmalloc(paylen, GFP_ATOMIC); + payload = kmalloc(paylen, GFP_KERNEL); if (!payload) goto err_payload; - payoff = payload; - for (; iovlen > 0; --iovlen) { - if (copy_from_user(payoff, iov->iov_base,iov->iov_len)) - goto err_copy; - payoff += iov->iov_len; - iov++; - } + err = memcpy_fromiovec(payload, msg->msg_iov, paylen); + if (err < 0) + goto err_copy; } sctp_init_cause(retval, SCTP_ERROR_USER_ABORT, payload, paylen); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index ead3f1b0ea3d..5b5ae7958322 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -4031,18 +4031,12 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort( * from its upper layer, but retransmits data to the far end * if necessary to fill gaps. */ - struct msghdr *msg = arg; - struct sctp_chunk *abort; + struct sctp_chunk *abort = arg; sctp_disposition_t retval; retval = SCTP_DISPOSITION_CONSUME; - /* Generate ABORT chunk to send the peer. */ - abort = sctp_make_abort_user(asoc, NULL, msg); - if (!abort) - retval = SCTP_DISPOSITION_NOMEM; - else - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); /* Even if we can't send the ABORT due to low memory delete the * TCB. This is a departure from our typical NOMEM handling. @@ -4166,8 +4160,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( void *arg, sctp_cmd_seq_t *commands) { - struct msghdr *msg = arg; - struct sctp_chunk *abort; + struct sctp_chunk *abort = arg; sctp_disposition_t retval; /* Stop T1-init timer */ @@ -4175,12 +4168,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); retval = SCTP_DISPOSITION_CONSUME; - /* Generate ABORT chunk to send the peer */ - abort = sctp_make_abort_user(asoc, NULL, msg); - if (!abort) - retval = SCTP_DISPOSITION_NOMEM; - else - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 54722e622e6d..fde3f55bfd4b 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1520,8 +1520,16 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, goto out_unlock; } if (sinfo_flags & SCTP_ABORT) { + struct sctp_chunk *chunk; + + chunk = sctp_make_abort_user(asoc, msg, msg_len); + if (!chunk) { + err = -ENOMEM; + goto out_unlock; + } + SCTP_DEBUG_PRINTK("Aborting association: %p\n", asoc); - sctp_primitive_ABORT(asoc, msg); + sctp_primitive_ABORT(asoc, chunk); err = 0; goto out_unlock; } -- cgit v1.2.1 From e0b7cde9975e17a61b4511c7822803dfb7210011 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 21 Aug 2006 15:31:08 -0700 Subject: [NETFILTER]: arp_tables: fix table locking in arpt_do_table table->private might change because of ruleset changes, don't use it without holding the lock. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/arp_tables.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index df4854cf598b..8d1d7a6e72a5 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -236,7 +236,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb, struct arpt_entry *e, *back; const char *indev, *outdev; void *table_base; - struct xt_table_info *private = table->private; + struct xt_table_info *private; /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) + @@ -248,6 +248,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb, outdev = out ? out->name : nulldevname; read_lock_bh(&table->lock); + private = table->private; table_base = (void *)private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); -- cgit v1.2.1 From 316c1592bea94ead75301cb764523661fbbcc1ca Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 22 Aug 2006 00:06:11 -0700 Subject: [TCP]: Limit window scaling if window is clamped. This small change allows for easy per-route workarounds for broken hosts or middleboxes that are not compliant with TCP standards for window scaling. Rather than having to turn off window scaling globally. This patch allows reducing or disabling window scaling if window clamp is present. Example: Mark Lord reported a problem with 2.6.17 kernel being unable to access http://www.everymac.com # ip route add 216.145.246.23/32 via 10.8.0.1 window 65535 Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 507adefbc17c..b4f3ffe1b3b4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -201,6 +201,7 @@ void tcp_select_initial_window(int __space, __u32 mss, * See RFC1323 for an explanation of the limit to 14 */ space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max); + space = min_t(u32, space, *window_clamp); while (space > 65535 && (*rcv_wscale) < 14) { space >>= 1; (*rcv_wscale)++; -- cgit v1.2.1 From 5d67476fff2df6ff12f60b540fd0e74cf2a668f9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 31 Jul 2006 14:11:48 -0700 Subject: SUNRPC: make rpc_unlink() take a dentry argument instead of a path Signe-off-by: Trond Myklebust (cherry picked from 88bf6d811b01a4be7fd507d18bf5f1c527989089 commit) --- net/sunrpc/auth_gss/auth_gss.c | 2 +- net/sunrpc/rpc_pipe.c | 20 ++++++-------------- 2 files changed, 7 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 4a9aa9393b97..beaa7b848246 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -718,7 +718,7 @@ gss_destroy(struct rpc_auth *auth) auth, auth->au_flavor); gss_auth = container_of(auth, struct gss_auth, rpc_auth); - rpc_unlink(gss_auth->path); + rpc_unlink(gss_auth->dentry); dput(gss_auth->dentry); gss_auth->dentry = NULL; gss_mech_put(gss_auth->mech); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index a3bd2db2e024..9144f2767b66 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -746,22 +746,15 @@ err_dput: } int -rpc_unlink(char *path) +rpc_unlink(struct dentry *dentry) { - struct nameidata nd; - struct dentry *dentry; + struct dentry *parent; struct inode *dir; - int error; + int error = 0; - if ((error = rpc_lookup_parent(path, &nd)) != 0) - return error; - dir = nd.dentry->d_inode; + parent = dget_parent(dentry); + dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len); - if (IS_ERR(dentry)) { - error = PTR_ERR(dentry); - goto out_release; - } d_drop(dentry); if (dentry->d_inode) { rpc_close_pipes(dentry->d_inode); @@ -769,9 +762,8 @@ rpc_unlink(char *path) } dput(dentry); inode_dir_notify(dir, DN_DELETE); -out_release: mutex_unlock(&dir->i_mutex); - rpc_release_path(&nd); + dput(parent); return error; } -- cgit v1.2.1 From dff02cc1a34fcb60904a2c57cb351857cc11219e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 31 Jul 2006 14:17:18 -0700 Subject: NFS: clean up rpc_rmdir Make it take a dentry argument instead of a path Signed-off-by: Trond Myklebust (cherry picked from 648d4116eb2509f010f7f34704a650150309b3e7 commit) --- net/sunrpc/clnt.c | 6 +++--- net/sunrpc/rpc_pipe.c | 18 +++++------------- 2 files changed, 8 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d6409e757219..d307556872db 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -183,7 +183,7 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname, out_no_auth: if (!IS_ERR(clnt->cl_dentry)) { - rpc_rmdir(clnt->cl_pathname); + rpc_rmdir(clnt->cl_dentry); dput(clnt->cl_dentry); rpc_put_mount(); } @@ -320,8 +320,8 @@ rpc_destroy_client(struct rpc_clnt *clnt) rpc_destroy_client(clnt->cl_parent); goto out_free; } - if (clnt->cl_pathname[0]) - rpc_rmdir(clnt->cl_pathname); + if (!IS_ERR(clnt->cl_dentry)) + rpc_rmdir(clnt->cl_dentry); if (clnt->cl_xprt) { xprt_destroy(clnt->cl_xprt); clnt->cl_xprt = NULL; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 9144f2767b66..9c355e1ae61a 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -684,28 +684,20 @@ err_dput: } int -rpc_rmdir(char *path) +rpc_rmdir(struct dentry *dentry) { - struct nameidata nd; - struct dentry *dentry; + struct dentry *parent; struct inode *dir; int error; - if ((error = rpc_lookup_parent(path, &nd)) != 0) - return error; - dir = nd.dentry->d_inode; + parent = dget_parent(dentry); + dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - dentry = lookup_one_len(nd.last.name, nd.dentry, nd.last.len); - if (IS_ERR(dentry)) { - error = PTR_ERR(dentry); - goto out_release; - } rpc_depopulate(dentry); error = __rpc_rmdir(dir, dentry); dput(dentry); -out_release: mutex_unlock(&dir->i_mutex); - rpc_release_path(&nd); + dput(parent); return error; } -- cgit v1.2.1 From 68adb0af51ebccb72ffb14d49cb8121b1afc4259 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 10 Aug 2006 17:51:46 -0400 Subject: SUNRPC: rpc_unlink() must check for unhashed dentries A prior call to rpc_depopulate() by rpc_rmdir() on the parent directory may have already called simple_unlink() on this entry. Add the same check to rpc_rmdir(). Also remove a redundant call to rpc_close_pipes() in rpc_rmdir. Signed-off-by: Trond Myklebust (cherry picked from 0bbfb9d20f6437c4031aa3bf9b4d311a053e58e3 commit) --- net/sunrpc/rpc_pipe.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 9c355e1ae61a..0b1a1ac8a4bc 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -539,6 +539,7 @@ repeat: rpc_close_pipes(dentry->d_inode); simple_unlink(dir, dentry); } + inode_dir_notify(dir, DN_DELETE); dput(dentry); } while (n); goto repeat; @@ -610,8 +611,8 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry) int error; shrink_dcache_parent(dentry); - if (dentry->d_inode) - rpc_close_pipes(dentry->d_inode); + if (d_unhashed(dentry)) + return 0; if ((error = simple_rmdir(dir, dentry)) != 0) return error; if (!error) { @@ -747,13 +748,15 @@ rpc_unlink(struct dentry *dentry) parent = dget_parent(dentry); dir = parent->d_inode; mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); - d_drop(dentry); - if (dentry->d_inode) { - rpc_close_pipes(dentry->d_inode); - error = simple_unlink(dir, dentry); + if (!d_unhashed(dentry)) { + d_drop(dentry); + if (dentry->d_inode) { + rpc_close_pipes(dentry->d_inode); + error = simple_unlink(dir, dentry); + } + inode_dir_notify(dir, DN_DELETE); } dput(dentry); - inode_dir_notify(dir, DN_DELETE); mutex_unlock(&dir->i_mutex); dput(parent); return error; -- cgit v1.2.1 From 8f8e7a50f450fcb86a5b2ffb94543c57a14f8260 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 14 Aug 2006 13:11:15 -0400 Subject: SUNRPC: Fix dentry refcounting issues with users of rpc_pipefs rpc_unlink() and rpc_rmdir() will dput the dentry reference for you. Signed-off-by: Trond Myklebust (cherry picked from a05a57effa71a1f67ccbfc52335c10c8b85f3f6a commit) --- net/sunrpc/auth_gss/auth_gss.c | 1 - net/sunrpc/clnt.c | 15 ++++++--------- 2 files changed, 6 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index beaa7b848246..ef1cf5b476c8 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -719,7 +719,6 @@ gss_destroy(struct rpc_auth *auth) gss_auth = container_of(auth, struct gss_auth, rpc_auth); rpc_unlink(gss_auth->dentry); - dput(gss_auth->dentry); gss_auth->dentry = NULL; gss_mech_put(gss_auth->mech); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d307556872db..d9eac7069101 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -184,7 +184,6 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname, out_no_auth: if (!IS_ERR(clnt->cl_dentry)) { rpc_rmdir(clnt->cl_dentry); - dput(clnt->cl_dentry); rpc_put_mount(); } out_no_path: @@ -251,10 +250,8 @@ rpc_clone_client(struct rpc_clnt *clnt) new->cl_autobind = 0; new->cl_oneshot = 0; new->cl_dead = 0; - if (!IS_ERR(new->cl_dentry)) { + if (!IS_ERR(new->cl_dentry)) dget(new->cl_dentry); - rpc_get_mount(); - } rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); @@ -317,11 +314,15 @@ rpc_destroy_client(struct rpc_clnt *clnt) clnt->cl_auth = NULL; } if (clnt->cl_parent != clnt) { + if (!IS_ERR(clnt->cl_dentry)) + dput(clnt->cl_dentry); rpc_destroy_client(clnt->cl_parent); goto out_free; } - if (!IS_ERR(clnt->cl_dentry)) + if (!IS_ERR(clnt->cl_dentry)) { rpc_rmdir(clnt->cl_dentry); + rpc_put_mount(); + } if (clnt->cl_xprt) { xprt_destroy(clnt->cl_xprt); clnt->cl_xprt = NULL; @@ -331,10 +332,6 @@ rpc_destroy_client(struct rpc_clnt *clnt) out_free: rpc_free_iostats(clnt->cl_metrics); clnt->cl_metrics = NULL; - if (!IS_ERR(clnt->cl_dentry)) { - dput(clnt->cl_dentry); - rpc_put_mount(); - } kfree(clnt); return 0; } -- cgit v1.2.1 From e8896495bca8490a427409e0886d63d05419ec65 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 24 Aug 2006 15:44:19 -0400 Subject: NFS: Check lengths more thoroughly in NFS4 readdir XDR decode Check the bounds of length specifiers more thoroughly in the XDR decoding of NFS4 readdir reply data. Currently, if the server returns a bitmap or attr length that causes the current decode point pointer to wrap, this could go undetected (consider a small "negative" length on a 32-bit machine). Also add a check into the main XDR decode handler to make sure that the amount of data is a multiple of four bytes (as specified by RFC-1014). This makes sure that we can do u32* pointer subtraction in the NFS client without risking an undefined result (the result is undefined if the pointers are not correctly aligned with respect to one another). Signed-Off-By: David Howells Signed-off-by: Trond Myklebust (cherry picked from 5861fddd64a7eaf7e8b1a9997455a24e7f688092 commit) --- net/sunrpc/clnt.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d9eac7069101..3e19d321067a 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1181,6 +1181,17 @@ call_verify(struct rpc_task *task) u32 *p = iov->iov_base, n; int error = -EACCES; + if ((task->tk_rqstp->rq_rcv_buf.len & 3) != 0) { + /* RFC-1014 says that the representation of XDR data must be a + * multiple of four bytes + * - if it isn't pointer subtraction in the NFS client may give + * undefined results + */ + printk(KERN_WARNING + "call_verify: XDR representation not a multiple of" + " 4 bytes: 0x%x\n", task->tk_rqstp->rq_rcv_buf.len); + goto out_eio; + } if ((len -= 3) < 0) goto out_overflow; p += 1; /* skip XID */ -- cgit v1.2.1 From 59eed279c5daa88d95e429782ddb8ef87e52c44b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 25 Aug 2006 15:55:43 -0700 Subject: [IPV6]: Segmentation offload not set correctly on TCP children TCP over IPV6 would incorrectly inherit the GSO settings. This would cause kernel to send Tcp Segmentation Offload packets for IPV6 data to devices that can't handle it. It caused the sky2 driver to lock http://bugzilla.kernel.org/show_bug.cgi?id=7050 and the e1000 would generate bogus packets. I can't blame the hardware for gagging if the upper layers feed it garbage. This was a new bug in 2.6.18 introduced with GSO support. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b843a650be71..802a1a6b1037 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -944,7 +944,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, * comment in that function for the gory details. -acme */ - sk->sk_gso_type = SKB_GSO_TCPV6; + newsk->sk_gso_type = SKB_GSO_TCPV6; __ip6_dst_store(newsk, dst, NULL); newtcp6sk = (struct tcp6_sock *)newsk; -- cgit v1.2.1 From f3166c07175c1639687288006aeabed363a921f3 Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Sat, 26 Aug 2006 19:01:03 -0700 Subject: [DCCP]: Fix typo This fixes a small typo in net/dccp/libs/packet_history.c Signed off by: Ian McDonald Signed-off-by: David S. Miller --- net/dccp/ccids/lib/packet_history.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index ad98d6a322eb..6739be1681c9 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -1,5 +1,5 @@ /* - * net/dccp/packet_history.h + * net/dccp/packet_history.c * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. * -- cgit v1.2.1 From e6bccd357343e98db9e1fd0d487f4f924e1a7921 Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Sat, 26 Aug 2006 19:01:30 -0700 Subject: [DCCP]: Update contact details and copyright Just updating copyright and contacts Signed off by: Ian McDonald Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 4 ++-- net/dccp/ccids/ccid3.h | 4 ++-- net/dccp/ccids/lib/loss_interval.c | 2 +- net/dccp/ccids/lib/loss_interval.h | 2 +- net/dccp/ccids/lib/packet_history.c | 6 +++--- net/dccp/ccids/lib/packet_history.h | 4 ++-- net/dccp/ccids/lib/tfrc.h | 2 +- net/dccp/ccids/lib/tfrc_equation.c | 2 +- net/dccp/dccp.h | 2 +- net/dccp/options.c | 2 +- 10 files changed, 15 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index c39bff706cfc..0f85970ee6d1 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -2,7 +2,7 @@ * net/dccp/ccids/ccid3.c * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005-6 Ian McDonald + * Copyright (c) 2005-6 Ian McDonald * * An implementation of the DCCP protocol * @@ -1230,7 +1230,7 @@ static __exit void ccid3_module_exit(void) } module_exit(ccid3_module_exit); -MODULE_AUTHOR("Ian McDonald , " +MODULE_AUTHOR("Ian McDonald , " "Arnaldo Carvalho de Melo "); MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID"); MODULE_LICENSE("GPL"); diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 5ade4f668b22..22cb9f80a09d 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -1,13 +1,13 @@ /* * net/dccp/ccids/ccid3.h * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. * * An implementation of the DCCP protocol * * This code has been developed by the University of Waikato WAND * research group. For further information please see http://www.wand.net.nz/ - * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz * * This code also uses code from Lulea University, rereleased as GPL by its * authors: diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 5d7b7d864385..b93d9fc98cb8 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -2,7 +2,7 @@ * net/dccp/ccids/lib/loss_interval.c * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005 Ian McDonald + * Copyright (c) 2005-6 Ian McDonald * Copyright (c) 2005 Arnaldo Carvalho de Melo * * This program is free software; you can redistribute it and/or modify diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index 43bf78269d1d..dcb370a53f57 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -4,7 +4,7 @@ * net/dccp/ccids/lib/loss_interval.h * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005 Ian McDonald + * Copyright (c) 2005 Ian McDonald * Copyright (c) 2005 Arnaldo Carvalho de Melo * * This program is free software; you can redistribute it and/or modify it diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 6739be1681c9..7b6b03e9271a 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -1,13 +1,13 @@ /* * net/dccp/packet_history.c * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. * * An implementation of the DCCP protocol * * This code has been developed by the University of Waikato WAND * research group. For further information please see http://www.wand.net.nz/ - * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz * * This code also uses code from Lulea University, rereleased as GPL by its * authors: @@ -391,7 +391,7 @@ void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list) EXPORT_SYMBOL_GPL(dccp_tx_hist_purge); -MODULE_AUTHOR("Ian McDonald , " +MODULE_AUTHOR("Ian McDonald , " "Arnaldo Carvalho de Melo "); MODULE_DESCRIPTION("DCCP TFRC library"); MODULE_LICENSE("GPL"); diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 673c209e4e85..27c43092636c 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -1,13 +1,13 @@ /* * net/dccp/packet_history.h * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. * * An implementation of the DCCP protocol * * This code has been developed by the University of Waikato WAND * research group. For further information please see http://www.wand.net.nz/ - * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz * * This code also uses code from Lulea University, rereleased as GPL by its * authors: diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h index 130c4c40cfe3..45f30f59ea2a 100644 --- a/net/dccp/ccids/lib/tfrc.h +++ b/net/dccp/ccids/lib/tfrc.h @@ -4,7 +4,7 @@ * net/dccp/ccids/lib/tfrc.h * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005 Ian McDonald + * Copyright (c) 2005 Ian McDonald * Copyright (c) 2005 Arnaldo Carvalho de Melo * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon * diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c index 4fd2ebebf5a0..44076e0c6591 100644 --- a/net/dccp/ccids/lib/tfrc_equation.c +++ b/net/dccp/ccids/lib/tfrc_equation.c @@ -2,7 +2,7 @@ * net/dccp/ccids/lib/tfrc_equation.c * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005 Ian McDonald + * Copyright (c) 2005 Ian McDonald * Copyright (c) 2005 Arnaldo Carvalho de Melo * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon * diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index d00a2f4ee5dd..b8931d33bec3 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -5,7 +5,7 @@ * * An implementation of the DCCP protocol * Copyright (c) 2005 Arnaldo Carvalho de Melo - * Copyright (c) 2005 Ian McDonald + * Copyright (c) 2005-6 Ian McDonald * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as diff --git a/net/dccp/options.c b/net/dccp/options.c index daf72bb671f0..07a34696ac97 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -4,7 +4,7 @@ * An implementation of the DCCP protocol * Copyright (c) 2005 Aristeu Sergio Rozanski Filho * Copyright (c) 2005 Arnaldo Carvalho de Melo - * Copyright (c) 2005 Ian McDonald + * Copyright (c) 2005 Ian McDonald * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License -- cgit v1.2.1 From 837d107cd101fbf734e9ea2bbb5c7336a329e432 Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Sat, 26 Aug 2006 19:06:42 -0700 Subject: [DCCP]: Introduces follows48 function This adds a new function to see if two sequence numbers follow each other. Signed off by: Ian McDonald Signed-off-by: David S. Miller --- net/dccp/dccp.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index b8931d33bec3..a5c5475724c0 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -81,6 +81,14 @@ static inline u64 max48(const u64 seq1, const u64 seq2) return after48(seq1, seq2) ? seq1 : seq2; } +/* is seq1 next seqno after seq2 */ +static inline int follows48(const u64 seq1, const u64 seq2) +{ + int diff = (seq1 & 0xFFFF) - (seq2 & 0xFFFF); + + return diff==1; +} + enum { DCCP_MIB_NUM = 0, DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */ -- cgit v1.2.1 From 80193aee18bc862e284ba18504f3a3e14706a997 Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Sat, 26 Aug 2006 19:07:36 -0700 Subject: [DCCP]: Introduce dccp_rx_hist_find_entry This adds a new function dccp_rx_hist_find_entry. Signed off by: Ian McDonald Signed-off-by: David S. Miller --- net/dccp/ccids/lib/packet_history.c | 19 +++++++++++++++++++ net/dccp/ccids/lib/packet_history.h | 2 ++ 2 files changed, 21 insertions(+) (limited to 'net') diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 7b6b03e9271a..420c60f8604d 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -365,6 +365,25 @@ struct dccp_tx_hist_entry * EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry); +int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq, + u8 *ccval) +{ + struct dccp_rx_hist_entry *packet = NULL, *entry; + + list_for_each_entry(entry, list, dccphrx_node) + if (entry->dccphrx_seqno == seq) { + packet = entry; + break; + } + + if (packet) + *ccval = packet->dccphrx_ccval; + + return packet != NULL; +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_find_entry); + void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, struct list_head *list, struct dccp_tx_hist_entry *packet) diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index 27c43092636c..aea9c5d70910 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -106,6 +106,8 @@ static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist, extern struct dccp_tx_hist_entry * dccp_tx_hist_find_entry(const struct list_head *list, const u64 seq); +extern int dccp_rx_hist_find_entry(const struct list_head *list, const u64 seq, + u8 *ccval); static inline void dccp_tx_hist_add_entry(struct list_head *list, struct dccp_tx_hist_entry *entry) -- cgit v1.2.1 From 3a13813e6effcfad5910d47b15b724621b50b878 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 26 Aug 2006 20:28:30 -0700 Subject: [BRIDGE] netfilter: memory corruption fix The bridge-netfilter code will overwrite memory if there is not headroom in the skb to save the header. This first showed up when using Xen with sky2 driver that doesn't allocate the extra space. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_forward.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 6ccd32b30809..864fbbc7b24d 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -40,11 +40,15 @@ int br_dev_queue_push_xmit(struct sk_buff *skb) else { #ifdef CONFIG_BRIDGE_NETFILTER /* ip_refrag calls ip_fragment, doesn't copy the MAC header. */ - nf_bridge_maybe_copy_header(skb); + if (nf_bridge_maybe_copy_header(skb)) + kfree_skb(skb); + else #endif - skb_push(skb, ETH_HLEN); + { + skb_push(skb, ETH_HLEN); - dev_queue_xmit(skb); + dev_queue_xmit(skb); + } } return 0; -- cgit v1.2.1 From 66a377c5041e1e399633153c8b500d457281e7c1 Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Sat, 26 Aug 2006 23:40:50 -0700 Subject: [DCCP]: Fix CCID3 This fixes CCID3 to give much closer performance to RFC4342. CCID3 is meant to alter sending rate based on RTT and loss. The performance was verified against: http://wand.net.nz/~perry/max_download.php For example I tested with netem and had the following parameters: Delayed Acks 1, MSS 256 bytes, RTT 105 ms, packet loss 5%. This gives a theoretical speed of 71.9 Kbits/s. I measured across three runs with this patch set and got 70.1 Kbits/s. Without this patchset the average was 232 Kbits/s which means Linux can't be used for CCID3 research properly. I also tested with netem turned off so box just acting as router with 1.2 msec RTT. The performance with this is the same with or without the patch at around 30 Mbit/s. Signed off by: Ian McDonald Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 149 ++++++++++++++++++++++++++++-------- net/dccp/ccids/ccid3.h | 5 +- net/dccp/ccids/lib/loss_interval.c | 34 ++++---- net/dccp/ccids/lib/loss_interval.h | 7 +- net/dccp/ccids/lib/packet_history.c | 141 +++------------------------------- net/dccp/ccids/lib/packet_history.h | 11 +-- 6 files changed, 154 insertions(+), 193 deletions(-) (limited to 'net') diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 0f85970ee6d1..090bc39e8199 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -342,6 +342,8 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, new_packet->dccphtx_ccval = DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; + timeval_add_usecs(&hctx->ccid3hctx_t_nom, + hctx->ccid3hctx_t_ipi); } out: return rc; @@ -413,7 +415,8 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) case TFRC_SSTATE_NO_FBACK: case TFRC_SSTATE_FBACK: if (len > 0) { - hctx->ccid3hctx_t_nom = now; + timeval_sub_usecs(&hctx->ccid3hctx_t_nom, + hctx->ccid3hctx_t_ipi); ccid3_calc_new_t_ipi(hctx); ccid3_calc_new_delta(hctx); timeval_add_usecs(&hctx->ccid3hctx_t_nom, @@ -757,8 +760,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) } hcrx->ccid3hcrx_tstamp_last_feedback = now; - hcrx->ccid3hcrx_last_counter = packet->dccphrx_ccval; - hcrx->ccid3hcrx_seqno_last_counter = packet->dccphrx_seqno; + hcrx->ccid3hcrx_ccval_last_counter = packet->dccphrx_ccval; hcrx->ccid3hcrx_bytes_recv = 0; /* Convert to multiples of 10us */ @@ -782,7 +784,7 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) return 0; - DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter; + DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_ccval_last_counter; if (dccp_packet_without_ack(skb)) return 0; @@ -854,6 +856,11 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) interval = 1; } found: + if (!tail) { + LIMIT_NETDEBUG(KERN_WARNING "%s: tail is null\n", + __FUNCTION__); + return ~0; + } rtt = timeval_delta(&tstamp, &tail->dccphrx_tstamp) * 4 / interval; ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n", dccp_role(sk), sk, rtt); @@ -864,9 +871,20 @@ found: delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback); x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, delta); + if (x_recv == 0) + x_recv = hcrx->ccid3hcrx_x_recv; + tmp1 = (u64)x_recv * (u64)rtt; do_div(tmp1,10000000); tmp2 = (u32)tmp1; + + if (!tmp2) { + LIMIT_NETDEBUG(KERN_WARNING "tmp2 = 0 " + "%s: x_recv = %u, rtt =%u\n", + __FUNCTION__, x_recv, rtt); + return ~0; + } + fval = (hcrx->ccid3hcrx_s * 100000) / tmp2; /* do not alter order above or you will get overflow on 32 bit */ p = tfrc_calc_x_reverse_lookup(fval); @@ -882,31 +900,101 @@ found: static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); + struct dccp_li_hist_entry *next, *head; + u64 seq_temp; - if (seq_loss != DCCP_MAX_SEQNO + 1 && - list_empty(&hcrx->ccid3hcrx_li_hist)) { - struct dccp_li_hist_entry *li_tail; + if (list_empty(&hcrx->ccid3hcrx_li_hist)) { + if (!dccp_li_hist_interval_new(ccid3_li_hist, + &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss)) + return; - li_tail = dccp_li_hist_interval_new(ccid3_li_hist, - &hcrx->ccid3hcrx_li_hist, - seq_loss, win_loss); - if (li_tail == NULL) + next = (struct dccp_li_hist_entry *) + hcrx->ccid3hcrx_li_hist.next; + next->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); + } else { + struct dccp_li_hist_entry *entry; + struct list_head *tail; + + head = (struct dccp_li_hist_entry *) + hcrx->ccid3hcrx_li_hist.next; + /* FIXME win count check removed as was wrong */ + /* should make this check with receive history */ + /* and compare there as per section 10.2 of RFC4342 */ + + /* new loss event detected */ + /* calculate last interval length */ + seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss); + entry = dccp_li_hist_entry_new(ccid3_li_hist, SLAB_ATOMIC); + + if (entry == NULL) { + printk(KERN_CRIT "%s: out of memory\n",__FUNCTION__); + dump_stack(); return; - li_tail->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); - } else - LIMIT_NETDEBUG(KERN_WARNING "%s: FIXME: find end of " - "interval\n", __FUNCTION__); + } + + list_add(&entry->dccplih_node, &hcrx->ccid3hcrx_li_hist); + + tail = hcrx->ccid3hcrx_li_hist.prev; + list_del(tail); + kmem_cache_free(ccid3_li_hist->dccplih_slab, tail); + + /* Create the newest interval */ + entry->dccplih_seqno = seq_loss; + entry->dccplih_interval = seq_temp; + entry->dccplih_win_count = win_loss; + } } -static void ccid3_hc_rx_detect_loss(struct sock *sk) +static int ccid3_hc_rx_detect_loss(struct sock *sk, + struct dccp_rx_hist_entry *packet) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); - u8 win_loss; - const u64 seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist, - &hcrx->ccid3hcrx_li_hist, - &win_loss); + struct dccp_rx_hist_entry *rx_hist = dccp_rx_hist_head(&hcrx->ccid3hcrx_hist); + u64 seqno = packet->dccphrx_seqno; + u64 tmp_seqno; + int loss = 0; + u8 ccval; + + + tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss; + + if (!rx_hist || + follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) { + hcrx->ccid3hcrx_seqno_nonloss = seqno; + hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval; + goto detect_out; + } + - ccid3_hc_rx_update_li(sk, seq_loss, win_loss); + while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno) + > TFRC_RECV_NUM_LATE_LOSS) { + loss = 1; + ccid3_hc_rx_update_li(sk, hcrx->ccid3hcrx_seqno_nonloss, + hcrx->ccid3hcrx_ccval_nonloss); + tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss; + dccp_inc_seqno(&tmp_seqno); + hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno; + dccp_inc_seqno(&tmp_seqno); + while (dccp_rx_hist_find_entry(&hcrx->ccid3hcrx_hist, + tmp_seqno, &ccval)) { + hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno; + hcrx->ccid3hcrx_ccval_nonloss = ccval; + dccp_inc_seqno(&tmp_seqno); + } + } + + /* FIXME - this code could be simplified with above while */ + /* but works at moment */ + if (follows48(packet->dccphrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) { + hcrx->ccid3hcrx_seqno_nonloss = seqno; + hcrx->ccid3hcrx_ccval_nonloss = packet->dccphrx_ccval; + } + +detect_out: + dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist, + &hcrx->ccid3hcrx_li_hist, packet, + hcrx->ccid3hcrx_seqno_nonloss); + return loss; } static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) @@ -916,8 +1004,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) struct dccp_rx_hist_entry *packet; struct timeval now; u8 win_count; - u32 p_prev, r_sample, t_elapsed; - int ins; + u32 p_prev, rtt_prev, r_sample, t_elapsed; + int loss; BUG_ON(hcrx == NULL || !(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA || @@ -932,7 +1020,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) case DCCP_PKT_DATAACK: if (opt_recv->dccpor_timestamp_echo == 0) break; - p_prev = hcrx->ccid3hcrx_rtt; + rtt_prev = hcrx->ccid3hcrx_rtt; dccp_timestamp(sk, &now); timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10); r_sample = timeval_usecs(&now); @@ -951,8 +1039,8 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 + r_sample / 10; - if (p_prev != hcrx->ccid3hcrx_rtt) - ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n", + if (rtt_prev != hcrx->ccid3hcrx_rtt) + ccid3_pr_debug("%s, New RTT=%uus, elapsed time=%u\n", dccp_role(sk), hcrx->ccid3hcrx_rtt, opt_recv->dccpor_elapsed_time); break; @@ -973,8 +1061,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) win_count = packet->dccphrx_ccval; - ins = dccp_rx_hist_add_packet(ccid3_rx_hist, &hcrx->ccid3hcrx_hist, - &hcrx->ccid3hcrx_li_hist, packet); + loss = ccid3_hc_rx_detect_loss(sk, packet); if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK) return; @@ -991,7 +1078,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) case TFRC_RSTATE_DATA: hcrx->ccid3hcrx_bytes_recv += skb->len - dccp_hdr(skb)->dccph_doff * 4; - if (ins != 0) + if (loss) break; dccp_timestamp(sk, &now); @@ -1012,7 +1099,6 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) ccid3_pr_debug("%s, sk=%p(%s), data loss! Reacting...\n", dccp_role(sk), sk, dccp_state_name(sk->sk_state)); - ccid3_hc_rx_detect_loss(sk); p_prev = hcrx->ccid3hcrx_p; /* Calculate loss event rate */ @@ -1022,6 +1108,9 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) /* Scaling up by 1000000 as fixed decimal */ if (i_mean != 0) hcrx->ccid3hcrx_p = 1000000 / i_mean; + } else { + printk(KERN_CRIT "%s: empty loss hist\n",__FUNCTION__); + dump_stack(); } if (hcrx->ccid3hcrx_p > p_prev) { diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 22cb9f80a09d..0a2cb7536d26 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -120,9 +120,10 @@ struct ccid3_hc_rx_sock { #define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv #define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt #define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p - u64 ccid3hcrx_seqno_last_counter:48, + u64 ccid3hcrx_seqno_nonloss:48, + ccid3hcrx_ccval_nonloss:4, ccid3hcrx_state:8, - ccid3hcrx_last_counter:4; + ccid3hcrx_ccval_last_counter:4; u32 ccid3hcrx_bytes_recv; struct timeval ccid3hcrx_tstamp_last_feedback; struct timeval ccid3hcrx_tstamp_last_ack; diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index b93d9fc98cb8..906c81ab9d4f 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -12,6 +12,7 @@ */ #include +#include #include "loss_interval.h" @@ -90,13 +91,13 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list) u32 w_tot = 0; list_for_each_entry_safe(li_entry, li_next, list, dccplih_node) { - if (i < DCCP_LI_HIST_IVAL_F_LENGTH) { + if (li_entry->dccplih_interval != ~0) { i_tot0 += li_entry->dccplih_interval * dccp_li_hist_w[i]; w_tot += dccp_li_hist_w[i]; + if (i != 0) + i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1]; } - if (i != 0) - i_tot1 += li_entry->dccplih_interval * dccp_li_hist_w[i - 1]; if (++i > DCCP_LI_HIST_IVAL_F_LENGTH) break; @@ -107,37 +108,36 @@ u32 dccp_li_hist_calc_i_mean(struct list_head *list) i_tot = max(i_tot0, i_tot1); - /* FIXME: Why do we do this? -Ian McDonald */ - if (i_tot * 4 < w_tot) - i_tot = w_tot * 4; + if (!w_tot) { + LIMIT_NETDEBUG(KERN_WARNING "%s: w_tot = 0\n", __FUNCTION__); + return 1; + } - return i_tot * 4 / w_tot; + return i_tot / w_tot; } EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean); -struct dccp_li_hist_entry *dccp_li_hist_interval_new(struct dccp_li_hist *hist, - struct list_head *list, - const u64 seq_loss, - const u8 win_loss) +int dccp_li_hist_interval_new(struct dccp_li_hist *hist, + struct list_head *list, const u64 seq_loss, const u8 win_loss) { - struct dccp_li_hist_entry *tail = NULL, *entry; + struct dccp_li_hist_entry *entry; int i; - for (i = 0; i <= DCCP_LI_HIST_IVAL_F_LENGTH; ++i) { + for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) { entry = dccp_li_hist_entry_new(hist, SLAB_ATOMIC); if (entry == NULL) { dccp_li_hist_purge(hist, list); - return NULL; + dump_stack(); + return 0; } - if (tail == NULL) - tail = entry; + entry->dccplih_interval = ~0; list_add(&entry->dccplih_node, list); } entry->dccplih_seqno = seq_loss; entry->dccplih_win_count = win_loss; - return tail; + return 1; } EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new); diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h index dcb370a53f57..0ae85f0340b2 100644 --- a/net/dccp/ccids/lib/loss_interval.h +++ b/net/dccp/ccids/lib/loss_interval.h @@ -52,9 +52,6 @@ extern void dccp_li_hist_purge(struct dccp_li_hist *hist, extern u32 dccp_li_hist_calc_i_mean(struct list_head *list); -extern struct dccp_li_hist_entry * - dccp_li_hist_interval_new(struct dccp_li_hist *hist, - struct list_head *list, - const u64 seq_loss, - const u8 win_loss); +extern int dccp_li_hist_interval_new(struct dccp_li_hist *hist, + struct list_head *list, const u64 seq_loss, const u8 win_loss); #endif /* _DCCP_LI_HIST_ */ diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 420c60f8604d..b876c9c81c65 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -112,64 +112,27 @@ struct dccp_rx_hist_entry * EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet); -int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, +void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, struct list_head *rx_list, struct list_head *li_list, - struct dccp_rx_hist_entry *packet) + struct dccp_rx_hist_entry *packet, + u64 nonloss_seqno) { - struct dccp_rx_hist_entry *entry, *next, *iter; + struct dccp_rx_hist_entry *entry, *next; u8 num_later = 0; - iter = dccp_rx_hist_head(rx_list); - if (iter == NULL) - dccp_rx_hist_add_entry(rx_list, packet); - else { - const u64 seqno = packet->dccphrx_seqno; - - if (after48(seqno, iter->dccphrx_seqno)) - dccp_rx_hist_add_entry(rx_list, packet); - else { - if (dccp_rx_hist_entry_data_packet(iter)) - num_later = 1; - - list_for_each_entry_continue(iter, rx_list, - dccphrx_node) { - if (after48(seqno, iter->dccphrx_seqno)) { - dccp_rx_hist_add_entry(&iter->dccphrx_node, - packet); - goto trim_history; - } - - if (dccp_rx_hist_entry_data_packet(iter)) - num_later++; - - if (num_later == TFRC_RECV_NUM_LATE_LOSS) { - dccp_rx_hist_entry_delete(hist, packet); - return 1; - } - } - - if (num_later < TFRC_RECV_NUM_LATE_LOSS) - dccp_rx_hist_add_entry(rx_list, packet); - /* - * FIXME: else what? should we destroy the packet - * like above? - */ - } - } + list_add(&packet->dccphrx_node, rx_list); -trim_history: - /* - * Trim history (remove all packets after the NUM_LATE_LOSS + 1 - * data packets) - */ num_later = TFRC_RECV_NUM_LATE_LOSS + 1; if (!list_empty(li_list)) { list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { if (num_later == 0) { - list_del_init(&entry->dccphrx_node); - dccp_rx_hist_entry_delete(hist, entry); + if (after48(nonloss_seqno, + entry->dccphrx_seqno)) { + list_del_init(&entry->dccphrx_node); + dccp_rx_hist_entry_delete(hist, entry); + } } else if (dccp_rx_hist_entry_data_packet(entry)) --num_later; } @@ -217,94 +180,10 @@ trim_history: --num_later; } } - - return 0; } EXPORT_SYMBOL_GPL(dccp_rx_hist_add_packet); -u64 dccp_rx_hist_detect_loss(struct list_head *rx_list, - struct list_head *li_list, u8 *win_loss) -{ - struct dccp_rx_hist_entry *entry, *next, *packet; - struct dccp_rx_hist_entry *a_loss = NULL; - struct dccp_rx_hist_entry *b_loss = NULL; - u64 seq_loss = DCCP_MAX_SEQNO + 1; - u8 num_later = TFRC_RECV_NUM_LATE_LOSS; - - list_for_each_entry_safe(entry, next, rx_list, dccphrx_node) { - if (num_later == 0) { - b_loss = entry; - break; - } else if (dccp_rx_hist_entry_data_packet(entry)) - --num_later; - } - - if (b_loss == NULL) - goto out; - - num_later = 1; - list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) { - if (num_later == 0) { - a_loss = entry; - break; - } else if (dccp_rx_hist_entry_data_packet(entry)) - --num_later; - } - - if (a_loss == NULL) { - if (list_empty(li_list)) { - /* no loss event have occured yet */ - LIMIT_NETDEBUG("%s: TODO: find a lost data packet by " - "comparing to initial seqno\n", - __FUNCTION__); - goto out; - } else { - LIMIT_NETDEBUG("%s: Less than 4 data pkts in history!", - __FUNCTION__); - goto out; - } - } - - /* Locate a lost data packet */ - entry = packet = b_loss; - list_for_each_entry_safe_continue(entry, next, rx_list, dccphrx_node) { - u64 delta = dccp_delta_seqno(entry->dccphrx_seqno, - packet->dccphrx_seqno); - - if (delta != 0) { - if (dccp_rx_hist_entry_data_packet(packet)) - --delta; - /* - * FIXME: check this, probably this % usage is because - * in earlier drafts the ndp count was just 8 bits - * long, but now it cam be up to 24 bits long. - */ -#if 0 - if (delta % DCCP_NDP_LIMIT != - (packet->dccphrx_ndp - - entry->dccphrx_ndp) % DCCP_NDP_LIMIT) -#endif - if (delta != packet->dccphrx_ndp - entry->dccphrx_ndp) { - seq_loss = entry->dccphrx_seqno; - dccp_inc_seqno(&seq_loss); - } - } - packet = entry; - if (packet == a_loss) - break; - } -out: - if (seq_loss != DCCP_MAX_SEQNO + 1) - *win_loss = a_loss->dccphrx_ccval; - else - *win_loss = 0; /* Paranoia */ - - return seq_loss; -} - -EXPORT_SYMBOL_GPL(dccp_rx_hist_detect_loss); - struct dccp_tx_hist *dccp_tx_hist_new(const char *name) { struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index aea9c5d70910..067cf1c85a37 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -166,12 +166,6 @@ static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist, extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list); -static inline void dccp_rx_hist_add_entry(struct list_head *list, - struct dccp_rx_hist_entry *entry) -{ - list_add(&entry->dccphrx_node, list); -} - static inline struct dccp_rx_hist_entry * dccp_rx_hist_head(struct list_head *list) { @@ -190,10 +184,11 @@ static inline int entry->dccphrx_type == DCCP_PKT_DATAACK; } -extern int dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, +extern void dccp_rx_hist_add_packet(struct dccp_rx_hist *hist, struct list_head *rx_list, struct list_head *li_list, - struct dccp_rx_hist_entry *packet); + struct dccp_rx_hist_entry *packet, + u64 nonloss_seqno); extern u64 dccp_rx_hist_detect_loss(struct list_head *rx_list, struct list_head *li_list, u8 *win_loss); -- cgit v1.2.1