From 9b8de7479d0dbab1ed98b5b015d44232c9d3d08e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 21 Apr 2009 23:00:24 +0100 Subject: FRV: Fix the section attribute on UP DECLARE_PER_CPU() In non-SMP mode, the variable section attribute specified by DECLARE_PER_CPU() does not agree with that specified by DEFINE_PER_CPU(). This means that architectures that have a small data section references relative to a base register may throw up linkage errors due to too great a displacement between where the base register points and the per-CPU variable. On FRV, the .h declaration says that the variable is in the .sdata section, but the .c definition says it's actually in the .data section. The linker throws up the following errors: kernel/built-in.o: In function `release_task': kernel/exit.c:78: relocation truncated to fit: R_FRV_GPREL12 against symbol `per_cpu__process_counts' defined in .data section in kernel/built-in.o kernel/exit.c:78: relocation truncated to fit: R_FRV_GPREL12 against symbol `per_cpu__process_counts' defined in .data section in kernel/built-in.o To fix this, DECLARE_PER_CPU() should simply apply the same section attribute as does DEFINE_PER_CPU(). However, this is made slightly more complex by virtue of the fact that there are several variants on DEFINE, so these need to be matched by variants on DECLARE. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- net/rds/rds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/rds/rds.h b/net/rds/rds.h index 619f0a30a4e5..71794449ca4e 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -638,7 +638,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *, void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force); /* stats.c */ -DECLARE_PER_CPU(struct rds_statistics, rds_stats); +DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); #define rds_stats_inc_which(which, member) do { \ per_cpu(which, get_cpu()).member++; \ put_cpu(); \ -- cgit v1.2.3 From f75e6745aa3084124ae1434fd7629853bdaf6798 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 21 Apr 2009 17:18:20 -0400 Subject: SUNRPC: Fix the problem of EADDRNOTAVAIL syslog floods on reconnect See http://bugzilla.kernel.org/show_bug.cgi?id=13034 If the port gets into a TIME_WAIT state, then we cannot reconnect without binding to a new port. Tested-by: Petr Vandrovec Tested-by: Jean Delvare Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- include/linux/sunrpc/xprt.h | 1 + net/sunrpc/xprt.c | 6 ++---- net/sunrpc/xprtsock.c | 26 +++++++++++++++++++++----- 3 files changed, 24 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 1758d9f5b5c3..08afe43118f4 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -261,6 +261,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); #define XPRT_BINDING (5) #define XPRT_CLOSING (6) #define XPRT_CONNECTION_ABORT (7) +#define XPRT_CONNECTION_CLOSE (8) static inline void xprt_set_connected(struct rpc_xprt *xprt) { diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index a0bfe53f1621..06ca058572f2 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -672,10 +672,8 @@ xprt_init_autodisconnect(unsigned long data) if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) goto out_abort; spin_unlock(&xprt->transport_lock); - if (xprt_connecting(xprt)) - xprt_release_write(xprt, NULL); - else - queue_work(rpciod_workqueue, &xprt->task_cleanup); + set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); + queue_work(rpciod_workqueue, &xprt->task_cleanup); return; out_abort: spin_unlock(&xprt->transport_lock); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index d40ff50887aa..e18596146013 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -807,6 +807,9 @@ static void xs_reset_transport(struct sock_xprt *transport) * * This is used when all requests are complete; ie, no DRC state remains * on the server we want to save. + * + * The caller _must_ be holding XPRT_LOCKED in order to avoid issues with + * xs_reset_transport() zeroing the socket from underneath a writer. */ static void xs_close(struct rpc_xprt *xprt) { @@ -824,6 +827,14 @@ static void xs_close(struct rpc_xprt *xprt) xprt_disconnect_done(xprt); } +static void xs_tcp_close(struct rpc_xprt *xprt) +{ + if (test_and_clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state)) + xs_close(xprt); + else + xs_tcp_shutdown(xprt); +} + /** * xs_destroy - prepare to shutdown a transport * @xprt: doomed transport @@ -1772,6 +1783,15 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, xprt, -status, xprt_connected(xprt), sock->sk->sk_state); switch (status) { + default: + printk("%s: connect returned unhandled error %d\n", + __func__, status); + case -EADDRNOTAVAIL: + /* We're probably in TIME_WAIT. Get rid of existing socket, + * and retry + */ + set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); + xprt_force_disconnect(xprt); case -ECONNREFUSED: case -ECONNRESET: case -ENETUNREACH: @@ -1782,10 +1802,6 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, xprt_clear_connecting(xprt); return; } - /* get rid of existing socket, and retry */ - xs_tcp_shutdown(xprt); - printk("%s: connect returned unhandled error %d\n", - __func__, status); out_eagain: status = -EAGAIN; out: @@ -1994,7 +2010,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { .buf_free = rpc_free, .send_request = xs_tcp_send_request, .set_retrans_timeout = xprt_set_retrans_timeout_def, - .close = xs_tcp_shutdown, + .close = xs_tcp_close, .destroy = xs_destroy, .print_stats = xs_tcp_print_stats, }; -- cgit v1.2.3 From 453ed90d1395a5281a8f1a0de5d8aabc66202e34 Mon Sep 17 00:00:00 2001 From: Latchesar Ionkov Date: Sun, 5 Apr 2009 16:22:16 -0500 Subject: net/9p: set correct stat size when sending Twstat messages The 9P2000 Twstat message requires the size of the stat structure to be specified. Currently the 9p code writes zero instead of the actual size. This behavior confuses some of the file servers that check if the size is correct. This patch adds a new function that calculcates the stat size and puts the value in the appropriate place in the 9P message. Signed-off-by: Latchesar Ionkov Reviewed-by: Eric Van Hensbergen --- net/9p/client.c | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 1eb580c38fbb..93f442aaa119 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1251,12 +1251,42 @@ error: } EXPORT_SYMBOL(p9_client_stat); +static int p9_client_statsize(struct p9_wstat *wst, int optional) +{ + int ret; + + /* size[2] type[2] dev[4] qid[13] */ + /* mode[4] atime[4] mtime[4] length[8]*/ + /* name[s] uid[s] gid[s] muid[s] */ + ret = 2+2+4+13+4+4+4+8+2+2+2+2; + + if (wst->name) + ret += strlen(wst->name); + if (wst->uid) + ret += strlen(wst->uid); + if (wst->gid) + ret += strlen(wst->gid); + if (wst->muid) + ret += strlen(wst->muid); + + if (optional) { + ret += 2+4+4+4; /* extension[s] n_uid[4] n_gid[4] n_muid[4] */ + if (wst->extension) + ret += strlen(wst->extension); + } + + return ret; +} + int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) { int err; struct p9_req_t *req; struct p9_client *clnt; + err = 0; + clnt = fid->clnt; + wst->size = p9_client_statsize(wst, clnt->dotu); P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid); P9_DPRINTK(P9_DEBUG_9P, " sz=%x type=%x dev=%x qid=%x.%llx.%x\n" @@ -1268,10 +1298,8 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) wst->atime, wst->mtime, (unsigned long long)wst->length, wst->name, wst->uid, wst->gid, wst->muid, wst->extension, wst->n_uid, wst->n_gid, wst->n_muid); - err = 0; - clnt = fid->clnt; - req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, 0, wst); + req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size, wst); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; -- cgit v1.2.3 From 742b11a7ec60faa25d76c95c268041ab215c25ad Mon Sep 17 00:00:00 2001 From: Latchesar Ionkov Date: Sun, 5 Apr 2009 16:26:41 -0500 Subject: net/9p: return error when p9_client_stat fails p9_client_stat function doesn't return correct value if it fails. p9_client_stat should return ERR_PTR of the error value when it fails. Instead, it always returns a value to the allocated p9_wstat struct even when it is not populated correctly. This patch makes p9_client_stat to handle failure correctly. Signed-off-by: Latchesar Ionkov Reviewed-by: Eric Van Hensbergen --- net/9p/client.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/9p/client.c b/net/9p/client.c index 93f442aaa119..781d89a952e4 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1244,10 +1244,14 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) ret->name, ret->uid, ret->gid, ret->muid, ret->extension, ret->n_uid, ret->n_gid, ret->n_muid); + p9_free_req(clnt, req); + return ret; + free_and_error: p9_free_req(clnt, req); error: - return ret; + kfree(ret); + return ERR_PTR(err); } EXPORT_SYMBOL(p9_client_stat); -- cgit v1.2.3 From 1bab88b2310998de18b32529a27ea835d164254a Mon Sep 17 00:00:00 2001 From: Latchesar Ionkov Date: Sun, 5 Apr 2009 16:28:59 -0500 Subject: net/9p: handle correctly interrupted 9P requests Currently the 9p code crashes when a operation is interrupted, i.e. for example when the user presses ^C while reading from a file. This patch fixes the code that is responsible for interruption and flushing of 9P operations. Signed-off-by: Latchesar Ionkov --- include/net/9p/client.h | 1 - net/9p/client.c | 74 +++++++++++++------------------------------------ net/9p/trans_fd.c | 14 ++++++---- net/9p/trans_rdma.c | 1 + net/9p/trans_virtio.c | 1 + 5 files changed, 30 insertions(+), 61 deletions(-) (limited to 'net') diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 4012e07162e5..e26812274b75 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -98,7 +98,6 @@ enum p9_req_status_t { struct p9_req_t { int status; int t_err; - u16 flush_tag; wait_queue_head_t *wq; struct p9_fcall *tc; struct p9_fcall *rc; diff --git a/net/9p/client.c b/net/9p/client.c index 781d89a952e4..dd43a8289b0d 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -203,7 +203,6 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) p9pdu_reset(req->tc); p9pdu_reset(req->rc); - req->flush_tag = 0; req->tc->tag = tag-1; req->status = REQ_STATUS_ALLOC; @@ -324,35 +323,9 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r) */ void p9_client_cb(struct p9_client *c, struct p9_req_t *req) { - struct p9_req_t *other_req; - unsigned long flags; - P9_DPRINTK(P9_DEBUG_MUX, " tag %d\n", req->tc->tag); - - if (req->status == REQ_STATUS_ERROR) - wake_up(req->wq); - - if (req->flush_tag) { /* flush receive path */ - P9_DPRINTK(P9_DEBUG_9P, "<<< RFLUSH %d\n", req->tc->tag); - spin_lock_irqsave(&c->lock, flags); - other_req = p9_tag_lookup(c, req->flush_tag); - if (other_req->status != REQ_STATUS_FLSH) /* stale flush */ - spin_unlock_irqrestore(&c->lock, flags); - else { - other_req->status = REQ_STATUS_FLSHD; - spin_unlock_irqrestore(&c->lock, flags); - wake_up(other_req->wq); - } - p9_free_req(c, req); - } else { /* normal receive path */ - P9_DPRINTK(P9_DEBUG_MUX, "normal: tag %d\n", req->tc->tag); - spin_lock_irqsave(&c->lock, flags); - if (req->status != REQ_STATUS_FLSHD) - req->status = REQ_STATUS_RCVD; - spin_unlock_irqrestore(&c->lock, flags); - wake_up(req->wq); - P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag); - } + wake_up(req->wq); + P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag); } EXPORT_SYMBOL(p9_client_cb); @@ -486,9 +459,15 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) if (IS_ERR(req)) return PTR_ERR(req); - req->flush_tag = oldtag; - /* we don't free anything here because RPC isn't complete */ + /* if we haven't received a response for oldreq, + remove it from the list. */ + spin_lock(&c->lock); + if (oldreq->status == REQ_STATUS_FLSH) + list_del(&oldreq->req_list); + spin_unlock(&c->lock); + + p9_free_req(c, req); return 0; } @@ -509,7 +488,6 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) struct p9_req_t *req; unsigned long flags; int sigpending; - int flushed = 0; P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type); @@ -546,42 +524,28 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) goto reterr; } - /* if it was a flush we just transmitted, return our tag */ - if (type == P9_TFLUSH) - return req; -again: P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag); err = wait_event_interruptible(*req->wq, req->status >= REQ_STATUS_RCVD); - P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d (flushed=%d)\n", - req->wq, tag, err, flushed); + P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d\n", + req->wq, tag, err); if (req->status == REQ_STATUS_ERROR) { P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); err = req->t_err; - } else if (err == -ERESTARTSYS && flushed) { - P9_DPRINTK(P9_DEBUG_MUX, "flushed - going again\n"); - goto again; - } else if (req->status == REQ_STATUS_FLSHD) { - P9_DPRINTK(P9_DEBUG_MUX, "flushed - erestartsys\n"); - err = -ERESTARTSYS; } - if ((err == -ERESTARTSYS) && (c->status == Connected) && (!flushed)) { + if ((err == -ERESTARTSYS) && (c->status == Connected)) { P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); - spin_lock_irqsave(&c->lock, flags); - if (req->status == REQ_STATUS_SENT) - req->status = REQ_STATUS_FLSH; - spin_unlock_irqrestore(&c->lock, flags); sigpending = 1; - flushed = 1; clear_thread_flag(TIF_SIGPENDING); - if (c->trans_mod->cancel(c, req)) { - err = p9_client_flush(c, req); - if (err == 0) - goto again; - } + if (c->trans_mod->cancel(c, req)) + p9_client_flush(c, req); + + /* if we received the response anyway, don't signal error */ + if (req->status == REQ_STATUS_RCVD) + err = 0; } if (sigpending) { diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index c613ed08a5ee..a2a1814c7a8d 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -213,8 +213,8 @@ static void p9_conn_cancel(struct p9_conn *m, int err) spin_unlock_irqrestore(&m->client->lock, flags); list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { - list_del(&req->req_list); P9_DPRINTK(P9_DEBUG_ERROR, "call back req %p\n", req); + list_del(&req->req_list); p9_client_cb(m->client, req); } } @@ -336,7 +336,8 @@ static void p9_read_work(struct work_struct *work) "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag); m->req = p9_tag_lookup(m->client, tag); - if (!m->req) { + if (!m->req || (m->req->status != REQ_STATUS_SENT && + m->req->status != REQ_STATUS_FLSH)) { P9_DPRINTK(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", tag); err = -EIO; @@ -361,10 +362,11 @@ static void p9_read_work(struct work_struct *work) if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */ P9_DPRINTK(P9_DEBUG_TRANS, "got new packet\n"); spin_lock(&m->client->lock); + if (m->req->status != REQ_STATUS_ERROR) + m->req->status = REQ_STATUS_RCVD; list_del(&m->req->req_list); spin_unlock(&m->client->lock); p9_client_cb(m->client, m->req); - m->rbuf = NULL; m->rpos = 0; m->rsize = 0; @@ -454,6 +456,7 @@ static void p9_write_work(struct work_struct *work) req = list_entry(m->unsent_req_list.next, struct p9_req_t, req_list); req->status = REQ_STATUS_SENT; + P9_DPRINTK(P9_DEBUG_TRANS, "move req %p\n", req); list_move_tail(&req->req_list, &m->req_list); m->wbuf = req->tc->sdata; @@ -683,12 +686,13 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) P9_DPRINTK(P9_DEBUG_TRANS, "client %p req %p\n", client, req); spin_lock(&client->lock); - list_del(&req->req_list); if (req->status == REQ_STATUS_UNSENT) { + list_del(&req->req_list); req->status = REQ_STATUS_FLSHD; ret = 0; - } + } else if (req->status == REQ_STATUS_SENT) + req->status = REQ_STATUS_FLSH; spin_unlock(&client->lock); diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 7fa0eb20b2f6..ac4990041ebb 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -295,6 +295,7 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, goto err_out; req->rc = c->rc; + req->status = REQ_STATUS_RCVD; p9_client_cb(client, req); return; diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 2d7781ec663b..bb8579a141a8 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -134,6 +134,7 @@ static void req_done(struct virtqueue *vq) P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); req = p9_tag_lookup(chan->client, rc->tag); + req->status = REQ_STATUS_RCVD; p9_client_cb(chan->client, req); } } -- cgit v1.2.3 From 49a88d18a1721ac14dbc67cd390db18ee1f3a42f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Apr 2009 17:06:55 +0200 Subject: netfilter: ip6tables regression fix Commit 7845447 (netfilter: iptables: lock free counters) broke ip6_tables by unconditionally returning ENOMEM in alloc_counters(), Reported-by: Graham Murray Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/ip6_tables.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index dfed176aed37..800ae8542471 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1033,6 +1033,8 @@ static struct xt_counters *alloc_counters(struct xt_table *table) xt_free_table_info(info); + return counters; + free_counters: vfree(counters); nomem: -- cgit v1.2.3 From 3ae16f13027c26cb4c227392116c2027524a6444 Mon Sep 17 00:00:00 2001 From: Alex Riesen Date: Mon, 6 Apr 2009 17:09:43 +0200 Subject: netfilter: fix selection of "LED" target in netfilter It's plural, not LED_TRIGGERS. Signed-off-by: Alex Riesen Signed-off-by: Patrick McHardy --- net/netfilter/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index bb279bf59a1b..2329c5f50551 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -374,7 +374,7 @@ config NETFILTER_XT_TARGET_HL config NETFILTER_XT_TARGET_LED tristate '"LED" target support' - depends on LEDS_CLASS && LED_TRIGGERS + depends on LEDS_CLASS && LEDS_TRIGGERS depends on NETFILTER_ADVANCED help This option adds a `LED' target, which allows you to blink LEDs in -- cgit v1.2.3 From 83731671d9e6878c0a05d309c68fb71c16d3235a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 6 Apr 2009 17:47:20 +0200 Subject: netfilter: ctnetlink: fix regression in expectation handling This patch fixes a regression (introduced by myself in commit 19abb7b: netfilter: ctnetlink: deliver events for conntracks changed from userspace) that results in an expectation re-insertion since __nf_ct_expect_check() may return 0 for expectation timer refreshing. This patch also removes a unnecessary refcount bump that pretended to avoid a possible race condition with event delivery and expectation timers (as said, not needed since we hold a reference to the object since until we finish the expectation setup). This also merges nf_ct_expect_related_report() and nf_ct_expect_related() which look basically the same. Reported-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack_expect.h | 5 ++++- net/netfilter/nf_conntrack_expect.c | 30 ++++++----------------------- 2 files changed, 10 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index ab17a159ac66..a9652806d0df 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -99,9 +99,12 @@ void nf_ct_expect_init(struct nf_conntrack_expect *, unsigned int, u_int8_t, const union nf_inet_addr *, u_int8_t, const __be16 *, const __be16 *); void nf_ct_expect_put(struct nf_conntrack_expect *exp); -int nf_ct_expect_related(struct nf_conntrack_expect *expect); int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, u32 pid, int report); +static inline int nf_ct_expect_related(struct nf_conntrack_expect *expect) +{ + return nf_ct_expect_related_report(expect, 0, 0); +} #endif /*_NF_CONNTRACK_EXPECT_H*/ diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 3940f996a2e4..afde8f991646 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -372,7 +372,7 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) struct net *net = nf_ct_exp_net(expect); struct hlist_node *n; unsigned int h; - int ret = 0; + int ret = 1; if (!master_help->helper) { ret = -ESHUTDOWN; @@ -412,41 +412,23 @@ out: return ret; } -int nf_ct_expect_related(struct nf_conntrack_expect *expect) +int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, + u32 pid, int report) { int ret; spin_lock_bh(&nf_conntrack_lock); ret = __nf_ct_expect_check(expect); - if (ret < 0) + if (ret <= 0) goto out; + ret = 0; nf_ct_expect_insert(expect); - atomic_inc(&expect->use); - spin_unlock_bh(&nf_conntrack_lock); - nf_ct_expect_event(IPEXP_NEW, expect); - nf_ct_expect_put(expect); - return ret; -out: spin_unlock_bh(&nf_conntrack_lock); + nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report); return ret; -} -EXPORT_SYMBOL_GPL(nf_ct_expect_related); - -int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, - u32 pid, int report) -{ - int ret; - - spin_lock_bh(&nf_conntrack_lock); - ret = __nf_ct_expect_check(expect); - if (ret < 0) - goto out; - nf_ct_expect_insert(expect); out: spin_unlock_bh(&nf_conntrack_lock); - if (ret == 0) - nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report); return ret; } EXPORT_SYMBOL_GPL(nf_ct_expect_related_report); -- cgit v1.2.3 From d543103a0c75edc0a7a08dfd796de67466a15dfb Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 8 Apr 2009 13:15:22 +0000 Subject: net: netif_device_attach/detach should start/stop all queues Currently netif_device_attach/detach are only stopping one queue. They should be starting and stopping all the queues on a given device. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 91d792d17e09..ea8eb2214b09 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1430,7 +1430,7 @@ void netif_device_detach(struct net_device *dev) { if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && netif_running(dev)) { - netif_stop_queue(dev); + netif_tx_stop_all_queues(dev); } } EXPORT_SYMBOL(netif_device_detach); @@ -1445,7 +1445,7 @@ void netif_device_attach(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && netif_running(dev)) { - netif_wake_queue(dev); + netif_tx_wake_all_queues(dev); __netdev_watchdog_up(dev); } } -- cgit v1.2.3 From 3384901f1b1af676ccb9d75aa23a568c294c527b Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 8 Apr 2009 21:27:28 +0000 Subject: tr: fix leakage of device in net/802/tr.c Add dev_put() after dev_get_by_index() to avoid leakage of device. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/802/tr.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/802/tr.c b/net/802/tr.c index e7eb13084d71..e874447ad144 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -561,6 +561,9 @@ static int rif_seq_show(struct seq_file *seq, void *v) } seq_putc(seq, '\n'); } + + if (dev) + dev_put(dev); } return 0; } -- cgit v1.2.3 From 499923c7a3254971873e55a1690d07d3700eea47 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 9 Apr 2009 17:37:33 +0000 Subject: ipv6: Fix NULL pointer dereference with time-wait sockets Commit b2f5e7cd3dee2ed721bf0675e1a1ddebb849aee6 (ipv6: Fix conflict resolutions during ipv6 binding) introduced a regression where time-wait sockets were not treated correctly. This resulted in the following: BUG: unable to handle kernel NULL pointer dereference at 0000000000000062 IP: [] ipv4_rcv_saddr_equal+0x61/0x70 ... Call Trace: [] ipv6_rcv_saddr_equal+0x1bb/0x250 [ipv6] [] inet6_csk_bind_conflict+0x88/0xd0 [ipv6] [] inet_csk_get_port+0x1ee/0x400 [] inet6_bind+0x1cf/0x3a0 [ipv6] [] ? sockfd_lookup_light+0x3c/0xd0 [] sys_bind+0x89/0x100 [] ? trace_hardirqs_on_thunk+0x3a/0x3c [] system_call_fastpath+0x16/0x1b Tested-by: Brian Haley Tested-by: Ed Tomlinson Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/udp.h | 2 -- net/ipv4/udp.c | 3 +-- net/ipv6/udp.c | 6 +++++- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/udp.h b/include/net/udp.h index 93dbe294d459..90e6ce56be65 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -124,8 +124,6 @@ static inline void udp_lib_close(struct sock *sk, long timeout) sk_common_release(sk); } -extern int ipv4_rcv_saddr_equal(const struct sock *sk1, - const struct sock *sk2); extern int udp_lib_get_port(struct sock *sk, unsigned short snum, int (*)(const struct sock*,const struct sock*)); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index bda08a09357d..7a1d1ce22e66 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -222,7 +222,7 @@ fail: return error; } -int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) +static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) { struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); @@ -1823,7 +1823,6 @@ EXPORT_SYMBOL(udp_lib_getsockopt); EXPORT_SYMBOL(udp_lib_setsockopt); EXPORT_SYMBOL(udp_poll); EXPORT_SYMBOL(udp_lib_get_port); -EXPORT_SYMBOL(ipv4_rcv_saddr_equal); #ifdef CONFIG_PROC_FS EXPORT_SYMBOL(udp_proc_register); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6842dd2edd5b..8905712cfbb8 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -53,6 +53,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); + __be32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; + __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = inet_v6_ipv6only(sk2); int addr_type = ipv6_addr_type(sk_rcv_saddr6); @@ -60,7 +62,9 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) /* if both are mapped, treat as IPv4 */ if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) - return ipv4_rcv_saddr_equal(sk, sk2); + return (!sk2_ipv6only && + (!sk_rcv_saddr || !sk2_rcv_saddr || + sk_rcv_saddr == sk2_rcv_saddr)); if (addr_type2 == IPV6_ADDR_ANY && !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) -- cgit v1.2.3 From 1db9e29bb0ff3c9366e8a50fb09ca8dbc364bfd6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 13 Apr 2009 04:41:01 +0000 Subject: gro: Normalise skb before bypassing GRO on netpoll VLAN path Hi: gro: Normalise skb before bypassing GRO on netpoll VLAN path When we detect netpoll RX on the GRO VLAN path we bail out and call the normal VLAN receive handler. However, the packet needs to be normalised by calling eth_type_trans since that's what the normal path expects (normally the GRO path does the fixup). This patch adds the necessary call to vlan_gro_frags. Signed-off-by: Herbert Xu Thanks, Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 654e45f5719d..c67fe6f75653 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -121,8 +121,10 @@ int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, if (!skb) return NET_RX_DROP; - if (netpoll_rx_on(skb)) + if (netpoll_rx_on(skb)) { + skb->protocol = eth_type_trans(skb, skb->dev); return vlan_hwaccel_receive_skb(skb, grp, vlan_tci); + } return napi_frags_finish(napi, skb, vlan_gro_common(napi, grp, vlan_tci, skb)); -- cgit v1.2.3 From 1a31f2042e938f1b467aa3d807cc5666352bf8a3 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 13 Apr 2009 18:12:57 -0700 Subject: netsched: Allow meta match on vlan tag on receive When vlan acceleration is used on receive, the vlan tag is maintained outside of the skb data. The existing vlan tag match only works on TX path because it uses vlan_get_tag which tests for VLAN_HW_TX_ACCEL. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/em_meta.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 72cf86e3c090..fad596bf32d7 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -176,8 +176,10 @@ META_COLLECTOR(var_dev) META_COLLECTOR(int_vlan_tag) { - unsigned short uninitialized_var(tag); - if (vlan_get_tag(skb, &tag) < 0) + unsigned short tag; + + tag = vlan_tx_tag_get(skb); + if (!tag && __vlan_get_tag(skb, &tag)) *err = -1; else dst->value = tag; -- cgit v1.2.3 From 86bcebafc5e7f5163ccf828792fe694b112ed6fa Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Tue, 14 Apr 2009 02:08:53 -0700 Subject: tcp: fix >2 iw selection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A long-standing feature in tcp_init_metrics() is such that any of its goto reset prevents call to tcp_init_cwnd(). Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2bc8e27a163d..c96a6bb25430 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -928,6 +928,8 @@ static void tcp_init_metrics(struct sock *sk) tcp_set_rto(sk); if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) goto reset; + +cwnd: tp->snd_cwnd = tcp_init_cwnd(tp, dst); tp->snd_cwnd_stamp = tcp_time_stamp; return; @@ -942,6 +944,7 @@ reset: tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; } + goto cwnd; } static void tcp_update_reordering(struct sock *sk, const int metric, -- cgit v1.2.3 From ce8632ba6b3ed0bf2efa98672e2808de34250389 Mon Sep 17 00:00:00 2001 From: Yang Hongyang Date: Mon, 13 Apr 2009 15:51:00 +0000 Subject: ipv6:remove useless check After switch (rthdr->type) {...},the check below is completely useless.Because: if the type is 2,then hdrlen must be 2 and segments_left must be 1,clearly the check is redundant;if the type is not 2,then goto sticky_done,the check is useless too. Signed-off-by: Yang Hongyang Reviewed-by: Shan Wei Signed-off-by: David S. Miller --- net/ipv6/ipv6_sockglue.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index d31df0f4bc9a..a7fdf9a27f15 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -380,10 +380,6 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, default: goto sticky_done; } - - if ((rthdr->hdrlen & 1) || - (rthdr->hdrlen >> 1) != rthdr->segments_left) - goto sticky_done; } retv = 0; -- cgit v1.2.3 From fc59f9a3bf8096a1f68a8b78ada7a0e0ab9236b2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 14 Apr 2009 15:11:06 -0700 Subject: gro: Restore correct value to gso_size Since everybody has been focusing on baremetal GRO performance no one noticed when I added a bug that zapped gso_size for all GRO packets. This only gets picked up when you forward the skb out of an interface. Thanks to Mark Wagner for noticing this bug when testing kvm. Reported-by: Mark Wagner Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index ea8eb2214b09..343883f65ea7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2328,8 +2328,10 @@ static int napi_gro_complete(struct sk_buff *skb) struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; int err = -ENOENT; - if (NAPI_GRO_CB(skb)->count == 1) + if (NAPI_GRO_CB(skb)->count == 1) { + skb_shinfo(skb)->gso_size = 0; goto out; + } rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { @@ -2348,7 +2350,6 @@ static int napi_gro_complete(struct sk_buff *skb) } out: - skb_shinfo(skb)->gso_size = 0; return netif_receive_skb(skb); } -- cgit v1.2.3 From 6fd4777a1fec1f7757b5a302ad3fdcc1eae2abba Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 14 Apr 2009 20:28:00 -0700 Subject: Revert "rose: zero length frame filtering in af_rose.c" This reverts commit 244f46ae6e9e18f6fc0be7d1f49febde4762c34b. Alan Cox did the research, and just like the other radio protocols zero-length frames have meaning because at the top level ROSE is X.25 PLP. So this zero-length filtering is invalid. Signed-off-by: David S. Miller --- net/rose/af_rose.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'net') diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 0f36e8d59b29..877a7f65f707 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1072,10 +1072,6 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock, unsigned char *asmptr; int n, size, qbit = 0; - /* ROSE empty frame has no meaning : don't send */ - if (len == 0) - return 0; - if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT)) return -EINVAL; @@ -1273,12 +1269,6 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, skb_reset_transport_header(skb); copied = skb->len; - /* ROSE empty frame has no meaning : ignore it */ - if (copied == 0) { - skb_free_datagram(sk, skb); - return copied; - } - if (copied > size) { copied = size; msg->msg_flags |= MSG_TRUNC; -- cgit v1.2.3 From b6f0a3652ea9d2296fdc98c3b2c96603be611c4d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Apr 2009 12:16:19 +0200 Subject: netfilter: nf_log regression fix commit ca735b3aaa945626ba65a3e51145bfe4ecd9e222 'netfilter: use a linked list of loggers' introduced an array of list_head in "struct nf_logger", but forgot to initialize it in nf_log_register(). This resulted in oops when calling nf_log_unregister() at module unload time. Reported-and-tested-by: Mariusz Kozlowski Signed-off-by: Eric Dumazet Acked-by: Eric Leblond Signed-off-by: Patrick McHardy --- net/netfilter/nf_log.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 8bb998fe098b..beb37311e1a5 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -36,10 +36,14 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger) int nf_log_register(u_int8_t pf, struct nf_logger *logger) { const struct nf_logger *llog; + int i; if (pf >= ARRAY_SIZE(nf_loggers)) return -EINVAL; + for (i = 0; i < ARRAY_SIZE(logger->list); i++) + INIT_LIST_HEAD(&logger->list[i]); + mutex_lock(&nf_log_mutex); if (pf == NFPROTO_UNSPEC) { -- cgit v1.2.3 From 719bfeaae8104fca4ca5d47c02592b08682f14fa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Apr 2009 03:39:52 -0700 Subject: packet: avoid warnings when high-order page allocation fails Latest tcpdump/libpcap triggers annoying messages because of high order page allocation failures (when lowmem exhausted or fragmented) These allocation errors are correctly handled so could be silent. [22660.208901] tcpdump: page allocation failure. order:5, mode:0xc0d0 [22660.208921] Pid: 13866, comm: tcpdump Not tainted 2.6.30-rc2 #170 [22660.208936] Call Trace: [22660.208950] [] ? printk+0x18/0x1a [22660.208965] [] __alloc_pages_internal+0x357/0x460 [22660.208980] [] __get_free_pages+0x21/0x40 [22660.208995] [] packet_set_ring+0x105/0x3d0 [22660.209009] [] packet_setsockopt+0x21d/0x4d0 [22660.209025] [] ? filemap_fault+0x0/0x450 [22660.209040] [] sys_setsockopt+0x54/0xa0 [22660.209053] [] sys_socketcall+0xef/0x270 [22660.209067] [] sysenter_do_call+0x12/0x26 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/packet/af_packet.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 74776de523ec..f546e81acc45 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1758,8 +1758,9 @@ static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len) static inline char *alloc_one_pg_vec_page(unsigned long order) { - return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO, - order); + gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN; + + return (char *) __get_free_pages(gfp_flags, order); } static char **alloc_pg_vec(struct tpacket_req *req, int order) -- cgit v1.2.3 From 38fb0afcd8761f8858e27135ed89a65117e2019c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 15 Apr 2009 12:45:08 +0200 Subject: netfilter: nf_conntrack: fix crash when unloading helpers Commit ea781f197d (netfilter: nf_conntrack: use SLAB_DESTROY_BY_RCU and) get rid of call_rcu() was missing one conversion to the hlist_nulls functions, causing a crash when unloading conntrack helper modules. Reported-and-tested-by: Mariusz Kozlowski Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 30b8e9009f99..0fa5a422959f 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -176,7 +176,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, } /* Get rid of expecteds, set helpers to NULL. */ - hlist_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode) + hlist_nulls_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode) unhelp(h, me); for (i = 0; i < nf_conntrack_htable_size; i++) { hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) -- cgit v1.2.3 From 0ad8acaf434d360ad99813d981a68e605d6c8179 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 24 Mar 2009 21:21:08 -0400 Subject: cfg80211: fix NULL pointer deference in reg_device_remove() We won't ever get here as regulatory_hint_core() can only fail on -ENOMEM and in that case we don't initialize cfg80211 but this is technically correct code. This is actually good for stable, where we don't check for -ENOMEM failure on __regulatory_hint()'s failure. Cc: stable@kernel.org Reported-by: Quentin Armitage Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 6327e1617acb..6c1993d99902 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2095,11 +2095,12 @@ int set_regdom(const struct ieee80211_regdomain *rd) /* Caller must hold cfg80211_mutex */ void reg_device_remove(struct wiphy *wiphy) { - struct wiphy *request_wiphy; + struct wiphy *request_wiphy = NULL; assert_cfg80211_lock(); - request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); + if (last_request) + request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); kfree(wiphy->regd); if (!last_request || !request_wiphy) -- cgit v1.2.3 From b3631286aca3f54427ca0eb950981e9753866f6c Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Wed, 25 Mar 2009 18:10:18 +0530 Subject: mac80211: Fix bug in getting rx status for frames pending in reorder buffer Currently rx status for frames which are completed from reorder buffer is taken from it's cb area which is not always right, cb is not holding the rx status when driver uses mac80211's non-irq rx handler to pass it's received frames. This results in dropping almost all frames from reorder buffer when security is enabled by doing double decryption (first in hw, second in sw because of wrong rx status). This patch copies rx status into cb area before the frame is put into reorder buffer. After this patch, there is a significant improvement in throughput with ath9k + WPA2(AES). Signed-off-by: Vasanthakumar Thiagarajan Acked-by: Johannes Berg Cc: stable@kernel.org Signed-off-by: John W. Linville --- net/mac80211/rx.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 64ebe664effc..5fa7aedd90ed 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -29,6 +29,7 @@ static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, struct tid_ampdu_rx *tid_agg_rx, struct sk_buff *skb, + struct ieee80211_rx_status *status, u16 mpdu_seq_num, int bar_req); /* @@ -1688,7 +1689,7 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx) /* manage reordering buffer according to requested */ /* sequence number */ rcu_read_lock(); - ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, NULL, + ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, NULL, NULL, start_seq_num, 1); rcu_read_unlock(); return RX_DROP_UNUSABLE; @@ -2293,6 +2294,7 @@ static inline u16 seq_sub(u16 sq1, u16 sq2) static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, struct tid_ampdu_rx *tid_agg_rx, struct sk_buff *skb, + struct ieee80211_rx_status *rxstatus, u16 mpdu_seq_num, int bar_req) { @@ -2374,6 +2376,8 @@ static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, /* put the frame in the reordering buffer */ tid_agg_rx->reorder_buf[index] = skb; + memcpy(tid_agg_rx->reorder_buf[index]->cb, rxstatus, + sizeof(*rxstatus)); tid_agg_rx->stored_mpdu_num++; /* release the buffer until next missing frame */ index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) @@ -2399,7 +2403,8 @@ static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, } static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local, - struct sk_buff *skb) + struct sk_buff *skb, + struct ieee80211_rx_status *status) { struct ieee80211_hw *hw = &local->hw; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; @@ -2448,7 +2453,7 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local, /* according to mpdu sequence number deal with reordering buffer */ mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4; - ret = ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, + ret = ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, status, mpdu_seq_num, 0); end_reorder: return ret; @@ -2512,7 +2517,7 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, return; } - if (!ieee80211_rx_reorder_ampdu(local, skb)) + if (!ieee80211_rx_reorder_ampdu(local, skb, status)) __ieee80211_rx_handle_packet(hw, skb, status, rate); rcu_read_unlock(); -- cgit v1.2.3 From 47afbaf5af9454a7a1a64591e20cbfcc27ca67a8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 7 Apr 2009 15:22:28 +0200 Subject: mac80211: correct wext transmit power handler Wext makes no assumptions about the contents of data->txpower.fixed and data->txpower.value when data->txpower.disabled is set, so do not update the user-requested power level while disabling. Also, when wext configures a really _fixed_ power output [1], we should reject it instead of limiting it to the regulatory constraint. If the user wants to set a _limit_ [2] then we should honour that. [1] iwconfig wlan0 txpower 20dBm fixed [2] iwconfig wlan0 txpower 10dBm This fixes http://www.intellinuxwireless.org/bugzilla/show_bug.cgi?id=1942 Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/main.c | 2 +- net/mac80211/wext.c | 41 +++++++++++++++++++++++++++++++++-------- 2 files changed, 34 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index a6f1d8a869bc..fbcbed6cad01 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -258,7 +258,7 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) (chan->max_power - local->power_constr_level) : chan->max_power; - if (local->user_power_level) + if (local->user_power_level >= 0) power = min(power, local->user_power_level); if (local->hw.conf.power_level != power) { diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index deb4ecec122a..ce9115c18152 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -417,6 +417,7 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev, { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_channel* chan = local->hw.conf.channel; + bool reconf = false; u32 reconf_flags = 0; int new_power_level; @@ -427,14 +428,38 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev, if (!chan) return -EINVAL; - if (data->txpower.fixed) - new_power_level = min(data->txpower.value, chan->max_power); - else /* Automatic power level setting */ - new_power_level = chan->max_power; + /* only change when not disabling */ + if (!data->txpower.disabled) { + if (data->txpower.fixed) { + if (data->txpower.value < 0) + return -EINVAL; + new_power_level = data->txpower.value; + /* + * Debatable, but we cannot do a fixed power + * level above the regulatory constraint. + * Use "iwconfig wlan0 txpower 15dBm" instead. + */ + if (new_power_level > chan->max_power) + return -EINVAL; + } else { + /* + * Automatic power level setting, max being the value + * passed in from userland. + */ + if (data->txpower.value < 0) + new_power_level = -1; + else + new_power_level = data->txpower.value; + } + + reconf = true; - local->user_power_level = new_power_level; - if (local->hw.conf.power_level != new_power_level) - reconf_flags |= IEEE80211_CONF_CHANGE_POWER; + /* + * ieee80211_hw_config() will limit to the channel's + * max power and possibly power constraint from AP. + */ + local->user_power_level = new_power_level; + } if (local->hw.conf.radio_enabled != !(data->txpower.disabled)) { local->hw.conf.radio_enabled = !(data->txpower.disabled); @@ -442,7 +467,7 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev, ieee80211_led_radio(local, local->hw.conf.radio_enabled); } - if (reconf_flags) + if (reconf || reconf_flags) ieee80211_hw_config(local, reconf_flags); return 0; -- cgit v1.2.3 From a860402d8f1756dae48cdcabe153c974116fc37e Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Wed, 15 Apr 2009 14:41:22 -0400 Subject: mac80211: quiet beacon loss messages On Sunday 05 April 2009 11:29:38 Michael Buesch wrote: > On Sunday 05 April 2009 11:23:59 Jaswinder Singh Rajput wrote: > > With latest linus tree I am getting, .config file attached: > > > > [ 22.895051] r8169: eth0: link down > > [ 22.897564] ADDRCONF(NETDEV_UP): eth0: link is not ready > > [ 22.928047] ADDRCONF(NETDEV_UP): wlan0: link is not ready > > [ 22.982292] libvirtd used greatest stack depth: 4200 bytes left > > [ 63.709879] wlan0: authenticate with AP 00:11:95:9e:df:f6 > > [ 63.712096] wlan0: authenticated > > [ 63.712127] wlan0: associate with AP 00:11:95:9e:df:f6 > > [ 63.726831] wlan0: RX AssocResp from 00:11:95:9e:df:f6 (capab=0x471 status=0 aid=1) > > [ 63.726855] wlan0: associated > > [ 63.730093] ADDRCONF(NETDEV_CHANGE): wlan0: link becomes ready > > [ 74.296087] wlan0: no IPv6 routers present > > [ 79.349044] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 119.358200] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 179.354292] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 259.366044] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 359.348292] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 361.953459] packagekitd used greatest stack depth: 4160 bytes left > > [ 478.824258] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 598.813343] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 718.817292] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 838.824567] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 958.815402] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 1078.848434] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 1198.822913] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 1318.824931] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 1438.814157] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 1558.827336] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 1678.823011] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 1798.830589] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 1918.828044] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 2038.827224] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 2116.517152] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 2158.840243] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > [ 2278.827427] wlan0: beacon loss from AP 00:11:95:9e:df:f6 - sending probe request > > > I think this message should only show if CONFIG_MAC80211_VERBOSE_DEBUG is set. > It's kind of expected that we lose a beacon once in a while, so we shouldn't print > verbose messages to the kernel log (even if they are KERN_DEBUG). > > And besides that, I think one can easily remotely trigger this message and flood the logs. > So it should probably _also_ be ratelimited. Something like this: Signed-off-by: Michael Buesch --- net/mac80211/mlme.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7ecda9d59d8a..1b14d0204dd2 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -945,9 +945,13 @@ void ieee80211_beacon_loss_work(struct work_struct *work) u.mgd.beacon_loss_work); struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - printk(KERN_DEBUG "%s: driver reports beacon loss from AP %pM " - "- sending probe request\n", sdata->dev->name, - sdata->u.mgd.bssid); +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: driver reports beacon loss from AP %pM " + "- sending probe request\n", sdata->dev->name, + sdata->u.mgd.bssid); + } +#endif ifmgd->flags |= IEEE80211_STA_PROBEREQ_POLL; ieee80211_send_probe_req(sdata, ifmgd->bssid, ifmgd->ssid, @@ -1007,9 +1011,13 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata) (local->hw.conf.flags & IEEE80211_CONF_PS)) && time_after(jiffies, ifmgd->last_beacon + IEEE80211_MONITORING_INTERVAL)) { - printk(KERN_DEBUG "%s: beacon loss from AP %pM " - "- sending probe request\n", - sdata->dev->name, ifmgd->bssid); +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: beacon loss from AP %pM " + "- sending probe request\n", + sdata->dev->name, ifmgd->bssid); + } +#endif ifmgd->flags |= IEEE80211_STA_PROBEREQ_POLL; ieee80211_send_probe_req(sdata, ifmgd->bssid, ifmgd->ssid, ifmgd->ssid_len, NULL, 0); -- cgit v1.2.3 From 23a99840d571a237845fd0906bce78e7c76be650 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Tue, 14 Apr 2009 06:32:56 +0200 Subject: mac80211: Fragmentation threshold (typo) mac80211: Fragmentation threshold (typo) ieee80211_ioctl_siwfrag() sets the fragmentation_threshold to 2352 when frame fragmentation is to be disabled, yet the corresponding 'get' function tests for 2353 bytes instead. This causes user-space tools to display a fragmentation threshold of 2352 bytes even if fragmentation has been disabled. Signed-off-by: Gerrit Renker Signed-off-by: John W. Linville --- net/mac80211/wext.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index ce9115c18152..959aa8379ccf 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -555,7 +555,7 @@ static int ieee80211_ioctl_giwfrag(struct net_device *dev, struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); frag->value = local->fragmentation_threshold; - frag->disabled = (frag->value >= IEEE80211_MAX_RTS_THRESHOLD); + frag->disabled = (frag->value >= IEEE80211_MAX_FRAG_THRESHOLD); frag->fixed = 1; return 0; -- cgit v1.2.3 From 98d500d66cb7940747b424b245fc6a51ecfbf005 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 16 Apr 2009 18:33:01 +0200 Subject: netfilter: nf_nat: add support for persistent mappings The removal of the SAME target accidentally removed one feature that is not available from the normal NAT targets so far, having multi-range mappings that use the same mapping for each connection from a single client. The current behaviour is to choose the address from the range based on source and destination IP, which breaks when communicating with sites having multiple addresses that require all connections to originate from the same IP address. Introduce a IP_NAT_RANGE_PERSISTENT option that controls whether the destination address is taken into account for selecting addresses. http://bugzilla.kernel.org/show_bug.cgi?id=12954 Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_nat.h | 1 + net/ipv4/netfilter/nf_nat_core.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 9dc1039ff78b..8df0b7f7fc6e 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -18,6 +18,7 @@ enum nf_nat_manip_type #define IP_NAT_RANGE_MAP_IPS 1 #define IP_NAT_RANGE_PROTO_SPECIFIED 2 #define IP_NAT_RANGE_PROTO_RANDOM 4 +#define IP_NAT_RANGE_PERSISTENT 8 /* NAT sequence number modifications */ struct nf_nat_seq { diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index fe65187810f0..3229e0a81ba6 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -211,7 +211,8 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple, minip = ntohl(range->min_ip); maxip = ntohl(range->max_ip); j = jhash_2words((__force u32)tuple->src.u3.ip, - (__force u32)tuple->dst.u3.ip, 0); + range->flags & IP_NAT_RANGE_PERSISTENT ? + (__force u32)tuple->dst.u3.ip : 0, 0); j = ((u64)j * (maxip - minip + 1)) >> 32; *var_ipp = htonl(minip + j); } -- cgit v1.2.3 From 62bcaa13039538c7f794b0cfcbc26e0313e7fcd5 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 17 Apr 2009 01:38:46 -0700 Subject: can: Network Drop Monitor: Make use of consume_skb() in af_can.c Since commit ead2ceb0ec9f85cff19c43b5cdb2f8a054484431 ("Network Drop Monitor: Adding kfree_skb_clean for non-drops and modifying end-of-line points for skbs") so called end-of-line points for skb's should use consume_skb() to free the socket buffer. In opposite to consume_skb() the function kfree_skb() is intended to be used for unexpected skb drops e.g. in error conditions that now can trigger the network drop monitor if enabled. This patch moves the skb end-of-line point in af_can.c to use consume_skb(). Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- net/can/af_can.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/can/af_can.c b/net/can/af_can.c index 547bafc79e28..10f0528c3bf5 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -674,8 +674,8 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, rcu_read_unlock(); - /* free the skbuff allocated by the netdevice driver */ - kfree_skb(skb); + /* consume the skbuff allocated by the netdevice driver */ + consume_skb(skb); if (matches > 0) { can_stats.matches++; -- cgit v1.2.3 From a0a69a0106dab8d20596f97f6674bed3b394d1ee Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 17 Apr 2009 02:34:38 -0700 Subject: gro: Fix use after free in tcp_gro_receive After calling skb_gro_receive skb->len can no longer be relied on since if the skb was merged using frags, then its pages will have been removed and the length reduced. This caused tcp_gro_receive to prematurely end merging which resulted in suboptimal performance with ixgbe. The fix is to store skb->len on the stack. Reported-by: Mark Wagner Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index fafbec8b073e..1d7f49c6f0ca 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2511,6 +2511,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) struct sk_buff *p; struct tcphdr *th; struct tcphdr *th2; + unsigned int len; unsigned int thlen; unsigned int flags; unsigned int mss = 1; @@ -2531,6 +2532,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) skb_gro_pull(skb, thlen); + len = skb_gro_len(skb); flags = tcp_flag_word(th); for (; (p = *head); head = &p->next) { @@ -2561,7 +2563,7 @@ found: mss = skb_shinfo(p)->gso_size; - flush |= (skb_gro_len(skb) > mss) | !skb_gro_len(skb); + flush |= (len > mss) | !len; flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); if (flush || skb_gro_receive(head, skb)) { @@ -2574,7 +2576,7 @@ found: tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); out_check_final: - flush = skb_gro_len(skb) < mss; + flush = len < mss; flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | TCP_FLAG_SYN | TCP_FLAG_FIN); -- cgit v1.2.3 From 150ace0db360373d2016a2497d252138a59c5ba8 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 17 Apr 2009 17:47:31 +0200 Subject: netfilter: ctnetlink: report error if event message allocation fails This patch fixes an inconsistency that results in no error reports to user-space listeners if we fail to allocate the event message. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c6439c77953c..0ea36e0c8a0e 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -512,7 +512,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, skb = ctnetlink_alloc_skb(tuple(ct, IP_CT_DIR_ORIGINAL), GFP_ATOMIC); if (!skb) - return NOTIFY_DONE; + goto errout; b = skb->tail; @@ -591,8 +591,9 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, nla_put_failure: rcu_read_unlock(); nlmsg_failure: - nfnetlink_set_err(0, group, -ENOBUFS); kfree_skb(skb); +errout: + nfnetlink_set_err(0, group, -ENOBUFS); return NOTIFY_DONE; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ @@ -1564,7 +1565,7 @@ static int ctnetlink_expect_event(struct notifier_block *this, skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb) - return NOTIFY_DONE; + goto errout; b = skb->tail; @@ -1589,8 +1590,9 @@ static int ctnetlink_expect_event(struct notifier_block *this, nla_put_failure: rcu_read_unlock(); nlmsg_failure: - nfnetlink_set_err(0, 0, -ENOBUFS); kfree_skb(skb); +errout: + nfnetlink_set_err(0, 0, -ENOBUFS); return NOTIFY_DONE; } #endif -- cgit v1.2.3 From a0142733a7ef2f3476e63938b330026a08c53f37 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 17 Apr 2009 17:48:44 +0200 Subject: netfilter: nfnetlink: return ENOMEM if we fail to create netlink socket With this patch, nfnetlink returns -ENOMEM instead of -EPERM if we fail to create the nfnetlink netlink socket during the module loading. This is exactly what rtnetlink does in this case. Ideally, it would be better if we propagate the error that has happened in netlink_kernel_create(), however, this function still does not implement this yet. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/netfilter/nfnetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 2785d66a7e38..b8ab37ad7ed5 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -203,7 +203,7 @@ static int __init nfnetlink_init(void) nfnetlink_rcv, NULL, THIS_MODULE); if (!nfnl) { printk(KERN_ERR "cannot initialize nfnetlink!\n"); - return -1; + return -ENOMEM; } return 0; -- cgit v1.2.3 From 7181d4673710888b6d7084b37b9d77ed4f4e41b2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 Apr 2009 21:33:15 +0200 Subject: mac80211: avoid crashing when no scan sdata Using the scan_sdata variable here is terribly wrong, if there has never been a scan then we fail. However, we need a bandaid... Signed-off-by: Johannes Berg Cc: stable@kernel.org [2.6.29] Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 1b14d0204dd2..dc60804d6dd0 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2113,12 +2113,13 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) struct ieee80211_local *local = container_of(work, struct ieee80211_local, dynamic_ps_enable_work); + /* XXX: using scan_sdata is completely broken! */ struct ieee80211_sub_if_data *sdata = local->scan_sdata; if (local->hw.conf.flags & IEEE80211_CONF_PS) return; - if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) + if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK && sdata) ieee80211_send_nullfunc(local, sdata, 1); local->hw.conf.flags |= IEEE80211_CONF_PS; -- cgit v1.2.3 From 160002fe845218f5789a26954048592c3920ac7b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 16 Apr 2009 12:15:38 +0200 Subject: cfg80211: copy hold when replacing BSS When we receive a probe response frame we can replace the BSS struct in our list -- but if that struct is held then we need to hold the new one as well. We really should fix this completely and not replace the struct, but this is a bandaid for now. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/scan.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 2a00e362f5fe..4c77669275eb 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -364,6 +364,8 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, list_replace(&found->list, &res->list); rb_replace_node(&found->rbn, &res->rbn, &dev->bss_tree); + /* XXX: workaround */ + res->hold = found->hold; kref_put(&found->ref, bss_release); found = res; } else if (found) { -- cgit v1.2.3 From cd1658f592a60d028dd2e48d86724b737a82cab0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 16 Apr 2009 15:00:58 +0200 Subject: cfg80211: do not replace BSS structs Instead, allocate extra IE memory if necessary. Normally, this isn't even necessary since there's enough space. This is a better way of correcting the "held BSS can disappear" issue, but also a lot more code. It is also necessary for proper auth/assoc BSS handling in the future. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/core.h | 2 +- net/wireless/scan.c | 42 +++++++++++++++++++++++++++++++++--------- 2 files changed, 34 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/wireless/core.h b/net/wireless/core.h index d43daa236ef9..0a592e4295f0 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -90,7 +90,7 @@ struct cfg80211_internal_bss { struct rb_node rbn; unsigned long ts; struct kref ref; - bool hold; + bool hold, ies_allocated; /* must be last because of priv member */ struct cfg80211_bss pub; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 4c77669275eb..2ae65b39b529 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -58,6 +58,10 @@ static void bss_release(struct kref *ref) bss = container_of(ref, struct cfg80211_internal_bss, ref); if (bss->pub.free_priv) bss->pub.free_priv(&bss->pub); + + if (bss->ies_allocated) + kfree(bss->pub.information_elements); + kfree(bss); } @@ -360,21 +364,41 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, found = rb_find_bss(dev, res); - if (found && overwrite) { - list_replace(&found->list, &res->list); - rb_replace_node(&found->rbn, &res->rbn, - &dev->bss_tree); - /* XXX: workaround */ - res->hold = found->hold; - kref_put(&found->ref, bss_release); - found = res; - } else if (found) { + if (found) { kref_get(&found->ref); found->pub.beacon_interval = res->pub.beacon_interval; found->pub.tsf = res->pub.tsf; found->pub.signal = res->pub.signal; found->pub.capability = res->pub.capability; found->ts = res->ts; + + /* overwrite IEs */ + if (overwrite) { + size_t used = dev->wiphy.bss_priv_size + sizeof(*res); + size_t ielen = res->pub.len_information_elements; + + if (ksize(found) >= used + ielen) { + memcpy(found->pub.information_elements, + res->pub.information_elements, ielen); + found->pub.len_information_elements = ielen; + } else { + u8 *ies = found->pub.information_elements; + + if (found->ies_allocated) { + if (ksize(ies) < ielen) + ies = krealloc(ies, ielen, + GFP_ATOMIC); + } else + ies = kmalloc(ielen, GFP_ATOMIC); + + if (ies) { + memcpy(ies, res->pub.information_elements, ielen); + found->ies_allocated = true; + found->pub.information_elements = ies; + } + } + } + kref_put(&res->ref, bss_release); } else { /* this "consumes" the reference */ -- cgit v1.2.3 From 60375541f7c8a577b977d344565259776c3acfc1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 17 Apr 2009 00:54:23 +0200 Subject: mac80211: validate TIM IE length The TIM IE must not be shorter than 4 bytes, so verify that when parsing it. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index dc60804d6dd0..1619e0cd26e2 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -441,6 +441,9 @@ static bool ieee80211_check_tim(struct ieee802_11_elems *elems, u16 aid) u8 index, indexn1, indexn2; struct ieee80211_tim_ie *tim = (struct ieee80211_tim_ie *) elems->tim; + if (unlikely(!tim || elems->tim_len < 4)) + return false; + aid &= 0x3fff; index = aid / 8; mask = 1 << (aid & 7); -- cgit v1.2.3 From 7816a0a862d851d0b05710e7d94bfe390f3180e2 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 17 Apr 2009 15:59:23 -0700 Subject: vlan/macvlan: fix NULL pointer dereferences in ethtool handlers Check whether the underlying device provides a set of ethtool ops before checking for individual handlers to avoid NULL pointer dereferences. Reported-by: Art van Breemen Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 9 ++++++--- net/8021q/vlan_dev.c | 3 ++- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 70d3ef4a2c5f..214a8cf2b708 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -376,7 +376,8 @@ static u32 macvlan_ethtool_get_rx_csum(struct net_device *dev) const struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; - if (lowerdev->ethtool_ops->get_rx_csum == NULL) + if (lowerdev->ethtool_ops == NULL || + lowerdev->ethtool_ops->get_rx_csum == NULL) return 0; return lowerdev->ethtool_ops->get_rx_csum(lowerdev); } @@ -387,7 +388,8 @@ static int macvlan_ethtool_get_settings(struct net_device *dev, const struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; - if (!lowerdev->ethtool_ops->get_settings) + if (!lowerdev->ethtool_ops || + !lowerdev->ethtool_ops->get_settings) return -EOPNOTSUPP; return lowerdev->ethtool_ops->get_settings(lowerdev, cmd); @@ -398,7 +400,8 @@ static u32 macvlan_ethtool_get_flags(struct net_device *dev) const struct macvlan_dev *vlan = netdev_priv(dev); struct net_device *lowerdev = vlan->lowerdev; - if (!lowerdev->ethtool_ops->get_flags) + if (!lowerdev->ethtool_ops || + !lowerdev->ethtool_ops->get_flags) return 0; return lowerdev->ethtool_ops->get_flags(lowerdev); } diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 1b34135cf990..6b0921364014 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -668,7 +668,8 @@ static int vlan_ethtool_get_settings(struct net_device *dev, const struct vlan_dev_info *vlan = vlan_dev_info(dev); struct net_device *real_dev = vlan->real_dev; - if (!real_dev->ethtool_ops->get_settings) + if (!real_dev->ethtool_ops || + !real_dev->ethtool_ops->get_settings) return -EOPNOTSUPP; return real_dev->ethtool_ops->get_settings(real_dev, cmd); -- cgit v1.2.3 From e2139b32726e5dd184974c785ea3f62026590801 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Thu, 26 Mar 2009 16:41:56 +0200 Subject: Bluetooth: Fix removing of RFCOMM DLC timer with DEFER_SETUP There is a missing call to rfcomm_dlc_clear_timer in the case that DEFER_SETUP is used and so the connection gets disconnected after the timeout even if it was successfully accepted previously. This patch adds a call to rfcomm_dlc_clear_timer to rfcomm_dlc_accept which will get called when the user accepts the connection by calling read() on the socket. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/rfcomm/core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 1d0fb0f23c63..374536e050aa 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1194,6 +1194,8 @@ void rfcomm_dlc_accept(struct rfcomm_dlc *d) rfcomm_send_ua(d->session, d->dlci); + rfcomm_dlc_clear_timer(d); + rfcomm_dlc_lock(d); d->state = BT_CONNECTED; d->state_change(d, 0); -- cgit v1.2.3 From 732547f96ea2442965a24e0ed529d285321a0fff Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 19 Apr 2009 19:14:14 +0200 Subject: Bluetooth: Fallback from eSCO to SCO on unspecified error Some Bluetooth chips (like the ones from Texas Instruments) don't do proper eSCO negotiations inside the Link Manager. They just return an error code and in case of the Kyocera ED-8800 headset it is just a random error. < HCI Command: Setup Synchronous Connection 0x01|0x0028) plen 17 handle 1 voice setting 0x0060 > HCI Event: Command Status (0x0f) plen 4 Setup Synchronous Connection (0x01|0x0028) status 0x00 ncmd 1 > HCI Event: Synchronous Connect Complete (0x2c) plen 17 status 0x1f handle 257 bdaddr 00:14:0A:xx:xx:xx type eSCO Error: Unspecified Error In these cases it is up to the host stack to fallback to a SCO setup and so retry with SCO parameters. Based on a report by Nick Pelly Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 55534244c3a0..963f9662eaa8 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1646,20 +1646,28 @@ static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_bu conn->type = SCO_LINK; } - if (conn->out && ev->status == 0x1c && conn->attempt < 2) { - conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) | - (hdev->esco_type & EDR_ESCO_MASK); - hci_setup_sync(conn, conn->link->handle); - goto unlock; - } - - if (!ev->status) { + switch (ev->status) { + case 0x00: conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; hci_conn_add_sysfs(conn); - } else + break; + + case 0x1c: /* SCO interval rejected */ + case 0x1f: /* Unspecified error */ + if (conn->out && conn->attempt < 2) { + conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) | + (hdev->esco_type & EDR_ESCO_MASK); + hci_setup_sync(conn, conn->link->handle); + goto unlock; + } + /* fall through */ + + default: conn->state = BT_CLOSED; + break; + } hci_proto_connect_cfm(conn, ev->status); if (ev->status) -- cgit v1.2.3 From 9499237a1c42a27fbcc7ed1d59e34df2b574cdfb Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 19 Apr 2009 19:30:03 +0200 Subject: Bluetooth: Add workaround for wrong HCI event in eSCO setup The Broadcom chips with 2.1 firmware handle the fallback case to a SCO link wrongly when setting up eSCO connections. < HCI Command: Setup Synchronous Connection (0x01|0x0028) plen 17 handle 11 voice setting 0x0060 > HCI Event: Command Status (0x0f) plen 4 Setup Synchronous Connection (0x01|0x0028) status 0x00 ncmd 1 > HCI Event: Connect Complete (0x03) plen 11 status 0x00 handle 1 bdaddr 00:1E:3A:xx:xx:xx type SCO encrypt 0x01 The Link Manager negotiates the fallback to SCO, but then sends out a Connect Complete event. This is wrong and the Link Manager should actually send a Synchronous Connection Complete event if the Setup Synchronous Connection has been used. Only the remote side is allowed to use Connect Complete to indicate the missing support for eSCO in the host stack. This patch adds a workaround for this which clearly should not be needed, but reality is that broken Broadcom devices are deployed. Based on a report by Ville Tervo Signed-off-by: Marcel Holtman --- net/bluetooth/hci_event.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 963f9662eaa8..15f40ea8d544 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -866,8 +866,16 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s hci_dev_lock(hdev); conn = hci_conn_hash_lookup_ba(hdev, ev->link_type, &ev->bdaddr); - if (!conn) - goto unlock; + if (!conn) { + if (ev->link_type != SCO_LINK) + goto unlock; + + conn = hci_conn_hash_lookup_ba(hdev, ESCO_LINK, &ev->bdaddr); + if (!conn) + goto unlock; + + conn->type = SCO_LINK; + } if (!ev->status) { conn->handle = __le16_to_cpu(ev->handle); -- cgit v1.2.3 From c7c1a0f60b90955855ba8cd9cfc480167bf6c3da Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 17 Apr 2009 04:19:36 +0000 Subject: ax25: proc uid file misses header This has been broken for a while. I happened to catch it testing because one app "knew" that the top line of the calls data was the policy line and got confused. Put the header back. Signed-off-by: Alan Cox Signed-off-by: David S. Miller --- net/ax25/ax25_uid.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ax25/ax25_uid.c b/net/ax25/ax25_uid.c index 57aeba729bae..832bcf092a01 100644 --- a/net/ax25/ax25_uid.c +++ b/net/ax25/ax25_uid.c @@ -148,9 +148,13 @@ static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos) { struct ax25_uid_assoc *pt; struct hlist_node *node; - int i = 0; + int i = 1; read_lock(&ax25_uid_lock); + + if (*pos == 0) + return SEQ_START_TOKEN; + ax25_uid_for_each(pt, node, &ax25_uid_list) { if (i == *pos) return pt; @@ -162,8 +166,10 @@ static void *ax25_uid_seq_start(struct seq_file *seq, loff_t *pos) static void *ax25_uid_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; - - return hlist_entry(((ax25_uid_assoc *)v)->uid_node.next, + if (v == SEQ_START_TOKEN) + return ax25_uid_list.first; + else + return hlist_entry(((ax25_uid_assoc *)v)->uid_node.next, ax25_uid_assoc, uid_node); } -- cgit v1.2.3 From 8caf153974f2274301e583fda732cc8e5b80331f Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Fri, 17 Apr 2009 10:08:49 +0000 Subject: net: sch_netem: Fix an inconsistency in ingress netem timestamps. Alex Sidorenko reported: "while experimenting with 'netem' we have found some strange behaviour. It seemed that ingress delay as measured by 'ping' command shows up on some hosts but not on others. After some investigation I have found that the problem is that skbuff->tstamp field value depends on whether there are any packet sniffers enabled. That is: - if any ptype_all handler is registered, the tstamp field is as expected - if there are no ptype_all handlers, the tstamp field does not show the delay" This patch prevents unnecessary update of tstamp in dev_queue_xmit_nit() on ingress path (with act_mirred) adding a check, so minimal overhead on the fast path, but only when sniffers etc. are active. Since netem at ingress seems to logically emulate a network before a host, tstamp is zeroed to trigger the update and pretend delays are from the outside. Reported-by: Alex Sidorenko Tested-by: Alex Sidorenko Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/core/dev.c | 5 +++++ net/sched/sch_netem.c | 8 ++++++++ 2 files changed, 13 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 343883f65ea7..dcc357e4f91e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1336,7 +1336,12 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) { struct packet_type *ptype; +#ifdef CONFIG_NET_CLS_ACT + if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS))) + net_timestamp(skb); +#else net_timestamp(skb); +#endif rcu_read_lock(); list_for_each_entry_rcu(ptype, &ptype_all, list) { diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index d876b8734848..2b88295cb7b7 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -280,6 +280,14 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) if (unlikely(!skb)) return NULL; +#ifdef CONFIG_NET_CLS_ACT + /* + * If it's at ingress let's pretend the delay is + * from the network (tstamp will be updated). + */ + if (G_TC_FROM(skb->tc_verd) & AT_INGRESS) + skb->tstamp.tv64 = 0; +#endif pr_debug("netem_dequeue: return skb=%p\n", skb); sch->q.qlen--; return skb; -- cgit v1.2.3 From 52cf3cc8acea52ecb93ef1dddb4ef2ae4e35c319 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Sat, 18 Apr 2009 05:48:48 +0000 Subject: tcp: fix mid-wq adjustment helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just noticed while doing some new work that the recent mid-wq adjustment logic will misbehave when FACK is not in use (happens either due sysctl'ed off or auto-detected reordering) because I forgot the relevant TCPCB tagbit. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 53300fa2359f..59aec609cec6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -778,7 +778,7 @@ static void tcp_adjust_pcount(struct sock *sk, struct sk_buff *skb, int decr) if (tp->lost_skb_hint && before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) && - (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked)) + (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))) tp->lost_cnt_hint -= decr; tcp_verify_left_out(tp); -- cgit v1.2.3 From eb39c57ff7782bc015da517af1d9c3b2592e721e Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Sun, 19 Apr 2009 07:24:24 +0000 Subject: net: fix "compatibility" typos Signed-off-by: Marcin Slusarz Signed-off-by: David S. Miller --- drivers/net/Kconfig | 4 ++-- net/core/dev.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 9e921544ba20..214a92d1ef75 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -28,9 +28,9 @@ if NETDEVICES config COMPAT_NET_DEV_OPS default y - bool "Enable older network device API compatiablity" + bool "Enable older network device API compatibility" ---help--- - This option enables kernel compatiability with older network devices + This option enables kernel compatibility with older network devices that do not use net_device_ops interface. If unsure, say Y. diff --git a/net/core/dev.c b/net/core/dev.c index dcc357e4f91e..001a4c551d44 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4405,7 +4405,7 @@ int register_netdevice(struct net_device *dev) dev->iflink = -1; #ifdef CONFIG_COMPAT_NET_DEV_OPS - /* Netdevice_ops API compatiability support. + /* Netdevice_ops API compatibility support. * This is temporary until all network devices are converted. */ if (dev->netdev_ops) { @@ -4416,7 +4416,7 @@ int register_netdevice(struct net_device *dev) dev->name, netdev_drivername(dev, drivername, 64)); /* This works only because net_device_ops and the - compatiablity structure are the same. */ + compatibility structure are the same. */ dev->netdev_ops = (void *) &(dev->init); } #endif -- cgit v1.2.3 From 5db8765a86a4cbaf45adaf8c231cf9a6ca2dcfaf Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 16 Apr 2009 08:04:20 +0000 Subject: net: Fix GRO for multiple page fragments This loop over fragments in napi_fraginfo_skb() was "interesting". Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 001a4c551d44..308a7d0c277f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2545,9 +2545,9 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, } BUG_ON(info->nr_frags > MAX_SKB_FRAGS); - frag = &info->frags[info->nr_frags - 1]; + frag = info->frags; - for (i = skb_shinfo(skb)->nr_frags; i < info->nr_frags; i++) { + for (i = 0; i < info->nr_frags; i++) { skb_fill_page_desc(skb, i, frag->page, frag->page_offset, frag->size); frag++; -- cgit v1.2.3 From c197facc8ea08062f8f949aade6a33649ee06771 Mon Sep 17 00:00:00 2001 From: "hummerbliss@gmail.com" Date: Mon, 20 Apr 2009 17:12:35 +0200 Subject: netfilter: bridge: allow fragmentation of VLAN packets traversing a bridge br_nf_dev_queue_xmit only checks for ETH_P_IP packets for fragmenting but not VLAN packets. This results in dropping of large VLAN packets. This can be observed when connection tracking is enabled. Connection tracking re-assembles fragmented packets, and these have to re-fragmented when transmitting out. Also, make sure only refragmented packets are defragmented as per suggestion from Patrick McHardy. Signed-off-by: Saikiran Madugula Signed-off-by: Patrick McHardy --- net/bridge/br_netfilter.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 3953ac4214c8..e4a418fcb35b 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -788,15 +788,23 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb, return NF_STOLEN; } +#if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE) static int br_nf_dev_queue_xmit(struct sk_buff *skb) { - if (skb->protocol == htons(ETH_P_IP) && + if (skb->nfct != NULL && + (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb)) && skb->len > skb->dev->mtu && !skb_is_gso(skb)) return ip_fragment(skb, br_dev_queue_push_xmit); else return br_dev_queue_push_xmit(skb); } +#else +static int br_nf_dev_queue_xmit(struct sk_buff *skb) +{ + return br_dev_queue_push_xmit(skb); +} +#endif /* PF_BRIDGE/POST_ROUTING ********************************************/ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, -- cgit v1.2.3 From 18aaab15f9a9cd4f20dc596aa38408c5e5d208ed Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Sat, 18 Apr 2009 09:33:55 -0400 Subject: MAC80211: Remove unused MAC80211_VERBOSE_SPECT_MGMT_DEBUG. Remove this unused Kconfig variable, which Intel apparently once promised to make use of but never did. Signed-off-by: Robert P. J. Day Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/Kconfig | 7 ------- 1 file changed, 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index f3d9ae350fb6..ecc3faf9f11a 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -202,10 +202,3 @@ config MAC80211_DEBUG_COUNTERS and show them in debugfs. If unsure, say N. - -config MAC80211_VERBOSE_SPECT_MGMT_DEBUG - bool "Verbose Spectrum Management (IEEE 802.11h)debugging" - depends on MAC80211_DEBUG_MENU - ---help--- - Say Y here to print out verbose Spectrum Management (IEEE 802.11h) - debug messages. -- cgit v1.2.3 From d91c01c757bd9659ac10549504586fae610265a4 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 18 Apr 2009 21:53:15 +0300 Subject: nl80211: Make nl80211_send_mlme_event() atomic One of the code paths sending deauth/disassoc events ends up calling this function with rcu_read_lock held, so we must use GFP_ATOMIC in allocation routines. Reported-by: Johannes Berg Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 353e1a4ece83..2456e4ee445e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3334,7 +3334,7 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); if (!msg) return; @@ -3353,7 +3353,7 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, GFP_KERNEL); + genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, GFP_ATOMIC); return; nla_put_failure: -- cgit v1.2.3 From ad935687dbe7307f5abd9e3f610a965a287324a9 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Sun, 19 Apr 2009 08:47:19 +0300 Subject: mac80211: fix beacon loss detection after scan Currently beacon loss detection triggers after a scan. A probe request is sent and a message like this is printed to the log: wlan0: beacon loss from AP 00:12:17:e7:98:de - sending probe request But in fact there is no beacon loss, the beacons are just not received because of the ongoing scan. Fix it by updating last_beacon after the scan has finished. Reported-by: Jaswinder Singh Rajput Signed-off-by: Kalle Valo Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 1619e0cd26e2..ccfc21aa0b61 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1913,9 +1913,17 @@ static void ieee80211_sta_work(struct work_struct *work) static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) { - if (sdata->vif.type == NL80211_IFTYPE_STATION) + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + /* + * Need to update last_beacon to avoid beacon loss + * test to trigger. + */ + sdata->u.mgd.last_beacon = jiffies; + + queue_work(sdata->local->hw.workqueue, &sdata->u.mgd.work); + } } /* interface setup */ -- cgit v1.2.3 From 7e0986c17f695952ce5d61ed793ce048ba90a661 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 19 Apr 2009 13:22:11 +0200 Subject: mac80211: fix basic rate bitmap calculation "mac80211: fix basic rates setting from association response" introduced a copy/paste error. Unfortunately, this not just leads to wrong data being passed to the driver but is remotely exploitable for some hardware or driver combinations. Signed-off-by: Johannes Berg Cc: stable@kernel.org [2.6.29] Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ccfc21aa0b61..132938b073dc 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1366,7 +1366,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, for (i = 0; i < elems.ext_supp_rates_len; i++) { int rate = (elems.ext_supp_rates[i] & 0x7f) * 5; - bool is_basic = !!(elems.supp_rates[i] & 0x80); + bool is_basic = !!(elems.ext_supp_rates[i] & 0x80); if (rate > 110) have_higher_than_11mbit = true; -- cgit v1.2.3 From 7d42081a271bd8a82f2100524085c4f029e47717 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Mon, 20 Apr 2009 15:36:19 -0700 Subject: mac80211: do not print WARN if config interface It is expected that config interface will always succeed as mac80211 will only request what driver supports. The exception here is when a device has rfkill enabled. At this time the rfkill state is unknown to mac80211 and config interface can fail. When this happens we deal with this error instead of printing a WARN. Signed-off-by: Reinette Chatre Signed-off-by: John W. Linville --- net/mac80211/pm.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 027302326498..81985d27cbda 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -156,8 +156,19 @@ int __ieee80211_resume(struct ieee80211_hw *hw) case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_AP: case NL80211_IFTYPE_MESH_POINT: - WARN_ON(ieee80211_if_config(sdata, changed)); - ieee80211_bss_info_change_notify(sdata, ~0); + /* + * Driver's config_interface can fail if rfkill is + * enabled. Accommodate this return code. + * FIXME: When mac80211 has knowledge of rfkill + * state the code below can change back to: + * WARN(ieee80211_if_config(sdata, changed)); + * ieee80211_bss_info_change_notify(sdata, ~0); + */ + if (ieee80211_if_config(sdata, changed)) + printk(KERN_DEBUG "%s: failed to configure interface during resume\n", + sdata->dev->name); + else + ieee80211_bss_info_change_notify(sdata, ~0); break; case NL80211_IFTYPE_WDS: break; -- cgit v1.2.3 From d1bcb9f1273adee6d2ce5edf84f19409a5cc31b9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 21 Apr 2009 01:36:59 +0200 Subject: mac80211: fix alignment calculation bug When checking whether or not a given frame needs to be moved to be properly aligned to a 4-byte boundary, we use & 4 which wasn't intended, this code should check the lowest two bits. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 5fa7aedd90ed..9776f73c51ad 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1397,7 +1397,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) * mac80211. That also explains the __skb_push() * below. */ - align = (unsigned long)skb->data & 4; + align = (unsigned long)skb->data & 3; if (align) { if (WARN_ON(skb_headroom(skb) < 3)) { dev_kfree_skb(skb); -- cgit v1.2.3 From bbe188c8f16effd902d1ad391e06e41ce649b22e Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Tue, 21 Apr 2009 06:04:20 +0000 Subject: af_iucv: consider state IUCV_CLOSING when closing a socket Make sure a second invocation of iucv_sock_close() guarantees proper freeing of an iucv path. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 49e786535dc8..2941ee50393b 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -172,6 +172,7 @@ static void iucv_sock_close(struct sock *sk) err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0, timeo); } + case IUCV_CLOSING: /* fall through */ sk->sk_state = IUCV_CLOSED; sk->sk_state_change(sk); @@ -224,6 +225,8 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio) spin_lock_init(&iucv_sk(sk)->message_q.lock); skb_queue_head_init(&iucv_sk(sk)->backlog_skb_q); iucv_sk(sk)->send_tag = 0; + iucv_sk(sk)->path = NULL; + memset(&iucv_sk(sk)->src_user_id , 0, 32); sk->sk_destruct = iucv_sock_destruct; sk->sk_sndtimeo = IUCV_CONN_TIMEOUT; -- cgit v1.2.3 From 60d3705fcbfe7deca8e94bc7ddecd6f9f1a4647e Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Tue, 21 Apr 2009 06:04:21 +0000 Subject: af_iucv: fix oops in iucv_sock_recvmsg() for MSG_PEEK flag If iucv_sock_recvmsg() is called with MSG_PEEK flag set, the skb is enqueued twice. If the socket is then closed, the pointer to the skb is freed twice. Remove the skb_queue_head() call for MSG_PEEK, because the skb_recv_datagram() function already handles MSG_PEEK (does not dequeue the skb). Signed-off-by: Hendrik Brueckner Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 2941ee50393b..42b3be302c57 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -814,6 +814,8 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + /* receive/dequeue next skb: + * the function understands MSG_PEEK and, thus, does not dequeue skb */ skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) @@ -861,9 +863,7 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, iucv_process_message_q(sk); spin_unlock_bh(&iucv->message_q.lock); } - - } else - skb_queue_head(&sk->sk_receive_queue, skb); + } done: return err ? : copied; -- cgit v1.2.3 From fe86e54ef9465c97a16337d2a41a4cf486b937ae Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Tue, 21 Apr 2009 06:04:22 +0000 Subject: af_iucv: Reject incoming msgs if RECV_SHUTDOWN is set Reject incoming iucv messages if the receive direction has been shut down. It avoids that the queue of outstanding messages increases and exceeds the message limit of the iucv communication path. Signed-off-by: Hendrik Brueckner Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 42b3be302c57..ee517108856e 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1116,8 +1116,10 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) struct sock_msg_q *save_msg; int len; - if (sk->sk_shutdown & RCV_SHUTDOWN) + if (sk->sk_shutdown & RCV_SHUTDOWN) { + iucv_message_reject(path, msg); return; + } if (!list_empty(&iucv->message_q.list) || !skb_queue_empty(&iucv->backlog_skb_q)) -- cgit v1.2.3 From e14ad5fa8705fb354e72312479abbe420ebc3f8e Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Tue, 21 Apr 2009 06:04:23 +0000 Subject: af_iucv: Test additional sk states in iucv_sock_shutdown Add few more sk states in iucv_sock_shutdown(). Signed-off-by: Hendrik Brueckner Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index ee517108856e..9cfdaaddc5b2 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -937,6 +937,9 @@ static int iucv_sock_shutdown(struct socket *sock, int how) lock_sock(sk); switch (sk->sk_state) { + case IUCV_DISCONN: + case IUCV_CLOSING: + case IUCV_SEVERED: case IUCV_CLOSED: err = -ENOTCONN; goto fail; -- cgit v1.2.3 From 3fa6b5adbe46b3d665267dee0f879858ab464f44 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Tue, 21 Apr 2009 06:04:24 +0000 Subject: af_iucv: Fix race when queuing incoming iucv messages AF_IUCV runs into a race when queuing incoming iucv messages and receiving the resulting backlog. If the Linux system is under pressure (high load or steal time), the message queue grows up, but messages are not received and queued onto the backlog queue. In that case, applications do not receive any data with recvmsg() even if AF_IUCV puts incoming messages onto the message queue. The race can be avoided if the message queue spinlock in the message_pending callback is spreaded across the entire callback function. Signed-off-by: Hendrik Brueckner Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 9cfdaaddc5b2..b51c9187c347 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1124,6 +1124,8 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) return; } + spin_lock(&iucv->message_q.lock); + if (!list_empty(&iucv->message_q.list) || !skb_queue_empty(&iucv->backlog_skb_q)) goto save_message; @@ -1137,9 +1139,8 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) if (!skb) goto save_message; - spin_lock(&iucv->message_q.lock); iucv_process_message(sk, skb, path, msg); - spin_unlock(&iucv->message_q.lock); + goto out_unlock; return; @@ -1150,8 +1151,9 @@ save_message: save_msg->path = path; save_msg->msg = *msg; - spin_lock(&iucv->message_q.lock); list_add_tail(&save_msg->list, &iucv->message_q.list); + +out_unlock: spin_unlock(&iucv->message_q.lock); } -- cgit v1.2.3 From 50b2ff1bc47baacb8e9882b2b2a74b240ddbeecf Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 21 Apr 2009 10:04:22 +0000 Subject: netlabel: Always remove the correct address selector The NetLabel address selector mechanism has a problem where it can get mistakenly remove the wrong selector when similar addresses are used. The problem is caused when multiple addresses are configured that have different netmasks but the same address, e.g. 127.0.0.0/8 and 127.0.0.0/24. This patch fixes the problem. Reported-by: Etienne Basset Signed-off-by: Paul Moore Acked-by: James Morris Tested-by: Etienne Basset Signed-off-by: David S. Miller --- net/netlabel/netlabel_addrlist.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c index 834c6eb7f484..c0519139679e 100644 --- a/net/netlabel/netlabel_addrlist.c +++ b/net/netlabel/netlabel_addrlist.c @@ -256,13 +256,11 @@ struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask, { struct netlbl_af4list *entry; - entry = netlbl_af4list_search(addr, head); - if (entry != NULL && entry->addr == addr && entry->mask == mask) { - netlbl_af4list_remove_entry(entry); - return entry; - } - - return NULL; + entry = netlbl_af4list_search_exact(addr, mask, head); + if (entry == NULL) + return NULL; + netlbl_af4list_remove_entry(entry); + return entry; } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -299,15 +297,11 @@ struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr, { struct netlbl_af6list *entry; - entry = netlbl_af6list_search(addr, head); - if (entry != NULL && - ipv6_addr_equal(&entry->addr, addr) && - ipv6_addr_equal(&entry->mask, mask)) { - netlbl_af6list_remove_entry(entry); - return entry; - } - - return NULL; + entry = netlbl_af6list_search_exact(addr, mask, head); + if (entry == NULL) + return NULL; + netlbl_af6list_remove_entry(entry); + return entry; } #endif /* IPv6 */ -- cgit v1.2.3 From cc29c70dd581f85ee7a3e7980fb031f90b90a2ab Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 22 Apr 2009 00:49:51 -0700 Subject: net/netrom: Fix socket locking Patch "af_rose/x25: Sanity check the maximum user frame size" (commit 83e0bbcbe2145f160fbaa109b0439dae7f4a38a9) from Alan Cox got locking wrong. If we bail out due to user frame size being too large, we must unlock the socket beforehand. Signed-off-by: Jean Delvare Signed-off-by: David S. Miller --- net/netrom/af_netrom.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 4e705f87969f..3be0e016ab7d 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1084,8 +1084,10 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock, /* Build a packet - the conventional user limit is 236 bytes. We can do ludicrously large NetROM frames but must not overflow */ - if (len > 65536) - return -EMSGSIZE; + if (len > 65536) { + err = -EMSGSIZE; + goto out; + } SOCK_DEBUG(sk, "NET/ROM: sendto: building packet.\n"); size = len + NR_NETWORK_LEN + NR_TRANSPORT_LEN; -- cgit v1.2.3 From 29fe1b481283a1bada994a69f65736db4ae6f35f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 22 Apr 2009 02:26:37 -0700 Subject: netfilter: ctnetlink: fix gcc warning during compilation This patch fixes a (bogus?) gcc warning during compilation: net/netfilter/nf_conntrack_netlink.c:1234: warning: 'helpname' may be used uninitialized in this function net/netfilter/nf_conntrack_netlink.c:991: warning: 'helpname' may be used uninitialized in this function In fact, helpname is initialized by ctnetlink_parse_help() so I cannot see a way to use it without being initialized. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_netlink.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 0ea36e0c8a0e..f13fc57e1ecb 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -988,7 +988,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[]) { struct nf_conntrack_helper *helper; struct nf_conn_help *help = nfct_help(ct); - char *helpname; + char *helpname = NULL; int err; /* don't change helper of sibling connections */ @@ -1231,7 +1231,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[], rcu_read_lock(); if (cda[CTA_HELP]) { - char *helpname; + char *helpname = NULL; err = ctnetlink_parse_help(cda[CTA_HELP], &helpname); if (err < 0) -- cgit v1.2.3 From 5ff482940f5aa2cdc3424c4a8ea94b9833b2af5f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 24 Apr 2009 15:37:44 +0200 Subject: netfilter: nf_ct_dccp/udplite: fix protocol registration error Commit d0dba725 (netfilter: ctnetlink: add callbacks to the per-proto nlattrs) changed the protocol registration function to abort if the to-be registered protocol doesn't provide a new callback function. The DCCP and UDP-Lite IPv6 protocols were missed in this conversion, add the required callback pointer. Reported-and-tested-by: Steven Jan Springl Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_proto_dccp.c | 1 + net/netfilter/nf_conntrack_proto_udplite.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 50dac8dbe7d8..5411d63f31a5 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -777,6 +777,7 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { .print_conntrack = dccp_print_conntrack, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .to_nlattr = dccp_to_nlattr, + .nlattr_size = dccp_nlattr_size, .from_nlattr = nlattr_to_dccp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 4614696c1b88..0badedc542d3 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -204,6 +204,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = .error = udplite_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif -- cgit v1.2.3 From 4b0706624930dc75c3b0d0df463d89759ef7de29 Mon Sep 17 00:00:00 2001 From: Laszlo Attila Toth Date: Fri, 24 Apr 2009 16:55:25 +0200 Subject: netfilter: Kconfig: TProxy doesn't depend on NF_CONNTRACK Signed-off-by: Laszlo Attila Toth Signed-off-by: Patrick McHardy --- net/netfilter/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 2329c5f50551..881203c4a142 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -275,6 +275,8 @@ config NF_CT_NETLINK help This option enables support for a netlink-based userspace interface +endif # NF_CONNTRACK + # transparent proxy support config NETFILTER_TPROXY tristate "Transparent proxying support (EXPERIMENTAL)" @@ -290,8 +292,6 @@ config NETFILTER_TPROXY To compile it as a module, choose M here. If unsure, say N. -endif # NF_CONNTRACK - config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n -- cgit v1.2.3 From 71951b64a5a87c09eb6fde59ce51aaab2fdaeab2 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 24 Apr 2009 16:58:41 +0200 Subject: netfilter: nf_ct_dccp: add missing role attributes for DCCP This patch adds missing role attribute to the DCCP type, otherwise the creation of entries is not of any use. The attribute added is CTA_PROTOINFO_DCCP_ROLE which contains the role of the conntrack original tuple. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/linux/netfilter/nfnetlink_conntrack.h | 1 + net/netfilter/nf_conntrack_proto_dccp.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 29fe9ea1d346..1a865e48b8eb 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -100,6 +100,7 @@ enum ctattr_protoinfo_tcp { enum ctattr_protoinfo_dccp { CTA_PROTOINFO_DCCP_UNSPEC, CTA_PROTOINFO_DCCP_STATE, + CTA_PROTOINFO_DCCP_ROLE, __CTA_PROTOINFO_DCCP_MAX, }; #define CTA_PROTOINFO_DCCP_MAX (__CTA_PROTOINFO_DCCP_MAX - 1) diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 5411d63f31a5..8e757dd53396 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -633,6 +633,8 @@ static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, if (!nest_parms) goto nla_put_failure; NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state); + NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_ROLE, + ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]); nla_nest_end(skb, nest_parms); read_unlock_bh(&dccp_lock); return 0; @@ -644,6 +646,7 @@ nla_put_failure: static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = { [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 }, + [CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 }, }; static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) @@ -661,11 +664,21 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) return err; if (!tb[CTA_PROTOINFO_DCCP_STATE] || - nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) + !tb[CTA_PROTOINFO_DCCP_ROLE] || + nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) > CT_DCCP_ROLE_MAX || + nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]) >= CT_DCCP_IGNORE) { return -EINVAL; + } write_lock_bh(&dccp_lock); ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]); + if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) { + ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; + ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; + } else { + ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER; + ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT; + } write_unlock_bh(&dccp_lock); return 0; } -- cgit v1.2.3 From 37e55cf0ceb8803256bf69a3e45bd668bf90b76f Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 24 Apr 2009 17:05:21 +0200 Subject: netfilter: xt_recent: fix stack overread in compat code Related-to: commit 325fb5b4d26038cba665dd0d8ee09555321061f0 The compat path suffers from a similar problem. It only uses a __be32 when all of the recent code uses, and expects, an nf_inet_addr everywhere. As a result, addresses stored by xt_recents were filled with whatever other stuff was on the stack following the be32. Signed-off-by: Jan Engelhardt With a minor compile fix from Roman. Reported-and-tested-by: Roman Hoog Antink Signed-off-by: Patrick McHardy --- net/netfilter/xt_recent.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 791e030ea903..eb0ceb846527 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -474,7 +474,7 @@ static ssize_t recent_old_proc_write(struct file *file, struct recent_table *t = pde->data; struct recent_entry *e; char buf[sizeof("+255.255.255.255")], *c = buf; - __be32 addr; + union nf_inet_addr addr = {}; int add; if (size > sizeof(buf)) @@ -506,14 +506,13 @@ static ssize_t recent_old_proc_write(struct file *file, add = 1; break; } - addr = in_aton(c); + addr.ip = in_aton(c); spin_lock_bh(&recent_lock); - e = recent_entry_lookup(t, (const void *)&addr, NFPROTO_IPV4, 0); + e = recent_entry_lookup(t, &addr, NFPROTO_IPV4, 0); if (e == NULL) { if (add) - recent_entry_init(t, (const void *)&addr, - NFPROTO_IPV4, 0); + recent_entry_init(t, &addr, NFPROTO_IPV4, 0); } else { if (add) recent_entry_update(t, e); -- cgit v1.2.3 From adc667e84f086aa110d810f3476c494e48eaabaa Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Sat, 25 Apr 2009 18:03:35 -0700 Subject: vlan: update vlan carrier state for admin up/down Currently, the VLAN event handler does not adjust the VLAN device's carrier state when the real device or the VLAN device is set administratively up or down. The following patch adds a transfer of operating state from the real device to the VLAN device when the real device is administratively set up or down, and sets the carrier state up or down during init, open and close of the VLAN device. This permits observers above the VLAN device that care about the carrier state (bonding's link monitor, for example) to receive updates for administrative changes by more closely mimicing the behavior of real devices. Signed-off-by: Jay Vosburgh Signed-off-by: Patrick McHardy --- net/8021q/vlan.c | 2 ++ net/8021q/vlan_dev.c | 5 +++++ 2 files changed, 7 insertions(+) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 2b7390e377b3..d1e10546eb85 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -492,6 +492,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, continue; dev_change_flags(vlandev, flgs & ~IFF_UP); + vlan_transfer_operstate(dev, vlandev); } break; @@ -507,6 +508,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, continue; dev_change_flags(vlandev, flgs | IFF_UP); + vlan_transfer_operstate(dev, vlandev); } break; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 6b0921364014..b4b9068e55a7 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -462,6 +462,7 @@ static int vlan_dev_open(struct net_device *dev) if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_join(dev); + netif_carrier_on(dev); return 0; clear_allmulti: @@ -471,6 +472,7 @@ del_unicast: if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) dev_unicast_delete(real_dev, dev->dev_addr, ETH_ALEN); out: + netif_carrier_off(dev); return err; } @@ -492,6 +494,7 @@ static int vlan_dev_stop(struct net_device *dev) if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) dev_unicast_delete(real_dev, dev->dev_addr, dev->addr_len); + netif_carrier_off(dev); return 0; } @@ -612,6 +615,8 @@ static int vlan_dev_init(struct net_device *dev) struct net_device *real_dev = vlan_dev_info(dev)->real_dev; int subclass = 0; + netif_carrier_off(dev); + /* IFF_BROADCAST|IFF_MULTICAST; ??? */ dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI); dev->iflink = real_dev->ifindex; -- cgit v1.2.3 From 6a783c9067e3f71aac61a9262fe42c1f68efd4fc Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 27 Apr 2009 02:58:59 -0700 Subject: xfrm: wrong hash value for temporary SA When kernel inserts a temporary SA for IKE, it uses the wrong hash value for dst list. Two hash values were calcultated before: one with source address and one with a wildcard source address. Bug hinted by Junwei Zhang Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 82271720d970..5f1f86565f16 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -794,7 +794,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, { static xfrm_address_t saddr_wildcard = { }; struct net *net = xp_net(pol); - unsigned int h; + unsigned int h, h_wildcard; struct hlist_node *entry; struct xfrm_state *x, *x0, *to_put; int acquire_in_progress = 0; @@ -819,8 +819,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (best) goto found; - h = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, family); - hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { + h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, family); + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h_wildcard, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && !(x->props.flags & XFRM_STATE_WILDRECV) && -- cgit v1.2.3 From c9503e0fe052020e0294cd07d0ecd982eb7c9177 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 27 Apr 2009 05:42:24 -0700 Subject: ipv4: Limit size of route cache hash table Right now we have no upper limit on the size of the route cache hash table. On a 128GB POWER6 box it ends up as 32MB: IP route cache hash table entries: 4194304 (order: 9, 33554432 bytes) It would be nice to cap this for memory consumption reasons, but a massive hashtable also causes a significant spike when measuring OS jitter. With a 32MB hashtable and 4 million entries, rt_worker_func is taking 5 ms to complete. On another system with more memory it's taking 14 ms. Even though rt_worker_func does call cond_sched() to limit its impact, in an HPC environment we want to keep all sources of OS jitter to a minimum. With the patch applied we limit the number of entries to 512k which can still be overriden by using the rt_entries boot option: IP route cache hash table entries: 524288 (order: 6, 4194304 bytes) With this patch rt_worker_func now takes 0.460 ms on the same system. Signed-off-by: Anton Blanchard Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index c40debe51b38..c4c60e9f068a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3397,7 +3397,7 @@ int __init ip_rt_init(void) 0, &rt_hash_log, &rt_hash_mask, - 0); + rhash_entries ? 0 : 512 * 1024); memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket)); rt_hash_lock_init(); -- cgit v1.2.3 From bf368e4e70cd4e0f880923c44e95a4273d725ab4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Apr 2009 02:24:21 -0700 Subject: net: Avoid extra wakeups of threads blocked in wait_for_packet() In 2.6.25 we added UDP mem accounting. This unfortunatly added a penalty when a frame is transmitted, since we have at TX completion time to call sock_wfree() to perform necessary memory accounting. This calls sock_def_write_space() and utimately scheduler if any thread is waiting on the socket. Thread(s) waiting for an incoming frame was scheduled, then had to sleep again as event was meaningless. (All threads waiting on a socket are using same sk_sleep anchor) This adds lot of extra wakeups and increases latencies, as noted by Christoph Lameter, and slows down softirq handler. Reference : http://marc.info/?l=linux-netdev&m=124060437012283&w=2 Fortunatly, Davide Libenzi recently added concept of keyed wakeups into kernel, and particularly for sockets (see commit 37e5540b3c9d838eb20f2ca8ea2eb8072271e403 epoll keyed wakeups: make sockets use keyed wakeups) Davide goal was to optimize epoll, but this new wakeup infrastructure can help non epoll users as well, if they care to setup an appropriate handler. This patch introduces new DEFINE_WAIT_FUNC() helper and uses it in wait_for_packet(), so that only relevant event can wakeup a thread blocked in this function. Trace of function calls from bnx2 TX completion bnx2_poll_work() is : __kfree_skb() skb_release_head_state() sock_wfree() sock_def_write_space() __wake_up_sync_key() __wake_up_common() receiver_wake_function() : Stops here since thread is waiting for an INPUT Reported-by: Christoph Lameter Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/wait.h | 6 ++++-- net/core/datagram.c | 14 +++++++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/linux/wait.h b/include/linux/wait.h index 5d631c17eaee..bc024632f365 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -440,13 +440,15 @@ void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); -#define DEFINE_WAIT(name) \ +#define DEFINE_WAIT_FUNC(name, function) \ wait_queue_t name = { \ .private = current, \ - .func = autoremove_wake_function, \ + .func = function, \ .task_list = LIST_HEAD_INIT((name).task_list), \ } +#define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function) + #define DEFINE_WAIT_BIT(name, word, bit) \ struct wait_bit_queue name = { \ .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \ diff --git a/net/core/datagram.c b/net/core/datagram.c index d0de644b378d..b01a76abe1d2 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -64,13 +64,25 @@ static inline int connection_based(struct sock *sk) return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM; } +static int receiver_wake_function(wait_queue_t *wait, unsigned mode, int sync, + void *key) +{ + unsigned long bits = (unsigned long)key; + + /* + * Avoid a wakeup if event not interesting for us + */ + if (bits && !(bits & (POLLIN | POLLERR))) + return 0; + return autoremove_wake_function(wait, mode, sync, key); +} /* * Wait for a packet.. */ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p) { int error; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, receiver_wake_function); prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); -- cgit v1.2.3 From f3784d834c71689336fa272df420b45345cb6b84 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Thu, 23 Apr 2009 14:50:54 +0300 Subject: Bluetooth: Ensure that HCI sysfs add/del is preempt safe Use a different work_struct variables for add_conn() and del_conn() and use single work queue instead of two for adding and deleting connections. It eliminates the following error on a preemptible kernel: [ 204.358032] Unable to handle kernel NULL pointer dereference at virtual address 0000000c [ 204.370697] pgd = c0004000 [ 204.373443] [0000000c] *pgd=00000000 [ 204.378601] Internal error: Oops: 17 [#1] PREEMPT [ 204.383361] Modules linked in: vfat fat rfcomm sco l2cap sd_mod scsi_mod iphb pvr2d drm omaplfb ps [ 204.438537] CPU: 0 Not tainted (2.6.28-maemo2 #1) [ 204.443664] PC is at klist_put+0x2c/0xb4 [ 204.447601] LR is at klist_put+0x18/0xb4 [ 204.451568] pc : [] lr : [] psr: a0000113 [ 204.451568] sp : cf1b3f10 ip : cf1b3f10 fp : cf1b3f2c [ 204.463104] r10: 00000000 r9 : 00000000 r8 : bf08029c [ 204.468353] r7 : c7869200 r6 : cfbe2690 r5 : c78692c8 r4 : 00000001 [ 204.474945] r3 : 00000001 r2 : cf1b2000 r1 : 00000001 r0 : 00000000 [ 204.481506] Flags: NzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment kernel [ 204.488861] Control: 10c5387d Table: 887fc018 DAC: 00000017 [ 204.494628] Process btdelconn (pid: 515, stack limit = 0xcf1b22e0) Signed-off-by: Roger Quadros Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 3 ++- net/bluetooth/hci_sysfs.c | 37 ++++++++++++++++--------------------- 2 files changed, 18 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 01f9316b4c23..1224bba24bdd 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -180,7 +180,8 @@ struct hci_conn { struct timer_list disc_timer; struct timer_list idle_timer; - struct work_struct work; + struct work_struct work_add; + struct work_struct work_del; struct device dev; diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index ed82796d4a0f..b7c51082ddeb 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -9,8 +9,7 @@ struct class *bt_class = NULL; EXPORT_SYMBOL_GPL(bt_class); -static struct workqueue_struct *btaddconn; -static struct workqueue_struct *btdelconn; +static struct workqueue_struct *bluetooth; static inline char *link_typetostr(int type) { @@ -88,9 +87,10 @@ static struct device_type bt_link = { static void add_conn(struct work_struct *work) { - struct hci_conn *conn = container_of(work, struct hci_conn, work); + struct hci_conn *conn = container_of(work, struct hci_conn, work_add); - flush_workqueue(btdelconn); + /* ensure previous add/del is complete */ + flush_workqueue(bluetooth); if (device_add(&conn->dev) < 0) { BT_ERR("Failed to register connection device"); @@ -114,9 +114,9 @@ void hci_conn_add_sysfs(struct hci_conn *conn) device_initialize(&conn->dev); - INIT_WORK(&conn->work, add_conn); + INIT_WORK(&conn->work_add, add_conn); - queue_work(btaddconn, &conn->work); + queue_work(bluetooth, &conn->work_add); } /* @@ -131,9 +131,12 @@ static int __match_tty(struct device *dev, void *data) static void del_conn(struct work_struct *work) { - struct hci_conn *conn = container_of(work, struct hci_conn, work); + struct hci_conn *conn = container_of(work, struct hci_conn, work_del); struct hci_dev *hdev = conn->hdev; + /* ensure previous add/del is complete */ + flush_workqueue(bluetooth); + while (1) { struct device *dev; @@ -156,9 +159,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn) if (!device_is_registered(&conn->dev)) return; - INIT_WORK(&conn->work, del_conn); + INIT_WORK(&conn->work_del, del_conn); - queue_work(btdelconn, &conn->work); + queue_work(bluetooth, &conn->work_del); } static inline char *host_typetostr(int type) @@ -435,20 +438,13 @@ void hci_unregister_sysfs(struct hci_dev *hdev) int __init bt_sysfs_init(void) { - btaddconn = create_singlethread_workqueue("btaddconn"); - if (!btaddconn) - return -ENOMEM; - - btdelconn = create_singlethread_workqueue("btdelconn"); - if (!btdelconn) { - destroy_workqueue(btaddconn); + bluetooth = create_singlethread_workqueue("bluetooth"); + if (!bluetooth) return -ENOMEM; - } bt_class = class_create(THIS_MODULE, "bluetooth"); if (IS_ERR(bt_class)) { - destroy_workqueue(btdelconn); - destroy_workqueue(btaddconn); + destroy_workqueue(bluetooth); return PTR_ERR(bt_class); } @@ -457,8 +453,7 @@ int __init bt_sysfs_init(void) void bt_sysfs_cleanup(void) { - destroy_workqueue(btaddconn); - destroy_workqueue(btdelconn); + destroy_workqueue(bluetooth); class_destroy(bt_class); } -- cgit v1.2.3 From 052b30b0a8eec8db5b18ad49effdf2a9ba4c1e1a Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 26 Apr 2009 20:01:22 +0200 Subject: Bluetooth: Add different pairing timeout for Legacy Pairing The Bluetooth stack uses a reference counting for all established ACL links and if no user (L2CAP connection) is present, the link will be terminated to save power. The problem part is the dedicated pairing when using Legacy Pairing (Bluetooth 2.0 and before). At that point no user is present and pairing attempts will be disconnected within 10 seconds or less. In previous kernel version this was not a problem since the disconnect timeout wasn't triggered on incoming connections for the first time. However this caused issues with broken host stacks that kept the connections around after dedicated pairing. When the support for Simple Pairing got added, the link establishment procedure needed to be changed and now causes issues when using Legacy Pairing When using Simple Pairing it is possible to do a proper reference counting of ACL link users. With Legacy Pairing this is not possible since the specification is unclear in some areas and too many broken Bluetooth devices have already been deployed. So instead of trying to deal with all the broken devices, a special pairing timeout will be introduced that increases the timeout to 60 seconds when pairing is triggered. If a broken devices now puts the stack into an unforeseen state, the worst that happens is the disconnect timeout triggers after 120 seconds instead of 4 seconds. This allows successful pairings with legacy and broken devices now. Based on a report by Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 1 + include/net/bluetooth/hci_core.h | 5 +++-- net/bluetooth/hci_conn.c | 1 + net/bluetooth/hci_event.c | 36 +++++++++++++++++++++++++++++++++++- 4 files changed, 40 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index f69f015bbcc0..ed3aea1605e8 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -101,6 +101,7 @@ enum { /* HCI timeouts */ #define HCI_CONNECT_TIMEOUT (40000) /* 40 seconds */ #define HCI_DISCONN_TIMEOUT (2000) /* 2 seconds */ +#define HCI_PAIRING_TIMEOUT (60000) /* 60 seconds */ #define HCI_IDLE_TIMEOUT (6000) /* 6 seconds */ #define HCI_INIT_TIMEOUT (10000) /* 10 seconds */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 1224bba24bdd..be5bd713d2c9 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -171,6 +171,7 @@ struct hci_conn { __u8 auth_type; __u8 sec_level; __u8 power_save; + __u16 disc_timeout; unsigned long pend; unsigned int sent; @@ -349,9 +350,9 @@ static inline void hci_conn_put(struct hci_conn *conn) if (conn->type == ACL_LINK) { del_timer(&conn->idle_timer); if (conn->state == BT_CONNECTED) { - timeo = msecs_to_jiffies(HCI_DISCONN_TIMEOUT); + timeo = msecs_to_jiffies(conn->disc_timeout); if (!conn->out) - timeo *= 5; + timeo *= 2; } else timeo = msecs_to_jiffies(10); } else diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 1181db08d9de..75ebbe2221a3 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -215,6 +215,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) conn->state = BT_OPEN; conn->power_save = 1; + conn->disc_timeout = HCI_DISCONN_TIMEOUT; switch (type) { case ACL_LINK: diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 15f40ea8d544..4e7cb88e5da9 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -883,6 +883,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (conn->type == ACL_LINK) { conn->state = BT_CONFIG; hci_conn_hold(conn); + conn->disc_timeout = HCI_DISCONN_TIMEOUT; } else conn->state = BT_CONNECTED; @@ -1063,9 +1064,14 @@ static inline void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *s hci_proto_connect_cfm(conn, ev->status); hci_conn_put(conn); } - } else + } else { hci_auth_cfm(conn, ev->status); + hci_conn_hold(conn); + conn->disc_timeout = HCI_DISCONN_TIMEOUT; + hci_conn_put(conn); + } + if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) { if (!ev->status) { struct hci_cp_set_conn_encrypt cp; @@ -1479,7 +1485,21 @@ static inline void hci_mode_change_evt(struct hci_dev *hdev, struct sk_buff *skb static inline void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { + struct hci_ev_pin_code_req *ev = (void *) skb->data; + struct hci_conn *conn; + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (conn) { + hci_conn_hold(conn); + conn->disc_timeout = HCI_PAIRING_TIMEOUT; + hci_conn_put(conn); + } + + hci_dev_unlock(hdev); } static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb) @@ -1489,7 +1509,21 @@ static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb) { + struct hci_ev_link_key_notify *ev = (void *) skb->data; + struct hci_conn *conn; + BT_DBG("%s", hdev->name); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); + if (conn) { + hci_conn_hold(conn); + conn->disc_timeout = HCI_DISCONN_TIMEOUT; + hci_conn_put(conn); + } + + hci_dev_unlock(hdev); } static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *skb) -- cgit v1.2.3 From 3fdca1e1370ffe89980927cdef0583bebcd8caaf Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 28 Apr 2009 09:04:55 -0700 Subject: Bluetooth: Fix connection establishment with low security requirement The Bluetooth 2.1 specification introduced four different security modes that can be mapped using Legacy Pairing and Simple Pairing. With the usage of Simple Pairing it is required that all connections (except the ones for SDP) are encrypted. So even the low security requirement mandates an encrypted connection when using Simple Pairing. When using Legacy Pairing (for Bluetooth 2.0 devices and older) this is not required since it causes interoperability issues. To support this properly the low security requirement translates into different host controller transactions depending if Simple Pairing is supported or not. However in case of Simple Pairing the command to switch on encryption after a successful authentication is not triggered for the low security mode. This patch fixes this and actually makes the logic to differentiate between Simple Pairing and Legacy Pairing a lot simpler. Based on a report by Ville Tervo Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 75ebbe2221a3..375f4b4f7f79 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -425,12 +425,9 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) if (sec_level == BT_SECURITY_SDP) return 1; - if (sec_level == BT_SECURITY_LOW) { - if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0) - return hci_conn_auth(conn, sec_level, auth_type); - else - return 1; - } + if (sec_level == BT_SECURITY_LOW && + (!conn->ssp_mode || !conn->hdev->ssp_mode)) + return 1; if (conn->link_mode & HCI_LM_ENCRYPT) return hci_conn_auth(conn, sec_level, auth_type); -- cgit v1.2.3 From 942e4a2bd680c606af0211e64eb216be2e19bf61 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 28 Apr 2009 22:36:33 -0700 Subject: netfilter: revised locking for x_tables The x_tables are organized with a table structure and a per-cpu copies of the counters and rules. On older kernels there was a reader/writer lock per table which was a performance bottleneck. In 2.6.30-rc, this was converted to use RCU and the counters/rules which solved the performance problems for do_table but made replacing rules much slower because of the necessary RCU grace period. This version uses a per-cpu set of spinlocks and counters to allow to table processing to proceed without the cache thrashing of a global reader lock and keeps the same performance for table updates. Signed-off-by: Stephen Hemminger Acked-by: Linus Torvalds Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 73 +++++++++++++++++++-- net/ipv4/netfilter/arp_tables.c | 125 +++++++++++------------------------- net/ipv4/netfilter/ip_tables.c | 126 +++++++++++-------------------------- net/ipv6/netfilter/ip6_tables.c | 123 +++++++++++------------------------- net/netfilter/x_tables.c | 53 ++++++++-------- 5 files changed, 204 insertions(+), 296 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 7b1a652066c0..1b2e43502ef7 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -354,9 +354,6 @@ struct xt_table /* What hooks you will enter on */ unsigned int valid_hooks; - /* Lock for the curtain */ - struct mutex lock; - /* Man behind the curtain... */ struct xt_table_info *private; @@ -434,8 +431,74 @@ extern void xt_proto_fini(struct net *net, u_int8_t af); extern struct xt_table_info *xt_alloc_table_info(unsigned int size); extern void xt_free_table_info(struct xt_table_info *info); -extern void xt_table_entry_swap_rcu(struct xt_table_info *old, - struct xt_table_info *new); + +/* + * Per-CPU spinlock associated with per-cpu table entries, and + * with a counter for the "reading" side that allows a recursive + * reader to avoid taking the lock and deadlocking. + * + * "reading" is used by ip/arp/ip6 tables rule processing which runs per-cpu. + * It needs to ensure that the rules are not being changed while the packet + * is being processed. In some cases, the read lock will be acquired + * twice on the same CPU; this is okay because of the count. + * + * "writing" is used when reading counters. + * During replace any readers that are using the old tables have to complete + * before freeing the old table. This is handled by the write locking + * necessary for reading the counters. + */ +struct xt_info_lock { + spinlock_t lock; + unsigned char readers; +}; +DECLARE_PER_CPU(struct xt_info_lock, xt_info_locks); + +/* + * Note: we need to ensure that preemption is disabled before acquiring + * the per-cpu-variable, so we do it as a two step process rather than + * using "spin_lock_bh()". + * + * We _also_ need to disable bottom half processing before updating our + * nesting count, to make sure that the only kind of re-entrancy is this + * code being called by itself: since the count+lock is not an atomic + * operation, we can allow no races. + * + * _Only_ that special combination of being per-cpu and never getting + * re-entered asynchronously means that the count is safe. + */ +static inline void xt_info_rdlock_bh(void) +{ + struct xt_info_lock *lock; + + local_bh_disable(); + lock = &__get_cpu_var(xt_info_locks); + if (!lock->readers++) + spin_lock(&lock->lock); +} + +static inline void xt_info_rdunlock_bh(void) +{ + struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks); + + if (!--lock->readers) + spin_unlock(&lock->lock); + local_bh_enable(); +} + +/* + * The "writer" side needs to get exclusive access to the lock, + * regardless of readers. This must be called with bottom half + * processing (and thus also preemption) disabled. + */ +static inline void xt_info_wrlock(unsigned int cpu) +{ + spin_lock(&per_cpu(xt_info_locks, cpu).lock); +} + +static inline void xt_info_wrunlock(unsigned int cpu) +{ + spin_unlock(&per_cpu(xt_info_locks, cpu).lock); +} /* * This helper is performance critical and must be inlined diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 5ba533d234db..831fe1879dc0 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -253,9 +253,9 @@ unsigned int arpt_do_table(struct sk_buff *skb, indev = in ? in->name : nulldevname; outdev = out ? out->name : nulldevname; - rcu_read_lock_bh(); - private = rcu_dereference(table->private); - table_base = rcu_dereference(private->entries[smp_processor_id()]); + xt_info_rdlock_bh(); + private = table->private; + table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); @@ -273,6 +273,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + (2 * skb->dev->addr_len); + ADD_COUNTER(e->counters, hdr_len, 1); t = arpt_get_target(e); @@ -328,8 +329,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, e = (void *)e + e->next_offset; } } while (!hotdrop); - - rcu_read_unlock_bh(); + xt_info_rdunlock_bh(); if (hotdrop) return NF_DROP; @@ -711,9 +711,12 @@ static void get_counters(const struct xt_table_info *t, /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters * with data used by 'current' CPU - * We dont care about preemption here. + * + * Bottom half has to be disabled to prevent deadlock + * if new softirq were to run and call ipt_do_table */ - curcpu = raw_smp_processor_id(); + local_bh_disable(); + curcpu = smp_processor_id(); i = 0; ARPT_ENTRY_ITERATE(t->entries[curcpu], @@ -726,73 +729,22 @@ static void get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + xt_info_wrlock(cpu); ARPT_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, &i); + xt_info_wrunlock(cpu); } -} - - -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static int -add_counter_to_entry(struct arpt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -/* Take values from counters and add them back onto the current cpu */ -static void put_counters(struct xt_table_info *t, - const struct xt_counters counters[]) -{ - unsigned int i, cpu; - - local_bh_disable(); - cpu = smp_processor_id(); - i = 0; - ARPT_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_counter_to_entry, - counters, - &i); local_bh_enable(); } -static inline int -zero_entry_counter(struct arpt_entry *e, void *arg) -{ - e->counters.bcnt = 0; - e->counters.pcnt = 0; - return 0; -} - -static void -clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) -{ - unsigned int cpu; - const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; - - memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); - for_each_possible_cpu(cpu) { - memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); - ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, - zero_entry_counter, NULL); - } -} - static struct xt_counters *alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; struct xt_table_info *private = table->private; - struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care @@ -802,30 +754,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - goto nomem; - - info = xt_alloc_table_info(private->size); - if (!info) - goto free_counters; - - clone_counters(info, private); - - mutex_lock(&table->lock); - xt_table_entry_swap_rcu(private, info); - synchronize_net(); /* Wait until smoke has cleared */ + return ERR_PTR(-ENOMEM); - get_counters(info, counters); - put_counters(private, counters); - mutex_unlock(&table->lock); - - xt_free_table_info(info); + get_counters(private, counters); return counters; - - free_counters: - vfree(counters); - nomem: - return ERR_PTR(-ENOMEM); } static int copy_entries_to_user(unsigned int total_size, @@ -1094,8 +1027,9 @@ static int __do_replace(struct net *net, const char *name, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters. */ + /* Get the old counters, and synchronize with replace */ get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, @@ -1165,10 +1099,23 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) return ret; } +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct arpt_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { - unsigned int i; + unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1224,26 +1171,26 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, goto free; } - mutex_lock(&t->lock); + local_bh_disable(); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } - preempt_disable(); i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[smp_processor_id()]; + curcpu = smp_processor_id(); + loc_cpu_entry = private->entries[curcpu]; + xt_info_wrlock(curcpu); ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, paddc, &i); - preempt_enable(); + xt_info_wrunlock(curcpu); unlock_up_free: - mutex_unlock(&t->lock); - + local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 810c0b62c7d4..2ec8d7290c40 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -338,10 +338,9 @@ ipt_do_table(struct sk_buff *skb, tgpar.hooknum = hook; IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - - rcu_read_lock_bh(); - private = rcu_dereference(table->private); - table_base = rcu_dereference(private->entries[smp_processor_id()]); + xt_info_rdlock_bh(); + private = table->private; + table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); @@ -436,8 +435,7 @@ ipt_do_table(struct sk_buff *skb, e = (void *)e + e->next_offset; } } while (!hotdrop); - - rcu_read_unlock_bh(); + xt_info_rdunlock_bh(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -896,10 +894,13 @@ get_counters(const struct xt_table_info *t, /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters - * with data used by 'current' CPU - * We dont care about preemption here. + * with data used by 'current' CPU. + * + * Bottom half has to be disabled to prevent deadlock + * if new softirq were to run and call ipt_do_table */ - curcpu = raw_smp_processor_id(); + local_bh_disable(); + curcpu = smp_processor_id(); i = 0; IPT_ENTRY_ITERATE(t->entries[curcpu], @@ -912,74 +913,22 @@ get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + xt_info_wrlock(cpu); IPT_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, &i); + xt_info_wrunlock(cpu); } - -} - -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static int -add_counter_to_entry(struct ipt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -/* Take values from counters and add them back onto the current cpu */ -static void put_counters(struct xt_table_info *t, - const struct xt_counters counters[]) -{ - unsigned int i, cpu; - - local_bh_disable(); - cpu = smp_processor_id(); - i = 0; - IPT_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_counter_to_entry, - counters, - &i); local_bh_enable(); } - -static inline int -zero_entry_counter(struct ipt_entry *e, void *arg) -{ - e->counters.bcnt = 0; - e->counters.pcnt = 0; - return 0; -} - -static void -clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) -{ - unsigned int cpu; - const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; - - memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); - for_each_possible_cpu(cpu) { - memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); - IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, - zero_entry_counter, NULL); - } -} - static struct xt_counters * alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; struct xt_table_info *private = table->private; - struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -988,30 +937,11 @@ static struct xt_counters * alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - goto nomem; + return ERR_PTR(-ENOMEM); - info = xt_alloc_table_info(private->size); - if (!info) - goto free_counters; - - clone_counters(info, private); - - mutex_lock(&table->lock); - xt_table_entry_swap_rcu(private, info); - synchronize_net(); /* Wait until smoke has cleared */ - - get_counters(info, counters); - put_counters(private, counters); - mutex_unlock(&table->lock); - - xt_free_table_info(info); + get_counters(private, counters); return counters; - - free_counters: - vfree(counters); - nomem: - return ERR_PTR(-ENOMEM); } static int @@ -1306,8 +1236,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters. */ + /* Get the old counters, and synchronize with replace */ get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, @@ -1377,11 +1308,23 @@ do_replace(struct net *net, void __user *user, unsigned int len) return ret; } +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct ipt_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { - unsigned int i; + unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1437,25 +1380,26 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat goto free; } - mutex_lock(&t->lock); + local_bh_disable(); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } - preempt_disable(); i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + curcpu = smp_processor_id(); + loc_cpu_entry = private->entries[curcpu]; + xt_info_wrlock(curcpu); IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, paddc, &i); - preempt_enable(); + xt_info_wrunlock(curcpu); unlock_up_free: - mutex_unlock(&t->lock); + local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 800ae8542471..219e165aea10 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -365,9 +365,9 @@ ip6t_do_table(struct sk_buff *skb, IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - rcu_read_lock_bh(); - private = rcu_dereference(table->private); - table_base = rcu_dereference(private->entries[smp_processor_id()]); + xt_info_rdlock_bh(); + private = table->private; + table_base = private->entries[smp_processor_id()]; e = get_entry(table_base, private->hook_entry[hook]); @@ -466,7 +466,7 @@ ip6t_do_table(struct sk_buff *skb, #ifdef CONFIG_NETFILTER_DEBUG ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON; #endif - rcu_read_unlock_bh(); + xt_info_rdunlock_bh(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -926,9 +926,12 @@ get_counters(const struct xt_table_info *t, /* Instead of clearing (by a previous call to memset()) * the counters and using adds, we set the counters * with data used by 'current' CPU - * We dont care about preemption here. + * + * Bottom half has to be disabled to prevent deadlock + * if new softirq were to run and call ipt_do_table */ - curcpu = raw_smp_processor_id(); + local_bh_disable(); + curcpu = smp_processor_id(); i = 0; IP6T_ENTRY_ITERATE(t->entries[curcpu], @@ -941,72 +944,22 @@ get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + xt_info_wrlock(cpu); IP6T_ENTRY_ITERATE(t->entries[cpu], t->size, add_entry_to_counter, counters, &i); + xt_info_wrunlock(cpu); } -} - -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static int -add_counter_to_entry(struct ip6t_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - -/* Take values from counters and add them back onto the current cpu */ -static void put_counters(struct xt_table_info *t, - const struct xt_counters counters[]) -{ - unsigned int i, cpu; - - local_bh_disable(); - cpu = smp_processor_id(); - i = 0; - IP6T_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_counter_to_entry, - counters, - &i); local_bh_enable(); } -static inline int -zero_entry_counter(struct ip6t_entry *e, void *arg) -{ - e->counters.bcnt = 0; - e->counters.pcnt = 0; - return 0; -} - -static void -clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) -{ - unsigned int cpu; - const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; - - memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); - for_each_possible_cpu(cpu) { - memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); - IP6T_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, - zero_entry_counter, NULL); - } -} - static struct xt_counters *alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; struct xt_table_info *private = table->private; - struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -1015,30 +968,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - goto nomem; + return ERR_PTR(-ENOMEM); - info = xt_alloc_table_info(private->size); - if (!info) - goto free_counters; - - clone_counters(info, private); - - mutex_lock(&table->lock); - xt_table_entry_swap_rcu(private, info); - synchronize_net(); /* Wait until smoke has cleared */ - - get_counters(info, counters); - put_counters(private, counters); - mutex_unlock(&table->lock); - - xt_free_table_info(info); + get_counters(private, counters); return counters; - - free_counters: - vfree(counters); - nomem: - return ERR_PTR(-ENOMEM); } static int @@ -1334,8 +1268,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters. */ + /* Get the old counters, and synchronize with replace */ get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry, @@ -1405,11 +1340,24 @@ do_replace(struct net *net, void __user *user, unsigned int len) return ret; } +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct ip6t_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { - unsigned int i; + unsigned int i, curcpu; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1465,25 +1413,28 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, goto free; } - mutex_lock(&t->lock); + + local_bh_disable(); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } - preempt_disable(); i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + curcpu = smp_processor_id(); + xt_info_wrlock(curcpu); + loc_cpu_entry = private->entries[curcpu]; IP6T_ENTRY_ITERATE(loc_cpu_entry, private->size, add_counter_to_entry, paddc, &i); - preempt_enable(); + xt_info_wrunlock(curcpu); + unlock_up_free: - mutex_unlock(&t->lock); + local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 509a95621f9f..150e5cf62f85 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -625,20 +625,6 @@ void xt_free_table_info(struct xt_table_info *info) } EXPORT_SYMBOL(xt_free_table_info); -void xt_table_entry_swap_rcu(struct xt_table_info *oldinfo, - struct xt_table_info *newinfo) -{ - unsigned int cpu; - - for_each_possible_cpu(cpu) { - void *p = oldinfo->entries[cpu]; - rcu_assign_pointer(oldinfo->entries[cpu], newinfo->entries[cpu]); - newinfo->entries[cpu] = p; - } - -} -EXPORT_SYMBOL_GPL(xt_table_entry_swap_rcu); - /* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name) @@ -676,32 +662,43 @@ void xt_compat_unlock(u_int8_t af) EXPORT_SYMBOL_GPL(xt_compat_unlock); #endif +DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks); +EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks); + + struct xt_table_info * xt_replace_table(struct xt_table *table, unsigned int num_counters, struct xt_table_info *newinfo, int *error) { - struct xt_table_info *oldinfo, *private; + struct xt_table_info *private; /* Do the substitution. */ - mutex_lock(&table->lock); + local_bh_disable(); private = table->private; + /* Check inside lock: is the old number correct? */ if (num_counters != private->number) { duprintf("num_counters != table->private->number (%u/%u)\n", num_counters, private->number); - mutex_unlock(&table->lock); + local_bh_enable(); *error = -EAGAIN; return NULL; } - oldinfo = private; - rcu_assign_pointer(table->private, newinfo); - newinfo->initial_entries = oldinfo->initial_entries; - mutex_unlock(&table->lock); - synchronize_net(); - return oldinfo; + table->private = newinfo; + newinfo->initial_entries = private->initial_entries; + + /* + * Even though table entries have now been swapped, other CPU's + * may still be using the old entries. This is okay, because + * resynchronization happens because of the locking done + * during the get_counters() routine. + */ + local_bh_enable(); + + return private; } EXPORT_SYMBOL_GPL(xt_replace_table); @@ -734,7 +731,6 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table *table, /* Simplifies replace_table code. */ table->private = bootstrap; - mutex_init(&table->lock); if (!xt_replace_table(table, 0, newinfo, &ret)) goto unlock; @@ -1147,7 +1143,14 @@ static struct pernet_operations xt_net_ops = { static int __init xt_init(void) { - int i, rv; + unsigned int i; + int rv; + + for_each_possible_cpu(i) { + struct xt_info_lock *lock = &per_cpu(xt_info_locks, i); + spin_lock_init(&lock->lock); + lock->readers = 0; + } xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL); if (!xt) -- cgit v1.2.3 From d4c4a9a1bce1912ed5681251f0037fd4f2364a3e Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 29 Apr 2009 11:41:24 +0100 Subject: mac80211: fix modprobe deadlock by not calling wep_init under rtnl_lock - ieee80211_wep_init(), which is called with rtnl_lock held, blocks in request_module() [waiting for modprobe to load a crypto module]. - modprobe blocks in a call to flush_workqueue(), when it closes a TTY [presumably when it exits]. - The workqueue item linkwatch_event() blocks on rtnl_lock. There's no reason for wep_init() to be called with rtnl_lock held, so just move it outside the critical section. Signed-off-by: Alan Jenkins Signed-off-by: John W. Linville --- net/mac80211/main.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index fbcbed6cad01..00968c2d22bb 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -909,6 +909,13 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (result < 0) goto fail_sta_info; + result = ieee80211_wep_init(local); + if (result < 0) { + printk(KERN_DEBUG "%s: Failed to initialize wep: %d\n", + wiphy_name(local->hw.wiphy), result); + goto fail_wep; + } + rtnl_lock(); result = dev_alloc_name(local->mdev, local->mdev->name); if (result < 0) @@ -930,14 +937,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) goto fail_rate; } - result = ieee80211_wep_init(local); - - if (result < 0) { - printk(KERN_DEBUG "%s: Failed to initialize wep: %d\n", - wiphy_name(local->hw.wiphy), result); - goto fail_wep; - } - /* add one default STA interface if supported */ if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION)) { result = ieee80211_if_add(local, "wlan%d", NULL, @@ -967,13 +966,13 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) return 0; -fail_wep: - rate_control_deinitialize(local); fail_rate: unregister_netdevice(local->mdev); local->mdev = NULL; fail_dev: rtnl_unlock(); + ieee80211_wep_free(local); +fail_wep: sta_info_stop(local); fail_sta_info: debugfs_hw_del(local); -- cgit v1.2.3 From c428c89201a57a0ce24c37ed79e540d1f4101cf3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 29 Apr 2009 00:28:18 +0200 Subject: mac80211: default to automatic power control In "mac80211: correct wext transmit power handler" I fixed the wext handler, but forgot to make the default of the user_power_level -1 (aka "auto"), so that now the transmit power is always set to 0, causing associations to time out and similar problems since we're transmitting with very little power. Correct this by correcting the default user_power_level to -1. Signed-off-by: Johannes Berg Bisected-by: Niel Lambrechts Signed-off-by: John W. Linville --- net/mac80211/main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 00968c2d22bb..14134193cd17 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -757,6 +757,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->hw.conf.long_frame_max_tx_count = 4; local->hw.conf.short_frame_max_tx_count = 7; local->hw.conf.radio_enabled = true; + local->user_power_level = -1; INIT_LIST_HEAD(&local->interfaces); mutex_init(&local->iflist_mtx); -- cgit v1.2.3 From 7a67e56fd362d3edfde1f19170893508c3940d3a Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Thu, 30 Apr 2009 05:41:19 -0700 Subject: net: Fix oops when splicing skbs from a frag_list. Lennert Buytenhek wrote: > Since 4fb669948116d928ae44262ab7743732c574630d ("net: Optimize memory > usage when splicing from sockets.") I'm seeing this oops (e.g. in > 2.6.30-rc3) when splicing from a TCP socket to /dev/null on a driver > (mv643xx_eth) that uses LRO in the skb mode (lro_receive_skb) rather > than the frag mode: My patch incorrectly assumed skb->sk was always valid, but for "frag_listed" skbs we can only use skb->sk of their parent. Reported-by: Lennert Buytenhek Debugged-by: Lennert Buytenhek Tested-by: Lennert Buytenhek Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/core/skbuff.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ce6356cd9f71..f091a5a845c1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1365,9 +1365,8 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) static inline struct page *linear_to_page(struct page *page, unsigned int *len, unsigned int *offset, - struct sk_buff *skb) + struct sk_buff *skb, struct sock *sk) { - struct sock *sk = skb->sk; struct page *p = sk->sk_sndmsg_page; unsigned int off; @@ -1405,13 +1404,14 @@ new_page: */ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, unsigned int *len, unsigned int offset, - struct sk_buff *skb, int linear) + struct sk_buff *skb, int linear, + struct sock *sk) { if (unlikely(spd->nr_pages == PIPE_BUFFERS)) return 1; if (linear) { - page = linear_to_page(page, len, &offset, skb); + page = linear_to_page(page, len, &offset, skb, sk); if (!page) return 1; } else @@ -1442,7 +1442,8 @@ static inline void __segment_seek(struct page **page, unsigned int *poff, static inline int __splice_segment(struct page *page, unsigned int poff, unsigned int plen, unsigned int *off, unsigned int *len, struct sk_buff *skb, - struct splice_pipe_desc *spd, int linear) + struct splice_pipe_desc *spd, int linear, + struct sock *sk) { if (!*len) return 1; @@ -1465,7 +1466,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff, /* the linear region may spread across several pages */ flen = min_t(unsigned int, flen, PAGE_SIZE - poff); - if (spd_fill_page(spd, page, &flen, poff, skb, linear)) + if (spd_fill_page(spd, page, &flen, poff, skb, linear, sk)) return 1; __segment_seek(&page, &poff, &plen, flen); @@ -1481,8 +1482,8 @@ static inline int __splice_segment(struct page *page, unsigned int poff, * pipe is full or if we already spliced the requested length. */ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, - unsigned int *len, - struct splice_pipe_desc *spd) + unsigned int *len, struct splice_pipe_desc *spd, + struct sock *sk) { int seg; @@ -1492,7 +1493,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, if (__splice_segment(virt_to_page(skb->data), (unsigned long) skb->data & (PAGE_SIZE - 1), skb_headlen(skb), - offset, len, skb, spd, 1)) + offset, len, skb, spd, 1, sk)) return 1; /* @@ -1502,7 +1503,7 @@ static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset, const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; if (__splice_segment(f->page, f->page_offset, f->size, - offset, len, skb, spd, 0)) + offset, len, skb, spd, 0, sk)) return 1; } @@ -1528,12 +1529,13 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, .ops = &sock_pipe_buf_ops, .spd_release = sock_spd_release, }; + struct sock *sk = skb->sk; /* * __skb_splice_bits() only fails if the output has no room left, * so no point in going over the frag_list for the error case. */ - if (__skb_splice_bits(skb, &offset, &tlen, &spd)) + if (__skb_splice_bits(skb, &offset, &tlen, &spd, sk)) goto done; else if (!tlen) goto done; @@ -1545,14 +1547,13 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, struct sk_buff *list = skb_shinfo(skb)->frag_list; for (; list && tlen; list = list->next) { - if (__skb_splice_bits(list, &offset, &tlen, &spd)) + if (__skb_splice_bits(list, &offset, &tlen, &spd, sk)) break; } } done: if (spd.nr_pages) { - struct sock *sk = skb->sk; int ret; /* -- cgit v1.2.3 From ec581f6a42bbbea5271c66da9769a41b46c74e10 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 1 May 2009 09:05:06 -0700 Subject: net: Fix skb_tx_hash() for forwarding workloads. When skb_rx_queue_recorded() is true, we dont want to use jash distribution as the device driver exactly told us which queue was selected at RX time. jhash makes a statistical shuffle, but this wont work with 8 static inputs. Later improvements would be to compute reciprocal value of real_num_tx_queues to avoid a divide here. But this computation should be done once, when real_num_tx_queues is set. This needs a separate patch, and a new field in struct net_device. Reported-by: Andrew Dickinson Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 308a7d0c277f..e2e9e4af3ace 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1735,11 +1735,12 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) { u32 hash; - if (skb_rx_queue_recorded(skb)) { - hash = skb_get_rx_queue(skb); - } else if (skb->sk && skb->sk->sk_hash) { + if (skb_rx_queue_recorded(skb)) + return skb_get_rx_queue(skb) % dev->real_num_tx_queues; + + if (skb->sk && skb->sk->sk_hash) hash = skb->sk->sk_hash; - } else + else hash = skb->protocol; hash = jhash_1word(hash, skb_tx_hashrnd); -- cgit v1.2.3 From acda074390270ca9e28f2a9729f7b835e2ad6da4 Mon Sep 17 00:00:00 2001 From: Laszlo Attila Toth Date: Fri, 1 May 2009 15:23:10 -0700 Subject: xt_socket: checks for the state of nf_conntrack xt_socket can use connection tracking, and checks whether it is a module. Signed-off-by: Laszlo Attila Toth Signed-off-by: David S. Miller --- net/netfilter/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 881203c4a142..cb3ad741ebf8 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -837,6 +837,7 @@ config NETFILTER_XT_MATCH_SOCKET depends on NETFILTER_TPROXY depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED + depends on !NF_CONNTRACK || NF_CONNTRACK select NF_DEFRAG_IPV4 help This option adds a `socket' match, which can be used to match -- cgit v1.2.3 From d0ab8ff81bf1b01bae7d6b92ca067badbbb02cc9 Mon Sep 17 00:00:00 2001 From: Robert Love Date: Sat, 2 May 2009 13:48:32 -0700 Subject: net: Only store high 16 bits of kernel generated filter priorities The kernel should only be using the high 16 bits of a kernel generated priority. Filter priorities in all other cases only use the upper 16 bits of the u32 'prio' field of 'struct tcf_proto', but when the kernel generates the priority of a filter is saves all 32 bits which can result in incorrect lookup failures when a filter needs to be deleted or modified. Signed-off-by: Robert Love Signed-off-by: David S. Miller --- net/sched/cls_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 173fcc4b050d..0759f32e9dca 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -254,7 +254,7 @@ replay: } tp->ops = tp_ops; tp->protocol = protocol; - tp->prio = nprio ? : tcf_auto_prio(*back); + tp->prio = nprio ? : TC_H_MAJ(tcf_auto_prio(*back)); tp->q = q; tp->classify = tp_ops->classify; tp->classid = parent; -- cgit v1.2.3 From 902e5ea15f8471a3213a37b11b98196f3406aeaf Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Sat, 2 May 2009 13:49:36 -0700 Subject: Subject: [PATCH] br2684: restore net_dev initialization Commit 0ba25ff4c669e5395110ba6ab4958a97a9f96922 ("br2684: convert to net_device_ops") inadvertently deleted the initialization of the net_dev pointer in the br2684_dev structure, leading to crashes. This patch adds it back. Reported-by: Mikko Vinni Tested-by: Mikko Vinni Signed-off-by: Rabin Vincent Signed-off-by: David S. Miller --- net/atm/br2684.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 334fcd4a4ea4..3100a8940afc 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -549,6 +549,7 @@ static void br2684_setup(struct net_device *netdev) struct br2684_dev *brdev = BRPRIV(netdev); ether_setup(netdev); + brdev->net_dev = netdev; netdev->netdev_ops = &br2684_netdev_ops; -- cgit v1.2.3 From 0c266898b42fe4e4e2f9edfc9d3474c10f93aa6a Mon Sep 17 00:00:00 2001 From: Satoru SATOH Date: Mon, 4 May 2009 11:11:01 -0700 Subject: tcp: Fix tcp_prequeue() to get correct rto_min value tcp_prequeue() refers to the constant value (TCP_RTO_MIN) regardless of the actual value might be tuned. The following patches fix this and make tcp_prequeue get the actual value returns from tcp_rto_min(). Signed-off-by: Satoru SATOH Signed-off-by: David S. Miller --- include/net/tcp.h | 14 +++++++++++++- net/ipv4/tcp_input.c | 10 ---------- 2 files changed, 13 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 1b94b9bfe2dc..646dbe3962ea 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -41,6 +41,7 @@ #include #include #include +#include #include @@ -530,6 +531,17 @@ static inline void tcp_fast_path_check(struct sock *sk) tcp_fast_path_on(tp); } +/* Compute the actual rto_min value */ +static inline u32 tcp_rto_min(struct sock *sk) +{ + struct dst_entry *dst = __sk_dst_get(sk); + u32 rto_min = TCP_RTO_MIN; + + if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) + rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN); + return rto_min; +} + /* Compute the actual receive window we are currently advertising. * Rcv_nxt can be after the window if our peer push more data * than the offered window. @@ -895,7 +907,7 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb) wake_up_interruptible(sk->sk_sleep); if (!inet_csk_ack_scheduled(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - (3 * TCP_RTO_MIN) / 4, + (3 * tcp_rto_min(sk)) / 4, TCP_RTO_MAX); } return 1; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c96a6bb25430..eec3e6f9956c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -597,16 +597,6 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb) tcp_grow_window(sk, skb); } -static u32 tcp_rto_min(struct sock *sk) -{ - struct dst_entry *dst = __sk_dst_get(sk); - u32 rto_min = TCP_RTO_MIN; - - if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) - rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN); - return rto_min; -} - /* Called to compute a smoothed rtt estimate. The data fed to this * routine either comes from timestamps, or from segments that were * known _not_ to have been retransmitted [see Karn/Partridge -- cgit v1.2.3 From a67e899cf38ae542d1a028ccd021f9189f76fb74 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 2 May 2009 18:24:06 -0700 Subject: Bluetooth: Fix issue with sysfs handling for connections Due to a semantic changes in flush_workqueue() the current approach of synchronizing the sysfs handling for connections doesn't work anymore. The whole approach is actually fully broken and based on assumptions that are no longer valid. With the introduction of Simple Pairing support, the creation of low-level ACL links got changed. This change invalidates the reason why in the past two independent work queues have been used for adding/removing sysfs devices. The adding of the actual sysfs device is now postponed until the host controller successfully assigns an unique handle to that link. So the real synchronization happens inside the controller and not the host. The only left-over problem is that some internals of the sysfs device handling are not initialized ahead of time. This leaves potential access to invalid data and can cause various NULL pointer dereferences. To fix this a new function makes sure that all sysfs details are initialized when an connection attempt is made. The actual sysfs device is only registered when the connection has been successfully established. To avoid a race condition with the registration, the check if a device is registered has been moved into the removal work. As an extra protection two flush_work() calls are left in place to make sure a previous add/del work has been completed first. Based on a report by Marc Pignat Signed-off-by: Marcel Holtmann Tested-by: Justin P. Mattock Tested-by: Roger Quadros Tested-by: Marc Pignat --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_conn.c | 2 ++ net/bluetooth/hci_sysfs.c | 74 ++++++++++++++++++++++------------------ 3 files changed, 43 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index be5bd713d2c9..73aead222b32 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -457,6 +457,7 @@ int hci_recv_fragment(struct hci_dev *hdev, int type, void *data, int count); int hci_register_sysfs(struct hci_dev *hdev); void hci_unregister_sysfs(struct hci_dev *hdev); +void hci_conn_init_sysfs(struct hci_conn *conn); void hci_conn_add_sysfs(struct hci_conn *conn); void hci_conn_del_sysfs(struct hci_conn *conn); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 375f4b4f7f79..61309b26f271 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -248,6 +248,8 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_CONN_ADD); + hci_conn_init_sysfs(conn); + tasklet_enable(&hdev->tx_task); return conn; diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index b7c51082ddeb..582d8877078c 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -9,7 +9,7 @@ struct class *bt_class = NULL; EXPORT_SYMBOL_GPL(bt_class); -static struct workqueue_struct *bluetooth; +static struct workqueue_struct *bt_workq; static inline char *link_typetostr(int type) { @@ -89,8 +89,8 @@ static void add_conn(struct work_struct *work) { struct hci_conn *conn = container_of(work, struct hci_conn, work_add); - /* ensure previous add/del is complete */ - flush_workqueue(bluetooth); + /* ensure previous del is complete */ + flush_work(&conn->work_del); if (device_add(&conn->dev) < 0) { BT_ERR("Failed to register connection device"); @@ -98,27 +98,6 @@ static void add_conn(struct work_struct *work) } } -void hci_conn_add_sysfs(struct hci_conn *conn) -{ - struct hci_dev *hdev = conn->hdev; - - BT_DBG("conn %p", conn); - - conn->dev.type = &bt_link; - conn->dev.class = bt_class; - conn->dev.parent = &hdev->dev; - - dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); - - dev_set_drvdata(&conn->dev, conn); - - device_initialize(&conn->dev); - - INIT_WORK(&conn->work_add, add_conn); - - queue_work(bluetooth, &conn->work_add); -} - /* * The rfcomm tty device will possibly retain even when conn * is down, and sysfs doesn't support move zombie device, @@ -134,8 +113,11 @@ static void del_conn(struct work_struct *work) struct hci_conn *conn = container_of(work, struct hci_conn, work_del); struct hci_dev *hdev = conn->hdev; - /* ensure previous add/del is complete */ - flush_workqueue(bluetooth); + /* ensure previous add is complete */ + flush_work(&conn->work_add); + + if (!device_is_registered(&conn->dev)) + return; while (1) { struct device *dev; @@ -152,16 +134,40 @@ static void del_conn(struct work_struct *work) hci_dev_put(hdev); } -void hci_conn_del_sysfs(struct hci_conn *conn) +void hci_conn_init_sysfs(struct hci_conn *conn) { + struct hci_dev *hdev = conn->hdev; + BT_DBG("conn %p", conn); - if (!device_is_registered(&conn->dev)) - return; + conn->dev.type = &bt_link; + conn->dev.class = bt_class; + conn->dev.parent = &hdev->dev; + + dev_set_drvdata(&conn->dev, conn); + device_initialize(&conn->dev); + + INIT_WORK(&conn->work_add, add_conn); INIT_WORK(&conn->work_del, del_conn); +} + +void hci_conn_add_sysfs(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + + BT_DBG("conn %p", conn); + + dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); + + queue_work(bt_workq, &conn->work_add); +} + +void hci_conn_del_sysfs(struct hci_conn *conn) +{ + BT_DBG("conn %p", conn); - queue_work(bluetooth, &conn->work_del); + queue_work(bt_workq, &conn->work_del); } static inline char *host_typetostr(int type) @@ -438,13 +444,13 @@ void hci_unregister_sysfs(struct hci_dev *hdev) int __init bt_sysfs_init(void) { - bluetooth = create_singlethread_workqueue("bluetooth"); - if (!bluetooth) + bt_workq = create_singlethread_workqueue("bluetooth"); + if (!bt_workq) return -ENOMEM; bt_class = class_create(THIS_MODULE, "bluetooth"); if (IS_ERR(bt_class)) { - destroy_workqueue(bluetooth); + destroy_workqueue(bt_workq); return PTR_ERR(bt_class); } @@ -453,7 +459,7 @@ int __init bt_sysfs_init(void) void bt_sysfs_cleanup(void) { - destroy_workqueue(bluetooth); + destroy_workqueue(bt_workq); class_destroy(bt_class); } -- cgit v1.2.3