From d6c416148545bd99d3cc05e672460168245cc156 Mon Sep 17 00:00:00 2001 From: Chang Xiangzhong Date: Thu, 21 Nov 2013 22:56:28 +0100 Subject: net: sctp: find the correct highest_new_tsn in sack Function sctp_check_transmitted(transport t, ...) would iterate all of transport->transmitted queue and looking for the highest __newly__ acked tsn. The original algorithm would depend on the order of the assoc->transport_list (in function sctp_outq_sack line 1215 - 1226). The result might not be the expected due to the order of the tranport_list. Solution: checking if the exising is smaller than the new one before assigning Signed-off-by: Chang Xiangzhong Acked-by: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 94df75877869..abb6db008df1 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1391,7 +1391,8 @@ static void sctp_check_transmitted(struct sctp_outq *q, */ if (!tchunk->tsn_gap_acked) { tchunk->tsn_gap_acked = 1; - *highest_new_tsn_in_sack = tsn; + if (TSN_lt(*highest_new_tsn_in_sack, tsn)) + *highest_new_tsn_in_sack = tsn; bytes_acked += sctp_data_size(tchunk); if (!tchunk->transport) migrate_bytes += sctp_data_size(tchunk); -- cgit v1.2.1 From cc5c00bbb44c5d68b883aa5cb9d01514a2525d94 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 22 Nov 2013 10:31:29 +0800 Subject: gro: Only verify TCP checksums for candidates In some cases we may receive IP packets that are longer than their stated lengths. Such packets are never merged in GRO. However, we may end up computing their checksums incorrectly and end up allowing packets with a bogus checksum enter our stack with the checksum status set as verified. Since such packets are rare and not performance-critical, this patch simply skips the checksum verification for them. Reported-by: Alexander Duyck Signed-off-by: Herbert Xu Acked-by: Alexander Duyck Thanks, Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_offload.c | 5 +++++ net/ipv6/tcpv6_offload.c | 5 +++++ 2 files changed, 10 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index a2b68a108eae..55aeec930140 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -276,6 +276,10 @@ static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff * __wsum wsum; __sum16 sum; + /* Don't bother verifying checksum if we're going to flush anyway. */ + if (NAPI_GRO_CB(skb)->flush) + goto skip_csum; + switch (skb->ip_summed) { case CHECKSUM_COMPLETE: if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, @@ -301,6 +305,7 @@ flush: break; } +skip_csum: return tcp_gro_receive(head, skb); } diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index c1097c798900..71923d14127a 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -39,6 +39,10 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, __wsum wsum; __sum16 sum; + /* Don't bother verifying checksum if we're going to flush anyway. */ + if (NAPI_GRO_CB(skb)->flush) + goto skip_csum; + switch (skb->ip_summed) { case CHECKSUM_COMPLETE: if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr, @@ -65,6 +69,7 @@ flush: break; } +skip_csum: return tcp_gro_receive(head, skb); } -- cgit v1.2.1 From b8ee93ba80b5a0b6c3c06b65c34dd1276f16c047 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 22 Nov 2013 10:32:11 +0800 Subject: gro: Clean up tcpX_gro_receive checksum verification This patch simplifies the checksum verification in tcpX_gro_receive by reusing the CHECKSUM_COMPLETE code for CHECKSUM_NONE. All it does for CHECKSUM_NONE is compute the partial checksum and then treat it as if it came from the hardware (CHECKSUM_COMPLETE). Signed-off-by: Herbert Xu Cheers, Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_offload.c | 26 ++++++++++---------------- net/ipv6/tcpv6_offload.c | 27 ++++++++++----------------- 2 files changed, 20 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 55aeec930140..05606353c7e7 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -274,35 +274,29 @@ static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff * { const struct iphdr *iph = skb_gro_network_header(skb); __wsum wsum; - __sum16 sum; /* Don't bother verifying checksum if we're going to flush anyway. */ if (NAPI_GRO_CB(skb)->flush) goto skip_csum; + wsum = skb->csum; + switch (skb->ip_summed) { + case CHECKSUM_NONE: + wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), + 0); + + /* fall through */ + case CHECKSUM_COMPLETE: if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, - skb->csum)) { + wsum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; break; } -flush: + NAPI_GRO_CB(skb)->flush = 1; return NULL; - - case CHECKSUM_NONE: - wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb_gro_len(skb), IPPROTO_TCP, 0); - sum = csum_fold(skb_checksum(skb, - skb_gro_offset(skb), - skb_gro_len(skb), - wsum)); - if (sum) - goto flush; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; } skip_csum: diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 71923d14127a..6d18157dc32c 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -37,36 +37,29 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, { const struct ipv6hdr *iph = skb_gro_network_header(skb); __wsum wsum; - __sum16 sum; /* Don't bother verifying checksum if we're going to flush anyway. */ if (NAPI_GRO_CB(skb)->flush) goto skip_csum; + wsum = skb->csum; + switch (skb->ip_summed) { + case CHECKSUM_NONE: + wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), + wsum); + + /* fall through */ + case CHECKSUM_COMPLETE: if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr, - skb->csum)) { + wsum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; break; } -flush: + NAPI_GRO_CB(skb)->flush = 1; return NULL; - - case CHECKSUM_NONE: - wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, - skb_gro_len(skb), - IPPROTO_TCP, 0)); - sum = csum_fold(skb_checksum(skb, - skb_gro_offset(skb), - skb_gro_len(skb), - wsum)); - if (sum) - goto flush; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; } skip_csum: -- cgit v1.2.1 From fb10f802b0fb76079612cb78505cbc9ad81e683b Mon Sep 17 00:00:00 2001 From: Gao feng Date: Fri, 22 Nov 2013 14:48:40 +0800 Subject: tcp_memcg: remove useless var old_lim nobody needs it. remove. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- net/ipv4/tcp_memcontrol.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 03e9154f7e68..269a89ecd2f4 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -60,7 +60,6 @@ EXPORT_SYMBOL(tcp_destroy_cgroup); static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) { struct cg_proto *cg_proto; - u64 old_lim; int i; int ret; @@ -71,7 +70,6 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) if (val > RES_COUNTER_MAX) val = RES_COUNTER_MAX; - old_lim = res_counter_read_u64(&cg_proto->memory_allocated, RES_LIMIT); ret = res_counter_set_limit(&cg_proto->memory_allocated, val); if (ret) return ret; -- cgit v1.2.1 From ca15a078bd907df5fc1c009477869c5cbde3b753 Mon Sep 17 00:00:00 2001 From: Oussama Ghorbel Date: Fri, 22 Nov 2013 16:23:20 +0100 Subject: sit: generate icmpv6 error when receiving icmpv4 error Send icmpv6 error with type "destination unreachable" and code "address unreachable" when receiving icmpv4 error and sufficient data bytes are available This patch enhances the compliance of sit tunnel with section 3.4 of rfc 4213 Signed-off-by: Oussama Ghorbel Signed-off-by: David S. Miller --- net/ipv6/sit.c | 44 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1b4a4a953675..8435267836a7 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -478,14 +478,44 @@ static void ipip6_tunnel_uninit(struct net_device *dev) dev_put(dev); } +/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH + * if sufficient data bytes are available + */ +static int ipip6_err_gen_icmpv6_unreach(struct sk_buff *skb) +{ + const struct iphdr *iph = (const struct iphdr *) skb->data; + struct rt6_info *rt; + struct sk_buff *skb2; + + if (!pskb_may_pull(skb, iph->ihl * 4 + sizeof(struct ipv6hdr) + 8)) + return 1; + + skb2 = skb_clone(skb, GFP_ATOMIC); + + if (!skb2) + return 1; + + skb_dst_drop(skb2); + skb_pull(skb2, iph->ihl * 4); + skb_reset_network_header(skb2); + + rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0); + + if (rt && rt->dst.dev) + skb2->dev = rt->dst.dev; + + icmpv6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); + + if (rt) + ip6_rt_put(rt); + + kfree_skb(skb2); + + return 0; +} static int ipip6_err(struct sk_buff *skb, u32 info) { - -/* All the routers (except for Linux) return only - 8 bytes of packet payload. It means, that precise relaying of - ICMP in the real Internet is absolutely infeasible. - */ const struct iphdr *iph = (const struct iphdr *)skb->data; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; @@ -500,7 +530,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info) case ICMP_DEST_UNREACH: switch (code) { case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: /* Impossible event. */ return 0; default: @@ -545,6 +574,9 @@ static int ipip6_err(struct sk_buff *skb, u32 info) goto out; err = 0; + if (!ipip6_err_gen_icmpv6_unreach(skb)) + goto out; + if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) goto out; -- cgit v1.2.1 From 85fbaa75037d0b6b786ff18658ddf0b4014ce2a4 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 23 Nov 2013 00:46:12 +0100 Subject: inet: fix addr_len/msg->msg_namelen assignment in recv_error and rxpmtu functions Commit bceaa90240b6019ed73b49965eac7d167610be69 ("inet: prevent leakage of uninitialized memory to user in recv syscalls") conditionally updated addr_len if the msg_name is written to. The recv_error and rxpmtu functions relied on the recvmsg functions to set up addr_len before. As this does not happen any more we have to pass addr_len to those functions as well and set it to the size of the corresponding sockaddr length. This broke traceroute and such. Fixes: bceaa90240b6 ("inet: prevent leakage of uninitialized memory to user in recv syscalls") Reported-by: Brad Spengler Reported-by: Tom Labanowski Cc: mpb Cc: David S. Miller Cc: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 3 ++- net/ipv4/ping.c | 5 +++-- net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/datagram.c | 7 +++++-- net/ipv6/ping.c | 3 ++- net/ipv6/raw.c | 4 ++-- net/ipv6/udp.c | 4 ++-- net/l2tp/l2tp_ip6.c | 2 +- 9 files changed, 19 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 3f858266fa7e..ddf32a6bc415 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -386,7 +386,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf /* * Handle MSG_ERRQUEUE */ -int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) +int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct sock_exterr_skb *serr; struct sk_buff *skb, *skb2; @@ -423,6 +423,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) serr->addr_offset); sin->sin_port = serr->port; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); + *addr_len = sizeof(*sin); } memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 876c6ca2d8f9..840cf1b9e6ee 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -841,10 +841,11 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (flags & MSG_ERRQUEUE) { if (family == AF_INET) { - return ip_recv_error(sk, msg, len); + return ip_recv_error(sk, msg, len, addr_len); #if IS_ENABLED(CONFIG_IPV6) } else if (family == AF_INET6) { - return pingv6_ops.ipv6_recv_error(sk, msg, len); + return pingv6_ops.ipv6_recv_error(sk, msg, len, + addr_len); #endif } } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 5cb8ddb505ee..23c3e5b5bb53 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -697,7 +697,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto out; if (flags & MSG_ERRQUEUE) { - err = ip_recv_error(sk, msg, len); + err = ip_recv_error(sk, msg, len, addr_len); goto out; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5944d7d668dd..44dfaa09b584 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1236,7 +1236,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, bool slow; if (flags & MSG_ERRQUEUE) - return ip_recv_error(sk, msg, len); + return ip_recv_error(sk, msg, len, addr_len); try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index a454b0ff57c7..d690204a0275 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -318,7 +318,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) /* * Handle MSG_ERRQUEUE */ -int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; @@ -369,6 +369,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) &sin->sin6_addr); sin->sin6_scope_id = 0; } + *addr_len = sizeof(*sin); } memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); @@ -423,7 +424,8 @@ EXPORT_SYMBOL_GPL(ipv6_recv_error); /* * Handle IPV6_RECVPATHMTU */ -int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) +int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, + int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; @@ -457,6 +459,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) sin->sin6_port = 0; sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id; sin->sin6_addr = mtu_info.ip6m_addr.sin6_addr; + *addr_len = sizeof(*sin); } put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 8815e31a87fe..a83243c3d656 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -57,7 +57,8 @@ static struct inet_protosw pingv6_protosw = { /* Compatibility glue so we can support IPv6 when it's compiled as a module */ -static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, + int *addr_len) { return -EAFNOSUPPORT; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index e24ff1df0401..7fb4e14c467f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -466,10 +466,10 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, return -EOPNOTSUPP; if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); if (np->rxpmtu && np->rxopt.bits.rxpmtu) - return ipv6_recv_rxpmtu(sk, msg, len); + return ipv6_recv_rxpmtu(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 81eb8cf8389b..bcd5699313c3 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -393,10 +393,10 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, bool slow; if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); if (np->rxpmtu && np->rxopt.bits.rxpmtu) - return ipv6_recv_rxpmtu(sk, msg, len); + return ipv6_recv_rxpmtu(sk, msg, len, addr_len); try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index cfd65304be60..d9b437e55007 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -665,7 +665,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, *addr_len = sizeof(*lsa); if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) -- cgit v1.2.1 From 1fa4c710b6fe7b0aac9907240291b6fe6aafc3b8 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 23 Nov 2013 07:22:33 +0100 Subject: ipv6: fix leaking uninitialized port number of offender sockaddr Offenders don't have port numbers, so set it to 0. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/datagram.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index d690204a0275..8dfe1f4d3c1a 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -378,6 +378,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) { sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; + sin->sin6_port = 0; if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) -- cgit v1.2.1 From 4d0820cf6a55d72350cb2d24a4504f62fbde95d9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 23 Nov 2013 12:59:20 -0800 Subject: sch_tbf: handle too small burst If a too small burst is inadvertently set on TBF, we might trigger a bug in tbf_segment(), as 'skb' instead of 'segs' was used in a qdisc_reshape_fail() call. tc qdisc add dev eth0 root handle 1: tbf latency 50ms burst 1KB rate 50mbit Fix the bug, and add a warning, as such configuration is not going to work anyway for non GSO packets. (For some reason, one has to use a burst >= 1520 to get a working configuration, even with old kernels. This is a probable iproute2/tc bug) Based on a report and initial patch from Yang Yingliang Fixes: e43ac79a4bc6 ("sch_tbf: segment too big GSO packets") Signed-off-by: Eric Dumazet Reported-by: Yang Yingliang Signed-off-by: David S. Miller --- net/sched/sch_tbf.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 68f98595819c..a6090051c5db 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -21,6 +21,7 @@ #include #include #include +#include /* Simple Token Bucket Filter. @@ -117,6 +118,22 @@ struct tbf_sched_data { }; +/* + * Return length of individual segments of a gso packet, + * including all headers (MAC, IP, TCP/UDP) + */ +static unsigned int skb_gso_seglen(const struct sk_buff *skb) +{ + unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); + const struct skb_shared_info *shinfo = skb_shinfo(skb); + + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + hdr_len += tcp_hdrlen(skb); + else + hdr_len += sizeof(struct udphdr); + return hdr_len + shinfo->gso_size; +} + /* GSO packet is too big, segment it so that tbf can transmit * each segment in time */ @@ -136,12 +153,8 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch) while (segs) { nskb = segs->next; segs->next = NULL; - if (likely(segs->len <= q->max_size)) { - qdisc_skb_cb(segs)->pkt_len = segs->len; - ret = qdisc_enqueue(segs, q->qdisc); - } else { - ret = qdisc_reshape_fail(skb, sch); - } + qdisc_skb_cb(segs)->pkt_len = segs->len; + ret = qdisc_enqueue(segs, q->qdisc); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) sch->qstats.drops++; @@ -163,7 +176,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) int ret; if (qdisc_pkt_len(skb) > q->max_size) { - if (skb_is_gso(skb)) + if (skb_is_gso(skb) && skb_gso_seglen(skb) <= q->max_size) return tbf_segment(skb, sch); return qdisc_reshape_fail(skb, sch); } @@ -319,6 +332,11 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) if (max_size < 0) goto done; + if (max_size < psched_mtu(qdisc_dev(sch))) + pr_warn_ratelimited("sch_tbf: burst %u is lower than device %s mtu (%u) !\n", + max_size, qdisc_dev(sch)->name, + psched_mtu(qdisc_dev(sch))); + if (q->qdisc != &noop_qdisc) { err = fifo_set_limit(q->qdisc, qopt->limit); if (err) -- cgit v1.2.1 From 2d3db210860f1df099a35b1dd54cca35454e0361 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Tue, 29 Oct 2013 18:11:59 -0400 Subject: Revert "mac80211: allow disable power save in mesh" This reverts commit ee1f668136b2fb6640ee2d54c2a525ea41f98211. The aformentioned commit added a check to allow 'iw wlan0 set power_save off' to work for mesh interfaces. However, this is problematic because it also allows 'iw wlan0 set power_save on', which will crash in short order because all of the subsequent code manipulates sdata->u.mgd. The power-saving states for mesh interfaces can be manipulated through the mesh config, e.g: 'iw wlan0 set mesh_param mesh_power_save=active' (which, despite the name, actualy disables power saving since the setting refers to the type of sleep the interface undergoes). Cc: stable@vger.kernel.org Fixes: ee1f668136b2 ("mac80211: allow disable power save in mesh") Signed-off-by: Bob Copeland Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 95667b088c5b..0ec245120a63 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2488,8 +2488,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - if (sdata->vif.type != NL80211_IFTYPE_STATION && - sdata->vif.type != NL80211_IFTYPE_MESH_POINT) + if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) -- cgit v1.2.1 From 1fe4517cebc35ef900fa483d19c3090681f3c7bc Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 30 Oct 2013 16:09:33 +0100 Subject: cfg80211: fix ibss wext chandef creation The wext internal chandefs for ibss should be created using the cfg80211_chandef_create() functions. Initializing fields manually is error-prone. Reported-by: Dirk Gouders Signed-off-by: Simon Wunderlich Signed-off-by: Johannes Berg --- net/wireless/ibss.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 9d797df56649..89737ee2669a 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -262,7 +262,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, /* try to find an IBSS channel if none requested ... */ if (!wdev->wext.ibss.chandef.chan) { - wdev->wext.ibss.chandef.width = NL80211_CHAN_WIDTH_20_NOHT; + struct ieee80211_channel *new_chan = NULL; for (band = 0; band < IEEE80211_NUM_BANDS; band++) { struct ieee80211_supported_band *sband; @@ -278,18 +278,19 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, continue; if (chan->flags & IEEE80211_CHAN_DISABLED) continue; - wdev->wext.ibss.chandef.chan = chan; - wdev->wext.ibss.chandef.center_freq1 = - chan->center_freq; + new_chan = chan; break; } - if (wdev->wext.ibss.chandef.chan) + if (new_chan) break; } - if (!wdev->wext.ibss.chandef.chan) + if (!new_chan) return -EINVAL; + + cfg80211_chandef_create(&wdev->wext.ibss.chandef, new_chan, + NL80211_CHAN_NO_HT); } /* don't join -- SSID is not there */ @@ -363,9 +364,8 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, return err; if (chan) { - wdev->wext.ibss.chandef.chan = chan; - wdev->wext.ibss.chandef.width = NL80211_CHAN_WIDTH_20_NOHT; - wdev->wext.ibss.chandef.center_freq1 = freq; + cfg80211_chandef_create(&wdev->wext.ibss.chandef, chan, + NL80211_CHAN_NO_HT); wdev->wext.ibss.channel_fixed = true; } else { /* cfg80211_ibss_wext_join will pick one if needed */ -- cgit v1.2.1 From 84a3d1c97d024acd1d27ebbc10cb95784b11f4e7 Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic Date: Tue, 5 Nov 2013 14:48:46 +0100 Subject: mac80211: DFS setup chandef for radar_event correctly Setup chandef for radar event correctly, before we will clear this in ieee80211_dfs_cac_cancel() function. Without this patch mac80211 will report wrong channel width in case we will get radar event during active CAC. Signed-off-by: Janusz Dziedzic Reviewed-by: Luis R. Rodriguez Signed-off-by: Johannes Berg --- net/mac80211/util.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 592a18171f95..e9ce36d32ef5 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2278,17 +2278,15 @@ void ieee80211_dfs_radar_detected_work(struct work_struct *work) { struct ieee80211_local *local = container_of(work, struct ieee80211_local, radar_detected_work); - struct cfg80211_chan_def chandef; + struct cfg80211_chan_def chandef = local->hw.conf.chandef; ieee80211_dfs_cac_cancel(local); if (local->use_chanctx) /* currently not handled */ WARN_ON(1); - else { - chandef = local->hw.conf.chandef; + else cfg80211_radar_event(local->hw.wiphy, &chandef, GFP_KERNEL); - } } void ieee80211_radar_detected(struct ieee80211_hw *hw) -- cgit v1.2.1 From 18db594a1005d908d995a2fc8f5a7bf4286fdca0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 6 Nov 2013 10:34:36 +0100 Subject: mac80211: fix scheduled scan rtnl deadlock When changing cfg80211 to use RTNL locking, this caused a deadlock in mac80211 as it calls cfg80211_sched_scan_stopped() from a work item that's on a workqueue that is flushed with the RTNL held. Fix this by simply using schedule_work(), the work only needs to finish running before the wiphy is unregistered, no other synchronisation (e.g. with suspend) is really required since for suspend userspace is already blocked anyway when we flush the workqueue so will only pick up the event after resume. Cc: stable@vger.kernel.org Fixes: 5fe231e87372 ("cfg80211: vastly simplify locking") Reported-and-tested-by: Eliad Peller Signed-off-by: Johannes Berg --- net/mac80211/main.c | 1 + net/mac80211/scan.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 21d5d44444d0..e765f77bb97a 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1047,6 +1047,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) cancel_work_sync(&local->restart_work); cancel_work_sync(&local->reconfig_filter); + flush_work(&local->sched_scan_stopped_work); ieee80211_clear_tx_pending(local); rate_control_deinitialize(local); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 5ad66a83ef7f..bcc4833d7542 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -1088,6 +1088,6 @@ void ieee80211_sched_scan_stopped(struct ieee80211_hw *hw) trace_api_sched_scan_stopped(local); - ieee80211_queue_work(&local->hw, &local->sched_scan_stopped_work); + schedule_work(&local->sched_scan_stopped_work); } EXPORT_SYMBOL(ieee80211_sched_scan_stopped); -- cgit v1.2.1 From ae917c9f55862691e31b84de7ec29bedcb83971c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Oct 2013 11:05:22 +0200 Subject: nl80211: check nla_put_* return values Coverity pointed out that in a few functions we don't check the return value of the nla_put_*() calls. Most of these are fairly harmless because the input isn't very dynamic and controlled by the kernel, but the pattern is simply wrong, so fix this. Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 52 +++++++++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a1eb21073176..0ffb18371376 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9633,8 +9633,9 @@ static int nl80211_add_scan_req(struct sk_buff *msg, nla_put(msg, NL80211_ATTR_IE, req->ie_len, req->ie)) goto nla_put_failure; - if (req->flags) - nla_put_u32(msg, NL80211_ATTR_SCAN_FLAGS, req->flags); + if (req->flags && + nla_put_u32(msg, NL80211_ATTR_SCAN_FLAGS, req->flags)) + goto nla_put_failure; return 0; nla_put_failure: @@ -11118,16 +11119,18 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, wakeup->pattern_idx)) goto free_msg; - if (wakeup->tcp_match) - nla_put_flag(msg, NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH); + if (wakeup->tcp_match && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH)) + goto free_msg; - if (wakeup->tcp_connlost) - nla_put_flag(msg, - NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST); + if (wakeup->tcp_connlost && + nla_put_flag(msg, NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST)) + goto free_msg; - if (wakeup->tcp_nomoretokens) - nla_put_flag(msg, - NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS); + if (wakeup->tcp_nomoretokens && + nla_put_flag(msg, + NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS)) + goto free_msg; if (wakeup->packet) { u32 pkt_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211; @@ -11263,24 +11266,29 @@ void cfg80211_ft_event(struct net_device *netdev, return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FT_EVENT); - if (!hdr) { - nlmsg_free(msg); - return; - } + if (!hdr) + goto out; + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, ft_event->target_ap)) + goto out; - nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); - nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); - nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, ft_event->target_ap); - if (ft_event->ies) - nla_put(msg, NL80211_ATTR_IE, ft_event->ies_len, ft_event->ies); - if (ft_event->ric_ies) - nla_put(msg, NL80211_ATTR_IE_RIC, ft_event->ric_ies_len, - ft_event->ric_ies); + if (ft_event->ies && + nla_put(msg, NL80211_ATTR_IE, ft_event->ies_len, ft_event->ies)) + goto out; + if (ft_event->ric_ies && + nla_put(msg, NL80211_ATTR_IE_RIC, ft_event->ric_ies_len, + ft_event->ric_ies)) + goto out; genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, GFP_KERNEL); + return; + out: + nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_ft_event); -- cgit v1.2.1 From 9fe271af7d4de96471c5aaee2f4d0d1576050497 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Oct 2013 11:15:12 +0200 Subject: nl80211: fix error path in nl80211_get_key() Coverity pointed out that in the (practically impossible) error case we leak the message - fix this. Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0ffb18371376..f1370ed9f498 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2687,7 +2687,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_NEW_KEY); if (!hdr) - return -ENOBUFS; + goto nla_put_failure; cookie.msg = msg; cookie.idx = key_idx; -- cgit v1.2.1 From 7fa322c878d70e38675f50e17acdce7fa3f5ac8c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 25 Oct 2013 11:16:58 +0200 Subject: nl80211: check nla_nest_start() return value Coverity pointed out that we might dereference NULL later if nla_nest_start() returns a failure. This isn't really true since we'd bomb out before, but we should check the return value directly, so do that. Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f1370ed9f498..e20c27ff0f14 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -11094,6 +11094,8 @@ void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev, struct nlattr *reasons; reasons = nla_nest_start(msg, NL80211_ATTR_WOWLAN_TRIGGERS); + if (!reasons) + goto free_msg; if (wakeup->disconnect && nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) -- cgit v1.2.1 From 57fb089f480d199e4275da086d407b978de67214 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 8 Nov 2013 17:31:37 +0100 Subject: mac80211: fix crash when using AP VLAN interfaces Commit "mac80211: implement SMPS for AP" applies to AP_VLAN as well. It assumes that sta->sdata->vif.bss_conf.bssid is present, which did not get set for AP_VLAN. Initialize it to sdata->vif.addr like for other interface types. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index ff101ea1d9ae..36c3a4cbcabf 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1325,7 +1325,6 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.bssid = NULL; break; case NL80211_IFTYPE_AP_VLAN: - break; case NL80211_IFTYPE_P2P_DEVICE: sdata->vif.bss_conf.bssid = sdata->vif.addr; break; -- cgit v1.2.1 From 6c751ef8a1a15d633cd755eafa86ede9c32b2617 Mon Sep 17 00:00:00 2001 From: Javier Lopez Date: Wed, 6 Nov 2013 10:04:29 -0800 Subject: mac80211: fix for mesh beacon update on powersave Mesh beacon was not being rebuild after user triggered a mesh powersave change. To solve this issue use ieee80211_mbss_info_change_notify instead of ieee80211_bss_info_change_notify. This helper function forces mesh beacon to be rebuild and then notifies the driver about the beacon change. Signed-off-by: Javier Lopez Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 0ec245120a63..9e7e68d7b1a7 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1368,7 +1368,7 @@ static int sta_apply_parameters(struct ieee80211_local *local, changed |= ieee80211_mps_set_sta_local_pm(sta, params->local_pm); - ieee80211_bss_info_change_notify(sdata, changed); + ieee80211_mbss_info_change_notify(sdata, changed); #endif } -- cgit v1.2.1 From 351df099721e02e1a25a498268e52c0378c0e272 Mon Sep 17 00:00:00 2001 From: Karl Beldan Date: Wed, 13 Nov 2013 23:07:07 +0100 Subject: mac80211: minstrel_ht: fix rates selection When initializing rates selections starting indexes upon stats update, the minstrel_sta->max_* rates should be 'group * MCS_GROUP_RATES + i' not 'i'. This affects settings where one of the peers does not support any of the rates of the group 0 (i.e. when ht_cap.mcs.rx_mask[0] == 0). Signed-off-by: Karl Beldan Acked-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel_ht.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 5d60779a0c1b..47aa6f81566b 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -277,13 +277,15 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) if (!(mg->supported & BIT(i))) continue; + index = MCS_GROUP_RATES * group + i; + /* initialize rates selections starting indexes */ if (!mg_rates_valid) { mg->max_tp_rate = mg->max_tp_rate2 = mg->max_prob_rate = i; if (!mi_rates_valid) { mi->max_tp_rate = mi->max_tp_rate2 = - mi->max_prob_rate = i; + mi->max_prob_rate = index; mi_rates_valid = true; } mg_rates_valid = true; @@ -291,7 +293,6 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) mr = &mg->rates[i]; mr->retry_updated = false; - index = MCS_GROUP_RATES * group + i; minstrel_calc_rate_ewma(mr); minstrel_ht_calc_tp(mi, group, i); -- cgit v1.2.1 From 9f16d84ad73ea04145a5dc85c8f1067915b37eea Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 17 Nov 2013 10:37:34 +0100 Subject: cfg80211: disable 5/10 MHz support for all drivers Due to nl80211 API breakage, 5/10 MHz support is broken for all drivers. Fixing it requires adding new API, but that can't be done as a bugfix commit since that would require either updating all APIs in the trees needing the bugfix or cause different kernels to have incompatible API. Therefore, just disable 5/10 MHz support for all drivers. Cc: stable@vger.kernel.org [3.12] Signed-off-by: Johannes Berg --- net/wireless/core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index aff959e5a1b3..00a65ba3aeaa 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -451,6 +451,9 @@ int wiphy_register(struct wiphy *wiphy) int i; u16 ifmodes = wiphy->interface_modes; + /* support for 5/10 MHz is broken due to nl80211 API mess - disable */ + wiphy->flags &= ~WIPHY_FLAG_SUPPORTS_5_10_MHZ; + #ifdef CONFIG_PM if (WARN_ON(wiphy->wowlan && (wiphy->wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && -- cgit v1.2.1 From 051a41fa4ee14f5c39668f0980973b9a195de560 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 20 Nov 2013 11:28:27 +0100 Subject: mac80211: don't attempt to reorder multicast frames Multicast frames can't be transmitted as part of an aggregation session (such a session couldn't even be set up) so don't try to reorder them. Trying to do so would cause the reorder to stop working correctly since multicast QoS frames (as transmitted by the Aruba APs this was found with) would cause sequence number confusion in the buffer. Cc: stable@vger.kernel.org Reported-by: Blaise Gassend Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index caecef870c0e..2b0debb0422b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -911,7 +911,8 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, u16 sc; u8 tid, ack_policy; - if (!ieee80211_is_data_qos(hdr->frame_control)) + if (!ieee80211_is_data_qos(hdr->frame_control) || + is_multicast_ether_addr(hdr->addr1)) goto dont_reorder; /* -- cgit v1.2.1 From 3f718fd8401d7db86b9efc3ea1cdf5df41354b9f Mon Sep 17 00:00:00 2001 From: Chun-Yeow Yeoh Date: Fri, 8 Nov 2013 15:09:43 +0800 Subject: mac80211: fix the mesh channel switch support Mesh STA receiving the mesh CSA action frame is not able to trigger the mesh channel switch due to the incorrect handling and comparison of mesh channel switch parameters element (MCSP)'s TTL. Make sure the MCSP's TTL is updated accordingly before calling the ieee80211_mesh_process_chnswitch. Also, we update the beacon before forwarding the CSA action frame, so MCSP's precedence value and initiator flag need to be updated prior to this. Signed-off-by: Chun-Yeow Yeoh Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 10 +++++++++- net/mac80211/ieee80211_i.h | 1 + net/mac80211/mesh.c | 20 ++++++++++++-------- net/mac80211/spectmgmt.c | 2 ++ net/mac80211/util.c | 5 ----- 5 files changed, 24 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 9e7e68d7b1a7..364ce0c5962f 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3119,9 +3119,17 @@ static int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, params->chandef.chan->band) return -EINVAL; + ifmsh->chsw_init = true; + if (!ifmsh->pre_value) + ifmsh->pre_value = 1; + else + ifmsh->pre_value++; + err = ieee80211_mesh_csa_beacon(sdata, params, true); - if (err < 0) + if (err < 0) { + ifmsh->chsw_init = false; return err; + } break; #endif default: diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 29dc505be125..4aea4e791113 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1228,6 +1228,7 @@ struct ieee80211_csa_ie { u8 mode; u8 count; u8 ttl; + u16 pre_value; }; /* Parsed Information Elements */ diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 896fe3bd599e..ba105257d03f 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -943,14 +943,19 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata, params.chandef.chan->center_freq); params.block_tx = csa_ie.mode & WLAN_EID_CHAN_SWITCH_PARAM_TX_RESTRICT; - if (beacon) + if (beacon) { ifmsh->chsw_ttl = csa_ie.ttl - 1; - else - ifmsh->chsw_ttl = 0; + if (ifmsh->pre_value >= csa_ie.pre_value) + return false; + ifmsh->pre_value = csa_ie.pre_value; + } - if (ifmsh->chsw_ttl > 0) + if (ifmsh->chsw_ttl < ifmsh->mshcfg.dot11MeshTTL) { if (ieee80211_mesh_csa_beacon(sdata, ¶ms, false) < 0) return false; + } else { + return false; + } sdata->csa_radar_required = params.radar_required; @@ -1163,7 +1168,6 @@ static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata, offset_ttl = (len < 42) ? 7 : 10; *(pos + offset_ttl) -= 1; *(pos + offset_ttl + 1) &= ~WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR; - sdata->u.mesh.chsw_ttl = *(pos + offset_ttl); memcpy(mgmt_fwd, mgmt, len); eth_broadcast_addr(mgmt_fwd->da); @@ -1182,7 +1186,7 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, u16 pre_value; bool fwd_csa = true; size_t baselen; - u8 *pos, ttl; + u8 *pos; if (mgmt->u.action.u.measurement.action_code != WLAN_ACTION_SPCT_CHL_SWITCH) @@ -1193,8 +1197,8 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, u.action.u.chan_switch.variable); ieee802_11_parse_elems(pos, len - baselen, false, &elems); - ttl = elems.mesh_chansw_params_ie->mesh_ttl; - if (!--ttl) + ifmsh->chsw_ttl = elems.mesh_chansw_params_ie->mesh_ttl; + if (!--ifmsh->chsw_ttl) fwd_csa = false; pre_value = le16_to_cpu(elems.mesh_chansw_params_ie->mesh_pre_value); diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index a40da20b32e0..6ab009070084 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -78,6 +78,8 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, if (elems->mesh_chansw_params_ie) { csa_ie->ttl = elems->mesh_chansw_params_ie->mesh_ttl; csa_ie->mode = elems->mesh_chansw_params_ie->mesh_flags; + csa_ie->pre_value = le16_to_cpu( + elems->mesh_chansw_params_ie->mesh_pre_value); } new_freq = ieee80211_channel_to_frequency(new_chan_no, new_band); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index e9ce36d32ef5..9f9b9bd3fd44 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2457,14 +2457,9 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata, WLAN_EID_CHAN_SWITCH_PARAM_TX_RESTRICT : 0x00; put_unaligned_le16(WLAN_REASON_MESH_CHAN, pos); /* Reason Cd */ pos += 2; - if (!ifmsh->pre_value) - ifmsh->pre_value = 1; - else - ifmsh->pre_value++; pre_value = cpu_to_le16(ifmsh->pre_value); memcpy(pos, &pre_value, 2); /* Precedence Value */ pos += 2; - ifmsh->chsw_init = true; } ieee80211_tx_skb(sdata, skb); -- cgit v1.2.1 From 12b5f34d2d5934e998975bbae4e29f81d94052f6 Mon Sep 17 00:00:00 2001 From: Eliad Peller Date: Mon, 18 Nov 2013 19:06:46 +0200 Subject: mac80211: fix connection polling Commit 392b9ff ("mac80211: change beacon/connection polling") removed the IEEE80211_STA_BEACON_POLL flag. However, it accidentally removed the setting of IEEE80211_STA_CONNECTION_POLL, making the connection polling completely useless (the flag is always clear, so the result is never being checked). Fix it. Signed-off-by: Eliad Peller Acked-by: Stanislaw Gruszka Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d7504ab61a34..b3a3ce316656 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1910,6 +1910,8 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, if (ifmgd->flags & IEEE80211_STA_CONNECTION_POLL) already = true; + ifmgd->flags |= IEEE80211_STA_CONNECTION_POLL; + mutex_unlock(&sdata->local->mtx); if (already) -- cgit v1.2.1 From 1b09cd82d8c479700ef6185665839d1020b02519 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 20 Nov 2013 19:40:41 +0100 Subject: cfg80211: ignore supported rates for nonexistant bands on scan Fixes wpa_supplicant p2p_find on 5GHz-only devices Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e20c27ff0f14..138dc3bb8b67 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5349,6 +5349,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) err = -EINVAL; goto out_free; } + + if (!wiphy->bands[band]) + continue; + err = ieee80211_get_ratemask(wiphy->bands[band], nla_data(attr), nla_len(attr), -- cgit v1.2.1 From 5664da4429c177495256f958194c241625074ec0 Mon Sep 17 00:00:00 2001 From: Karl Beldan Date: Wed, 20 Nov 2013 19:13:35 +0100 Subject: mac80211: use capped prob when computing throughputs Commit 3e8b1eb "mac80211/minstrel_ht: improve rate selection stability" introduced a local capped prob in minstrel_ht_calc_tp but omitted to use it to compute the per rate throughput. Signed-off-by: Karl Beldan Cc: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/rc80211_minstrel_ht.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 47aa6f81566b..4096ff6cc24f 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -226,7 +226,7 @@ minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate) nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len); nsecs += minstrel_mcs_groups[group].duration[rate]; - tp = 1000000 * ((mr->probability * 1000) / nsecs); + tp = 1000000 * ((prob * 1000) / nsecs); mr->cur_tp = MINSTREL_TRUNC(tp); } -- cgit v1.2.1 From 24d47300d118c5909a51b7270276d749cce150a2 Mon Sep 17 00:00:00 2001 From: Karl Beldan Date: Tue, 19 Nov 2013 17:12:05 +0100 Subject: mac80211: set hw initial idle state ATM, the first call of ieee80211_do_open will configure the hw as non-idle, even if the interface being brought up is not a monitor, and this leads to inconsistent sequences like: register_hw() do_open(sta) hw_config(non-idle) (.. sta is non-idle ..) scan(sta) hw_config(idle) (after scan finishes) do_stop(sta) do_open(sta) (.. sta is idle ..) Signed-off-by: Karl Beldan Signed-off-by: Johannes Berg --- net/mac80211/main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index e765f77bb97a..7d1c3ac48ed9 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -940,6 +940,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n", result); + local->hw.conf.flags = IEEE80211_CONF_IDLE; + ieee80211_led_init(local); rtnl_lock(); -- cgit v1.2.1 From b49faea7655ec10ade15d7d007e4218ca578a513 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 14 Nov 2013 10:41:01 -0500 Subject: netfilter: ipset: fix incorret comparison in hash_netnet4_data_equal() Both sides of the comparison are the same, looks like a cut-and-paste error. Spotted by Coverity. Signed-off-by: Dave Jones Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_netnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 2bc2dec20b00..6226803fc490 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -59,7 +59,7 @@ hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1, u32 *multi) { return ip1->ipcmp == ip2->ipcmp && - ip2->ccmp == ip2->ccmp; + ip1->ccmp == ip2->ccmp; } static inline int -- cgit v1.2.1 From c297c8b99b07f496ff69a719cfb8e8fe852832ed Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 20 Nov 2013 13:00:17 -0500 Subject: SUNRPC: do not fail gss proc NULL calls with EACCES Otherwise RPCSEC_GSS_DESTROY messages are not sent. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 97912b40c254..42fdfc634e56 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1517,7 +1517,7 @@ out: static int gss_refresh_null(struct rpc_task *task) { - return -EACCES; + return 0; } static __be32 * -- cgit v1.2.1 From 0f0e2159c0c101426b705d70f43b9f423d51c869 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sat, 23 Nov 2013 13:01:50 +0100 Subject: genetlink: Fix uninitialized variable in genl_validate_assign_mc_groups() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/netlink/genetlink.c: In function ‘genl_validate_assign_mc_groups’: net/netlink/genetlink.c:217: warning: ‘err’ may be used uninitialized in this function Commit 2a94fe48f32ccf7321450a2cc07f2b724a444e5b ("genetlink: make multicast groups const, prevent abuse") split genl_register_mc_group() in multiple functions, but dropped the initialization of err. Initialize err to zero to fix this. Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 4518a57aa5fe..803206e82450 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -214,7 +214,7 @@ static int genl_validate_assign_mc_groups(struct genl_family *family) { int first_id; int n_groups = family->n_mcgrps; - int err, i; + int err = 0, i; bool groups_allocated = false; if (!n_groups) -- cgit v1.2.1 From 5e53e689b737526308db2b5c9f56e9d0371a1676 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 24 Nov 2013 21:09:26 +0100 Subject: genetlink/pmcraid: use proper genetlink multicast API The pmcraid driver is abusing the genetlink API and is using its family ID as the multicast group ID, which is invalid and may belong to somebody else (and likely will.) Make it use the correct API, but since this may already be used as-is by userspace, reserve a family ID for this code and also reserve that group ID to not break userspace assumptions. My previous patch broke event delivery in the driver as I missed that it wasn't using the right API and forgot to update it later in my series. While changing this, I noticed that the genetlink code could use the static group ID instead of a strcmp(), so also do that for the VFS_DQUOT family. Cc: Anil Ravindranath Cc: "James E.J. Bottomley" Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 803206e82450..713671ae45af 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -74,9 +74,12 @@ static struct list_head family_ht[GENL_FAM_TAB_SIZE]; * Bit 17 is marked as already used since the VFS quota code * also abused this API and relied on family == group ID, we * cater to that by giving it a static family and group ID. + * Bit 18 is marked as already used since the PMCRAID driver + * did the same thing as the VFS quota code (maybe copied?) */ static unsigned long mc_group_start = 0x3 | BIT(GENL_ID_CTRL) | - BIT(GENL_ID_VFS_DQUOT); + BIT(GENL_ID_VFS_DQUOT) | + BIT(GENL_ID_PMCRAID); static unsigned long *mc_groups = &mc_group_start; static unsigned long mc_groups_longs = 1; @@ -139,6 +142,7 @@ static u16 genl_generate_id(void) for (i = 0; i <= GENL_MAX_ID - GENL_MIN_ID; i++) { if (id_gen_idx != GENL_ID_VFS_DQUOT && + id_gen_idx != GENL_ID_PMCRAID && !genl_family_find_byid(id_gen_idx)) return id_gen_idx; if (++id_gen_idx > GENL_MAX_ID) @@ -236,9 +240,12 @@ static int genl_validate_assign_mc_groups(struct genl_family *family) } else if (strcmp(family->name, "NET_DM") == 0) { first_id = 1; BUG_ON(n_groups != 1); - } else if (strcmp(family->name, "VFS_DQUOT") == 0) { + } else if (family->id == GENL_ID_VFS_DQUOT) { first_id = GENL_ID_VFS_DQUOT; BUG_ON(n_groups != 1); + } else if (family->id == GENL_ID_PMCRAID) { + first_id = GENL_ID_PMCRAID; + BUG_ON(n_groups != 1); } else { groups_allocated = true; err = genl_allocate_reserve_groups(n_groups, &first_id); -- cgit v1.2.1 From 6eabca54d6781f61c7318517c1463a098acb7a87 Mon Sep 17 00:00:00 2001 From: Xufeng Zhang Date: Mon, 25 Nov 2013 11:26:57 +0800 Subject: sctp: Restore 'resent' bit to avoid retransmitted chunks for RTT measurements Currently retransmitted DATA chunks could also be used for RTT measurements since there are no flag to identify whether the transmitted DATA chunk is a new one or a retransmitted one. This problem is introduced by commit ae19c5486 ("sctp: remove 'resent' bit from the chunk") which inappropriately removed the 'resent' bit completely, instead of doing this, we should set the resent bit only for the retransmitted DATA chunks. Signed-off-by: Xufeng Zhang Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/output.c | 3 ++- net/sctp/outqueue.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/output.c b/net/sctp/output.c index e650978daf27..0e2644d0a773 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -474,10 +474,11 @@ int sctp_packet_transmit(struct sctp_packet *packet) * for a given destination transport address. */ - if (!tp->rto_pending) { + if (!chunk->resent && !tp->rto_pending) { chunk->rtt_in_progress = 1; tp->rto_pending = 1; } + has_data = 1; } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index abb6db008df1..f51ba985a36e 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -446,6 +446,8 @@ void sctp_retransmit_mark(struct sctp_outq *q, transport->rto_pending = 0; } + chunk->resent = 1; + /* Move the chunk to the retransmit queue. The chunks * on the retransmit queue are always kept in order. */ @@ -1375,6 +1377,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, * instance). */ if (!tchunk->tsn_gap_acked && + !tchunk->resent && tchunk->rtt_in_progress) { tchunk->rtt_in_progress = 0; rtt = jiffies - tchunk->sent_at; -- cgit v1.2.1 From 66028310aedc782e3a9a221f1a9ec12b7e587e50 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 25 Nov 2013 17:21:24 +0800 Subject: sit: use kfree_skb to replace dev_kfree_skb In failure case, we should use kfree_skb not dev_kfree_skb to free skbuff, dev_kfree_skb is defined as consume_skb. Trace takes advantage of this point. Signed-off-by: Gao feng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/sit.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 8435267836a7..366fbba3359a 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -951,7 +951,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (!new_skb) { ip_rt_put(rt); dev->stats.tx_dropped++; - dev_kfree_skb(skb); + kfree_skb(skb); return NETDEV_TX_OK; } if (skb->sk) @@ -977,7 +977,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, tx_error_icmp: dst_link_failure(skb); tx_error: - dev_kfree_skb(skb); + kfree_skb(skb); out: dev->stats.tx_errors++; return NETDEV_TX_OK; @@ -1017,7 +1017,7 @@ static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb, tx_err: dev->stats.tx_errors++; - dev_kfree_skb(skb); + kfree_skb(skb); return NETDEV_TX_OK; } -- cgit v1.2.1 From 39af0c409e8c06b51caf7dc43e49ef96ae790a55 Mon Sep 17 00:00:00 2001 From: Baker Zhang Date: Tue, 26 Nov 2013 09:29:15 +0800 Subject: net: remove outdated comment for ipv4 and ipv6 protocol handler since f9242b6b28d61295f2bf7e8adfb1060b382e5381 inet: Sanitize inet{,6} protocol demux. there are not pretended hash tables for ipv4 or ipv6 protocol handler. Signed-off-by: Baker Zhang Signed-off-by: David S. Miller --- net/ipv4/protocol.c | 8 -------- net/ipv6/protocol.c | 4 ---- 2 files changed, 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index ce848461acbb..46d6a1c923a8 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -31,10 +31,6 @@ const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly; const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly; -/* - * Add a protocol handler to the hash tables - */ - int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) { if (!prot->netns_ok) { @@ -55,10 +51,6 @@ int inet_add_offload(const struct net_offload *prot, unsigned char protocol) } EXPORT_SYMBOL(inet_add_offload); -/* - * Remove a protocol from the hash tables. - */ - int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) { int ret; diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index 22d1bd4670da..e048cf1bb6a2 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -36,10 +36,6 @@ int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol } EXPORT_SYMBOL(inet6_add_protocol); -/* - * Remove a protocol from the hash tables. - */ - int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol) { int ret; -- cgit v1.2.1 From ec6f809ff6f19fafba3212f6aff0dda71dfac8e8 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Fri, 29 Nov 2013 09:53:23 +0100 Subject: af_packet: block BH in prb_shutdown_retire_blk_timer() Currently we're using plain spin_lock() in prb_shutdown_retire_blk_timer(), however the timer might fire right in the middle and thus try to re-aquire the same spinlock, leaving us in a endless loop. To fix that, use the spin_lock_bh() to block it. Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.") CC: "David S. Miller" CC: Daniel Borkmann CC: Willem de Bruijn CC: Phil Sutter CC: Eric Dumazet Reported-by: Jan Stancek Tested-by: Jan Stancek Signed-off-by: Veaceslav Falico Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/packet/af_packet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ac27c86ef6d1..ba2548bd85bf 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -439,9 +439,9 @@ static void prb_shutdown_retire_blk_timer(struct packet_sock *po, pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc; - spin_lock(&rb_queue->lock); + spin_lock_bh(&rb_queue->lock); pkc->delete_blk_timer = 1; - spin_unlock(&rb_queue->lock); + spin_unlock_bh(&rb_queue->lock); prb_del_retire_blk_timer(pkc); } -- cgit v1.2.1 From db31c55a6fb245fdbb752a2ca4aefec89afabb06 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 27 Nov 2013 15:40:21 +0300 Subject: net: clamp ->msg_namelen instead of returning an error If kmsg->msg_namelen > sizeof(struct sockaddr_storage) then in the original code that would lead to memory corruption in the kernel if you had audit configured. If you didn't have audit configured it was harmless. There are some programs such as beta versions of Ruby which use too large of a buffer and returning an error code breaks them. We should clamp the ->msg_namelen value instead. Fixes: 1661bf364ae9 ("net: heap overflow in __audit_sockaddr()") Reported-by: Eric Wong Signed-off-by: Dan Carpenter Tested-by: Eric Wong Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/compat.c | 2 +- net/socket.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/compat.c b/net/compat.c index 618c6a8a911b..dd32e34c1e2c 100644 --- a/net/compat.c +++ b/net/compat.c @@ -72,7 +72,7 @@ int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg) __get_user(kmsg->msg_flags, &umsg->msg_flags)) return -EFAULT; if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) - return -EINVAL; + kmsg->msg_namelen = sizeof(struct sockaddr_storage); kmsg->msg_name = compat_ptr(tmp1); kmsg->msg_iov = compat_ptr(tmp2); kmsg->msg_control = compat_ptr(tmp3); diff --git a/net/socket.c b/net/socket.c index 0b18693f2be6..e83c416708af 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1973,7 +1973,7 @@ static int copy_msghdr_from_user(struct msghdr *kmsg, if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) return -EFAULT; if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) - return -EINVAL; + kmsg->msg_namelen = sizeof(struct sockaddr_storage); return 0; } -- cgit v1.2.1 From d3f7d56a7a4671d395e8af87071068a195257bf6 Mon Sep 17 00:00:00 2001 From: Shawn Landden Date: Sun, 24 Nov 2013 22:36:28 -0800 Subject: net: update consumers of MSG_MORE to recognize MSG_SENDPAGE_NOTLAST Commit 35f9c09fe (tcp: tcp_sendpages() should call tcp_push() once) added an internal flag MSG_SENDPAGE_NOTLAST, similar to MSG_MORE. algif_hash, algif_skcipher, and udp used MSG_MORE from tcp_sendpages() and need to see the new flag as identical to MSG_MORE. This fixes sendfile() on AF_ALG. v3: also fix udp Cc: Tom Herbert Cc: Eric Dumazet Cc: David S. Miller Cc: # 3.4.x + 3.2.x Reported-and-tested-by: Shawn Landden Original-patch: Richard Weinberger Signed-off-by: Shawn Landden Signed-off-by: David S. Miller --- net/ipv4/udp.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 44dfaa09b584..bf2b85b25491 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1098,6 +1098,9 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, struct udp_sock *up = udp_sk(sk); int ret; + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + if (!up->pending) { struct msghdr msg = { .msg_flags = flags|MSG_MORE }; -- cgit v1.2.1 From f1d8cba61c3c4b1eb88e507249c4cb8d635d9a76 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 28 Nov 2013 09:51:22 -0800 Subject: inet: fix possible seqlock deadlocks In commit c9e9042994d3 ("ipv4: fix possible seqlock deadlock") I left another places where IP_INC_STATS_BH() were improperly used. udp_sendmsg(), ping_v4_sendmsg() and tcp_v4_connect() are called from process context, not from softirq context. This was detected by lockdep seqlock support. Reported-by: jongman heo Fixes: 584bdf8cbdf6 ("[IPV4]: Fix "ipOutNoRoutes" counter error for TCP and UDP") Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") Signed-off-by: Eric Dumazet Cc: Hannes Frederic Sowa Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv4/ping.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/udp.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 840cf1b9e6ee..242e7f4ed6f4 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -772,7 +772,7 @@ int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, err = PTR_ERR(rt); rt = NULL; if (err == -ENETUNREACH) - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); goto out; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 59a6f8b90cd9..067213924751 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -177,7 +177,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (IS_ERR(rt)) { err = PTR_ERR(rt); if (err == -ENETUNREACH) - IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); return err; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index bf2b85b25491..44f6a20fa29d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -999,7 +999,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, err = PTR_ERR(rt); rt = NULL; if (err == -ENETUNREACH) - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); goto out; } -- cgit v1.2.1 From 7f88c6b23afbd31545c676dea77ba9593a1a14bf Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 29 Nov 2013 06:39:44 +0100 Subject: ipv6: fix possible seqlock deadlock in ip6_finish_output2 IPv6 stats are 64 bits and thus are protected with a seqlock. By not disabling bottom-half we could deadlock here if we don't disable bh and a softirq reentrantly updates the same mib. Cc: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 59df872e2f4d..4acdb63495db 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -116,8 +116,8 @@ static int ip6_finish_output2(struct sk_buff *skb) } rcu_read_unlock_bh(); - IP6_INC_STATS_BH(dev_net(dst->dev), - ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EINVAL; } -- cgit v1.2.1 From 213e3bc723e53af0976421d2808ea3f6cc821c56 Mon Sep 17 00:00:00 2001 From: Arvid Brodin Date: Fri, 29 Nov 2013 23:37:07 +0100 Subject: net/hsr: Very small fix of comment style. Signed-off-by: Arvid Brodin Signed-off-by: David S. Miller --- net/hsr/hsr_framereg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 003f5bb3acd2..4bdab1521878 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -288,7 +288,8 @@ void hsr_addr_subst_dest(struct hsr_priv *hsr_priv, struct ethhdr *ethhdr, static bool seq_nr_after(u16 a, u16 b) { /* Remove inconsistency where - * seq_nr_after(a, b) == seq_nr_before(a, b) */ + * seq_nr_after(a, b) == seq_nr_before(a, b) + */ if ((int) b - a == 32768) return false; -- cgit v1.2.1 From 98bf8362220af717862b8262b21348774890b7b4 Mon Sep 17 00:00:00 2001 From: Arvid Brodin Date: Fri, 29 Nov 2013 23:38:16 +0100 Subject: net/hsr: Support iproute print_opt ('ip -details ...') This implements the rtnl_link_ops fill_info routine for HSR. Signed-off-by: Arvid Brodin Signed-off-by: David S. Miller --- net/hsr/hsr_netlink.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'net') diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 5325af85eea6..01a5261ac7a5 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -23,6 +23,8 @@ static const struct nla_policy hsr_policy[IFLA_HSR_MAX + 1] = { [IFLA_HSR_SLAVE1] = { .type = NLA_U32 }, [IFLA_HSR_SLAVE2] = { .type = NLA_U32 }, [IFLA_HSR_MULTICAST_SPEC] = { .type = NLA_U8 }, + [IFLA_HSR_SUPERVISION_ADDR] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [IFLA_HSR_SEQ_NR] = { .type = NLA_U16 }, }; @@ -59,6 +61,31 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, return hsr_dev_finalize(dev, link, multicast_spec); } +static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct hsr_priv *hsr_priv; + + hsr_priv = netdev_priv(dev); + + if (hsr_priv->slave[0]) + if (nla_put_u32(skb, IFLA_HSR_SLAVE1, hsr_priv->slave[0]->ifindex)) + goto nla_put_failure; + + if (hsr_priv->slave[1]) + if (nla_put_u32(skb, IFLA_HSR_SLAVE2, hsr_priv->slave[1]->ifindex)) + goto nla_put_failure; + + if (nla_put(skb, IFLA_HSR_SUPERVISION_ADDR, ETH_ALEN, + hsr_priv->sup_multicast_addr) || + nla_put_u16(skb, IFLA_HSR_SEQ_NR, hsr_priv->sequence_nr)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + static struct rtnl_link_ops hsr_link_ops __read_mostly = { .kind = "hsr", .maxtype = IFLA_HSR_MAX, @@ -66,6 +93,7 @@ static struct rtnl_link_ops hsr_link_ops __read_mostly = { .priv_size = sizeof(struct hsr_priv), .setup = hsr_dev_setup, .newlink = hsr_newlink, + .fill_info = hsr_fill_info, }; -- cgit v1.2.1 From 7c2781fa92f5b9ca3188817a56a2ced0400355f3 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 29 Nov 2013 11:02:43 -0800 Subject: netem: missing break in ge loss generator There is a missing break statement in the Gilbert Elliot loss model generator which makes state machine behave incorrectly. Reported-by: Martin Burri Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 75c94e59a3bd..6e91323f3dac 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -268,6 +268,7 @@ static bool loss_gilb_ell(struct netem_sched_data *q) clg->state = 2; if (net_random() < clg->a4) return true; + break; case 2: if (net_random() < clg->a2) clg->state = 1; -- cgit v1.2.1 From ab6c27be8178a4682446faa5aa017b948997937f Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 29 Nov 2013 11:03:35 -0800 Subject: netem: fix loss 4 state model Patch from developers of the alternative loss models, downloaded from: http://netgroup.uniroma2.it/twiki/bin/view.cgi/Main/NetemCLG "In the case 1 of the switch statement in the if conditions we need to add clg->a4 to clg->a1, according to the model." Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 6e91323f3dac..9685624f7bc7 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -215,10 +215,10 @@ static bool loss_4state(struct netem_sched_data *q) if (rnd < clg->a4) { clg->state = 4; return true; - } else if (clg->a4 < rnd && rnd < clg->a1) { + } else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) { clg->state = 3; return true; - } else if (clg->a1 < rnd) + } else if (clg->a1 + clg->a4 < rnd) clg->state = 1; break; -- cgit v1.2.1 From eff7979f00b2c546f36f6829f4072c8db54763a9 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 29 Nov 2013 11:04:26 -0800 Subject: netem: fix gemodel loss generator Patch from developers of the alternative loss models, downloaded from: http://netgroup.uniroma2.it/twiki/bin/view.cgi/Main/NetemCLG "in case 2, of the switch we change the direction of the inequality to net_random()>clg->a3, because clg->a3 is h in the GE model and when h is 0 all packets will be lost." Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 9685624f7bc7..bccd52b36e97 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -272,7 +272,7 @@ static bool loss_gilb_ell(struct netem_sched_data *q) case 2: if (net_random() < clg->a2) clg->state = 1; - if (clg->a3 > net_random()) + if (net_random() > clg->a3) return true; } -- cgit v1.2.1 From 3868204d6b89ea373a273e760609cb08020beb1a Mon Sep 17 00:00:00 2001 From: "fan.du" Date: Sun, 1 Dec 2013 16:28:48 +0800 Subject: {pktgen, xfrm} Update IPv4 header total len and checksum after tranformation commit a553e4a6317b2cfc7659542c10fe43184ffe53da ("[PKTGEN]: IPSEC support") tried to support IPsec ESP transport transformation for pktgen, but acctually this doesn't work at all for two reasons(The orignal transformed packet has bad IPv4 checksum value, as well as wrong auth value, reported by wireshark) - After transpormation, IPv4 header total length needs update, because encrypted payload's length is NOT same as that of plain text. - After transformation, IPv4 checksum needs re-caculate because of payload has been changed. With this patch, armmed pktgen with below cofiguration, Wireshark is able to decrypted ESP packet generated by pktgen without any IPv4 checksum error or auth value error. pgset "flag IPSEC" pgset "flows 1" Signed-off-by: Fan Du Signed-off-by: David S. Miller --- net/core/pktgen.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 261357a66300..a797fff7f222 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2527,6 +2527,8 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, if (x) { int ret; __u8 *eth; + struct iphdr *iph; + nhead = x->props.header_len - skb_headroom(skb); if (nhead > 0) { ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); @@ -2548,6 +2550,11 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, eth = (__u8 *) skb_push(skb, ETH_HLEN); memcpy(eth, pkt_dev->hh, 12); *(u16 *) ð[12] = protocol; + + /* Update IPv4 header len as well as checksum value */ + iph = ip_hdr(skb); + iph->tot_len = htons(skb->len - ETH_HLEN); + ip_send_check(iph); } } return 1; -- cgit v1.2.1 From dda444d52496aa8ddc501561bca580f1374a96a9 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Tue, 26 Nov 2013 16:07:26 +0100 Subject: cfg80211: disable CSA for all drivers The channel switch announcement code has some major locking problems which can cause a deadlock in worst case. A series of fixes has been proposed, but these are non-trivial and need to be tested first. Therefore disable CSA completely for 3.13. Signed-off-by: Simon Wunderlich Signed-off-by: Johannes Berg --- net/wireless/core.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 00a65ba3aeaa..52b865fb7351 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -454,6 +454,12 @@ int wiphy_register(struct wiphy *wiphy) /* support for 5/10 MHz is broken due to nl80211 API mess - disable */ wiphy->flags &= ~WIPHY_FLAG_SUPPORTS_5_10_MHZ; + /* + * There are major locking problems in nl80211/mac80211 for CSA, + * disable for all drivers until this has been reworked. + */ + wiphy->flags &= ~WIPHY_FLAG_HAS_CHANNEL_SWITCH; + #ifdef CONFIG_PM if (WARN_ON(wiphy->wowlan && (wiphy->wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && -- cgit v1.2.1 From 0834ae3c3af44480834cce128b6fef83006e537f Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Tue, 26 Nov 2013 16:45:18 +0100 Subject: mac80211: check csa wiphy flag in ibss before switching When external CSA IEs are received (beacons or action messages), a channel switch is triggered as well. This should only be allowed on devices which actually support channel switches, otherwise disconnect. (For the corresponding userspace invocation, the wiphy flag is checked in nl80211). Signed-off-by: Simon Wunderlich Signed-off-by: Johannes Berg --- net/mac80211/ibss.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 531be040b9ae..27a39de89679 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -823,6 +823,10 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata, if (err) return false; + /* channel switch is not supported, disconnect */ + if (!(sdata->local->hw.wiphy->flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH)) + goto disconnect; + params.count = csa_ie.count; params.chandef = csa_ie.chandef; -- cgit v1.2.1 From 30e56918dd1e6d64350661f186657f6a6f2646e6 Mon Sep 17 00:00:00 2001 From: Duan Jiong Date: Tue, 26 Nov 2013 15:46:56 +0800 Subject: ipv6: judge the accept_ra_defrtr before calling rt6_route_rcv when dealing with a RA message, if accept_ra_defrtr is false, the kernel will not add the default route, and then deal with the following route information options. Unfortunately, those options maybe contain default route, so let's judge the accept_ra_defrtr before calling rt6_route_rcv. Signed-off-by: Duan Jiong Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 3512177deb4d..300865171394 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1277,6 +1277,9 @@ skip_linkparms: ri->prefix_len == 0) continue; #endif + if (ri->prefix_len == 0 && + !in6_dev->cnf.accept_ra_defrtr) + continue; if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen) continue; rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3, -- cgit v1.2.1 From 57ec0afe293834f8ca9499214ed74748d89eaa44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois-Xavier=20Le=20Bail?= Date: Mon, 2 Dec 2013 11:28:49 +0100 Subject: ipv6: fix third arg of anycast_dst_alloc(), must be bool. Signed-off-by: Francois-Xavier Le Bail Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 12c97d8aa6bb..d5fa5b8c443e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2613,7 +2613,7 @@ static void init_loopback(struct net_device *dev) if (sp_ifa->rt) continue; - sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, 0); + sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, false); /* Failure cases are ignored */ if (!IS_ERR(sp_rt)) { -- cgit v1.2.1 From 7150aede5dd241539686e17d9592f5ebd28a2cda Mon Sep 17 00:00:00 2001 From: Kamala R Date: Mon, 2 Dec 2013 19:55:21 +0530 Subject: IPv6: Fixed support for blackhole and prohibit routes The behaviour of blackhole and prohibit routes has been corrected by setting the input and output pointers of the dst variable appropriately. For blackhole routes, they are set to dst_discard and to ip6_pkt_discard and ip6_pkt_discard_out respectively for prohibit routes. ipv6: ip6_pkt_prohibit(_out) should not depend on CONFIG_IPV6_MULTIPLE_TABLES We need ip6_pkt_prohibit(_out) available without CONFIG_IPV6_MULTIPLE_TABLES Signed-off-by: Kamala R Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/route.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7faa9d5e1503..ddb9d41c8eea 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -84,6 +84,8 @@ static int ip6_dst_gc(struct dst_ops *ops); static int ip6_pkt_discard(struct sk_buff *skb); static int ip6_pkt_discard_out(struct sk_buff *skb); +static int ip6_pkt_prohibit(struct sk_buff *skb); +static int ip6_pkt_prohibit_out(struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); @@ -234,9 +236,6 @@ static const struct rt6_info ip6_null_entry_template = { #ifdef CONFIG_IPV6_MULTIPLE_TABLES -static int ip6_pkt_prohibit(struct sk_buff *skb); -static int ip6_pkt_prohibit_out(struct sk_buff *skb); - static const struct rt6_info ip6_prohibit_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -1565,21 +1564,24 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } } - rt->dst.output = ip6_pkt_discard_out; - rt->dst.input = ip6_pkt_discard; rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; switch (cfg->fc_type) { case RTN_BLACKHOLE: rt->dst.error = -EINVAL; + rt->dst.output = dst_discard; + rt->dst.input = dst_discard; break; case RTN_PROHIBIT: rt->dst.error = -EACCES; + rt->dst.output = ip6_pkt_prohibit_out; + rt->dst.input = ip6_pkt_prohibit; break; case RTN_THROW: - rt->dst.error = -EAGAIN; - break; default: - rt->dst.error = -ENETUNREACH; + rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN + : -ENETUNREACH; + rt->dst.output = ip6_pkt_discard_out; + rt->dst.input = ip6_pkt_discard; break; } goto install_route; @@ -2144,8 +2146,6 @@ static int ip6_pkt_discard_out(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); } -#ifdef CONFIG_IPV6_MULTIPLE_TABLES - static int ip6_pkt_prohibit(struct sk_buff *skb) { return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); @@ -2157,8 +2157,6 @@ static int ip6_pkt_prohibit_out(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); } -#endif - /* * Allocate a dst for local (unicast / anycast) address. */ -- cgit v1.2.1 From 18fc25c94eadc52a42c025125af24657a93638c0 Mon Sep 17 00:00:00 2001 From: Venkat Venkatsubra Date: Mon, 2 Dec 2013 15:41:39 -0800 Subject: rds: prevent BUG_ON triggered on congestion update to loopback After congestion update on a local connection, when rds_ib_xmit returns less bytes than that are there in the message, rds_send_xmit calls back rds_ib_xmit with an offset that causes BUG_ON(off & RDS_FRAG_SIZE) to trigger. For a 4Kb PAGE_SIZE rds_ib_xmit returns min(8240,4096)=4096 when actually the message contains 8240 bytes. rds_send_xmit thinks there is more to send and calls rds_ib_xmit again with a data offset "off" of 4096-48(rds header) =4048 bytes thus hitting the BUG_ON(off & RDS_FRAG_SIZE) [RDS_FRAG_SIZE=4k]. The commit 6094628bfd94323fc1cea05ec2c6affd98c18f7f "rds: prevent BUG_ON triggering on congestion map updates" introduced this regression. That change was addressing the triggering of a different BUG_ON in rds_send_xmit() on PowerPC architecture with 64Kbytes PAGE_SIZE: BUG_ON(ret != 0 && conn->c_xmit_sg == rm->data.op_nents); This was the sequence it was going through: (rds_ib_xmit) /* Do not send cong updates to IB loopback */ if (conn->c_loopback && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { rds_cong_map_updated(conn->c_fcong, ~(u64) 0); return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; } rds_ib_xmit returns 8240 rds_send_xmit: c_xmit_data_off = 0 + 8240 - 48 (rds header accounted only the first time) = 8192 c_xmit_data_off < 65536 (sg->length), so calls rds_ib_xmit again rds_ib_xmit returns 8240 rds_send_xmit: c_xmit_data_off = 8192 + 8240 = 16432, calls rds_ib_xmit again and so on (c_xmit_data_off 24672,32912,41152,49392,57632) rds_ib_xmit returns 8240 On this iteration this sequence causes the BUG_ON in rds_send_xmit: while (ret) { tmp = min_t(int, ret, sg->length - conn->c_xmit_data_off); [tmp = 65536 - 57632 = 7904] conn->c_xmit_data_off += tmp; [c_xmit_data_off = 57632 + 7904 = 65536] ret -= tmp; [ret = 8240 - 7904 = 336] if (conn->c_xmit_data_off == sg->length) { conn->c_xmit_data_off = 0; sg++; conn->c_xmit_sg++; BUG_ON(ret != 0 && conn->c_xmit_sg == rm->data.op_nents); [c_xmit_sg = 1, rm->data.op_nents = 1] What the current fix does: Since the congestion update over loopback is not actually transmitted as a message, all that rds_ib_xmit needs to do is let the caller think the full message has been transmitted and not return partial bytes. It will return 8240 (RDS_CONG_MAP_BYTES+48) when PAGE_SIZE is 4Kb. And 64Kb+48 when page size is 64Kb. Reported-by: Josh Hunt Tested-by: Honggang Li Acked-by: Bang Nguyen Signed-off-by: Venkat Venkatsubra Signed-off-by: David S. Miller --- net/rds/ib_send.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index e59094981175..37be6e226d1b 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -552,9 +552,8 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { rds_cong_map_updated(conn->c_fcong, ~(u64) 0); scat = &rm->data.op_sg[sg]; - ret = sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; - ret = min_t(int, ret, scat->length - conn->c_xmit_data_off); - return ret; + ret = max_t(int, RDS_CONG_MAP_BYTES, scat->length); + return sizeof(struct rds_header) + ret; } /* FIXME we may overallocate here */ -- cgit v1.2.1 From 76c82d7a3d24a4ae1f9b098287c18055546c1a47 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 4 Dec 2013 09:26:52 -0500 Subject: net_sched: Fail if missing mandatory action operation methods Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_api.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index fd7072827a40..618695e84190 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -270,6 +270,10 @@ int tcf_register_action(struct tc_action_ops *act) { struct tc_action_ops *a, **ap; + /* Must supply act, dump, cleanup and init */ + if (!act->act || !act->dump || !act->cleanup || !act->init) + return -EINVAL; + write_lock(&act_mod_lock); for (ap = &act_base; (a = *ap) != NULL; ap = &a->next) { if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) { @@ -381,7 +385,7 @@ int tcf_action_exec(struct sk_buff *skb, const struct tc_action *act, } while ((a = act) != NULL) { repeat: - if (a->ops && a->ops->act) { + if (a->ops) { ret = a->ops->act(skb, a, res); if (TC_MUNGED & skb->tc_verd) { /* copied already, allow trampling */ @@ -405,7 +409,7 @@ void tcf_action_destroy(struct tc_action *act, int bind) struct tc_action *a; for (a = act; a; a = act) { - if (a->ops && a->ops->cleanup) { + if (a->ops) { if (a->ops->cleanup(a, bind) == ACT_P_DELETED) module_put(a->ops->owner); act = act->next; @@ -424,7 +428,7 @@ tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { int err = -EINVAL; - if (a->ops == NULL || a->ops->dump == NULL) + if (a->ops == NULL) return err; return a->ops->dump(skb, a, bind, ref); } @@ -436,7 +440,7 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; - if (a->ops == NULL || a->ops->dump == NULL) + if (a->ops == NULL) return err; if (nla_put_string(skb, TCA_KIND, a->ops->kind)) -- cgit v1.2.1 From 63ef6174654a986f263d25e957ef9d1ff243f649 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 4 Dec 2013 09:26:53 -0500 Subject: net_sched: Default action lookup method for actions Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_api.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 618695e84190..d1a022e441be 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -274,6 +274,9 @@ int tcf_register_action(struct tc_action_ops *act) if (!act->act || !act->dump || !act->cleanup || !act->init) return -EINVAL; + if (!act->lookup) + act->lookup = tcf_hash_search; + write_lock(&act_mod_lock); for (ap = &act_base; (a = *ap) != NULL; ap = &a->next) { if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) { @@ -727,8 +730,6 @@ tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid) a->ops = tc_lookup_action(tb[TCA_ACT_KIND]); if (a->ops == NULL) goto err_free; - if (a->ops->lookup == NULL) - goto err_mod; err = -ENOENT; if (a->ops->lookup(a, index) == 0) goto err_mod; -- cgit v1.2.1 From 43c00dcf8888daea234226e8adf09c37b00d2245 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 4 Dec 2013 09:26:54 -0500 Subject: net_sched: Use default action lookup functions Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_csum.c | 1 - net/sched/act_gact.c | 1 - net/sched/act_ipt.c | 2 -- net/sched/act_mirred.c | 1 - net/sched/act_nat.c | 1 - net/sched/act_pedit.c | 1 - net/sched/act_police.c | 1 - 7 files changed, 8 deletions(-) (limited to 'net') diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 3a4c0caa1f7d..4225a9382a2b 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -585,7 +585,6 @@ static struct tc_action_ops act_csum_ops = { .act = tcf_csum, .dump = tcf_csum_dump, .cleanup = tcf_csum_cleanup, - .lookup = tcf_hash_search, .init = tcf_csum_init, .walk = tcf_generic_walker }; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index fd2b3cff5fa2..15851da99f3b 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -206,7 +206,6 @@ static struct tc_action_ops act_gact_ops = { .act = tcf_gact, .dump = tcf_gact_dump, .cleanup = tcf_gact_cleanup, - .lookup = tcf_hash_search, .init = tcf_gact_init, .walk = tcf_generic_walker }; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 60d88b6b9560..1d3e19180c2e 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -298,7 +298,6 @@ static struct tc_action_ops act_ipt_ops = { .act = tcf_ipt, .dump = tcf_ipt_dump, .cleanup = tcf_ipt_cleanup, - .lookup = tcf_hash_search, .init = tcf_ipt_init, .walk = tcf_generic_walker }; @@ -312,7 +311,6 @@ static struct tc_action_ops act_xt_ops = { .act = tcf_ipt, .dump = tcf_ipt_dump, .cleanup = tcf_ipt_cleanup, - .lookup = tcf_hash_search, .init = tcf_ipt_init, .walk = tcf_generic_walker }; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 977c10e0631b..6cb16ec3d624 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -271,7 +271,6 @@ static struct tc_action_ops act_mirred_ops = { .act = tcf_mirred, .dump = tcf_mirred_dump, .cleanup = tcf_mirred_cleanup, - .lookup = tcf_hash_search, .init = tcf_mirred_init, .walk = tcf_generic_walker }; diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 876f0ef29694..30c13dedc94d 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -308,7 +308,6 @@ static struct tc_action_ops act_nat_ops = { .act = tcf_nat, .dump = tcf_nat_dump, .cleanup = tcf_nat_cleanup, - .lookup = tcf_hash_search, .init = tcf_nat_init, .walk = tcf_generic_walker }; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 7ed78c9e505c..ab4fc56f8852 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -243,7 +243,6 @@ static struct tc_action_ops act_pedit_ops = { .act = tcf_pedit, .dump = tcf_pedit_dump, .cleanup = tcf_pedit_cleanup, - .lookup = tcf_hash_search, .init = tcf_pedit_init, .walk = tcf_generic_walker }; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 272d8e924cf6..16a62c36928a 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -407,7 +407,6 @@ static struct tc_action_ops act_police_ops = { .act = tcf_act_police, .dump = tcf_act_police_dump, .cleanup = tcf_act_police_cleanup, - .lookup = tcf_hash_search, .init = tcf_act_police_locate, .walk = tcf_act_police_walker }; -- cgit v1.2.1 From 382ca8a1ad8963c7676585f9e25f4c5ff8b28439 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 4 Dec 2013 09:26:55 -0500 Subject: net_sched: Provide default walker function for actions Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_api.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sched/act_api.c b/net/sched/act_api.c index d1a022e441be..69cb848e8345 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -274,8 +274,11 @@ int tcf_register_action(struct tc_action_ops *act) if (!act->act || !act->dump || !act->cleanup || !act->init) return -EINVAL; + /* Supply defaults */ if (!act->lookup) act->lookup = tcf_hash_search; + if (!act->walk) + act->walk = tcf_generic_walker; write_lock(&act_mod_lock); for (ap = &act_base; (a = *ap) != NULL; ap = &a->next) { @@ -1089,12 +1092,6 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) memset(&a, 0, sizeof(struct tc_action)); a.ops = a_o; - if (a_o->walk == NULL) { - WARN(1, "tc_dump_action: %s !capable of dumping table\n", - a_o->kind); - goto out_module_put; - } - nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, sizeof(*t), 0); if (!nlh) -- cgit v1.2.1 From 651a6493ae5c055c78777bb7178c23b5565631da Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 4 Dec 2013 09:26:56 -0500 Subject: net_sched: Use default action walker methods Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_csum.c | 1 - net/sched/act_gact.c | 1 - net/sched/act_ipt.c | 2 -- net/sched/act_mirred.c | 1 - net/sched/act_nat.c | 1 - net/sched/act_pedit.c | 1 - net/sched/act_simple.c | 1 - net/sched/act_skbedit.c | 1 - 8 files changed, 9 deletions(-) (limited to 'net') diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 4225a9382a2b..5c5edf56adbd 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -586,7 +586,6 @@ static struct tc_action_ops act_csum_ops = { .dump = tcf_csum_dump, .cleanup = tcf_csum_cleanup, .init = tcf_csum_init, - .walk = tcf_generic_walker }; MODULE_DESCRIPTION("Checksum updating actions"); diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 15851da99f3b..5645a4d32abd 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -207,7 +207,6 @@ static struct tc_action_ops act_gact_ops = { .dump = tcf_gact_dump, .cleanup = tcf_gact_cleanup, .init = tcf_gact_init, - .walk = tcf_generic_walker }; MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 1d3e19180c2e..882a89762f77 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -299,7 +299,6 @@ static struct tc_action_ops act_ipt_ops = { .dump = tcf_ipt_dump, .cleanup = tcf_ipt_cleanup, .init = tcf_ipt_init, - .walk = tcf_generic_walker }; static struct tc_action_ops act_xt_ops = { @@ -312,7 +311,6 @@ static struct tc_action_ops act_xt_ops = { .dump = tcf_ipt_dump, .cleanup = tcf_ipt_cleanup, .init = tcf_ipt_init, - .walk = tcf_generic_walker }; MODULE_AUTHOR("Jamal Hadi Salim(2002-13)"); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 6cb16ec3d624..252378121ce7 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -272,7 +272,6 @@ static struct tc_action_ops act_mirred_ops = { .dump = tcf_mirred_dump, .cleanup = tcf_mirred_cleanup, .init = tcf_mirred_init, - .walk = tcf_generic_walker }; MODULE_AUTHOR("Jamal Hadi Salim(2002)"); diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 30c13dedc94d..6a15ace00241 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -309,7 +309,6 @@ static struct tc_action_ops act_nat_ops = { .dump = tcf_nat_dump, .cleanup = tcf_nat_cleanup, .init = tcf_nat_init, - .walk = tcf_generic_walker }; MODULE_DESCRIPTION("Stateless NAT actions"); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index ab4fc56f8852..03b67674169c 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -244,7 +244,6 @@ static struct tc_action_ops act_pedit_ops = { .dump = tcf_pedit_dump, .cleanup = tcf_pedit_cleanup, .init = tcf_pedit_init, - .walk = tcf_generic_walker }; MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 7725eb4ab756..31157d3e729c 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -201,7 +201,6 @@ static struct tc_action_ops act_simp_ops = { .dump = tcf_simp_dump, .cleanup = tcf_simp_cleanup, .init = tcf_simp_init, - .walk = tcf_generic_walker, }; MODULE_AUTHOR("Jamal Hadi Salim(2005)"); diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index cb4221171f93..35ea643b4325 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -203,7 +203,6 @@ static struct tc_action_ops act_skbedit_ops = { .dump = tcf_skbedit_dump, .cleanup = tcf_skbedit_cleanup, .init = tcf_skbedit_init, - .walk = tcf_generic_walker, }; MODULE_AUTHOR("Alexander Duyck, "); -- cgit v1.2.1 From 78ac814f120da17053b3d52aa215c7c547c5e77d Mon Sep 17 00:00:00 2001 From: wangweidong Date: Wed, 4 Dec 2013 17:32:39 +0800 Subject: sctp: disable max_burst when the max_burst is 0 As Michael pointed out that when max_burst is 0, it just disable max_burst. It declared in rfc6458#section-8.1.24. so add the check in sctp_transport_burst_limited, when it 0, just do nothing. Reviewed-by: Daniel Borkmann Suggested-by: Vlad Yasevich Suggested-by: Michael Tuexen Signed-off-by: Wang Weidong Acked-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/transport.c b/net/sctp/transport.c index e332efb124cc..efc46ffed1fd 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -573,7 +573,7 @@ void sctp_transport_burst_limited(struct sctp_transport *t) u32 old_cwnd = t->cwnd; u32 max_burst_bytes; - if (t->burst_limited) + if (t->burst_limited || asoc->max_burst == 0) return; max_burst_bytes = t->flight_size + (asoc->max_burst * asoc->pathmtu); -- cgit v1.2.1 From 7f2cbdc28c034ef2c3be729681f631d5744e3cd5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 4 Dec 2013 20:12:04 -0800 Subject: tcp_memcontrol: Cleanup/fix cg_proto->memory_pressure handling. kill memcg_tcp_enter_memory_pressure. The only function of memcg_tcp_enter_memory_pressure was to reduce deal with the unnecessary abstraction that was tcp_memcontrol. Now that struct tcp_memcontrol is gone remove this unnecessary function, the unnecessary function pointer, and modify sk_enter_memory_pressure to set this field directly, just as sk_leave_memory_pressure cleas this field directly. This fixes a small bug I intruduced when killing struct tcp_memcontrol that caused memcg_tcp_enter_memory_pressure to never be called and thus failed to ever set cg_proto->memory_pressure. Remove the cg_proto enter_memory_pressure function as it now serves no useful purpose. Don't test cg_proto->memory_presser in sk_leave_memory_pressure before clearing it. The test was originally there to ensure that the pointer was non-NULL. Now that cg_proto is not a pointer the pointer does not matter. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/ipv4/tcp_memcontrol.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 269a89ecd2f4..f7e522c558ba 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -6,13 +6,6 @@ #include #include -static void memcg_tcp_enter_memory_pressure(struct sock *sk) -{ - if (sk->sk_cgrp->memory_pressure) - sk->sk_cgrp->memory_pressure = 1; -} -EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure); - int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { /* -- cgit v1.2.1 From 239c78db9c41a8f524cce60507440d72229d73bc Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 5 Dec 2013 23:29:19 +0100 Subject: net: clear local_df when passing skb between namespaces We must clear local_df when passing the skb between namespaces as the packet is not local to the new namespace any more and thus may not get fragmented by local rules. Fred Templin noticed that other namespaces do fragment IPv6 packets while forwarding. Instead they should have send back a PTB. The same problem should be present when forwarding DF-IPv4 packets between namespaces. Reported-by: Templin, Fred L Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/core/skbuff.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2718fed53d8c..06e72d3cdf60 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3584,6 +3584,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->tstamp.tv64 = 0; skb->pkt_type = PACKET_HOST; skb->skb_iif = 0; + skb->local_df = 0; skb_dst_drop(skb); skb->mark = 0; secpath_reset(skb); -- cgit v1.2.1 From 859828c0ea476b42f3a93d69d117aaba90994b6f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 5 Dec 2013 16:27:37 +0100 Subject: br: fix use of ->rx_handler_data in code executed on non-rx_handler path br_stp_rcv() is reached by non-rx_handler path. That means there is no guarantee that dev is bridge port and therefore simple NULL check of ->rx_handler_data is not enough. There is need to check if dev is really bridge port and since only rcu read lock is held here, do it by checking ->rx_handler pointer. Note that synchronize_net() in netdev_rx_handler_unregister() ensures this approach as valid. Introduced originally by: commit f350a0a87374418635689471606454abc7beaa3a "bridge: use rx_handler_data pointer to store net_bridge_port pointer" Fixed but not in the best way by: commit b5ed54e94d324f17c97852296d61a143f01b227a "bridge: fix RCU races with bridge port" Reintroduced by: commit 716ec052d2280d511e10e90ad54a86f5b5d4dcc2 "bridge: fix NULL pointer deref of br_port_get_rcu" Please apply to stable trees as well. Thanks. RH bugzilla reference: https://bugzilla.redhat.com/show_bug.cgi?id=1025770 Reported-by: Laine Stump Debugged-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Signed-off-by: Jiri Pirko Acked-by: Michael S. Tsirkin Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/bridge/br_private.h | 10 ++++++++++ net/bridge/br_stp_bpdu.c | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 229d820bdf0b..045d56eaeca2 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -426,6 +426,16 @@ netdev_features_t br_features_recompute(struct net_bridge *br, int br_handle_frame_finish(struct sk_buff *skb); rx_handler_result_t br_handle_frame(struct sk_buff **pskb); +static inline bool br_rx_handler_check_rcu(const struct net_device *dev) +{ + return rcu_dereference(dev->rx_handler) == br_handle_frame; +} + +static inline struct net_bridge_port *br_port_get_check_rcu(const struct net_device *dev) +{ + return br_rx_handler_check_rcu(dev) ? br_port_get_rcu(dev) : NULL; +} + /* br_ioctl.c */ int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 8660ea3be705..bdb459d21ad8 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -153,7 +153,7 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, if (buf[0] != 0 || buf[1] != 0 || buf[2] != 0) goto err; - p = br_port_get_rcu(dev); + p = br_port_get_check_rcu(dev); if (!p) goto err; -- cgit v1.2.1 From b4ef4ce09308955d1aa54a289c0162607b3aa16c Mon Sep 17 00:00:00 2001 From: Sergey Popovich Date: Fri, 6 Dec 2013 10:57:19 +0200 Subject: netfilter: xt_hashlimit: fix proc entry leak in netns destroy path In (32263dd1b netfilter: xt_hashlimit: fix namespace destroy path) the hashlimit_net_exit() function is always called right before hashlimit_mt_destroy() to release netns data. If you use xt_hashlimit with IPv4 and IPv6 together, this produces the following splat via netconsole in the netns destroy path: Pid: 9499, comm: kworker/u:0 Tainted: G WC O 3.2.0-5-netctl-amd64-core2 Call Trace: [] ? warn_slowpath_common+0x78/0x8c [] ? warn_slowpath_fmt+0x45/0x4a [] ? remove_proc_entry+0xd8/0x22e [] ? kfree+0x5b/0x6c [] ? hashlimit_net_exit+0x45/0x8d [xt_hashlimit] [] ? ops_exit_list+0x1c/0x44 [] ? cleanup_net+0xf1/0x180 [] ? should_resched+0x5/0x23 [] ? process_one_work+0x161/0x269 [] ? cwq_activate_delayed_work+0x3c/0x48 [] ? worker_thread+0xc2/0x145 [] ? manage_workers.isra.25+0x15b/0x15b [] ? kthread+0x76/0x7e [] ? kernel_thread_helper+0x4/0x10 [] ? kthread_worker_fn+0x139/0x139 [] ? gs_change+0x13/0x13 ---[ end trace d8c3cc0ad163ef79 ]--- ------------[ cut here ]------------ WARNING: at /usr/src/linux-3.2.52/debian/build/source_netctl/fs/proc/generic.c:849 remove_proc_entry+0x217/0x22e() Hardware name: remove_proc_entry: removing non-empty directory 'net/ip6t_hashlimit', leaking at least 'IN-REJECT' This is due to lack of removal net/ip6t_hashlimit/* entries in hashlimit_proc_net_exit(), since only IPv4 entries are deleted. Fix it by always removing the IPv4 and IPv6 entries and their parent directories in the netns destroy path. Signed-off-by: Sergey Popovich Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_hashlimit.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 9ff035c71403..a3910fc2122b 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -325,21 +325,24 @@ static void htable_gc(unsigned long htlong) add_timer(&ht->timer); } -static void htable_destroy(struct xt_hashlimit_htable *hinfo) +static void htable_remove_proc_entry(struct xt_hashlimit_htable *hinfo) { struct hashlimit_net *hashlimit_net = hashlimit_pernet(hinfo->net); struct proc_dir_entry *parent; - del_timer_sync(&hinfo->timer); - if (hinfo->family == NFPROTO_IPV4) parent = hashlimit_net->ipt_hashlimit; else parent = hashlimit_net->ip6t_hashlimit; - if(parent != NULL) + if (parent != NULL) remove_proc_entry(hinfo->name, parent); +} +static void htable_destroy(struct xt_hashlimit_htable *hinfo) +{ + del_timer_sync(&hinfo->timer); + htable_remove_proc_entry(hinfo); htable_selective_cleanup(hinfo, select_all); kfree(hinfo->name); vfree(hinfo); @@ -883,21 +886,15 @@ static int __net_init hashlimit_proc_net_init(struct net *net) static void __net_exit hashlimit_proc_net_exit(struct net *net) { struct xt_hashlimit_htable *hinfo; - struct proc_dir_entry *pde; struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); - /* recent_net_exit() is called before recent_mt_destroy(). Make sure - * that the parent xt_recent proc entry is is empty before trying to - * remove it. + /* hashlimit_net_exit() is called before hashlimit_mt_destroy(). + * Make sure that the parent ipt_hashlimit and ip6t_hashlimit proc + * entries is empty before trying to remove it. */ mutex_lock(&hashlimit_mutex); - pde = hashlimit_net->ipt_hashlimit; - if (pde == NULL) - pde = hashlimit_net->ip6t_hashlimit; - hlist_for_each_entry(hinfo, &hashlimit_net->htables, node) - remove_proc_entry(hinfo->name, pde); - + htable_remove_proc_entry(hinfo); hashlimit_net->ipt_hashlimit = NULL; hashlimit_net->ip6t_hashlimit = NULL; mutex_unlock(&hashlimit_mutex); -- cgit v1.2.1 From cf9dc09d0949f0b5953fb08caa10bba0dc7ee71f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 24 Nov 2013 20:39:10 +0100 Subject: netfilter: nf_tables: fix missing rules flushing per table This patch allows you to atomically remove all rules stored in a table via the NFT_MSG_DELRULE command. You only need to indicate the specific table and no chain to flush all rules stored in that table. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 46 +++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index dcddc49c0e08..f93b7d06f4be 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1717,6 +1717,19 @@ nf_tables_delrule_one(struct nft_ctx *ctx, struct nft_rule *rule) return -ENOENT; } +static int nf_table_delrule_by_chain(struct nft_ctx *ctx) +{ + struct nft_rule *rule; + int err; + + list_for_each_entry(rule, &ctx->chain->rules, list) { + err = nf_tables_delrule_one(ctx, rule); + if (err < 0) + return err; + } + return 0; +} + static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -1725,8 +1738,8 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, const struct nft_af_info *afi; struct net *net = sock_net(skb->sk); const struct nft_table *table; - struct nft_chain *chain; - struct nft_rule *rule, *tmp; + struct nft_chain *chain = NULL; + struct nft_rule *rule; int family = nfmsg->nfgen_family, err = 0; struct nft_ctx ctx; @@ -1738,22 +1751,29 @@ static int nf_tables_delrule(struct sock *nlsk, struct sk_buff *skb, if (IS_ERR(table)) return PTR_ERR(table); - chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); - if (IS_ERR(chain)) - return PTR_ERR(chain); + if (nla[NFTA_RULE_CHAIN]) { + chain = nf_tables_chain_lookup(table, nla[NFTA_RULE_CHAIN]); + if (IS_ERR(chain)) + return PTR_ERR(chain); + } nft_ctx_init(&ctx, skb, nlh, afi, table, chain, nla); - if (nla[NFTA_RULE_HANDLE]) { - rule = nf_tables_rule_lookup(chain, nla[NFTA_RULE_HANDLE]); - if (IS_ERR(rule)) - return PTR_ERR(rule); + if (chain) { + if (nla[NFTA_RULE_HANDLE]) { + rule = nf_tables_rule_lookup(chain, + nla[NFTA_RULE_HANDLE]); + if (IS_ERR(rule)) + return PTR_ERR(rule); - err = nf_tables_delrule_one(&ctx, rule); - } else { - /* Remove all rules in this chain */ - list_for_each_entry_safe(rule, tmp, &chain->rules, list) { err = nf_tables_delrule_one(&ctx, rule); + } else { + err = nf_table_delrule_by_chain(&ctx); + } + } else { + list_for_each_entry(chain, &table->chains, list) { + ctx.chain = chain; + err = nf_table_delrule_by_chain(&ctx); if (err < 0) break; } -- cgit v1.2.1 From 66e56cd46b93ef407c60adcac62cf33b06119d50 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 6 Dec 2013 11:36:15 +0100 Subject: packet: fix send path when running with proto == 0 Commit e40526cb20b5 introduced a cached dev pointer, that gets hooked into register_prot_hook(), __unregister_prot_hook() to update the device used for the send path. We need to fix this up, as otherwise this will not work with sockets created with protocol = 0, plus with sll_protocol = 0 passed via sockaddr_ll when doing the bind. So instead, assign the pointer directly. The compiler can inline these helper functions automagically. While at it, also assume the cached dev fast-path as likely(), and document this variant of socket creation as it seems it is not widely used (seems not even the author of TX_RING was aware of that in his reference example [1]). Tested with reproducer from e40526cb20b5. [1] http://wiki.ipxwarzone.com/index.php5?title=Linux_packet_mmap#Example Fixes: e40526cb20b5 ("packet: fix use after free race in send path when dev is released") Signed-off-by: Daniel Borkmann Tested-by: Salam Noureddine Tested-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/packet/af_packet.c | 65 +++++++++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ba2548bd85bf..88cfbc189558 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -237,6 +237,30 @@ struct packet_skb_cb { static void __fanout_unlink(struct sock *sk, struct packet_sock *po); static void __fanout_link(struct sock *sk, struct packet_sock *po); +static struct net_device *packet_cached_dev_get(struct packet_sock *po) +{ + struct net_device *dev; + + rcu_read_lock(); + dev = rcu_dereference(po->cached_dev); + if (likely(dev)) + dev_hold(dev); + rcu_read_unlock(); + + return dev; +} + +static void packet_cached_dev_assign(struct packet_sock *po, + struct net_device *dev) +{ + rcu_assign_pointer(po->cached_dev, dev); +} + +static void packet_cached_dev_reset(struct packet_sock *po) +{ + RCU_INIT_POINTER(po->cached_dev, NULL); +} + /* register_prot_hook must be invoked with the po->bind_lock held, * or from a context in which asynchronous accesses to the packet * socket is not possible (packet_create()). @@ -246,12 +270,10 @@ static void register_prot_hook(struct sock *sk) struct packet_sock *po = pkt_sk(sk); if (!po->running) { - if (po->fanout) { + if (po->fanout) __fanout_link(sk, po); - } else { + else dev_add_pack(&po->prot_hook); - rcu_assign_pointer(po->cached_dev, po->prot_hook.dev); - } sock_hold(sk); po->running = 1; @@ -270,12 +292,11 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) struct packet_sock *po = pkt_sk(sk); po->running = 0; - if (po->fanout) { + + if (po->fanout) __fanout_unlink(sk, po); - } else { + else __dev_remove_pack(&po->prot_hook); - RCU_INIT_POINTER(po->cached_dev, NULL); - } __sock_put(sk); @@ -2059,19 +2080,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, return tp_len; } -static struct net_device *packet_cached_dev_get(struct packet_sock *po) -{ - struct net_device *dev; - - rcu_read_lock(); - dev = rcu_dereference(po->cached_dev); - if (dev) - dev_hold(dev); - rcu_read_unlock(); - - return dev; -} - static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { struct sk_buff *skb; @@ -2088,7 +2096,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) mutex_lock(&po->pg_vec_lock); - if (saddr == NULL) { + if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; @@ -2242,7 +2250,7 @@ static int packet_snd(struct socket *sock, * Get and verify the address. */ - if (saddr == NULL) { + if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; @@ -2451,6 +2459,8 @@ static int packet_release(struct socket *sock) spin_lock(&po->bind_lock); unregister_prot_hook(sk, false); + packet_cached_dev_reset(po); + if (po->prot_hook.dev) { dev_put(po->prot_hook.dev); po->prot_hook.dev = NULL; @@ -2506,14 +2516,17 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protoc spin_lock(&po->bind_lock); unregister_prot_hook(sk, true); + po->num = protocol; po->prot_hook.type = protocol; if (po->prot_hook.dev) dev_put(po->prot_hook.dev); - po->prot_hook.dev = dev; + po->prot_hook.dev = dev; po->ifindex = dev ? dev->ifindex : 0; + packet_cached_dev_assign(po, dev); + if (protocol == 0) goto out_unlock; @@ -2626,7 +2639,8 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po = pkt_sk(sk); sk->sk_family = PF_PACKET; po->num = proto; - RCU_INIT_POINTER(po->cached_dev, NULL); + + packet_cached_dev_reset(po); sk->sk_destruct = packet_sock_destruct; sk_refcnt_debug_inc(sk); @@ -3337,6 +3351,7 @@ static int packet_notifier(struct notifier_block *this, sk->sk_error_report(sk); } if (msg == NETDEV_UNREGISTER) { + packet_cached_dev_reset(po); po->ifindex = -1; if (po->prot_hook.dev) dev_put(po->prot_hook.dev); -- cgit v1.2.1 From a3300ef4bbb1f1e33ff0400e1e6cf7733d988f4f Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 7 Dec 2013 03:33:45 +0100 Subject: ipv6: don't count addrconf generated routes against gc limit Brett Ciphery reported that new ipv6 addresses failed to get installed because the addrconf generated dsts where counted against the dst gc limit. We don't need to count those routes like we currently don't count administratively added routes. Because the max_addresses check enforces a limit on unbounded address generation first in case someone plays with router advertisments, we are still safe here. Reported-by: Brett Ciphery Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/route.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ddb9d41c8eea..a0a48ac3403f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2166,12 +2166,10 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, bool anycast) { struct net *net = dev_net(idev->dev); - struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL); - - if (!rt) { - net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n"); + struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, + DST_NOCOUNT, NULL); + if (!rt) return ERR_PTR(-ENOMEM); - } in6_dev_hold(idev); -- cgit v1.2.1 From d323e92cc3f4edd943610557c9ea1bb4bb5056e8 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sun, 8 Dec 2013 09:36:56 -0500 Subject: net: drop_monitor: fix the value of maxattr maxattr in genl_family should be used to save the max attribute type, but not the max command type. Drop monitor doesn't support any attributes, so we should leave it as zero. Signed-off-by: David S. Miller --- net/core/drop_monitor.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 95897183226e..e70301eb7a4a 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -64,7 +64,6 @@ static struct genl_family net_drop_monitor_family = { .hdrsize = 0, .name = "NET_DM", .version = 2, - .maxattr = NET_DM_CMD_MAX, }; static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); -- cgit v1.2.1 From 673498b8ed4c4d4b7221c5309d891c5eac2b7528 Mon Sep 17 00:00:00 2001 From: Stefan Tomanek Date: Tue, 10 Dec 2013 23:21:25 +0100 Subject: inet: fix NULL pointer Oops in fib(6)_rule_suppress This changes ensures that the routing entry investigated by the suppress function actually does point to a device struct before following that pointer, fixing a possible kernel oops situation when verifying the interface group associated with a routing table entry. According to Daniel Golle, this Oops can be triggered by a user process trying to establish an outgoing IPv6 connection while having no real IPv6 connectivity set up (only autoassigned link-local addresses). Fixes: 6ef94cfafba15 ("fib_rules: add route suppression based on ifgroup") Reported-by: Daniel Golle Tested-by: Daniel Golle Signed-off-by: Stefan Tomanek Signed-off-by: David S. Miller --- net/ipv4/fib_rules.c | 5 ++++- net/ipv6/fib6_rules.c | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 523be38e37de..f2e15738534d 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -104,7 +104,10 @@ errout: static bool fib4_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg) { struct fib_result *result = (struct fib_result *) arg->result; - struct net_device *dev = result->fi->fib_dev; + struct net_device *dev = NULL; + + if (result->fi) + dev = result->fi->fib_dev; /* do not accept result if the route does * not meet the required prefix length diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index e27591635f92..3fd0a578329e 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -122,7 +122,11 @@ out: static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg) { struct rt6_info *rt = (struct rt6_info *) arg->result; - struct net_device *dev = rt->rt6i_idev->dev; + struct net_device *dev = NULL; + + if (rt->rt6i_idev) + dev = rt->rt6i_idev->dev; + /* do not accept result if the route does * not meet the required prefix length */ -- cgit v1.2.1 From 12663bfc97c8b3fdb292428105dd92d563164050 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sat, 7 Dec 2013 17:26:27 -0500 Subject: net: unix: allow set_peek_off to fail unix_dgram_recvmsg() will hold the readlock of the socket until recv is complete. In the same time, we may try to setsockopt(SO_PEEK_OFF) which will hang until unix_dgram_recvmsg() will complete (which can take a while) without allowing us to break out of it, triggering a hung task spew. Instead, allow set_peek_off to fail, this way userspace will not hang. Signed-off-by: Sasha Levin Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/core/sock.c | 2 +- net/unix/af_unix.c | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index ab20ed9b0f31..5393b4b719d7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -882,7 +882,7 @@ set_rcvbuf: case SO_PEEK_OFF: if (sock->ops->set_peek_off) - sock->ops->set_peek_off(sk, val); + ret = sock->ops->set_peek_off(sk, val); else ret = -EOPNOTSUPP; break; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 01625ccc3ae6..a0ca162e5bd5 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -530,13 +530,17 @@ static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *, static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t, int); -static void unix_set_peek_off(struct sock *sk, int val) +static int unix_set_peek_off(struct sock *sk, int val) { struct unix_sock *u = unix_sk(sk); - mutex_lock(&u->readlock); + if (mutex_lock_interruptible(&u->readlock)) + return -EINTR; + sk->sk_peek_off = val; mutex_unlock(&u->readlock); + + return 0; } -- cgit v1.2.1 From 993b858e37b3120ee76d9957a901cca22312ffaa Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Mon, 9 Dec 2013 22:54:46 -0800 Subject: tipc: correct the order of stopping services at rmmod The 'signal handler' service in TIPC is a mechanism that makes it possible to postpone execution of functions, by launcing them into a job queue for execution in a separate tasklet, independent of the launching execution thread. When we do rmmod on the tipc module, this service is stopped after the network service. At the same time, the stopping of the network service may itself launch jobs for execution, with the risk that these functions may be scheduled for execution after the data structures meant to be accessed by the job have already been deleted. We have seen this happen, most often resulting in an oops. This commit ensures that the signal handler is the very first to be stopped when TIPC is shut down, so there are no surprises during the cleanup of the other services. Signed-off-by: Jon Maloy Reviewed-by: Paul Gortmaker Signed-off-by: David S. Miller --- net/tipc/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/tipc/core.c b/net/tipc/core.c index fd4eeeaa972a..c6d3f75a9e1b 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -113,7 +113,6 @@ err: static void tipc_core_stop(void) { tipc_netlink_stop(); - tipc_handler_stop(); tipc_cfg_stop(); tipc_subscr_stop(); tipc_nametbl_stop(); @@ -146,9 +145,10 @@ static int tipc_core_start(void) res = tipc_subscr_start(); if (!res) res = tipc_cfg_init(); - if (res) + if (res) { + tipc_handler_stop(); tipc_core_stop(); - + } return res; } @@ -178,6 +178,7 @@ static int __init tipc_init(void) static void __exit tipc_exit(void) { + tipc_handler_stop(); tipc_core_stop_net(); tipc_core_stop(); pr_info("Deactivated\n"); -- cgit v1.2.1 From 00ede977098be3296d42d05a4265ec5ec4a28419 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 9 Dec 2013 22:54:47 -0800 Subject: tipc: protect handler_enabled variable with qitem_lock spin lock 'handler_enabled' is a global flag indicating whether the TIPC signal handling service is enabled or not. The lack of lock protection for this flag incurs a risk for contention, so that a tipc_k_signal() call might queue a signal handler to a destroyed signal queue, with unpredictable results. To correct this, we let the already existing 'qitem_lock' protect the flag, as it already does with the queue itself. This way, we ensure that the flag always is consistent across all cores. Signed-off-by: Ying Xue Reviewed-by: Paul Gortmaker Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/handler.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/tipc/handler.c b/net/tipc/handler.c index b36f0fcd9bdf..e4bc8a296744 100644 --- a/net/tipc/handler.c +++ b/net/tipc/handler.c @@ -56,12 +56,13 @@ unsigned int tipc_k_signal(Handler routine, unsigned long argument) { struct queue_item *item; + spin_lock_bh(&qitem_lock); if (!handler_enabled) { pr_err("Signal request ignored by handler\n"); + spin_unlock_bh(&qitem_lock); return -ENOPROTOOPT; } - spin_lock_bh(&qitem_lock); item = kmem_cache_alloc(tipc_queue_item_cache, GFP_ATOMIC); if (!item) { pr_err("Signal queue out of memory\n"); @@ -112,10 +113,14 @@ void tipc_handler_stop(void) struct list_head *l, *n; struct queue_item *item; - if (!handler_enabled) + spin_lock_bh(&qitem_lock); + if (!handler_enabled) { + spin_unlock_bh(&qitem_lock); return; - + } handler_enabled = 0; + spin_unlock_bh(&qitem_lock); + tasklet_kill(&tipc_tasklet); spin_lock_bh(&qitem_lock); -- cgit v1.2.1 From 9f70f46bd4c7267d48ef461a1d613ec9ec0d520c Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 10 Dec 2013 06:48:15 -0500 Subject: sctp: properly latch and use autoclose value from sock to association Currently, sctp associations latch a sockets autoclose value to an association at association init time, subject to capping constraints from the max_autoclose sysctl value. This leads to an odd situation where an application may set a socket level autoclose timeout, but sliently sctp will limit the autoclose timeout to something less than that. Fix this by modifying the autoclose setsockopt function to check the limit, cap it and warn the user via syslog that the timeout is capped. This will allow getsockopt to return valid autoclose timeout values that reflect what subsequent associations actually use. While were at it, also elimintate the assoc->autoclose variable, it duplicates whats in the timeout array, which leads to multiple sources for the same information, that may differ (as the former isn't subject to any capping). This gives us the timeout information in a canonical place and saves some space in the association structure as well. Signed-off-by: Neil Horman Acked-by: Vlad Yasevich CC: Wang Weidong CC: David Miller CC: Vlad Yasevich CC: netdev@vger.kernel.org Signed-off-by: David S. Miller --- net/sctp/associola.c | 5 +---- net/sctp/output.c | 3 ++- net/sctp/sm_statefuns.c | 12 ++++++------ net/sctp/socket.c | 4 ++++ 4 files changed, 13 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 68a27f9796d2..31ed008c8e13 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -154,8 +154,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0; asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; - asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = - min_t(unsigned long, sp->autoclose, net->sctp.max_autoclose) * HZ; + asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = sp->autoclose * HZ; /* Initializes the timers */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) @@ -291,8 +290,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->peer.ipv6_address = 1; INIT_LIST_HEAD(&asoc->asocs); - asoc->autoclose = sp->autoclose; - asoc->default_stream = sp->default_stream; asoc->default_ppid = sp->default_ppid; asoc->default_flags = sp->default_flags; diff --git a/net/sctp/output.c b/net/sctp/output.c index 0e2644d0a773..0fb140f8f088 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -581,7 +581,8 @@ int sctp_packet_transmit(struct sctp_packet *packet) unsigned long timeout; /* Restart the AUTOCLOSE timer when sending data. */ - if (sctp_state(asoc, ESTABLISHED) && asoc->autoclose) { + if (sctp_state(asoc, ESTABLISHED) && + asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) { timer = &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; timeout = asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index dfe3f36ff2aa..a26065be7289 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -820,7 +820,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, SCTP_INC_STATS(net, SCTP_MIB_PASSIVEESTABS); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); - if (new_asoc->autoclose) + if (new_asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); @@ -908,7 +908,7 @@ sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net, SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB); SCTP_INC_STATS(net, SCTP_MIB_ACTIVEESTABS); sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); - if (asoc->autoclose) + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); @@ -2970,7 +2970,7 @@ sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net, if (chunk->chunk_hdr->flags & SCTP_DATA_SACK_IMM) force = SCTP_FORCE(); - if (asoc->autoclose) { + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) { sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); } @@ -3878,7 +3878,7 @@ sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net, SCTP_CHUNK(chunk)); /* Count this as receiving DATA. */ - if (asoc->autoclose) { + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) { sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); } @@ -5267,7 +5267,7 @@ sctp_disposition_t sctp_sf_do_9_2_start_shutdown( sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); - if (asoc->autoclose) + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); @@ -5346,7 +5346,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack( sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN)); - if (asoc->autoclose) + if (asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 72046b9729a8..5455043f4496 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2196,6 +2196,7 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); + struct net *net = sock_net(sk); /* Applicable to UDP-style socket only */ if (sctp_style(sk, TCP)) @@ -2205,6 +2206,9 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, if (copy_from_user(&sp->autoclose, optval, optlen)) return -EFAULT; + if (sp->autoclose > net->sctp.max_autoclose) + sp->autoclose = net->sctp.max_autoclose; + return 0; } -- cgit v1.2.1 From ce7a3bdf18a8dbcba1409f5d335c56fde432ca89 Mon Sep 17 00:00:00 2001 From: Florent Fourcot Date: Tue, 10 Dec 2013 15:15:46 +0100 Subject: ipv6: do not erase dst address with flow label destination This patch is following b579035ff766c9412e2b92abf5cab794bff102b6 "ipv6: remove old conditions on flow label sharing" Since there is no reason to restrict a label to a destination, we should not erase the destination value of a socket with the value contained in the flow label storage. This patch allows to really have the same flow label to more than one destination. Signed-off-by: Florent Fourcot Reviewed-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/dccp/ipv6.c | 1 - net/ipv6/datagram.c | 1 - net/ipv6/raw.c | 1 - net/ipv6/tcp_ipv6.c | 1 - net/ipv6/udp.c | 1 - net/l2tp/l2tp_ip6.c | 1 - 6 files changed, 6 deletions(-) (limited to 'net') diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 4ac71ff7c2e4..2b90a786e475 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -851,7 +851,6 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - usin->sin6_addr = flowlabel->dst; fl6_sock_release(flowlabel); } } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 8dfe1f4d3c1a..93b1aa34c432 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -73,7 +73,6 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - usin->sin6_addr = flowlabel->dst; } } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 7fb4e14c467f..b6bb87e55805 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -792,7 +792,6 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - daddr = &flowlabel->dst; } } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0740f93a114a..f67033b4bb66 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -156,7 +156,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - usin->sin6_addr = flowlabel->dst; fl6_sock_release(flowlabel); } } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index bcd5699313c3..089c741a3992 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1140,7 +1140,6 @@ do_udp_sendmsg: flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - daddr = &flowlabel->dst; } } diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index d9b437e55007..bb6e206ea70b 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -528,7 +528,6 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk, flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - daddr = &flowlabel->dst; } } -- cgit v1.2.1 From 85f935d41af097a87067367be66de52896b964e1 Mon Sep 17 00:00:00 2001 From: wangweidong Date: Wed, 11 Dec 2013 09:50:38 +0800 Subject: sctp: check the rto_min and rto_max in setsockopt When we set 0 to rto_min or rto_max, just not change the value. Also we should check the rto_min > rto_max. Suggested-by: Vlad Yasevich Signed-off-by: Wang Weidong Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5455043f4496..42b709c95cf3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2815,6 +2815,8 @@ static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, unsigne { struct sctp_rtoinfo rtoinfo; struct sctp_association *asoc; + unsigned long rto_min, rto_max; + struct sctp_sock *sp = sctp_sk(sk); if (optlen != sizeof (struct sctp_rtoinfo)) return -EINVAL; @@ -2828,26 +2830,36 @@ static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, unsigne if (!asoc && rtoinfo.srto_assoc_id && sctp_style(sk, UDP)) return -EINVAL; + rto_max = rtoinfo.srto_max; + rto_min = rtoinfo.srto_min; + + if (rto_max) + rto_max = asoc ? msecs_to_jiffies(rto_max) : rto_max; + else + rto_max = asoc ? asoc->rto_max : sp->rtoinfo.srto_max; + + if (rto_min) + rto_min = asoc ? msecs_to_jiffies(rto_min) : rto_min; + else + rto_min = asoc ? asoc->rto_min : sp->rtoinfo.srto_min; + + if (rto_min > rto_max) + return -EINVAL; + if (asoc) { if (rtoinfo.srto_initial != 0) asoc->rto_initial = msecs_to_jiffies(rtoinfo.srto_initial); - if (rtoinfo.srto_max != 0) - asoc->rto_max = msecs_to_jiffies(rtoinfo.srto_max); - if (rtoinfo.srto_min != 0) - asoc->rto_min = msecs_to_jiffies(rtoinfo.srto_min); + asoc->rto_max = rto_max; + asoc->rto_min = rto_min; } else { /* If there is no association or the association-id = 0 * set the values to the endpoint. */ - struct sctp_sock *sp = sctp_sk(sk); - if (rtoinfo.srto_initial != 0) sp->rtoinfo.srto_initial = rtoinfo.srto_initial; - if (rtoinfo.srto_max != 0) - sp->rtoinfo.srto_max = rtoinfo.srto_max; - if (rtoinfo.srto_min != 0) - sp->rtoinfo.srto_min = rtoinfo.srto_min; + sp->rtoinfo.srto_max = rto_max; + sp->rtoinfo.srto_min = rto_min; } return 0; -- cgit v1.2.1 From 4f3fdf3bc59cafd14c3bc2c2369efad34c7aa8b5 Mon Sep 17 00:00:00 2001 From: wangweidong Date: Wed, 11 Dec 2013 09:50:39 +0800 Subject: sctp: add check rto_min and rto_max in sysctl rto_min should be smaller than rto_max while rto_max should be larger than rto_min. Add two proc_handler for the checking. Suggested-by: Vlad Yasevich Signed-off-by: Wang Weidong Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sysctl.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 65 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 6b36561a1b3b..43b5e3243871 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -61,6 +61,13 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, void __user *buffer, size_t *lenp, loff_t *ppos); +static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); +static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); + static struct ctl_table sctp_table[] = { { .procname = "sctp_mem", @@ -102,17 +109,17 @@ static struct ctl_table sctp_net_table[] = { .data = &init_net.sctp.rto_min, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_sctp_do_rto_min, .extra1 = &one, - .extra2 = &timer_max + .extra2 = &init_net.sctp.rto_max }, { .procname = "rto_max", .data = &init_net.sctp.rto_max, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .proc_handler = proc_sctp_do_rto_max, + .extra1 = &init_net.sctp.rto_min, .extra2 = &timer_max }, { @@ -342,6 +349,60 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, return ret; } +static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct net *net = current->nsproxy->net_ns; + int new_value; + struct ctl_table tbl; + unsigned int min = *(unsigned int *) ctl->extra1; + unsigned int max = *(unsigned int *) ctl->extra2; + int ret; + + memset(&tbl, 0, sizeof(struct ctl_table)); + tbl.maxlen = sizeof(unsigned int); + + if (write) + tbl.data = &new_value; + else + tbl.data = &net->sctp.rto_min; + ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); + if (write) { + if (ret || new_value > max || new_value < min) + return -EINVAL; + net->sctp.rto_min = new_value; + } + return ret; +} + +static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct net *net = current->nsproxy->net_ns; + int new_value; + struct ctl_table tbl; + unsigned int min = *(unsigned int *) ctl->extra1; + unsigned int max = *(unsigned int *) ctl->extra2; + int ret; + + memset(&tbl, 0, sizeof(struct ctl_table)); + tbl.maxlen = sizeof(unsigned int); + + if (write) + tbl.data = &new_value; + else + tbl.data = &net->sctp.rto_max; + ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); + if (write) { + if (ret || new_value > max || new_value < min) + return -EINVAL; + net->sctp.rto_max = new_value; + } + return ret; +} + int sctp_sysctl_net_register(struct net *net) { struct ctl_table *table; -- cgit v1.2.1 From b486b2289e40797e386a18048a66b535206a463b Mon Sep 17 00:00:00 2001 From: wangweidong Date: Wed, 11 Dec 2013 09:50:40 +0800 Subject: sctp: fix up a spacing fix up spacing of proc_sctp_do_hmac_alg for according to the proc_sctp_do_rto_min[max] in sysctl.c Suggested-by: Daniel Borkmann Signed-off-by: Wang Weidong Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sysctl.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 43b5e3243871..b0565afb61c7 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -56,10 +56,8 @@ extern long sysctl_sctp_mem[3]; extern int sysctl_sctp_rmem[3]; extern int sysctl_sctp_wmem[3]; -static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, - int write, +static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, - loff_t *ppos); static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, @@ -301,8 +299,7 @@ static struct ctl_table sctp_net_table[] = { { /* sentinel */ } }; -static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, - int write, +static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { -- cgit v1.2.1 From 8afdd99a1315e759de04ad6e2344f0c5f17ecb1b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Dec 2013 18:07:23 -0800 Subject: udp: ipv4: fix an use after free in __udp4_lib_rcv() Dave Jones reported a use after free in UDP stack : [ 5059.434216] ========================= [ 5059.434314] [ BUG: held lock freed! ] [ 5059.434420] 3.13.0-rc3+ #9 Not tainted [ 5059.434520] ------------------------- [ 5059.434620] named/863 is freeing memory ffff88005e960000-ffff88005e96061f, with a lock still held there! [ 5059.434815] (slock-AF_INET){+.-...}, at: [] udp_queue_rcv_skb+0xd1/0x4b0 [ 5059.435012] 3 locks held by named/863: [ 5059.435086] #0: (rcu_read_lock){.+.+..}, at: [] __netif_receive_skb_core+0x11d/0x940 [ 5059.435295] #1: (rcu_read_lock){.+.+..}, at: [] ip_local_deliver_finish+0x3e/0x410 [ 5059.435500] #2: (slock-AF_INET){+.-...}, at: [] udp_queue_rcv_skb+0xd1/0x4b0 [ 5059.435734] stack backtrace: [ 5059.435858] CPU: 0 PID: 863 Comm: named Not tainted 3.13.0-rc3+ #9 [loadavg: 0.21 0.06 0.06 1/115 1365] [ 5059.436052] Hardware name: /D510MO, BIOS MOPNV10J.86A.0175.2010.0308.0620 03/08/2010 [ 5059.436223] 0000000000000002 ffff88007e203ad8 ffffffff8153a372 ffff8800677130e0 [ 5059.436390] ffff88007e203b10 ffffffff8108cafa ffff88005e960000 ffff88007b00cfc0 [ 5059.436554] ffffea00017a5800 ffffffff8141c490 0000000000000246 ffff88007e203b48 [ 5059.436718] Call Trace: [ 5059.436769] [] dump_stack+0x4d/0x66 [ 5059.436904] [] debug_check_no_locks_freed+0x15a/0x160 [ 5059.437037] [] ? __sk_free+0x110/0x230 [ 5059.437147] [] kmem_cache_free+0x6a/0x150 [ 5059.437260] [] __sk_free+0x110/0x230 [ 5059.437364] [] sk_free+0x19/0x20 [ 5059.437463] [] sock_edemux+0x25/0x40 [ 5059.437567] [] sock_queue_rcv_skb+0x81/0x280 [ 5059.437685] [] ? udp_queue_rcv_skb+0xd1/0x4b0 [ 5059.437805] [] __udp_queue_rcv_skb+0x42/0x240 [ 5059.437925] [] ? _raw_spin_lock+0x65/0x70 [ 5059.438038] [] udp_queue_rcv_skb+0x26b/0x4b0 [ 5059.438155] [] __udp4_lib_rcv+0x152/0xb00 [ 5059.438269] [] udp_rcv+0x15/0x20 [ 5059.438367] [] ip_local_deliver_finish+0x10f/0x410 [ 5059.438492] [] ? ip_local_deliver_finish+0x3e/0x410 [ 5059.438621] [] ip_local_deliver+0x43/0x80 [ 5059.438733] [] ip_rcv_finish+0x140/0x5a0 [ 5059.438843] [] ip_rcv+0x296/0x3f0 [ 5059.438945] [] __netif_receive_skb_core+0x742/0x940 [ 5059.439074] [] ? __netif_receive_skb_core+0x11d/0x940 [ 5059.442231] [] ? trace_hardirqs_on+0xd/0x10 [ 5059.442231] [] __netif_receive_skb+0x13/0x60 [ 5059.442231] [] netif_receive_skb+0x1e/0x1f0 [ 5059.442231] [] napi_gro_receive+0x70/0xa0 [ 5059.442231] [] rtl8169_poll+0x166/0x700 [r8169] [ 5059.442231] [] net_rx_action+0x129/0x1e0 [ 5059.442231] [] __do_softirq+0xed/0x240 [ 5059.442231] [] irq_exit+0x125/0x140 [ 5059.442231] [] do_IRQ+0x51/0xc0 [ 5059.442231] [] common_interrupt+0x6f/0x6f We need to keep a reference on the socket, by using skb_steal_sock() at the right place. Note that another patch is needed to fix a race in udp_sk_rx_dst_set(), as we hold no lock protecting the dst. Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux") Reported-by: Dave Jones Signed-off-by: Eric Dumazet Cc: Shawn Bohrer Signed-off-by: David S. Miller --- net/ipv4/udp.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 44f6a20fa29d..2e2aecbe22c4 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -560,15 +560,11 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport, struct udp_table *udptable) { - struct sock *sk; const struct iphdr *iph = ip_hdr(skb); - if (unlikely(sk = skb_steal_sock(skb))) - return sk; - else - return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, - iph->daddr, dport, inet_iif(skb), - udptable); + return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport, + iph->daddr, dport, inet_iif(skb), + udptable); } struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, @@ -1739,15 +1735,15 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (udp4_csum_init(skb, uh, proto)) goto csum_error; - if (skb->sk) { + sk = skb_steal_sock(skb); + if (sk) { int ret; - sk = skb->sk; if (unlikely(sk->sk_rx_dst == NULL)) udp_sk_rx_dst_set(sk, skb); ret = udp_queue_rcv_skb(sk, skb); - + sock_put(sk); /* a return value > 0 means to resubmit the input, but * it wants the return to be -protocol, or 0 */ -- cgit v1.2.1 From f01b3926ee645974f549f4a6921268142047717c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 8 Dec 2013 16:52:31 +0000 Subject: netfilter: SYNPROXY target: restrict to INPUT/FORWARD Fix a crash in synproxy_send_tcp() when using the SYNPROXY target in the PREROUTING chain caused by missing routing information. Reported-by: Nicki P. Signed-off-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_SYNPROXY.c | 1 + net/ipv6/netfilter/ip6t_SYNPROXY.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index f13bd91d9a56..a313c3fbeb46 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -423,6 +423,7 @@ static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par) static struct xt_target synproxy_tg4_reg __read_mostly = { .name = "SYNPROXY", .family = NFPROTO_IPV4, + .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD), .target = synproxy_tg4, .targetsize = sizeof(struct xt_synproxy_info), .checkentry = synproxy_tg4_check, diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index f78f41aca8e9..a0d17270117c 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -446,6 +446,7 @@ static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par) static struct xt_target synproxy_tg6_reg __read_mostly = { .name = "SYNPROXY", .family = NFPROTO_IPV6, + .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD), .target = synproxy_tg6, .targetsize = sizeof(struct xt_synproxy_info), .checkentry = synproxy_tg6_check, -- cgit v1.2.1 From cc106e441a63bec3b1cb72948df82ea15945c449 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 10 Dec 2013 14:59:27 +0800 Subject: net: sched: tbf: fix the calculation of max_size Current max_size is caluated from rate table. Now, the rate table has been replaced and it's wrong to caculate max_size based on this rate table. It can lead wrong calculation of max_size. The burst in kernel may be lower than user asked, because burst may gets some loss when transform it to buffer(E.g. "burst 40kb rate 30mbit/s") and it seems we cannot avoid this loss. Burst's value(max_size) based on rate table may be equal user asked. If a packet's length is max_size, this packet will be stalled in tbf_dequeue() because its length is above the burst in kernel so that it cannot get enough tokens. The max_size guards against enqueuing packet sizes above q->buffer "time" in tbf_enqueue(). To make consistent with the calculation of tokens, this patch add a helper psched_ns_t2l() to calculate burst(max_size) directly to fix this problem. After this fix, we can support to using 64bit rates to calculate burst as well. Signed-off-by: Yang Yingliang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_tbf.c | 115 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 70 insertions(+), 45 deletions(-) (limited to 'net') diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index a6090051c5db..a44928c6ba24 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -118,6 +118,30 @@ struct tbf_sched_data { }; +/* Time to Length, convert time in ns to length in bytes + * to determinate how many bytes can be sent in given time. + */ +static u64 psched_ns_t2l(const struct psched_ratecfg *r, + u64 time_in_ns) +{ + /* The formula is : + * len = (time_in_ns * r->rate_bytes_ps) / NSEC_PER_SEC + */ + u64 len = time_in_ns * r->rate_bytes_ps; + + do_div(len, NSEC_PER_SEC); + + if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) + len = (len / 53) * 48; + + if (len > r->overhead) + len -= r->overhead; + else + len = 0; + + return len; +} + /* * Return length of individual segments of a gso packet, * including all headers (MAC, IP, TCP/UDP) @@ -289,10 +313,11 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) struct tbf_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_TBF_MAX + 1]; struct tc_tbf_qopt *qopt; - struct qdisc_rate_table *rtab = NULL; - struct qdisc_rate_table *ptab = NULL; struct Qdisc *child = NULL; - int max_size, n; + struct psched_ratecfg rate; + struct psched_ratecfg peak; + u64 max_size; + s64 buffer, mtu; u64 rate64 = 0, prate64 = 0; err = nla_parse_nested(tb, TCA_TBF_MAX, opt, tbf_policy); @@ -304,38 +329,13 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) goto done; qopt = nla_data(tb[TCA_TBF_PARMS]); - rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]); - if (rtab == NULL) - goto done; - - if (qopt->peakrate.rate) { - if (qopt->peakrate.rate > qopt->rate.rate) - ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]); - if (ptab == NULL) - goto done; - } - - for (n = 0; n < 256; n++) - if (rtab->data[n] > qopt->buffer) - break; - max_size = (n << qopt->rate.cell_log) - 1; - if (ptab) { - int size; - - for (n = 0; n < 256; n++) - if (ptab->data[n] > qopt->mtu) - break; - size = (n << qopt->peakrate.cell_log) - 1; - if (size < max_size) - max_size = size; - } - if (max_size < 0) - goto done; + if (qopt->rate.linklayer == TC_LINKLAYER_UNAWARE) + qdisc_put_rtab(qdisc_get_rtab(&qopt->rate, + tb[TCA_TBF_RTAB])); - if (max_size < psched_mtu(qdisc_dev(sch))) - pr_warn_ratelimited("sch_tbf: burst %u is lower than device %s mtu (%u) !\n", - max_size, qdisc_dev(sch)->name, - psched_mtu(qdisc_dev(sch))); + if (qopt->peakrate.linklayer == TC_LINKLAYER_UNAWARE) + qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate, + tb[TCA_TBF_PTAB])); if (q->qdisc != &noop_qdisc) { err = fifo_set_limit(q->qdisc, qopt->limit); @@ -349,6 +349,39 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) } } + buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U); + mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U); + + if (tb[TCA_TBF_RATE64]) + rate64 = nla_get_u64(tb[TCA_TBF_RATE64]); + psched_ratecfg_precompute(&rate, &qopt->rate, rate64); + + max_size = min_t(u64, psched_ns_t2l(&rate, buffer), ~0U); + + if (qopt->peakrate.rate) { + if (tb[TCA_TBF_PRATE64]) + prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]); + psched_ratecfg_precompute(&peak, &qopt->peakrate, prate64); + if (peak.rate_bytes_ps <= rate.rate_bytes_ps) { + pr_warn_ratelimited("sch_tbf: peakrate %llu is lower than or equals to rate %llu !\n", + peak.rate_bytes_ps, rate.rate_bytes_ps); + err = -EINVAL; + goto done; + } + + max_size = min_t(u64, max_size, psched_ns_t2l(&peak, mtu)); + } + + if (max_size < psched_mtu(qdisc_dev(sch))) + pr_warn_ratelimited("sch_tbf: burst %llu is lower than device %s mtu (%u) !\n", + max_size, qdisc_dev(sch)->name, + psched_mtu(qdisc_dev(sch))); + + if (!max_size) { + err = -EINVAL; + goto done; + } + sch_tree_lock(sch); if (child) { qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); @@ -362,13 +395,9 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) q->tokens = q->buffer; q->ptokens = q->mtu; - if (tb[TCA_TBF_RATE64]) - rate64 = nla_get_u64(tb[TCA_TBF_RATE64]); - psched_ratecfg_precompute(&q->rate, &rtab->rate, rate64); - if (ptab) { - if (tb[TCA_TBF_PRATE64]) - prate64 = nla_get_u64(tb[TCA_TBF_PRATE64]); - psched_ratecfg_precompute(&q->peak, &ptab->rate, prate64); + memcpy(&q->rate, &rate, sizeof(struct psched_ratecfg)); + if (qopt->peakrate.rate) { + memcpy(&q->peak, &peak, sizeof(struct psched_ratecfg)); q->peak_present = true; } else { q->peak_present = false; @@ -377,10 +406,6 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) sch_tree_unlock(sch); err = 0; done: - if (rtab) - qdisc_put_rtab(rtab); - if (ptab) - qdisc_put_rtab(ptab); return err; } -- cgit v1.2.1 From 1598f7cb4745c40467decc91ee44ec615773eb45 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 10 Dec 2013 14:59:28 +0800 Subject: net: sched: htb: fix the calculation of quantum Now, 32bit rates may be not the true rate. So use rate_bytes_ps which is from max(rate32, rate64) to calcualte quantum. Signed-off-by: Yang Yingliang Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 0e1e38b40025..717b2108f852 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1477,11 +1477,22 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, sch_tree_lock(sch); } + rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0; + + ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0; + + psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64); + psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64); + /* it used to be a nasty bug here, we have to check that node * is really leaf before changing cl->un.leaf ! */ if (!cl->level) { - cl->quantum = hopt->rate.rate / q->rate2quantum; + u64 quantum = cl->rate.rate_bytes_ps; + + do_div(quantum, q->rate2quantum); + cl->quantum = min_t(u64, quantum, INT_MAX); + if (!hopt->quantum && cl->quantum < 1000) { pr_warning( "HTB: quantum of class %X is small. Consider r2q change.\n", @@ -1500,13 +1511,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->prio = TC_HTB_NUMPRIO - 1; } - rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0; - - ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0; - - psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64); - psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64); - cl->buffer = PSCHED_TICKS2NS(hopt->buffer); cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer); -- cgit v1.2.1 From 610438b74496b2986a9025f8e23c134cb638e338 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Dec 2013 08:10:05 -0800 Subject: udp: ipv4: fix potential use after free in udp_v4_early_demux() pskb_may_pull() can reallocate skb->head, we need to move the initialization of iph and uh pointers after its call. Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux") Signed-off-by: Eric Dumazet Cc: Shawn Bohrer Signed-off-by: David S. Miller --- net/ipv4/udp.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2e2aecbe22c4..16d246a51a02 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1909,17 +1909,20 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, void udp_v4_early_demux(struct sk_buff *skb) { - const struct iphdr *iph = ip_hdr(skb); - const struct udphdr *uh = udp_hdr(skb); + struct net *net = dev_net(skb->dev); + const struct iphdr *iph; + const struct udphdr *uh; struct sock *sk; struct dst_entry *dst; - struct net *net = dev_net(skb->dev); int dif = skb->dev->ifindex; /* validate the packet */ if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) return; + iph = ip_hdr(skb); + uh = udp_hdr(skb); + if (skb->pkt_type == PACKET_BROADCAST || skb->pkt_type == PACKET_MULTICAST) sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, -- cgit v1.2.1 From 975022310233fb0f0193873d79a7b8438070fa82 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Dec 2013 14:46:51 -0800 Subject: udp: ipv4: must add synchronization in udp_sk_rx_dst_set() Unlike TCP, UDP input path does not hold the socket lock. Before messing with sk->sk_rx_dst, we must use a spinlock, otherwise multiple cpus could leak a refcount. This patch also takes care of renewing a stale dst entry. (When the sk->sk_rx_dst would not be used by IP early demux) Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux") Signed-off-by: Eric Dumazet Cc: Shawn Bohrer Signed-off-by: David S. Miller --- net/ipv4/udp.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 16d246a51a02..62c19fdd102d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1599,12 +1599,21 @@ static void flush_stack(struct sock **stack, unsigned int count, kfree_skb(skb1); } -static void udp_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +/* For TCP sockets, sk_rx_dst is protected by socket lock + * For UDP, we use sk_dst_lock to guard against concurrent changes. + */ +static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) { - struct dst_entry *dst = skb_dst(skb); + struct dst_entry *old; - dst_hold(dst); - sk->sk_rx_dst = dst; + spin_lock(&sk->sk_dst_lock); + old = sk->sk_rx_dst; + if (likely(old != dst)) { + dst_hold(dst); + sk->sk_rx_dst = dst; + dst_release(old); + } + spin_unlock(&sk->sk_dst_lock); } /* @@ -1737,10 +1746,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, sk = skb_steal_sock(skb); if (sk) { + struct dst_entry *dst = skb_dst(skb); int ret; - if (unlikely(sk->sk_rx_dst == NULL)) - udp_sk_rx_dst_set(sk, skb); + if (unlikely(sk->sk_rx_dst != dst)) + udp_sk_rx_dst_set(sk, dst); ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); -- cgit v1.2.1 From d55d282e6af88120ad90e93a88f70e3116dc0e3d Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 12 Dec 2013 10:57:22 +0800 Subject: sch_tbf: use do_div() for 64-bit divide It's doing a 64-bit divide which is not supported on 32-bit architectures in psched_ns_t2l(). The correct way to do this is to use do_div(). It's introduced by commit cc106e441a63 ("net: sched: tbf: fix the calculation of max_size") Reported-by: kbuild test robot Signed-off-by: Yang Yingliang Signed-off-by: David S. Miller --- net/sched/sch_tbf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index a44928c6ba24..887e672f9d7d 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -131,8 +131,10 @@ static u64 psched_ns_t2l(const struct psched_ratecfg *r, do_div(len, NSEC_PER_SEC); - if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) - len = (len / 53) * 48; + if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) { + do_div(len, 53); + len = len * 48; + } if (len > r->overhead) len -= r->overhead; -- cgit v1.2.1 From a3adadf3018102c24754e0b53a5515c40fbaff4a Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Thu, 12 Dec 2013 08:51:59 +0100 Subject: netfilter: nft_reject: fix endianness in dump function The dump function in nft_reject_ipv4 was not converting a u32 field to network order before sending it to userspace, this needs to happen for consistency with other nf_tables and nfnetlink subsystems. Signed-off-by: Eric Leblond Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nft_reject_ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c index fff5ba1a33b7..4a5e94ac314a 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -72,7 +72,7 @@ static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_reject *priv = nft_expr_priv(expr); - if (nla_put_be32(skb, NFTA_REJECT_TYPE, priv->type)) + if (nla_put_be32(skb, NFTA_REJECT_TYPE, htonl(priv->type))) goto nla_put_failure; switch (priv->type) { -- cgit v1.2.1 From 9bd7d20c45157c175bfd45449e904a404506f99c Mon Sep 17 00:00:00 2001 From: wangweidong Date: Fri, 13 Dec 2013 11:00:10 +0800 Subject: sctp: loading sctp when load sctp_probe when I modprobe sctp_probe, it failed with "FATAL: ". I found that sctp should load before sctp_probe register jprobe. So I add a sctp_setup_jprobe for loading 'sctp' when first failed to register jprobe, just do this similar to dccp_probe. v2: add MODULE_SOFTDEP and check of request_module, as suggested by Neil Signed-off-by: Wang Weidong Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/probe.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/probe.c b/net/sctp/probe.c index 53c452efb40b..5e68b94ee640 100644 --- a/net/sctp/probe.c +++ b/net/sctp/probe.c @@ -38,6 +38,7 @@ #include #include +MODULE_SOFTDEP("pre: sctp"); MODULE_AUTHOR("Wei Yongjun "); MODULE_DESCRIPTION("SCTP snooper"); MODULE_LICENSE("GPL"); @@ -182,6 +183,20 @@ static struct jprobe sctp_recv_probe = { .entry = jsctp_sf_eat_sack, }; +static __init int sctp_setup_jprobe(void) +{ + int ret = register_jprobe(&sctp_recv_probe); + + if (ret) { + if (request_module("sctp")) + goto out; + ret = register_jprobe(&sctp_recv_probe); + } + +out: + return ret; +} + static __init int sctpprobe_init(void) { int ret = -ENOMEM; @@ -202,7 +217,7 @@ static __init int sctpprobe_init(void) &sctpprobe_fops)) goto free_kfifo; - ret = register_jprobe(&sctp_recv_probe); + ret = sctp_setup_jprobe(); if (ret) goto remove_proc; -- cgit v1.2.1 From e47eb5dfb296bf217e9ebee7b2f07486670b9c1b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 15 Dec 2013 10:53:46 -0800 Subject: udp: ipv4: do not use sk_dst_lock from softirq context Using sk_dst_lock from softirq context is not supported right now. Instead of adding BH protection everywhere, udp_sk_rx_dst_set() can instead use xchg(), as suggested by David. Reported-by: Fengguang Wu Fixes: 975022310233 ("udp: ipv4: must add synchronization in udp_sk_rx_dst_set()") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 62c19fdd102d..f140048334ce 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1600,20 +1600,15 @@ static void flush_stack(struct sock **stack, unsigned int count, } /* For TCP sockets, sk_rx_dst is protected by socket lock - * For UDP, we use sk_dst_lock to guard against concurrent changes. + * For UDP, we use xchg() to guard against concurrent changes. */ static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) { struct dst_entry *old; - spin_lock(&sk->sk_dst_lock); - old = sk->sk_rx_dst; - if (likely(old != dst)) { - dst_hold(dst); - sk->sk_rx_dst = dst; - dst_release(old); - } - spin_unlock(&sk->sk_dst_lock); + dst_hold(dst); + old = xchg(&sk->sk_rx_dst, dst); + dst_release(old); } /* -- cgit v1.2.1 From 37ab4fa7844a044dc21fde45e2a0fc2f3c3b6490 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Fri, 13 Dec 2013 10:54:22 -0500 Subject: net: unix: allow bind to fail on mutex lock This is similar to the set_peek_off patch where calling bind while the socket is stuck in unix_dgram_recvmsg() will block and cause a hung task spew after a while. This is also the last place that did a straightforward mutex_lock(), so there shouldn't be any more of these patches. Signed-off-by: Sasha Levin Signed-off-by: David S. Miller --- net/unix/af_unix.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index a0ca162e5bd5..a427623ee574 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -718,7 +718,9 @@ static int unix_autobind(struct socket *sock) int err; unsigned int retries = 0; - mutex_lock(&u->readlock); + err = mutex_lock_interruptible(&u->readlock); + if (err) + return err; err = 0; if (u->addr) @@ -877,7 +879,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; addr_len = err; - mutex_lock(&u->readlock); + err = mutex_lock_interruptible(&u->readlock); + if (err) + goto out; err = -EINVAL; if (u->addr) -- cgit v1.2.1 From 53385d2d1de84f4036a0919ec46964c4e81b83f5 Mon Sep 17 00:00:00 2001 From: Bob Gilligan Date: Sun, 15 Dec 2013 13:39:56 -0800 Subject: neigh: Netlink notification for administrative NUD state change The neighbour code sends up an RTM_NEWNEIGH netlink notification if the NUD state of a neighbour cache entry is changed by a timer (e.g. from REACHABLE to STALE), even if the lladdr of the entry has not changed. But an administrative change to the the NUD state of a neighbour cache entry that does not change the lladdr (e.g. via "ip -4 neigh change ... nud ...") does not trigger a netlink notification. This means that netlink listeners will not hear about administrative NUD state changes such as from a resolved state to PERMANENT. This patch changes the neighbor code to generate an RTM_NEWNEIGH message when the NUD state of an entry is changed administratively. Signed-off-by: Bob Gilligan Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/neighbour.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index ca15f32821fb..36b1443f9ae4 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1161,6 +1161,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, neigh->parms->reachable_time : 0))); neigh->nud_state = new; + notify = 1; } if (lladdr != neigh->ha) { -- cgit v1.2.1