diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c | 11 | ||||
-rw-r--r-- | net/ipv4/arp.c | 34 | ||||
-rw-r--r-- | net/ipv4/igmp.c | 17 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 17 | ||||
-rw-r--r-- | net/ipv4/inet_diag.c | 257 | ||||
-rw-r--r-- | net/ipv4/ip_gre.c | 6 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 23 | ||||
-rw-r--r-- | net/ipv4/ip_sockglue.c | 35 | ||||
-rw-r--r-- | net/ipv4/ipconfig.c | 15 | ||||
-rw-r--r-- | net/ipv4/ipip.c | 2 | ||||
-rw-r--r-- | net/ipv4/ipmr.c | 4 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_queue.c | 6 | ||||
-rw-r--r-- | net/ipv4/proc.c | 9 | ||||
-rw-r--r-- | net/ipv4/raw.c | 10 | ||||
-rw-r--r-- | net/ipv4/route.c | 12 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 46 | ||||
-rw-r--r-- | net/ipv4/tcp_cong.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_diag.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 46 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 1 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 10 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 25 | ||||
-rw-r--r-- | net/ipv4/udp.c | 6 |
23 files changed, 371 insertions, 227 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1b5096a9875a..15dc4c4828de 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1250,7 +1250,8 @@ out: return err; } -static struct sk_buff *inet_gso_segment(struct sk_buff *skb, u32 features) +static struct sk_buff *inet_gso_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); struct iphdr *iph; @@ -1572,9 +1573,9 @@ static __net_init int ipv4_mib_init_net(struct net *net) sizeof(struct icmp_mib), __alignof__(struct icmp_mib)) < 0) goto err_icmp_mib; - if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics, - sizeof(struct icmpmsg_mib), - __alignof__(struct icmpmsg_mib)) < 0) + net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib), + GFP_KERNEL); + if (!net->mib.icmpmsg_statistics) goto err_icmpmsg_mib; tcp_mib_init(net); @@ -1598,7 +1599,7 @@ err_tcp_mib: static __net_exit void ipv4_mib_exit_net(struct net *net) { - snmp_mib_free((void __percpu **)net->mib.icmpmsg_statistics); + kfree(net->mib.icmpmsg_statistics); snmp_mib_free((void __percpu **)net->mib.icmp_statistics); snmp_mib_free((void __percpu **)net->mib.udplite_statistics); snmp_mib_free((void __percpu **)net->mib.udp_statistics); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 96a164aa1367..381a0876c363 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -112,11 +112,6 @@ #include <net/arp.h> #include <net/ax25.h> #include <net/netrom.h> -#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) -#include <net/atmclip.h> -struct neigh_table *clip_tbl_hook; -EXPORT_SYMBOL(clip_tbl_hook); -#endif #include <asm/system.h> #include <linux/uaccess.h> @@ -164,7 +159,6 @@ static const struct neigh_ops arp_broken_ops = { struct neigh_table arp_tbl = { .family = AF_INET, - .entry_size = sizeof(struct neighbour) + 4, .key_len = 4, .hash = arp_hash, .constructor = arp_constructor, @@ -177,7 +171,7 @@ struct neigh_table arp_tbl = { .gc_staletime = 60 * HZ, .reachable_time = 30 * HZ, .delay_probe_time = 5 * HZ, - .queue_len = 3, + .queue_len_bytes = 64*1024, .ucast_probes = 3, .mcast_probes = 3, .anycast_delay = 1 * HZ, @@ -283,9 +277,9 @@ static int arp_constructor(struct neighbour *neigh) default: break; case ARPHRD_ROSE: -#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +#if IS_ENABLED(CONFIG_AX25) case ARPHRD_AX25: -#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE) +#if IS_ENABLED(CONFIG_NETROM) case ARPHRD_NETROM: #endif neigh->ops = &arp_broken_ops; @@ -592,16 +586,18 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, struct sk_buff *skb; struct arphdr *arp; unsigned char *arp_ptr; + int hlen = LL_RESERVED_SPACE(dev); + int tlen = dev->needed_tailroom; /* * Allocate a buffer */ - skb = alloc_skb(arp_hdr_len(dev) + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); + skb = alloc_skb(arp_hdr_len(dev) + hlen + tlen, GFP_ATOMIC); if (skb == NULL) return NULL; - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, hlen); skb_reset_network_header(skb); arp = (struct arphdr *) skb_put(skb, arp_hdr_len(dev)); skb->dev = dev; @@ -633,13 +629,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, arp->ar_pro = htons(ETH_P_IP); break; -#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +#if IS_ENABLED(CONFIG_AX25) case ARPHRD_AX25: arp->ar_hrd = htons(ARPHRD_AX25); arp->ar_pro = htons(AX25_P_IP); break; -#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE) +#if IS_ENABLED(CONFIG_NETROM) case ARPHRD_NETROM: arp->ar_hrd = htons(ARPHRD_NETROM); arp->ar_pro = htons(AX25_P_IP); @@ -647,13 +643,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, #endif #endif -#if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE) +#if IS_ENABLED(CONFIG_FDDI) case ARPHRD_FDDI: arp->ar_hrd = htons(ARPHRD_ETHER); arp->ar_pro = htons(ETH_P_IP); break; #endif -#if defined(CONFIG_TR) || defined(CONFIG_TR_MODULE) +#if IS_ENABLED(CONFIG_TR) case ARPHRD_IEEE802_TR: arp->ar_hrd = htons(ARPHRD_IEEE802); arp->ar_pro = htons(ETH_P_IP); @@ -1040,7 +1036,7 @@ static int arp_req_set(struct net *net, struct arpreq *r, return -EINVAL; } switch (dev->type) { -#if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE) +#if IS_ENABLED(CONFIG_FDDI) case ARPHRD_FDDI: /* * According to RFC 1390, FDDI devices should accept ARP @@ -1286,7 +1282,7 @@ void __init arp_init(void) } #ifdef CONFIG_PROC_FS -#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +#if IS_ENABLED(CONFIG_AX25) /* ------------------------------------------------------------------------ */ /* @@ -1334,7 +1330,7 @@ static void arp_format_neigh_entry(struct seq_file *seq, read_lock(&n->lock); /* Convert hardware address to XX:XX:XX:XX ... form. */ -#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +#if IS_ENABLED(CONFIG_AX25) if (hatype == ARPHRD_AX25 || hatype == ARPHRD_NETROM) ax2asc2((ax25_address *)n->ha, hbuffer); else { @@ -1347,7 +1343,7 @@ static void arp_format_neigh_entry(struct seq_file *seq, if (k != 0) --k; hbuffer[k] = 0; -#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +#if IS_ENABLED(CONFIG_AX25) } #endif sprintf(tbuf, "%pI4", n->primary_key); diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index b2ca095cb9da..fa057d105bef 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -304,9 +304,11 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) struct igmpv3_report *pig; struct net *net = dev_net(dev); struct flowi4 fl4; + int hlen = LL_RESERVED_SPACE(dev); + int tlen = dev->needed_tailroom; while (1) { - skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev), + skb = alloc_skb(size + hlen + tlen, GFP_ATOMIC | __GFP_NOWARN); if (skb) break; @@ -327,7 +329,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) skb_dst_set(skb, &rt->dst); skb->dev = dev; - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, hlen); skb_reset_network_header(skb); pip = ip_hdr(skb); @@ -647,6 +649,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, __be32 group = pmc ? pmc->multiaddr : 0; struct flowi4 fl4; __be32 dst; + int hlen, tlen; if (type == IGMPV3_HOST_MEMBERSHIP_REPORT) return igmpv3_send_report(in_dev, pmc); @@ -661,7 +664,9 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, if (IS_ERR(rt)) return -1; - skb = alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC); + hlen = LL_RESERVED_SPACE(dev); + tlen = dev->needed_tailroom; + skb = alloc_skb(IGMP_SIZE + hlen + tlen, GFP_ATOMIC); if (skb == NULL) { ip_rt_put(rt); return -1; @@ -669,7 +674,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, skb_dst_set(skb, &rt->dst); - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, hlen); skb_reset_network_header(skb); iph = ip_hdr(skb); @@ -1574,7 +1579,7 @@ out_unlock: * Add multicast single-source filter to the interface list */ static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode, - __be32 *psfsrc, int delta) + __be32 *psfsrc) { struct ip_sf_list *psf, *psf_prev; @@ -1709,7 +1714,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, pmc->sfcount[sfmode]++; err = 0; for (i=0; i<sfcount; i++) { - err = ip_mc_add1_src(pmc, sfmode, &psfsrc[i], delta); + err = ip_mc_add1_src(pmc, sfmode, &psfsrc[i]); if (err) break; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index c14d88ad348d..a598768c616c 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -588,10 +588,19 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, } EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); -struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, - const gfp_t priority) +/** + * inet_csk_clone_lock - clone an inet socket, and lock its clone + * @sk: the socket to clone + * @req: request_sock + * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) + * + * Caller must unlock socket even in error path (bh_unlock_sock(newsk)) + */ +struct sock *inet_csk_clone_lock(const struct sock *sk, + const struct request_sock *req, + const gfp_t priority) { - struct sock *newsk = sk_clone(sk, priority); + struct sock *newsk = sk_clone_lock(sk, priority); if (newsk != NULL) { struct inet_connection_sock *newicsk = inet_csk(newsk); @@ -615,7 +624,7 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, } return newsk; } -EXPORT_SYMBOL_GPL(inet_csk_clone); +EXPORT_SYMBOL_GPL(inet_csk_clone_lock); /* * At this point, there should be no process reference to this diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index ccee270a9b65..b56b7ba8beeb 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -33,6 +33,7 @@ #include <linux/stddef.h> #include <linux/inet_diag.h> +#include <linux/sock_diag.h> static const struct inet_diag_handler **inet_diag_table; @@ -45,24 +46,22 @@ struct inet_diag_entry { u16 userlocks; }; -static struct sock *idiagnl; - #define INET_DIAG_PUT(skb, attrtype, attrlen) \ RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) static DEFINE_MUTEX(inet_diag_table_mutex); -static const struct inet_diag_handler *inet_diag_lock_handler(int type) +static const struct inet_diag_handler *inet_diag_lock_handler(int proto) { - if (!inet_diag_table[type]) + if (!inet_diag_table[proto]) request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, - NETLINK_INET_DIAG, type); + NETLINK_SOCK_DIAG, proto); mutex_lock(&inet_diag_table_mutex); - if (!inet_diag_table[type]) + if (!inet_diag_table[proto]) return ERR_PTR(-ENOENT); - return inet_diag_table[type]; + return inet_diag_table[proto]; } static inline void inet_diag_unlock_handler( @@ -72,8 +71,8 @@ static inline void inet_diag_unlock_handler( } static int inet_csk_diag_fill(struct sock *sk, - struct sk_buff *skb, - int ext, u32 pid, u32 seq, u16 nlmsg_flags, + struct sk_buff *skb, struct inet_diag_req *req, + u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { const struct inet_sock *inet = inet_sk(sk); @@ -84,8 +83,9 @@ static int inet_csk_diag_fill(struct sock *sk, struct inet_diag_meminfo *minfo = NULL; unsigned char *b = skb_tail_pointer(skb); const struct inet_diag_handler *handler; + int ext = req->idiag_ext; - handler = inet_diag_table[unlh->nlmsg_type]; + handler = inet_diag_table[req->sdiag_protocol]; BUG_ON(handler == NULL); nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r)); @@ -132,13 +132,10 @@ static int inet_csk_diag_fill(struct sock *sk, if (r->idiag_family == AF_INET6) { const struct ipv6_pinfo *np = inet6_sk(sk); + *(struct in6_addr *)r->id.idiag_src = np->rcv_saddr; + *(struct in6_addr *)r->id.idiag_dst = np->daddr; if (ext & (1 << (INET_DIAG_TCLASS - 1))) RTA_PUT_U8(skb, INET_DIAG_TCLASS, np->tclass); - - ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, - &np->rcv_saddr); - ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, - &np->daddr); } #endif @@ -188,8 +185,8 @@ nlmsg_failure: } static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, - struct sk_buff *skb, int ext, u32 pid, - u32 seq, u16 nlmsg_flags, + struct sk_buff *skb, struct inet_diag_req *req, + u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { long tmo; @@ -228,10 +225,8 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); - ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, - &tw6->tw_v6_rcv_saddr); - ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, - &tw6->tw_v6_daddr); + *(struct in6_addr *)r->id.idiag_src = tw6->tw_v6_rcv_saddr; + *(struct in6_addr *)r->id.idiag_dst = tw6->tw_v6_daddr; } #endif nlh->nlmsg_len = skb_tail_pointer(skb) - previous_tail; @@ -242,27 +237,27 @@ nlmsg_failure: } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, - int ext, u32 pid, u32 seq, u16 nlmsg_flags, + struct inet_diag_req *r, u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { if (sk->sk_state == TCP_TIME_WAIT) return inet_twsk_diag_fill((struct inet_timewait_sock *)sk, - skb, ext, pid, seq, nlmsg_flags, + skb, r, pid, seq, nlmsg_flags, unlh); - return inet_csk_diag_fill(sk, skb, ext, pid, seq, nlmsg_flags, unlh); + return inet_csk_diag_fill(sk, skb, r, pid, seq, nlmsg_flags, unlh); } static int inet_diag_get_exact(struct sk_buff *in_skb, - const struct nlmsghdr *nlh) + const struct nlmsghdr *nlh, + struct inet_diag_req *req) { int err; struct sock *sk; - struct inet_diag_req *req = NLMSG_DATA(nlh); struct sk_buff *rep; struct inet_hashinfo *hashinfo; const struct inet_diag_handler *handler; - handler = inet_diag_lock_handler(nlh->nlmsg_type); + handler = inet_diag_lock_handler(req->sdiag_protocol); if (IS_ERR(handler)) { err = PTR_ERR(handler); goto unlock; @@ -271,13 +266,13 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, hashinfo = handler->idiag_hashinfo; err = -EINVAL; - if (req->idiag_family == AF_INET) { + if (req->sdiag_family == AF_INET) { sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_if); } #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - else if (req->idiag_family == AF_INET6) { + else if (req->sdiag_family == AF_INET6) { sk = inet6_lookup(&init_net, hashinfo, (struct in6_addr *)req->id.idiag_dst, req->id.idiag_dport, @@ -309,7 +304,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, if (!rep) goto out; - err = sk_diag_fill(sk, rep, req->idiag_ext, + err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 0, nlh); if (err < 0) { @@ -317,7 +312,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb, kfree_skb(rep); goto out; } - err = netlink_unicast(idiagnl, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); if (err > 0) err = 0; @@ -493,15 +488,12 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) static int inet_csk_diag_dump(struct sock *sk, struct sk_buff *skb, - struct netlink_callback *cb) + struct netlink_callback *cb, + struct inet_diag_req *r, + const struct nlattr *bc) { - struct inet_diag_req *r = NLMSG_DATA(cb->nlh); - - if (nlmsg_attrlen(cb->nlh, sizeof(*r))) { + if (bc != NULL) { struct inet_diag_entry entry; - const struct nlattr *bc = nlmsg_find_attr(cb->nlh, - sizeof(*r), - INET_DIAG_REQ_BYTECODE); struct inet_sock *inet = inet_sk(sk); entry.family = sk->sk_family; @@ -525,22 +517,19 @@ static int inet_csk_diag_dump(struct sock *sk, return 0; } - return inet_csk_diag_fill(sk, skb, r->idiag_ext, + return inet_csk_diag_fill(sk, skb, r, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, struct sk_buff *skb, - struct netlink_callback *cb) + struct netlink_callback *cb, + struct inet_diag_req *r, + const struct nlattr *bc) { - struct inet_diag_req *r = NLMSG_DATA(cb->nlh); - - if (nlmsg_attrlen(cb->nlh, sizeof(*r))) { + if (bc != NULL) { struct inet_diag_entry entry; - const struct nlattr *bc = nlmsg_find_attr(cb->nlh, - sizeof(*r), - INET_DIAG_REQ_BYTECODE); entry.family = tw->tw_family; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) @@ -563,7 +552,7 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, return 0; } - return inet_twsk_diag_fill(tw, skb, r->idiag_ext, + return inet_twsk_diag_fill(tw, skb, r, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } @@ -607,10 +596,8 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, r->idiag_inode = 0; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (r->idiag_family == AF_INET6) { - ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, - &inet6_rsk(req)->loc_addr); - ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, - &inet6_rsk(req)->rmt_addr); + *(struct in6_addr *)r->id.idiag_src = inet6_rsk(req)->loc_addr; + *(struct in6_addr *)r->id.idiag_dst = inet6_rsk(req)->rmt_addr; } #endif nlh->nlmsg_len = skb_tail_pointer(skb) - b; @@ -623,13 +610,13 @@ nlmsg_failure: } static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, - struct netlink_callback *cb) + struct netlink_callback *cb, + struct inet_diag_req *r, + const struct nlattr *bc) { struct inet_diag_entry entry; - struct inet_diag_req *r = NLMSG_DATA(cb->nlh); struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt; - const struct nlattr *bc = NULL; struct inet_sock *inet = inet_sk(sk); int j, s_j; int reqnum, s_reqnum; @@ -649,9 +636,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, if (!lopt || !lopt->qlen) goto out; - if (nlmsg_attrlen(cb->nlh, sizeof(*r))) { - bc = nlmsg_find_attr(cb->nlh, sizeof(*r), - INET_DIAG_REQ_BYTECODE); + if (bc != NULL) { entry.sport = inet->inet_num; entry.userlocks = sk->sk_userlocks; } @@ -708,15 +693,15 @@ out: return err; } -static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct inet_diag_req *r, struct nlattr *bc) { int i, num; int s_i, s_num; - struct inet_diag_req *r = NLMSG_DATA(cb->nlh); const struct inet_diag_handler *handler; struct inet_hashinfo *hashinfo; - handler = inet_diag_lock_handler(cb->nlh->nlmsg_type); + handler = inet_diag_lock_handler(r->sdiag_protocol); if (IS_ERR(handler)) goto unlock; @@ -745,6 +730,10 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; } + if (r->sdiag_family != AF_UNSPEC && + sk->sk_family != r->sdiag_family) + goto next_listen; + if (r->id.idiag_sport != inet->inet_sport && r->id.idiag_sport) goto next_listen; @@ -754,7 +743,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->args[3] > 0) goto syn_recv; - if (inet_csk_diag_dump(sk, skb, cb) < 0) { + if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) { spin_unlock_bh(&ilb->lock); goto done; } @@ -763,7 +752,7 @@ syn_recv: if (!(r->idiag_states & TCPF_SYN_RECV)) goto next_listen; - if (inet_diag_dump_reqs(skb, sk, cb) < 0) { + if (inet_diag_dump_reqs(skb, sk, cb, r, bc) < 0) { spin_unlock_bh(&ilb->lock); goto done; } @@ -810,13 +799,16 @@ skip_listen_ht: goto next_normal; if (!(r->idiag_states & (1 << sk->sk_state))) goto next_normal; + if (r->sdiag_family != AF_UNSPEC && + sk->sk_family != r->sdiag_family) + goto next_normal; if (r->id.idiag_sport != inet->inet_sport && r->id.idiag_sport) goto next_normal; if (r->id.idiag_dport != inet->inet_dport && r->id.idiag_dport) goto next_normal; - if (inet_csk_diag_dump(sk, skb, cb) < 0) { + if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) { spin_unlock_bh(lock); goto done; } @@ -832,13 +824,16 @@ next_normal: if (num < s_num) goto next_dying; + if (r->sdiag_family != AF_UNSPEC && + tw->tw_family != r->sdiag_family) + goto next_dying; if (r->id.idiag_sport != tw->tw_sport && r->id.idiag_sport) goto next_dying; if (r->id.idiag_dport != tw->tw_dport && r->id.idiag_dport) goto next_dying; - if (inet_twsk_diag_dump(tw, skb, cb) < 0) { + if (inet_twsk_diag_dump(tw, skb, cb, r, bc) < 0) { spin_unlock_bh(lock); goto done; } @@ -857,10 +852,67 @@ unlock: return skb->len; } -static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct nlattr *bc = NULL; int hdrlen = sizeof(struct inet_diag_req); + if (nlmsg_attrlen(cb->nlh, hdrlen)) + bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE); + + return __inet_diag_dump(skb, cb, (struct inet_diag_req *)NLMSG_DATA(cb->nlh), bc); +} + +static inline int inet_diag_type2proto(int type) +{ + switch (type) { + case TCPDIAG_GETSOCK: + return IPPROTO_TCP; + case DCCPDIAG_GETSOCK: + return IPPROTO_DCCP; + default: + return 0; + } +} + +static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct inet_diag_req_compat *rc = NLMSG_DATA(cb->nlh); + struct inet_diag_req req; + struct nlattr *bc = NULL; + int hdrlen = sizeof(struct inet_diag_req_compat); + + req.sdiag_family = AF_UNSPEC; /* compatibility */ + req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type); + req.idiag_ext = rc->idiag_ext; + req.idiag_states = rc->idiag_states; + req.id = rc->id; + + if (nlmsg_attrlen(cb->nlh, hdrlen)) + bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE); + + return __inet_diag_dump(skb, cb, &req, bc); +} + +static int inet_diag_get_exact_compat(struct sk_buff *in_skb, + const struct nlmsghdr *nlh) +{ + struct inet_diag_req_compat *rc = NLMSG_DATA(nlh); + struct inet_diag_req req; + + req.sdiag_family = rc->idiag_family; + req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type); + req.idiag_ext = rc->idiag_ext; + req.idiag_states = rc->idiag_states; + req.id = rc->id; + + return inet_diag_get_exact(in_skb, nlh, &req); +} + +static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + int hdrlen = sizeof(struct inet_diag_req_compat); + if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX || nlmsg_len(nlh) < hdrlen) return -EINVAL; @@ -877,28 +929,54 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; } - return netlink_dump_start(idiagnl, skb, nlh, - inet_diag_dump, NULL, 0); + return netlink_dump_start(sock_diag_nlsk, skb, nlh, + inet_diag_dump_compat, NULL, 0); } - return inet_diag_get_exact(skb, nlh); + return inet_diag_get_exact_compat(skb, nlh); } -static DEFINE_MUTEX(inet_diag_mutex); - -static void inet_diag_rcv(struct sk_buff *skb) +static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) { - mutex_lock(&inet_diag_mutex); - netlink_rcv_skb(skb, &inet_diag_rcv_msg); - mutex_unlock(&inet_diag_mutex); + int hdrlen = sizeof(struct inet_diag_req); + + if (nlmsg_len(h) < hdrlen) + return -EINVAL; + + if (h->nlmsg_flags & NLM_F_DUMP) { + if (nlmsg_attrlen(h, hdrlen)) { + struct nlattr *attr; + attr = nlmsg_find_attr(h, hdrlen, + INET_DIAG_REQ_BYTECODE); + if (attr == NULL || + nla_len(attr) < sizeof(struct inet_diag_bc_op) || + inet_diag_bc_audit(nla_data(attr), nla_len(attr))) + return -EINVAL; + } + + return netlink_dump_start(sock_diag_nlsk, skb, h, + inet_diag_dump, NULL, 0); + } + + return inet_diag_get_exact(skb, h, (struct inet_diag_req *)NLMSG_DATA(h)); } +static struct sock_diag_handler inet_diag_handler = { + .family = AF_INET, + .dump = inet_diag_handler_dump, +}; + +static struct sock_diag_handler inet6_diag_handler = { + .family = AF_INET6, + .dump = inet_diag_handler_dump, +}; + int inet_diag_register(const struct inet_diag_handler *h) { const __u16 type = h->idiag_type; int err = -EINVAL; - if (type >= INET_DIAG_GETSOCK_MAX) + if (type >= IPPROTO_MAX) goto out; mutex_lock(&inet_diag_table_mutex); @@ -917,7 +995,7 @@ void inet_diag_unregister(const struct inet_diag_handler *h) { const __u16 type = h->idiag_type; - if (type >= INET_DIAG_GETSOCK_MAX) + if (type >= IPPROTO_MAX) return; mutex_lock(&inet_diag_table_mutex); @@ -928,7 +1006,7 @@ EXPORT_SYMBOL_GPL(inet_diag_unregister); static int __init inet_diag_init(void) { - const int inet_diag_table_size = (INET_DIAG_GETSOCK_MAX * + const int inet_diag_table_size = (IPPROTO_MAX * sizeof(struct inet_diag_handler *)); int err = -ENOMEM; @@ -936,25 +1014,34 @@ static int __init inet_diag_init(void) if (!inet_diag_table) goto out; - idiagnl = netlink_kernel_create(&init_net, NETLINK_INET_DIAG, 0, - inet_diag_rcv, NULL, THIS_MODULE); - if (idiagnl == NULL) - goto out_free_table; - err = 0; + err = sock_diag_register(&inet_diag_handler); + if (err) + goto out_free_nl; + + err = sock_diag_register(&inet6_diag_handler); + if (err) + goto out_free_inet; + + sock_diag_register_inet_compat(inet_diag_rcv_msg_compat); out: return err; -out_free_table: + +out_free_inet: + sock_diag_unregister(&inet_diag_handler); +out_free_nl: kfree(inet_diag_table); goto out; } static void __exit inet_diag_exit(void) { - netlink_kernel_release(idiagnl); + sock_diag_unregister(&inet6_diag_handler); + sock_diag_unregister(&inet_diag_handler); + sock_diag_unregister_inet_compat(inet_diag_rcv_msg_compat); kfree(inet_diag_table); } module_init(inet_diag_init); module_exit(inet_diag_exit); MODULE_LICENSE("GPL"); -MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_INET_DIAG); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 0); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d55110e93120..fe070c1593ab 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -171,7 +171,7 @@ struct pcpu_tstats { unsigned long rx_bytes; unsigned long tx_packets; unsigned long tx_bytes; -}; +} __attribute__((aligned(4*sizeof(unsigned long)))); static struct net_device_stats *ipgre_get_stats(struct net_device *dev) { @@ -731,7 +731,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (skb->protocol == htons(ETH_P_IPV6)) { - struct neighbour *neigh = dst_get_neighbour(skb_dst(skb)); + struct neighbour *neigh = dst_get_neighbour_noref(skb_dst(skb)); const struct in6_addr *addr6; int addr_type; @@ -835,6 +835,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if (skb_headroom(skb) < max_headroom || skb_shared(skb)|| (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); + if (max_headroom > dev->needed_headroom) + dev->needed_headroom = max_headroom; if (!new_skb) { ip_rt_put(rt); dev->stats.tx_dropped++; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 0bc95f3977d2..ff302bde8890 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -206,7 +206,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) } rcu_read_lock(); - neigh = dst_get_neighbour(dst); + neigh = dst_get_neighbour_noref(dst); if (neigh) { int res = neigh_output(neigh, skb); @@ -319,6 +319,20 @@ int ip_output(struct sk_buff *skb) !(IPCB(skb)->flags & IPSKB_REROUTED)); } +/* + * copy saddr and daddr, possibly using 64bit load/stores + * Equivalent to : + * iph->saddr = fl4->saddr; + * iph->daddr = fl4->daddr; + */ +static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4) +{ + BUILD_BUG_ON(offsetof(typeof(*fl4), daddr) != + offsetof(typeof(*fl4), saddr) + sizeof(fl4->saddr)); + memcpy(&iph->saddr, &fl4->saddr, + sizeof(fl4->saddr) + sizeof(fl4->daddr)); +} + int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl) { struct sock *sk = skb->sk; @@ -381,8 +395,8 @@ packet_routed: iph->frag_off = 0; iph->ttl = ip_select_ttl(inet, &rt->dst); iph->protocol = sk->sk_protocol; - iph->saddr = fl4->saddr; - iph->daddr = fl4->daddr; + ip_copy_addrs(iph, fl4); + /* Transport layer set skb->h.foo itself. */ if (inet_opt && inet_opt->opt.optlen) { @@ -1337,8 +1351,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, ip_select_ident(iph, &rt->dst, sk); iph->ttl = ttl; iph->protocol = sk->sk_protocol; - iph->saddr = fl4->saddr; - iph->daddr = fl4->daddr; + ip_copy_addrs(iph, fl4); if (opt) { iph->ihl += opt->optlen>>2; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 09ff51bf16a4..80d5fa450210 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -55,20 +55,13 @@ /* * SOL_IP control messages. */ +#define PKTINFO_SKB_CB(__skb) ((struct in_pktinfo *)((__skb)->cb)) static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) { - struct in_pktinfo info; - struct rtable *rt = skb_rtable(skb); + struct in_pktinfo info = *PKTINFO_SKB_CB(skb); info.ipi_addr.s_addr = ip_hdr(skb)->daddr; - if (rt) { - info.ipi_ifindex = rt->rt_iif; - info.ipi_spec_dst.s_addr = rt->rt_spec_dst; - } else { - info.ipi_ifindex = 0; - info.ipi_spec_dst.s_addr = 0; - } put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info); } @@ -992,20 +985,28 @@ e_inval: } /** - * ip_queue_rcv_skb - Queue an skb into sock receive queue + * ipv4_pktinfo_prepare - transfert some info from rtable to skb * @sk: socket * @skb: buffer * - * Queues an skb into socket receive queue. If IP_CMSG_PKTINFO option - * is not set, we drop skb dst entry now, while dst cache line is hot. + * To support IP_CMSG_PKTINFO option, we store rt_iif and rt_spec_dst + * in skb->cb[] before dst drop. + * This way, receiver doesnt make cache line misses to read rtable. */ -int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) +void ipv4_pktinfo_prepare(struct sk_buff *skb) { - if (!(inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO)) - skb_dst_drop(skb); - return sock_queue_rcv_skb(sk, skb); + struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb); + const struct rtable *rt = skb_rtable(skb); + + if (rt) { + pktinfo->ipi_ifindex = rt->rt_iif; + pktinfo->ipi_spec_dst.s_addr = rt->rt_spec_dst; + } else { + pktinfo->ipi_ifindex = 0; + pktinfo->ipi_spec_dst.s_addr = 0; + } + skb_dst_drop(skb); } -EXPORT_SYMBOL(ip_queue_rcv_skb); int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 0da2afc97f32..915eb5265b2e 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -763,13 +763,15 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d struct sk_buff *skb; struct bootp_pkt *b; struct iphdr *h; + int hlen = LL_RESERVED_SPACE(dev); + int tlen = dev->needed_tailroom; /* Allocate packet */ - skb = alloc_skb(sizeof(struct bootp_pkt) + LL_ALLOCATED_SPACE(dev) + 15, + skb = alloc_skb(sizeof(struct bootp_pkt) + hlen + tlen + 15, GFP_KERNEL); if (!skb) return; - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, hlen); b = (struct bootp_pkt *) skb_put(skb, sizeof(struct bootp_pkt)); memset(b, 0, sizeof(struct bootp_pkt)); @@ -822,8 +824,13 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d skb->dev = dev; skb->protocol = htons(ETH_P_IP); if (dev_hard_header(skb, dev, ntohs(skb->protocol), - dev->broadcast, dev->dev_addr, skb->len) < 0 || - dev_queue_xmit(skb) < 0) + dev->broadcast, dev->dev_addr, skb->len) < 0) { + kfree_skb(skb); + printk("E"); + return; + } + + if (dev_queue_xmit(skb) < 0) printk("E"); } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 065effd8349a..94906908a416 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -148,7 +148,7 @@ struct pcpu_tstats { unsigned long rx_bytes; unsigned long tx_packets; unsigned long tx_bytes; -}; +} __attribute__((aligned(4*sizeof(unsigned long)))); static struct net_device_stats *ipip_get_stats(struct net_device *dev) { diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 76a7f07b38b6..8e54490ee3f4 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1520,7 +1520,6 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v struct mr_table *mrt; struct vif_device *v; int ct; - LIST_HEAD(list); if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; @@ -1529,10 +1528,9 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v v = &mrt->vif_table[0]; for (ct = 0; ct < mrt->maxvif; ct++, v++) { if (v->dev == dev) - vif_delete(mrt, ct, 1, &list); + vif_delete(mrt, ct, 1, NULL); } } - unregister_netdevice_many(&list); return NOTIFY_DONE; } diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index e59aabd0eae4..a057fe64debd 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -404,6 +404,7 @@ __ipq_rcv_skb(struct sk_buff *skb) int status, type, pid, flags; unsigned int nlmsglen, skblen; struct nlmsghdr *nlh; + bool enable_timestamp = false; skblen = skb->len; if (skblen < sizeof(*nlh)) @@ -441,12 +442,13 @@ __ipq_rcv_skb(struct sk_buff *skb) RCV_SKB_FAIL(-EBUSY); } } else { - net_enable_timestamp(); + enable_timestamp = true; peer_pid = pid; } spin_unlock_bh(&queue_lock); - + if (enable_timestamp) + net_enable_timestamp(); status = ipq_receive_peer(NLMSG_DATA(nlh), type, nlmsglen - NLMSG_LENGTH(0)); if (status < 0) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 466ea8bb7a4d..961eed4f510a 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -288,7 +288,7 @@ static void icmpmsg_put(struct seq_file *seq) count = 0; for (i = 0; i < ICMPMSG_MIB_MAX; i++) { - val = snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, i); + val = atomic_long_read(&net->mib.icmpmsg_statistics->mibs[i]); if (val) { type[count] = i; vals[count++] = val; @@ -307,6 +307,7 @@ static void icmp_put(struct seq_file *seq) { int i; struct net *net = seq->private; + atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; seq_puts(seq, "\nIcmp: InMsgs InErrors"); for (i=0; icmpmibmap[i].name != NULL; i++) @@ -319,15 +320,13 @@ static void icmp_put(struct seq_file *seq) snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS)); for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, - icmpmibmap[i].index)); + atomic_long_read(ptr + icmpmibmap[i].index)); seq_printf(seq, " %lu %lu", snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); for (i=0; icmpmibmap[i].name != NULL; i++) seq_printf(seq, " %lu", - snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, - icmpmibmap[i].index | 0x100)); + atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); } /* diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 007e2eb769d3..3ccda5ae8a27 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -292,7 +292,8 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb) { /* Charge it to the socket. */ - if (ip_queue_rcv_skb(sk, skb) < 0) { + ipv4_pktinfo_prepare(skb); + if (sock_queue_rcv_skb(sk, skb) < 0) { kfree_skb(skb); return NET_RX_DROP; } @@ -327,6 +328,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, unsigned int iphlen; int err; struct rtable *rt = *rtp; + int hlen, tlen; if (length > rt->dst.dev->mtu) { ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport, @@ -336,12 +338,14 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, if (flags&MSG_PROBE) goto out; + hlen = LL_RESERVED_SPACE(rt->dst.dev); + tlen = rt->dst.dev->needed_tailroom; skb = sock_alloc_send_skb(sk, - length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15, + length + hlen + tlen + 15, flags & MSG_DONTWAIT, &err); if (skb == NULL) goto error; - skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev)); + skb_reserve(skb, hlen); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 46af62363b8c..f30112f7559a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -108,7 +108,6 @@ #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif -#include <net/atmclip.h> #include <net/secure_seq.h> #define RT_FL_TOS(oldflp4) \ @@ -420,7 +419,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) int len, HHUptod; rcu_read_lock(); - n = dst_get_neighbour(&r->dst); + n = dst_get_neighbour_noref(&r->dst); HHUptod = (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0; rcu_read_unlock(); @@ -1019,23 +1018,18 @@ static int slow_chain_length(const struct rtable *head) static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr) { - struct neigh_table *tbl = &arp_tbl; static const __be32 inaddr_any = 0; struct net_device *dev = dst->dev; const __be32 *pkey = daddr; struct neighbour *n; -#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) - if (dev->type == ARPHRD_ATM) - tbl = clip_tbl_hook; -#endif if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) pkey = &inaddr_any; - n = __ipv4_neigh_lookup(tbl, dev, *(__force u32 *)pkey); + n = __ipv4_neigh_lookup(&arp_tbl, dev, *(__force u32 *)pkey); if (n) return n; - return neigh_create(tbl, pkey, dev); + return neigh_create(&arp_tbl, pkey, dev); } static int rt_bind_neighbour(struct rtable *rt) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 34f5db1e1c8b..43dfccce62e9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -888,18 +888,18 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, } EXPORT_SYMBOL(tcp_sendpage); -#define TCP_PAGE(sk) (sk->sk_sndmsg_page) -#define TCP_OFF(sk) (sk->sk_sndmsg_off) - -static inline int select_size(const struct sock *sk, int sg) +static inline int select_size(const struct sock *sk, bool sg) { const struct tcp_sock *tp = tcp_sk(sk); int tmp = tp->mss_cache; if (sg) { - if (sk_can_gso(sk)) - tmp = 0; - else { + if (sk_can_gso(sk)) { + /* Small frames wont use a full page: + * Payload will immediately follow tcp header. + */ + tmp = SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER); + } else { int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER); if (tmp >= pgbreak && @@ -917,9 +917,9 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct iovec *iov; struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int iovlen, flags; + int iovlen, flags, err, copied; int mss_now, size_goal; - int sg, err, copied; + bool sg; long timeo; lock_sock(sk); @@ -946,7 +946,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto out_err; - sg = sk->sk_route_caps & NETIF_F_SG; + sg = !!(sk->sk_route_caps & NETIF_F_SG); while (--iovlen >= 0) { size_t seglen = iov->iov_len; @@ -1005,8 +1005,13 @@ new_segment: } else { int merge = 0; int i = skb_shinfo(skb)->nr_frags; - struct page *page = TCP_PAGE(sk); - int off = TCP_OFF(sk); + struct page *page = sk->sk_sndmsg_page; + int off; + + if (page && page_count(page) == 1) + sk->sk_sndmsg_off = 0; + + off = sk->sk_sndmsg_off; if (skb_can_coalesce(skb, i, page, off) && off != PAGE_SIZE) { @@ -1023,7 +1028,7 @@ new_segment: } else if (page) { if (off == PAGE_SIZE) { put_page(page); - TCP_PAGE(sk) = page = NULL; + sk->sk_sndmsg_page = page = NULL; off = 0; } } else @@ -1049,9 +1054,9 @@ new_segment: /* If this page was new, give it to the * socket so it does not get leaked. */ - if (!TCP_PAGE(sk)) { - TCP_PAGE(sk) = page; - TCP_OFF(sk) = 0; + if (!sk->sk_sndmsg_page) { + sk->sk_sndmsg_page = page; + sk->sk_sndmsg_off = 0; } goto do_error; } @@ -1061,15 +1066,15 @@ new_segment: skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); } else { skb_fill_page_desc(skb, i, page, off, copy); - if (TCP_PAGE(sk)) { + if (sk->sk_sndmsg_page) { get_page(page); } else if (off + copy < PAGE_SIZE) { get_page(page); - TCP_PAGE(sk) = page; + sk->sk_sndmsg_page = page; } } - TCP_OFF(sk) = off + copy; + sk->sk_sndmsg_off = off + copy; } if (!copied) @@ -2653,7 +2658,8 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL(compat_tcp_getsockopt); #endif -struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features) +struct sk_buff *tcp_tso_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); struct tcphdr *th; diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 850c737e08e2..fc6d475f488f 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -292,7 +292,7 @@ int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight) left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd && left * tp->mss_cache < sk->sk_gso_max_size) return 1; - return left <= tcp_max_burst(tp); + return left <= tcp_max_tso_deferred_mss(tp); } EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 939edb3b8e4d..981497795d49 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -37,7 +37,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, static const struct inet_diag_handler tcp_diag_handler = { .idiag_hashinfo = &tcp_hashinfo, .idiag_get_info = tcp_diag_get_info, - .idiag_type = TCPDIAG_GETSOCK, + .idiag_type = IPPROTO_TCP, .idiag_info_size = sizeof(struct tcp_info), }; @@ -54,4 +54,4 @@ static void __exit tcp_diag_exit(void) module_init(tcp_diag_init); module_exit(tcp_diag_exit); MODULE_LICENSE("GPL"); -MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_INET_DIAG, TCPDIAG_GETSOCK); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 6); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 52b5c2d0ecd0..0cbb44076cfa 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2858,7 +2858,7 @@ static void tcp_try_keep_open(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); int state = TCP_CA_Open; - if (tcp_left_out(tp) || tcp_any_retrans_done(sk) || tp->undo_marker) + if (tcp_left_out(tp) || tcp_any_retrans_done(sk)) state = TCP_CA_Disorder; if (inet_csk(sk)->icsk_ca_state != state) { @@ -2881,7 +2881,8 @@ static void tcp_try_to_open(struct sock *sk, int flag) if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { tcp_try_keep_open(sk); - tcp_moderate_cwnd(tp); + if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open) + tcp_moderate_cwnd(tp); } else { tcp_cwnd_down(sk, flag); } @@ -3009,11 +3010,11 @@ static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked, * tcp_xmit_retransmit_queue(). */ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, - int newly_acked_sacked, int flag) + int newly_acked_sacked, bool is_dupack, + int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && (tcp_fackets_out(tp) > tp->reordering)); int fast_rexmit = 0, mib_idx; @@ -3066,17 +3067,6 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, } break; - case TCP_CA_Disorder: - tcp_try_undo_dsack(sk); - if (!tp->undo_marker || - /* For SACK case do not Open to allow to undo - * catching for all duplicate ACKs. */ - tcp_is_reno(tp) || tp->snd_una != tp->high_seq) { - tp->undo_marker = 0; - tcp_set_ca_state(sk, TCP_CA_Open); - } - break; - case TCP_CA_Recovery: if (tcp_is_reno(tp)) tcp_reset_reno_sack(tp); @@ -3117,7 +3107,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, tcp_add_reno_sack(sk); } - if (icsk->icsk_ca_state == TCP_CA_Disorder) + if (icsk->icsk_ca_state <= TCP_CA_Disorder) tcp_try_undo_dsack(sk); if (!tcp_time_to_recover(sk)) { @@ -3681,10 +3671,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) u32 prior_snd_una = tp->snd_una; u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; + bool is_dupack = false; u32 prior_in_flight; u32 prior_fackets; int prior_packets; int prior_sacked = tp->sacked_out; + int pkts_acked = 0; int newly_acked_sacked = 0; int frto_cwnd = 0; @@ -3757,6 +3749,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. */ flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); + pkts_acked = prior_packets - tp->packets_out; newly_acked_sacked = (prior_packets - prior_sacked) - (tp->packets_out - tp->sacked_out); @@ -3771,8 +3764,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if ((flag & FLAG_DATA_ACKED) && !frto_cwnd && tcp_may_raise_cwnd(sk, flag)) tcp_cong_avoid(sk, ack, prior_in_flight); - tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, - newly_acked_sacked, flag); + is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); + tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, + is_dupack, flag); } else { if ((flag & FLAG_DATA_ACKED) && !frto_cwnd) tcp_cong_avoid(sk, ack, prior_in_flight); @@ -3784,6 +3778,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) return 1; no_queue: + /* If data was DSACKed, see if we can undo a cwnd reduction. */ + if (flag & FLAG_DSACKING_ACK) + tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, + is_dupack, flag); /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. @@ -3797,10 +3795,14 @@ invalid_ack: return -1; old_ack: + /* If data was SACKed, tag it and see if we should send more data. + * If data was DSACKed, see if we can undo a cwnd reduction. + */ if (TCP_SKB_CB(skb)->sacked) { - tcp_sacktag_write_queue(sk, skb, prior_snd_una); - if (icsk->icsk_ca_state == TCP_CA_Open) - tcp_try_keep_open(sk); + flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); + newly_acked_sacked = tp->sacked_out - prior_sacked; + tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked, + is_dupack, flag); } SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); @@ -5809,6 +5811,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, goto discard; if (th->syn) { + if (th->fin) + goto discard; if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) return 1; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a9db4b1a2215..c4b8b09db9f5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1511,6 +1511,7 @@ exit: return NULL; put_and_exit: tcp_clear_xmit_timers(newsk); + tcp_cleanup_congestion_control(newsk); bh_unlock_sock(newsk); sock_put(newsk); goto exit; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 66363b689ad6..9dc146e5ed65 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -343,8 +343,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot); tw6 = inet6_twsk((struct sock *)tw); - ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr); - ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr); + tw6->tw_v6_daddr = np->daddr; + tw6->tw_v6_rcv_saddr = np->rcv_saddr; tw->tw_tclass = np->tclass; tw->tw_ipv6only = np->ipv6only; } @@ -425,7 +425,7 @@ static inline void TCP_ECN_openreq_child(struct tcp_sock *tp, */ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb) { - struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC); + struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC); if (newsk != NULL) { const struct inet_request_sock *ireq = inet_rsk(req); @@ -495,7 +495,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->frto_counter = 0; newtp->frto_highmark = 0; - newicsk->icsk_ca_ops = &tcp_init_congestion_ops; + if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops && + !try_module_get(newicsk->icsk_ca_ops->owner)) + newicsk->icsk_ca_ops = &tcp_init_congestion_ops; tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 63170e297540..cf3068038942 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1093,6 +1093,13 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) { int i, k, eat; + eat = min_t(int, len, skb_headlen(skb)); + if (eat) { + __skb_pull(skb, eat); + len -= eat; + if (!len) + return; + } eat = len; k = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { @@ -1124,11 +1131,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) return -ENOMEM; - /* If len == headlen, we avoid __skb_pull to preserve alignment. */ - if (unlikely(len < skb_headlen(skb))) - __skb_pull(skb, len); - else - __pskb_trim_head(skb, len - skb_headlen(skb)); + __pskb_trim_head(skb, len); TCP_SKB_CB(skb)->seq += len; skb->ip_summed = CHECKSUM_PARTIAL; @@ -1581,7 +1584,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) * frame, so if we have space for more than 3 frames * then send now. */ - if (limit > tcp_max_burst(tp) * tp->mss_cache) + if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache) goto send_now; } @@ -2147,7 +2150,15 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) */ TCP_SKB_CB(skb)->when = tcp_time_stamp; - err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); + /* make sure skb->data is aligned on arches that require it */ + if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) { + struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER, + GFP_ATOMIC); + err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : + -ENOBUFS; + } else { + err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); + } if (err == 0) { /* Update global TCP statistics. */ diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5a65eeac1d29..ad481b32f1e3 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1358,7 +1358,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (inet_sk(sk)->inet_daddr) sock_rps_save_rxhash(sk, skb); - rc = ip_queue_rcv_skb(sk, skb); + rc = sock_queue_rcv_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); @@ -1474,6 +1474,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) rc = 0; + ipv4_pktinfo_prepare(skb); bh_lock_sock(sk); if (!sock_owned_by_user(sk)) rc = __udp_queue_rcv_skb(sk, skb); @@ -2247,7 +2248,8 @@ int udp4_ufo_send_check(struct sk_buff *skb) return 0; } -struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features) +struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); unsigned int mss; |