summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c11
-rw-r--r--net/ipv4/arp.c34
-rw-r--r--net/ipv4/igmp.c17
-rw-r--r--net/ipv4/inet_connection_sock.c17
-rw-r--r--net/ipv4/inet_diag.c257
-rw-r--r--net/ipv4/ip_gre.c6
-rw-r--r--net/ipv4/ip_output.c23
-rw-r--r--net/ipv4/ip_sockglue.c35
-rw-r--r--net/ipv4/ipconfig.c15
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/ipmr.c4
-rw-r--r--net/ipv4/netfilter/ip_queue.c6
-rw-r--r--net/ipv4/proc.c9
-rw-r--r--net/ipv4/raw.c10
-rw-r--r--net/ipv4/route.c12
-rw-r--r--net/ipv4/tcp.c46
-rw-r--r--net/ipv4/tcp_cong.c2
-rw-r--r--net/ipv4/tcp_diag.c4
-rw-r--r--net/ipv4/tcp_input.c46
-rw-r--r--net/ipv4/tcp_ipv4.c1
-rw-r--r--net/ipv4/tcp_minisocks.c10
-rw-r--r--net/ipv4/tcp_output.c25
-rw-r--r--net/ipv4/udp.c6
23 files changed, 371 insertions, 227 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1b5096a9875a..15dc4c4828de 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1250,7 +1250,8 @@ out:
return err;
}
-static struct sk_buff *inet_gso_segment(struct sk_buff *skb, u32 features)
+static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct iphdr *iph;
@@ -1572,9 +1573,9 @@ static __net_init int ipv4_mib_init_net(struct net *net)
sizeof(struct icmp_mib),
__alignof__(struct icmp_mib)) < 0)
goto err_icmp_mib;
- if (snmp_mib_init((void __percpu **)net->mib.icmpmsg_statistics,
- sizeof(struct icmpmsg_mib),
- __alignof__(struct icmpmsg_mib)) < 0)
+ net->mib.icmpmsg_statistics = kzalloc(sizeof(struct icmpmsg_mib),
+ GFP_KERNEL);
+ if (!net->mib.icmpmsg_statistics)
goto err_icmpmsg_mib;
tcp_mib_init(net);
@@ -1598,7 +1599,7 @@ err_tcp_mib:
static __net_exit void ipv4_mib_exit_net(struct net *net)
{
- snmp_mib_free((void __percpu **)net->mib.icmpmsg_statistics);
+ kfree(net->mib.icmpmsg_statistics);
snmp_mib_free((void __percpu **)net->mib.icmp_statistics);
snmp_mib_free((void __percpu **)net->mib.udplite_statistics);
snmp_mib_free((void __percpu **)net->mib.udp_statistics);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 96a164aa1367..381a0876c363 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -112,11 +112,6 @@
#include <net/arp.h>
#include <net/ax25.h>
#include <net/netrom.h>
-#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
-#include <net/atmclip.h>
-struct neigh_table *clip_tbl_hook;
-EXPORT_SYMBOL(clip_tbl_hook);
-#endif
#include <asm/system.h>
#include <linux/uaccess.h>
@@ -164,7 +159,6 @@ static const struct neigh_ops arp_broken_ops = {
struct neigh_table arp_tbl = {
.family = AF_INET,
- .entry_size = sizeof(struct neighbour) + 4,
.key_len = 4,
.hash = arp_hash,
.constructor = arp_constructor,
@@ -177,7 +171,7 @@ struct neigh_table arp_tbl = {
.gc_staletime = 60 * HZ,
.reachable_time = 30 * HZ,
.delay_probe_time = 5 * HZ,
- .queue_len = 3,
+ .queue_len_bytes = 64*1024,
.ucast_probes = 3,
.mcast_probes = 3,
.anycast_delay = 1 * HZ,
@@ -283,9 +277,9 @@ static int arp_constructor(struct neighbour *neigh)
default:
break;
case ARPHRD_ROSE:
-#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+#if IS_ENABLED(CONFIG_AX25)
case ARPHRD_AX25:
-#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
+#if IS_ENABLED(CONFIG_NETROM)
case ARPHRD_NETROM:
#endif
neigh->ops = &arp_broken_ops;
@@ -592,16 +586,18 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
struct sk_buff *skb;
struct arphdr *arp;
unsigned char *arp_ptr;
+ int hlen = LL_RESERVED_SPACE(dev);
+ int tlen = dev->needed_tailroom;
/*
* Allocate a buffer
*/
- skb = alloc_skb(arp_hdr_len(dev) + LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
+ skb = alloc_skb(arp_hdr_len(dev) + hlen + tlen, GFP_ATOMIC);
if (skb == NULL)
return NULL;
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ skb_reserve(skb, hlen);
skb_reset_network_header(skb);
arp = (struct arphdr *) skb_put(skb, arp_hdr_len(dev));
skb->dev = dev;
@@ -633,13 +629,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
arp->ar_pro = htons(ETH_P_IP);
break;
-#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+#if IS_ENABLED(CONFIG_AX25)
case ARPHRD_AX25:
arp->ar_hrd = htons(ARPHRD_AX25);
arp->ar_pro = htons(AX25_P_IP);
break;
-#if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
+#if IS_ENABLED(CONFIG_NETROM)
case ARPHRD_NETROM:
arp->ar_hrd = htons(ARPHRD_NETROM);
arp->ar_pro = htons(AX25_P_IP);
@@ -647,13 +643,13 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
#endif
#endif
-#if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE)
+#if IS_ENABLED(CONFIG_FDDI)
case ARPHRD_FDDI:
arp->ar_hrd = htons(ARPHRD_ETHER);
arp->ar_pro = htons(ETH_P_IP);
break;
#endif
-#if defined(CONFIG_TR) || defined(CONFIG_TR_MODULE)
+#if IS_ENABLED(CONFIG_TR)
case ARPHRD_IEEE802_TR:
arp->ar_hrd = htons(ARPHRD_IEEE802);
arp->ar_pro = htons(ETH_P_IP);
@@ -1040,7 +1036,7 @@ static int arp_req_set(struct net *net, struct arpreq *r,
return -EINVAL;
}
switch (dev->type) {
-#if defined(CONFIG_FDDI) || defined(CONFIG_FDDI_MODULE)
+#if IS_ENABLED(CONFIG_FDDI)
case ARPHRD_FDDI:
/*
* According to RFC 1390, FDDI devices should accept ARP
@@ -1286,7 +1282,7 @@ void __init arp_init(void)
}
#ifdef CONFIG_PROC_FS
-#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+#if IS_ENABLED(CONFIG_AX25)
/* ------------------------------------------------------------------------ */
/*
@@ -1334,7 +1330,7 @@ static void arp_format_neigh_entry(struct seq_file *seq,
read_lock(&n->lock);
/* Convert hardware address to XX:XX:XX:XX ... form. */
-#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+#if IS_ENABLED(CONFIG_AX25)
if (hatype == ARPHRD_AX25 || hatype == ARPHRD_NETROM)
ax2asc2((ax25_address *)n->ha, hbuffer);
else {
@@ -1347,7 +1343,7 @@ static void arp_format_neigh_entry(struct seq_file *seq,
if (k != 0)
--k;
hbuffer[k] = 0;
-#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+#if IS_ENABLED(CONFIG_AX25)
}
#endif
sprintf(tbuf, "%pI4", n->primary_key);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index b2ca095cb9da..fa057d105bef 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -304,9 +304,11 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
struct igmpv3_report *pig;
struct net *net = dev_net(dev);
struct flowi4 fl4;
+ int hlen = LL_RESERVED_SPACE(dev);
+ int tlen = dev->needed_tailroom;
while (1) {
- skb = alloc_skb(size + LL_ALLOCATED_SPACE(dev),
+ skb = alloc_skb(size + hlen + tlen,
GFP_ATOMIC | __GFP_NOWARN);
if (skb)
break;
@@ -327,7 +329,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
skb_dst_set(skb, &rt->dst);
skb->dev = dev;
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ skb_reserve(skb, hlen);
skb_reset_network_header(skb);
pip = ip_hdr(skb);
@@ -647,6 +649,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
__be32 group = pmc ? pmc->multiaddr : 0;
struct flowi4 fl4;
__be32 dst;
+ int hlen, tlen;
if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
return igmpv3_send_report(in_dev, pmc);
@@ -661,7 +664,9 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
if (IS_ERR(rt))
return -1;
- skb = alloc_skb(IGMP_SIZE+LL_ALLOCATED_SPACE(dev), GFP_ATOMIC);
+ hlen = LL_RESERVED_SPACE(dev);
+ tlen = dev->needed_tailroom;
+ skb = alloc_skb(IGMP_SIZE + hlen + tlen, GFP_ATOMIC);
if (skb == NULL) {
ip_rt_put(rt);
return -1;
@@ -669,7 +674,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
skb_dst_set(skb, &rt->dst);
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ skb_reserve(skb, hlen);
skb_reset_network_header(skb);
iph = ip_hdr(skb);
@@ -1574,7 +1579,7 @@ out_unlock:
* Add multicast single-source filter to the interface list
*/
static int ip_mc_add1_src(struct ip_mc_list *pmc, int sfmode,
- __be32 *psfsrc, int delta)
+ __be32 *psfsrc)
{
struct ip_sf_list *psf, *psf_prev;
@@ -1709,7 +1714,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
pmc->sfcount[sfmode]++;
err = 0;
for (i=0; i<sfcount; i++) {
- err = ip_mc_add1_src(pmc, sfmode, &psfsrc[i], delta);
+ err = ip_mc_add1_src(pmc, sfmode, &psfsrc[i]);
if (err)
break;
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index c14d88ad348d..a598768c616c 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -588,10 +588,19 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
}
EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune);
-struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
- const gfp_t priority)
+/**
+ * inet_csk_clone_lock - clone an inet socket, and lock its clone
+ * @sk: the socket to clone
+ * @req: request_sock
+ * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
+ *
+ * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
+ */
+struct sock *inet_csk_clone_lock(const struct sock *sk,
+ const struct request_sock *req,
+ const gfp_t priority)
{
- struct sock *newsk = sk_clone(sk, priority);
+ struct sock *newsk = sk_clone_lock(sk, priority);
if (newsk != NULL) {
struct inet_connection_sock *newicsk = inet_csk(newsk);
@@ -615,7 +624,7 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
}
return newsk;
}
-EXPORT_SYMBOL_GPL(inet_csk_clone);
+EXPORT_SYMBOL_GPL(inet_csk_clone_lock);
/*
* At this point, there should be no process reference to this
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ccee270a9b65..b56b7ba8beeb 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -33,6 +33,7 @@
#include <linux/stddef.h>
#include <linux/inet_diag.h>
+#include <linux/sock_diag.h>
static const struct inet_diag_handler **inet_diag_table;
@@ -45,24 +46,22 @@ struct inet_diag_entry {
u16 userlocks;
};
-static struct sock *idiagnl;
-
#define INET_DIAG_PUT(skb, attrtype, attrlen) \
RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
static DEFINE_MUTEX(inet_diag_table_mutex);
-static const struct inet_diag_handler *inet_diag_lock_handler(int type)
+static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
{
- if (!inet_diag_table[type])
+ if (!inet_diag_table[proto])
request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
- NETLINK_INET_DIAG, type);
+ NETLINK_SOCK_DIAG, proto);
mutex_lock(&inet_diag_table_mutex);
- if (!inet_diag_table[type])
+ if (!inet_diag_table[proto])
return ERR_PTR(-ENOENT);
- return inet_diag_table[type];
+ return inet_diag_table[proto];
}
static inline void inet_diag_unlock_handler(
@@ -72,8 +71,8 @@ static inline void inet_diag_unlock_handler(
}
static int inet_csk_diag_fill(struct sock *sk,
- struct sk_buff *skb,
- int ext, u32 pid, u32 seq, u16 nlmsg_flags,
+ struct sk_buff *skb, struct inet_diag_req *req,
+ u32 pid, u32 seq, u16 nlmsg_flags,
const struct nlmsghdr *unlh)
{
const struct inet_sock *inet = inet_sk(sk);
@@ -84,8 +83,9 @@ static int inet_csk_diag_fill(struct sock *sk,
struct inet_diag_meminfo *minfo = NULL;
unsigned char *b = skb_tail_pointer(skb);
const struct inet_diag_handler *handler;
+ int ext = req->idiag_ext;
- handler = inet_diag_table[unlh->nlmsg_type];
+ handler = inet_diag_table[req->sdiag_protocol];
BUG_ON(handler == NULL);
nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r));
@@ -132,13 +132,10 @@ static int inet_csk_diag_fill(struct sock *sk,
if (r->idiag_family == AF_INET6) {
const struct ipv6_pinfo *np = inet6_sk(sk);
+ *(struct in6_addr *)r->id.idiag_src = np->rcv_saddr;
+ *(struct in6_addr *)r->id.idiag_dst = np->daddr;
if (ext & (1 << (INET_DIAG_TCLASS - 1)))
RTA_PUT_U8(skb, INET_DIAG_TCLASS, np->tclass);
-
- ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
- &np->rcv_saddr);
- ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
- &np->daddr);
}
#endif
@@ -188,8 +185,8 @@ nlmsg_failure:
}
static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
- struct sk_buff *skb, int ext, u32 pid,
- u32 seq, u16 nlmsg_flags,
+ struct sk_buff *skb, struct inet_diag_req *req,
+ u32 pid, u32 seq, u16 nlmsg_flags,
const struct nlmsghdr *unlh)
{
long tmo;
@@ -228,10 +225,8 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
const struct inet6_timewait_sock *tw6 =
inet6_twsk((struct sock *)tw);
- ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
- &tw6->tw_v6_rcv_saddr);
- ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
- &tw6->tw_v6_daddr);
+ *(struct in6_addr *)r->id.idiag_src = tw6->tw_v6_rcv_saddr;
+ *(struct in6_addr *)r->id.idiag_dst = tw6->tw_v6_daddr;
}
#endif
nlh->nlmsg_len = skb_tail_pointer(skb) - previous_tail;
@@ -242,27 +237,27 @@ nlmsg_failure:
}
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
- int ext, u32 pid, u32 seq, u16 nlmsg_flags,
+ struct inet_diag_req *r, u32 pid, u32 seq, u16 nlmsg_flags,
const struct nlmsghdr *unlh)
{
if (sk->sk_state == TCP_TIME_WAIT)
return inet_twsk_diag_fill((struct inet_timewait_sock *)sk,
- skb, ext, pid, seq, nlmsg_flags,
+ skb, r, pid, seq, nlmsg_flags,
unlh);
- return inet_csk_diag_fill(sk, skb, ext, pid, seq, nlmsg_flags, unlh);
+ return inet_csk_diag_fill(sk, skb, r, pid, seq, nlmsg_flags, unlh);
}
static int inet_diag_get_exact(struct sk_buff *in_skb,
- const struct nlmsghdr *nlh)
+ const struct nlmsghdr *nlh,
+ struct inet_diag_req *req)
{
int err;
struct sock *sk;
- struct inet_diag_req *req = NLMSG_DATA(nlh);
struct sk_buff *rep;
struct inet_hashinfo *hashinfo;
const struct inet_diag_handler *handler;
- handler = inet_diag_lock_handler(nlh->nlmsg_type);
+ handler = inet_diag_lock_handler(req->sdiag_protocol);
if (IS_ERR(handler)) {
err = PTR_ERR(handler);
goto unlock;
@@ -271,13 +266,13 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
hashinfo = handler->idiag_hashinfo;
err = -EINVAL;
- if (req->idiag_family == AF_INET) {
+ if (req->sdiag_family == AF_INET) {
sk = inet_lookup(&init_net, hashinfo, req->id.idiag_dst[0],
req->id.idiag_dport, req->id.idiag_src[0],
req->id.idiag_sport, req->id.idiag_if);
}
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
- else if (req->idiag_family == AF_INET6) {
+ else if (req->sdiag_family == AF_INET6) {
sk = inet6_lookup(&init_net, hashinfo,
(struct in6_addr *)req->id.idiag_dst,
req->id.idiag_dport,
@@ -309,7 +304,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
if (!rep)
goto out;
- err = sk_diag_fill(sk, rep, req->idiag_ext,
+ err = sk_diag_fill(sk, rep, req,
NETLINK_CB(in_skb).pid,
nlh->nlmsg_seq, 0, nlh);
if (err < 0) {
@@ -317,7 +312,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
kfree_skb(rep);
goto out;
}
- err = netlink_unicast(idiagnl, rep, NETLINK_CB(in_skb).pid,
+ err = netlink_unicast(sock_diag_nlsk, rep, NETLINK_CB(in_skb).pid,
MSG_DONTWAIT);
if (err > 0)
err = 0;
@@ -493,15 +488,12 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len)
static int inet_csk_diag_dump(struct sock *sk,
struct sk_buff *skb,
- struct netlink_callback *cb)
+ struct netlink_callback *cb,
+ struct inet_diag_req *r,
+ const struct nlattr *bc)
{
- struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
-
- if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
+ if (bc != NULL) {
struct inet_diag_entry entry;
- const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
- sizeof(*r),
- INET_DIAG_REQ_BYTECODE);
struct inet_sock *inet = inet_sk(sk);
entry.family = sk->sk_family;
@@ -525,22 +517,19 @@ static int inet_csk_diag_dump(struct sock *sk,
return 0;
}
- return inet_csk_diag_fill(sk, skb, r->idiag_ext,
+ return inet_csk_diag_fill(sk, skb, r,
NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
}
static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
struct sk_buff *skb,
- struct netlink_callback *cb)
+ struct netlink_callback *cb,
+ struct inet_diag_req *r,
+ const struct nlattr *bc)
{
- struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
-
- if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
+ if (bc != NULL) {
struct inet_diag_entry entry;
- const struct nlattr *bc = nlmsg_find_attr(cb->nlh,
- sizeof(*r),
- INET_DIAG_REQ_BYTECODE);
entry.family = tw->tw_family;
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
@@ -563,7 +552,7 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
return 0;
}
- return inet_twsk_diag_fill(tw, skb, r->idiag_ext,
+ return inet_twsk_diag_fill(tw, skb, r,
NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
}
@@ -607,10 +596,8 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
r->idiag_inode = 0;
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
if (r->idiag_family == AF_INET6) {
- ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
- &inet6_rsk(req)->loc_addr);
- ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
- &inet6_rsk(req)->rmt_addr);
+ *(struct in6_addr *)r->id.idiag_src = inet6_rsk(req)->loc_addr;
+ *(struct in6_addr *)r->id.idiag_dst = inet6_rsk(req)->rmt_addr;
}
#endif
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
@@ -623,13 +610,13 @@ nlmsg_failure:
}
static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
- struct netlink_callback *cb)
+ struct netlink_callback *cb,
+ struct inet_diag_req *r,
+ const struct nlattr *bc)
{
struct inet_diag_entry entry;
- struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
struct inet_connection_sock *icsk = inet_csk(sk);
struct listen_sock *lopt;
- const struct nlattr *bc = NULL;
struct inet_sock *inet = inet_sk(sk);
int j, s_j;
int reqnum, s_reqnum;
@@ -649,9 +636,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
if (!lopt || !lopt->qlen)
goto out;
- if (nlmsg_attrlen(cb->nlh, sizeof(*r))) {
- bc = nlmsg_find_attr(cb->nlh, sizeof(*r),
- INET_DIAG_REQ_BYTECODE);
+ if (bc != NULL) {
entry.sport = inet->inet_num;
entry.userlocks = sk->sk_userlocks;
}
@@ -708,15 +693,15 @@ out:
return err;
}
-static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ struct inet_diag_req *r, struct nlattr *bc)
{
int i, num;
int s_i, s_num;
- struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
const struct inet_diag_handler *handler;
struct inet_hashinfo *hashinfo;
- handler = inet_diag_lock_handler(cb->nlh->nlmsg_type);
+ handler = inet_diag_lock_handler(r->sdiag_protocol);
if (IS_ERR(handler))
goto unlock;
@@ -745,6 +730,10 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
continue;
}
+ if (r->sdiag_family != AF_UNSPEC &&
+ sk->sk_family != r->sdiag_family)
+ goto next_listen;
+
if (r->id.idiag_sport != inet->inet_sport &&
r->id.idiag_sport)
goto next_listen;
@@ -754,7 +743,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
cb->args[3] > 0)
goto syn_recv;
- if (inet_csk_diag_dump(sk, skb, cb) < 0) {
+ if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) {
spin_unlock_bh(&ilb->lock);
goto done;
}
@@ -763,7 +752,7 @@ syn_recv:
if (!(r->idiag_states & TCPF_SYN_RECV))
goto next_listen;
- if (inet_diag_dump_reqs(skb, sk, cb) < 0) {
+ if (inet_diag_dump_reqs(skb, sk, cb, r, bc) < 0) {
spin_unlock_bh(&ilb->lock);
goto done;
}
@@ -810,13 +799,16 @@ skip_listen_ht:
goto next_normal;
if (!(r->idiag_states & (1 << sk->sk_state)))
goto next_normal;
+ if (r->sdiag_family != AF_UNSPEC &&
+ sk->sk_family != r->sdiag_family)
+ goto next_normal;
if (r->id.idiag_sport != inet->inet_sport &&
r->id.idiag_sport)
goto next_normal;
if (r->id.idiag_dport != inet->inet_dport &&
r->id.idiag_dport)
goto next_normal;
- if (inet_csk_diag_dump(sk, skb, cb) < 0) {
+ if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) {
spin_unlock_bh(lock);
goto done;
}
@@ -832,13 +824,16 @@ next_normal:
if (num < s_num)
goto next_dying;
+ if (r->sdiag_family != AF_UNSPEC &&
+ tw->tw_family != r->sdiag_family)
+ goto next_dying;
if (r->id.idiag_sport != tw->tw_sport &&
r->id.idiag_sport)
goto next_dying;
if (r->id.idiag_dport != tw->tw_dport &&
r->id.idiag_dport)
goto next_dying;
- if (inet_twsk_diag_dump(tw, skb, cb) < 0) {
+ if (inet_twsk_diag_dump(tw, skb, cb, r, bc) < 0) {
spin_unlock_bh(lock);
goto done;
}
@@ -857,10 +852,67 @@ unlock:
return skb->len;
}
-static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct nlattr *bc = NULL;
int hdrlen = sizeof(struct inet_diag_req);
+ if (nlmsg_attrlen(cb->nlh, hdrlen))
+ bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE);
+
+ return __inet_diag_dump(skb, cb, (struct inet_diag_req *)NLMSG_DATA(cb->nlh), bc);
+}
+
+static inline int inet_diag_type2proto(int type)
+{
+ switch (type) {
+ case TCPDIAG_GETSOCK:
+ return IPPROTO_TCP;
+ case DCCPDIAG_GETSOCK:
+ return IPPROTO_DCCP;
+ default:
+ return 0;
+ }
+}
+
+static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct inet_diag_req_compat *rc = NLMSG_DATA(cb->nlh);
+ struct inet_diag_req req;
+ struct nlattr *bc = NULL;
+ int hdrlen = sizeof(struct inet_diag_req_compat);
+
+ req.sdiag_family = AF_UNSPEC; /* compatibility */
+ req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type);
+ req.idiag_ext = rc->idiag_ext;
+ req.idiag_states = rc->idiag_states;
+ req.id = rc->id;
+
+ if (nlmsg_attrlen(cb->nlh, hdrlen))
+ bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE);
+
+ return __inet_diag_dump(skb, cb, &req, bc);
+}
+
+static int inet_diag_get_exact_compat(struct sk_buff *in_skb,
+ const struct nlmsghdr *nlh)
+{
+ struct inet_diag_req_compat *rc = NLMSG_DATA(nlh);
+ struct inet_diag_req req;
+
+ req.sdiag_family = rc->idiag_family;
+ req.sdiag_protocol = inet_diag_type2proto(nlh->nlmsg_type);
+ req.idiag_ext = rc->idiag_ext;
+ req.idiag_states = rc->idiag_states;
+ req.id = rc->id;
+
+ return inet_diag_get_exact(in_skb, nlh, &req);
+}
+
+static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+ int hdrlen = sizeof(struct inet_diag_req_compat);
+
if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX ||
nlmsg_len(nlh) < hdrlen)
return -EINVAL;
@@ -877,28 +929,54 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
return -EINVAL;
}
- return netlink_dump_start(idiagnl, skb, nlh,
- inet_diag_dump, NULL, 0);
+ return netlink_dump_start(sock_diag_nlsk, skb, nlh,
+ inet_diag_dump_compat, NULL, 0);
}
- return inet_diag_get_exact(skb, nlh);
+ return inet_diag_get_exact_compat(skb, nlh);
}
-static DEFINE_MUTEX(inet_diag_mutex);
-
-static void inet_diag_rcv(struct sk_buff *skb)
+static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
{
- mutex_lock(&inet_diag_mutex);
- netlink_rcv_skb(skb, &inet_diag_rcv_msg);
- mutex_unlock(&inet_diag_mutex);
+ int hdrlen = sizeof(struct inet_diag_req);
+
+ if (nlmsg_len(h) < hdrlen)
+ return -EINVAL;
+
+ if (h->nlmsg_flags & NLM_F_DUMP) {
+ if (nlmsg_attrlen(h, hdrlen)) {
+ struct nlattr *attr;
+ attr = nlmsg_find_attr(h, hdrlen,
+ INET_DIAG_REQ_BYTECODE);
+ if (attr == NULL ||
+ nla_len(attr) < sizeof(struct inet_diag_bc_op) ||
+ inet_diag_bc_audit(nla_data(attr), nla_len(attr)))
+ return -EINVAL;
+ }
+
+ return netlink_dump_start(sock_diag_nlsk, skb, h,
+ inet_diag_dump, NULL, 0);
+ }
+
+ return inet_diag_get_exact(skb, h, (struct inet_diag_req *)NLMSG_DATA(h));
}
+static struct sock_diag_handler inet_diag_handler = {
+ .family = AF_INET,
+ .dump = inet_diag_handler_dump,
+};
+
+static struct sock_diag_handler inet6_diag_handler = {
+ .family = AF_INET6,
+ .dump = inet_diag_handler_dump,
+};
+
int inet_diag_register(const struct inet_diag_handler *h)
{
const __u16 type = h->idiag_type;
int err = -EINVAL;
- if (type >= INET_DIAG_GETSOCK_MAX)
+ if (type >= IPPROTO_MAX)
goto out;
mutex_lock(&inet_diag_table_mutex);
@@ -917,7 +995,7 @@ void inet_diag_unregister(const struct inet_diag_handler *h)
{
const __u16 type = h->idiag_type;
- if (type >= INET_DIAG_GETSOCK_MAX)
+ if (type >= IPPROTO_MAX)
return;
mutex_lock(&inet_diag_table_mutex);
@@ -928,7 +1006,7 @@ EXPORT_SYMBOL_GPL(inet_diag_unregister);
static int __init inet_diag_init(void)
{
- const int inet_diag_table_size = (INET_DIAG_GETSOCK_MAX *
+ const int inet_diag_table_size = (IPPROTO_MAX *
sizeof(struct inet_diag_handler *));
int err = -ENOMEM;
@@ -936,25 +1014,34 @@ static int __init inet_diag_init(void)
if (!inet_diag_table)
goto out;
- idiagnl = netlink_kernel_create(&init_net, NETLINK_INET_DIAG, 0,
- inet_diag_rcv, NULL, THIS_MODULE);
- if (idiagnl == NULL)
- goto out_free_table;
- err = 0;
+ err = sock_diag_register(&inet_diag_handler);
+ if (err)
+ goto out_free_nl;
+
+ err = sock_diag_register(&inet6_diag_handler);
+ if (err)
+ goto out_free_inet;
+
+ sock_diag_register_inet_compat(inet_diag_rcv_msg_compat);
out:
return err;
-out_free_table:
+
+out_free_inet:
+ sock_diag_unregister(&inet_diag_handler);
+out_free_nl:
kfree(inet_diag_table);
goto out;
}
static void __exit inet_diag_exit(void)
{
- netlink_kernel_release(idiagnl);
+ sock_diag_unregister(&inet6_diag_handler);
+ sock_diag_unregister(&inet_diag_handler);
+ sock_diag_unregister_inet_compat(inet_diag_rcv_msg_compat);
kfree(inet_diag_table);
}
module_init(inet_diag_init);
module_exit(inet_diag_exit);
MODULE_LICENSE("GPL");
-MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_INET_DIAG);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 0);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index d55110e93120..fe070c1593ab 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -171,7 +171,7 @@ struct pcpu_tstats {
unsigned long rx_bytes;
unsigned long tx_packets;
unsigned long tx_bytes;
-};
+} __attribute__((aligned(4*sizeof(unsigned long))));
static struct net_device_stats *ipgre_get_stats(struct net_device *dev)
{
@@ -731,7 +731,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
else if (skb->protocol == htons(ETH_P_IPV6)) {
- struct neighbour *neigh = dst_get_neighbour(skb_dst(skb));
+ struct neighbour *neigh = dst_get_neighbour_noref(skb_dst(skb));
const struct in6_addr *addr6;
int addr_type;
@@ -835,6 +835,8 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
(skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
+ if (max_headroom > dev->needed_headroom)
+ dev->needed_headroom = max_headroom;
if (!new_skb) {
ip_rt_put(rt);
dev->stats.tx_dropped++;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 0bc95f3977d2..ff302bde8890 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -206,7 +206,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
}
rcu_read_lock();
- neigh = dst_get_neighbour(dst);
+ neigh = dst_get_neighbour_noref(dst);
if (neigh) {
int res = neigh_output(neigh, skb);
@@ -319,6 +319,20 @@ int ip_output(struct sk_buff *skb)
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
+/*
+ * copy saddr and daddr, possibly using 64bit load/stores
+ * Equivalent to :
+ * iph->saddr = fl4->saddr;
+ * iph->daddr = fl4->daddr;
+ */
+static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4)
+{
+ BUILD_BUG_ON(offsetof(typeof(*fl4), daddr) !=
+ offsetof(typeof(*fl4), saddr) + sizeof(fl4->saddr));
+ memcpy(&iph->saddr, &fl4->saddr,
+ sizeof(fl4->saddr) + sizeof(fl4->daddr));
+}
+
int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl)
{
struct sock *sk = skb->sk;
@@ -381,8 +395,8 @@ packet_routed:
iph->frag_off = 0;
iph->ttl = ip_select_ttl(inet, &rt->dst);
iph->protocol = sk->sk_protocol;
- iph->saddr = fl4->saddr;
- iph->daddr = fl4->daddr;
+ ip_copy_addrs(iph, fl4);
+
/* Transport layer set skb->h.foo itself. */
if (inet_opt && inet_opt->opt.optlen) {
@@ -1337,8 +1351,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
ip_select_ident(iph, &rt->dst, sk);
iph->ttl = ttl;
iph->protocol = sk->sk_protocol;
- iph->saddr = fl4->saddr;
- iph->daddr = fl4->daddr;
+ ip_copy_addrs(iph, fl4);
if (opt) {
iph->ihl += opt->optlen>>2;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 09ff51bf16a4..80d5fa450210 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -55,20 +55,13 @@
/*
* SOL_IP control messages.
*/
+#define PKTINFO_SKB_CB(__skb) ((struct in_pktinfo *)((__skb)->cb))
static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
{
- struct in_pktinfo info;
- struct rtable *rt = skb_rtable(skb);
+ struct in_pktinfo info = *PKTINFO_SKB_CB(skb);
info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
- if (rt) {
- info.ipi_ifindex = rt->rt_iif;
- info.ipi_spec_dst.s_addr = rt->rt_spec_dst;
- } else {
- info.ipi_ifindex = 0;
- info.ipi_spec_dst.s_addr = 0;
- }
put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
}
@@ -992,20 +985,28 @@ e_inval:
}
/**
- * ip_queue_rcv_skb - Queue an skb into sock receive queue
+ * ipv4_pktinfo_prepare - transfert some info from rtable to skb
* @sk: socket
* @skb: buffer
*
- * Queues an skb into socket receive queue. If IP_CMSG_PKTINFO option
- * is not set, we drop skb dst entry now, while dst cache line is hot.
+ * To support IP_CMSG_PKTINFO option, we store rt_iif and rt_spec_dst
+ * in skb->cb[] before dst drop.
+ * This way, receiver doesnt make cache line misses to read rtable.
*/
-int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+void ipv4_pktinfo_prepare(struct sk_buff *skb)
{
- if (!(inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO))
- skb_dst_drop(skb);
- return sock_queue_rcv_skb(sk, skb);
+ struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
+ const struct rtable *rt = skb_rtable(skb);
+
+ if (rt) {
+ pktinfo->ipi_ifindex = rt->rt_iif;
+ pktinfo->ipi_spec_dst.s_addr = rt->rt_spec_dst;
+ } else {
+ pktinfo->ipi_ifindex = 0;
+ pktinfo->ipi_spec_dst.s_addr = 0;
+ }
+ skb_dst_drop(skb);
}
-EXPORT_SYMBOL(ip_queue_rcv_skb);
int ip_setsockopt(struct sock *sk, int level,
int optname, char __user *optval, unsigned int optlen)
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 0da2afc97f32..915eb5265b2e 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -763,13 +763,15 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
struct sk_buff *skb;
struct bootp_pkt *b;
struct iphdr *h;
+ int hlen = LL_RESERVED_SPACE(dev);
+ int tlen = dev->needed_tailroom;
/* Allocate packet */
- skb = alloc_skb(sizeof(struct bootp_pkt) + LL_ALLOCATED_SPACE(dev) + 15,
+ skb = alloc_skb(sizeof(struct bootp_pkt) + hlen + tlen + 15,
GFP_KERNEL);
if (!skb)
return;
- skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ skb_reserve(skb, hlen);
b = (struct bootp_pkt *) skb_put(skb, sizeof(struct bootp_pkt));
memset(b, 0, sizeof(struct bootp_pkt));
@@ -822,8 +824,13 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
if (dev_hard_header(skb, dev, ntohs(skb->protocol),
- dev->broadcast, dev->dev_addr, skb->len) < 0 ||
- dev_queue_xmit(skb) < 0)
+ dev->broadcast, dev->dev_addr, skb->len) < 0) {
+ kfree_skb(skb);
+ printk("E");
+ return;
+ }
+
+ if (dev_queue_xmit(skb) < 0)
printk("E");
}
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 065effd8349a..94906908a416 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -148,7 +148,7 @@ struct pcpu_tstats {
unsigned long rx_bytes;
unsigned long tx_packets;
unsigned long tx_bytes;
-};
+} __attribute__((aligned(4*sizeof(unsigned long))));
static struct net_device_stats *ipip_get_stats(struct net_device *dev)
{
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 76a7f07b38b6..8e54490ee3f4 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1520,7 +1520,6 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
struct mr_table *mrt;
struct vif_device *v;
int ct;
- LIST_HEAD(list);
if (event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
@@ -1529,10 +1528,9 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
v = &mrt->vif_table[0];
for (ct = 0; ct < mrt->maxvif; ct++, v++) {
if (v->dev == dev)
- vif_delete(mrt, ct, 1, &list);
+ vif_delete(mrt, ct, 1, NULL);
}
}
- unregister_netdevice_many(&list);
return NOTIFY_DONE;
}
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index e59aabd0eae4..a057fe64debd 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -404,6 +404,7 @@ __ipq_rcv_skb(struct sk_buff *skb)
int status, type, pid, flags;
unsigned int nlmsglen, skblen;
struct nlmsghdr *nlh;
+ bool enable_timestamp = false;
skblen = skb->len;
if (skblen < sizeof(*nlh))
@@ -441,12 +442,13 @@ __ipq_rcv_skb(struct sk_buff *skb)
RCV_SKB_FAIL(-EBUSY);
}
} else {
- net_enable_timestamp();
+ enable_timestamp = true;
peer_pid = pid;
}
spin_unlock_bh(&queue_lock);
-
+ if (enable_timestamp)
+ net_enable_timestamp();
status = ipq_receive_peer(NLMSG_DATA(nlh), type,
nlmsglen - NLMSG_LENGTH(0));
if (status < 0)
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 466ea8bb7a4d..961eed4f510a 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -288,7 +288,7 @@ static void icmpmsg_put(struct seq_file *seq)
count = 0;
for (i = 0; i < ICMPMSG_MIB_MAX; i++) {
- val = snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics, i);
+ val = atomic_long_read(&net->mib.icmpmsg_statistics->mibs[i]);
if (val) {
type[count] = i;
vals[count++] = val;
@@ -307,6 +307,7 @@ static void icmp_put(struct seq_file *seq)
{
int i;
struct net *net = seq->private;
+ atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs;
seq_puts(seq, "\nIcmp: InMsgs InErrors");
for (i=0; icmpmibmap[i].name != NULL; i++)
@@ -319,15 +320,13 @@ static void icmp_put(struct seq_file *seq)
snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_INERRORS));
for (i=0; icmpmibmap[i].name != NULL; i++)
seq_printf(seq, " %lu",
- snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics,
- icmpmibmap[i].index));
+ atomic_long_read(ptr + icmpmibmap[i].index));
seq_printf(seq, " %lu %lu",
snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
snmp_fold_field((void __percpu **) net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
for (i=0; icmpmibmap[i].name != NULL; i++)
seq_printf(seq, " %lu",
- snmp_fold_field((void __percpu **) net->mib.icmpmsg_statistics,
- icmpmibmap[i].index | 0x100));
+ atomic_long_read(ptr + (icmpmibmap[i].index | 0x100)));
}
/*
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 007e2eb769d3..3ccda5ae8a27 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -292,7 +292,8 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
{
/* Charge it to the socket. */
- if (ip_queue_rcv_skb(sk, skb) < 0) {
+ ipv4_pktinfo_prepare(skb);
+ if (sock_queue_rcv_skb(sk, skb) < 0) {
kfree_skb(skb);
return NET_RX_DROP;
}
@@ -327,6 +328,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
unsigned int iphlen;
int err;
struct rtable *rt = *rtp;
+ int hlen, tlen;
if (length > rt->dst.dev->mtu) {
ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
@@ -336,12 +338,14 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
if (flags&MSG_PROBE)
goto out;
+ hlen = LL_RESERVED_SPACE(rt->dst.dev);
+ tlen = rt->dst.dev->needed_tailroom;
skb = sock_alloc_send_skb(sk,
- length + LL_ALLOCATED_SPACE(rt->dst.dev) + 15,
+ length + hlen + tlen + 15,
flags & MSG_DONTWAIT, &err);
if (skb == NULL)
goto error;
- skb_reserve(skb, LL_RESERVED_SPACE(rt->dst.dev));
+ skb_reserve(skb, hlen);
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 46af62363b8c..f30112f7559a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -108,7 +108,6 @@
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
-#include <net/atmclip.h>
#include <net/secure_seq.h>
#define RT_FL_TOS(oldflp4) \
@@ -420,7 +419,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
int len, HHUptod;
rcu_read_lock();
- n = dst_get_neighbour(&r->dst);
+ n = dst_get_neighbour_noref(&r->dst);
HHUptod = (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0;
rcu_read_unlock();
@@ -1019,23 +1018,18 @@ static int slow_chain_length(const struct rtable *head)
static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr)
{
- struct neigh_table *tbl = &arp_tbl;
static const __be32 inaddr_any = 0;
struct net_device *dev = dst->dev;
const __be32 *pkey = daddr;
struct neighbour *n;
-#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
- if (dev->type == ARPHRD_ATM)
- tbl = clip_tbl_hook;
-#endif
if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
pkey = &inaddr_any;
- n = __ipv4_neigh_lookup(tbl, dev, *(__force u32 *)pkey);
+ n = __ipv4_neigh_lookup(&arp_tbl, dev, *(__force u32 *)pkey);
if (n)
return n;
- return neigh_create(tbl, pkey, dev);
+ return neigh_create(&arp_tbl, pkey, dev);
}
static int rt_bind_neighbour(struct rtable *rt)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 34f5db1e1c8b..43dfccce62e9 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -888,18 +888,18 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset,
}
EXPORT_SYMBOL(tcp_sendpage);
-#define TCP_PAGE(sk) (sk->sk_sndmsg_page)
-#define TCP_OFF(sk) (sk->sk_sndmsg_off)
-
-static inline int select_size(const struct sock *sk, int sg)
+static inline int select_size(const struct sock *sk, bool sg)
{
const struct tcp_sock *tp = tcp_sk(sk);
int tmp = tp->mss_cache;
if (sg) {
- if (sk_can_gso(sk))
- tmp = 0;
- else {
+ if (sk_can_gso(sk)) {
+ /* Small frames wont use a full page:
+ * Payload will immediately follow tcp header.
+ */
+ tmp = SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER);
+ } else {
int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
if (tmp >= pgbreak &&
@@ -917,9 +917,9 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
struct iovec *iov;
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- int iovlen, flags;
+ int iovlen, flags, err, copied;
int mss_now, size_goal;
- int sg, err, copied;
+ bool sg;
long timeo;
lock_sock(sk);
@@ -946,7 +946,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto out_err;
- sg = sk->sk_route_caps & NETIF_F_SG;
+ sg = !!(sk->sk_route_caps & NETIF_F_SG);
while (--iovlen >= 0) {
size_t seglen = iov->iov_len;
@@ -1005,8 +1005,13 @@ new_segment:
} else {
int merge = 0;
int i = skb_shinfo(skb)->nr_frags;
- struct page *page = TCP_PAGE(sk);
- int off = TCP_OFF(sk);
+ struct page *page = sk->sk_sndmsg_page;
+ int off;
+
+ if (page && page_count(page) == 1)
+ sk->sk_sndmsg_off = 0;
+
+ off = sk->sk_sndmsg_off;
if (skb_can_coalesce(skb, i, page, off) &&
off != PAGE_SIZE) {
@@ -1023,7 +1028,7 @@ new_segment:
} else if (page) {
if (off == PAGE_SIZE) {
put_page(page);
- TCP_PAGE(sk) = page = NULL;
+ sk->sk_sndmsg_page = page = NULL;
off = 0;
}
} else
@@ -1049,9 +1054,9 @@ new_segment:
/* If this page was new, give it to the
* socket so it does not get leaked.
*/
- if (!TCP_PAGE(sk)) {
- TCP_PAGE(sk) = page;
- TCP_OFF(sk) = 0;
+ if (!sk->sk_sndmsg_page) {
+ sk->sk_sndmsg_page = page;
+ sk->sk_sndmsg_off = 0;
}
goto do_error;
}
@@ -1061,15 +1066,15 @@ new_segment:
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
skb_fill_page_desc(skb, i, page, off, copy);
- if (TCP_PAGE(sk)) {
+ if (sk->sk_sndmsg_page) {
get_page(page);
} else if (off + copy < PAGE_SIZE) {
get_page(page);
- TCP_PAGE(sk) = page;
+ sk->sk_sndmsg_page = page;
}
}
- TCP_OFF(sk) = off + copy;
+ sk->sk_sndmsg_off = off + copy;
}
if (!copied)
@@ -2653,7 +2658,8 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
EXPORT_SYMBOL(compat_tcp_getsockopt);
#endif
-struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features)
+struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
+ netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct tcphdr *th;
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 850c737e08e2..fc6d475f488f 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -292,7 +292,7 @@ int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
left * tp->mss_cache < sk->sk_gso_max_size)
return 1;
- return left <= tcp_max_burst(tp);
+ return left <= tcp_max_tso_deferred_mss(tp);
}
EXPORT_SYMBOL_GPL(tcp_is_cwnd_limited);
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 939edb3b8e4d..981497795d49 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -37,7 +37,7 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
static const struct inet_diag_handler tcp_diag_handler = {
.idiag_hashinfo = &tcp_hashinfo,
.idiag_get_info = tcp_diag_get_info,
- .idiag_type = TCPDIAG_GETSOCK,
+ .idiag_type = IPPROTO_TCP,
.idiag_info_size = sizeof(struct tcp_info),
};
@@ -54,4 +54,4 @@ static void __exit tcp_diag_exit(void)
module_init(tcp_diag_init);
module_exit(tcp_diag_exit);
MODULE_LICENSE("GPL");
-MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_INET_DIAG, TCPDIAG_GETSOCK);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 6);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 52b5c2d0ecd0..0cbb44076cfa 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2858,7 +2858,7 @@ static void tcp_try_keep_open(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
int state = TCP_CA_Open;
- if (tcp_left_out(tp) || tcp_any_retrans_done(sk) || tp->undo_marker)
+ if (tcp_left_out(tp) || tcp_any_retrans_done(sk))
state = TCP_CA_Disorder;
if (inet_csk(sk)->icsk_ca_state != state) {
@@ -2881,7 +2881,8 @@ static void tcp_try_to_open(struct sock *sk, int flag)
if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
tcp_try_keep_open(sk);
- tcp_moderate_cwnd(tp);
+ if (inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
+ tcp_moderate_cwnd(tp);
} else {
tcp_cwnd_down(sk, flag);
}
@@ -3009,11 +3010,11 @@ static void tcp_update_cwnd_in_recovery(struct sock *sk, int newly_acked_sacked,
* tcp_xmit_retransmit_queue().
*/
static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
- int newly_acked_sacked, int flag)
+ int newly_acked_sacked, bool is_dupack,
+ int flag)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
(tcp_fackets_out(tp) > tp->reordering));
int fast_rexmit = 0, mib_idx;
@@ -3066,17 +3067,6 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
}
break;
- case TCP_CA_Disorder:
- tcp_try_undo_dsack(sk);
- if (!tp->undo_marker ||
- /* For SACK case do not Open to allow to undo
- * catching for all duplicate ACKs. */
- tcp_is_reno(tp) || tp->snd_una != tp->high_seq) {
- tp->undo_marker = 0;
- tcp_set_ca_state(sk, TCP_CA_Open);
- }
- break;
-
case TCP_CA_Recovery:
if (tcp_is_reno(tp))
tcp_reset_reno_sack(tp);
@@ -3117,7 +3107,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
tcp_add_reno_sack(sk);
}
- if (icsk->icsk_ca_state == TCP_CA_Disorder)
+ if (icsk->icsk_ca_state <= TCP_CA_Disorder)
tcp_try_undo_dsack(sk);
if (!tcp_time_to_recover(sk)) {
@@ -3681,10 +3671,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
u32 prior_snd_una = tp->snd_una;
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
+ bool is_dupack = false;
u32 prior_in_flight;
u32 prior_fackets;
int prior_packets;
int prior_sacked = tp->sacked_out;
+ int pkts_acked = 0;
int newly_acked_sacked = 0;
int frto_cwnd = 0;
@@ -3757,6 +3749,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
/* See if we can take anything off of the retransmit queue. */
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
+ pkts_acked = prior_packets - tp->packets_out;
newly_acked_sacked = (prior_packets - prior_sacked) -
(tp->packets_out - tp->sacked_out);
@@ -3771,8 +3764,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
tcp_may_raise_cwnd(sk, flag))
tcp_cong_avoid(sk, ack, prior_in_flight);
- tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,
- newly_acked_sacked, flag);
+ is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
+ tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
+ is_dupack, flag);
} else {
if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
tcp_cong_avoid(sk, ack, prior_in_flight);
@@ -3784,6 +3778,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
return 1;
no_queue:
+ /* If data was DSACKed, see if we can undo a cwnd reduction. */
+ if (flag & FLAG_DSACKING_ACK)
+ tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
+ is_dupack, flag);
/* If this ack opens up a zero window, clear backoff. It was
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission.
@@ -3797,10 +3795,14 @@ invalid_ack:
return -1;
old_ack:
+ /* If data was SACKed, tag it and see if we should send more data.
+ * If data was DSACKed, see if we can undo a cwnd reduction.
+ */
if (TCP_SKB_CB(skb)->sacked) {
- tcp_sacktag_write_queue(sk, skb, prior_snd_una);
- if (icsk->icsk_ca_state == TCP_CA_Open)
- tcp_try_keep_open(sk);
+ flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
+ newly_acked_sacked = tp->sacked_out - prior_sacked;
+ tcp_fastretrans_alert(sk, pkts_acked, newly_acked_sacked,
+ is_dupack, flag);
}
SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
@@ -5809,6 +5811,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
goto discard;
if (th->syn) {
+ if (th->fin)
+ goto discard;
if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
return 1;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a9db4b1a2215..c4b8b09db9f5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1511,6 +1511,7 @@ exit:
return NULL;
put_and_exit:
tcp_clear_xmit_timers(newsk);
+ tcp_cleanup_congestion_control(newsk);
bh_unlock_sock(newsk);
sock_put(newsk);
goto exit;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 66363b689ad6..9dc146e5ed65 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -343,8 +343,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
tw6 = inet6_twsk((struct sock *)tw);
- ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr);
- ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr);
+ tw6->tw_v6_daddr = np->daddr;
+ tw6->tw_v6_rcv_saddr = np->rcv_saddr;
tw->tw_tclass = np->tclass;
tw->tw_ipv6only = np->ipv6only;
}
@@ -425,7 +425,7 @@ static inline void TCP_ECN_openreq_child(struct tcp_sock *tp,
*/
struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb)
{
- struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
+ struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC);
if (newsk != NULL) {
const struct inet_request_sock *ireq = inet_rsk(req);
@@ -495,7 +495,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->frto_counter = 0;
newtp->frto_highmark = 0;
- newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
+ if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops &&
+ !try_module_get(newicsk->icsk_ca_ops->owner))
+ newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
tcp_set_ca_state(newsk, TCP_CA_Open);
tcp_init_xmit_timers(newsk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 63170e297540..cf3068038942 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1093,6 +1093,13 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
{
int i, k, eat;
+ eat = min_t(int, len, skb_headlen(skb));
+ if (eat) {
+ __skb_pull(skb, eat);
+ len -= eat;
+ if (!len)
+ return;
+ }
eat = len;
k = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
@@ -1124,11 +1131,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
return -ENOMEM;
- /* If len == headlen, we avoid __skb_pull to preserve alignment. */
- if (unlikely(len < skb_headlen(skb)))
- __skb_pull(skb, len);
- else
- __pskb_trim_head(skb, len - skb_headlen(skb));
+ __pskb_trim_head(skb, len);
TCP_SKB_CB(skb)->seq += len;
skb->ip_summed = CHECKSUM_PARTIAL;
@@ -1581,7 +1584,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
* frame, so if we have space for more than 3 frames
* then send now.
*/
- if (limit > tcp_max_burst(tp) * tp->mss_cache)
+ if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache)
goto send_now;
}
@@ -2147,7 +2150,15 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
*/
TCP_SKB_CB(skb)->when = tcp_time_stamp;
- err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
+ /* make sure skb->data is aligned on arches that require it */
+ if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) {
+ struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+ -ENOBUFS;
+ } else {
+ err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
+ }
if (err == 0) {
/* Update global TCP statistics. */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5a65eeac1d29..ad481b32f1e3 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1358,7 +1358,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
if (inet_sk(sk)->inet_daddr)
sock_rps_save_rxhash(sk, skb);
- rc = ip_queue_rcv_skb(sk, skb);
+ rc = sock_queue_rcv_skb(sk, skb);
if (rc < 0) {
int is_udplite = IS_UDPLITE(sk);
@@ -1474,6 +1474,7 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
rc = 0;
+ ipv4_pktinfo_prepare(skb);
bh_lock_sock(sk);
if (!sock_owned_by_user(sk))
rc = __udp_queue_rcv_skb(sk, skb);
@@ -2247,7 +2248,8 @@ int udp4_ufo_send_check(struct sk_buff *skb)
return 0;
}
-struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features)
+struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
+ netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
unsigned int mss;
OpenPOWER on IntegriCloud