Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/af_inet.c                 14
-rw-r--r--  net/ipv4/esp4_offload.c             4
-rw-r--r--  net/ipv4/fou.c                     20
-rw-r--r--  net/ipv4/gre_offload.c              8
-rw-r--r--  net/ipv4/icmp.c                     9
-rw-r--r--  net/ipv4/inet_fragment.c            1
-rw-r--r--  net/ipv4/ip_gre.c                   2
-rw-r--r--  net/ipv4/ip_input.c               147
-rw-r--r--  net/ipv4/ip_output.c               22
-rw-r--r--  net/ipv4/ipmr.c                    22
-rw-r--r--  net/ipv4/ipmr_base.c                1
-rw-r--r--  net/ipv4/netfilter/nf_log_ipv4.c    8
-rw-r--r--  net/ipv4/ping.c                    10
-rw-r--r--  net/ipv4/proc.c                     2
-rw-r--r--  net/ipv4/raw.c                     11
-rw-r--r--  net/ipv4/tcp.c                     20
-rw-r--r--  net/ipv4/tcp_bbr.c                  6
-rw-r--r--  net/ipv4/tcp_input.c               48
-rw-r--r--  net/ipv4/tcp_ipv4.c                 3
-rw-r--r--  net/ipv4/tcp_minisocks.c          229
-rw-r--r--  net/ipv4/tcp_offload.c             17
-rw-r--r--  net/ipv4/tcp_output.c              14
-rw-r--r--  net/ipv4/tcp_rate.c                 4
-rw-r--r--  net/ipv4/udp.c                     11
-rw-r--r--  net/ipv4/udp_offload.c             13
25 files changed, 389 insertions(+), 257 deletions(-)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b403499fdabe..f2a0a3bab6b5 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -229,6 +229,7 @@ int inet_listen(struct socket *sock, int backlog)
err = inet_csk_listen_start(sk, backlog);
if (err)
goto out;
+ tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_LISTEN_CB, 0, NULL);
}
sk->sk_max_ack_backlog = backlog;
err = 0;
@@ -1384,12 +1385,12 @@ out:
}
EXPORT_SYMBOL(inet_gso_segment);
-struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
{
const struct net_offload *ops;
- struct sk_buff **pp = NULL;
- struct sk_buff *p;
+ struct sk_buff *pp = NULL;
const struct iphdr *iph;
+ struct sk_buff *p;
unsigned int hlen;
unsigned int off;
unsigned int id;
@@ -1425,7 +1426,7 @@ struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb)
flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF));
id >>= 16;
- for (p = *head; p; p = p->next) {
+ list_for_each_entry(p, head, list) {
struct iphdr *iph2;
u16 flush_id;
@@ -1505,8 +1506,8 @@ out:
}
EXPORT_SYMBOL(inet_gro_receive);
-static struct sk_buff **ipip_gro_receive(struct sk_buff **head,
- struct sk_buff *skb)
+static struct sk_buff *ipip_gro_receive(struct list_head *head,
+ struct sk_buff *skb)
{
if (NAPI_GRO_CB(skb)->encap_mark) {
NAPI_GRO_CB(skb)->flush = 1;
@@ -1882,6 +1883,7 @@ fs_initcall(ipv4_offload_init);
static struct packet_type ip_packet_type __read_mostly = {
.type = cpu_to_be16(ETH_P_IP),
.func = ip_rcv,
+ .list_func = ip_list_rcv,
};
static int __init inet_init(void)
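
For reference, every GRO receive conversion in this series follows the same shape: the hold-list argument changes from a singly linked sk_buff ** chain to a list_head, matching walks it with list_for_each_entry(), and the return value becomes the single skb to flush (or NULL) instead of a pointer into the chain. A minimal sketch (example_gro_receive is an illustrative name, not a function from this patch):

static struct sk_buff *example_gro_receive(struct list_head *head,
					   struct sk_buff *skb)
{
	struct sk_buff *pp = NULL;
	struct sk_buff *p;

	list_for_each_entry(p, head, list) {
		if (!NAPI_GRO_CB(p)->same_flow)
			continue;
		/* Compare protocol headers of p and skb here; clear
		 * NAPI_GRO_CB(p)->same_flow on a mismatch, or set pp = p
		 * when the flow is complete and must be flushed.
		 */
	}
	return pp;
}
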
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 7cf755ef9efb..bbeecd13e534 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -28,8 +28,8 @@
#include <linux/spinlock.h>
#include <net/udp.h>
-static struct sk_buff **esp4_gro_receive(struct sk_buff **head,
- struct sk_buff *skb)
+static struct sk_buff *esp4_gro_receive(struct list_head *head,
+ struct sk_buff *skb)
{
int offset = skb_gro_offset(skb);
struct xfrm_offload *xo;
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index c9ec1603666b..500a59906b87 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -224,14 +224,14 @@ drop:
return 0;
}
-static struct sk_buff **fou_gro_receive(struct sock *sk,
- struct sk_buff **head,
- struct sk_buff *skb)
+static struct sk_buff *fou_gro_receive(struct sock *sk,
+ struct list_head *head,
+ struct sk_buff *skb)
{
- const struct net_offload *ops;
- struct sk_buff **pp = NULL;
u8 proto = fou_from_sock(sk)->protocol;
const struct net_offload **offloads;
+ const struct net_offload *ops;
+ struct sk_buff *pp = NULL;
/* We can clear the encap_mark for FOU as we are essentially doing
* one of two possible things. We are either adding an L4 tunnel
@@ -305,13 +305,13 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
return guehdr;
}
-static struct sk_buff **gue_gro_receive(struct sock *sk,
- struct sk_buff **head,
- struct sk_buff *skb)
+static struct sk_buff *gue_gro_receive(struct sock *sk,
+ struct list_head *head,
+ struct sk_buff *skb)
{
const struct net_offload **offloads;
const struct net_offload *ops;
- struct sk_buff **pp = NULL;
+ struct sk_buff *pp = NULL;
struct sk_buff *p;
struct guehdr *guehdr;
size_t len, optlen, hdrlen, off;
@@ -397,7 +397,7 @@ static struct sk_buff **gue_gro_receive(struct sock *sk,
skb_gro_pull(skb, hdrlen);
- for (p = *head; p; p = p->next) {
+ list_for_each_entry(p, head, list) {
const struct guehdr *guehdr2;
if (!NAPI_GRO_CB(p)->same_flow)
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 6a7d980105f6..6c63524f598a 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -108,10 +108,10 @@ out:
return segs;
}
-static struct sk_buff **gre_gro_receive(struct sk_buff **head,
- struct sk_buff *skb)
+static struct sk_buff *gre_gro_receive(struct list_head *head,
+ struct sk_buff *skb)
{
- struct sk_buff **pp = NULL;
+ struct sk_buff *pp = NULL;
struct sk_buff *p;
const struct gre_base_hdr *greh;
unsigned int hlen, grehlen;
@@ -182,7 +182,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
null_compute_pseudo);
}
- for (p = *head; p; p = p->next) {
+ list_for_each_entry(p, head, list) {
const struct gre_base_hdr *greh2;
if (!NAPI_GRO_CB(p)->same_flow)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 1617604c9284..695979b7ef6d 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -429,14 +429,11 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
icmp_param->data.icmph.checksum = 0;
+ ipcm_init(&ipc);
inet->tos = ip_hdr(skb)->tos;
sk->sk_mark = mark;
daddr = ipc.addr = ip_hdr(skb)->saddr;
saddr = fib_compute_spec_dst(skb);
- ipc.opt = NULL;
- ipc.tx_flags = 0;
- ipc.ttl = 0;
- ipc.tos = -1;
if (icmp_param->replyopts.opt.opt.optlen) {
ipc.opt = &icmp_param->replyopts.opt;
@@ -710,11 +707,9 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
icmp_param.offset = skb_network_offset(skb_in);
inet_sk(sk)->tos = tos;
sk->sk_mark = mark;
+ ipcm_init(&ipc);
ipc.addr = iph->saddr;
ipc.opt = &icmp_param.replyopts.opt;
- ipc.tx_flags = 0;
- ipc.ttl = 0;
- ipc.tos = -1;
rt = icmp_route_lookup(net, &fl4, skb_in, iph, saddr, tos, mark,
type, code, &icmp_param);
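
The ipcm_init()/ipcm_init_sk() helpers this series substitutes for the open-coded field setup are not shown in this diff; reconstructed from the removed lines at the call sites here and in ping.c/raw.c/udp.c below, they plausibly look like the sketch that follows (the exact field list is an assumption):

static inline void ipcm_init(struct ipcm_cookie *ipcm)
{
	memset(ipcm, 0, sizeof(*ipcm));
	ipcm->tos = -1;		/* "unset", matching the old open-coded init */
}

static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
				const struct inet_sock *inet)
{
	ipcm_init(ipcm);
	ipcm->sockc.tsflags = inet->sk.sk_tsflags;
	ipcm->oif = inet->sk.sk_bound_dev_if;
	ipcm->addr = inet->inet_saddr;
}
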
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 1e4cf3ab560f..d3162baca9f1 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -20,6 +20,7 @@
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>
+#include <linux/rhashtable.h>
#include <net/sock.h>
#include <net/inet_frag.h>
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2d8efeecf619..c8ca5d8f0f75 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -587,6 +587,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
goto err_free_skb;
key = &tun_info->key;
+ if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT))
+ goto err_free_rt;
md = ip_tunnel_info_opts(tun_info);
if (!md)
goto err_free_rt;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 7582713dd18f..3196cf58f418 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -307,7 +307,8 @@ drop:
return true;
}
-static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+static int ip_rcv_finish_core(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
const struct iphdr *iph = ip_hdr(skb);
int (*edemux)(struct sk_buff *skb);
@@ -315,13 +316,6 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
struct rtable *rt;
int err;
- /* if ingress device is enslaved to an L3 master device pass the
- * skb to its handler for processing
- */
- skb = l3mdev_ip_rcv(skb);
- if (!skb)
- return NET_RX_SUCCESS;
-
if (net->ipv4.sysctl_ip_early_demux &&
!skb_dst(skb) &&
!skb->sk &&
@@ -393,7 +387,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
goto drop;
}
- return dst_input(skb);
+ return NET_RX_SUCCESS;
drop:
kfree_skb(skb);
@@ -405,13 +399,29 @@ drop_error:
goto drop;
}
+static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ int ret;
+
+ /* if ingress device is enslaved to an L3 master device pass the
+ * skb to its handler for processing
+ */
+ skb = l3mdev_ip_rcv(skb);
+ if (!skb)
+ return NET_RX_SUCCESS;
+
+ ret = ip_rcv_finish_core(net, sk, skb);
+ if (ret != NET_RX_DROP)
+ ret = dst_input(skb);
+ return ret;
+}
+
/*
* Main IP Receive routine.
*/
-int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
+static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
{
const struct iphdr *iph;
- struct net *net;
u32 len;
/* When the interface is in promisc. mode, drop all the crap
@@ -421,7 +431,6 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
goto drop;
- net = dev_net(dev);
__IP_UPD_PO_STATS(net, IPSTATS_MIB_IN, skb->len);
skb = skb_share_check(skb, GFP_ATOMIC);
@@ -489,9 +498,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
/* Must drop socket now because of tproxy. */
skb_orphan(skb);
- return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
- net, NULL, skb, dev, NULL,
- ip_rcv_finish);
+ return skb;
csum_error:
__IP_INC_STATS(net, IPSTATS_MIB_CSUMERRORS);
@@ -500,5 +507,113 @@ inhdr_error:
drop:
kfree_skb(skb);
out:
- return NET_RX_DROP;
+ return NULL;
+}
+
+/*
+ * IP receive entry point
+ */
+int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
+ struct net_device *orig_dev)
+{
+ struct net *net = dev_net(dev);
+
+ skb = ip_rcv_core(skb, net);
+ if (skb == NULL)
+ return NET_RX_DROP;
+ return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
+ net, NULL, skb, dev, NULL,
+ ip_rcv_finish);
+}
+
+static void ip_sublist_rcv_finish(struct list_head *head)
+{
+ struct sk_buff *skb, *next;
+
+ list_for_each_entry_safe(skb, next, head, list) {
+ list_del(&skb->list);
+ /* Handle the ip{6}_forward case, as sch_direct_xmit has
+ * another kind of SKB-list usage (see validate_xmit_skb_list)
+ */
+ skb->next = NULL;
+ dst_input(skb);
+ }
+}
+
+static void ip_list_rcv_finish(struct net *net, struct sock *sk,
+ struct list_head *head)
+{
+ struct dst_entry *curr_dst = NULL;
+ struct sk_buff *skb, *next;
+ struct list_head sublist;
+
+ INIT_LIST_HEAD(&sublist);
+ list_for_each_entry_safe(skb, next, head, list) {
+ struct dst_entry *dst;
+
+ list_del(&skb->list);
+ /* if ingress device is enslaved to an L3 master device pass the
+ * skb to its handler for processing
+ */
+ skb = l3mdev_ip_rcv(skb);
+ if (!skb)
+ continue;
+ if (ip_rcv_finish_core(net, sk, skb) == NET_RX_DROP)
+ continue;
+
+ dst = skb_dst(skb);
+ if (curr_dst != dst) {
+ /* dispatch old sublist */
+ if (!list_empty(&sublist))
+ ip_sublist_rcv_finish(&sublist);
+ /* start new sublist */
+ INIT_LIST_HEAD(&sublist);
+ curr_dst = dst;
+ }
+ list_add_tail(&skb->list, &sublist);
+ }
+ /* dispatch final sublist */
+ ip_sublist_rcv_finish(&sublist);
+}
+
+static void ip_sublist_rcv(struct list_head *head, struct net_device *dev,
+ struct net *net)
+{
+ NF_HOOK_LIST(NFPROTO_IPV4, NF_INET_PRE_ROUTING, net, NULL,
+ head, dev, NULL, ip_rcv_finish);
+ ip_list_rcv_finish(net, NULL, head);
+}
+
+/* Receive a list of IP packets */
+void ip_list_rcv(struct list_head *head, struct packet_type *pt,
+ struct net_device *orig_dev)
+{
+ struct net_device *curr_dev = NULL;
+ struct net *curr_net = NULL;
+ struct sk_buff *skb, *next;
+ struct list_head sublist;
+
+ INIT_LIST_HEAD(&sublist);
+ list_for_each_entry_safe(skb, next, head, list) {
+ struct net_device *dev = skb->dev;
+ struct net *net = dev_net(dev);
+
+ list_del(&skb->list);
+ skb = ip_rcv_core(skb, net);
+ if (skb == NULL)
+ continue;
+
+ if (curr_dev != dev || curr_net != net) {
+ /* dispatch old sublist */
+ if (!list_empty(&sublist))
+ ip_sublist_rcv(&sublist, curr_dev, curr_net);
+ /* start new sublist */
+ INIT_LIST_HEAD(&sublist);
+ curr_dev = dev;
+ curr_net = net;
+ }
+ list_add_tail(&skb->list, &sublist);
+ }
+ /* dispatch final sublist */
+ ip_sublist_rcv(&sublist, curr_dev, curr_net);
}
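
Both ip_list_rcv() and ip_list_rcv_finish() above apply the same batching idiom: walk the skb list, group consecutive packets that share a key (net/device in one case, dst_entry in the other) into a sublist, and hand each sublist downstream in one call. Reduced to its core, with key_of() and dispatch() as hypothetical placeholders:

static void sublist_dispatch_example(struct list_head *head)
{
	void *curr_key = NULL;
	struct sk_buff *skb, *next;
	struct list_head sublist;

	INIT_LIST_HEAD(&sublist);
	list_for_each_entry_safe(skb, next, head, list) {
		void *key = key_of(skb);		/* hypothetical */

		list_del(&skb->list);
		if (key != curr_key) {
			/* dispatch old sublist, start a new one */
			if (!list_empty(&sublist))
				dispatch(&sublist, curr_key);	/* hypothetical */
			INIT_LIST_HEAD(&sublist);
			curr_key = key;
		}
		list_add_tail(&skb->list, &sublist);
	}
	if (!list_empty(&sublist))
		dispatch(&sublist, curr_key);
}
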
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index b3308e9d9762..e2b6bd478afb 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -423,7 +423,8 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4)
}
/* Note: skb->sk can be different from sk, in case of tunnels */
-int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
+int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
+ __u8 tos)
{
struct inet_sock *inet = inet_sk(sk);
struct net *net = sock_net(sk);
@@ -462,7 +463,7 @@ int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
inet->inet_dport,
inet->inet_sport,
sk->sk_protocol,
- RT_CONN_FLAGS(sk),
+ RT_CONN_FLAGS_TOS(sk, tos),
sk->sk_bound_dev_if);
if (IS_ERR(rt))
goto no_route;
@@ -478,7 +479,7 @@ packet_routed:
skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0));
skb_reset_network_header(skb);
iph = ip_hdr(skb);
- *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
+ *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (tos & 0xff));
if (ip_dont_fragment(sk, &rt->dst) && !skb->ignore_df)
iph->frag_off = htons(IP_DF);
else
@@ -511,7 +512,7 @@ no_route:
kfree_skb(skb);
return -EHOSTUNREACH;
}
-EXPORT_SYMBOL(ip_queue_xmit);
+EXPORT_SYMBOL(__ip_queue_xmit);
static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
@@ -1145,14 +1146,15 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
cork->fragsize = ip_sk_use_pmtu(sk) ?
dst_mtu(&rt->dst) : rt->dst.dev->mtu;
- cork->gso_size = sk->sk_type == SOCK_DGRAM &&
- sk->sk_protocol == IPPROTO_UDP ? ipc->gso_size : 0;
+ cork->gso_size = ipc->gso_size;
cork->dst = &rt->dst;
cork->length = 0;
cork->ttl = ipc->ttl;
cork->tos = ipc->tos;
cork->priority = ipc->priority;
- cork->tx_flags = ipc->tx_flags;
+ cork->transmit_time = ipc->sockc.transmit_time;
+ cork->tx_flags = 0;
+ sock_tx_timestamp(sk, ipc->sockc.tsflags, &cork->tx_flags);
return 0;
}
@@ -1413,6 +1415,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
skb->priority = (cork->tos != -1) ? cork->priority: sk->sk_priority;
skb->mark = sk->sk_mark;
+ skb->tstamp = cork->transmit_time;
/*
* Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
* on dst refcount
@@ -1545,11 +1548,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
if (__ip_options_echo(net, &replyopts.opt.opt, skb, sopt))
return;
+ ipcm_init(&ipc);
ipc.addr = daddr;
- ipc.opt = NULL;
- ipc.tx_flags = 0;
- ipc.ttl = 0;
- ipc.tos = -1;
if (replyopts.opt.opt.optlen) {
ipc.opt = &replyopts.opt;
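
Since callers elsewhere still use the three-argument form, the old ip_queue_xmit() entry point presumably survives as a thin header wrapper that passes the socket's own TOS; a sketch reconstructed from the rename (the wrapper itself is not part of this diff):

static inline int ip_queue_xmit(struct sock *sk, struct sk_buff *skb,
				struct flowi *fl)
{
	return __ip_queue_xmit(sk, skb, fl, inet_sk(sk)->tos);
}
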
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9f79b9803a16..5660adcf7a04 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -60,6 +60,7 @@
#include <linux/netfilter_ipv4.h>
#include <linux/compat.h>
#include <linux/export.h>
+#include <linux/rhashtable.h>
#include <net/ip_tunnels.h>
#include <net/checksum.h>
#include <net/netlink.h>
@@ -1051,7 +1052,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
struct sk_buff *skb;
int ret;
- if (assert == IGMPMSG_WHOLEPKT)
+ if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE)
skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
else
skb = alloc_skb(128, GFP_ATOMIC);
@@ -1059,7 +1060,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
if (!skb)
return -ENOBUFS;
- if (assert == IGMPMSG_WHOLEPKT) {
+ if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) {
/* Ugly, but we have no choice with this interface.
* Duplicate old header, fix ihl, length etc.
* And all this only to mangle msg->im_msgtype and
@@ -1070,9 +1071,12 @@ static int ipmr_cache_report(struct mr_table *mrt,
skb_reset_transport_header(skb);
msg = (struct igmpmsg *)skb_network_header(skb);
memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
- msg->im_msgtype = IGMPMSG_WHOLEPKT;
+ msg->im_msgtype = assert;
msg->im_mbz = 0;
- msg->im_vif = mrt->mroute_reg_vif_num;
+ if (assert == IGMPMSG_WRVIFWHOLE)
+ msg->im_vif = vifi;
+ else
+ msg->im_vif = mrt->mroute_reg_vif_num;
ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
sizeof(struct iphdr));
@@ -1371,6 +1375,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
struct mr_table *mrt;
struct vifctl vif;
struct mfcctl mfc;
+ bool do_wrvifwhole;
u32 uval;
/* There's one exception to the lock - MRT_DONE which needs to unlock */
@@ -1501,10 +1506,12 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
break;
}
+ do_wrvifwhole = (val == IGMPMSG_WRVIFWHOLE);
val = !!val;
if (val != mrt->mroute_do_pim) {
mrt->mroute_do_pim = val;
mrt->mroute_do_assert = val;
+ mrt->mroute_do_wrvifwhole = do_wrvifwhole;
}
break;
case MRT_TABLE:
@@ -1982,6 +1989,9 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt,
MFC_ASSERT_THRESH)) {
c->_c.mfc_un.res.last_assert = jiffies;
ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
+ if (mrt->mroute_do_wrvifwhole)
+ ipmr_cache_report(mrt, skb, true_vifi,
+ IGMPMSG_WRVIFWHOLE);
}
goto dont_forward;
}
@@ -2658,7 +2668,9 @@ static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb)
mrt->mroute_reg_vif_num) ||
nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT,
mrt->mroute_do_assert) ||
- nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim))
+ nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim) ||
+ nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_WRVIFWHOLE,
+ mrt->mroute_do_wrvifwhole))
return false;
return true;
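
Judging by the do_wrvifwhole test above, which inspects val before it is normalized to 0/1 in the PIM setsockopt handler, a userspace PIM daemon would opt into the new whole-packet wrong-VIF reports roughly as below (hedged sketch; IGMPMSG_WRVIFWHOLE is introduced by this series, and MRT_PIM as the option name is inferred from context):

#include <linux/mroute.h>
#include <netinet/in.h>
#include <sys/socket.h>

static int enable_wrvifwhole(int mroute_fd)
{
	int val = IGMPMSG_WRVIFWHOLE;	/* non-zero: also enables PIM/asserts */

	return setsockopt(mroute_fd, IPPROTO_IP, MRT_PIM,
			  &val, sizeof(val));
}
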
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index cafb0506c8c9..1ad9aa62a97b 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -2,6 +2,7 @@
* Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation
*/
+#include <linux/rhashtable.h>
#include <linux/mroute_base.h>
/* Sets everything common except 'dev', since that is done under locking */
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
index 4388de0e5380..1e6f28c97d3a 100644
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -35,7 +35,7 @@ static const struct nf_loginfo default_loginfo = {
};
/* One level of recursion won't kill us */
-static void dump_ipv4_packet(struct nf_log_buf *m,
+static void dump_ipv4_packet(struct net *net, struct nf_log_buf *m,
const struct nf_loginfo *info,
const struct sk_buff *skb, unsigned int iphoff)
{
@@ -183,7 +183,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m,
/* Max length: 3+maxlen */
if (!iphoff) { /* Only recurse once. */
nf_log_buf_add(m, "[");
- dump_ipv4_packet(m, info, skb,
+ dump_ipv4_packet(net, m, info, skb,
iphoff + ih->ihl*4+sizeof(_icmph));
nf_log_buf_add(m, "] ");
}
@@ -251,7 +251,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m,
/* Max length: 15 "UID=4294967295 " */
if ((logflags & NF_LOG_UID) && !iphoff)
- nf_log_dump_sk_uid_gid(m, skb->sk);
+ nf_log_dump_sk_uid_gid(net, m, skb->sk);
/* Max length: 16 "MARK=0xFFFFFFFF " */
if (!iphoff && skb->mark)
@@ -333,7 +333,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf,
if (in != NULL)
dump_ipv4_mac_header(m, loginfo, skb);
- dump_ipv4_packet(m, loginfo, skb, 0);
+ dump_ipv4_packet(net, m, loginfo, skb, 0);
nf_log_buf_close(m);
}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 2ed64bca54e3..b54c964ad925 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -739,13 +739,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
/* no remote port */
}
- ipc.sockc.tsflags = sk->sk_tsflags;
- ipc.addr = inet->inet_saddr;
- ipc.opt = NULL;
- ipc.oif = sk->sk_bound_dev_if;
- ipc.tx_flags = 0;
- ipc.ttl = 0;
- ipc.tos = -1;
+ ipcm_init_sk(&ipc, inet);
if (msg->msg_controllen) {
err = ip_cmsg_send(sk, msg, &ipc, false);
@@ -769,8 +763,6 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
rcu_read_unlock();
}
- sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags);
-
saddr = ipc.addr;
ipc.addr = faddr = daddr;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 77350c1256ce..b46e4cf9a55a 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -287,6 +287,8 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPDelivered", LINUX_MIB_TCPDELIVERED),
SNMP_MIB_ITEM("TCPDeliveredCE", LINUX_MIB_TCPDELIVEREDCE),
SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED),
+ SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP),
+ SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index abb3c9490c55..33df4d76db2d 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -381,6 +381,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
+ skb->tstamp = sockc->transmit_time;
skb_dst_set(skb, &rt->dst);
*rtp = NULL;
@@ -561,13 +562,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
daddr = inet->inet_daddr;
}
- ipc.sockc.tsflags = sk->sk_tsflags;
- ipc.addr = inet->inet_saddr;
- ipc.opt = NULL;
- ipc.tx_flags = 0;
- ipc.ttl = 0;
- ipc.tos = -1;
- ipc.oif = sk->sk_bound_dev_if;
+ ipcm_init_sk(&ipc, inet);
if (msg->msg_controllen) {
err = ip_cmsg_send(sk, msg, &ipc, false);
@@ -670,8 +665,6 @@ back_from_confirm:
&rt, msg->msg_flags, &ipc.sockc);
else {
- sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags);
-
if (!ipc.addr)
ipc.addr = fl4.daddr;
lock_sock(sk);
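
The transmit_time value copied into skb->tstamp above reaches the kernel through sendmsg() control data; the userspace side, from the same SO_TXTIME series, looks roughly like this sketch (SCM_TXTIME availability depends on your headers):

#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

static ssize_t send_at(int fd, const void *buf, size_t len,
		       uint64_t txtime_ns)
{
	char control[CMSG_SPACE(sizeof(txtime_ns))] = {0};
	struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = control, .msg_controllen = sizeof(control),
	};
	struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);

	/* Socket must have SO_TXTIME enabled via setsockopt() first. */
	cm->cmsg_level = SOL_SOCKET;
	cm->cmsg_type = SCM_TXTIME;
	cm->cmsg_len = CMSG_LEN(sizeof(txtime_ns));
	memcpy(CMSG_DATA(cm), &txtime_ns, sizeof(txtime_ns));
	return sendmsg(fd, &msg, 0);
}
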
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4491faf83f4f..bce53b1728a6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -817,8 +817,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
* This occurs when user tries to read
* from never connected socket.
*/
- if (!sock_flag(sk, SOCK_DONE))
- ret = -ENOTCONN;
+ ret = -ENOTCONN;
break;
}
if (!timeo) {
@@ -1241,7 +1240,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
/* 'common' sending to sendq */
}
- sockc.tsflags = sk->sk_tsflags;
+ sockcm_init(&sockc, sk);
if (msg->msg_controllen) {
err = sock_cmsg_send(sk, msg, &sockc);
if (unlikely(err)) {
@@ -1275,9 +1274,6 @@ restart:
int linear;
new_segment:
- /* Allocate new segment. If the interface is SG,
- * allocate skb fitting to single page.
- */
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
@@ -2042,13 +2038,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
break;
if (sk->sk_state == TCP_CLOSE) {
- if (!sock_flag(sk, SOCK_DONE)) {
- /* This occurs when user tries to read
- * from never connected socket.
- */
- copied = -ENOTCONN;
- break;
- }
+ /* This occurs when user tries to read
+ * from never connected socket.
+ */
+ copied = -ENOTCONN;
break;
}
@@ -2576,6 +2569,7 @@ int tcp_disconnect(struct sock *sk, int flags)
sk->sk_shutdown = 0;
sock_reset_flag(sk, SOCK_DONE);
tp->srtt_us = 0;
+ tp->rcv_rtt_last_tsecr = 0;
tp->write_seq += tp->max_window + 2;
if (tp->write_seq == 0)
tp->write_seq = 1;
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 58e2f479ffb4..3b5f45b9e81e 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -205,7 +205,11 @@ static u32 bbr_bw(const struct sock *sk)
*/
static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
{
- rate *= tcp_mss_to_mtu(sk, tcp_sk(sk)->mss_cache);
+ unsigned int mss = tcp_sk(sk)->mss_cache;
+
+ if (!tcp_needs_internal_pacing(sk))
+ mss = tcp_mss_to_mtu(sk, mss);
+ rate *= mss;
rate *= gain;
rate >>= BBR_SCALE;
rate *= USEC_PER_SEC;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8e5522c6833a..91dbb9afb950 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -78,6 +78,7 @@
#include <linux/errqueue.h>
#include <trace/events/tcp.h>
#include <linux/static_key.h>
+#include <net/busy_poll.h>
int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
@@ -582,9 +583,12 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
{
struct tcp_sock *tp = tcp_sk(sk);
- if (tp->rx_opt.rcv_tsecr &&
- (TCP_SKB_CB(skb)->end_seq -
- TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) {
+ if (tp->rx_opt.rcv_tsecr == tp->rcv_rtt_last_tsecr)
+ return;
+ tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
+
+ if (TCP_SKB_CB(skb)->end_seq -
+ TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss) {
u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
u32 delta_us;
@@ -3458,7 +3462,7 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
static void tcp_store_ts_recent(struct tcp_sock *tp)
{
tp->rx_opt.ts_recent = tp->rx_opt.rcv_tsval;
- tp->rx_opt.ts_recent_stamp = get_seconds();
+ tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
}
static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
@@ -4339,6 +4343,11 @@ static bool tcp_try_coalesce(struct sock *sk,
if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
return false;
+#ifdef CONFIG_TLS_DEVICE
+ if (from->decrypted != to->decrypted)
+ return false;
+#endif
+
if (!skb_try_coalesce(to, from, fragstolen, &delta))
return false;
@@ -4617,8 +4626,10 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
skb->data_len = data_len;
skb->len = size;
- if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
+ if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
goto err_free;
+ }
err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
if (err)
@@ -4674,18 +4685,21 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
* Out of sequence packets to the out_of_order_queue.
*/
if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
- if (tcp_receive_window(tp) == 0)
+ if (tcp_receive_window(tp) == 0) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
goto out_of_window;
+ }
/* Ok. In sequence. In window. */
queue_and_out:
if (skb_queue_len(&sk->sk_receive_queue) == 0)
sk_forced_mem_schedule(sk, skb->truesize);
- else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
+ else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVQDROP);
goto drop;
+ }
eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
- tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
if (skb->len)
tcp_event_data_recv(sk, skb);
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -4741,8 +4755,10 @@ drop:
/* If window is closed, drop tail of packet. But after
* remembering D-SACK for its head made in previous line.
*/
- if (!tcp_receive_window(tp))
+ if (!tcp_receive_window(tp)) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPZEROWINDOWDROP);
goto out_of_window;
+ }
goto queue_and_out;
}
@@ -4860,6 +4876,9 @@ restart:
break;
memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
+#ifdef CONFIG_TLS_DEVICE
+ nskb->decrypted = skb->decrypted;
+#endif
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
if (list)
__skb_queue_before(list, skb, nskb);
@@ -4887,6 +4906,10 @@ restart:
skb == tail ||
(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
goto end;
+#ifdef CONFIG_TLS_DEVICE
+ if (skb->decrypted != nskb->decrypted)
+ goto end;
+#endif
}
}
}
@@ -5484,6 +5507,11 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
tcp_ack(sk, skb, 0);
__kfree_skb(skb);
tcp_data_snd_check(sk);
+ /* When receiving a pure ACK in the fast path, update
+ * rcv_rtt_last_tsecr directly instead of calling
+ * tcp_rcv_rtt_measure_ts()
+ */
+ tp->rcv_rtt_last_tsecr = tp->rx_opt.rcv_tsecr;
return;
} else { /* Header too small */
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
@@ -5585,6 +5613,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
if (skb) {
icsk->icsk_af_ops->sk_rx_dst_set(sk, skb);
security_inet_conn_established(sk, skb);
+ sk_mark_napi_id(sk, skb);
}
tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
@@ -6413,6 +6442,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
tcp_rsk(req)->snt_isn = isn;
tcp_rsk(req)->txhash = net_tx_rndhash();
tcp_openreq_init_rwin(req, sk, dst);
+ sk_rx_queue_set(req_to_sk(req), skb);
if (!want_cookie) {
tcp_reqsk_record_syn(sk, req, skb);
fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3b2711e33e4c..9e041fa5c545 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -155,7 +155,8 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
and use initial timestamp retrieved from peer table.
*/
if (tcptw->tw_ts_recent_stamp &&
- (!twp || (reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
+ (!twp || (reuse && time_after32(ktime_get_seconds(),
+ tcptw->tw_ts_recent_stamp)))) {
/* In case of repair and re-using TIME-WAIT sockets we still
* want to be sure that it is safe as above but honor the
* sequence numbers and time stamps set as part of the repair
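
time_after32() lets the PAWS check keep working once the timestamp is a truncated 64-bit monotonic value; it compares 32-bit second counters the way time_after() compares jiffies. Its definition, shown for reference from memory of the kernel's time-comparison helpers:

#define time_after32(a, b)	((s32)((u32)(b) - (u32)(a)) < 0)
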
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 1dda1341a223..75ef332a7caf 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -144,7 +144,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
tw->tw_substate = TCP_TIME_WAIT;
tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq;
if (tmp_opt.saw_tstamp) {
- tcptw->tw_ts_recent_stamp = get_seconds();
+ tcptw->tw_ts_recent_stamp = ktime_get_seconds();
tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
}
@@ -189,7 +189,7 @@ kill:
if (tmp_opt.saw_tstamp) {
tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
- tcptw->tw_ts_recent_stamp = get_seconds();
+ tcptw->tw_ts_recent_stamp = ktime_get_seconds();
}
inet_twsk_put(tw);
@@ -449,119 +449,122 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
struct sk_buff *skb)
{
struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC);
+ const struct inet_request_sock *ireq = inet_rsk(req);
+ struct tcp_request_sock *treq = tcp_rsk(req);
+ struct inet_connection_sock *newicsk;
+ struct tcp_sock *oldtp, *newtp;
- if (newsk) {
- const struct inet_request_sock *ireq = inet_rsk(req);
- struct tcp_request_sock *treq = tcp_rsk(req);
- struct inet_connection_sock *newicsk = inet_csk(newsk);
- struct tcp_sock *newtp = tcp_sk(newsk);
- struct tcp_sock *oldtp = tcp_sk(sk);
-
- smc_check_reset_syn_req(oldtp, req, newtp);
-
- /* Now setup tcp_sock */
- newtp->pred_flags = 0;
-
- newtp->rcv_wup = newtp->copied_seq =
- newtp->rcv_nxt = treq->rcv_isn + 1;
- newtp->segs_in = 1;
-
- newtp->snd_sml = newtp->snd_una =
- newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
-
- INIT_LIST_HEAD(&newtp->tsq_node);
- INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
-
- tcp_init_wl(newtp, treq->rcv_isn);
-
- newtp->srtt_us = 0;
- newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
- minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U);
- newicsk->icsk_rto = TCP_TIMEOUT_INIT;
- newicsk->icsk_ack.lrcvtime = tcp_jiffies32;
-
- newtp->packets_out = 0;
- newtp->retrans_out = 0;
- newtp->sacked_out = 0;
- newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
- newtp->tlp_high_seq = 0;
- newtp->lsndtime = tcp_jiffies32;
- newsk->sk_txhash = treq->txhash;
- newtp->last_oow_ack_time = 0;
- newtp->total_retrans = req->num_retrans;
-
- /* So many TCP implementations out there (incorrectly) count the
- * initial SYN frame in their delayed-ACK and congestion control
- * algorithms that we must have the following bandaid to talk
- * efficiently to them. -DaveM
- */
- newtp->snd_cwnd = TCP_INIT_CWND;
- newtp->snd_cwnd_cnt = 0;
-
- /* There's a bubble in the pipe until at least the first ACK. */
- newtp->app_limited = ~0U;
-
- tcp_init_xmit_timers(newsk);
- newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1;
-
- newtp->rx_opt.saw_tstamp = 0;
-
- newtp->rx_opt.dsack = 0;
- newtp->rx_opt.num_sacks = 0;
-
- newtp->urg_data = 0;
-
- if (sock_flag(newsk, SOCK_KEEPOPEN))
- inet_csk_reset_keepalive_timer(newsk,
- keepalive_time_when(newtp));
-
- newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
- newtp->rx_opt.sack_ok = ireq->sack_ok;
- newtp->window_clamp = req->rsk_window_clamp;
- newtp->rcv_ssthresh = req->rsk_rcv_wnd;
- newtp->rcv_wnd = req->rsk_rcv_wnd;
- newtp->rx_opt.wscale_ok = ireq->wscale_ok;
- if (newtp->rx_opt.wscale_ok) {
- newtp->rx_opt.snd_wscale = ireq->snd_wscale;
- newtp->rx_opt.rcv_wscale = ireq->rcv_wscale;
- } else {
- newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
- newtp->window_clamp = min(newtp->window_clamp, 65535U);
- }
- newtp->snd_wnd = (ntohs(tcp_hdr(skb)->window) <<
- newtp->rx_opt.snd_wscale);
- newtp->max_window = newtp->snd_wnd;
-
- if (newtp->rx_opt.tstamp_ok) {
- newtp->rx_opt.ts_recent = req->ts_recent;
- newtp->rx_opt.ts_recent_stamp = get_seconds();
- newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
- } else {
- newtp->rx_opt.ts_recent_stamp = 0;
- newtp->tcp_header_len = sizeof(struct tcphdr);
- }
- newtp->tsoffset = treq->ts_off;
+ if (!newsk)
+ return NULL;
+
+ newicsk = inet_csk(newsk);
+ newtp = tcp_sk(newsk);
+ oldtp = tcp_sk(sk);
+
+ smc_check_reset_syn_req(oldtp, req, newtp);
+
+ /* Now setup tcp_sock */
+ newtp->pred_flags = 0;
+
+ newtp->rcv_wup = newtp->copied_seq =
+ newtp->rcv_nxt = treq->rcv_isn + 1;
+ newtp->segs_in = 1;
+
+ newtp->snd_sml = newtp->snd_una =
+ newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
+
+ INIT_LIST_HEAD(&newtp->tsq_node);
+ INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
+
+ tcp_init_wl(newtp, treq->rcv_isn);
+
+ newtp->srtt_us = 0;
+ newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
+ minmax_reset(&newtp->rtt_min, tcp_jiffies32, ~0U);
+ newicsk->icsk_rto = TCP_TIMEOUT_INIT;
+ newicsk->icsk_ack.lrcvtime = tcp_jiffies32;
+
+ newtp->packets_out = 0;
+ newtp->retrans_out = 0;
+ newtp->sacked_out = 0;
+ newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
+ newtp->tlp_high_seq = 0;
+ newtp->lsndtime = tcp_jiffies32;
+ newsk->sk_txhash = treq->txhash;
+ newtp->last_oow_ack_time = 0;
+ newtp->total_retrans = req->num_retrans;
+
+ /* So many TCP implementations out there (incorrectly) count the
+ * initial SYN frame in their delayed-ACK and congestion control
+ * algorithms that we must have the following bandaid to talk
+ * efficiently to them. -DaveM
+ */
+ newtp->snd_cwnd = TCP_INIT_CWND;
+ newtp->snd_cwnd_cnt = 0;
+
+ /* There's a bubble in the pipe until at least the first ACK. */
+ newtp->app_limited = ~0U;
+
+ tcp_init_xmit_timers(newsk);
+ newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1;
+
+ newtp->rx_opt.saw_tstamp = 0;
+
+ newtp->rx_opt.dsack = 0;
+ newtp->rx_opt.num_sacks = 0;
+
+ newtp->urg_data = 0;
+
+ if (sock_flag(newsk, SOCK_KEEPOPEN))
+ inet_csk_reset_keepalive_timer(newsk,
+ keepalive_time_when(newtp));
+
+ newtp->rx_opt.tstamp_ok = ireq->tstamp_ok;
+ newtp->rx_opt.sack_ok = ireq->sack_ok;
+ newtp->window_clamp = req->rsk_window_clamp;
+ newtp->rcv_ssthresh = req->rsk_rcv_wnd;
+ newtp->rcv_wnd = req->rsk_rcv_wnd;
+ newtp->rx_opt.wscale_ok = ireq->wscale_ok;
+ if (newtp->rx_opt.wscale_ok) {
+ newtp->rx_opt.snd_wscale = ireq->snd_wscale;
+ newtp->rx_opt.rcv_wscale = ireq->rcv_wscale;
+ } else {
+ newtp->rx_opt.snd_wscale = newtp->rx_opt.rcv_wscale = 0;
+ newtp->window_clamp = min(newtp->window_clamp, 65535U);
+ }
+ newtp->snd_wnd = ntohs(tcp_hdr(skb)->window) << newtp->rx_opt.snd_wscale;
+ newtp->max_window = newtp->snd_wnd;
+
+ if (newtp->rx_opt.tstamp_ok) {
+ newtp->rx_opt.ts_recent = req->ts_recent;
+ newtp->rx_opt.ts_recent_stamp = ktime_get_seconds();
+ newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
+ } else {
+ newtp->rx_opt.ts_recent_stamp = 0;
+ newtp->tcp_header_len = sizeof(struct tcphdr);
+ }
+ newtp->tsoffset = treq->ts_off;
#ifdef CONFIG_TCP_MD5SIG
- newtp->md5sig_info = NULL; /*XXX*/
- if (newtp->af_specific->md5_lookup(sk, newsk))
- newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
+ newtp->md5sig_info = NULL; /*XXX*/
+ if (newtp->af_specific->md5_lookup(sk, newsk))
+ newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
- if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
- newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
- newtp->rx_opt.mss_clamp = req->mss;
- tcp_ecn_openreq_child(newtp, req);
- newtp->fastopen_req = NULL;
- newtp->fastopen_rsk = NULL;
- newtp->syn_data_acked = 0;
- newtp->rack.mstamp = 0;
- newtp->rack.advanced = 0;
- newtp->rack.reo_wnd_steps = 1;
- newtp->rack.last_delivered = 0;
- newtp->rack.reo_wnd_persist = 0;
- newtp->rack.dsack_seen = 0;
-
- __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
- }
+ if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
+ newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len;
+ newtp->rx_opt.mss_clamp = req->mss;
+ tcp_ecn_openreq_child(newtp, req);
+ newtp->fastopen_req = NULL;
+ newtp->fastopen_rsk = NULL;
+ newtp->syn_data_acked = 0;
+ newtp->rack.mstamp = 0;
+ newtp->rack.advanced = 0;
+ newtp->rack.reo_wnd_steps = 1;
+ newtp->rack.last_delivered = 0;
+ newtp->rack.reo_wnd_persist = 0;
+ newtp->rack.dsack_seen = 0;
+
+ __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
+
return newsk;
}
EXPORT_SYMBOL(tcp_create_openreq_child);
@@ -600,7 +603,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
* it can be estimated (approximately)
* from another data.
*/
- tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout);
+ tmp_opt.ts_recent_stamp = ktime_get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->num_timeout);
paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
}
}
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 8cc7c3487330..870b0a335061 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -180,9 +180,9 @@ out:
return segs;
}
-struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+struct sk_buff *tcp_gro_receive(struct list_head *head, struct sk_buff *skb)
{
- struct sk_buff **pp = NULL;
+ struct sk_buff *pp = NULL;
struct sk_buff *p;
struct tcphdr *th;
struct tcphdr *th2;
@@ -220,7 +220,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
len = skb_gro_len(skb);
flags = tcp_flag_word(th);
- for (; (p = *head); head = &p->next) {
+ list_for_each_entry(p, head, list) {
if (!NAPI_GRO_CB(p)->same_flow)
continue;
@@ -233,7 +233,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
goto found;
}
-
+ p = NULL;
goto out_check_final;
found:
@@ -262,8 +262,11 @@ found:
flush |= (len - 1) >= mss;
flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
+#ifdef CONFIG_TLS_DEVICE
+ flush |= p->decrypted ^ skb->decrypted;
+#endif
- if (flush || skb_gro_receive(head, skb)) {
+ if (flush || skb_gro_receive(p, skb)) {
mss = 1;
goto out_check_final;
}
@@ -277,7 +280,7 @@ out_check_final:
TCP_FLAG_FIN));
if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
- pp = head;
+ pp = p;
out:
NAPI_GRO_CB(skb)->flush |= (flush != 0);
@@ -302,7 +305,7 @@ int tcp_gro_complete(struct sk_buff *skb)
}
EXPORT_SYMBOL(tcp_gro_complete);
-static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
+static struct sk_buff *tcp4_gro_receive(struct list_head *head, struct sk_buff *skb)
{
/* Don't bother verifying checksum if we're going to flush anyway. */
if (!NAPI_GRO_CB(skb)->flush &&
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 00e5a300ddb9..6cbab56e7407 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -973,17 +973,6 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
-/* BBR congestion control needs pacing.
- * Same remark for SO_MAX_PACING_RATE.
- * sch_fq packet scheduler is efficiently handling pacing,
- * but is not always installed/used.
- * Return true if TCP stack should pace packets itself.
- */
-static bool tcp_needs_internal_pacing(const struct sock *sk)
-{
- return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
-}
-
static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
{
u64 len_ns;
@@ -995,9 +984,6 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
if (!rate || rate == ~0U)
return;
- /* Should account for header sizes as sch_fq does,
- * but lets make things simple.
- */
len_ns = (u64)skb->len * NSEC_PER_SEC;
do_div(len_ns, rate);
hrtimer_start(&tcp_sk(sk)->pacing_timer,
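
tcp_needs_internal_pacing() is deleted here but still called from tcp_bbr.c in this same series, so it presumably moved to a shared header (likely include/net/tcp.h) unchanged; sketch of the relocated helper:

static inline bool tcp_needs_internal_pacing(const struct sock *sk)
{
	return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
}
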
diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c
index c61240e43923..4dff40dad4dc 100644
--- a/net/ipv4/tcp_rate.c
+++ b/net/ipv4/tcp_rate.c
@@ -146,6 +146,10 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
rs->prior_mstamp); /* ack phase */
rs->interval_us = max(snd_us, ack_us);
+ /* Record both segment send and ack receive intervals */
+ rs->snd_interval_us = snd_us;
+ rs->rcv_interval_us = ack_us;
+
/* Normally we expect interval_us >= min-rtt.
* Note that rate may still be over-estimated when a spuriously
* retransmitted skb was first (s)acked because "interval_us"
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 24e116ddae79..060e841dde40 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -926,11 +926,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */
return -EOPNOTSUPP;
- ipc.opt = NULL;
- ipc.tx_flags = 0;
- ipc.ttl = 0;
- ipc.tos = -1;
-
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
fl4 = &inet->cork.fl.u.ip4;
@@ -977,9 +972,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
connected = 1;
}
- ipc.sockc.tsflags = sk->sk_tsflags;
- ipc.addr = inet->inet_saddr;
- ipc.oif = sk->sk_bound_dev_if;
+ ipcm_init_sk(&ipc, inet);
ipc.gso_size = up->gso_size;
if (msg->msg_controllen) {
@@ -1027,8 +1020,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
saddr = ipc.addr;
ipc.addr = faddr = daddr;
- sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags);
-
if (ipc.opt && ipc.opt->opt.srr) {
if (!daddr) {
err = -EINVAL;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 69c54540d5b4..0c0522b79b43 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -343,10 +343,11 @@ out:
return segs;
}
-struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
- struct udphdr *uh, udp_lookup_t lookup)
+struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
+ struct udphdr *uh, udp_lookup_t lookup)
{
- struct sk_buff *p, **pp = NULL;
+ struct sk_buff *pp = NULL;
+ struct sk_buff *p;
struct udphdr *uh2;
unsigned int off = skb_gro_offset(skb);
int flush = 1;
@@ -371,7 +372,7 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
unflush:
flush = 0;
- for (p = *head; p; p = p->next) {
+ list_for_each_entry(p, head, list) {
if (!NAPI_GRO_CB(p)->same_flow)
continue;
@@ -399,8 +400,8 @@ out:
}
EXPORT_SYMBOL(udp_gro_receive);
-static struct sk_buff **udp4_gro_receive(struct sk_buff **head,
- struct sk_buff *skb)
+static struct sk_buff *udp4_gro_receive(struct list_head *head,
+ struct sk_buff *skb)
{
struct udphdr *uh = udp_gro_udphdr(skb);