summaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_ipv4.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r--net/ipv4/tcp_ipv4.c89
1 files changed, 53 insertions, 36 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ba09016d1bfd..487ac67059e2 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -73,7 +73,6 @@
#include <net/timewait_sock.h>
#include <net/xfrm.h>
#include <net/secure_seq.h>
-#include <net/tcp_memcontrol.h>
#include <net/busy_poll.h>
#include <linux/inet.h>
@@ -312,7 +311,7 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk)
/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
-void tcp_req_err(struct sock *sk, u32 seq)
+void tcp_req_err(struct sock *sk, u32 seq, bool abort)
{
struct request_sock *req = inet_reqsk(sk);
struct net *net = sock_net(sk);
@@ -324,7 +323,7 @@ void tcp_req_err(struct sock *sk, u32 seq)
if (seq != tcp_rsk(req)->snt_isn) {
NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
- } else {
+ } else if (abort) {
/*
* Still in SYN_RECV, just remove it silently.
* There is no good way to pass the error to the newly
@@ -384,7 +383,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
}
seq = ntohl(th->seq);
if (sk->sk_state == TCP_NEW_SYN_RECV)
- return tcp_req_err(sk, seq);
+ return tcp_req_err(sk, seq,
+ type == ICMP_PARAMETERPROB ||
+ type == ICMP_TIME_EXCEEDED ||
+ (type == ICMP_DEST_UNREACH &&
+ (code == ICMP_NET_UNREACH ||
+ code == ICMP_HOST_UNREACH)));
bh_lock_sock(sk);
/* If too many ICMPs get dropped on busy
@@ -587,7 +591,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
} rep;
struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
- struct tcp_md5sig_key *key;
+ struct tcp_md5sig_key *key = NULL;
const __u8 *hash_location = NULL;
unsigned char newhash[16];
int genhash;
@@ -627,7 +631,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
hash_location = tcp_parse_md5sig_option(th);
- if (!sk && hash_location) {
+ if (sk && sk_fullsock(sk)) {
+ key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
+ &ip_hdr(skb)->saddr, AF_INET);
+ } else if (hash_location) {
/*
* active side is lost. Try to find listening socket through
* source port, and then find md5 key through listening socket.
@@ -651,10 +658,6 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
if (genhash || memcmp(hash_location, newhash, 16) != 0)
goto release_sk1;
- } else {
- key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
- &ip_hdr(skb)->saddr,
- AF_INET) : NULL;
}
if (key) {
@@ -675,7 +678,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
ip_hdr(skb)->saddr, /* XXX */
arg.iov[0].iov_len, IPPROTO_TCP, 0);
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
- arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
+ arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;
+
/* When socket is gone, all binding information is lost.
* routing might fail in this case. No choice here, if we choose to force
* input interface, we will misroute in case of asymmetric route.
@@ -683,6 +687,9 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
if (sk)
arg.bound_dev_if = sk->sk_bound_dev_if;
+ BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
+ offsetof(struct inet_timewait_sock, tw_bound_dev_if));
+
arg.tos = ip_hdr(skb)->tos;
ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
skb, &TCP_SKB_CB(skb)->header.h4.opt,
@@ -705,7 +712,8 @@ release_sk1:
outside socket context is ugly, certainly. What can I do?
*/
-static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
+static void tcp_v4_send_ack(struct net *net,
+ struct sk_buff *skb, u32 seq, u32 ack,
u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key,
int reply_flags, u8 tos)
@@ -720,7 +728,6 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
];
} rep;
struct ip_reply_arg arg;
- struct net *net = dev_net(skb_dst(skb)->dev);
memset(&rep.th, 0, sizeof(struct tcphdr));
memset(&arg, 0, sizeof(arg));
@@ -782,7 +789,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
struct inet_timewait_sock *tw = inet_twsk(sk);
struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
- tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
+ tcp_v4_send_ack(sock_net(sk), skb,
+ tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp + tcptw->tw_ts_offset,
tcptw->tw_ts_recent,
@@ -801,8 +809,10 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
*/
- tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
- tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+ u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
+ tcp_sk(sk)->snd_nxt;
+
+ tcp_v4_send_ack(sock_net(sk), skb, seq,
tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
tcp_time_stamp,
req->ts_recent,
@@ -921,7 +931,8 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
}
md5sig = rcu_dereference_protected(tp->md5sig_info,
- sock_owned_by_user(sk));
+ sock_owned_by_user(sk) ||
+ lockdep_is_held(&sk->sk_lock.slock));
if (!md5sig) {
md5sig = kmalloc(sizeof(*md5sig), gfp);
if (!md5sig)
@@ -1275,6 +1286,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
ireq = inet_rsk(req);
sk_daddr_set(newsk, ireq->ir_rmt_addr);
sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
+ newsk->sk_bound_dev_if = ireq->ir_iif;
newinet->inet_saddr = ireq->ir_loc_addr;
inet_opt = ireq->opt;
rcu_assign_pointer(newinet->inet_opt, inet_opt);
@@ -1492,7 +1504,7 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
if (likely(sk->sk_rx_dst))
skb_dst_drop(skb);
else
- skb_dst_force(skb);
+ skb_dst_force_safe(skb);
__skb_queue_tail(&tp->ucopy.prequeue, skb);
tp->ucopy.memory += skb->truesize;
@@ -1585,28 +1597,30 @@ process:
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
- struct sock *nsk = NULL;
+ struct sock *nsk;
sk = req->rsk_listener;
- if (tcp_v4_inbound_md5_hash(sk, skb))
- goto discard_and_relse;
- if (likely(sk->sk_state == TCP_LISTEN)) {
- nsk = tcp_check_req(sk, skb, req, false);
- } else {
+ if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
+ reqsk_put(req);
+ goto discard_it;
+ }
+ if (unlikely(sk->sk_state != TCP_LISTEN)) {
inet_csk_reqsk_queue_drop_and_put(sk, req);
goto lookup;
}
+ sock_hold(sk);
+ nsk = tcp_check_req(sk, skb, req, false);
if (!nsk) {
reqsk_put(req);
- goto discard_it;
+ goto discard_and_relse;
}
if (nsk == sk) {
- sock_hold(sk);
reqsk_put(req);
} else if (tcp_child_process(sk, nsk, skb)) {
tcp_v4_send_reset(nsk, skb);
- goto discard_it;
+ goto discard_and_relse;
} else {
+ sock_put(sk);
return 0;
}
}
@@ -1704,7 +1718,9 @@ do_time_wait:
tcp_v4_timewait_ack(sk, skb);
break;
case TCP_TW_RST:
- goto no_tcp_socket;
+ tcp_v4_send_reset(sk, skb);
+ inet_twsk_deschedule_put(inet_twsk(sk));
+ goto discard_it;
case TCP_TW_SUCCESS:;
}
goto discard_it;
@@ -1720,8 +1736,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- if (dst) {
- dst_hold(dst);
+ if (dst && dst_hold_safe(dst)) {
sk->sk_rx_dst = dst;
inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
}
@@ -1812,7 +1827,9 @@ void tcp_v4_destroy_sock(struct sock *sk)
tcp_saved_syn_free(tp);
sk_sockets_allocated_dec(sk);
- sock_release_memcg(sk);
+
+ if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+ sock_release_memcg(sk);
}
EXPORT_SYMBOL(tcp_v4_destroy_sock);
@@ -2336,11 +2353,7 @@ struct proto tcp_prot = {
.compat_setsockopt = compat_tcp_setsockopt,
.compat_getsockopt = compat_tcp_getsockopt,
#endif
-#ifdef CONFIG_MEMCG_KMEM
- .init_cgroup = tcp_init_cgroup,
- .destroy_cgroup = tcp_destroy_cgroup,
- .proto_cgroup = tcp_proto_cgroup,
-#endif
+ .diag_destroy = tcp_abort,
};
EXPORT_SYMBOL(tcp_prot);
@@ -2378,6 +2391,10 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
+ net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
+ net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
+ net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
+
return 0;
fail:
tcp_sk_exit(net);
OpenPOWER on IntegriCloud