From db9b2e0af605e7c994784527abfd9276cabd718a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 3 Oct 2019 17:44:44 +0100 Subject: rxrpc: Fix rxrpc_recvmsg tracepoint Fix the rxrpc_recvmsg tracepoint to handle being called with a NULL call parameter. Fixes: a25e21f0bcd2 ("rxrpc, afs: Use debug_ids rather than pointers in traces") Signed-off-by: David Howells Signed-off-by: David S. Miller --- include/trace/events/rxrpc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index a13a62db3565..edc5c887a44c 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -1068,7 +1068,7 @@ TRACE_EVENT(rxrpc_recvmsg, ), TP_fast_assign( - __entry->call = call->debug_id; + __entry->call = call ? call->debug_id : 0; __entry->why = why; __entry->seq = seq; __entry->offset = offset; -- cgit v1.2.3 From 55f6c98e3674ce16038a1949c3f9ca5a9a99f289 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 7 Oct 2019 10:58:29 +0100 Subject: rxrpc: Fix trace-after-put looking at the put peer record rxrpc_put_peer() calls trace_rxrpc_peer() after it has done the decrement of the refcount - which looks at the debug_id in the peer record. But unless the refcount was reduced to zero, we no longer have the right to look in the record and, indeed, it may be deleted by some other thread. Fix this by getting the debug_id out before decrementing the refcount and then passing that into the tracepoint. This can cause the following symptoms: BUG: KASAN: use-after-free in __rxrpc_put_peer net/rxrpc/peer_object.c:411 [inline] BUG: KASAN: use-after-free in rxrpc_put_peer+0x685/0x6a0 net/rxrpc/peer_object.c:435 Read of size 8 at addr ffff888097ec0058 by task syz-executor823/24216 Fixes: 1159d4b496f5 ("rxrpc: Add a tracepoint to track rxrpc_peer refcounting") Reported-by: syzbot+b9be979c55f2bea8ed30@syzkaller.appspotmail.com Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 6 +++--- net/rxrpc/peer_object.c | 11 +++++++---- 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index edc5c887a44c..45556fe771c3 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -519,10 +519,10 @@ TRACE_EVENT(rxrpc_local, ); TRACE_EVENT(rxrpc_peer, - TP_PROTO(struct rxrpc_peer *peer, enum rxrpc_peer_trace op, + TP_PROTO(unsigned int peer_debug_id, enum rxrpc_peer_trace op, int usage, const void *where), - TP_ARGS(peer, op, usage, where), + TP_ARGS(peer_debug_id, op, usage, where), TP_STRUCT__entry( __field(unsigned int, peer ) @@ -532,7 +532,7 @@ TRACE_EVENT(rxrpc_peer, ), TP_fast_assign( - __entry->peer = peer->debug_id; + __entry->peer = peer_debug_id; __entry->op = op; __entry->usage = usage; __entry->where = where; diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 9c3ac96f71cb..b700b7ecaa3d 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -382,7 +382,7 @@ struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer) int n; n = atomic_inc_return(&peer->usage); - trace_rxrpc_peer(peer, rxrpc_peer_got, n, here); + trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n, here); return peer; } @@ -396,7 +396,7 @@ struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer) if (peer) { int n = atomic_fetch_add_unless(&peer->usage, 1, 0); if (n > 0) - trace_rxrpc_peer(peer, rxrpc_peer_got, n + 1, here); + trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, n + 1, here); else peer = NULL; } @@ -426,11 +426,13 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer) void rxrpc_put_peer(struct rxrpc_peer *peer) { const void *here = __builtin_return_address(0); + unsigned int debug_id; int n; if (peer) { + debug_id = peer->debug_id; n = atomic_dec_return(&peer->usage); - trace_rxrpc_peer(peer, rxrpc_peer_put, n, here); + trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here); if (n == 0) __rxrpc_put_peer(peer); } @@ -443,10 +445,11 @@ void rxrpc_put_peer(struct rxrpc_peer *peer) void rxrpc_put_peer_locked(struct rxrpc_peer *peer) { const void *here = __builtin_return_address(0); + unsigned int debug_id = peer->debug_id; int n; n = atomic_dec_return(&peer->usage); - trace_rxrpc_peer(peer, rxrpc_peer_put, n, here); + trace_rxrpc_peer(debug_id, rxrpc_peer_put, n, here); if (n == 0) { hash_del_rcu(&peer->hash_link); list_del_init(&peer->keepalive_link); -- cgit v1.2.3 From 4c1295dccc0afe0905b6ca4c62ade7f2406f2cfb Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 7 Oct 2019 10:58:29 +0100 Subject: rxrpc: Fix trace-after-put looking at the put connection record rxrpc_put_*conn() calls trace_rxrpc_conn() after they have done the decrement of the refcount - which looks at the debug_id in the connection record. But unless the refcount was reduced to zero, we no longer have the right to look in the record and, indeed, it may be deleted by some other thread. Fix this by getting the debug_id out before decrementing the refcount and then passing that into the tracepoint. Fixes: 363deeab6d0f ("rxrpc: Add connection tracepoint and client conn state tracepoint") Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 6 +++--- net/rxrpc/call_accept.c | 2 +- net/rxrpc/conn_client.c | 6 ++++-- net/rxrpc/conn_object.c | 13 +++++++------ net/rxrpc/conn_service.c | 2 +- 5 files changed, 16 insertions(+), 13 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 45556fe771c3..38a97e890cb6 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -546,10 +546,10 @@ TRACE_EVENT(rxrpc_peer, ); TRACE_EVENT(rxrpc_conn, - TP_PROTO(struct rxrpc_connection *conn, enum rxrpc_conn_trace op, + TP_PROTO(unsigned int conn_debug_id, enum rxrpc_conn_trace op, int usage, const void *where), - TP_ARGS(conn, op, usage, where), + TP_ARGS(conn_debug_id, op, usage, where), TP_STRUCT__entry( __field(unsigned int, conn ) @@ -559,7 +559,7 @@ TRACE_EVENT(rxrpc_conn, ), TP_fast_assign( - __entry->conn = conn->debug_id; + __entry->conn = conn_debug_id; __entry->op = op; __entry->usage = usage; __entry->where = where; diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 00c095d74145..c1b1b7dd2924 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -84,7 +84,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, smp_store_release(&b->conn_backlog_head, (head + 1) & (size - 1)); - trace_rxrpc_conn(conn, rxrpc_conn_new_service, + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service, atomic_read(&conn->usage), here); } diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 3f1da1b49f69..700eb77173fc 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -212,7 +212,8 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) rxrpc_get_local(conn->params.local); key_get(conn->params.key); - trace_rxrpc_conn(conn, rxrpc_conn_new_client, atomic_read(&conn->usage), + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_client, + atomic_read(&conn->usage), __builtin_return_address(0)); trace_rxrpc_client(conn, -1, rxrpc_client_alloc); _leave(" = %p", conn); @@ -985,11 +986,12 @@ rxrpc_put_one_client_conn(struct rxrpc_connection *conn) void rxrpc_put_client_conn(struct rxrpc_connection *conn) { const void *here = __builtin_return_address(0); + unsigned int debug_id = conn->debug_id; int n; do { n = atomic_dec_return(&conn->usage); - trace_rxrpc_conn(conn, rxrpc_conn_put_client, n, here); + trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, n, here); if (n > 0) return; ASSERTCMP(n, >=, 0); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index ed05b6922132..38d718e90dc6 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -269,7 +269,7 @@ bool rxrpc_queue_conn(struct rxrpc_connection *conn) if (n == 0) return false; if (rxrpc_queue_work(&conn->processor)) - trace_rxrpc_conn(conn, rxrpc_conn_queued, n + 1, here); + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, n + 1, here); else rxrpc_put_connection(conn); return true; @@ -284,7 +284,7 @@ void rxrpc_see_connection(struct rxrpc_connection *conn) if (conn) { int n = atomic_read(&conn->usage); - trace_rxrpc_conn(conn, rxrpc_conn_seen, n, here); + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_seen, n, here); } } @@ -296,7 +296,7 @@ void rxrpc_get_connection(struct rxrpc_connection *conn) const void *here = __builtin_return_address(0); int n = atomic_inc_return(&conn->usage); - trace_rxrpc_conn(conn, rxrpc_conn_got, n, here); + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n, here); } /* @@ -310,7 +310,7 @@ rxrpc_get_connection_maybe(struct rxrpc_connection *conn) if (conn) { int n = atomic_fetch_add_unless(&conn->usage, 1, 0); if (n > 0) - trace_rxrpc_conn(conn, rxrpc_conn_got, n + 1, here); + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, n + 1, here); else conn = NULL; } @@ -333,10 +333,11 @@ static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet, void rxrpc_put_service_conn(struct rxrpc_connection *conn) { const void *here = __builtin_return_address(0); + unsigned int debug_id = conn->debug_id; int n; n = atomic_dec_return(&conn->usage); - trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here); + trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, n, here); ASSERTCMP(n, >=, 0); if (n == 1) rxrpc_set_service_reap_timer(conn->params.local->rxnet, @@ -420,7 +421,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) */ if (atomic_cmpxchg(&conn->usage, 1, 0) != 1) continue; - trace_rxrpc_conn(conn, rxrpc_conn_reap_service, 0, NULL); + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_reap_service, 0, NULL); if (rxrpc_conn_is_client(conn)) BUG(); diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index b30e13f6d95f..123d6ceab15c 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -134,7 +134,7 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn list_add_tail(&conn->proc_link, &rxnet->conn_proc_list); write_unlock(&rxnet->conn_lock); - trace_rxrpc_conn(conn, rxrpc_conn_new_service, + trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service, atomic_read(&conn->usage), __builtin_return_address(0)); } -- cgit v1.2.3 From 48c9e0ec7cbbb7370448f859ccc8e3b7eb69e755 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 7 Oct 2019 10:58:29 +0100 Subject: rxrpc: Fix trace-after-put looking at the put call record rxrpc_put_call() calls trace_rxrpc_call() after it has done the decrement of the refcount - which looks at the debug_id in the call record. But unless the refcount was reduced to zero, we no longer have the right to look in the record and, indeed, it may be deleted by some other thread. Fix this by getting the debug_id out before decrementing the refcount and then passing that into the tracepoint. Fixes: e34d4234b0b7 ("rxrpc: Trace rxrpc_call usage") Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 6 +++--- net/rxrpc/call_accept.c | 2 +- net/rxrpc/call_object.c | 28 +++++++++++++++++----------- 3 files changed, 21 insertions(+), 15 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 38a97e890cb6..191fe447f990 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -606,10 +606,10 @@ TRACE_EVENT(rxrpc_client, ); TRACE_EVENT(rxrpc_call, - TP_PROTO(struct rxrpc_call *call, enum rxrpc_call_trace op, + TP_PROTO(unsigned int call_debug_id, enum rxrpc_call_trace op, int usage, const void *where, const void *aux), - TP_ARGS(call, op, usage, where, aux), + TP_ARGS(call_debug_id, op, usage, where, aux), TP_STRUCT__entry( __field(unsigned int, call ) @@ -620,7 +620,7 @@ TRACE_EVENT(rxrpc_call, ), TP_fast_assign( - __entry->call = call->debug_id; + __entry->call = call_debug_id; __entry->op = op; __entry->usage = usage; __entry->where = where; diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index c1b1b7dd2924..1f778102ed8d 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -97,7 +97,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, call->flags |= (1 << RXRPC_CALL_IS_SERVICE); call->state = RXRPC_CALL_SERVER_PREALLOC; - trace_rxrpc_call(call, rxrpc_call_new_service, + trace_rxrpc_call(call->debug_id, rxrpc_call_new_service, atomic_read(&call->usage), here, (const void *)user_call_ID); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 32d8dc677142..6dace078971a 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -240,7 +240,8 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, if (p->intr) __set_bit(RXRPC_CALL_IS_INTR, &call->flags); call->tx_total_len = p->tx_total_len; - trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage), + trace_rxrpc_call(call->debug_id, rxrpc_call_new_client, + atomic_read(&call->usage), here, (const void *)p->user_call_ID); /* We need to protect a partially set up call against the user as we @@ -290,8 +291,8 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, if (ret < 0) goto error; - trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage), - here, NULL); + trace_rxrpc_call(call->debug_id, rxrpc_call_connected, + atomic_read(&call->usage), here, NULL); rxrpc_start_call_timer(call); @@ -313,8 +314,8 @@ error_dup_user_ID: error: __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, RX_CALL_DEAD, ret); - trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage), - here, ERR_PTR(ret)); + trace_rxrpc_call(call->debug_id, rxrpc_call_error, + atomic_read(&call->usage), here, ERR_PTR(ret)); rxrpc_release_call(rx, call); mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put); @@ -376,7 +377,8 @@ bool rxrpc_queue_call(struct rxrpc_call *call) if (n == 0) return false; if (rxrpc_queue_work(&call->processor)) - trace_rxrpc_call(call, rxrpc_call_queued, n + 1, here, NULL); + trace_rxrpc_call(call->debug_id, rxrpc_call_queued, n + 1, + here, NULL); else rxrpc_put_call(call, rxrpc_call_put_noqueue); return true; @@ -391,7 +393,8 @@ bool __rxrpc_queue_call(struct rxrpc_call *call) int n = atomic_read(&call->usage); ASSERTCMP(n, >=, 1); if (rxrpc_queue_work(&call->processor)) - trace_rxrpc_call(call, rxrpc_call_queued_ref, n, here, NULL); + trace_rxrpc_call(call->debug_id, rxrpc_call_queued_ref, n, + here, NULL); else rxrpc_put_call(call, rxrpc_call_put_noqueue); return true; @@ -406,7 +409,8 @@ void rxrpc_see_call(struct rxrpc_call *call) if (call) { int n = atomic_read(&call->usage); - trace_rxrpc_call(call, rxrpc_call_seen, n, here, NULL); + trace_rxrpc_call(call->debug_id, rxrpc_call_seen, n, + here, NULL); } } @@ -418,7 +422,7 @@ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) const void *here = __builtin_return_address(0); int n = atomic_inc_return(&call->usage); - trace_rxrpc_call(call, op, n, here, NULL); + trace_rxrpc_call(call->debug_id, op, n, here, NULL); } /* @@ -445,7 +449,8 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage)); - trace_rxrpc_call(call, rxrpc_call_release, atomic_read(&call->usage), + trace_rxrpc_call(call->debug_id, rxrpc_call_release, + atomic_read(&call->usage), here, (const void *)call->flags); ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); @@ -534,12 +539,13 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) { struct rxrpc_net *rxnet = call->rxnet; const void *here = __builtin_return_address(0); + unsigned int debug_id = call->debug_id; int n; ASSERT(call != NULL); n = atomic_dec_return(&call->usage); - trace_rxrpc_call(call, op, n, here, NULL); + trace_rxrpc_call(debug_id, op, n, here, NULL); ASSERTCMP(n, >=, 0); if (n == 0) { _debug("call %d dead", call->debug_id); -- cgit v1.2.3 From ebb3b78db7bf842270a46fd4fe7cc45c78fa5ed6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Oct 2019 20:17:44 -0700 Subject: tcp: annotate sk->sk_rcvbuf lockless reads For the sake of tcp_poll(), there are few places where we fetch sk->sk_rcvbuf while this field can change from IRQ or other cpu. We need to add READ_ONCE() annotations, and also make sure write sides use corresponding WRITE_ONCE() to avoid store-tearing. Note that other transports probably need similar fixes. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 4 ++-- include/trace/events/sock.h | 2 +- net/core/filter.c | 3 ++- net/core/skbuff.c | 2 +- net/core/sock.c | 5 +++-- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_input.c | 7 ++++--- 7 files changed, 15 insertions(+), 12 deletions(-) (limited to 'include/trace') diff --git a/include/net/tcp.h b/include/net/tcp.h index e1d08f69fd39..ab4eb5eb5d07 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1380,14 +1380,14 @@ static inline int tcp_win_from_space(const struct sock *sk, int space) /* Note: caller must be prepared to deal with negative returns */ static inline int tcp_space(const struct sock *sk) { - return tcp_win_from_space(sk, sk->sk_rcvbuf - + return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - READ_ONCE(sk->sk_backlog.len) - atomic_read(&sk->sk_rmem_alloc)); } static inline int tcp_full_space(const struct sock *sk) { - return tcp_win_from_space(sk, sk->sk_rcvbuf); + return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf)); } extern void tcp_openreq_init_rwin(struct request_sock *req, diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h index a0c4b8a30966..f720c32e7dfd 100644 --- a/include/trace/events/sock.h +++ b/include/trace/events/sock.h @@ -82,7 +82,7 @@ TRACE_EVENT(sock_rcvqueue_full, TP_fast_assign( __entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc); __entry->truesize = skb->truesize; - __entry->sk_rcvbuf = sk->sk_rcvbuf; + __entry->sk_rcvbuf = READ_ONCE(sk->sk_rcvbuf); ), TP_printk("rmem_alloc=%d truesize=%u sk_rcvbuf=%d", diff --git a/net/core/filter.c b/net/core/filter.c index a50c0b6846f2..7deceaeeed7b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4252,7 +4252,8 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, case SO_RCVBUF: val = min_t(u32, val, sysctl_rmem_max); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; - sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); + WRITE_ONCE(sk->sk_rcvbuf, + max_t(int, val * 2, SOCK_MIN_RCVBUF)); break; case SO_SNDBUF: val = min_t(u32, val, sysctl_wmem_max); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 529133611ea2..8c178703467b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4415,7 +4415,7 @@ static void skb_set_err_queue(struct sk_buff *skb) int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb) { if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= - (unsigned int)sk->sk_rcvbuf) + (unsigned int)READ_ONCE(sk->sk_rcvbuf)) return -ENOMEM; skb_orphan(skb); diff --git a/net/core/sock.c b/net/core/sock.c index 2a053999df11..8c8f61e70141 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -831,7 +831,8 @@ set_rcvbuf: * returning the value we actually used in getsockopt * is the most desirable behavior. */ - sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); + WRITE_ONCE(sk->sk_rcvbuf, + max_t(int, val * 2, SOCK_MIN_RCVBUF)); break; case SO_RCVBUFFORCE: @@ -3204,7 +3205,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem) memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS); mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk); - mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf; + mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf); mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf; mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 577a8c6eef9f..bc0481aa6633 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -451,7 +451,7 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_sync_mss = tcp_sync_mss; sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1]; - sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1]; + WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]); sk_sockets_allocated_inc(sk); sk->sk_route_forced_caps = NETIF_F_GSO; @@ -1711,7 +1711,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val) val <<= 1; if (val > sk->sk_rcvbuf) { - sk->sk_rcvbuf = val; + WRITE_ONCE(sk->sk_rcvbuf, val); tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val); } return 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 16342e043ab3..6995df20710a 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -483,8 +483,9 @@ static void tcp_clamp_window(struct sock *sk) !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && !tcp_under_memory_pressure(sk) && sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) { - sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), - net->ipv4.sysctl_tcp_rmem[2]); + WRITE_ONCE(sk->sk_rcvbuf, + min(atomic_read(&sk->sk_rmem_alloc), + net->ipv4.sysctl_tcp_rmem[2])); } if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss); @@ -648,7 +649,7 @@ void tcp_rcv_space_adjust(struct sock *sk) rcvbuf = min_t(u64, rcvwin * rcvmem, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); if (rcvbuf > sk->sk_rcvbuf) { - sk->sk_rcvbuf = rcvbuf; + WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); /* Make the window clamp follow along. */ tp->window_clamp = tcp_win_from_space(sk, rcvbuf); -- cgit v1.2.3 From ab4e846a82d0ae00176de19f2db3c5c64f8eb5f2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 10 Oct 2019 20:17:46 -0700 Subject: tcp: annotate sk->sk_wmem_queued lockless reads For the sake of tcp_poll(), there are few places where we fetch sk->sk_wmem_queued while this field can change from IRQ or other cpu. We need to add READ_ONCE() annotations, and also make sure write sides use corresponding WRITE_ONCE() to avoid store-tearing. sk_wmem_queued_add() helper is added so that we can in the future convert to ADD_ONCE() or equivalent if/when available. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 15 ++++++++++----- include/trace/events/sock.h | 2 +- net/core/datagram.c | 2 +- net/core/sock.c | 2 +- net/ipv4/inet_diag.c | 2 +- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_output.c | 14 +++++++------- net/sched/em_meta.c | 2 +- 8 files changed, 24 insertions(+), 19 deletions(-) (limited to 'include/trace') diff --git a/include/net/sock.h b/include/net/sock.h index 3d1e7502333e..f69b58bff7e5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -878,12 +878,17 @@ static inline bool sk_acceptq_is_full(const struct sock *sk) */ static inline int sk_stream_min_wspace(const struct sock *sk) { - return sk->sk_wmem_queued >> 1; + return READ_ONCE(sk->sk_wmem_queued) >> 1; } static inline int sk_stream_wspace(const struct sock *sk) { - return READ_ONCE(sk->sk_sndbuf) - sk->sk_wmem_queued; + return READ_ONCE(sk->sk_sndbuf) - READ_ONCE(sk->sk_wmem_queued); +} + +static inline void sk_wmem_queued_add(struct sock *sk, int val) +{ + WRITE_ONCE(sk->sk_wmem_queued, sk->sk_wmem_queued + val); } void sk_stream_write_space(struct sock *sk); @@ -1207,7 +1212,7 @@ static inline void sk_refcnt_debug_release(const struct sock *sk) static inline bool __sk_stream_memory_free(const struct sock *sk, int wake) { - if (sk->sk_wmem_queued >= READ_ONCE(sk->sk_sndbuf)) + if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf)) return false; return sk->sk_prot->stream_memory_free ? @@ -1467,7 +1472,7 @@ DECLARE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key); static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) { sock_set_flag(sk, SOCK_QUEUE_SHRUNK); - sk->sk_wmem_queued -= skb->truesize; + sk_wmem_queued_add(sk, -skb->truesize); sk_mem_uncharge(sk, skb->truesize); if (static_branch_unlikely(&tcp_tx_skb_cache_key) && !sk->sk_tx_skb_cache && !skb_cloned(skb)) { @@ -2014,7 +2019,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro skb->len += copy; skb->data_len += copy; skb->truesize += copy; - sk->sk_wmem_queued += copy; + sk_wmem_queued_add(sk, copy); sk_mem_charge(sk, copy); return 0; } diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h index f720c32e7dfd..51fe9f6719eb 100644 --- a/include/trace/events/sock.h +++ b/include/trace/events/sock.h @@ -115,7 +115,7 @@ TRACE_EVENT(sock_exceed_buf_limit, __entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc); __entry->sysctl_wmem = sk_get_wmem0(sk, prot); __entry->wmem_alloc = refcount_read(&sk->sk_wmem_alloc); - __entry->wmem_queued = sk->sk_wmem_queued; + __entry->wmem_queued = READ_ONCE(sk->sk_wmem_queued); __entry->kind = kind; ), diff --git a/net/core/datagram.c b/net/core/datagram.c index 4cc8dc5db2b7..c210fc116103 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -640,7 +640,7 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, skb->len += copied; skb->truesize += truesize; if (sk && sk->sk_type == SOCK_STREAM) { - sk->sk_wmem_queued += truesize; + sk_wmem_queued_add(sk, truesize); sk_mem_charge(sk, truesize); } else { refcount_add(truesize, &skb->sk->sk_wmem_alloc); diff --git a/net/core/sock.c b/net/core/sock.c index cd075bc86407..a515392ba84b 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3212,7 +3212,7 @@ void sk_get_meminfo(const struct sock *sk, u32 *mem) mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf); mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc; - mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued; + mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued); mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len); mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index bbb005eb5218..7dc79b973e6e 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -193,7 +193,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, if (ext & (1 << (INET_DIAG_MEMINFO - 1))) { struct inet_diag_meminfo minfo = { .idiag_rmem = sk_rmem_alloc_get(sk), - .idiag_wmem = sk->sk_wmem_queued, + .idiag_wmem = READ_ONCE(sk->sk_wmem_queued), .idiag_fmem = sk->sk_forward_alloc, .idiag_tmem = sk_wmem_alloc_get(sk), }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 111853262972..b2ac4f074e2d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -659,7 +659,7 @@ static void skb_entail(struct sock *sk, struct sk_buff *skb) tcb->sacked = 0; __skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); - sk->sk_wmem_queued += skb->truesize; + sk_wmem_queued_add(sk, skb->truesize); sk_mem_charge(sk, skb->truesize); if (tp->nonagle & TCP_NAGLE_PUSH) tp->nonagle &= ~TCP_NAGLE_PUSH; @@ -1034,7 +1034,7 @@ new_segment: skb->len += copy; skb->data_len += copy; skb->truesize += copy; - sk->sk_wmem_queued += copy; + sk_wmem_queued_add(sk, copy); sk_mem_charge(sk, copy); skb->ip_summed = CHECKSUM_PARTIAL; WRITE_ONCE(tp->write_seq, tp->write_seq + copy); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a115a991dfb5..0488607c5cd3 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1199,7 +1199,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) WRITE_ONCE(tp->write_seq, TCP_SKB_CB(skb)->end_seq); __skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); - sk->sk_wmem_queued += skb->truesize; + sk_wmem_queued_add(sk, skb->truesize); sk_mem_charge(sk, skb->truesize); } @@ -1333,7 +1333,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, return -ENOMEM; /* We'll just try again later. */ skb_copy_decrypted(buff, skb); - sk->sk_wmem_queued += buff->truesize; + sk_wmem_queued_add(sk, buff->truesize); sk_mem_charge(sk, buff->truesize); nlen = skb->len - len - nsize; buff->truesize += nlen; @@ -1443,7 +1443,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) if (delta_truesize) { skb->truesize -= delta_truesize; - sk->sk_wmem_queued -= delta_truesize; + sk_wmem_queued_add(sk, -delta_truesize); sk_mem_uncharge(sk, delta_truesize); sock_set_flag(sk, SOCK_QUEUE_SHRUNK); } @@ -1888,7 +1888,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, return -ENOMEM; skb_copy_decrypted(buff, skb); - sk->sk_wmem_queued += buff->truesize; + sk_wmem_queued_add(sk, buff->truesize); sk_mem_charge(sk, buff->truesize); buff->truesize += nlen; skb->truesize -= nlen; @@ -2152,7 +2152,7 @@ static int tcp_mtu_probe(struct sock *sk) nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false); if (!nskb) return -1; - sk->sk_wmem_queued += nskb->truesize; + sk_wmem_queued_add(sk, nskb->truesize); sk_mem_charge(sk, nskb->truesize); skb = tcp_send_head(sk); @@ -3222,7 +3222,7 @@ int tcp_send_synack(struct sock *sk) tcp_rtx_queue_unlink_and_free(skb, sk); __skb_header_release(nskb); tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb); - sk->sk_wmem_queued += nskb->truesize; + sk_wmem_queued_add(sk, nskb->truesize); sk_mem_charge(sk, nskb->truesize); skb = nskb; } @@ -3447,7 +3447,7 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb) tcb->end_seq += skb->len; __skb_header_release(skb); - sk->sk_wmem_queued += skb->truesize; + sk_wmem_queued_add(sk, skb->truesize); sk_mem_charge(sk, skb->truesize); WRITE_ONCE(tp->write_seq, tcb->end_seq); tp->packets_out += tcp_skb_pcount(skb); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 4c9122fc35c9..3177dcb17316 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -446,7 +446,7 @@ META_COLLECTOR(int_sk_wmem_queued) *err = -1; return; } - dst->value = sk->sk_wmem_queued; + dst->value = READ_ONCE(sk->sk_wmem_queued); } META_COLLECTOR(int_sk_fwd_alloc) -- cgit v1.2.3