From ef84d8ce5a36d0c4a6454e7e9dff54d19f96a25f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Oct 2015 11:16:26 -0700 Subject: [PATCH 1/3] Revert "inet: fix double request socket freeing" This reverts commit c69736696cf3742b37d850289dc0d7ead177bb14. At the time of above commit, tcp_req_err() and dccp_req_err() were dead code, as SYN_RECV request sockets were not yet in ehash table. Real bug was fixed later in a different commit. We need to revert to not leak a refcount on request socket. inet_csk_reqsk_queue_drop_and_put() will be added in following commit to make clean inet_csk_reqsk_queue_drop() does not release the reference owned by caller. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 2 +- net/ipv4/tcp_ipv4.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 0dcf1963b323..644af510d932 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -208,7 +208,6 @@ void dccp_req_err(struct sock *sk, u64 seq) if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); - reqsk_put(req); } else { /* * Still in RESPOND, just remove it silently. @@ -218,6 +217,7 @@ void dccp_req_err(struct sock *sk, u64 seq) */ inet_csk_reqsk_queue_drop(req->rsk_listener, req); } + reqsk_put(req); } EXPORT_SYMBOL(dccp_req_err); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1ff0923df715..aad2298de7ad 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -324,7 +324,6 @@ void tcp_req_err(struct sock *sk, u32 seq) if (seq != tcp_rsk(req)->snt_isn) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); - reqsk_put(req); } else { /* * Still in SYN_RECV, just remove it silently. @@ -332,9 +331,10 @@ void tcp_req_err(struct sock *sk, u32 seq) * created socket, and POSIX does not want network * errors returned from accept(). */ - NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS); inet_csk_reqsk_queue_drop(req->rsk_listener, req); + NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS); } + reqsk_put(req); } EXPORT_SYMBOL(tcp_req_err); From f03f2e154f52fdaa982de7e2c386737679963dc9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Oct 2015 11:16:27 -0700 Subject: [PATCH 2/3] tcp/dccp: add inet_csk_reqsk_queue_drop_and_put() helper Let's reduce the confusion about inet_csk_reqsk_queue_drop() : In many cases we also need to release reference on request socket, so add a helper to do this, reducing code size and complexity. Fixes: 4bdc3d66147b ("tcp/dccp: fix behavior of stale SYN_RECV request sockets") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 1 + net/dccp/ipv4.c | 2 +- net/dccp/ipv6.c | 2 +- net/ipv4/inet_connection_sock.c | 10 ++++++++-- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 6 files changed, 13 insertions(+), 6 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index fd645c49e71e..e84ea9f2498f 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -299,6 +299,7 @@ static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) } void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req); +void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req); void inet_csk_destroy_sock(struct sock *sk); void inet_csk_prepare_forced_close(struct sock *sk); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 644af510d932..59bc180b02d8 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -828,7 +828,7 @@ lookup: if (likely(sk->sk_state == DCCP_LISTEN)) { nsk = dccp_check_req(sk, skb, req); } else { - inet_csk_reqsk_queue_drop(sk, req); + inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } if (!nsk) { diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 68831931b1fe..d9cc731f2619 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -686,7 +686,7 @@ lookup: if (likely(sk->sk_state == DCCP_LISTEN)) { nsk = dccp_check_req(sk, skb, req); } else { - inet_csk_reqsk_queue_drop(sk, req); + inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } if (!nsk) { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index ba9ec9a0d0ce..b85c720956a9 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -546,6 +546,13 @@ void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req) } EXPORT_SYMBOL(inet_csk_reqsk_queue_drop); +void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req) +{ + inet_csk_reqsk_queue_drop(sk, req); + reqsk_put(req); +} +EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put); + static void reqsk_timer_handler(unsigned long data) { struct request_sock *req = (struct request_sock *)data; @@ -608,8 +615,7 @@ static void reqsk_timer_handler(unsigned long data) return; } drop: - inet_csk_reqsk_queue_drop(sk_listener, req); - reqsk_put(req); + inet_csk_reqsk_queue_drop_and_put(sk_listener, req); } static void reqsk_queue_hash_req(struct request_sock *req, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index aad2298de7ad..9c68cf3762c4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1591,7 +1591,7 @@ process: if (likely(sk->sk_state == TCP_LISTEN)) { nsk = tcp_check_req(sk, skb, req, false); } else { - inet_csk_reqsk_queue_drop(sk, req); + inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } if (!nsk) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7ce1c57199d1..acb06f86f372 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1386,7 +1386,7 @@ process: if (likely(sk->sk_state == TCP_LISTEN)) { nsk = tcp_check_req(sk, skb, req, false); } else { - inet_csk_reqsk_queue_drop(sk, req); + inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } if (!nsk) { From ebb516af60e18258aac8e80bbe068740ef1579ed Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Oct 2015 11:16:28 -0700 Subject: [PATCH 3/3] tcp/dccp: fix race at listener dismantle phase Under stress, a close() on a listener can trigger the WARN_ON(sk->sk_ack_backlog) in inet_csk_listen_stop() We need to test if listener is still active before queueing a child in inet_csk_reqsk_queue_add() Create a common inet_child_forget() helper, and use it from inet_csk_reqsk_queue_add() and inet_csk_listen_stop() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 9 +--- include/net/request_sock.h | 19 -------- net/ipv4/inet_connection_sock.c | 71 +++++++++++++++++++++--------- 3 files changed, 51 insertions(+), 48 deletions(-) diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index e84ea9f2498f..63615709839d 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -268,13 +268,8 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, struct sock *newsk, const struct request_sock *req); -static inline void inet_csk_reqsk_queue_add(struct sock *sk, - struct request_sock *req, - struct sock *child) -{ - reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child); -} - +void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, + struct sock *child); void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, unsigned long timeout); diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 2e73748956d5..a0dde04eb178 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -186,25 +186,6 @@ static inline bool reqsk_queue_empty(const struct request_sock_queue *queue) return queue->rskq_accept_head == NULL; } -static inline void reqsk_queue_add(struct request_sock_queue *queue, - struct request_sock *req, - struct sock *parent, - struct sock *child) -{ - spin_lock(&queue->rskq_lock); - req->sk = child; - sk_acceptq_added(parent); - - if (queue->rskq_accept_head == NULL) - queue->rskq_accept_head = req; - else - queue->rskq_accept_tail->dl_next = req; - - queue->rskq_accept_tail = req; - req->dl_next = NULL; - spin_unlock(&queue->rskq_lock); -} - static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue, struct sock *parent) { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index b85c720956a9..8430bc8ccd58 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -764,6 +764,53 @@ int inet_csk_listen_start(struct sock *sk, int backlog) } EXPORT_SYMBOL_GPL(inet_csk_listen_start); +static void inet_child_forget(struct sock *sk, struct request_sock *req, + struct sock *child) +{ + sk->sk_prot->disconnect(child, O_NONBLOCK); + + sock_orphan(child); + + percpu_counter_inc(sk->sk_prot->orphan_count); + + if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) { + BUG_ON(tcp_sk(child)->fastopen_rsk != req); + BUG_ON(sk != req->rsk_listener); + + /* Paranoid, to prevent race condition if + * an inbound pkt destined for child is + * blocked by sock lock in tcp_v4_rcv(). + * Also to satisfy an assertion in + * tcp_v4_destroy_sock(). + */ + tcp_sk(child)->fastopen_rsk = NULL; + } + inet_csk_destroy_sock(child); + reqsk_put(req); +} + +void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, + struct sock *child) +{ + struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; + + spin_lock(&queue->rskq_lock); + if (unlikely(sk->sk_state != TCP_LISTEN)) { + inet_child_forget(sk, req, child); + } else { + req->sk = child; + req->dl_next = NULL; + if (queue->rskq_accept_head == NULL) + queue->rskq_accept_head = req; + else + queue->rskq_accept_tail->dl_next = req; + queue->rskq_accept_tail = req; + sk_acceptq_added(sk); + } + spin_unlock(&queue->rskq_lock); +} +EXPORT_SYMBOL(inet_csk_reqsk_queue_add); + /* * This routine closes sockets which have been at least partially * opened, but not yet accepted. @@ -790,31 +837,11 @@ void inet_csk_listen_stop(struct sock *sk) WARN_ON(sock_owned_by_user(child)); sock_hold(child); - sk->sk_prot->disconnect(child, O_NONBLOCK); - - sock_orphan(child); - - percpu_counter_inc(sk->sk_prot->orphan_count); - - if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) { - BUG_ON(tcp_sk(child)->fastopen_rsk != req); - BUG_ON(sk != req->rsk_listener); - - /* Paranoid, to prevent race condition if - * an inbound pkt destined for child is - * blocked by sock lock in tcp_v4_rcv(). - * Also to satisfy an assertion in - * tcp_v4_destroy_sock(). - */ - tcp_sk(child)->fastopen_rsk = NULL; - } - inet_csk_destroy_sock(child); - + inet_child_forget(sk, req, child); bh_unlock_sock(child); local_bh_enable(); sock_put(child); - reqsk_put(req); cond_resched(); } if (queue->fastopenq.rskq_rst_head) { @@ -829,7 +856,7 @@ void inet_csk_listen_stop(struct sock *sk) req = next; } } - WARN_ON(sk->sk_ack_backlog); + WARN_ON_ONCE(sk->sk_ack_backlog); } EXPORT_SYMBOL_GPL(inet_csk_listen_stop);