From b290098092e4aeaa1712d3326bf5b64d2751c740 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 21 Oct 2019 16:13:08 +0200 Subject: [PATCH 1/8] net/smc: cancel send and receive for terminated socket The resources for a terminated socket are being cleaned up. This patch makes sure * no more data is received for an actively terminated socket * no more data is sent for an actively or passively terminated socket Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/smc.h | 1 + net/smc/smc_cdc.c | 4 ++-- net/smc/smc_close.c | 7 +++++-- net/smc/smc_core.c | 1 + net/smc/smc_rx.c | 10 ++++++++-- net/smc/smc_tx.c | 26 +++++++++++++++----------- 6 files changed, 32 insertions(+), 17 deletions(-) diff --git a/net/smc/smc.h b/net/smc/smc.h index 878313f8d6c1..be11ba41190f 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -188,6 +188,7 @@ struct smc_connection { * 0 for SMC-R, 32 for SMC-D */ u64 peer_token; /* SMC-D token of peer */ + u8 killed : 1; /* abnormal termination */ }; struct smc_sock { /* smc sock container */ diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index d0b0f4c865b4..7dc07ec2379b 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -63,7 +63,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn, rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, wr_rdma_buf, (struct smc_wr_tx_pend_priv **)pend); - if (!conn->alert_token_local) + if (conn->killed) /* abnormal termination */ rc = -EPIPE; return rc; @@ -328,7 +328,7 @@ static void smcd_cdc_rx_tsklet(unsigned long data) struct smcd_cdc_msg cdc; struct smc_sock *smc; - if (!conn) + if (!conn || conn->killed) return; data_cdc = (struct smcd_cdc_msg *)conn->rmb_desc->cpu_addr; diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 1a858e59fc31..1d706c581592 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -66,7 +66,8 @@ static void smc_close_stream_wait(struct smc_sock *smc, long timeout) rc = sk_wait_event(sk, &timeout, 
!smc_tx_prepared_sends(&smc->conn) || sk->sk_err == ECONNABORTED || - sk->sk_err == ECONNRESET, + sk->sk_err == ECONNRESET || + smc->conn.killed, &wait); if (rc) break; @@ -95,6 +96,8 @@ static int smc_close_final(struct smc_connection *conn) conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; else conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1; + if (conn->killed) + return -EPIPE; return smc_cdc_get_slot_and_msg_send(conn); } @@ -326,7 +329,7 @@ static void smc_close_passive_work(struct work_struct *work) lock_sock(sk); old_state = sk->sk_state; - if (!conn->alert_token_local) { + if (conn->killed) { /* abnormal termination */ smc_close_active_abort(smc); goto wakeup; diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index de9bf035f545..4ee0e33b8c5a 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -500,6 +500,7 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr) conn = rb_entry(node, struct smc_connection, alert_node); smc = container_of(conn, struct smc_sock, conn); sock_hold(&smc->sk); /* sock_put in close work */ + conn->killed = 1; conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; __smc_lgr_unregister_conn(conn); conn->lgr = NULL; diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index 97e8369002d7..39d7b34d06d2 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -201,6 +201,8 @@ int smc_rx_wait(struct smc_sock *smc, long *timeo, { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct smc_connection *conn = &smc->conn; + struct smc_cdc_conn_state_flags *cflags = + &conn->local_tx_ctrl.conn_state_flags; struct sock *sk = &smc->sk; int rc; @@ -210,7 +212,9 @@ int smc_rx_wait(struct smc_sock *smc, long *timeo, add_wait_queue(sk_sleep(sk), &wait); rc = sk_wait_event(sk, timeo, sk->sk_err || + cflags->peer_conn_abort || sk->sk_shutdown & RCV_SHUTDOWN || + conn->killed || fcrit(conn), &wait); remove_wait_queue(sk_sleep(sk), &wait); @@ -314,11 +318,13 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, if 
(read_done >= target || (pipe && read_done)) break; + if (conn->killed) + break; + if (smc_rx_recvmsg_data_available(smc)) goto copy; - if (sk->sk_shutdown & RCV_SHUTDOWN || - conn->local_tx_ctrl.conn_state_flags.peer_conn_abort) { + if (sk->sk_shutdown & RCV_SHUTDOWN) { /* smc_cdc_msg_recv_action() could have run after * above smc_rx_recvmsg_data_available() */ diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 6c8f09c1ce51..824f096ee7de 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -86,6 +86,7 @@ static int smc_tx_wait(struct smc_sock *smc, int flags) sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN) || + conn->killed || conn->local_tx_ctrl.conn_state_flags.peer_done_writing) { rc = -EPIPE; break; @@ -155,7 +156,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) return -ENOTCONN; if (smc->sk.sk_shutdown & SEND_SHUTDOWN || (smc->sk.sk_err == ECONNABORTED) || - conn->local_tx_ctrl.conn_state_flags.peer_conn_abort) + conn->killed) return -EPIPE; if (smc_cdc_rxed_any_close(conn)) return send_done ?: -ECONNRESET; @@ -282,10 +283,8 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset, peer_rmbe_offset; rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey; rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL); - if (rc) { - conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; + if (rc) smc_lgr_terminate(lgr); - } return rc; } @@ -495,10 +494,11 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) if (smc->sk.sk_err == ECONNABORTED) return sock_error(&smc->sk); + if (conn->killed) + return -EPIPE; rc = 0; - if (conn->alert_token_local) /* connection healthy */ - mod_delayed_work(system_wq, &conn->tx_work, - SMC_TX_WORK_DELAY); + mod_delayed_work(system_wq, &conn->tx_work, + SMC_TX_WORK_DELAY); } return rc; } @@ -547,6 +547,9 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn) { int rc; + if (conn->killed || + 
conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) + return -EPIPE; /* connection being aborted */ if (conn->lgr->is_smcd) rc = smcd_tx_sndbuf_nonempty(conn); else @@ -573,9 +576,7 @@ void smc_tx_work(struct work_struct *work) int rc; lock_sock(&smc->sk); - if (smc->sk.sk_err || - !conn->alert_token_local || - conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) + if (smc->sk.sk_err) goto out; rc = smc_tx_sndbuf_nonempty(conn); @@ -608,8 +609,11 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force) ((to_confirm > conn->rmbe_update_limit) && ((sender_free <= (conn->rmb_desc->len / 2)) || conn->local_rx_ctrl.prod_flags.write_blocked))) { + if (conn->killed || + conn->local_rx_ctrl.conn_state_flags.peer_conn_abort) + return; if ((smc_cdc_get_slot_and_msg_send(conn) < 0) && - conn->alert_token_local) { /* connection healthy */ + !conn->killed) { schedule_delayed_work(&conn->tx_work, SMC_TX_WORK_DELAY); return; From 8caa654451bda40379bff786a63833b2965536e4 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 21 Oct 2019 16:13:09 +0200 Subject: [PATCH 2/8] net/smc: terminate link group without holding lgr lock When a link group is to be terminated, it is sufficient to hold the lgr lock when unlinking the link group from its list. Move the lock-protected link group unlinking into smc_lgr_terminate(). 
Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/smc_core.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 4ee0e33b8c5a..b53ba8f0a833 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -182,8 +182,7 @@ static void smc_lgr_free_work(struct work_struct *work) spin_unlock_bh(lgr_lock); return; } - if (!list_empty(&lgr->list)) - list_del_init(&lgr->list); /* remove from smc_lgr_list */ + list_del_init(&lgr->list); /* remove from smc_lgr_list */ spin_unlock_bh(lgr_lock); if (!lgr->is_smcd && !lgr->terminating) { @@ -479,7 +478,7 @@ void smc_lgr_forget(struct smc_link_group *lgr) spin_unlock_bh(lgr_lock); } -/* terminate linkgroup abnormally */ +/* terminate link group */ static void __smc_lgr_terminate(struct smc_link_group *lgr) { struct smc_connection *conn; @@ -489,8 +488,6 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr) if (lgr->terminating) return; /* lgr already terminating */ lgr->terminating = 1; - if (!list_empty(&lgr->list)) /* forget lgr */ - list_del_init(&lgr->list); if (!lgr->is_smcd) smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); @@ -516,29 +513,41 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr) smc_lgr_schedule_free_work(lgr); } +/* unlink and terminate link group */ void smc_lgr_terminate(struct smc_link_group *lgr) { spinlock_t *lgr_lock; smc_lgr_list_head(lgr, &lgr_lock); spin_lock_bh(lgr_lock); - __smc_lgr_terminate(lgr); + if (lgr->terminating) { + spin_unlock_bh(lgr_lock); + return; /* lgr already terminating */ + } + list_del_init(&lgr->list); spin_unlock_bh(lgr_lock); + __smc_lgr_terminate(lgr); } /* Called when IB port is terminated */ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport) { struct smc_link_group *lgr, *l; + LIST_HEAD(lgr_free_list); spin_lock_bh(&smc_lgr_list.lock); list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) { if 
(!lgr->is_smcd && lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev && lgr->lnk[SMC_SINGLE_LINK].ibport == ibport) - __smc_lgr_terminate(lgr); + list_move(&lgr->list, &lgr_free_list); } spin_unlock_bh(&smc_lgr_list.lock); + + list_for_each_entry_safe(lgr, l, &lgr_free_list, list) { + list_del_init(&lgr->list); + __smc_lgr_terminate(lgr); + } } /* Called when SMC-D device is terminated or peer is lost */ @@ -552,7 +561,6 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan) list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) { if ((!peer_gid || lgr->peer_gid == peer_gid) && (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) { - __smc_lgr_terminate(lgr); list_move(&lgr->list, &lgr_free_list); } } @@ -561,6 +569,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan) /* cancel the regular free workers and actually free lgrs */ list_for_each_entry_safe(lgr, l, &lgr_free_list, list) { list_del_init(&lgr->list); + __smc_lgr_terminate(lgr); cancel_delayed_work_sync(&lgr->free_work); if (!peer_gid && vlan == VLAN_VID_MASK) /* dev terminated? */ smc_ism_signal_shutdown(lgr); From 69318b5215f2dc32c345a3d65b98b4b1bf29c007 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 21 Oct 2019 16:13:10 +0200 Subject: [PATCH 3/8] net/smc: improve abnormal termination locking Locking hierarchy requires that the link group conns_lock can be taken if the socket lock is held, but not vice versa. Nevertheless socket termination during abnormal link group termination should be protected by the socket lock. This patch reduces the time segments the link group conns_lock is held to enable usage of lock_sock in smc_lgr_terminate(). 
Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/smc_core.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index b53ba8f0a833..1f58cd82928c 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -491,23 +491,26 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr) if (!lgr->is_smcd) smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]); - write_lock_bh(&lgr->conns_lock); + /* kill remaining link group connections */ + read_lock_bh(&lgr->conns_lock); node = rb_first(&lgr->conns_all); while (node) { + read_unlock_bh(&lgr->conns_lock); conn = rb_entry(node, struct smc_connection, alert_node); smc = container_of(conn, struct smc_sock, conn); + lock_sock(&smc->sk); sock_hold(&smc->sk); /* sock_put in close work */ conn->killed = 1; conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; - __smc_lgr_unregister_conn(conn); + smc_lgr_unregister_conn(conn); conn->lgr = NULL; - write_unlock_bh(&lgr->conns_lock); if (!schedule_work(&conn->close_work)) sock_put(&smc->sk); - write_lock_bh(&lgr->conns_lock); + release_sock(&smc->sk); + read_lock_bh(&lgr->conns_lock); node = rb_first(&lgr->conns_all); } - write_unlock_bh(&lgr->conns_lock); + read_unlock_bh(&lgr->conns_lock); if (!lgr->is_smcd) wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait); smc_lgr_schedule_free_work(lgr); From 8e316b9e7260cbc61974c2558733dab5de949399 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 21 Oct 2019 16:13:11 +0200 Subject: [PATCH 4/8] net/smc: improve link group freeing Usually link groups are freed with a delay to enable quick connection creation for a follow-on SMC socket. Terminated link groups are freed faster. This patch makes sure that a fast schedule of link group freeing is not rescheduled by a delayed schedule. It also makes sure that link group freeing is not rescheduled if the real freeing is already running. 
Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/smc_core.c | 47 +++++++++++++++++++++++++++++----------------- net/smc/smc_core.h | 2 ++ 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 1f58cd82928c..e7e9dbcd7d8b 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -61,14 +61,21 @@ static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) * creation. For client use a somewhat higher removal delay time, * otherwise there is a risk of out-of-sync link groups. */ - mod_delayed_work(system_wq, &lgr->free_work, - (!lgr->is_smcd && lgr->role == SMC_CLNT) ? - SMC_LGR_FREE_DELAY_CLNT : SMC_LGR_FREE_DELAY_SERV); + if (!lgr->freeing && !lgr->freefast) { + mod_delayed_work(system_wq, &lgr->free_work, + (!lgr->is_smcd && lgr->role == SMC_CLNT) ? + SMC_LGR_FREE_DELAY_CLNT : + SMC_LGR_FREE_DELAY_SERV); + } } void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr) { - mod_delayed_work(system_wq, &lgr->free_work, SMC_LGR_FREE_DELAY_FAST); + if (!lgr->freeing && !lgr->freefast) { + lgr->freefast = 1; + mod_delayed_work(system_wq, &lgr->free_work, + SMC_LGR_FREE_DELAY_FAST); + } } /* Register connection's alert token in our lookup structure. 
@@ -171,10 +178,15 @@ static void smc_lgr_free_work(struct work_struct *work) struct smc_link_group, free_work); spinlock_t *lgr_lock; + struct smc_link *lnk; bool conns; smc_lgr_list_head(lgr, &lgr_lock); spin_lock_bh(lgr_lock); + if (lgr->freeing) { + spin_unlock_bh(lgr_lock); + return; + } read_lock_bh(&lgr->conns_lock); conns = RB_EMPTY_ROOT(&lgr->conns_all); read_unlock_bh(&lgr->conns_lock); @@ -183,29 +195,27 @@ static void smc_lgr_free_work(struct work_struct *work) return; } list_del_init(&lgr->list); /* remove from smc_lgr_list */ - spin_unlock_bh(lgr_lock); + lnk = &lgr->lnk[SMC_SINGLE_LINK]; if (!lgr->is_smcd && !lgr->terminating) { - struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; - /* try to send del link msg, on error free lgr immediately */ if (lnk->state == SMC_LNK_ACTIVE && !smc_link_send_delete(lnk)) { /* reschedule in case we never receive a response */ smc_lgr_schedule_free_work(lgr); + spin_unlock_bh(lgr_lock); return; } } + lgr->freeing = 1; /* this instance does the freeing, no new schedule */ + spin_unlock_bh(lgr_lock); + cancel_delayed_work(&lgr->free_work); - if (!delayed_work_pending(&lgr->free_work)) { - struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; - - if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE) - smc_llc_link_inactive(lnk); - if (lgr->is_smcd) - smc_ism_signal_shutdown(lgr); - smc_lgr_free(lgr); - } + if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE) + smc_llc_link_inactive(lnk); + if (lgr->is_smcd) + smc_ism_signal_shutdown(lgr); + smc_lgr_free(lgr); } /* create a new SMC link group */ @@ -233,6 +243,9 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) } lgr->is_smcd = ini->is_smcd; lgr->sync_err = 0; + lgr->terminating = 0; + lgr->freefast = 0; + lgr->freeing = 0; lgr->vlan_id = ini->vlan_id; rwlock_init(&lgr->sndbufs_lock); rwlock_init(&lgr->rmbs_lock); @@ -513,7 +526,7 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr) read_unlock_bh(&lgr->conns_lock); if (!lgr->is_smcd) 
wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait); - smc_lgr_schedule_free_work(lgr); + smc_lgr_schedule_free_work_fast(lgr); } /* unlink and terminate link group */ diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index c00ac61dc129..12c2818b293f 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -204,6 +204,8 @@ struct smc_link_group { struct delayed_work free_work; /* delayed freeing of an lgr */ u8 sync_err : 1; /* lgr no longer fits to peer */ u8 terminating : 1;/* lgr is terminating */ + u8 freefast : 1; /* free worker scheduled fast */ + u8 freeing : 1; /* lgr is being freed */ bool is_smcd; /* SMC-R or SMC-D */ union { From 8317976096635110603c3e143bcaf8773f4a3e65 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 21 Oct 2019 16:13:12 +0200 Subject: [PATCH 5/8] net/smc: tell peers about abnormal link group termination There are lots of link group termination scenarios. Most of them still allow informing the peer of the terminating sockets about the abort. This patch tries to call smc_close_abort() for terminating sockets, and the internal TCP socket is reset with tcp_abort(). 
Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/smc_close.c | 9 ++++----- net/smc/smc_close.h | 1 + net/smc/smc_core.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 1d706c581592..2bbcd45a421e 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -13,6 +13,7 @@ #include #include +#include #include "smc.h" #include "smc_tx.h" @@ -102,7 +103,7 @@ static int smc_close_final(struct smc_connection *conn) return smc_cdc_get_slot_and_msg_send(conn); } -static int smc_close_abort(struct smc_connection *conn) +int smc_close_abort(struct smc_connection *conn) { conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; @@ -118,10 +119,8 @@ static void smc_close_active_abort(struct smc_sock *smc) if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) { sk->sk_err = ECONNABORTED; - if (smc->clcsock && smc->clcsock->sk) { - smc->clcsock->sk->sk_err = ECONNABORTED; - smc->clcsock->sk->sk_state_change(smc->clcsock->sk); - } + if (smc->clcsock && smc->clcsock->sk) + tcp_abort(smc->clcsock->sk, ECONNABORTED); } switch (sk->sk_state) { case SMC_ACTIVE: diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h index e0e3b5df25d2..084c4f37aa96 100644 --- a/net/smc/smc_close.h +++ b/net/smc/smc_close.h @@ -24,5 +24,6 @@ int smc_close_active(struct smc_sock *smc); int smc_close_shutdown_write(struct smc_sock *smc); void smc_close_init(struct smc_sock *smc); void smc_clcsock_release(struct smc_sock *smc); +int smc_close_abort(struct smc_connection *conn); #endif /* SMC_CLOSE_H */ diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index e7e9dbcd7d8b..494288f32df6 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -513,8 +513,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr) smc = container_of(conn, struct smc_sock, conn); lock_sock(&smc->sk); sock_hold(&smc->sk); /* sock_put in close work */ + smc_close_abort(conn); conn->killed 
= 1; - conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; smc_lgr_unregister_conn(conn); conn->lgr = NULL; if (!schedule_work(&conn->close_work)) From 2a0674fffb6bc1a7c0f46bb2e0b1bcf1d49c2232 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 21 Oct 2019 16:13:13 +0200 Subject: [PATCH 6/8] net/smc: improve abnormal termination of link groups If a link group and its connections must be terminated, * wake up socket waiters * do not enable buffer reuse A linkgroup might be terminated while normal connection closing is running. Avoid buffer reuse and its related LLC DELETE RKEY call, if linkgroup termination has started. And use the earliest indication of linkgroup termination possible, namely the removal from the linkgroup list. Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/smc_core.c | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 494288f32df6..6faaa38412b1 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -154,6 +154,7 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) __smc_lgr_unregister_conn(conn); } write_unlock_bh(&lgr->conns_lock); + conn->lgr = NULL; } /* Send delete link, either as client to request the initiation @@ -344,7 +345,7 @@ static void smc_buf_unuse(struct smc_connection *conn, conn->sndbuf_desc->used = 0; if (conn->rmb_desc) { if (!conn->rmb_desc->regerr) { - if (!lgr->is_smcd) { + if (!lgr->is_smcd && !list_empty(&lgr->list)) { /* unregister rmb with peer */ smc_llc_do_delete_rkey( &lgr->lnk[SMC_SINGLE_LINK], @@ -375,9 +376,10 @@ void smc_conn_free(struct smc_connection *conn) } else { smc_cdc_tx_dismiss_slots(conn); } - smc_lgr_unregister_conn(conn); - smc_buf_unuse(conn, lgr); /* allow buffer reuse */ - conn->lgr = NULL; + if (!list_empty(&lgr->list)) { + smc_lgr_unregister_conn(conn); + smc_buf_unuse(conn, lgr); /* allow buffer reuse */ + } if 
(!lgr->conns_num) smc_lgr_schedule_free_work(lgr); @@ -491,6 +493,28 @@ void smc_lgr_forget(struct smc_link_group *lgr) spin_unlock_bh(lgr_lock); } +static void smc_sk_wake_ups(struct smc_sock *smc) +{ + smc->sk.sk_write_space(&smc->sk); + smc->sk.sk_data_ready(&smc->sk); + smc->sk.sk_state_change(&smc->sk); +} + +/* kill a connection */ +static void smc_conn_kill(struct smc_connection *conn) +{ + struct smc_sock *smc = container_of(conn, struct smc_sock, conn); + + smc_close_abort(conn); + conn->killed = 1; + smc_sk_wake_ups(smc); + smc_lgr_unregister_conn(conn); + smc->sk.sk_err = ECONNABORTED; + sock_hold(&smc->sk); /* sock_put in close work */ + if (!schedule_work(&conn->close_work)) + sock_put(&smc->sk); +} + /* terminate link group */ static void __smc_lgr_terminate(struct smc_link_group *lgr) { @@ -512,13 +536,7 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr) conn = rb_entry(node, struct smc_connection, alert_node); smc = container_of(conn, struct smc_sock, conn); lock_sock(&smc->sk); - sock_hold(&smc->sk); /* sock_put in close work */ - smc_close_abort(conn); - conn->killed = 1; - smc_lgr_unregister_conn(conn); - conn->lgr = NULL; - if (!schedule_work(&conn->close_work)) - sock_put(&smc->sk); + smc_conn_kill(conn); release_sock(&smc->sk); read_lock_bh(&lgr->conns_lock); node = rb_first(&lgr->conns_all); From f528ba24a8ad61b8a5e55d34cb1da127ce67cf6e Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 21 Oct 2019 16:13:14 +0200 Subject: [PATCH 7/8] net/smc: introduce link group termination worker Use a worker for link group termination to guarantee process context. 
Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/smc_core.c | 9 +++++++++ net/smc/smc_core.h | 7 +++++++ net/smc/smc_llc.c | 2 +- net/smc/smc_wr.c | 10 +++++----- 4 files changed, 22 insertions(+), 6 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 6faaa38412b1..46d4b944c4c4 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -219,6 +219,14 @@ static void smc_lgr_free_work(struct work_struct *work) smc_lgr_free(lgr); } +static void smc_lgr_terminate_work(struct work_struct *work) +{ + struct smc_link_group *lgr = container_of(work, struct smc_link_group, + terminate_work); + + smc_lgr_terminate(lgr); +} + /* create a new SMC link group */ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) { @@ -258,6 +266,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) smc_lgr_list.num += SMC_LGR_NUM_INCR; memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE); INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work); + INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work); lgr->conns_all = RB_ROOT; if (ini->is_smcd) { /* SMC-D specific settings */ diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 12c2818b293f..e6fd1ed42064 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -202,6 +202,7 @@ struct smc_link_group { u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */ struct delayed_work free_work; /* delayed freeing of an lgr */ + struct work_struct terminate_work; /* abnormal lgr termination */ u8 sync_err : 1; /* lgr no longer fits to peer */ u8 terminating : 1;/* lgr is terminating */ u8 freefast : 1; /* free worker scheduled fast */ @@ -282,6 +283,12 @@ static inline struct smc_connection *smc_lgr_find_conn( return res; } +static inline void smc_lgr_terminate_sched(struct smc_link_group *lgr) +{ + if (!lgr->terminating) + schedule_work(&lgr->terminate_work); +} + struct smc_sock; struct smc_clc_msg_accept_confirm; struct 
smc_clc_msg_local; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 4fd60c522802..e1918ffaf125 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -475,7 +475,7 @@ static void smc_llc_rx_delete_link(struct smc_link *link, smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true); } smc_llc_send_message(link, llc, sizeof(*llc)); - smc_lgr_schedule_free_work_fast(lgr); + smc_lgr_terminate_sched(lgr); } } diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 253aa75dc2b6..50743dc56c86 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -101,7 +101,7 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) clear_bit(i, link->wr_tx_mask); } /* terminate connections of this link group abnormally */ - smc_lgr_terminate(smc_get_lgr(link)); + smc_lgr_terminate_sched(smc_get_lgr(link)); } if (pnd_snd.handler) pnd_snd.handler(&pnd_snd.priv, link, wc->status); @@ -191,7 +191,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, SMC_WR_TX_WAIT_FREE_SLOT_TIME); if (!rc) { /* timeout - terminate connections */ - smc_lgr_terminate(smc_get_lgr(link)); + smc_lgr_terminate_sched(smc_get_lgr(link)); return -EPIPE; } if (idx == link->wr_tx_cnt) @@ -247,7 +247,7 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv) rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], NULL); if (rc) { smc_wr_tx_put_slot(link, priv); - smc_lgr_terminate(smc_get_lgr(link)); + smc_lgr_terminate_sched(smc_get_lgr(link)); } return rc; } @@ -272,7 +272,7 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) SMC_WR_REG_MR_WAIT_TIME); if (!rc) { /* timeout - terminate connections */ - smc_lgr_terminate(smc_get_lgr(link)); + smc_lgr_terminate_sched(smc_get_lgr(link)); return -EPIPE; } if (rc == -ERESTARTSYS) @@ -373,7 +373,7 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num) /* terminate connections of this link group * abnormally */ - smc_lgr_terminate(smc_get_lgr(link)); + smc_lgr_terminate_sched(smc_get_lgr(link)); break; 
default: smc_wr_rx_post(link); /* refill WR RX */ From 81cf4f4707af9704ac1c3dd177c8bd1fcc01da6c Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Mon, 21 Oct 2019 16:13:15 +0200 Subject: [PATCH 8/8] net/smc: remove close abort worker With the introduction of the link group termination worker there is no longer a need to postpone smc_close_active_abort() to a worker. To protect socket destruction due to normal and abnormal socket closing, the socket refcount is increased. Signed-off-by: Ursula Braun Signed-off-by: Karsten Graul Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 4 ++++ net/smc/smc_close.c | 18 +++++++++++------- net/smc/smc_close.h | 1 + net/smc/smc_core.c | 6 +++--- 4 files changed, 19 insertions(+), 10 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 5b932583e407..91ea098fabd9 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -167,6 +167,7 @@ static int smc_release(struct socket *sock) if (!sk) goto out; + sock_hold(sk); /* sock_put below */ smc = smc_sk(sk); /* cleanup for a dangling non-blocking connect */ @@ -189,6 +190,7 @@ static int smc_release(struct socket *sock) sock->sk = NULL; release_sock(sk); + sock_put(sk); /* sock_hold above */ sock_put(sk); /* final sock_put */ out: return rc; @@ -970,12 +972,14 @@ void smc_close_non_accepted(struct sock *sk) { struct smc_sock *smc = smc_sk(sk); + sock_hold(sk); /* sock_put below */ lock_sock(sk); if (!sk->sk_lingertime) /* wait for peer closing */ sk->sk_lingertime = SMC_MAX_STREAM_WAIT_TIMEOUT; __smc_release(smc); release_sock(sk); + sock_put(sk); /* sock_hold above */ sock_put(sk); /* final sock_put */ } diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 2bbcd45a421e..d34e5adce2eb 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -113,9 +113,10 @@ int smc_close_abort(struct smc_connection *conn) /* terminate smc socket abnormally - active abort * link group is terminated, i.e. 
RDMA communication no longer possible */ -static void smc_close_active_abort(struct smc_sock *smc) +void smc_close_active_abort(struct smc_sock *smc) { struct sock *sk = &smc->sk; + bool release_clcsock = false; if (sk->sk_state != SMC_INIT && smc->clcsock && smc->clcsock->sk) { sk->sk_err = ECONNABORTED; @@ -137,11 +138,14 @@ static void smc_close_active_abort(struct smc_sock *smc) cancel_delayed_work_sync(&smc->conn.tx_work); lock_sock(sk); sk->sk_state = SMC_CLOSED; + sock_put(sk); /* postponed passive closing */ break; case SMC_PEERCLOSEWAIT1: case SMC_PEERCLOSEWAIT2: case SMC_PEERFINCLOSEWAIT: sk->sk_state = SMC_CLOSED; + smc_conn_free(&smc->conn); + release_clcsock = true; sock_put(sk); /* passive closing */ break; case SMC_PROCESSABORT: @@ -156,6 +160,12 @@ static void smc_close_active_abort(struct smc_sock *smc) sock_set_flag(sk, SOCK_DEAD); sk->sk_state_change(sk); + + if (release_clcsock) { + release_sock(sk); + smc_clcsock_release(smc); + lock_sock(sk); + } } static inline bool smc_close_sent_any_close(struct smc_connection *conn) @@ -328,12 +338,6 @@ static void smc_close_passive_work(struct work_struct *work) lock_sock(sk); old_state = sk->sk_state; - if (conn->killed) { - /* abnormal termination */ - smc_close_active_abort(smc); - goto wakeup; - } - rxflags = &conn->local_rx_ctrl.conn_state_flags; if (rxflags->peer_conn_abort) { /* peer has not received all data */ diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h index 084c4f37aa96..634fea2b7c95 100644 --- a/net/smc/smc_close.h +++ b/net/smc/smc_close.h @@ -25,5 +25,6 @@ int smc_close_shutdown_write(struct smc_sock *smc); void smc_close_init(struct smc_sock *smc); void smc_clcsock_release(struct smc_sock *smc); int smc_close_abort(struct smc_connection *conn); +void smc_close_active_abort(struct smc_sock *smc); #endif /* SMC_CLOSE_H */ diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 46d4b944c4c4..ed02eac636da 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -519,9 +519,7 @@ 
static void smc_conn_kill(struct smc_connection *conn) smc_sk_wake_ups(smc); smc_lgr_unregister_conn(conn); smc->sk.sk_err = ECONNABORTED; - sock_hold(&smc->sk); /* sock_put in close work */ - if (!schedule_work(&conn->close_work)) - sock_put(&smc->sk); + smc_close_active_abort(smc); } /* terminate link group */ @@ -544,9 +542,11 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr) read_unlock_bh(&lgr->conns_lock); conn = rb_entry(node, struct smc_connection, alert_node); smc = container_of(conn, struct smc_sock, conn); + sock_hold(&smc->sk); /* sock_put below */ lock_sock(&smc->sk); smc_conn_kill(conn); release_sock(&smc->sk); + sock_put(&smc->sk); /* sock_hold above */ read_lock_bh(&lgr->conns_lock); node = rb_first(&lgr->conns_all); }