Merge branch 'mptcp-non-backup-subflows-pre-reqs'

Paolo Abeni says:

====================
mptcp: non backup subflows pre-reqs

This series contains a bunch of MPTCP improvements loosely related to
concurrent subflow xmit usage, which is currently under development.

The first 3 patches are actually bugfixes for issues that will become
apparent as soon as we enable the above feature.

The later patches improve the handling of incoming additional subflows,
significantly improving performance in stress tests with a high
new-connection rate.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller 2020-07-23 11:47:25 -07:00
Parents: 205a55f4e6 4cf8b7e48a
Commit: a3c8c7f467
4 changed files: 81 additions and 46 deletions

net/mptcp/options.c

@@ -709,6 +709,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
		 * additional ack.
		 */
		subflow->fully_established = 1;
+		WRITE_ONCE(msk->fully_established, true);
		goto fully_established;
	}

@@ -724,9 +725,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
	if (unlikely(!READ_ONCE(msk->pm.server_side)))
		pr_warn_once("bogus mpc option on established client sk");
-	subflow->fully_established = 1;
-	subflow->remote_key = mp_opt->sndr_key;
-	subflow->can_ack = 1;
+	mptcp_subflow_fully_established(subflow, mp_opt);

fully_established:
	if (likely(subflow->pm_notified))

net/mptcp/protocol.c

@@ -460,15 +460,20 @@ static void mptcp_clean_una(struct sock *sk)
	dfrag = mptcp_rtx_head(sk);
	if (dfrag && after64(snd_una, dfrag->data_seq)) {
-		u64 delta = dfrag->data_seq + dfrag->data_len - snd_una;
+		u64 delta = snd_una - dfrag->data_seq;
+
+		if (WARN_ON_ONCE(delta > dfrag->data_len))
+			goto out;

		dfrag->data_seq += delta;
		dfrag->offset += delta;
		dfrag->data_len -= delta;

		dfrag_uncharge(sk, delta);
		cleaned = true;
	}

+out:
	if (cleaned) {
		sk_mem_reclaim_partial(sk);
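
A note on the fix above: snd_una - dfrag->data_seq is the number of bytes of
the dfrag that the peer has actually acknowledged, while the old expression
computed the bytes still outstanding, so the wrong amount was trimmed and
uncharged. A standalone sanity check, with hypothetical values picked purely
for illustration:

/* Sanity check for the delta fix above; the values are made up.
 * Build with: cc -o delta_check delta_check.c && ./delta_check
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* a dfrag covering MPTCP-level sequence 1000..1499 (500 bytes),
	 * partially acknowledged up to snd_una == 1200
	 */
	uint64_t data_seq = 1000, data_len = 500, snd_una = 1200;

	uint64_t old_delta = data_seq + data_len - snd_una; /* 300: bytes still unacked */
	uint64_t new_delta = snd_una - data_seq;            /* 200: bytes actually acked */

	assert(old_delta == 300 && new_delta == 200);
	/* only the acked 200 bytes may be trimmed from the dfrag head;
	 * the added WARN_ON_ONCE() bounds the trim by data_len
	 */
	return 0;
}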
@@ -1517,6 +1522,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
	msk->local_key = subflow_req->local_key;
	msk->token = subflow_req->token;
	msk->subflow = NULL;
+	WRITE_ONCE(msk->fully_established, false);

	msk->write_seq = subflow_req->idsn + 1;
	atomic64_set(&msk->snd_una, msk->write_seq);
@@ -1600,7 +1606,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
		newsk = new_mptcp_sock;
		mptcp_copy_inaddrs(newsk, ssk);
		list_add(&subflow->node, &msk->conn_list);
-		inet_sk_state_store(newsk, TCP_ESTABLISHED);

		mptcp_rcv_space_init(msk, ssk);
		bh_unlock_sock(new_mptcp_sock);
@@ -1814,7 +1819,6 @@ void mptcp_finish_connect(struct sock *ssk)
	ack_seq++;
	subflow->map_seq = ack_seq;
	subflow->map_subflow_seq = 1;
-	subflow->rel_write_seq = 1;

	/* the socket is not connected yet, no msk/subflow ops can access/race
	 * accessing the field below
@@ -1851,7 +1855,7 @@ bool mptcp_finish_join(struct sock *sk)
	pr_debug("msk=%p, subflow=%p", msk, subflow);

	/* mptcp socket already closing? */
-	if (inet_sk_state_load(parent) != TCP_ESTABLISHED)
+	if (!mptcp_is_fully_established(parent))
		return false;

	if (!msk->pm.server_side)
@@ -1940,6 +1944,13 @@ unlock:
	return err;
}

+static void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
+					 struct mptcp_subflow_context *subflow)
+{
+	subflow->request_mptcp = 0;
+	__mptcp_do_fallback(msk);
+}
+
static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
				int addr_len, int flags)
{
@@ -1971,10 +1982,10 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
	 * TCP option space.
	 */
	if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
-		subflow->request_mptcp = 0;
+		mptcp_subflow_early_fallback(msk, subflow);
#endif
	if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk))
-		subflow->request_mptcp = 0;
+		mptcp_subflow_early_fallback(msk, subflow);

do_connect:
	err = ssock->ops->connect(ssock, uaddr, addr_len, flags);
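
The new helper does more than the two branches it replaces: besides clearing
request_mptcp it also records the fallback on the msk via __mptcp_do_fallback(),
which the old bare assignments skipped. For reference, __mptcp_do_fallback() is
defined in protocol.h and is not part of this diff; roughly paraphrased from
the sources of this era (a sketch, not the exact code):

/* Rough paraphrase of __mptcp_do_fallback() from net/mptcp/protocol.h
 * (NOT part of this diff): record the msk-wide TCP fallback once.
 */
static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
{
	if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags))
		return;		/* fallback already recorded */

	set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
}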

net/mptcp/protocol.h

@@ -198,6 +198,7 @@ struct mptcp_sock {
	u32		token;
	unsigned long	flags;
	bool		can_ack;
+	bool		fully_established;
	spinlock_t	join_list_lock;
	struct work_struct work;
	struct list_head conn_list;
@@ -342,6 +343,8 @@ mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
}

int mptcp_is_enabled(struct net *net);
+void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
+				     struct mptcp_options_received *mp_opt);
bool mptcp_subflow_data_available(struct sock *sk);
void __init mptcp_subflow_init(void);
@@ -373,6 +376,11 @@ void mptcp_get_options(const struct sk_buff *skb,
		       struct mptcp_options_received *mp_opt);

void mptcp_finish_connect(struct sock *sk);
+static inline bool mptcp_is_fully_established(struct sock *sk)
+{
+	return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
+	       READ_ONCE(mptcp_sk(sk)->fully_established);
+}
void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk);
void mptcp_data_ready(struct sock *sk, struct sock *ssk);
bool mptcp_finish_join(struct sock *sk);
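
This inline helper is the read side of the new scheme: its READ_ONCE() pairs
with the WRITE_ONCE() stores added in options.c, protocol.c and subflow.c, so
the flag can be tested locklessly. A hypothetical caller (a sketch, not code
from this series) would gate additional-subflow work like this:

/* Hypothetical kernel-context sketch (not part of this diff): any path
 * opening or accepting extra subflows should bail out until the first
 * subflow is fully established.
 */
static int example_open_extra_subflow(struct mptcp_sock *msk)
{
	struct sock *sk = (struct sock *)msk;

	/* lockless; pairs with WRITE_ONCE(msk->fully_established, ...) */
	if (!mptcp_is_fully_established(sk))
		return -ENOTCONN; /* same policy as __mptcp_subflow_connect() */

	/* ... proceed with subflow creation ... */
	return 0;
}

This mirrors how mptcp_finish_join() (above) and __mptcp_subflow_connect()
(below) use the helper.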

net/mptcp/subflow.c

@@ -53,6 +53,12 @@ static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
	mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
}

+static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk)
+{
+	return mptcp_is_fully_established((void *)msk) &&
+	       READ_ONCE(msk->pm.accept_subflow);
+}
+
/* validate received token and create truncated hmac and nonce for SYN-ACK */
static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
						     const struct sk_buff *skb)
@@ -200,49 +206,40 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
	if (subflow->conn_finished)
		return;

+	subflow->rel_write_seq = 1;
	subflow->conn_finished = 1;
	subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
	pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);

	mptcp_get_options(skb, &mp_opt);
-	if (subflow->request_mptcp && mp_opt.mp_capable) {
+	if (subflow->request_mptcp) {
+		if (!mp_opt.mp_capable) {
+			MPTCP_INC_STATS(sock_net(sk),
+					MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
+			mptcp_do_fallback(sk);
+			pr_fallback(mptcp_sk(subflow->conn));
+			goto fallback;
+		}
+
		subflow->mp_capable = 1;
		subflow->can_ack = 1;
		subflow->remote_key = mp_opt.sndr_key;
		pr_debug("subflow=%p, remote_key=%llu", subflow,
			 subflow->remote_key);
-	} else if (subflow->request_join && mp_opt.mp_join) {
-		subflow->mp_join = 1;
+		mptcp_finish_connect(sk);
+	} else if (subflow->request_join) {
+		u8 hmac[SHA256_DIGEST_SIZE];
+
+		if (!mp_opt.mp_join)
+			goto do_reset;
+
		subflow->thmac = mp_opt.thmac;
		subflow->remote_nonce = mp_opt.nonce;
		pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
			 subflow->thmac, subflow->remote_nonce);
-	} else {
-		if (subflow->request_mptcp)
-			MPTCP_INC_STATS(sock_net(sk),
-					MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
-		mptcp_do_fallback(sk);
-		pr_fallback(mptcp_sk(subflow->conn));
-	}
-
-	if (mptcp_check_fallback(sk)) {
-		mptcp_rcv_space_init(mptcp_sk(parent), sk);
-		return;
-	}
-
-	if (subflow->mp_capable) {
-		pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk),
-			 subflow->remote_key);
-		mptcp_finish_connect(sk);
-	} else if (subflow->mp_join) {
-		u8 hmac[SHA256_DIGEST_SIZE];
-
-		pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u",
-			 subflow, subflow->thmac,
-			 subflow->remote_nonce);

		if (!subflow_thmac_valid(subflow)) {
			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
-			subflow->mp_join = 0;
			goto do_reset;
		}
@@ -250,18 +247,22 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
				      subflow->local_nonce,
				      subflow->remote_nonce,
				      hmac);
		memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);

		if (!mptcp_finish_join(sk))
			goto do_reset;

+		subflow->mp_join = 1;
		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
-	} else {
-do_reset:
-		tcp_send_active_reset(sk, GFP_ATOMIC);
-		tcp_done(sk);
+	} else if (mptcp_check_fallback(sk)) {
+fallback:
+		mptcp_rcv_space_init(mptcp_sk(parent), sk);
	}
+	return;
+
+do_reset:
+	tcp_send_active_reset(sk, GFP_ATOMIC);
+	tcp_done(sk);
}

static struct request_sock_ops subflow_request_sock_ops;
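
One structural detail of the rewritten subflow_finish_connect() above is worth
calling out: the MP_CAPABLE error leg jumps via goto fallback into the tail of
the else-if chain, sharing the plain-TCP receive-space setup instead of
duplicating it. A minimal standalone C demo of that goto pattern (hypothetical
function and messages, nothing kernel-specific):

#include <stdio.h>

/* Demo of the "goto into the final else-if branch" pattern used above:
 * the error leg of the first branch reuses the fallback tail.
 */
static void finish(int request_mptcp, int mp_capable, int fallen_back)
{
	if (request_mptcp) {
		if (!mp_capable) {
			printf("account fallback MIB counter\n");
			goto fallback;	/* jump into the last branch */
		}
		printf("fully MPTCP-capable path\n");
	} else if (fallen_back) {
fallback:
		printf("set up receive space as plain TCP\n");
	}
}

int main(void)
{
	finish(1, 0, 0);	/* reaches the tail via the goto */
	finish(0, 0, 1);	/* reaches the same tail via the else-if */
	return 0;
}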
@@ -386,6 +387,17 @@ static void subflow_drop_ctx(struct sock *ssk)
	kfree_rcu(ctx, rcu);
}

+void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
+				     struct mptcp_options_received *mp_opt)
+{
+	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+	subflow->remote_key = mp_opt->sndr_key;
+	subflow->fully_established = 1;
+	subflow->can_ack = 1;
+	WRITE_ONCE(msk->fully_established, true);
+}
+
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
					  struct sk_buff *skb,
					  struct request_sock *req,
@@ -409,7 +421,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,

	/* hopefully temporary handling for MP_JOIN+syncookie */
	subflow_req = mptcp_subflow_rsk(req);
-	fallback_is_fatal = subflow_req->mp_join;
+	fallback_is_fatal = tcp_rsk(req)->is_mptcp && subflow_req->mp_join;
	fallback = !tcp_rsk(req)->is_mptcp;
	if (fallback)
		goto create_child;
@@ -437,6 +449,7 @@ create_msk:
	} else if (subflow_req->mp_join) {
		mptcp_get_options(skb, &mp_opt);
		if (!mp_opt.mp_join ||
+		    !mptcp_can_accept_new_subflow(subflow_req->msk) ||
		    !subflow_hmac_valid(req, &mp_opt)) {
			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
			fallback = true;
@@ -465,6 +478,11 @@ create_child:
		}

		if (ctx->mp_capable) {
+			/* this can't race with mptcp_close(), as the msk is
+			 * not yet exposed to user-space
+			 */
+			inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED);
+
			/* new mpc subflow takes ownership of the newly
			 * created mptcp socket
			 */
@@ -477,9 +495,8 @@ create_child:
			/* with OoO packets we can reach here without ingress
			 * mpc option
			 */
-			ctx->remote_key = mp_opt.sndr_key;
-			ctx->fully_established = mp_opt.mp_capable;
-			ctx->can_ack = mp_opt.mp_capable;
+			if (mp_opt.mp_capable)
+				mptcp_subflow_fully_established(ctx, &mp_opt);
		} else if (ctx->mp_join) {
			struct mptcp_sock *owner;
@@ -514,9 +531,9 @@ out:
dispose_child:
	subflow_drop_ctx(child);
	tcp_rsk(req)->drop_req = true;
-	tcp_send_active_reset(child, GFP_ATOMIC);
	inet_csk_prepare_for_destroy_sock(child);
	tcp_done(child);
+	req->rsk_ops->send_reset(sk, skb);

	/* The last child reference will be released by the caller */
	return child;
@@ -966,7 +983,7 @@ int __mptcp_subflow_connect(struct sock *sk, int ifindex,
	int addrlen;
	int err;

-	if (sk->sk_state != TCP_ESTABLISHED)
+	if (!mptcp_is_fully_established(sk))
		return -ENOTCONN;

	err = mptcp_subflow_create_socket(sk, &sf);