Merge branch 'mptcp-C-flag-and-fixes'

Mat Martineau says:

====================
mptcp: Connection-time 'C' flag and two fixes

Here are six more patches from the MPTCP tree.

Most of them add support for the 'C' flag in the MPTCP connection-time
option headers. This flag affects how the initial address and port are
treated by each peer. Normally one peer may send MP_JOIN requests to the
remote address and port that were used when initiating the MPTCP
connection. The 'C' bit indicates that MP_JOINs should only be sent to
remote addresses that have been advertised with ADD_ADDR.

The other two patches are unrelated improvements.

Patches 1-4: Add the 'C' flag feature, a sysctl to optionally enable it,
and a selftest.

Patch 5: Adjust rp_filter settings in a selftest.

Patch 6: Improve rbuf cleanup for MPTCP sockets.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2021-06-22 14:36:01 -07:00
Родитель a432c771e2 fde56eea01
Коммит 38f75922a6
11 изменённых файлов: 157 добавлений и 41 удалений

Просмотреть файл

@ -32,3 +32,16 @@ checksum_enabled - BOOLEAN
per-namespace sysctl.
Default: 0
allow_join_initial_addr_port - BOOLEAN
Allow peers to send join requests to the IP address and port number used
by the initial subflow if the value is 1. This controls a flag that is
sent to the peer at connection time, and whether such join requests are
accepted or denied.
Joins to addresses advertised with ADD_ADDR are not affected by this
value.
This is a per-namespace sysctl.
Default: 1

Просмотреть файл

@ -67,7 +67,8 @@ struct mptcp_out_options {
u8 backup;
u8 reset_reason:4,
reset_transient:1,
csum_reqd:1;
csum_reqd:1,
allow_join_id0:1;
u32 nonce;
u64 thmac;
u32 token;

Просмотреть файл

@ -24,6 +24,7 @@ struct mptcp_pernet {
u8 mptcp_enabled;
unsigned int add_addr_timeout;
u8 checksum_enabled;
u8 allow_join_initial_addr_port;
};
static struct mptcp_pernet *mptcp_get_pernet(struct net *net)
@ -46,11 +47,17 @@ int mptcp_is_checksum_enabled(struct net *net)
return mptcp_get_pernet(net)->checksum_enabled;
}
int mptcp_allow_join_id0(struct net *net)
{
return mptcp_get_pernet(net)->allow_join_initial_addr_port;
}
static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
{
pernet->mptcp_enabled = 1;
pernet->add_addr_timeout = TCP_RTO_MAX;
pernet->checksum_enabled = 0;
pernet->allow_join_initial_addr_port = 1;
}
#ifdef CONFIG_SYSCTL
@ -80,6 +87,14 @@ static struct ctl_table mptcp_sysctl_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE
},
{
.procname = "allow_join_initial_addr_port",
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE
},
{}
};
@ -98,6 +113,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
table[0].data = &pernet->mptcp_enabled;
table[1].data = &pernet->add_addr_timeout;
table[2].data = &pernet->checksum_enabled;
table[3].data = &pernet->allow_join_initial_addr_port;
hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table);
if (!hdr)

Просмотреть файл

@ -83,6 +83,9 @@ static void mptcp_parse_option(const struct sk_buff *skb,
if (flags & MPTCP_CAP_CHECKSUM_REQD)
mp_opt->csum_reqd = 1;
if (flags & MPTCP_CAP_DENY_JOIN_ID0)
mp_opt->deny_join_id0 = 1;
mp_opt->mp_capable = 1;
if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) {
mp_opt->sndr_key = get_unaligned_be64(ptr);
@ -360,6 +363,7 @@ void mptcp_get_options(const struct sock *sk,
mp_opt->mp_prio = 0;
mp_opt->reset = 0;
mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled);
mp_opt->deny_join_id0 = 0;
length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (const unsigned char *)(th + 1);
@ -402,6 +406,7 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
if (subflow->request_mptcp) {
opts->suboptions = OPTION_MPTCP_MPC_SYN;
opts->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk));
opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk));
*size = TCPOLEN_MPTCP_MPC_SYN;
return true;
} else if (subflow->request_join) {
@ -490,6 +495,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
opts->sndr_key = subflow->local_key;
opts->rcvr_key = subflow->remote_key;
opts->csum_reqd = READ_ONCE(msk->csum_enabled);
opts->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk));
/* Section 3.1.
* The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK
@ -827,6 +833,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
opts->suboptions = OPTION_MPTCP_MPC_SYNACK;
opts->sndr_key = subflow_req->local_key;
opts->csum_reqd = subflow_req->csum_reqd;
opts->allow_join_id0 = subflow_req->allow_join_id0;
*size = TCPOLEN_MPTCP_MPC_SYNACK;
pr_debug("subflow_req=%p, local_key=%llu",
subflow_req, subflow_req->local_key);
@ -905,6 +912,9 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
return false;
}
if (mp_opt->deny_join_id0)
WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk");
mptcp_subflow_fully_established(subflow, mp_opt);
@ -1201,6 +1211,9 @@ void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp,
if (opts->csum_reqd)
flag |= MPTCP_CAP_CHECKSUM_REQD;
if (!opts->allow_join_id0)
flag |= MPTCP_CAP_DENY_JOIN_ID0;
*ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len,
MPTCP_SUPPORTED_VERSION,
flag);

Просмотреть файл

@ -320,6 +320,7 @@ void mptcp_pm_data_init(struct mptcp_sock *msk)
WRITE_ONCE(msk->pm.addr_signal, 0);
WRITE_ONCE(msk->pm.accept_addr, false);
WRITE_ONCE(msk->pm.accept_subflow, false);
WRITE_ONCE(msk->pm.remote_deny_join_id0, false);
msk->pm.status = 0;
spin_lock_init(&msk->pm.lock);

Просмотреть файл

@ -451,7 +451,8 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
/* check if should create a new subflow */
if (msk->pm.local_addr_used < local_addr_max &&
msk->pm.subflows < subflows_max) {
msk->pm.subflows < subflows_max &&
!READ_ONCE(msk->pm.remote_deny_join_id0)) {
local = select_local_address(pernet, msk);
if (local) {
struct mptcp_addr_info remote = { 0 };

Просмотреть файл

@ -442,49 +442,46 @@ static void mptcp_send_ack(struct mptcp_sock *msk)
}
}
static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk)
static void mptcp_subflow_cleanup_rbuf(struct sock *ssk)
{
bool slow;
int ret;
slow = lock_sock_fast(ssk);
ret = tcp_can_send_ack(ssk);
if (ret)
if (tcp_can_send_ack(ssk))
tcp_cleanup_rbuf(ssk, 1);
unlock_sock_fast(ssk, slow);
return ret;
}
static bool mptcp_subflow_could_cleanup(const struct sock *ssk, bool rx_empty)
{
const struct inet_connection_sock *icsk = inet_csk(ssk);
bool ack_pending = READ_ONCE(icsk->icsk_ack.pending);
const struct tcp_sock *tp = tcp_sk(ssk);
return (ack_pending & ICSK_ACK_SCHED) &&
((READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->rcv_wup) >
READ_ONCE(icsk->icsk_ack.rcv_mss)) ||
(rx_empty && ack_pending &
(ICSK_ACK_PUSHED2 | ICSK_ACK_PUSHED)));
}
static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
{
struct sock *ack_hint = READ_ONCE(msk->ack_hint);
int old_space = READ_ONCE(msk->old_wspace);
struct mptcp_subflow_context *subflow;
struct sock *sk = (struct sock *)msk;
bool cleanup;
int space = __mptcp_space(sk);
bool cleanup, rx_empty;
/* this is a simple superset of what tcp_cleanup_rbuf() implements
* so that we don't have to acquire the ssk socket lock most of the time
* to do actually nothing
*/
cleanup = __mptcp_space(sk) - old_space >= max(0, old_space);
if (!cleanup)
return;
cleanup = (space > 0) && (space >= (old_space << 1));
rx_empty = !atomic_read(&sk->sk_rmem_alloc);
/* if the hinted ssk is still active, try to use it */
if (likely(ack_hint)) {
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
if (ack_hint == ssk && mptcp_subflow_cleanup_rbuf(ssk))
return;
if (cleanup || mptcp_subflow_could_cleanup(ssk, rx_empty))
mptcp_subflow_cleanup_rbuf(ssk);
}
}
/* otherwise pick the first active subflow */
mptcp_for_each_subflow(msk, subflow)
if (mptcp_subflow_cleanup_rbuf(mptcp_subflow_tcp_sock(subflow)))
return;
}
static bool mptcp_check_data_fin(struct sock *sk)
@ -629,7 +626,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
break;
}
} while (more_data_avail);
WRITE_ONCE(msk->ack_hint, ssk);
*bytes += moved;
return done;
@ -1910,7 +1906,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
__mptcp_update_rmem(sk);
done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
mptcp_data_unlock(sk);
tcp_cleanup_rbuf(ssk, moved);
if (unlikely(ssk->sk_err))
__mptcp_error_report(sk);
@ -1926,7 +1921,6 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk)
ret |= __mptcp_ofo_queue(msk);
__mptcp_splice_receive_queue(sk);
mptcp_data_unlock(sk);
mptcp_cleanup_rbuf(msk);
}
if (ret)
mptcp_check_data_fin((struct sock *)msk);
@ -2175,9 +2169,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
if (ssk == msk->last_snd)
msk->last_snd = NULL;
if (ssk == msk->ack_hint)
msk->ack_hint = NULL;
if (ssk == msk->first)
msk->first = NULL;
@ -2392,7 +2383,6 @@ static int __mptcp_init_sock(struct sock *sk)
msk->rmem_released = 0;
msk->tx_pending_data = 0;
msk->ack_hint = NULL;
msk->first = NULL;
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));

Просмотреть файл

@ -79,8 +79,9 @@
#define MPTCP_VERSION_MASK (0x0F)
#define MPTCP_CAP_CHECKSUM_REQD BIT(7)
#define MPTCP_CAP_EXTENSIBILITY BIT(6)
#define MPTCP_CAP_DENY_JOIN_ID0 BIT(5)
#define MPTCP_CAP_HMAC_SHA256 BIT(0)
#define MPTCP_CAP_FLAG_MASK (0x3F)
#define MPTCP_CAP_FLAG_MASK (0x1F)
/* MPTCP DSS flags */
#define MPTCP_DSS_DATA_FIN BIT(4)
@ -137,7 +138,8 @@ struct mptcp_options_received {
mp_prio : 1,
echo : 1,
csum_reqd : 1,
backup : 1;
backup : 1,
deny_join_id0 : 1;
u32 token;
u32 nonce;
u64 thmac;
@ -192,6 +194,7 @@ struct mptcp_pm_data {
bool work_pending;
bool accept_addr;
bool accept_subflow;
bool remote_deny_join_id0;
u8 add_addr_signaled;
u8 add_addr_accepted;
u8 local_addr_used;
@ -240,7 +243,6 @@ struct mptcp_sock {
bool use_64bit_ack; /* Set when we received a 64-bit DSN */
bool csum_enabled;
spinlock_t join_list_lock;
struct sock *ack_hint;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
@ -350,7 +352,8 @@ struct mptcp_subflow_request_sock {
u16 mp_capable : 1,
mp_join : 1,
backup : 1,
csum_reqd : 1;
csum_reqd : 1,
allow_join_id0 : 1;
u8 local_id;
u8 remote_id;
u64 local_key;
@ -540,6 +543,7 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su
int mptcp_is_enabled(struct net *net);
unsigned int mptcp_get_add_addr_timeout(struct net *net);
int mptcp_is_checksum_enabled(struct net *net);
int mptcp_allow_join_id0(struct net *net);
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
struct mptcp_options_received *mp_opt);
bool mptcp_subflow_data_available(struct sock *sk);

Просмотреть файл

@ -109,6 +109,7 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis
subflow_req->mp_capable = 0;
subflow_req->mp_join = 0;
subflow_req->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk_listener));
subflow_req->allow_join_id0 = mptcp_allow_join_id0(sock_net(sk_listener));
subflow_req->msk = NULL;
mptcp_token_init_request(req);
}
@ -407,6 +408,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
if (mp_opt.csum_reqd)
WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true);
if (mp_opt.deny_join_id0)
WRITE_ONCE(mptcp_sk(parent)->pm.remote_deny_join_id0, true);
subflow->mp_capable = 1;
subflow->can_ack = 1;
subflow->remote_key = mp_opt.sndr_key;

Просмотреть файл

@ -139,6 +139,17 @@ reset_with_checksum()
ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable
}
reset_with_allow_join_id0()
{
local ns1_enable=$1
local ns2_enable=$2
reset
ip netns exec $ns1 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns1_enable
ip netns exec $ns2 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns2_enable
}
ip -Version > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without ip tool"
@ -1462,6 +1473,63 @@ checksum_tests()
chk_csum_nr "checksum test 1 0"
}
deny_join_id0_tests()
{
# subflow allow join id0 ns1
reset_with_allow_join_id0 1 0
ip netns exec $ns1 ./pm_nl_ctl limits 1 1
ip netns exec $ns2 ./pm_nl_ctl limits 1 1
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "single subflow allow join id0 ns1" 1 1 1
# subflow allow join id0 ns2
reset_with_allow_join_id0 0 1
ip netns exec $ns1 ./pm_nl_ctl limits 1 1
ip netns exec $ns2 ./pm_nl_ctl limits 1 1
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "single subflow allow join id0 ns2" 0 0 0
# signal address allow join id0 ns1
# ADD_ADDRs are not affected by allow_join_id0 value.
reset_with_allow_join_id0 1 0
ip netns exec $ns1 ./pm_nl_ctl limits 1 1
ip netns exec $ns2 ./pm_nl_ctl limits 1 1
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "signal address allow join id0 ns1" 1 1 1
chk_add_nr 1 1
# signal address allow join id0 ns2
# ADD_ADDRs are not affected by allow_join_id0 value.
reset_with_allow_join_id0 0 1
ip netns exec $ns1 ./pm_nl_ctl limits 1 1
ip netns exec $ns2 ./pm_nl_ctl limits 1 1
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "signal address allow join id0 ns2" 1 1 1
chk_add_nr 1 1
# subflow and address allow join id0 ns1
reset_with_allow_join_id0 1 0
ip netns exec $ns1 ./pm_nl_ctl limits 2 2
ip netns exec $ns2 ./pm_nl_ctl limits 2 2
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "subflow and address allow join id0 1" 2 2 2
# subflow and address allow join id0 ns2
reset_with_allow_join_id0 0 1
ip netns exec $ns1 ./pm_nl_ctl limits 2 2
ip netns exec $ns2 ./pm_nl_ctl limits 2 2
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "subflow and address allow join id0 2" 1 1 1
}
all_tests()
{
subflows_tests
@ -1476,6 +1544,7 @@ all_tests()
add_addr_ports_tests
syncookies_tests
checksum_tests
deny_join_id0_tests
}
usage()
@ -1493,6 +1562,7 @@ usage()
echo " -p add_addr_ports_tests"
echo " -k syncookies_tests"
echo " -S checksum_tests"
echo " -d deny_join_id0_tests"
echo " -c capture pcap files"
echo " -C enable data checksum"
echo " -h help"
@ -1528,7 +1598,7 @@ if [ $do_all_tests -eq 1 ]; then
exit $ret
fi
while getopts 'fsltra64bpkchCS' opt; do
while getopts 'fsltra64bpkdchCS' opt; do
case $opt in
f)
subflows_tests
@ -1566,6 +1636,9 @@ while getopts 'fsltra64bpkchCS' opt; do
S)
checksum_tests
;;
d)
deny_join_id0_tests
;;
c)
;;
C)

Просмотреть файл

@ -60,6 +60,8 @@ setup()
for i in "$ns1" "$ns2" "$ns3";do
ip netns add $i || exit $ksft_skip
ip -net $i link set lo up
ip netns exec $i sysctl -q net.ipv4.conf.all.rp_filter=0
ip netns exec $i sysctl -q net.ipv4.conf.default.rp_filter=0
done
ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
@ -80,7 +82,6 @@ setup()
ip netns exec "$ns1" ./pm_nl_ctl limits 1 1
ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow
ip netns exec "$ns1" sysctl -q net.ipv4.conf.all.rp_filter=0
ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad