net: tcp: assign tcp cong_ops when tcp sk is created
Split assignment and initialization from one function into two. This is required by follow-up patches that add the Datacenter TCP (DCTCP) congestion control algorithm: we need to be able to determine whether the connection is moderated by DCTCP before the 3WHS (three-way handshake) has finished. As we walk the list of available congestion control algorithms during the assignment, we are always guaranteed to find Reno, as it is always compiled in. Therefore, since we now do the assignment early, we no longer have a real use for the Reno alias tcp_init_congestion_ops and can thus remove it. The congestion control operations are actually used only after the 3WHS has finished; in some cases, however, get_info() can be accessed via diag if the module implements it, so we need to zero out the private area for those modules. Joint work with Daniel Borkmann and Glenn Judd. Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: Daniel Borkmann <dborkman@redhat.com> Signed-off-by: Glenn Judd <glenn.judd@morganstanley.com> Acked-by: Stephen Hemminger <stephen@networkplumber.org> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Родитель
53dfd50181
Коммит
55d8694fa8
|
@ -824,6 +824,7 @@ struct tcp_congestion_ops {
|
||||||
int tcp_register_congestion_control(struct tcp_congestion_ops *type);
|
int tcp_register_congestion_control(struct tcp_congestion_ops *type);
|
||||||
void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
|
void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
|
||||||
|
|
||||||
|
void tcp_assign_congestion_control(struct sock *sk);
|
||||||
void tcp_init_congestion_control(struct sock *sk);
|
void tcp_init_congestion_control(struct sock *sk);
|
||||||
void tcp_cleanup_congestion_control(struct sock *sk);
|
void tcp_cleanup_congestion_control(struct sock *sk);
|
||||||
int tcp_set_default_congestion_control(const char *name);
|
int tcp_set_default_congestion_control(const char *name);
|
||||||
|
@ -835,7 +836,6 @@ int tcp_set_congestion_control(struct sock *sk, const char *name);
|
||||||
int tcp_slow_start(struct tcp_sock *tp, u32 acked);
|
int tcp_slow_start(struct tcp_sock *tp, u32 acked);
|
||||||
void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w);
|
void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w);
|
||||||
|
|
||||||
extern struct tcp_congestion_ops tcp_init_congestion_ops;
|
|
||||||
u32 tcp_reno_ssthresh(struct sock *sk);
|
u32 tcp_reno_ssthresh(struct sock *sk);
|
||||||
void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
|
void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked);
|
||||||
extern struct tcp_congestion_ops tcp_reno;
|
extern struct tcp_congestion_ops tcp_reno;
|
||||||
|
|
|
@ -405,7 +405,7 @@ void tcp_init_sock(struct sock *sk)
|
||||||
|
|
||||||
tp->reordering = sysctl_tcp_reordering;
|
tp->reordering = sysctl_tcp_reordering;
|
||||||
tcp_enable_early_retrans(tp);
|
tcp_enable_early_retrans(tp);
|
||||||
icsk->icsk_ca_ops = &tcp_init_congestion_ops;
|
tcp_assign_congestion_control(sk);
|
||||||
|
|
||||||
tp->tsoffset = 0;
|
tp->tsoffset = 0;
|
||||||
|
|
||||||
|
@ -3258,8 +3258,6 @@ void __init tcp_init(void)
|
||||||
tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
|
tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size);
|
||||||
|
|
||||||
tcp_metrics_init();
|
tcp_metrics_init();
|
||||||
|
BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0);
|
||||||
tcp_register_congestion_control(&tcp_reno);
|
|
||||||
|
|
||||||
tcp_tasklet_init();
|
tcp_tasklet_init();
|
||||||
}
|
}
|
||||||
|
|
|
@ -74,24 +74,34 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
|
||||||
EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
|
EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
|
||||||
|
|
||||||
/* Assign choice of congestion control. */
|
/* Assign choice of congestion control. */
|
||||||
void tcp_init_congestion_control(struct sock *sk)
|
void tcp_assign_congestion_control(struct sock *sk)
|
||||||
{
|
{
|
||||||
struct inet_connection_sock *icsk = inet_csk(sk);
|
struct inet_connection_sock *icsk = inet_csk(sk);
|
||||||
struct tcp_congestion_ops *ca;
|
struct tcp_congestion_ops *ca;
|
||||||
|
|
||||||
/* if no choice made yet assign the current value set as default */
|
rcu_read_lock();
|
||||||
if (icsk->icsk_ca_ops == &tcp_init_congestion_ops) {
|
list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
|
||||||
rcu_read_lock();
|
if (likely(try_module_get(ca->owner))) {
|
||||||
list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
|
icsk->icsk_ca_ops = ca;
|
||||||
if (try_module_get(ca->owner)) {
|
goto out;
|
||||||
icsk->icsk_ca_ops = ca;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* fallback to next available */
|
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
/* Fallback to next available. The last really
|
||||||
|
* guaranteed fallback is Reno from this list.
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
out:
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
/* Clear out private data before diag gets it and
|
||||||
|
* the ca has not been initialized.
|
||||||
|
*/
|
||||||
|
if (ca->get_info)
|
||||||
|
memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
|
||||||
|
}
|
||||||
|
|
||||||
|
void tcp_init_congestion_control(struct sock *sk)
|
||||||
|
{
|
||||||
|
const struct inet_connection_sock *icsk = inet_csk(sk);
|
||||||
|
|
||||||
if (icsk->icsk_ca_ops->init)
|
if (icsk->icsk_ca_ops->init)
|
||||||
icsk->icsk_ca_ops->init(sk);
|
icsk->icsk_ca_ops->init(sk);
|
||||||
|
@ -345,15 +355,3 @@ struct tcp_congestion_ops tcp_reno = {
|
||||||
.ssthresh = tcp_reno_ssthresh,
|
.ssthresh = tcp_reno_ssthresh,
|
||||||
.cong_avoid = tcp_reno_cong_avoid,
|
.cong_avoid = tcp_reno_cong_avoid,
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Initial congestion control used (until SYN)
|
|
||||||
* really reno under another name so we can tell difference
|
|
||||||
* during tcp_set_default_congestion_control
|
|
||||||
*/
|
|
||||||
struct tcp_congestion_ops tcp_init_congestion_ops = {
|
|
||||||
.name = "",
|
|
||||||
.owner = THIS_MODULE,
|
|
||||||
.ssthresh = tcp_reno_ssthresh,
|
|
||||||
.cong_avoid = tcp_reno_cong_avoid,
|
|
||||||
};
|
|
||||||
EXPORT_SYMBOL_GPL(tcp_init_congestion_ops);
|
|
||||||
|
|
|
@ -451,9 +451,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
|
||||||
newtp->snd_cwnd = TCP_INIT_CWND;
|
newtp->snd_cwnd = TCP_INIT_CWND;
|
||||||
newtp->snd_cwnd_cnt = 0;
|
newtp->snd_cwnd_cnt = 0;
|
||||||
|
|
||||||
if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops &&
|
if (!try_module_get(newicsk->icsk_ca_ops->owner))
|
||||||
!try_module_get(newicsk->icsk_ca_ops->owner))
|
tcp_assign_congestion_control(newsk);
|
||||||
newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
|
|
||||||
|
|
||||||
tcp_set_ca_state(newsk, TCP_CA_Open);
|
tcp_set_ca_state(newsk, TCP_CA_Open);
|
||||||
tcp_init_xmit_timers(newsk);
|
tcp_init_xmit_timers(newsk);
|
||||||
|
|
Загрузка…
Ссылка в новой задаче