tcp: refine pacing rate determination
When TCP pacing was added back in linux-3.12, we chose to apply a fixed ratio of 200 % against the current rate, to allow probing for optimal throughput even during the slow start phase, where cwnd can be doubled every other gRTT.

At Google, we found it was better to apply a different ratio while in the Congestion Avoidance phase. This ratio was set to 120 %.

We used the normal tcp_in_slow_start() helper for a while, then tuned the condition to select the conservative ratio as soon as cwnd >= ssthresh/2:

- After cwnd reduction, it is safer to ramp up more slowly, as we approach optimal cwnd.
- Initial ramp up (ssthresh == INFINITY) still allows doubling cwnd every other RTT.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Родитель
4ec3b28c27
Коммит
43e122b014
|
@ -586,6 +586,21 @@ tcp_min_tso_segs - INTEGER
|
||||||
if available window is too small.
|
if available window is too small.
|
||||||
Default: 2
|
Default: 2
|
||||||
|
|
||||||
|
tcp_pacing_ss_ratio - INTEGER
|
||||||
|
sk->sk_pacing_rate is set by TCP stack using a ratio applied
|
||||||
|
to current rate. (current_rate = cwnd * mss / srtt)
|
||||||
|
If TCP is in slow start, tcp_pacing_ss_ratio is applied
|
||||||
|
to let TCP probe for bigger speeds, assuming cwnd can be
|
||||||
|
doubled every other RTT.
|
||||||
|
Default: 200 (percent of current rate)
|
||||||
|
|
||||||
|
tcp_pacing_ca_ratio - INTEGER
|
||||||
|
sk->sk_pacing_rate is set by TCP stack using a ratio applied
|
||||||
|
to current rate. (current_rate = cwnd * mss / srtt)
|
||||||
|
If TCP is in congestion avoidance phase, tcp_pacing_ca_ratio
|
||||||
|
is applied to conservatively probe for bigger throughput.
|
||||||
|
Default: 120 (percent of current rate)
|
||||||
|
|
||||||
tcp_tso_win_divisor - INTEGER
|
tcp_tso_win_divisor - INTEGER
|
||||||
This allows control over what percentage of the congestion window
|
This allows control over what percentage of the congestion window
|
||||||
can be consumed by a single TSO frame.
|
can be consumed by a single TSO frame.
|
||||||
|
|
|
@ -281,6 +281,8 @@ extern unsigned int sysctl_tcp_notsent_lowat;
|
||||||
extern int sysctl_tcp_min_tso_segs;
|
extern int sysctl_tcp_min_tso_segs;
|
||||||
extern int sysctl_tcp_autocorking;
|
extern int sysctl_tcp_autocorking;
|
||||||
extern int sysctl_tcp_invalid_ratelimit;
|
extern int sysctl_tcp_invalid_ratelimit;
|
||||||
|
extern int sysctl_tcp_pacing_ss_ratio;
|
||||||
|
extern int sysctl_tcp_pacing_ca_ratio;
|
||||||
|
|
||||||
extern atomic_long_t tcp_memory_allocated;
|
extern atomic_long_t tcp_memory_allocated;
|
||||||
extern struct percpu_counter tcp_sockets_allocated;
|
extern struct percpu_counter tcp_sockets_allocated;
|
||||||
|
|
|
@ -29,6 +29,7 @@
|
||||||
static int zero;
|
static int zero;
|
||||||
static int one = 1;
|
static int one = 1;
|
||||||
static int four = 4;
|
static int four = 4;
|
||||||
|
/* Upper bound (.extra2) for the tcp_pacing_ss_ratio and tcp_pacing_ca_ratio entries. */
static int thousand = 1000;
|
||||||
static int gso_max_segs = GSO_MAX_SEGS;
|
static int gso_max_segs = GSO_MAX_SEGS;
|
||||||
static int tcp_retr1_max = 255;
|
static int tcp_retr1_max = 255;
|
||||||
static int ip_local_port_range_min[] = { 1, 1 };
|
static int ip_local_port_range_min[] = { 1, 1 };
|
||||||
|
@ -711,6 +712,24 @@ static struct ctl_table ipv4_table[] = {
|
||||||
.extra1 = &one,
|
.extra1 = &one,
|
||||||
.extra2 = &gso_max_segs,
|
.extra2 = &gso_max_segs,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
.procname = "tcp_pacing_ss_ratio",
|
||||||
|
.data = &sysctl_tcp_pacing_ss_ratio,
|
||||||
|
.maxlen = sizeof(int),
|
||||||
|
.mode = 0644,
|
||||||
|
.proc_handler = proc_dointvec_minmax,
|
||||||
|
.extra1 = &zero,
|
||||||
|
.extra2 = &thousand,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
.procname = "tcp_pacing_ca_ratio",
|
||||||
|
.data = &sysctl_tcp_pacing_ca_ratio,
|
||||||
|
.maxlen = sizeof(int),
|
||||||
|
.mode = 0644,
|
||||||
|
.proc_handler = proc_dointvec_minmax,
|
||||||
|
.extra1 = &zero,
|
||||||
|
.extra2 = &thousand,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
.procname = "tcp_autocorking",
|
.procname = "tcp_autocorking",
|
||||||
.data = &sysctl_tcp_autocorking,
|
.data = &sysctl_tcp_autocorking,
|
||||||
|
|
|
@ -753,13 +753,29 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us)
|
||||||
* TCP pacing, to smooth the burst on large writes when packets
|
* TCP pacing, to smooth the burst on large writes when packets
|
||||||
* in flight is significantly lower than cwnd (or rwin)
|
* in flight is significantly lower than cwnd (or rwin)
|
||||||
*/
|
*/
|
||||||
|
/* Pacing ratio, in percent of the current rate, that tcp_update_pacing_rate()
 * applies while snd_cwnd < snd_ssthresh / 2. Exposed as the
 * tcp_pacing_ss_ratio sysctl.
 */
int sysctl_tcp_pacing_ss_ratio __read_mostly = 200;
|
||||||
|
/* Pacing ratio, in percent of the current rate, that tcp_update_pacing_rate()
 * applies once snd_cwnd >= snd_ssthresh / 2. Exposed as the
 * tcp_pacing_ca_ratio sysctl.
 */
int sysctl_tcp_pacing_ca_ratio __read_mostly = 120;
|
||||||
|
|
||||||
static void tcp_update_pacing_rate(struct sock *sk)
|
static void tcp_update_pacing_rate(struct sock *sk)
|
||||||
{
|
{
|
||||||
const struct tcp_sock *tp = tcp_sk(sk);
|
const struct tcp_sock *tp = tcp_sk(sk);
|
||||||
u64 rate;
|
u64 rate;
|
||||||
|
|
||||||
/* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
|
/* start from mss scaled to 1 % of (USEC_PER_SEC << 3); the ratio applied below is a percentage */
|
||||||
rate = (u64)tp->mss_cache * 2 * (USEC_PER_SEC << 3);
|
rate = (u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3);
|
||||||
|
|
||||||
|
/* current rate is (cwnd * mss) / srtt
|
||||||
|
* In Slow Start [1], set sk_pacing_rate to 200 % of the current rate.
|
||||||
|
* In Congestion Avoidance phase, set it to 120 % of the current rate.
|
||||||
|
*
|
||||||
|
* [1] : Normal Slow Start condition is (tp->snd_cwnd < tp->snd_ssthresh)
|
||||||
|
* If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching
|
||||||
|
* end of slow start and should slow down.
|
||||||
|
*/
|
||||||
|
if (tp->snd_cwnd < tp->snd_ssthresh / 2)
|
||||||
|
rate *= sysctl_tcp_pacing_ss_ratio;
|
||||||
|
else
|
||||||
|
rate *= sysctl_tcp_pacing_ca_ratio;
|
||||||
|
|
||||||
rate *= max(tp->snd_cwnd, tp->packets_out);
|
rate *= max(tp->snd_cwnd, tp->packets_out);
|
||||||
|
|
||||||
|
|
Загрузка…
Ссылка в новой задаче