diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index da349723d411..1f03e221a01e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -512,6 +512,18 @@ menuconfig PARAVIRT_GUEST if PARAVIRT_GUEST +config PARAVIRT_TIME_ACCOUNTING + bool "Paravirtual steal time accounting" + select PARAVIRT + default n + ---help--- + Select this option to enable fine granularity task steal time + accounting. Time spent executing other tasks in parallel with + the current vCPU is discounted from the vCPU power. To account for + that, there can be a small performance impact. + + If in doubt, say N here. + source "arch/x86/xen/Kconfig" config KVM_CLOCK diff --git a/kernel/sched.c b/kernel/sched.c index f98a28b19b2a..b35ac50b26c8 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -534,6 +534,9 @@ struct rq { #ifdef CONFIG_PARAVIRT u64 prev_steal_time; #endif +#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING + u64 prev_steal_time_rq; +#endif /* calc_load related fields */ unsigned long calc_load_update; @@ -1973,8 +1976,14 @@ static inline u64 steal_ticks(u64 steal) static void update_rq_clock_task(struct rq *rq, s64 delta) { - s64 irq_delta; - +/* + * In theory, the compile should just see 0 here, and optimize out the call + * to sched_rt_avg_update. But I don't trust it... + */ +#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING) + s64 steal = 0, irq_delta = 0; +#endif +#ifdef CONFIG_IRQ_TIME_ACCOUNTING irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; /* @@ -1997,12 +2006,35 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) rq->prev_irq_time += irq_delta; delta -= irq_delta; +#endif +#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING + if (static_branch((¶virt_steal_rq_enabled))) { + u64 st; + + steal = paravirt_steal_clock(cpu_of(rq)); + steal -= rq->prev_steal_time_rq; + + if (unlikely(steal > delta)) + steal = delta; + + st = steal_ticks(steal); + steal = st * TICK_NSEC; + + rq->prev_steal_time_rq += steal; + + delta -= steal; + } +#endif + rq->clock_task += delta; - if (irq_delta && sched_feat(NONIRQ_POWER)) - sched_rt_avg_update(rq, irq_delta); +#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING) + if ((irq_delta + steal) && sched_feat(NONTASK_POWER)) + sched_rt_avg_update(rq, irq_delta + steal); +#endif } +#ifdef CONFIG_IRQ_TIME_ACCOUNTING static int irqtime_account_hi_update(void) { struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; @@ -2037,12 +2069,7 @@ static int irqtime_account_si_update(void) #define sched_clock_irqtime (0) -static void update_rq_clock_task(struct rq *rq, s64 delta) -{ - rq->clock_task += delta; -} - -#endif /* CONFIG_IRQ_TIME_ACCOUNTING */ +#endif #include "sched_idletask.c" #include "sched_fair.c" diff --git a/kernel/sched_features.h b/kernel/sched_features.h index be40f7371ee1..ca3b025f8669 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h @@ -61,9 +61,9 @@ SCHED_FEAT(LB_BIAS, 1) SCHED_FEAT(OWNER_SPIN, 1) /* - * Decrement CPU power based on irq activity + * Decrement CPU power based on time not spent running tasks */ -SCHED_FEAT(NONIRQ_POWER, 1) +SCHED_FEAT(NONTASK_POWER, 1) /* * Queue remote wakeups on the target CPU and process them