sched/cpufreq: Change the worker kthread to SCHED_DEADLINE

The schedutil worker kthread needs to be able to change the frequency on
behalf of all other threads, SCHED_DEADLINE ones included.

Make it a special SCHED_DEADLINE task, ranked just below the STOP class.

Signed-off-by: Juri Lelli <juri.lelli@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Claudio Scordino <claudio@evidence.eu.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luca Abeni <luca.abeni@santannapisa.it>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J . Wysocki <rafael.j.wysocki@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: alessio.balsini@arm.com
Cc: bristot@redhat.com
Cc: dietmar.eggemann@arm.com
Cc: joelaf@google.com
Cc: juri.lelli@redhat.com
Cc: mathieu.poirier@linaro.org
Cc: morten.rasmussen@arm.com
Cc: patrick.bellasi@arm.com
Cc: rjw@rjwysocki.net
Cc: rostedt@goodmis.org
Cc: tkjos@android.com
Cc: tommaso.cucinotta@santannapisa.it
Cc: vincent.guittot@linaro.org
Link: http://lkml.kernel.org/r/20171204102325.5110-4-juri.lelli@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Juri Lelli 2017-12-04 11:23:20 +01:00 коммит произвёл Ingo Molnar
Родитель e0367b1267
Коммит 794a56ebd9
5 изменённых файлов: 130 добавлений и 36 удалений

Просмотреть файл

@ -1431,6 +1431,7 @@ extern int idle_cpu(int cpu);
extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *); extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *); extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
extern int sched_setattr(struct task_struct *, const struct sched_attr *); extern int sched_setattr(struct task_struct *, const struct sched_attr *);
extern int sched_setattr_nocheck(struct task_struct *, const struct sched_attr *);
extern struct task_struct *idle_task(int cpu); extern struct task_struct *idle_task(int cpu);
/** /**

Просмотреть файл

@ -4085,7 +4085,7 @@ recheck:
return -EINVAL; return -EINVAL;
} }
if (attr->sched_flags & ~SCHED_FLAG_ALL) if (attr->sched_flags & ~(SCHED_FLAG_ALL | SCHED_FLAG_SUGOV))
return -EINVAL; return -EINVAL;
/* /*
@ -4152,6 +4152,9 @@ recheck:
} }
if (user) { if (user) {
if (attr->sched_flags & SCHED_FLAG_SUGOV)
return -EINVAL;
retval = security_task_setscheduler(p); retval = security_task_setscheduler(p);
if (retval) if (retval)
return retval; return retval;
@ -4207,7 +4210,8 @@ change:
} }
#endif #endif
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
if (dl_bandwidth_enabled() && dl_policy(policy)) { if (dl_bandwidth_enabled() && dl_policy(policy) &&
!(attr->sched_flags & SCHED_FLAG_SUGOV)) {
cpumask_t *span = rq->rd->span; cpumask_t *span = rq->rd->span;
/* /*
@ -4337,6 +4341,11 @@ int sched_setattr(struct task_struct *p, const struct sched_attr *attr)
} }
EXPORT_SYMBOL_GPL(sched_setattr); EXPORT_SYMBOL_GPL(sched_setattr);
/*
 * sched_setattr_nocheck - apply @attr to task @p from kernel context.
 *
 * Thin wrapper that hands @p and @attr to __sched_setscheduler() with the
 * fixed trailing arguments (false, true) and returns its result unchanged.
 */
int sched_setattr_nocheck(struct task_struct *p, const struct sched_attr *attr)
{
	int ret = __sched_setscheduler(p, attr, false, true);

	return ret;
}
/** /**
* sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace. * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace.
* @p: the task in question. * @p: the task in question.

Просмотреть файл

@ -474,7 +474,20 @@ static void sugov_policy_free(struct sugov_policy *sg_policy)
static int sugov_kthread_create(struct sugov_policy *sg_policy) static int sugov_kthread_create(struct sugov_policy *sg_policy)
{ {
struct task_struct *thread; struct task_struct *thread;
struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO / 2 }; struct sched_attr attr = {
.size = sizeof(struct sched_attr),
.sched_policy = SCHED_DEADLINE,
.sched_flags = SCHED_FLAG_SUGOV,
.sched_nice = 0,
.sched_priority = 0,
/*
* Fake (unused) bandwidth; workaround to "fix"
* priority inheritance.
*/
.sched_runtime = 1000000,
.sched_deadline = 10000000,
.sched_period = 10000000,
};
struct cpufreq_policy *policy = sg_policy->policy; struct cpufreq_policy *policy = sg_policy->policy;
int ret; int ret;
@ -492,10 +505,10 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy)
return PTR_ERR(thread); return PTR_ERR(thread);
} }
ret = sched_setscheduler_nocheck(thread, SCHED_FIFO, &param); ret = sched_setattr_nocheck(thread, &attr);
if (ret) { if (ret) {
kthread_stop(thread); kthread_stop(thread);
pr_warn("%s: failed to set SCHED_FIFO\n", __func__); pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__);
return ret; return ret;
} }

Просмотреть файл

@ -78,7 +78,7 @@ static inline int dl_bw_cpus(int i)
#endif #endif
static inline static inline
void add_running_bw(u64 dl_bw, struct dl_rq *dl_rq) void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
{ {
u64 old = dl_rq->running_bw; u64 old = dl_rq->running_bw;
@ -91,7 +91,7 @@ void add_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
} }
static inline static inline
void sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq) void __sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
{ {
u64 old = dl_rq->running_bw; u64 old = dl_rq->running_bw;
@ -105,7 +105,7 @@ void sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq)
} }
static inline static inline
void add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq) void __add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
{ {
u64 old = dl_rq->this_bw; u64 old = dl_rq->this_bw;
@ -115,7 +115,7 @@ void add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
} }
static inline static inline
void sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq) void __sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
{ {
u64 old = dl_rq->this_bw; u64 old = dl_rq->this_bw;
@ -127,16 +127,46 @@ void sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq)
SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw); SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw);
} }
/*
 * Add @dl_se's bandwidth (dl_se->dl_bw) to @dl_rq through __add_rq_bw().
 * Special entities (see dl_entity_is_special()) are left out of the
 * accounting entirely.
 */
static inline
void add_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{
	if (dl_entity_is_special(dl_se))
		return;

	__add_rq_bw(dl_se->dl_bw, dl_rq);
}
/*
 * Remove @dl_se's bandwidth (dl_se->dl_bw) from @dl_rq through
 * __sub_rq_bw(). Special entities (see dl_entity_is_special()) were never
 * accounted, so nothing is subtracted for them.
 */
static inline
void sub_rq_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{
	if (dl_entity_is_special(dl_se))
		return;

	__sub_rq_bw(dl_se->dl_bw, dl_rq);
}
/*
 * Add @dl_se's bandwidth (dl_se->dl_bw) to @dl_rq's running bandwidth
 * through __add_running_bw(). Special entities (see
 * dl_entity_is_special()) are skipped.
 */
static inline
void add_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{
	if (dl_entity_is_special(dl_se))
		return;

	__add_running_bw(dl_se->dl_bw, dl_rq);
}
/*
 * Remove @dl_se's bandwidth (dl_se->dl_bw) from @dl_rq's running bandwidth
 * through __sub_running_bw(). Special entities (see
 * dl_entity_is_special()) are skipped.
 */
static inline
void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{
	if (dl_entity_is_special(dl_se))
		return;

	__sub_running_bw(dl_se->dl_bw, dl_rq);
}
void dl_change_utilization(struct task_struct *p, u64 new_bw) void dl_change_utilization(struct task_struct *p, u64 new_bw)
{ {
struct rq *rq; struct rq *rq;
BUG_ON(p->dl.flags & SCHED_FLAG_SUGOV);
if (task_on_rq_queued(p)) if (task_on_rq_queued(p))
return; return;
rq = task_rq(p); rq = task_rq(p);
if (p->dl.dl_non_contending) { if (p->dl.dl_non_contending) {
sub_running_bw(p->dl.dl_bw, &rq->dl); sub_running_bw(&p->dl, &rq->dl);
p->dl.dl_non_contending = 0; p->dl.dl_non_contending = 0;
/* /*
* If the timer handler is currently running and the * If the timer handler is currently running and the
@ -148,8 +178,8 @@ void dl_change_utilization(struct task_struct *p, u64 new_bw)
if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1) if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
put_task_struct(p); put_task_struct(p);
} }
sub_rq_bw(p->dl.dl_bw, &rq->dl); __sub_rq_bw(p->dl.dl_bw, &rq->dl);
add_rq_bw(new_bw, &rq->dl); __add_rq_bw(new_bw, &rq->dl);
} }
/* /*
@ -221,6 +251,9 @@ static void task_non_contending(struct task_struct *p)
if (dl_se->dl_runtime == 0) if (dl_se->dl_runtime == 0)
return; return;
if (dl_entity_is_special(dl_se))
return;
WARN_ON(hrtimer_active(&dl_se->inactive_timer)); WARN_ON(hrtimer_active(&dl_se->inactive_timer));
WARN_ON(dl_se->dl_non_contending); WARN_ON(dl_se->dl_non_contending);
@ -240,12 +273,12 @@ static void task_non_contending(struct task_struct *p)
*/ */
if (zerolag_time < 0) { if (zerolag_time < 0) {
if (dl_task(p)) if (dl_task(p))
sub_running_bw(dl_se->dl_bw, dl_rq); sub_running_bw(dl_se, dl_rq);
if (!dl_task(p) || p->state == TASK_DEAD) { if (!dl_task(p) || p->state == TASK_DEAD) {
struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
if (p->state == TASK_DEAD) if (p->state == TASK_DEAD)
sub_rq_bw(p->dl.dl_bw, &rq->dl); sub_rq_bw(&p->dl, &rq->dl);
raw_spin_lock(&dl_b->lock); raw_spin_lock(&dl_b->lock);
__dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p))); __dl_sub(dl_b, p->dl.dl_bw, dl_bw_cpus(task_cpu(p)));
__dl_clear_params(p); __dl_clear_params(p);
@ -272,7 +305,7 @@ static void task_contending(struct sched_dl_entity *dl_se, int flags)
return; return;
if (flags & ENQUEUE_MIGRATED) if (flags & ENQUEUE_MIGRATED)
add_rq_bw(dl_se->dl_bw, dl_rq); add_rq_bw(dl_se, dl_rq);
if (dl_se->dl_non_contending) { if (dl_se->dl_non_contending) {
dl_se->dl_non_contending = 0; dl_se->dl_non_contending = 0;
@ -293,7 +326,7 @@ static void task_contending(struct sched_dl_entity *dl_se, int flags)
* when the "inactive timer" fired). * when the "inactive timer" fired).
* So, add it back. * So, add it back.
*/ */
add_running_bw(dl_se->dl_bw, dl_rq); add_running_bw(dl_se, dl_rq);
} }
} }
@ -1149,6 +1182,9 @@ static void update_curr_dl(struct rq *rq)
sched_rt_avg_update(rq, delta_exec); sched_rt_avg_update(rq, delta_exec);
if (dl_entity_is_special(dl_se))
return;
if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM)) if (unlikely(dl_se->flags & SCHED_FLAG_RECLAIM))
delta_exec = grub_reclaim(delta_exec, rq, &curr->dl); delta_exec = grub_reclaim(delta_exec, rq, &curr->dl);
dl_se->runtime -= delta_exec; dl_se->runtime -= delta_exec;
@ -1211,8 +1247,8 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); struct dl_bw *dl_b = dl_bw_of(task_cpu(p));
if (p->state == TASK_DEAD && dl_se->dl_non_contending) { if (p->state == TASK_DEAD && dl_se->dl_non_contending) {
sub_running_bw(p->dl.dl_bw, dl_rq_of_se(&p->dl)); sub_running_bw(&p->dl, dl_rq_of_se(&p->dl));
sub_rq_bw(p->dl.dl_bw, dl_rq_of_se(&p->dl)); sub_rq_bw(&p->dl, dl_rq_of_se(&p->dl));
dl_se->dl_non_contending = 0; dl_se->dl_non_contending = 0;
} }
@ -1229,7 +1265,7 @@ static enum hrtimer_restart inactive_task_timer(struct hrtimer *timer)
sched_clock_tick(); sched_clock_tick();
update_rq_clock(rq); update_rq_clock(rq);
sub_running_bw(dl_se->dl_bw, &rq->dl); sub_running_bw(dl_se, &rq->dl);
dl_se->dl_non_contending = 0; dl_se->dl_non_contending = 0;
unlock: unlock:
task_rq_unlock(rq, p, &rf); task_rq_unlock(rq, p, &rf);
@ -1423,8 +1459,8 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
dl_check_constrained_dl(&p->dl); dl_check_constrained_dl(&p->dl);
if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) { if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & ENQUEUE_RESTORE) {
add_rq_bw(p->dl.dl_bw, &rq->dl); add_rq_bw(&p->dl, &rq->dl);
add_running_bw(p->dl.dl_bw, &rq->dl); add_running_bw(&p->dl, &rq->dl);
} }
/* /*
@ -1464,8 +1500,8 @@ static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
__dequeue_task_dl(rq, p, flags); __dequeue_task_dl(rq, p, flags);
if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE) { if (p->on_rq == TASK_ON_RQ_MIGRATING || flags & DEQUEUE_SAVE) {
sub_running_bw(p->dl.dl_bw, &rq->dl); sub_running_bw(&p->dl, &rq->dl);
sub_rq_bw(p->dl.dl_bw, &rq->dl); sub_rq_bw(&p->dl, &rq->dl);
} }
/* /*
@ -1571,7 +1607,7 @@ static void migrate_task_rq_dl(struct task_struct *p)
*/ */
raw_spin_lock(&rq->lock); raw_spin_lock(&rq->lock);
if (p->dl.dl_non_contending) { if (p->dl.dl_non_contending) {
sub_running_bw(p->dl.dl_bw, &rq->dl); sub_running_bw(&p->dl, &rq->dl);
p->dl.dl_non_contending = 0; p->dl.dl_non_contending = 0;
/* /*
* If the timer handler is currently running and the * If the timer handler is currently running and the
@ -1583,7 +1619,7 @@ static void migrate_task_rq_dl(struct task_struct *p)
if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1) if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
put_task_struct(p); put_task_struct(p);
} }
sub_rq_bw(p->dl.dl_bw, &rq->dl); sub_rq_bw(&p->dl, &rq->dl);
raw_spin_unlock(&rq->lock); raw_spin_unlock(&rq->lock);
} }
@ -2026,11 +2062,11 @@ retry:
} }
deactivate_task(rq, next_task, 0); deactivate_task(rq, next_task, 0);
sub_running_bw(next_task->dl.dl_bw, &rq->dl); sub_running_bw(&next_task->dl, &rq->dl);
sub_rq_bw(next_task->dl.dl_bw, &rq->dl); sub_rq_bw(&next_task->dl, &rq->dl);
set_task_cpu(next_task, later_rq->cpu); set_task_cpu(next_task, later_rq->cpu);
add_rq_bw(next_task->dl.dl_bw, &later_rq->dl); add_rq_bw(&next_task->dl, &later_rq->dl);
add_running_bw(next_task->dl.dl_bw, &later_rq->dl); add_running_bw(&next_task->dl, &later_rq->dl);
activate_task(later_rq, next_task, 0); activate_task(later_rq, next_task, 0);
ret = 1; ret = 1;
@ -2118,11 +2154,11 @@ static void pull_dl_task(struct rq *this_rq)
resched = true; resched = true;
deactivate_task(src_rq, p, 0); deactivate_task(src_rq, p, 0);
sub_running_bw(p->dl.dl_bw, &src_rq->dl); sub_running_bw(&p->dl, &src_rq->dl);
sub_rq_bw(p->dl.dl_bw, &src_rq->dl); sub_rq_bw(&p->dl, &src_rq->dl);
set_task_cpu(p, this_cpu); set_task_cpu(p, this_cpu);
add_rq_bw(p->dl.dl_bw, &this_rq->dl); add_rq_bw(&p->dl, &this_rq->dl);
add_running_bw(p->dl.dl_bw, &this_rq->dl); add_running_bw(&p->dl, &this_rq->dl);
activate_task(this_rq, p, 0); activate_task(this_rq, p, 0);
dmin = p->dl.deadline; dmin = p->dl.deadline;
@ -2231,7 +2267,7 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
task_non_contending(p); task_non_contending(p);
if (!task_on_rq_queued(p)) if (!task_on_rq_queued(p))
sub_rq_bw(p->dl.dl_bw, &rq->dl); sub_rq_bw(&p->dl, &rq->dl);
/* /*
* We cannot use inactive_task_timer() to invoke sub_running_bw() * We cannot use inactive_task_timer() to invoke sub_running_bw()
@ -2263,7 +2299,7 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
/* If p is not queued we will update its parameters at next wakeup. */ /* If p is not queued we will update its parameters at next wakeup. */
if (!task_on_rq_queued(p)) { if (!task_on_rq_queued(p)) {
add_rq_bw(p->dl.dl_bw, &rq->dl); add_rq_bw(&p->dl, &rq->dl);
return; return;
} }
@ -2442,6 +2478,9 @@ int sched_dl_overflow(struct task_struct *p, int policy,
u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0; u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;
int cpus, err = -1; int cpus, err = -1;
if (attr->sched_flags & SCHED_FLAG_SUGOV)
return 0;
/* !deadline task may carry old deadline bandwidth */ /* !deadline task may carry old deadline bandwidth */
if (new_bw == p->dl.dl_bw && task_has_dl_policy(p)) if (new_bw == p->dl.dl_bw && task_has_dl_policy(p))
return 0; return 0;
@ -2528,6 +2567,10 @@ void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
*/ */
bool __checkparam_dl(const struct sched_attr *attr) bool __checkparam_dl(const struct sched_attr *attr)
{ {
/* special dl tasks don't actually use any parameter */
if (attr->sched_flags & SCHED_FLAG_SUGOV)
return true;
/* deadline != 0 */ /* deadline != 0 */
if (attr->sched_deadline == 0) if (attr->sched_deadline == 0)
return false; return false;

Просмотреть файл

@ -156,13 +156,37 @@ static inline int task_has_dl_policy(struct task_struct *p)
return dl_policy(p->policy); return dl_policy(p->policy);
} }
/*
* !! For sched_setattr_nocheck() (kernel) only !!
*
* This is actually gross. :(
*
* It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE
* tasks, but still be able to sleep. We need this on platforms that cannot
* atomically change clock frequency. Remove once fast switching is
* available on such platforms.
*
* SUGOV stands for SchedUtil GOVernor.
*/
#define SCHED_FLAG_SUGOV 0x10000000
/*
 * Report whether @dl_se carries SCHED_FLAG_SUGOV in its flags.
 *
 * When CONFIG_CPU_FREQ_GOV_SCHEDUTIL is not set, no entity is ever treated
 * as special and the answer is always false.
 */
static inline bool dl_entity_is_special(struct sched_dl_entity *dl_se)
{
#ifndef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
	return false;
#else
	/* Special entities are the rare case; keep the hint for the compiler. */
	return unlikely(dl_se->flags & SCHED_FLAG_SUGOV);
#endif
}
/* /*
* Tells if entity @a should preempt entity @b. * Tells if entity @a should preempt entity @b.
*/ */
static inline bool static inline bool
dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b) dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b)
{ {
return dl_time_before(a->deadline, b->deadline); return dl_entity_is_special(a) ||
dl_time_before(a->deadline, b->deadline);
} }
/* /*
@ -2085,6 +2109,8 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
#define arch_scale_freq_invariant() (false) #define arch_scale_freq_invariant() (false)
#endif #endif
#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
static inline unsigned long cpu_util_dl(struct rq *rq) static inline unsigned long cpu_util_dl(struct rq *rq)
{ {
return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT; return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
@ -2094,3 +2120,5 @@ static inline unsigned long cpu_util_cfs(struct rq *rq)
{ {
return rq->cfs.avg.util_avg; return rq->cfs.avg.util_avg;
} }
#endif