sched: clean up code under CONFIG_FAIR_GROUP_SCHED
With a view to supporting user-id based fair scheduling (and not just container-based fair scheduling), this patch renames several functions and makes them independent of whether they are used for container-based or user-id based fair scheduling.

Also fix a problem reported by KAMEZAWA Hiroyuki (an under-sized array being allocated for tg->cfs_rq[] and tg->se[]).

Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Parent: 75c28ace9f
Commit: 9b5b77512d
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -136,6 +136,7 @@ extern unsigned long weighted_cpuload(const int cpu);
 
 struct seq_file;
 struct cfs_rq;
+struct task_grp;
 #ifdef CONFIG_SCHED_DEBUG
 extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
 extern void proc_sched_set_task(struct task_struct *p);
@@ -1834,6 +1835,17 @@ extern int sched_mc_power_savings, sched_smt_power_savings;
 
 extern void normalize_rt_tasks(void);
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+
+extern struct task_grp init_task_grp;
+
+extern struct task_grp *sched_create_group(void);
+extern void sched_destroy_group(struct task_grp *tg);
+extern void sched_move_task(struct task_struct *tsk);
+extern int sched_group_set_shares(struct task_grp *tg, unsigned long shares);
+
+#endif
+
 #ifdef CONFIG_TASK_XACCT
 static inline void add_rchar(struct task_struct *tsk, ssize_t amt)
 {
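The block added above is the complete backend-neutral interface this patch exposes. As a rough sketch (not part of the patch; the example_* helpers and the shares value are invented for illustration), a grouping backend -- container glue today, or the user-id glue the changelog anticipates -- would drive it like this:

/*
 * Sketch only: how a grouping backend might use the new API.
 * Error handling and locking are elided; helper names are hypothetical.
 */
static struct task_grp *example_create_group(void)
{
        struct task_grp *tg = sched_create_group();

        if (IS_ERR(tg))
                return tg;      /* e.g. ERR_PTR(-ENOMEM) */

        /* give this group twice the default CPU bandwidth, say */
        sched_group_set_shares(tg, 2 * NICE_0_LOAD);
        return tg;
}

static void example_attach(struct task_struct *tsk)
{
        /* the backend re-links tsk to its new group first, then: */
        sched_move_task(tsk);
}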
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -282,13 +282,12 @@ config CPUSETS
           Say N if unsure.
 
 config FAIR_GROUP_SCHED
-        bool "Fair group scheduler"
-        depends on EXPERIMENTAL && CONTAINERS
+        bool "Fair group cpu scheduler"
+        default n
+        depends on EXPERIMENTAL
         help
-          This option enables you to group tasks and control CPU resource
-          allocation to such groups.
-
-          Say N if unsure.
+          This feature lets cpu scheduler recognize task groups and control cpu
+          bandwidth allocation to such task groups.
 
 config SYSFS_DEPRECATED
         bool "Create deprecated sysfs files"
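With the CONTAINERS dependency dropped, the option now stands alone; a .config fragment enabling it needs only:

CONFIG_EXPERIMENTAL=y
CONFIG_FAIR_GROUP_SCHED=y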
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -173,13 +173,10 @@ struct rt_prio_array {
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-#include <linux/container.h>
-
 struct cfs_rq;
 
 /* task group related information */
 struct task_grp {
-        struct container_subsys_state css;
         /* schedulable entities of this group on each cpu */
         struct sched_entity **se;
         /* runqueue "owned" by this group on each cpu */
@@ -192,22 +189,28 @@ static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
 /* Default task group's cfs_rq on each cpu */
 static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
 
-static struct sched_entity *init_sched_entity_p[CONFIG_NR_CPUS];
-static struct cfs_rq *init_cfs_rq_p[CONFIG_NR_CPUS];
+static struct sched_entity *init_sched_entity_p[NR_CPUS];
+static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
 
 /* Default task group.
  *      Every task in system belong to this group at bootup.
  */
-static struct task_grp init_task_grp = {
-        .se     = init_sched_entity_p,
-        .cfs_rq = init_cfs_rq_p,
-};
+struct task_grp init_task_grp = {
+        .se     = init_sched_entity_p,
+        .cfs_rq = init_cfs_rq_p,
+};
+
+#define INIT_TASK_GRP_LOAD      NICE_0_LOAD
+static int init_task_grp_load = INIT_TASK_GRP_LOAD;
 
 /* return group to which a task belongs */
 static inline struct task_grp *task_grp(struct task_struct *p)
 {
-        return container_of(task_subsys_state(p, cpu_subsys_id),
-                                struct task_grp, css);
+        struct task_grp *tg;
+
+        tg = &init_task_grp;
+
+        return tg;
 }
 
 /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
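With the container lookup removed, task_grp() simply pins every task to init_task_grp for now. For the user-id based scheduling the changelog anticipates, the lookup would presumably key off the task's user_struct instead; a sketch, assuming a later patch adds a tg pointer to struct user_struct (no such field exists yet at this point in the tree):

/* sketch only: user-id based grouping, via a hypothetical p->user->tg */
static inline struct task_grp *task_grp(struct task_struct *p)
{
        return p->user->tg;
}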
@@ -250,6 +253,7 @@ struct cfs_rq {
          */
         struct list_head leaf_cfs_rq_list; /* Better name : task_cfs_rq_list? */
         struct task_grp *tg;    /* group that "owns" this runqueue */
+        struct rcu_head rcu;
 #endif
 };
 
@@ -6513,11 +6517,12 @@ void __init sched_init(void)
                 init_sched_entity_p[i] = se;
                 se->cfs_rq = &rq->cfs;
                 se->my_q = cfs_rq;
-                se->load.weight = NICE_0_LOAD;
-                se->load.inv_weight = div64_64(1ULL<<32, NICE_0_LOAD);
+                se->load.weight = init_task_grp_load;
+                se->load.inv_weight =
+                        div64_64(1ULL<<32, init_task_grp_load);
                 se->parent = NULL;
         }
-        init_task_grp.shares = NICE_0_LOAD;
+        init_task_grp.shares = init_task_grp_load;
 #endif
 
         for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
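For reference, a worked instance of the inv_weight precomputation above, assuming the default INIT_TASK_GRP_LOAD of NICE_0_LOAD (1024 in this tree):

/*
 * inv_weight caches 2^32 / weight so later divisions by the weight
 * become a multiply plus a 32-bit shift:
 *
 *      inv_weight = (1ULL << 32) / 1024 = 4194304
 *      x / weight ~= (x * inv_weight) >> 32
 */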
@@ -6707,45 +6712,28 @@ void set_curr_task(int cpu, struct task_struct *p)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-/* return corresponding task_grp object of a container */
-static inline struct task_grp *container_tg(struct container *cont)
-{
-        return container_of(container_subsys_state(cont, cpu_subsys_id),
-                                struct task_grp, css);
-}
-
 /* allocate runqueue etc for a new task group */
-static struct container_subsys_state *
-sched_create_group(struct container_subsys *ss, struct container *cont)
+struct task_grp *sched_create_group(void)
 {
         struct task_grp *tg;
         struct cfs_rq *cfs_rq;
         struct sched_entity *se;
+        struct rq *rq;
         int i;
 
-        if (!cont->parent) {
-                /* This is early initialization for the top container */
-                init_task_grp.css.container = cont;
-                return &init_task_grp.css;
-        }
-
-        /* we support only 1-level deep hierarchical scheduler atm */
-        if (cont->parent->parent)
-                return ERR_PTR(-EINVAL);
-
         tg = kzalloc(sizeof(*tg), GFP_KERNEL);
         if (!tg)
                 return ERR_PTR(-ENOMEM);
 
-        tg->cfs_rq = kzalloc(sizeof(cfs_rq) * num_possible_cpus(), GFP_KERNEL);
+        tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL);
         if (!tg->cfs_rq)
                 goto err;
-        tg->se = kzalloc(sizeof(se) * num_possible_cpus(), GFP_KERNEL);
+        tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL);
         if (!tg->se)
                 goto err;
 
         for_each_possible_cpu(i) {
-                struct rq *rq = cpu_rq(i);
+                rq = cpu_rq(i);
 
                 cfs_rq = kmalloc_node(sizeof(struct cfs_rq), GFP_KERNEL,
                                                         cpu_to_node(i));
@@ -6763,7 +6751,6 @@ sched_create_group(struct container_subsys *ss, struct container *cont)
                 tg->cfs_rq[i] = cfs_rq;
                 init_cfs_rq(cfs_rq, rq);
                 cfs_rq->tg = tg;
-                list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
 
                 tg->se[i] = se;
                 se->cfs_rq = &rq->cfs;
@@ -6773,12 +6760,15 @@ sched_create_group(struct container_subsys *ss, struct container *cont)
                 se->parent = NULL;
         }
 
+        for_each_possible_cpu(i) {
+                rq = cpu_rq(i);
+                cfs_rq = tg->cfs_rq[i];
+                list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
+        }
+
         tg->shares = NICE_0_LOAD;
 
-        /* Bind the container to task_grp object we just created */
-        tg->css.container = cont;
-
-        return &tg->css;
+        return tg;
 
 err:
         for_each_possible_cpu(i) {
@@ -6797,24 +6787,14 @@ err:
         return ERR_PTR(-ENOMEM);
 }
 
-
-/* destroy runqueue etc associated with a task group */
-static void sched_destroy_group(struct container_subsys *ss,
-                                        struct container *cont)
+/* rcu callback to free various structures associated with a task group */
+static void free_sched_group(struct rcu_head *rhp)
 {
-        struct task_grp *tg = container_tg(cont);
-        struct cfs_rq *cfs_rq;
+        struct cfs_rq *cfs_rq = container_of(rhp, struct cfs_rq, rcu);
+        struct task_grp *tg = cfs_rq->tg;
         struct sched_entity *se;
         int i;
 
-        for_each_possible_cpu(i) {
-                cfs_rq = tg->cfs_rq[i];
-                list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
-        }
-
-        /* wait for possible concurrent references to cfs_rqs complete */
-        synchronize_sched();
-
         /* now it should be safe to free those cfs_rqs */
         for_each_possible_cpu(i) {
                 cfs_rq = tg->cfs_rq[i];
@@ -6829,19 +6809,29 @@ static void sched_destroy_group(struct container_subsys *ss,
         kfree(tg);
 }
 
-static int sched_can_attach(struct container_subsys *ss,
-                            struct container *cont, struct task_struct *tsk)
+/* Destroy runqueue etc associated with a task group */
+void sched_destroy_group(struct task_grp *tg)
 {
-        /* We don't support RT-tasks being in separate groups */
-        if (tsk->sched_class != &fair_sched_class)
-                return -EINVAL;
+        struct cfs_rq *cfs_rq;
+        int i;
 
-        return 0;
+        for_each_possible_cpu(i) {
+                cfs_rq = tg->cfs_rq[i];
+                list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
+        }
+
+        cfs_rq = tg->cfs_rq[0];
+
+        /* wait for possible concurrent references to cfs_rqs complete */
+        call_rcu(&cfs_rq->rcu, free_sched_group);
 }
 
-/* change task's runqueue when it moves between groups */
-static void sched_move_task(struct container_subsys *ss, struct container *cont,
-                        struct container *old_cont, struct task_struct *tsk)
+/* change task's runqueue when it moves between groups.
+ *      The caller of this function should have put the task in its new group
+ *      by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to
+ *      reflect its new group.
+ */
+void sched_move_task(struct task_struct *tsk)
 {
         int on_rq, running;
         unsigned long flags;
@@ -6896,58 +6886,20 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares)
         spin_unlock_irq(&rq->lock);
 }
 
-static ssize_t cpu_shares_write(struct container *cont, struct cftype *cftype,
-                                struct file *file, const char __user *userbuf,
-                                size_t nbytes, loff_t *ppos)
+int sched_group_set_shares(struct task_grp *tg, unsigned long shares)
 {
         int i;
-        unsigned long shareval;
-        struct task_grp *tg = container_tg(cont);
-        char buffer[2*sizeof(unsigned long) + 1];
 
-        if (nbytes > 2*sizeof(unsigned long))   /* safety check */
-                return -E2BIG;
+        if (tg->shares == shares)
+                return 0;
 
-        if (copy_from_user(buffer, userbuf, nbytes))
-                return -EFAULT;
+        /* return -EINVAL if the new value is not sane */
 
-        buffer[nbytes] = 0;     /* nul-terminate */
-        shareval = simple_strtoul(buffer, NULL, 10);
-
-        tg->shares = shareval;
+        tg->shares = shares;
         for_each_possible_cpu(i)
-                set_se_shares(tg->se[i], shareval);
+                set_se_shares(tg->se[i], shares);
 
-        return nbytes;
+        return 0;
 }
 
-static u64 cpu_shares_read_uint(struct container *cont, struct cftype *cft)
-{
-        struct task_grp *tg = container_tg(cont);
-
-        return (u64) tg->shares;
-}
-
-struct cftype cpuctl_share = {
-        .name = "shares",
-        .read_uint = cpu_shares_read_uint,
-        .write = cpu_shares_write,
-};
-
-static int sched_populate(struct container_subsys *ss, struct container *cont)
-{
-        return container_add_file(cont, ss, &cpuctl_share);
-}
-
-struct container_subsys cpu_subsys = {
-        .name = "cpu",
-        .create = sched_create_group,
-        .destroy = sched_destroy_group,
-        .can_attach = sched_can_attach,
-        .attach = sched_move_task,
-        .populate = sched_populate,
-        .subsys_id = cpu_subsys_id,
-        .early_init = 1,
-};
-
-#endif  /* CONFIG_FAIR_GROUP_SCHED */
+#endif  /* CONFIG_FAIR_GROUP_SCHED */
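The deleted cpu_shares_write()/cpu_shares_read_uint() handlers show the kind of glue a grouping backend now has to supply on top of sched_group_set_shares(). A sketch of equivalent backend-side handlers (function names hypothetical; string parsing kept as in the deleted code):

/* sketch: backend-side cpu.shares handlers layered on the new API */
static int example_shares_write(struct task_grp *tg, const char *buf)
{
        unsigned long shares = simple_strtoul(buf, NULL, 10);

        return sched_group_set_shares(tg, shares);
}

static u64 example_shares_read(struct task_grp *tg)
{
        return (u64) tg->shares;
}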
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -877,7 +877,10 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
         if (!cfs_rq->nr_running)
                 return MAX_PRIO;
 
-        curr = __pick_next_entity(cfs_rq);
+        curr = cfs_rq->curr;
+        if (!curr)
+                curr = __pick_next_entity(cfs_rq);
+
         p = task_of(curr);
 
         return p->prio;