cpuset: rewrite update_tasks_nodemask()
This patch uses cgroup_scan_tasks() to rebind tasks' vmas to the new cpuset's mems_allowed. This not only simplifies the code considerably, but also avoids allocating an array to hold the mm pointers of all the tasks in the cpuset. That array can be big (size > PAGESIZE) if there are lots of tasks in the cpuset, so the allocation has a chance of failing under memory stress. Signed-off-by: Li Zefan <lizf@cn.fujitsu.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Paul Menage <menage@google.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Родитель
bd1a8ab73e
Коммит
3b6766fe66
109
kernel/cpuset.c
109
kernel/cpuset.c
|
@ -1026,6 +1026,31 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
|
||||||
mutex_unlock(&callback_mutex);
|
mutex_unlock(&callback_mutex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Rebind task's vmas to cpuset's new mems_allowed, and migrate pages to new
|
||||||
|
* nodes if memory_migrate flag is set. Called with cgroup_mutex held.
|
||||||
|
*/
|
||||||
|
static void cpuset_change_nodemask(struct task_struct *p,
|
||||||
|
struct cgroup_scanner *scan)
|
||||||
|
{
|
||||||
|
struct mm_struct *mm;
|
||||||
|
struct cpuset *cs;
|
||||||
|
int migrate;
|
||||||
|
const nodemask_t *oldmem = scan->data;
|
||||||
|
|
||||||
|
mm = get_task_mm(p);
|
||||||
|
if (!mm)
|
||||||
|
return;
|
||||||
|
|
||||||
|
cs = cgroup_cs(scan->cg);
|
||||||
|
migrate = is_memory_migrate(cs);
|
||||||
|
|
||||||
|
mpol_rebind_mm(mm, &cs->mems_allowed);
|
||||||
|
if (migrate)
|
||||||
|
cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
|
||||||
|
mmput(mm);
|
||||||
|
}
|
||||||
|
|
||||||
static void *cpuset_being_rebound;
|
static void *cpuset_being_rebound;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1038,88 +1063,32 @@ static void *cpuset_being_rebound;
|
||||||
*/
|
*/
|
||||||
static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem)
|
static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem)
|
||||||
{
|
{
|
||||||
struct task_struct *p;
|
|
||||||
struct mm_struct **mmarray;
|
|
||||||
int i, n, ntasks;
|
|
||||||
int migrate;
|
|
||||||
int fudge;
|
|
||||||
struct cgroup_iter it;
|
|
||||||
int retval;
|
int retval;
|
||||||
|
struct cgroup_scanner scan;
|
||||||
|
|
||||||
cpuset_being_rebound = cs; /* causes mpol_dup() rebind */
|
cpuset_being_rebound = cs; /* causes mpol_dup() rebind */
|
||||||
|
|
||||||
fudge = 10; /* spare mmarray[] slots */
|
scan.cg = cs->css.cgroup;
|
||||||
fudge += cpumask_weight(cs->cpus_allowed);/* imagine 1 fork-bomb/cpu */
|
scan.test_task = NULL;
|
||||||
retval = -ENOMEM;
|
scan.process_task = cpuset_change_nodemask;
|
||||||
|
scan.heap = NULL;
|
||||||
|
scan.data = (nodemask_t *)oldmem;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Allocate mmarray[] to hold mm reference for each task
|
* The mpol_rebind_mm() call takes mmap_sem, which we couldn't
|
||||||
* in cpuset cs. Can't kmalloc GFP_KERNEL while holding
|
* take while holding tasklist_lock. Forks can happen - the
|
||||||
* tasklist_lock. We could use GFP_ATOMIC, but with a
|
* mpol_dup() cpuset_being_rebound check will catch such forks,
|
||||||
* few more lines of code, we can retry until we get a big
|
* and rebind their vma mempolicies too. Because we still hold
|
||||||
* enough mmarray[] w/o using GFP_ATOMIC.
|
* the global cgroup_mutex, we know that no other rebind effort
|
||||||
*/
|
* will be contending for the global variable cpuset_being_rebound.
|
||||||
while (1) {
|
|
||||||
ntasks = cgroup_task_count(cs->css.cgroup); /* guess */
|
|
||||||
ntasks += fudge;
|
|
||||||
mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL);
|
|
||||||
if (!mmarray)
|
|
||||||
goto done;
|
|
||||||
read_lock(&tasklist_lock); /* block fork */
|
|
||||||
if (cgroup_task_count(cs->css.cgroup) <= ntasks)
|
|
||||||
break; /* got enough */
|
|
||||||
read_unlock(&tasklist_lock); /* try again */
|
|
||||||
kfree(mmarray);
|
|
||||||
}
|
|
||||||
|
|
||||||
n = 0;
|
|
||||||
|
|
||||||
/* Load up mmarray[] with mm reference for each task in cpuset. */
|
|
||||||
cgroup_iter_start(cs->css.cgroup, &it);
|
|
||||||
while ((p = cgroup_iter_next(cs->css.cgroup, &it))) {
|
|
||||||
struct mm_struct *mm;
|
|
||||||
|
|
||||||
if (n >= ntasks) {
|
|
||||||
printk(KERN_WARNING
|
|
||||||
"Cpuset mempolicy rebind incomplete.\n");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
mm = get_task_mm(p);
|
|
||||||
if (!mm)
|
|
||||||
continue;
|
|
||||||
mmarray[n++] = mm;
|
|
||||||
}
|
|
||||||
cgroup_iter_end(cs->css.cgroup, &it);
|
|
||||||
read_unlock(&tasklist_lock);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Now that we've dropped the tasklist spinlock, we can
|
|
||||||
* rebind the vma mempolicies of each mm in mmarray[] to their
|
|
||||||
* new cpuset, and release that mm. The mpol_rebind_mm()
|
|
||||||
* call takes mmap_sem, which we couldn't take while holding
|
|
||||||
* tasklist_lock. Forks can happen again now - the mpol_dup()
|
|
||||||
* cpuset_being_rebound check will catch such forks, and rebind
|
|
||||||
* their vma mempolicies too. Because we still hold the global
|
|
||||||
* cgroup_mutex, we know that no other rebind effort will
|
|
||||||
* be contending for the global variable cpuset_being_rebound.
|
|
||||||
* It's ok if we rebind the same mm twice; mpol_rebind_mm()
|
* It's ok if we rebind the same mm twice; mpol_rebind_mm()
|
||||||
* is idempotent. Also migrate pages in each mm to new nodes.
|
* is idempotent. Also migrate pages in each mm to new nodes.
|
||||||
*/
|
*/
|
||||||
migrate = is_memory_migrate(cs);
|
retval = cgroup_scan_tasks(&scan);
|
||||||
for (i = 0; i < n; i++) {
|
|
||||||
struct mm_struct *mm = mmarray[i];
|
|
||||||
|
|
||||||
mpol_rebind_mm(mm, &cs->mems_allowed);
|
|
||||||
if (migrate)
|
|
||||||
cpuset_migrate_mm(mm, oldmem, &cs->mems_allowed);
|
|
||||||
mmput(mm);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* We're done rebinding vmas to this cpuset's new mems_allowed. */
|
/* We're done rebinding vmas to this cpuset's new mems_allowed. */
|
||||||
kfree(mmarray);
|
|
||||||
cpuset_being_rebound = NULL;
|
cpuset_being_rebound = NULL;
|
||||||
retval = 0;
|
|
||||||
done:
|
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Загрузка…
Ссылка в новой задаче