diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b78972e2f43f..77a29cea5d76 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1521,11 +1521,8 @@ void __mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, if (!chosen) return; points = chosen_points * 1000 / totalpages; - read_lock(&tasklist_lock); oom_kill_process(chosen, gfp_mask, order, points, totalpages, memcg, NULL, "Memory cgroup out of memory"); - read_unlock(&tasklist_lock); - put_task_struct(chosen); } static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg, diff --git a/mm/oom_kill.c b/mm/oom_kill.c index c0c97aea837f..a3a32ae02e9d 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -336,7 +336,7 @@ enum oom_scan_t oom_scan_process_thread(struct task_struct *task, /* * Simple selection loop. We chose the process with the highest - * number of 'points'. We expect the caller will lock the tasklist. + * number of 'points'. * * (not docbooked, we don't want this one cluttering up the manual) */ @@ -348,6 +348,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, struct task_struct *chosen = NULL; unsigned long chosen_points = 0; + rcu_read_lock(); do_each_thread(g, p) { unsigned int points; @@ -360,6 +361,7 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, case OOM_SCAN_CONTINUE: continue; case OOM_SCAN_ABORT: + rcu_read_unlock(); return ERR_PTR(-1UL); case OOM_SCAN_OK: break; @@ -370,6 +372,9 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, chosen_points = points; } } while_each_thread(g, p); + if (chosen) + get_task_struct(chosen); + rcu_read_unlock(); *ppoints = chosen_points * 1000 / totalpages; return chosen; @@ -385,8 +390,6 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, * are not shown. * State information includes task's pid, uid, tgid, vm size, rss, nr_ptes, * swapents, oom_score_adj value, and name. - * - * Call with tasklist_lock read-locked. */ static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemask) { @@ -394,6 +397,7 @@ static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemas struct task_struct *task; pr_info("[ pid ] uid tgid total_vm rss nr_ptes swapents oom_score_adj name\n"); + rcu_read_lock(); for_each_process(p) { if (oom_unkillable_task(p, memcg, nodemask)) continue; @@ -416,6 +420,7 @@ static void dump_tasks(const struct mem_cgroup *memcg, const nodemask_t *nodemas task->signal->oom_score_adj, task->comm); task_unlock(task); } + rcu_read_unlock(); } static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, @@ -436,6 +441,10 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, } #define K(x) ((x) << (PAGE_SHIFT-10)) +/* + * Must be called while holding a reference to p, which will be released upon + * returning. + */ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, unsigned int points, unsigned long totalpages, struct mem_cgroup *memcg, nodemask_t *nodemask, @@ -455,6 +464,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, */ if (p->flags & PF_EXITING) { set_tsk_thread_flag(p, TIF_MEMDIE); + put_task_struct(p); return; } @@ -472,6 +482,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, * parent. This attempts to lose the minimal amount of work done while * still freeing memory. */ + read_lock(&tasklist_lock); do { list_for_each_entry(child, &t->children, sibling) { unsigned int child_points; @@ -484,15 +495,26 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, child_points = oom_badness(child, memcg, nodemask, totalpages); if (child_points > victim_points) { + put_task_struct(victim); victim = child; victim_points = child_points; + get_task_struct(victim); } } } while_each_thread(p, t); + read_unlock(&tasklist_lock); - victim = find_lock_task_mm(victim); - if (!victim) + rcu_read_lock(); + p = find_lock_task_mm(victim); + if (!p) { + rcu_read_unlock(); + put_task_struct(victim); return; + } else if (victim != p) { + get_task_struct(p); + put_task_struct(victim); + victim = p; + } /* mm cannot safely be dereferenced after task_unlock(victim) */ mm = victim->mm; @@ -523,9 +545,11 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, task_unlock(p); do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true); } + rcu_read_unlock(); set_tsk_thread_flag(victim, TIF_MEMDIE); do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true); + put_task_struct(victim); } #undef K @@ -546,9 +570,7 @@ static void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, if (constraint != CONSTRAINT_NONE) return; } - read_lock(&tasklist_lock); dump_header(NULL, gfp_mask, order, NULL, nodemask); - read_unlock(&tasklist_lock); panic("Out of memory: %s panic_on_oom is enabled\n", sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide"); } @@ -721,10 +743,10 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL; check_panic_on_oom(constraint, gfp_mask, order, mpol_mask); - read_lock(&tasklist_lock); if (sysctl_oom_kill_allocating_task && current->mm && !oom_unkillable_task(current, NULL, nodemask) && current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) { + get_task_struct(current); oom_kill_process(current, gfp_mask, order, 0, totalpages, NULL, nodemask, "Out of memory (oom_kill_allocating_task)"); @@ -735,7 +757,6 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, /* Found nothing?!?! Either we hang forever, or we panic. */ if (!p) { dump_header(NULL, gfp_mask, order, NULL, mpol_mask); - read_unlock(&tasklist_lock); panic("Out of memory and no killable processes...\n"); } if (PTR_ERR(p) != -1UL) { @@ -744,8 +765,6 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, killed = 1; } out: - read_unlock(&tasklist_lock); - /* * Give the killed threads a good chance of exiting before trying to * allocate memory again.