mm: memcontrol: factor out reclaim iterator loading and updating
mem_cgroup_iter() is too hard to follow. Factor out the lockless reclaim iterator loading and updating so it's easier to follow the big picture. Also document the iterator invalidation mechanism a bit more extensively. Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> Reported-by: Tejun Heo <tj@kernel.org> Reviewed-by: Tejun Heo <tj@kernel.org> Acked-by: Michal Hocko <mhocko@suse.cz> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Glauber Costa <glommer@parallels.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Родитель
0f2d4a8e27
Коммит
519ebea3bf
|
@ -1148,6 +1148,58 @@ skip_node:
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static void mem_cgroup_iter_invalidate(struct mem_cgroup *root)
|
||||
{
|
||||
/*
|
||||
* When a group in the hierarchy below root is destroyed, the
|
||||
* hierarchy iterator can no longer be trusted since it might
|
||||
* have pointed to the destroyed group. Invalidate it.
|
||||
*/
|
||||
atomic_inc(&root->dead_count);
|
||||
}
|
||||
|
||||
static struct mem_cgroup *
|
||||
mem_cgroup_iter_load(struct mem_cgroup_reclaim_iter *iter,
|
||||
struct mem_cgroup *root,
|
||||
int *sequence)
|
||||
{
|
||||
struct mem_cgroup *position = NULL;
|
||||
/*
|
||||
* A cgroup destruction happens in two stages: offlining and
|
||||
* release. They are separated by a RCU grace period.
|
||||
*
|
||||
* If the iterator is valid, we may still race with an
|
||||
* offlining. The RCU lock ensures the object won't be
|
||||
* released, tryget will fail if we lost the race.
|
||||
*/
|
||||
*sequence = atomic_read(&root->dead_count);
|
||||
if (iter->last_dead_count == *sequence) {
|
||||
smp_rmb();
|
||||
position = iter->last_visited;
|
||||
if (position && !css_tryget(&position->css))
|
||||
position = NULL;
|
||||
}
|
||||
return position;
|
||||
}
|
||||
|
||||
static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
|
||||
struct mem_cgroup *last_visited,
|
||||
struct mem_cgroup *new_position,
|
||||
int sequence)
|
||||
{
|
||||
if (last_visited)
|
||||
css_put(&last_visited->css);
|
||||
/*
|
||||
* We store the sequence count from the time @last_visited was
|
||||
* loaded successfully instead of rereading it here so that we
|
||||
* don't lose destruction events in between. We could have
|
||||
* raced with the destruction of @new_position after all.
|
||||
*/
|
||||
iter->last_visited = new_position;
|
||||
smp_wmb();
|
||||
iter->last_dead_count = sequence;
|
||||
}
|
||||
|
||||
/**
|
||||
* mem_cgroup_iter - iterate over memory cgroup hierarchy
|
||||
* @root: hierarchy root
|
||||
|
@ -1171,7 +1223,6 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
|
|||
{
|
||||
struct mem_cgroup *memcg = NULL;
|
||||
struct mem_cgroup *last_visited = NULL;
|
||||
unsigned long uninitialized_var(dead_count);
|
||||
|
||||
if (mem_cgroup_disabled())
|
||||
return NULL;
|
||||
|
@ -1191,6 +1242,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
|
|||
rcu_read_lock();
|
||||
while (!memcg) {
|
||||
struct mem_cgroup_reclaim_iter *uninitialized_var(iter);
|
||||
int uninitialized_var(seq);
|
||||
|
||||
if (reclaim) {
|
||||
int nid = zone_to_nid(reclaim->zone);
|
||||
|
@ -1204,37 +1256,13 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
|
|||
goto out_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the dead_count mismatches, a destruction
|
||||
* has happened or is happening concurrently.
|
||||
* If the dead_count matches, a destruction
|
||||
* might still happen concurrently, but since
|
||||
* we checked under RCU, that destruction
|
||||
* won't free the object until we release the
|
||||
* RCU reader lock. Thus, the dead_count
|
||||
* check verifies the pointer is still valid,
|
||||
* css_tryget() verifies the cgroup pointed to
|
||||
* is alive.
|
||||
*/
|
||||
dead_count = atomic_read(&root->dead_count);
|
||||
if (dead_count == iter->last_dead_count) {
|
||||
smp_rmb();
|
||||
last_visited = iter->last_visited;
|
||||
if (last_visited &&
|
||||
!css_tryget(&last_visited->css))
|
||||
last_visited = NULL;
|
||||
}
|
||||
last_visited = mem_cgroup_iter_load(iter, root, &seq);
|
||||
}
|
||||
|
||||
memcg = __mem_cgroup_iter_next(root, last_visited);
|
||||
|
||||
if (reclaim) {
|
||||
if (last_visited)
|
||||
css_put(&last_visited->css);
|
||||
|
||||
iter->last_visited = memcg;
|
||||
smp_wmb();
|
||||
iter->last_dead_count = dead_count;
|
||||
mem_cgroup_iter_update(iter, last_visited, memcg, seq);
|
||||
|
||||
if (!memcg)
|
||||
iter->generation++;
|
||||
|
@ -6318,14 +6346,14 @@ static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg)
|
|||
struct mem_cgroup *parent = memcg;
|
||||
|
||||
while ((parent = parent_mem_cgroup(parent)))
|
||||
atomic_inc(&parent->dead_count);
|
||||
mem_cgroup_iter_invalidate(parent);
|
||||
|
||||
/*
|
||||
* if the root memcg is not hierarchical we have to check it
|
||||
* explicitely.
|
||||
*/
|
||||
if (!root_mem_cgroup->use_hierarchy)
|
||||
atomic_inc(&root_mem_cgroup->dead_count);
|
||||
mem_cgroup_iter_invalidate(root_mem_cgroup);
|
||||
}
|
||||
|
||||
static void mem_cgroup_css_offline(struct cgroup *cont)
|
||||
|
|
Загрузка…
Ссылка в новой задаче