mm: vmscan: replace shrink_node() loop with a retry jump
Most of the function body is inside a loop, which imposes an additional indentation and scoping level that makes the code a bit hard to follow and modify.

The looping only happens in case of reclaim-compaction, which isn't the common case. So rather than adding yet another function level to the reclaim path and having every reclaim invocation go through a level that only exists for one specific corner case, use a retry goto.

Link: http://lkml.kernel.org/r/20191022144803.302233-6-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Родитель
b5ead35e7e
Коммит
d2af339706
233
mm/vmscan.c
233
mm/vmscan.c
|
@@ -2729,144 +2729,143 @@ static bool pgdat_memcg_congested(pg_data_t *pgdat, struct mem_cgroup *memcg)
|
||||||
static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
|
static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
|
||||||
{
|
{
|
||||||
struct reclaim_state *reclaim_state = current->reclaim_state;
|
struct reclaim_state *reclaim_state = current->reclaim_state;
|
||||||
|
struct mem_cgroup *root = sc->target_mem_cgroup;
|
||||||
unsigned long nr_reclaimed, nr_scanned;
|
unsigned long nr_reclaimed, nr_scanned;
|
||||||
bool reclaimable = false;
|
bool reclaimable = false;
|
||||||
|
struct mem_cgroup *memcg;
|
||||||
|
again:
|
||||||
|
memset(&sc->nr, 0, sizeof(sc->nr));
|
||||||
|
|
||||||
|
nr_reclaimed = sc->nr_reclaimed;
|
||||||
|
nr_scanned = sc->nr_scanned;
|
||||||
|
|
||||||
|
memcg = mem_cgroup_iter(root, NULL, NULL);
|
||||||
do {
|
do {
|
||||||
struct mem_cgroup *root = sc->target_mem_cgroup;
|
unsigned long reclaimed;
|
||||||
struct mem_cgroup *memcg;
|
unsigned long scanned;
|
||||||
|
|
||||||
memset(&sc->nr, 0, sizeof(sc->nr));
|
switch (mem_cgroup_protected(root, memcg)) {
|
||||||
|
case MEMCG_PROT_MIN:
|
||||||
nr_reclaimed = sc->nr_reclaimed;
|
/*
|
||||||
nr_scanned = sc->nr_scanned;
|
* Hard protection.
|
||||||
|
* If there is no reclaimable memory, OOM.
|
||||||
memcg = mem_cgroup_iter(root, NULL, NULL);
|
*/
|
||||||
do {
|
continue;
|
||||||
unsigned long reclaimed;
|
case MEMCG_PROT_LOW:
|
||||||
unsigned long scanned;
|
/*
|
||||||
|
* Soft protection.
|
||||||
switch (mem_cgroup_protected(root, memcg)) {
|
* Respect the protection only as long as
|
||||||
case MEMCG_PROT_MIN:
|
* there is an unprotected supply
|
||||||
/*
|
* of reclaimable memory from other cgroups.
|
||||||
* Hard protection.
|
*/
|
||||||
* If there is no reclaimable memory, OOM.
|
if (!sc->memcg_low_reclaim) {
|
||||||
*/
|
sc->memcg_low_skipped = 1;
|
||||||
continue;
|
continue;
|
||||||
case MEMCG_PROT_LOW:
|
|
||||||
/*
|
|
||||||
* Soft protection.
|
|
||||||
* Respect the protection only as long as
|
|
||||||
* there is an unprotected supply
|
|
||||||
* of reclaimable memory from other cgroups.
|
|
||||||
*/
|
|
||||||
if (!sc->memcg_low_reclaim) {
|
|
||||||
sc->memcg_low_skipped = 1;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
memcg_memory_event(memcg, MEMCG_LOW);
|
|
||||||
break;
|
|
||||||
case MEMCG_PROT_NONE:
|
|
||||||
/*
|
|
||||||
* All protection thresholds breached. We may
|
|
||||||
* still choose to vary the scan pressure
|
|
||||||
* applied based on by how much the cgroup in
|
|
||||||
* question has exceeded its protection
|
|
||||||
* thresholds (see get_scan_count).
|
|
||||||
*/
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
memcg_memory_event(memcg, MEMCG_LOW);
|
||||||
reclaimed = sc->nr_reclaimed;
|
break;
|
||||||
scanned = sc->nr_scanned;
|
case MEMCG_PROT_NONE:
|
||||||
shrink_node_memcg(pgdat, memcg, sc);
|
/*
|
||||||
|
* All protection thresholds breached. We may
|
||||||
shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
|
* still choose to vary the scan pressure
|
||||||
sc->priority);
|
* applied based on by how much the cgroup in
|
||||||
|
* question has exceeded its protection
|
||||||
/* Record the group's reclaim efficiency */
|
* thresholds (see get_scan_count).
|
||||||
vmpressure(sc->gfp_mask, memcg, false,
|
*/
|
||||||
sc->nr_scanned - scanned,
|
break;
|
||||||
sc->nr_reclaimed - reclaimed);
|
|
||||||
|
|
||||||
} while ((memcg = mem_cgroup_iter(root, memcg, NULL)));
|
|
||||||
|
|
||||||
if (reclaim_state) {
|
|
||||||
sc->nr_reclaimed += reclaim_state->reclaimed_slab;
|
|
||||||
reclaim_state->reclaimed_slab = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Record the subtree's reclaim efficiency */
|
reclaimed = sc->nr_reclaimed;
|
||||||
vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
|
scanned = sc->nr_scanned;
|
||||||
sc->nr_scanned - nr_scanned,
|
shrink_node_memcg(pgdat, memcg, sc);
|
||||||
sc->nr_reclaimed - nr_reclaimed);
|
|
||||||
|
|
||||||
if (sc->nr_reclaimed - nr_reclaimed)
|
shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
|
||||||
reclaimable = true;
|
sc->priority);
|
||||||
|
|
||||||
if (current_is_kswapd()) {
|
/* Record the group's reclaim efficiency */
|
||||||
/*
|
vmpressure(sc->gfp_mask, memcg, false,
|
||||||
* If reclaim is isolating dirty pages under writeback,
|
sc->nr_scanned - scanned,
|
||||||
* it implies that the long-lived page allocation rate
|
sc->nr_reclaimed - reclaimed);
|
||||||
* is exceeding the page laundering rate. Either the
|
|
||||||
* global limits are not being effective at throttling
|
|
||||||
* processes due to the page distribution throughout
|
|
||||||
* zones or there is heavy usage of a slow backing
|
|
||||||
* device. The only option is to throttle from reclaim
|
|
||||||
* context which is not ideal as there is no guarantee
|
|
||||||
* the dirtying process is throttled in the same way
|
|
||||||
* balance_dirty_pages() manages.
|
|
||||||
*
|
|
||||||
* Once a node is flagged PGDAT_WRITEBACK, kswapd will
|
|
||||||
* count the number of pages under pages flagged for
|
|
||||||
* immediate reclaim and stall if any are encountered
|
|
||||||
* in the nr_immediate check below.
|
|
||||||
*/
|
|
||||||
if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken)
|
|
||||||
set_bit(PGDAT_WRITEBACK, &pgdat->flags);
|
|
||||||
|
|
||||||
/*
|
} while ((memcg = mem_cgroup_iter(root, memcg, NULL)));
|
||||||
* Tag a node as congested if all the dirty pages
|
|
||||||
* scanned were backed by a congested BDI and
|
|
||||||
* wait_iff_congested will stall.
|
|
||||||
*/
|
|
||||||
if (sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
|
|
||||||
set_bit(PGDAT_CONGESTED, &pgdat->flags);
|
|
||||||
|
|
||||||
/* Allow kswapd to start writing pages during reclaim.*/
|
if (reclaim_state) {
|
||||||
if (sc->nr.unqueued_dirty == sc->nr.file_taken)
|
sc->nr_reclaimed += reclaim_state->reclaimed_slab;
|
||||||
set_bit(PGDAT_DIRTY, &pgdat->flags);
|
reclaim_state->reclaimed_slab = 0;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/* Record the subtree's reclaim efficiency */
|
||||||
* If kswapd scans pages marked for immediate
|
vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
|
||||||
* reclaim and under writeback (nr_immediate), it
|
sc->nr_scanned - nr_scanned,
|
||||||
* implies that pages are cycling through the LRU
|
sc->nr_reclaimed - nr_reclaimed);
|
||||||
* faster than they are written so also forcibly stall.
|
|
||||||
*/
|
if (sc->nr_reclaimed - nr_reclaimed)
|
||||||
if (sc->nr.immediate)
|
reclaimable = true;
|
||||||
congestion_wait(BLK_RW_ASYNC, HZ/10);
|
|
||||||
}
|
if (current_is_kswapd()) {
|
||||||
|
/*
|
||||||
|
* If reclaim is isolating dirty pages under writeback,
|
||||||
|
* it implies that the long-lived page allocation rate
|
||||||
|
* is exceeding the page laundering rate. Either the
|
||||||
|
* global limits are not being effective at throttling
|
||||||
|
* processes due to the page distribution throughout
|
||||||
|
* zones or there is heavy usage of a slow backing
|
||||||
|
* device. The only option is to throttle from reclaim
|
||||||
|
* context which is not ideal as there is no guarantee
|
||||||
|
* the dirtying process is throttled in the same way
|
||||||
|
* balance_dirty_pages() manages.
|
||||||
|
*
|
||||||
|
* Once a node is flagged PGDAT_WRITEBACK, kswapd will
|
||||||
|
* count the number of pages under pages flagged for
|
||||||
|
* immediate reclaim and stall if any are encountered
|
||||||
|
* in the nr_immediate check below.
|
||||||
|
*/
|
||||||
|
if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken)
|
||||||
|
set_bit(PGDAT_WRITEBACK, &pgdat->flags);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Legacy memcg will stall in page writeback so avoid forcibly
|
* Tag a node as congested if all the dirty pages
|
||||||
* stalling in wait_iff_congested().
|
* scanned were backed by a congested BDI and
|
||||||
|
* wait_iff_congested will stall.
|
||||||
*/
|
*/
|
||||||
if (cgroup_reclaim(sc) && writeback_throttling_sane(sc) &&
|
if (sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
|
||||||
sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
|
set_bit(PGDAT_CONGESTED, &pgdat->flags);
|
||||||
set_memcg_congestion(pgdat, root, true);
|
|
||||||
|
/* Allow kswapd to start writing pages during reclaim.*/
|
||||||
|
if (sc->nr.unqueued_dirty == sc->nr.file_taken)
|
||||||
|
set_bit(PGDAT_DIRTY, &pgdat->flags);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Stall direct reclaim for IO completions if underlying BDIs
|
* If kswapd scans pages marked for immediate
|
||||||
* and node is congested. Allow kswapd to continue until it
|
* reclaim and under writeback (nr_immediate), it
|
||||||
* starts encountering unqueued dirty pages or cycling through
|
* implies that pages are cycling through the LRU
|
||||||
* the LRU too quickly.
|
* faster than they are written so also forcibly stall.
|
||||||
*/
|
*/
|
||||||
if (!sc->hibernation_mode && !current_is_kswapd() &&
|
if (sc->nr.immediate)
|
||||||
current_may_throttle() && pgdat_memcg_congested(pgdat, root))
|
congestion_wait(BLK_RW_ASYNC, HZ/10);
|
||||||
wait_iff_congested(BLK_RW_ASYNC, HZ/10);
|
}
|
||||||
|
|
||||||
} while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
|
/*
|
||||||
sc));
|
* Legacy memcg will stall in page writeback so avoid forcibly
|
||||||
|
* stalling in wait_iff_congested().
|
||||||
|
*/
|
||||||
|
if (cgroup_reclaim(sc) && writeback_throttling_sane(sc) &&
|
||||||
|
sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
|
||||||
|
set_memcg_congestion(pgdat, root, true);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Stall direct reclaim for IO completions if underlying BDIs
|
||||||
|
* and node is congested. Allow kswapd to continue until it
|
||||||
|
* starts encountering unqueued dirty pages or cycling through
|
||||||
|
* the LRU too quickly.
|
||||||
|
*/
|
||||||
|
if (!sc->hibernation_mode && !current_is_kswapd() &&
|
||||||
|
current_may_throttle() && pgdat_memcg_congested(pgdat, root))
|
||||||
|
wait_iff_congested(BLK_RW_ASYNC, HZ/10);
|
||||||
|
|
||||||
|
if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
|
||||||
|
sc))
|
||||||
|
goto again;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Kswapd gives up on balancing particular nodes after too
|
* Kswapd gives up on balancing particular nodes after too
|
||||||
|
|
Загрузка…
Ссылка в новой задаче