diff --git a/mm/vmscan.c b/mm/vmscan.c index bfbfc98c856c..cc522e048ed7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -137,7 +137,6 @@ struct scan_control { #ifdef CONFIG_LRU_GEN /* help kswapd make better choices among multiple memcgs */ - unsigned int memcgs_need_aging:1; unsigned long last_reclaimed; #endif @@ -4468,7 +4467,7 @@ done: return true; } -static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsigned long *min_seq, +static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, struct scan_control *sc, bool can_swap, unsigned long *nr_to_scan) { int gen, type, zone; @@ -4477,6 +4476,13 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig unsigned long total = 0; struct lru_gen_folio *lrugen = &lruvec->lrugen; struct mem_cgroup *memcg = lruvec_memcg(lruvec); + DEFINE_MIN_SEQ(lruvec); + + /* whether this lruvec is completely out of cold folios */ + if (min_seq[!can_swap] + MIN_NR_GENS > max_seq) { + *nr_to_scan = 0; + return true; + } for (type = !can_swap; type < ANON_AND_FILE; type++) { unsigned long seq; @@ -4505,8 +4511,6 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig * stalls when the number of generations reaches MIN_NR_GENS. Hence, the * ideal number of generations is MIN_NR_GENS+1. */ - if (min_seq[!can_swap] + MIN_NR_GENS > max_seq) - return true; if (min_seq[!can_swap] + MIN_NR_GENS < max_seq) return false; @@ -4525,40 +4529,54 @@ static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq, unsig return false; } -static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc, unsigned long min_ttl) +static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc) { - bool need_aging; - unsigned long nr_to_scan; - int swappiness = get_swappiness(lruvec, sc); + int gen, type, zone; + unsigned long total = 0; + bool can_swap = get_swappiness(lruvec, sc); + struct lru_gen_folio *lrugen = &lruvec->lrugen; struct mem_cgroup *memcg = lruvec_memcg(lruvec); DEFINE_MAX_SEQ(lruvec); DEFINE_MIN_SEQ(lruvec); + for (type = !can_swap; type < ANON_AND_FILE; type++) { + unsigned long seq; + + for (seq = min_seq[type]; seq <= max_seq; seq++) { + gen = lru_gen_from_seq(seq); + + for (zone = 0; zone < MAX_NR_ZONES; zone++) + total += max(READ_ONCE(lrugen->nr_pages[gen][type][zone]), 0L); + } + } + + /* whether the size is big enough to be helpful */ + return mem_cgroup_online(memcg) ? (total >> sc->priority) : total; +} + +static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc, + unsigned long min_ttl) +{ + int gen; + unsigned long birth; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + DEFINE_MIN_SEQ(lruvec); + VM_WARN_ON_ONCE(sc->memcg_low_reclaim); + /* see the comment on lru_gen_folio */ + gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]); + birth = READ_ONCE(lruvec->lrugen.timestamps[gen]); + + if (time_is_after_jiffies(birth + min_ttl)) + return false; + + if (!lruvec_is_sizable(lruvec, sc)) + return false; + mem_cgroup_calculate_protection(NULL, memcg); - if (mem_cgroup_below_min(NULL, memcg)) - return false; - - need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, swappiness, &nr_to_scan); - - if (min_ttl) { - int gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]); - unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]); - - if (time_is_after_jiffies(birth + min_ttl)) - return false; - - /* the size is likely too small to be helpful */ - if (!nr_to_scan && sc->priority != DEF_PRIORITY) - return false; - } - - if (need_aging) - try_to_inc_max_seq(lruvec, max_seq, sc, swappiness, false); - - return true; + return !mem_cgroup_below_min(NULL, memcg); } /* to protect the working set of the last N jiffies */ @@ -4567,46 +4585,32 @@ static unsigned long lru_gen_min_ttl __read_mostly; static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) { struct mem_cgroup *memcg; - bool success = false; unsigned long min_ttl = READ_ONCE(lru_gen_min_ttl); VM_WARN_ON_ONCE(!current_is_kswapd()); sc->last_reclaimed = sc->nr_reclaimed; - /* - * To reduce the chance of going into the aging path, which can be - * costly, optimistically skip it if the flag below was cleared in the - * eviction path. This improves the overall performance when multiple - * memcgs are available. - */ - if (!sc->memcgs_need_aging) { - sc->memcgs_need_aging = true; + /* check the order to exclude compaction-induced reclaim */ + if (!min_ttl || sc->order || sc->priority == DEF_PRIORITY) return; - } - - set_mm_walk(pgdat); memcg = mem_cgroup_iter(NULL, NULL, NULL); do { struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); - if (age_lruvec(lruvec, sc, min_ttl)) - success = true; + if (lruvec_is_reclaimable(lruvec, sc, min_ttl)) { + mem_cgroup_iter_break(NULL, memcg); + return; + } cond_resched(); } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); - clear_mm_walk(); - - /* check the order to exclude compaction-induced reclaim */ - if (success || !min_ttl || sc->order) - return; - /* * The main goal is to OOM kill if every generation from all memcgs is * younger than min_ttl. However, another possibility is all memcgs are - * either below min or empty. + * either too small or below min. */ if (mutex_trylock(&oom_lock)) { struct oom_control oc = { @@ -5114,34 +5118,28 @@ retry: * reclaim. */ static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, - bool can_swap, bool *need_aging) + bool can_swap) { unsigned long nr_to_scan; struct mem_cgroup *memcg = lruvec_memcg(lruvec); DEFINE_MAX_SEQ(lruvec); - DEFINE_MIN_SEQ(lruvec); if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg) || (mem_cgroup_below_low(sc->target_mem_cgroup, memcg) && !sc->memcg_low_reclaim)) return 0; - *need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, can_swap, &nr_to_scan); - if (!*need_aging) + if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan)) return nr_to_scan; /* skip the aging path at the default priority */ if (sc->priority == DEF_PRIORITY) - goto done; - - /* leave the work to lru_gen_age_node() */ - if (current_is_kswapd()) - return 0; - - if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false)) return nr_to_scan; -done: - return min_seq[!can_swap] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0; + + try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false); + + /* skip this lruvec as it's low on cold folios */ + return 0; } static unsigned long get_nr_to_reclaim(struct scan_control *sc) @@ -5160,9 +5158,7 @@ static unsigned long get_nr_to_reclaim(struct scan_control *sc) static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) { struct blk_plug plug; - bool need_aging = false; unsigned long scanned = 0; - unsigned long reclaimed = sc->nr_reclaimed; unsigned long nr_to_reclaim = get_nr_to_reclaim(sc); lru_add_drain(); @@ -5183,13 +5179,13 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc else swappiness = 0; - nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness, &need_aging); + nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness); if (!nr_to_scan) - goto done; + break; delta = evict_folios(lruvec, sc, swappiness); if (!delta) - goto done; + break; scanned += delta; if (scanned >= nr_to_scan) @@ -5201,10 +5197,6 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc cond_resched(); } - /* see the comment in lru_gen_age_node() */ - if (sc->nr_reclaimed - reclaimed >= MIN_LRU_BATCH && !need_aging) - sc->memcgs_need_aging = false; -done: clear_mm_walk(); blk_finish_plug(&plug);