diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 2f79ff4477ba..e57248ccb63d 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -242,19 +242,6 @@ static inline bool is_active_lru(enum lru_list lru) return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE); } -struct zone_reclaim_stat { - /* - * The pageout code in vmscan.c keeps track of how many of the - * mem/swap backed and file backed pages are referenced. - * The higher the rotated/scanned ratio, the more valuable - * that cache is. - * - * The anon LRU stats live in [0], file LRU stats in [1] - */ - unsigned long recent_rotated[2]; - unsigned long recent_scanned[2]; -}; - enum lruvec_flags { LRUVEC_CONGESTED, /* lruvec has many dirty pages * backed by a congested BDI @@ -263,7 +250,13 @@ enum lruvec_flags { struct lruvec { struct list_head lists[NR_LRU_LISTS]; - struct zone_reclaim_stat reclaim_stat; + /* + * These track the cost of reclaiming one LRU - file or anon - + * over the other. As the observed cost of reclaiming one LRU + * increases, the reclaim scan balance tips toward the other. + */ + unsigned long anon_cost; + unsigned long file_cost; /* Evictions & activations on the inactive file list */ atomic_long_t inactive_age; /* Refaults at the time of last reclaim cycle */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 217bc8e13feb..ce3c55747006 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -334,6 +334,8 @@ extern unsigned long nr_free_pagecache_pages(void); /* linux/mm/swap.c */ +extern void lru_note_cost(struct lruvec *lruvec, bool file, + unsigned int nr_pages); extern void lru_cache_add(struct page *); extern void lru_add_page_tail(struct page *page, struct page *page_tail, struct lruvec *lruvec, struct list_head *head); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ee6fe8bb0bff..5381afb23d58 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3853,23 +3853,17 @@ static int memcg_stat_show(struct seq_file *m, void *v) { pg_data_t *pgdat; struct mem_cgroup_per_node *mz; - struct zone_reclaim_stat *rstat; - unsigned long recent_rotated[2] = {0, 0}; - unsigned long recent_scanned[2] = {0, 0}; + unsigned long anon_cost = 0; + unsigned long file_cost = 0; for_each_online_pgdat(pgdat) { mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id); - rstat = &mz->lruvec.reclaim_stat; - recent_rotated[0] += rstat->recent_rotated[0]; - recent_rotated[1] += rstat->recent_rotated[1]; - recent_scanned[0] += rstat->recent_scanned[0]; - recent_scanned[1] += rstat->recent_scanned[1]; + anon_cost += mz->lruvec.anon_cost; + file_cost += mz->lruvec.file_cost; } - seq_printf(m, "recent_rotated_anon %lu\n", recent_rotated[0]); - seq_printf(m, "recent_rotated_file %lu\n", recent_rotated[1]); - seq_printf(m, "recent_scanned_anon %lu\n", recent_scanned[0]); - seq_printf(m, "recent_scanned_file %lu\n", recent_scanned[1]); + seq_printf(m, "anon_cost %lu\n", anon_cost); + seq_printf(m, "file_cost %lu\n", file_cost); } #endif diff --git a/mm/swap.c b/mm/swap.c index 116b609c25c1..fedeb925dbfe 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -278,15 +278,12 @@ void rotate_reclaimable_page(struct page *page) } } -static void update_page_reclaim_stat(struct lruvec *lruvec, - int file, int rotated, - unsigned int nr_pages) +void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages) { - struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; - - reclaim_stat->recent_scanned[file] += nr_pages; - if (rotated) - reclaim_stat->recent_rotated[file] += nr_pages; + if (file) + lruvec->file_cost += nr_pages; + else + lruvec->anon_cost += nr_pages; } static void __activate_page(struct page *page, struct lruvec *lruvec, @@ -541,7 +538,7 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec, if (active) __count_vm_event(PGDEACTIVATE); - update_page_reclaim_stat(lruvec, file, 0, hpage_nr_pages(page)); + lru_note_cost(lruvec, !file, hpage_nr_pages(page)); } static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec, @@ -557,7 +554,7 @@ static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec, add_page_to_lru_list(page, lruvec, lru); __count_vm_events(PGDEACTIVATE, hpage_nr_pages(page)); - update_page_reclaim_stat(lruvec, file, 0, hpage_nr_pages(page)); + lru_note_cost(lruvec, !file, hpage_nr_pages(page)); } } @@ -582,7 +579,7 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec, __count_vm_events(PGLAZYFREE, hpage_nr_pages(page)); count_memcg_page_event(page, PGLAZYFREE); - update_page_reclaim_stat(lruvec, 1, 0, hpage_nr_pages(page)); + lru_note_cost(lruvec, 0, hpage_nr_pages(page)); } } diff --git a/mm/vmscan.c b/mm/vmscan.c index a5a7a8d0764c..c5b2a68f4ef6 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1916,7 +1916,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, bool file = is_file_lru(lru); enum vm_event_item item; struct pglist_data *pgdat = lruvec_pgdat(lruvec); - struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; bool stalled = false; while (unlikely(too_many_isolated(pgdat, file, sc))) { @@ -1940,7 +1939,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, &nr_scanned, sc, lru); __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken); - reclaim_stat->recent_scanned[file] += nr_taken; item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT; if (!cgroup_reclaim(sc)) __count_vm_events(item, nr_scanned); @@ -1960,8 +1958,12 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, move_pages_to_lru(lruvec, &page_list); __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken); - reclaim_stat->recent_rotated[0] += stat.nr_activate[0]; - reclaim_stat->recent_rotated[1] += stat.nr_activate[1]; + /* + * Rotating pages costs CPU without actually + * progressing toward the reclaim goal. + */ + lru_note_cost(lruvec, 0, stat.nr_activate[0]); + lru_note_cost(lruvec, 1, stat.nr_activate[1]); item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT; if (!cgroup_reclaim(sc)) __count_vm_events(item, nr_reclaimed); @@ -2013,7 +2015,6 @@ static void shrink_active_list(unsigned long nr_to_scan, LIST_HEAD(l_active); LIST_HEAD(l_inactive); struct page *page; - struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; unsigned nr_deactivate, nr_activate; unsigned nr_rotated = 0; int file = is_file_lru(lru); @@ -2027,7 +2028,6 @@ static void shrink_active_list(unsigned long nr_to_scan, &nr_scanned, sc, lru); __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken); - reclaim_stat->recent_scanned[file] += nr_taken; __count_vm_events(PGREFILL, nr_scanned); __count_memcg_events(lruvec_memcg(lruvec), PGREFILL, nr_scanned); @@ -2085,7 +2085,7 @@ static void shrink_active_list(unsigned long nr_to_scan, * helps balance scan pressure between file and anonymous pages in * get_scan_count. */ - reclaim_stat->recent_rotated[file] += nr_rotated; + lru_note_cost(lruvec, file, nr_rotated); nr_activate = move_pages_to_lru(lruvec, &l_active); nr_deactivate = move_pages_to_lru(lruvec, &l_inactive); @@ -2242,13 +2242,13 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, { struct mem_cgroup *memcg = lruvec_memcg(lruvec); int swappiness = mem_cgroup_swappiness(memcg); - struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; u64 fraction[2]; u64 denominator = 0; /* gcc */ struct pglist_data *pgdat = lruvec_pgdat(lruvec); unsigned long anon_prio, file_prio; enum scan_balance scan_balance; unsigned long anon, file; + unsigned long totalcost; unsigned long ap, fp; enum lru_list lru; @@ -2324,26 +2324,26 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, MAX_NR_ZONES); spin_lock_irq(&pgdat->lru_lock); - if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { - reclaim_stat->recent_scanned[0] /= 2; - reclaim_stat->recent_rotated[0] /= 2; - } - - if (unlikely(reclaim_stat->recent_scanned[1] > file / 4)) { - reclaim_stat->recent_scanned[1] /= 2; - reclaim_stat->recent_rotated[1] /= 2; + totalcost = lruvec->anon_cost + lruvec->file_cost; + if (unlikely(totalcost > (anon + file) / 4)) { + lruvec->anon_cost /= 2; + lruvec->file_cost /= 2; + totalcost /= 2; } /* * The amount of pressure on anon vs file pages is inversely - * proportional to the fraction of recently scanned pages on - * each list that were recently referenced and in active use. + * proportional to the assumed cost of reclaiming each list, + * as determined by the share of pages that are likely going + * to refault or rotate on each list (recently referenced), + * times the relative IO cost of bringing back a swapped out + * anonymous page vs reloading a filesystem page (swappiness). */ - ap = anon_prio * (reclaim_stat->recent_scanned[0] + 1); - ap /= reclaim_stat->recent_rotated[0] + 1; + ap = anon_prio * (totalcost + 1); + ap /= lruvec->anon_cost + 1; - fp = file_prio * (reclaim_stat->recent_scanned[1] + 1); - fp /= reclaim_stat->recent_rotated[1] + 1; + fp = file_prio * (totalcost + 1); + fp /= lruvec->file_cost + 1; spin_unlock_irq(&pgdat->lru_lock); fraction[0] = ap;