mm: vmscan: replace shrink_node() loop with a retry jump

Most of the function body is inside a loop, which imposes an additional indentation and scoping level that makes the code harder to follow and modify. The looping only happens in the case of reclaim-compaction, which isn't the common case. So rather than adding yet another function level to the reclaim path and having every reclaim invocation go through a level that only exists for one specific corner case, use a retry goto. Link: http://lkml.kernel.org/r/20191022144803.302233-6-hannes@cmpxchg.org Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> Reviewed-by: Roman Gushchin <guro@fb.com> Reviewed-by: Shakeel Butt <shakeelb@google.com> Cc: Michal Hocko <mhocko@suse.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-11-30 17:55:43 -08:00 · 2019-11-30 17:55:43 -08:00 · d2af339706
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@ -2729,144 +2729,143 @@ static bool pgdat_memcg_congested(pg_data_t *pgdat, struct mem_cgroup *memcg)
 static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 {
 	struct reclaim_state *reclaim_state = current->reclaim_state;
 	struct mem_cgroup *root = sc->target_mem_cgroup;
 	unsigned long nr_reclaimed, nr_scanned;
 	bool reclaimable = false;
 	struct mem_cgroup *memcg;
 again:
 	memset(&sc->nr, 0, sizeof(sc->nr));
 	nr_reclaimed = sc->nr_reclaimed;
 	nr_scanned = sc->nr_scanned;
 	memcg = mem_cgroup_iter(root, NULL, NULL);
 	do {
-		struct mem_cgroup *root = sc->target_mem_cgroup;
+		unsigned long reclaimed;
-		struct mem_cgroup *memcg;
+		unsigned long scanned;
-		memset(&sc->nr, 0, sizeof(sc->nr));
+		switch (mem_cgroup_protected(root, memcg)) {
-
+		case MEMCG_PROT_MIN:
-		nr_reclaimed = sc->nr_reclaimed;
+			/*
-		nr_scanned = sc->nr_scanned;
+			 * Hard protection.
-
+			 * If there is no reclaimable memory, OOM.
-		memcg = mem_cgroup_iter(root, NULL, NULL);
+			 */
-		do {
+			continue;
-			unsigned long reclaimed;
+		case MEMCG_PROT_LOW:
-			unsigned long scanned;
+			/*
-
+			 * Soft protection.
-			switch (mem_cgroup_protected(root, memcg)) {
+			 * Respect the protection only as long as
-			case MEMCG_PROT_MIN:
+			 * there is an unprotected supply
-				/*
+			 * of reclaimable memory from other cgroups.
-				 * Hard protection.
+			 */
-				 * If there is no reclaimable memory, OOM.
+			if (!sc->memcg_low_reclaim) {
-				 */
+				sc->memcg_low_skipped = 1;
 				continue;
 			case MEMCG_PROT_LOW:
 				/*
 				 * Soft protection.
 				 * Respect the protection only as long as
 				 * there is an unprotected supply
 				 * of reclaimable memory from other cgroups.
 				 */
 				if (!sc->memcg_low_reclaim) {
 					sc->memcg_low_skipped = 1;
 					continue;
 				}
 				memcg_memory_event(memcg, MEMCG_LOW);
 				break;
 			case MEMCG_PROT_NONE:
 				/*
 				 * All protection thresholds breached. We may
 				 * still choose to vary the scan pressure
 				 * applied based on by how much the cgroup in
 				 * question has exceeded its protection
 				 * thresholds (see get_scan_count).
 				 */
 				break;
 			}
-
+			memcg_memory_event(memcg, MEMCG_LOW);
-			reclaimed = sc->nr_reclaimed;
+			break;
-			scanned = sc->nr_scanned;
+		case MEMCG_PROT_NONE:
-			shrink_node_memcg(pgdat, memcg, sc);
+			/*
-
+			 * All protection thresholds breached. We may
-			shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
+			 * still choose to vary the scan pressure
-					sc->priority);
+			 * applied based on by how much the cgroup in
-
+			 * question has exceeded its protection
-			/* Record the group's reclaim efficiency */
+			 * thresholds (see get_scan_count).
-			vmpressure(sc->gfp_mask, memcg, false,
+			 */
-				   sc->nr_scanned - scanned,
+			break;
 				   sc->nr_reclaimed - reclaimed);
 		} while ((memcg = mem_cgroup_iter(root, memcg, NULL)));
 		if (reclaim_state) {
 			sc->nr_reclaimed += reclaim_state->reclaimed_slab;
 			reclaim_state->reclaimed_slab = 0;
 		}
-		/* Record the subtree's reclaim efficiency */
+		reclaimed = sc->nr_reclaimed;
-		vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
+		scanned = sc->nr_scanned;
-			   sc->nr_scanned - nr_scanned,
+		shrink_node_memcg(pgdat, memcg, sc);
 			   sc->nr_reclaimed - nr_reclaimed);
-		if (sc->nr_reclaimed - nr_reclaimed)
+		shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
-			reclaimable = true;
+			    sc->priority);
-		if (current_is_kswapd()) {
+		/* Record the group's reclaim efficiency */
-			/*
+		vmpressure(sc->gfp_mask, memcg, false,
-			 * If reclaim is isolating dirty pages under writeback,
+			   sc->nr_scanned - scanned,
-			 * it implies that the long-lived page allocation rate
+			   sc->nr_reclaimed - reclaimed);
 			 * is exceeding the page laundering rate. Either the
 			 * global limits are not being effective at throttling
 			 * processes due to the page distribution throughout
 			 * zones or there is heavy usage of a slow backing
 			 * device. The only option is to throttle from reclaim
 			 * context which is not ideal as there is no guarantee
 			 * the dirtying process is throttled in the same way
 			 * balance_dirty_pages() manages.
 			 *
 			 * Once a node is flagged PGDAT_WRITEBACK, kswapd will
 			 * count the number of pages under pages flagged for
 			 * immediate reclaim and stall if any are encountered
 			 * in the nr_immediate check below.
 			 */
 			if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken)
 				set_bit(PGDAT_WRITEBACK, &pgdat->flags);
-			/*
+	} while ((memcg = mem_cgroup_iter(root, memcg, NULL)));
 			 * Tag a node as congested if all the dirty pages
 			 * scanned were backed by a congested BDI and
 			 * wait_iff_congested will stall.
 			 */
 			if (sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
 				set_bit(PGDAT_CONGESTED, &pgdat->flags);
-			/* Allow kswapd to start writing pages during reclaim.*/
+	if (reclaim_state) {
-			if (sc->nr.unqueued_dirty == sc->nr.file_taken)
+		sc->nr_reclaimed += reclaim_state->reclaimed_slab;
-				set_bit(PGDAT_DIRTY, &pgdat->flags);
+		reclaim_state->reclaimed_slab = 0;
 	}
-			/*
+	/* Record the subtree's reclaim efficiency */
-			 * If kswapd scans pages marked marked for immediate
+	vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
-			 * reclaim and under writeback (nr_immediate), it
+		   sc->nr_scanned - nr_scanned,
-			 * implies that pages are cycling through the LRU
+		   sc->nr_reclaimed - nr_reclaimed);
-			 * faster than they are written so also forcibly stall.
+
-			 */
+	if (sc->nr_reclaimed - nr_reclaimed)
-			if (sc->nr.immediate)
+		reclaimable = true;
-				congestion_wait(BLK_RW_ASYNC, HZ/10);
+
-		}
+	if (current_is_kswapd()) {
 		/*
 		 * If reclaim is isolating dirty pages under writeback,
 		 * it implies that the long-lived page allocation rate
 		 * is exceeding the page laundering rate. Either the
 		 * global limits are not being effective at throttling
 		 * processes due to the page distribution throughout
 		 * zones or there is heavy usage of a slow backing
 		 * device. The only option is to throttle from reclaim
 		 * context which is not ideal as there is no guarantee
 		 * the dirtying process is throttled in the same way
 		 * balance_dirty_pages() manages.
 		 *
 		 * Once a node is flagged PGDAT_WRITEBACK, kswapd will
 		 * count the number of pages under pages flagged for
 		 * immediate reclaim and stall if any are encountered
 		 * in the nr_immediate check below.
 		 */
 		if (sc->nr.writeback && sc->nr.writeback == sc->nr.taken)
 			set_bit(PGDAT_WRITEBACK, &pgdat->flags);
 		/*
-		 * Legacy memcg will stall in page writeback so avoid forcibly
+		 * Tag a node as congested if all the dirty pages
-		 * stalling in wait_iff_congested().
+		 * scanned were backed by a congested BDI and
 		 * wait_iff_congested will stall.
 		 */
-		if (cgroup_reclaim(sc) && writeback_throttling_sane(sc) &&
+		if (sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
-		    sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
+			set_bit(PGDAT_CONGESTED, &pgdat->flags);
-			set_memcg_congestion(pgdat, root, true);
+
 		/* Allow kswapd to start writing pages during reclaim.*/
 		if (sc->nr.unqueued_dirty == sc->nr.file_taken)
 			set_bit(PGDAT_DIRTY, &pgdat->flags);
 		/*
-		 * Stall direct reclaim for IO completions if underlying BDIs
+		 * If kswapd scans pages marked marked for immediate
-		 * and node is congested. Allow kswapd to continue until it
+		 * reclaim and under writeback (nr_immediate), it
-		 * starts encountering unqueued dirty pages or cycling through
+		 * implies that pages are cycling through the LRU
-		 * the LRU too quickly.
+		 * faster than they are written so also forcibly stall.
 		 */
-		if (!sc->hibernation_mode && !current_is_kswapd() &&
+		if (sc->nr.immediate)
-		   current_may_throttle() && pgdat_memcg_congested(pgdat, root))
+			congestion_wait(BLK_RW_ASYNC, HZ/10);
-			wait_iff_congested(BLK_RW_ASYNC, HZ/10);
+	}
-	} while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
+	/*
-					 sc));
+	 * Legacy memcg will stall in page writeback so avoid forcibly
 	 * stalling in wait_iff_congested().
 	 */
 	if (cgroup_reclaim(sc) && writeback_throttling_sane(sc) &&
 	    sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
 		set_memcg_congestion(pgdat, root, true);
 	/*
 	 * Stall direct reclaim for IO completions if underlying BDIs
 	 * and node is congested. Allow kswapd to continue until it
 	 * starts encountering unqueued dirty pages or cycling through
 	 * the LRU too quickly.
 	 */
 	if (!sc->hibernation_mode && !current_is_kswapd() &&
 	    current_may_throttle() && pgdat_memcg_congested(pgdat, root))
 		wait_iff_congested(BLK_RW_ASYNC, HZ/10);
 	if (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
 				    sc))
 		goto again;
 	/*
 	 * Kswapd gives up on balancing particular nodes after too