xfs: merge xfs_reclaim_inodes_ag into xfs_inode_walk_ag

Merge these two inode walk loops together, since they're pretty similar now. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Dave Chinner <dchinner@redhat.com>
2021-05-31 11:32:02 -07:00 · 2021-05-31 11:32:02 -07:00 · f1bc5c5630
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@ -43,6 +43,7 @@ enum xfs_icwalk_goal {
 	/* Goals directly associated with tagged inodes. */
 	XFS_ICWALK_BLOCKGC	= XFS_ICI_BLOCKGC_TAG,
 	XFS_ICWALK_RECLAIM	= XFS_ICI_RECLAIM_TAG,
 };
 #define XFS_ICWALK_NULL_TAG	(-1U)
@ -67,9 +68,13 @@ static int xfs_icwalk_ag(struct xfs_perag *pag,
 #define XFS_ICWALK_FLAG_DROP_GDQUOT	(1U << 30)
 #define XFS_ICWALK_FLAG_DROP_PDQUOT	(1U << 29)
 /* Stop scanning after icw_scan_limit inodes. */
 #define XFS_ICWALK_FLAG_SCAN_LIMIT	(1U << 28)
 #define XFS_ICWALK_PRIVATE_FLAGS	(XFS_ICWALK_FLAG_DROP_UDQUOT | \
 					 XFS_ICWALK_FLAG_DROP_GDQUOT | \
-					 XFS_ICWALK_FLAG_DROP_PDQUOT)
+					 XFS_ICWALK_FLAG_DROP_PDQUOT | \
 					 XFS_ICWALK_FLAG_SCAN_LIMIT)
 /*
 * Allocate and initialise an xfs_inode.
@ -760,17 +765,6 @@ xfs_icache_inode_is_allocated(
 	return 0;
 }
 /*
 * The inode lookup is done in batches to keep the amount of lock traffic and
 * radix tree lookups to a minimum. The batch size is a trade off between
 * lookup reduction and stack usage. This is in the reclaim path, so we can't
 * be too greedy.
 *
 * XXX: This will be moved closer to xfs_icwalk* once we get rid of the
 * separate reclaim walk functions.
 */
 #define XFS_LOOKUP_BATCH	32
 #ifdef CONFIG_XFS_QUOTA
 /* Decide if we want to grab this inode to drop its dquots. */
 static bool
@ -880,7 +874,7 @@ xfs_dqrele_all_inodes(
 * Return true if we grabbed it, false otherwise.
 */
 static bool
-xfs_reclaim_inode_grab(
+xfs_reclaim_igrab(
 	struct xfs_inode	*ip)
 {
 	ASSERT(rcu_read_lock_held());
@ -990,108 +984,13 @@ out:
 	xfs_iflags_clear(ip, XFS_IRECLAIM);
 }
 /*
 * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
 * corrupted, we still want to try to reclaim all the inodes. If we don't,
 * then a shut down during filesystem unmount reclaim walk leak all the
 * unreclaimed inodes.
 *
 * Returns non-zero if any AGs or inodes were skipped in the reclaim pass
 * so that callers that want to block until all dirty inodes are written back
 * and reclaimed can sanely loop.
 */
 static void
 xfs_reclaim_inodes_ag(
 	struct xfs_mount	*mp,
 	int			*nr_to_scan)
 {
 	struct xfs_perag	*pag;
 	xfs_agnumber_t		ag = 0;
 	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
 		unsigned long	first_index = 0;
 		int		done = 0;
 		int		nr_found = 0;
 		ag = pag->pag_agno + 1;
 		first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
 		do {
 			struct xfs_inode *batch[XFS_LOOKUP_BATCH];
 			int	i;
 			rcu_read_lock();
 			nr_found = radix_tree_gang_lookup_tag(
 					&pag->pag_ici_root,
 					(void **)batch, first_index,
 					XFS_LOOKUP_BATCH,
 					XFS_ICI_RECLAIM_TAG);
 			if (!nr_found) {
 				done = 1;
 				rcu_read_unlock();
 				break;
 			}
 			/*
 			 * Grab the inodes before we drop the lock. if we found
 			 * nothing, nr == 0 and the loop will be skipped.
 			 */
 			for (i = 0; i < nr_found; i++) {
 				struct xfs_inode *ip = batch[i];
 				if (done || !xfs_reclaim_inode_grab(ip))
 					batch[i] = NULL;
 				/*
 				 * Update the index for the next lookup. Catch
 				 * overflows into the next AG range which can
 				 * occur if we have inodes in the last block of
 				 * the AG and we are currently pointing to the
 				 * last inode.
 				 *
 				 * Because we may see inodes that are from the
 				 * wrong AG due to RCU freeing and
 				 * reallocation, only update the index if it
 				 * lies in this AG. It was a race that lead us
 				 * to see this inode, so another lookup from
 				 * the same index will not find it again.
 				 */
 				if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
 								pag->pag_agno)
 					continue;
 				first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
 				if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
 					done = 1;
 			}
 			/* unlock now we've grabbed the inodes. */
 			rcu_read_unlock();
 			for (i = 0; i < nr_found; i++) {
 				if (batch[i])
 					xfs_reclaim_inode(batch[i], pag);
 			}
 			*nr_to_scan -= XFS_LOOKUP_BATCH;
 			cond_resched();
 		} while (nr_found && !done && *nr_to_scan > 0);
 		if (done)
 			first_index = 0;
 		WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
 		xfs_perag_put(pag);
 	}
 }
 void
 xfs_reclaim_inodes(
 	struct xfs_mount	*mp)
 {
 	int		nr_to_scan = INT_MAX;
 	while (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
 		xfs_ail_push_all_sync(mp->m_ail);
-		xfs_reclaim_inodes_ag(mp, &nr_to_scan);
+		xfs_icwalk(mp, XFS_ICWALK_RECLAIM, NULL);
 	}
 }
@ -1107,11 +1006,16 @@ xfs_reclaim_inodes_nr(
 	struct xfs_mount	*mp,
 	int			nr_to_scan)
 {
 	struct xfs_eofblocks	eofb = {
 		.eof_flags	= XFS_ICWALK_FLAG_SCAN_LIMIT,
 		.icw_scan_limit	= nr_to_scan,
 	};
 	/* kick background reclaimer and push the AIL */
 	xfs_reclaim_work_queue(mp);
 	xfs_ail_push_all(mp->m_ail);
-	xfs_reclaim_inodes_ag(mp, &nr_to_scan);
+	xfs_icwalk(mp, XFS_ICWALK_RECLAIM, &eofb);
 	return 0;
 }
@ -1221,9 +1125,8 @@ xfs_reclaim_worker(
 {
 	struct xfs_mount *mp = container_of(to_delayed_work(work),
 					struct xfs_mount, m_reclaim_work);
 	int		nr_to_scan = INT_MAX;
-	xfs_reclaim_inodes_ag(mp, &nr_to_scan);
+	xfs_icwalk(mp, XFS_ICWALK_RECLAIM, NULL);
 	xfs_reclaim_work_queue(mp);
 }
@ -1693,6 +1596,15 @@ xfs_blockgc_free_quota(
 /* XFS Inode Cache Walking Code */
 /*
 * The inode lookup is done in batches to keep the amount of lock traffic and
 * radix tree lookups to a minimum. The batch size is a trade off between
 * lookup reduction and stack usage. This is in the reclaim path, so we can't
 * be too greedy.
 */
 #define XFS_LOOKUP_BATCH	32
 /*
 * Decide if we want to grab this inode in anticipation of doing work towards
 * the goal.
@ -1707,6 +1619,8 @@ xfs_icwalk_igrab(
 		return xfs_dqrele_igrab(ip);
 	case XFS_ICWALK_BLOCKGC:
 		return xfs_blockgc_igrab(ip);
 	case XFS_ICWALK_RECLAIM:
 		return xfs_reclaim_igrab(ip);
 	default:
 		return false;
 	}
@ -1720,6 +1634,7 @@ static inline int
 xfs_icwalk_process_inode(
 	enum xfs_icwalk_goal	goal,
 	struct xfs_inode	*ip,
 	struct xfs_perag	*pag,
 	struct xfs_eofblocks	*eofb)
 {
 	int			error = 0;
@ -1731,6 +1646,9 @@ xfs_icwalk_process_inode(
 	case XFS_ICWALK_BLOCKGC:
 		error = xfs_blockgc_scan_inode(ip, eofb);
 		break;
 	case XFS_ICWALK_RECLAIM:
 		xfs_reclaim_inode(ip, pag);
 		break;
 	}
 	return error;
 }
@ -1755,7 +1673,10 @@ xfs_icwalk_ag(
 restart:
 	done = false;
 	skipped = 0;
-	first_index = 0;
+	if (goal == XFS_ICWALK_RECLAIM)
 		first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
 	else
 		first_index = 0;
 	nr_found = 0;
 	do {
 		struct xfs_inode *batch[XFS_LOOKUP_BATCH];
@ -1776,6 +1697,7 @@ restart:
 					XFS_LOOKUP_BATCH, tag);
 		if (!nr_found) {
 			done = true;
 			rcu_read_unlock();
 			break;
 		}
@ -1815,7 +1737,8 @@ restart:
 		for (i = 0; i < nr_found; i++) {
 			if (!batch[i])
 				continue;
-			error = xfs_icwalk_process_inode(goal, batch[i], eofb);
+			error = xfs_icwalk_process_inode(goal, batch[i], pag,
 					eofb);
 			if (error == -EAGAIN) {
 				skipped++;
 				continue;
@ -1830,8 +1753,19 @@ restart:
 		cond_resched();
 		if (eofb && (eofb->eof_flags & XFS_ICWALK_FLAG_SCAN_LIMIT)) {
 			eofb->icw_scan_limit -= XFS_LOOKUP_BATCH;
 			if (eofb->icw_scan_limit <= 0)
 				break;
 		}
 	} while (nr_found && !done);
 	if (goal == XFS_ICWALK_RECLAIM) {
 		if (done)
 			first_index = 0;
 		WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
 	}
 	if (skipped) {
 		delay(1);
 		goto restart;
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@ -15,6 +15,7 @@ struct xfs_eofblocks {
 	kgid_t		eof_gid;
 	prid_t		eof_prid;
 	__u64		eof_min_file_size;
 	int		icw_scan_limit;
 };
 /*
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@ -3898,6 +3898,7 @@ DECLARE_EVENT_CLASS(xfs_eofblocks_class,
 		__field(uint32_t, gid)
 		__field(prid_t, prid)
 		__field(__u64, min_file_size)
 		__field(int, scan_limit)
 		__field(unsigned long, caller_ip)
 	),
 	TP_fast_assign(
@ -3909,15 +3910,17 @@ DECLARE_EVENT_CLASS(xfs_eofblocks_class,
 						eofb->eof_gid) : 0;
 		__entry->prid = eofb ? eofb->eof_prid : 0;
 		__entry->min_file_size = eofb ? eofb->eof_min_file_size : 0;
 		__entry->scan_limit = eofb ? eofb->icw_scan_limit : 0;
 		__entry->caller_ip = caller_ip;
 	),
-	TP_printk("dev %d:%d flags 0x%x uid %u gid %u prid %u minsize %llu caller %pS",
+	TP_printk("dev %d:%d flags 0x%x uid %u gid %u prid %u minsize %llu scan_limit %d caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->flags,
 		  __entry->uid,
 		  __entry->gid,
 		  __entry->prid,
 		  __entry->min_file_size,
 		  __entry->scan_limit,
 		  (char *)__entry->caller_ip)
 );
 #define DEFINE_EOFBLOCKS_EVENT(name)	\