xfs: merge xfs_reclaim_inodes_ag into xfs_inode_walk_ag
Merge these two inode walk loops together, since they're pretty similar now.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
This commit is contained in:
Родитель
9d5ee83759
Коммит
f1bc5c5630
|
@ -43,6 +43,7 @@ enum xfs_icwalk_goal {
|
||||||
|
|
||||||
/* Goals directly associated with tagged inodes. */
|
/* Goals directly associated with tagged inodes. */
|
||||||
XFS_ICWALK_BLOCKGC = XFS_ICI_BLOCKGC_TAG,
|
XFS_ICWALK_BLOCKGC = XFS_ICI_BLOCKGC_TAG,
|
||||||
|
XFS_ICWALK_RECLAIM = XFS_ICI_RECLAIM_TAG,
|
||||||
};
|
};
|
||||||
|
|
||||||
#define XFS_ICWALK_NULL_TAG (-1U)
|
#define XFS_ICWALK_NULL_TAG (-1U)
|
||||||
|
@ -67,9 +68,13 @@ static int xfs_icwalk_ag(struct xfs_perag *pag,
|
||||||
#define XFS_ICWALK_FLAG_DROP_GDQUOT (1U << 30)
|
#define XFS_ICWALK_FLAG_DROP_GDQUOT (1U << 30)
|
||||||
#define XFS_ICWALK_FLAG_DROP_PDQUOT (1U << 29)
|
#define XFS_ICWALK_FLAG_DROP_PDQUOT (1U << 29)
|
||||||
|
|
||||||
|
/* Stop scanning after icw_scan_limit inodes. */
|
||||||
|
#define XFS_ICWALK_FLAG_SCAN_LIMIT (1U << 28)
|
||||||
|
|
||||||
#define XFS_ICWALK_PRIVATE_FLAGS (XFS_ICWALK_FLAG_DROP_UDQUOT | \
|
#define XFS_ICWALK_PRIVATE_FLAGS (XFS_ICWALK_FLAG_DROP_UDQUOT | \
|
||||||
XFS_ICWALK_FLAG_DROP_GDQUOT | \
|
XFS_ICWALK_FLAG_DROP_GDQUOT | \
|
||||||
XFS_ICWALK_FLAG_DROP_PDQUOT)
|
XFS_ICWALK_FLAG_DROP_PDQUOT | \
|
||||||
|
XFS_ICWALK_FLAG_SCAN_LIMIT)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Allocate and initialise an xfs_inode.
|
* Allocate and initialise an xfs_inode.
|
||||||
|
@ -760,17 +765,6 @@ xfs_icache_inode_is_allocated(
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* The inode lookup is done in batches to keep the amount of lock traffic and
|
|
||||||
* radix tree lookups to a minimum. The batch size is a trade off between
|
|
||||||
* lookup reduction and stack usage. This is in the reclaim path, so we can't
|
|
||||||
* be too greedy.
|
|
||||||
*
|
|
||||||
* XXX: This will be moved closer to xfs_icwalk* once we get rid of the
|
|
||||||
* separate reclaim walk functions.
|
|
||||||
*/
|
|
||||||
#define XFS_LOOKUP_BATCH 32
|
|
||||||
|
|
||||||
#ifdef CONFIG_XFS_QUOTA
|
#ifdef CONFIG_XFS_QUOTA
|
||||||
/* Decide if we want to grab this inode to drop its dquots. */
|
/* Decide if we want to grab this inode to drop its dquots. */
|
||||||
static bool
|
static bool
|
||||||
|
@ -880,7 +874,7 @@ xfs_dqrele_all_inodes(
|
||||||
* Return true if we grabbed it, false otherwise.
|
* Return true if we grabbed it, false otherwise.
|
||||||
*/
|
*/
|
||||||
static bool
|
static bool
|
||||||
xfs_reclaim_inode_grab(
|
xfs_reclaim_igrab(
|
||||||
struct xfs_inode *ip)
|
struct xfs_inode *ip)
|
||||||
{
|
{
|
||||||
ASSERT(rcu_read_lock_held());
|
ASSERT(rcu_read_lock_held());
|
||||||
|
@ -990,108 +984,13 @@ out:
|
||||||
xfs_iflags_clear(ip, XFS_IRECLAIM);
|
xfs_iflags_clear(ip, XFS_IRECLAIM);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Walk the AGs and reclaim the inodes in them. Even if the filesystem is
|
|
||||||
* corrupted, we still want to try to reclaim all the inodes. If we don't,
|
|
||||||
* then a shut down during filesystem unmount reclaim walk will leak all the
|
|
||||||
* unreclaimed inodes.
|
|
||||||
*
|
|
||||||
* Returns non-zero if any AGs or inodes were skipped in the reclaim pass
|
|
||||||
* so that callers that want to block until all dirty inodes are written back
|
|
||||||
* and reclaimed can sanely loop.
|
|
||||||
*/
|
|
||||||
static void
|
|
||||||
xfs_reclaim_inodes_ag(
|
|
||||||
struct xfs_mount *mp,
|
|
||||||
int *nr_to_scan)
|
|
||||||
{
|
|
||||||
struct xfs_perag *pag;
|
|
||||||
xfs_agnumber_t ag = 0;
|
|
||||||
|
|
||||||
while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
|
|
||||||
unsigned long first_index = 0;
|
|
||||||
int done = 0;
|
|
||||||
int nr_found = 0;
|
|
||||||
|
|
||||||
ag = pag->pag_agno + 1;
|
|
||||||
|
|
||||||
first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
|
|
||||||
do {
|
|
||||||
struct xfs_inode *batch[XFS_LOOKUP_BATCH];
|
|
||||||
int i;
|
|
||||||
|
|
||||||
rcu_read_lock();
|
|
||||||
nr_found = radix_tree_gang_lookup_tag(
|
|
||||||
&pag->pag_ici_root,
|
|
||||||
(void **)batch, first_index,
|
|
||||||
XFS_LOOKUP_BATCH,
|
|
||||||
XFS_ICI_RECLAIM_TAG);
|
|
||||||
if (!nr_found) {
|
|
||||||
done = 1;
|
|
||||||
rcu_read_unlock();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Grab the inodes before we drop the lock. If we found
|
|
||||||
* nothing, nr_found == 0 and the loop will be skipped.
|
|
||||||
*/
|
|
||||||
for (i = 0; i < nr_found; i++) {
|
|
||||||
struct xfs_inode *ip = batch[i];
|
|
||||||
|
|
||||||
if (done || !xfs_reclaim_inode_grab(ip))
|
|
||||||
batch[i] = NULL;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Update the index for the next lookup. Catch
|
|
||||||
* overflows into the next AG range which can
|
|
||||||
* occur if we have inodes in the last block of
|
|
||||||
* the AG and we are currently pointing to the
|
|
||||||
* last inode.
|
|
||||||
*
|
|
||||||
* Because we may see inodes that are from the
|
|
||||||
* wrong AG due to RCU freeing and
|
|
||||||
* reallocation, only update the index if it
|
|
||||||
* lies in this AG. It was a race that led us
|
|
||||||
* to see this inode, so another lookup from
|
|
||||||
* the same index will not find it again.
|
|
||||||
*/
|
|
||||||
if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
|
|
||||||
pag->pag_agno)
|
|
||||||
continue;
|
|
||||||
first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
|
|
||||||
if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
|
|
||||||
done = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* unlock now we've grabbed the inodes. */
|
|
||||||
rcu_read_unlock();
|
|
||||||
|
|
||||||
for (i = 0; i < nr_found; i++) {
|
|
||||||
if (batch[i])
|
|
||||||
xfs_reclaim_inode(batch[i], pag);
|
|
||||||
}
|
|
||||||
|
|
||||||
*nr_to_scan -= XFS_LOOKUP_BATCH;
|
|
||||||
cond_resched();
|
|
||||||
} while (nr_found && !done && *nr_to_scan > 0);
|
|
||||||
|
|
||||||
if (done)
|
|
||||||
first_index = 0;
|
|
||||||
WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
|
|
||||||
xfs_perag_put(pag);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
void
|
||||||
xfs_reclaim_inodes(
|
xfs_reclaim_inodes(
|
||||||
struct xfs_mount *mp)
|
struct xfs_mount *mp)
|
||||||
{
|
{
|
||||||
int nr_to_scan = INT_MAX;
|
|
||||||
|
|
||||||
while (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
|
while (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
|
||||||
xfs_ail_push_all_sync(mp->m_ail);
|
xfs_ail_push_all_sync(mp->m_ail);
|
||||||
xfs_reclaim_inodes_ag(mp, &nr_to_scan);
|
xfs_icwalk(mp, XFS_ICWALK_RECLAIM, NULL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1107,11 +1006,16 @@ xfs_reclaim_inodes_nr(
|
||||||
struct xfs_mount *mp,
|
struct xfs_mount *mp,
|
||||||
int nr_to_scan)
|
int nr_to_scan)
|
||||||
{
|
{
|
||||||
|
struct xfs_eofblocks eofb = {
|
||||||
|
.eof_flags = XFS_ICWALK_FLAG_SCAN_LIMIT,
|
||||||
|
.icw_scan_limit = nr_to_scan,
|
||||||
|
};
|
||||||
|
|
||||||
/* kick background reclaimer and push the AIL */
|
/* kick background reclaimer and push the AIL */
|
||||||
xfs_reclaim_work_queue(mp);
|
xfs_reclaim_work_queue(mp);
|
||||||
xfs_ail_push_all(mp->m_ail);
|
xfs_ail_push_all(mp->m_ail);
|
||||||
|
|
||||||
xfs_reclaim_inodes_ag(mp, &nr_to_scan);
|
xfs_icwalk(mp, XFS_ICWALK_RECLAIM, &eofb);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1221,9 +1125,8 @@ xfs_reclaim_worker(
|
||||||
{
|
{
|
||||||
struct xfs_mount *mp = container_of(to_delayed_work(work),
|
struct xfs_mount *mp = container_of(to_delayed_work(work),
|
||||||
struct xfs_mount, m_reclaim_work);
|
struct xfs_mount, m_reclaim_work);
|
||||||
int nr_to_scan = INT_MAX;
|
|
||||||
|
|
||||||
xfs_reclaim_inodes_ag(mp, &nr_to_scan);
|
xfs_icwalk(mp, XFS_ICWALK_RECLAIM, NULL);
|
||||||
xfs_reclaim_work_queue(mp);
|
xfs_reclaim_work_queue(mp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1693,6 +1596,15 @@ xfs_blockgc_free_quota(
|
||||||
|
|
||||||
/* XFS Inode Cache Walking Code */
|
/* XFS Inode Cache Walking Code */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The inode lookup is done in batches to keep the amount of lock traffic and
|
||||||
|
* radix tree lookups to a minimum. The batch size is a trade off between
|
||||||
|
* lookup reduction and stack usage. This is in the reclaim path, so we can't
|
||||||
|
* be too greedy.
|
||||||
|
*/
|
||||||
|
#define XFS_LOOKUP_BATCH 32
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Decide if we want to grab this inode in anticipation of doing work towards
|
* Decide if we want to grab this inode in anticipation of doing work towards
|
||||||
* the goal.
|
* the goal.
|
||||||
|
@ -1707,6 +1619,8 @@ xfs_icwalk_igrab(
|
||||||
return xfs_dqrele_igrab(ip);
|
return xfs_dqrele_igrab(ip);
|
||||||
case XFS_ICWALK_BLOCKGC:
|
case XFS_ICWALK_BLOCKGC:
|
||||||
return xfs_blockgc_igrab(ip);
|
return xfs_blockgc_igrab(ip);
|
||||||
|
case XFS_ICWALK_RECLAIM:
|
||||||
|
return xfs_reclaim_igrab(ip);
|
||||||
default:
|
default:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -1720,6 +1634,7 @@ static inline int
|
||||||
xfs_icwalk_process_inode(
|
xfs_icwalk_process_inode(
|
||||||
enum xfs_icwalk_goal goal,
|
enum xfs_icwalk_goal goal,
|
||||||
struct xfs_inode *ip,
|
struct xfs_inode *ip,
|
||||||
|
struct xfs_perag *pag,
|
||||||
struct xfs_eofblocks *eofb)
|
struct xfs_eofblocks *eofb)
|
||||||
{
|
{
|
||||||
int error = 0;
|
int error = 0;
|
||||||
|
@ -1731,6 +1646,9 @@ xfs_icwalk_process_inode(
|
||||||
case XFS_ICWALK_BLOCKGC:
|
case XFS_ICWALK_BLOCKGC:
|
||||||
error = xfs_blockgc_scan_inode(ip, eofb);
|
error = xfs_blockgc_scan_inode(ip, eofb);
|
||||||
break;
|
break;
|
||||||
|
case XFS_ICWALK_RECLAIM:
|
||||||
|
xfs_reclaim_inode(ip, pag);
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
@ -1755,7 +1673,10 @@ xfs_icwalk_ag(
|
||||||
restart:
|
restart:
|
||||||
done = false;
|
done = false;
|
||||||
skipped = 0;
|
skipped = 0;
|
||||||
first_index = 0;
|
if (goal == XFS_ICWALK_RECLAIM)
|
||||||
|
first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
|
||||||
|
else
|
||||||
|
first_index = 0;
|
||||||
nr_found = 0;
|
nr_found = 0;
|
||||||
do {
|
do {
|
||||||
struct xfs_inode *batch[XFS_LOOKUP_BATCH];
|
struct xfs_inode *batch[XFS_LOOKUP_BATCH];
|
||||||
|
@ -1776,6 +1697,7 @@ restart:
|
||||||
XFS_LOOKUP_BATCH, tag);
|
XFS_LOOKUP_BATCH, tag);
|
||||||
|
|
||||||
if (!nr_found) {
|
if (!nr_found) {
|
||||||
|
done = true;
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -1815,7 +1737,8 @@ restart:
|
||||||
for (i = 0; i < nr_found; i++) {
|
for (i = 0; i < nr_found; i++) {
|
||||||
if (!batch[i])
|
if (!batch[i])
|
||||||
continue;
|
continue;
|
||||||
error = xfs_icwalk_process_inode(goal, batch[i], eofb);
|
error = xfs_icwalk_process_inode(goal, batch[i], pag,
|
||||||
|
eofb);
|
||||||
if (error == -EAGAIN) {
|
if (error == -EAGAIN) {
|
||||||
skipped++;
|
skipped++;
|
||||||
continue;
|
continue;
|
||||||
|
@ -1830,8 +1753,19 @@ restart:
|
||||||
|
|
||||||
cond_resched();
|
cond_resched();
|
||||||
|
|
||||||
|
if (eofb && (eofb->eof_flags & XFS_ICWALK_FLAG_SCAN_LIMIT)) {
|
||||||
|
eofb->icw_scan_limit -= XFS_LOOKUP_BATCH;
|
||||||
|
if (eofb->icw_scan_limit <= 0)
|
||||||
|
break;
|
||||||
|
}
|
||||||
} while (nr_found && !done);
|
} while (nr_found && !done);
|
||||||
|
|
||||||
|
if (goal == XFS_ICWALK_RECLAIM) {
|
||||||
|
if (done)
|
||||||
|
first_index = 0;
|
||||||
|
WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
|
||||||
|
}
|
||||||
|
|
||||||
if (skipped) {
|
if (skipped) {
|
||||||
delay(1);
|
delay(1);
|
||||||
goto restart;
|
goto restart;
|
||||||
|
|
|
@ -15,6 +15,7 @@ struct xfs_eofblocks {
|
||||||
kgid_t eof_gid;
|
kgid_t eof_gid;
|
||||||
prid_t eof_prid;
|
prid_t eof_prid;
|
||||||
__u64 eof_min_file_size;
|
__u64 eof_min_file_size;
|
||||||
|
int icw_scan_limit;
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -3898,6 +3898,7 @@ DECLARE_EVENT_CLASS(xfs_eofblocks_class,
|
||||||
__field(uint32_t, gid)
|
__field(uint32_t, gid)
|
||||||
__field(prid_t, prid)
|
__field(prid_t, prid)
|
||||||
__field(__u64, min_file_size)
|
__field(__u64, min_file_size)
|
||||||
|
__field(int, scan_limit)
|
||||||
__field(unsigned long, caller_ip)
|
__field(unsigned long, caller_ip)
|
||||||
),
|
),
|
||||||
TP_fast_assign(
|
TP_fast_assign(
|
||||||
|
@ -3909,15 +3910,17 @@ DECLARE_EVENT_CLASS(xfs_eofblocks_class,
|
||||||
eofb->eof_gid) : 0;
|
eofb->eof_gid) : 0;
|
||||||
__entry->prid = eofb ? eofb->eof_prid : 0;
|
__entry->prid = eofb ? eofb->eof_prid : 0;
|
||||||
__entry->min_file_size = eofb ? eofb->eof_min_file_size : 0;
|
__entry->min_file_size = eofb ? eofb->eof_min_file_size : 0;
|
||||||
|
__entry->scan_limit = eofb ? eofb->icw_scan_limit : 0;
|
||||||
__entry->caller_ip = caller_ip;
|
__entry->caller_ip = caller_ip;
|
||||||
),
|
),
|
||||||
TP_printk("dev %d:%d flags 0x%x uid %u gid %u prid %u minsize %llu caller %pS",
|
TP_printk("dev %d:%d flags 0x%x uid %u gid %u prid %u minsize %llu scan_limit %d caller %pS",
|
||||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||||
__entry->flags,
|
__entry->flags,
|
||||||
__entry->uid,
|
__entry->uid,
|
||||||
__entry->gid,
|
__entry->gid,
|
||||||
__entry->prid,
|
__entry->prid,
|
||||||
__entry->min_file_size,
|
__entry->min_file_size,
|
||||||
|
__entry->scan_limit,
|
||||||
(char *)__entry->caller_ip)
|
(char *)__entry->caller_ip)
|
||||||
);
|
);
|
||||||
#define DEFINE_EOFBLOCKS_EVENT(name) \
|
#define DEFINE_EOFBLOCKS_EVENT(name) \
|
||||||
|
|
Загрузка…
Ссылка в новой задаче