2005-04-17 02:20:36 +04:00
|
|
|
/*
|
2005-11-02 06:59:41 +03:00
|
|
|
* Copyright (c) 2000-2002 Silicon Graphics, Inc.
|
|
|
|
* All Rights Reserved.
|
2005-04-17 02:20:36 +04:00
|
|
|
*
|
2005-11-02 06:59:41 +03:00
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License as
|
2005-04-17 02:20:36 +04:00
|
|
|
* published by the Free Software Foundation.
|
|
|
|
*
|
2005-11-02 06:59:41 +03:00
|
|
|
* This program is distributed in the hope that it would be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
2005-04-17 02:20:36 +04:00
|
|
|
*
|
2005-11-02 06:59:41 +03:00
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write the Free Software Foundation,
|
|
|
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
2005-04-17 02:20:36 +04:00
|
|
|
*/
|
|
|
|
#include "xfs.h"
|
|
|
|
#include "xfs_fs.h"
|
2013-10-23 03:36:05 +04:00
|
|
|
#include "xfs_shared.h"
|
2013-10-23 03:50:10 +04:00
|
|
|
#include "xfs_format.h"
|
|
|
|
#include "xfs_log_format.h"
|
|
|
|
#include "xfs_trans_resv.h"
|
2005-04-17 02:20:36 +04:00
|
|
|
#include "xfs_mount.h"
|
|
|
|
#include "xfs_inode.h"
|
|
|
|
#include "xfs_error.h"
|
2013-10-23 03:50:10 +04:00
|
|
|
#include "xfs_trans.h"
|
2005-04-17 02:20:36 +04:00
|
|
|
#include "xfs_trans_priv.h"
|
2013-10-23 03:51:50 +04:00
|
|
|
#include "xfs_quota.h"
|
2005-04-17 02:20:36 +04:00
|
|
|
#include "xfs_qm.h"
|
|
|
|
|
|
|
|
/*
 * Forward declaration: xfs_trans_alloc_dqinfo() is called by functions
 * further down before its definition appears.
 */
STATIC void xfs_trans_alloc_dqinfo(xfs_trans_t *);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Add the locked dquot to the transaction.
|
|
|
|
* The dquot must be locked, and it cannot be associated with any
|
|
|
|
* transaction.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
xfs_trans_dqjoin(
|
|
|
|
xfs_trans_t *tp,
|
|
|
|
xfs_dquot_t *dqp)
|
|
|
|
{
|
2010-04-20 11:01:53 +04:00
|
|
|
ASSERT(dqp->q_transp != tp);
|
2005-04-17 02:20:36 +04:00
|
|
|
ASSERT(XFS_DQ_IS_LOCKED(dqp));
|
2010-06-23 12:11:15 +04:00
|
|
|
ASSERT(dqp->q_logitem.qli_dquot == dqp);
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Get a log_item_desc to point at the new item.
|
|
|
|
*/
|
2010-06-23 12:11:15 +04:00
|
|
|
xfs_trans_add_item(tp, &dqp->q_logitem.qli_item);
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
/*
|
2011-07-08 16:34:47 +04:00
|
|
|
* Initialize d_transp so we can later determine if this dquot is
|
2005-04-17 02:20:36 +04:00
|
|
|
* associated with this transaction.
|
|
|
|
*/
|
|
|
|
dqp->q_transp = tp;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This is called to mark the dquot as needing
|
|
|
|
* to be logged when the transaction is committed. The dquot must
|
|
|
|
* already be associated with the given transaction.
|
|
|
|
* Note that it marks the entire transaction as dirty. In the ordinary
|
|
|
|
* case, this gets called via xfs_trans_commit, after the transaction
|
|
|
|
* is already dirty. However, there's nothing stop this from getting
|
|
|
|
* called directly, as done by xfs_qm_scall_setqlim. Hence, the TRANS_DIRTY
|
|
|
|
* flag.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
xfs_trans_log_dquot(
|
|
|
|
xfs_trans_t *tp,
|
|
|
|
xfs_dquot_t *dqp)
|
|
|
|
{
|
2010-04-20 11:01:53 +04:00
|
|
|
ASSERT(dqp->q_transp == tp);
|
2005-04-17 02:20:36 +04:00
|
|
|
ASSERT(XFS_DQ_IS_LOCKED(dqp));
|
|
|
|
|
|
|
|
tp->t_flags |= XFS_TRANS_DIRTY;
|
2010-06-23 12:11:15 +04:00
|
|
|
dqp->q_logitem.qli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Carry forward whatever is left of the quota blk reservation to
|
|
|
|
* the spanky new transaction
|
|
|
|
*/
|
2009-06-08 17:33:32 +04:00
|
|
|
void
|
2005-04-17 02:20:36 +04:00
|
|
|
xfs_trans_dup_dqinfo(
|
|
|
|
xfs_trans_t *otp,
|
|
|
|
xfs_trans_t *ntp)
|
|
|
|
{
|
|
|
|
xfs_dqtrx_t *oq, *nq;
|
2015-06-01 03:50:00 +03:00
|
|
|
int i, j;
|
2005-04-17 02:20:36 +04:00
|
|
|
xfs_dqtrx_t *oqa, *nqa;
|
xfs: fix quota block reservation leak when tp allocates and frees blocks
Al Viro reports that generic/231 fails frequently on XFS and bisected
the problem to the following commit:
5d11fb4b xfs: rework zero range to prevent invalid i_size updates
... which is just the first commit that happens to cause fsx to
reproduce the problem. fsx reproduces via zero range calls. The
aforementioned commit overhauls zero range to use hole punch and
fallocate. As it turns out, the problem is reproducible on demand using
basic hole punch as follows:
$ mkfs.xfs -f -m crc=1,finobt=1 <dev>
$ mount <dev> /mnt -o uquota
$ xfs_io -f -c "falloc 0 50m" /mnt/file
$ for i in $(seq 1 20); do xfs_io -c "fpunch ${i}m 32k" /mnt/file; done
$ rm -f /mnt/file
$ repquota -us /mnt
...
User used soft hard grace used soft hard grace
----------------------------------------------------------------------
root -- 32K 0K 0K 3 0 0
A file is allocated with a single 50m extent. The extent count increases
via hole punches until the bmap converts to btree format. The file is
removed but quota reports 32k of space usage for the user. This
reservation is effectively leaked for the lifetime of the mount.
The reason this occurs is because the quota block reservation tracking
is confused when a transaction happens to free and allocate blocks at
the same time. Consider the following sequence of events:
- tp is allocated from xfs_free_file_space() and reserves several blocks
for btree management. Blocks are reserved against the dquot and marked
as such in the transaction (qtrx->qt_blk_res).
- 8 blocks are accounted free when the 32k range is punched out.
xfs_trans_mod_dquot() is called with XFS_TRANS_DQ_BCOUNT and sets
->qt_bcount_delta to -8.
- Subsequently, a block is allocated against the same transaction by
xfs_bmap_extents_to_btree() for btree conversion. A call to
xfs_trans_mod_dquot() increases qt_blk_res_used to 1 and qt_bcount_delta
to -7.
- The transaction is dup'd and committed by xfs_bmap_finish().
xfs_trans_dup_dqinfo() sets the first transaction up such that it has a
matching qt_blk_res and qt_blk_res_used of 1. The remaining unused
reservation is transferred to the duplicate tp.
When the transactions are committed, the dquots are fixed up in
xfs_trans_apply_dquot_deltas() according to one of two methods:
1.) If the transaction holds a block reservation (->qt_blk_res != 0),
_only_ the unused portion reservation is unaccounted from the dquot.
Note that the tp duplication behavior of xfs_bmap_finish() makes it such
that qt_blk_res is typically 0 for tp's with unused reservation.
2.) Otherwise, the dquot is fixed up based on the block delta
(->qt_bcount_delta) created by the transaction.
Therefore, if a transaction has a negative qt_bcount_delta and positive
qt_blk_res_used, the former set of blocks that have been removed from
the file are never factored out of the in-core dquot reservation.
Instead, *_apply_dquot_deltas() sees 1 block used out of a 1 block
reservation and believes there is nothing to fix up. The on-disk
d_bcount is updated independently from qt_bcount_delta, and thus is
correct (and allows the quota usage to correct on remount).
To deal with this situation, we effectively want the "used reservation"
part of the transaction to be consistent with any freed blocks with
respect to quota tracking. For example, if 8 blocks are freed, the
subsequent single block allocation does not need to consume the initial
reservation made by the tp. Instead, it simply borrows one from the
previously freed. One possible implementation of such borrowing is to
avoid the blks_res_used increment when bcount_delta is negative. This
alone is flawed logic in that it only handles the case where blocks are
freed before allocated, however.
Rather than add more complexity to manage synchronization between
bcount_delta and blks_res_used, kill the latter entirely. blk_res_used
is only updated in one place and always in sync with delta_bcount.
Therefore, the net block reservation consumption of the transaction is
always available from bcount_delta. Calculate the reservation
consumption on the fly where necessary based on whether the tp has a
reservation and results in a positive net block delta on the inode.
Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2015-06-01 00:15:37 +03:00
|
|
|
ulong blk_res_used;
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
if (!otp->t_dqinfo)
|
|
|
|
return;
|
|
|
|
|
|
|
|
xfs_trans_alloc_dqinfo(ntp);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Because the quota blk reservation is carried forward,
|
|
|
|
* it is also necessary to carry forward the DQ_DIRTY flag.
|
|
|
|
*/
|
2015-06-01 03:50:00 +03:00
|
|
|
if (otp->t_flags & XFS_TRANS_DQ_DIRTY)
|
2005-04-17 02:20:36 +04:00
|
|
|
ntp->t_flags |= XFS_TRANS_DQ_DIRTY;
|
|
|
|
|
2013-06-28 02:25:09 +04:00
|
|
|
for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) {
|
|
|
|
oqa = otp->t_dqinfo->dqs[j];
|
|
|
|
nqa = ntp->t_dqinfo->dqs[j];
|
2005-04-17 02:20:36 +04:00
|
|
|
for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
|
xfs: fix quota block reservation leak when tp allocates and frees blocks
Al Viro reports that generic/231 fails frequently on XFS and bisected
the problem to the following commit:
5d11fb4b xfs: rework zero range to prevent invalid i_size updates
... which is just the first commit that happens to cause fsx to
reproduce the problem. fsx reproduces via zero range calls. The
aforementioned commit overhauls zero range to use hole punch and
fallocate. As it turns out, the problem is reproducible on demand using
basic hole punch as follows:
$ mkfs.xfs -f -m crc=1,finobt=1 <dev>
$ mount <dev> /mnt -o uquota
$ xfs_io -f -c "falloc 0 50m" /mnt/file
$ for i in $(seq 1 20); do xfs_io -c "fpunch ${i}m 32k" /mnt/file; done
$ rm -f /mnt/file
$ repquota -us /mnt
...
User used soft hard grace used soft hard grace
----------------------------------------------------------------------
root -- 32K 0K 0K 3 0 0
A file is allocated with a single 50m extent. The extent count increases
via hole punches until the bmap converts to btree format. The file is
removed but quota reports 32k of space usage for the user. This
reservation is effectively leaked for the lifetime of the mount.
The reason this occurs is because the quota block reservation tracking
is confused when a transaction happens to free and allocate blocks at
the same time. Consider the following sequence of events:
- tp is allocated from xfs_free_file_space() and reserves several blocks
for btree management. Blocks are reserved against the dquot and marked
as such in the transaction (qtrx->qt_blk_res).
- 8 blocks are accounted free when the 32k range is punched out.
xfs_trans_mod_dquot() is called with XFS_TRANS_DQ_BCOUNT and sets
->qt_bcount_delta to -8.
- Subsequently, a block is allocated against the same transaction by
xfs_bmap_extents_to_btree() for btree conversion. A call to
xfs_trans_mod_dquot() increases qt_blk_res_used to 1 and qt_bcount_delta
to -7.
- The transaction is dup'd and committed by xfs_bmap_finish().
xfs_trans_dup_dqinfo() sets the first transaction up such that it has a
matching qt_blk_res and qt_blk_res_used of 1. The remaining unused
reservation is transferred to the duplicate tp.
When the transactions are committed, the dquots are fixed up in
xfs_trans_apply_dquot_deltas() according to one of two methods:
1.) If the transaction holds a block reservation (->qt_blk_res != 0),
_only_ the unused portion reservation is unaccounted from the dquot.
Note that the tp duplication behavior of xfs_bmap_finish() makes it such
that qt_blk_res is typically 0 for tp's with unused reservation.
2.) Otherwise, the dquot is fixed up based on the block delta
(->qt_bcount_delta) created by the transaction.
Therefore, if a transaction has a negative qt_bcount_delta and positive
qt_blk_res_used, the former set of blocks that have been removed from
the file are never factored out of the in-core dquot reservation.
Instead, *_apply_dquot_deltas() sees 1 block used out of a 1 block
reservation and believes there is nothing to fix up. The on-disk
d_bcount is updated independently from qt_bcount_delta, and thus is
correct (and allows the quota usage to correct on remount).
To deal with this situation, we effectively want the "used reservation"
part of the transaction to be consistent with any freed blocks with
respect to quota tracking. For example, if 8 blocks are freed, the
subsequent single block allocation does not need to consume the initial
reservation made by the tp. Instead, it simply borrows one from the
previously freed. One possible implementation of such borrowing is to
avoid the blks_res_used increment when bcount_delta is negative. This
alone is flawed logic in that it only handles the case where blocks are
freed before allocated, however.
Rather than add more complexity to manage synchronization between
bcount_delta and blks_res_used, kill the latter entirely. blk_res_used
is only updated in one place and always in sync with delta_bcount.
Therefore, the net block reservation consumption of the transaction is
always available from bcount_delta. Calculate the reservation
consumption on the fly where necessary based on whether the tp has a
reservation and results in a positive net block delta on the inode.
Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2015-06-01 00:15:37 +03:00
|
|
|
blk_res_used = 0;
|
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
if (oqa[i].qt_dquot == NULL)
|
|
|
|
break;
|
|
|
|
oq = &oqa[i];
|
|
|
|
nq = &nqa[i];
|
|
|
|
|
xfs: fix quota block reservation leak when tp allocates and frees blocks
Al Viro reports that generic/231 fails frequently on XFS and bisected
the problem to the following commit:
5d11fb4b xfs: rework zero range to prevent invalid i_size updates
... which is just the first commit that happens to cause fsx to
reproduce the problem. fsx reproduces via zero range calls. The
aforementioned commit overhauls zero range to use hole punch and
fallocate. As it turns out, the problem is reproducible on demand using
basic hole punch as follows:
$ mkfs.xfs -f -m crc=1,finobt=1 <dev>
$ mount <dev> /mnt -o uquota
$ xfs_io -f -c "falloc 0 50m" /mnt/file
$ for i in $(seq 1 20); do xfs_io -c "fpunch ${i}m 32k" /mnt/file; done
$ rm -f /mnt/file
$ repquota -us /mnt
...
User used soft hard grace used soft hard grace
----------------------------------------------------------------------
root -- 32K 0K 0K 3 0 0
A file is allocated with a single 50m extent. The extent count increases
via hole punches until the bmap converts to btree format. The file is
removed but quota reports 32k of space usage for the user. This
reservation is effectively leaked for the lifetime of the mount.
The reason this occurs is because the quota block reservation tracking
is confused when a transaction happens to free and allocate blocks at
the same time. Consider the following sequence of events:
- tp is allocated from xfs_free_file_space() and reserves several blocks
for btree management. Blocks are reserved against the dquot and marked
as such in the transaction (qtrx->qt_blk_res).
- 8 blocks are accounted free when the 32k range is punched out.
xfs_trans_mod_dquot() is called with XFS_TRANS_DQ_BCOUNT and sets
->qt_bcount_delta to -8.
- Subsequently, a block is allocated against the same transaction by
xfs_bmap_extents_to_btree() for btree conversion. A call to
xfs_trans_mod_dquot() increases qt_blk_res_used to 1 and qt_bcount_delta
to -7.
- The transaction is dup'd and committed by xfs_bmap_finish().
xfs_trans_dup_dqinfo() sets the first transaction up such that it has a
matching qt_blk_res and qt_blk_res_used of 1. The remaining unused
reservation is transferred to the duplicate tp.
When the transactions are committed, the dquots are fixed up in
xfs_trans_apply_dquot_deltas() according to one of two methods:
1.) If the transaction holds a block reservation (->qt_blk_res != 0),
_only_ the unused portion reservation is unaccounted from the dquot.
Note that the tp duplication behavior of xfs_bmap_finish() makes it such
that qt_blk_res is typically 0 for tp's with unused reservation.
2.) Otherwise, the dquot is fixed up based on the block delta
(->qt_bcount_delta) created by the transaction.
Therefore, if a transaction has a negative qt_bcount_delta and positive
qt_blk_res_used, the former set of blocks that have been removed from
the file are never factored out of the in-core dquot reservation.
Instead, *_apply_dquot_deltas() sees 1 block used out of a 1 block
reservation and believes there is nothing to fix up. The on-disk
d_bcount is updated independently from qt_bcount_delta, and thus is
correct (and allows the quota usage to correct on remount).
To deal with this situation, we effectively want the "used reservation"
part of the transaction to be consistent with any freed blocks with
respect to quota tracking. For example, if 8 blocks are freed, the
subsequent single block allocation does not need to consume the initial
reservation made by the tp. Instead, it simply borrows one from the
previously freed. One possible implementation of such borrowing is to
avoid the blks_res_used increment when bcount_delta is negative. This
alone is flawed logic in that it only handles the case where blocks are
freed before allocated, however.
Rather than add more complexity to manage synchronization between
bcount_delta and blks_res_used, kill the latter entirely. blk_res_used
is only updated in one place and always in sync with delta_bcount.
Therefore, the net block reservation consumption of the transaction is
always available from bcount_delta. Calculate the reservation
consumption on the fly where necessary based on whether the tp has a
reservation and results in a positive net block delta on the inode.
Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2015-06-01 00:15:37 +03:00
|
|
|
if (oq->qt_blk_res && oq->qt_bcount_delta > 0)
|
|
|
|
blk_res_used = oq->qt_bcount_delta;
|
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
nq->qt_dquot = oq->qt_dquot;
|
|
|
|
nq->qt_bcount_delta = nq->qt_icount_delta = 0;
|
|
|
|
nq->qt_rtbcount_delta = 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Transfer whatever is left of the reservations.
|
|
|
|
*/
|
xfs: fix quota block reservation leak when tp allocates and frees blocks
Al Viro reports that generic/231 fails frequently on XFS and bisected
the problem to the following commit:
5d11fb4b xfs: rework zero range to prevent invalid i_size updates
... which is just the first commit that happens to cause fsx to
reproduce the problem. fsx reproduces via zero range calls. The
aforementioned commit overhauls zero range to use hole punch and
fallocate. As it turns out, the problem is reproducible on demand using
basic hole punch as follows:
$ mkfs.xfs -f -m crc=1,finobt=1 <dev>
$ mount <dev> /mnt -o uquota
$ xfs_io -f -c "falloc 0 50m" /mnt/file
$ for i in $(seq 1 20); do xfs_io -c "fpunch ${i}m 32k" /mnt/file; done
$ rm -f /mnt/file
$ repquota -us /mnt
...
User used soft hard grace used soft hard grace
----------------------------------------------------------------------
root -- 32K 0K 0K 3 0 0
A file is allocated with a single 50m extent. The extent count increases
via hole punches until the bmap converts to btree format. The file is
removed but quota reports 32k of space usage for the user. This
reservation is effectively leaked for the lifetime of the mount.
The reason this occurs is because the quota block reservation tracking
is confused when a transaction happens to free and allocate blocks at
the same time. Consider the following sequence of events:
- tp is allocated from xfs_free_file_space() and reserves several blocks
for btree management. Blocks are reserved against the dquot and marked
as such in the transaction (qtrx->qt_blk_res).
- 8 blocks are accounted free when the 32k range is punched out.
xfs_trans_mod_dquot() is called with XFS_TRANS_DQ_BCOUNT and sets
->qt_bcount_delta to -8.
- Subsequently, a block is allocated against the same transaction by
xfs_bmap_extents_to_btree() for btree conversion. A call to
xfs_trans_mod_dquot() increases qt_blk_res_used to 1 and qt_bcount_delta
to -7.
- The transaction is dup'd and committed by xfs_bmap_finish().
xfs_trans_dup_dqinfo() sets the first transaction up such that it has a
matching qt_blk_res and qt_blk_res_used of 1. The remaining unused
reservation is transferred to the duplicate tp.
When the transactions are committed, the dquots are fixed up in
xfs_trans_apply_dquot_deltas() according to one of two methods:
1.) If the transaction holds a block reservation (->qt_blk_res != 0),
_only_ the unused portion reservation is unaccounted from the dquot.
Note that the tp duplication behavior of xfs_bmap_finish() makes it such
that qt_blk_res is typically 0 for tp's with unused reservation.
2.) Otherwise, the dquot is fixed up based on the block delta
(->qt_bcount_delta) created by the transaction.
Therefore, if a transaction has a negative qt_bcount_delta and positive
qt_blk_res_used, the former set of blocks that have been removed from
the file are never factored out of the in-core dquot reservation.
Instead, *_apply_dquot_deltas() sees 1 block used out of a 1 block
reservation and believes there is nothing to fix up. The on-disk
d_bcount is updated independently from qt_bcount_delta, and thus is
correct (and allows the quota usage to correct on remount).
To deal with this situation, we effectively want the "used reservation"
part of the transaction to be consistent with any freed blocks with
respect to quota tracking. For example, if 8 blocks are freed, the
subsequent single block allocation does not need to consume the initial
reservation made by the tp. Instead, it simply borrows one from the
previously freed. One possible implementation of such borrowing is to
avoid the blks_res_used increment when bcount_delta is negative. This
alone is flawed logic in that it only handles the case where blocks are
freed before allocated, however.
Rather than add more complexity to manage synchronization between
bcount_delta and blks_res_used, kill the latter entirely. blk_res_used
is only updated in one place and always in sync with delta_bcount.
Therefore, the net block reservation consumption of the transaction is
always available from bcount_delta. Calculate the reservation
consumption on the fly where necessary based on whether the tp has a
reservation and results in a positive net block delta on the inode.
Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2015-06-01 00:15:37 +03:00
|
|
|
nq->qt_blk_res = oq->qt_blk_res - blk_res_used;
|
|
|
|
oq->qt_blk_res = blk_res_used;
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
nq->qt_rtblk_res = oq->qt_rtblk_res -
|
|
|
|
oq->qt_rtblk_res_used;
|
|
|
|
oq->qt_rtblk_res = oq->qt_rtblk_res_used;
|
|
|
|
|
|
|
|
nq->qt_ino_res = oq->qt_ino_res - oq->qt_ino_res_used;
|
|
|
|
oq->qt_ino_res = oq->qt_ino_res_used;
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Wrap around mod_dquot to account for both user and group quotas.
|
|
|
|
*/
|
2009-06-08 17:33:32 +04:00
|
|
|
void
|
2005-04-17 02:20:36 +04:00
|
|
|
xfs_trans_mod_dquot_byino(
|
|
|
|
xfs_trans_t *tp,
|
|
|
|
xfs_inode_t *ip,
|
|
|
|
uint field,
|
|
|
|
long delta)
|
|
|
|
{
|
2009-06-08 17:33:32 +04:00
|
|
|
xfs_mount_t *mp = tp->t_mountp;
|
2005-04-17 02:20:36 +04:00
|
|
|
|
2009-06-08 17:33:32 +04:00
|
|
|
if (!XFS_IS_QUOTA_RUNNING(mp) ||
|
|
|
|
!XFS_IS_QUOTA_ON(mp) ||
|
2013-06-28 02:25:04 +04:00
|
|
|
xfs_is_quota_inode(&mp->m_sb, ip->i_ino))
|
2005-04-17 02:20:36 +04:00
|
|
|
return;
|
|
|
|
|
|
|
|
if (tp->t_dqinfo == NULL)
|
|
|
|
xfs_trans_alloc_dqinfo(tp);
|
|
|
|
|
2005-06-21 09:38:48 +04:00
|
|
|
if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot)
|
2005-04-17 02:20:36 +04:00
|
|
|
(void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta);
|
2013-07-11 09:00:40 +04:00
|
|
|
if (XFS_IS_GQUOTA_ON(mp) && ip->i_gdquot)
|
2005-04-17 02:20:36 +04:00
|
|
|
(void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta);
|
2013-07-11 09:00:40 +04:00
|
|
|
if (XFS_IS_PQUOTA_ON(mp) && ip->i_pdquot)
|
|
|
|
(void) xfs_trans_mod_dquot(tp, ip->i_pdquot, field, delta);
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
|
2013-06-28 02:25:07 +04:00
|
|
|
STATIC struct xfs_dqtrx *
|
2005-04-17 02:20:36 +04:00
|
|
|
xfs_trans_get_dqtrx(
|
2013-06-28 02:25:07 +04:00
|
|
|
struct xfs_trans *tp,
|
|
|
|
struct xfs_dquot *dqp)
|
2005-04-17 02:20:36 +04:00
|
|
|
{
|
2013-06-28 02:25:07 +04:00
|
|
|
int i;
|
|
|
|
struct xfs_dqtrx *qa;
|
2005-04-17 02:20:36 +04:00
|
|
|
|
2013-06-28 02:25:09 +04:00
|
|
|
if (XFS_QM_ISUDQ(dqp))
|
|
|
|
qa = tp->t_dqinfo->dqs[XFS_QM_TRANS_USR];
|
2013-07-11 09:00:40 +04:00
|
|
|
else if (XFS_QM_ISGDQ(dqp))
|
2013-06-28 02:25:09 +04:00
|
|
|
qa = tp->t_dqinfo->dqs[XFS_QM_TRANS_GRP];
|
2013-07-11 09:00:40 +04:00
|
|
|
else if (XFS_QM_ISPDQ(dqp))
|
|
|
|
qa = tp->t_dqinfo->dqs[XFS_QM_TRANS_PRJ];
|
|
|
|
else
|
|
|
|
return NULL;
|
2005-04-17 02:20:36 +04:00
|
|
|
|
2010-04-20 11:01:53 +04:00
|
|
|
for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
|
2005-04-17 02:20:36 +04:00
|
|
|
if (qa[i].qt_dquot == NULL ||
|
2010-04-20 11:01:53 +04:00
|
|
|
qa[i].qt_dquot == dqp)
|
|
|
|
return &qa[i];
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
|
2010-04-20 11:01:53 +04:00
|
|
|
return NULL;
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Make the changes in the transaction structure.
|
|
|
|
* The moral equivalent to xfs_trans_mod_sb().
|
|
|
|
* We don't touch any fields in the dquot, so we don't care
|
|
|
|
* if it's locked or not (most of the time it won't be).
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
xfs_trans_mod_dquot(
|
|
|
|
xfs_trans_t *tp,
|
|
|
|
xfs_dquot_t *dqp,
|
|
|
|
uint field,
|
|
|
|
long delta)
|
|
|
|
{
|
|
|
|
xfs_dqtrx_t *qtrx;
|
|
|
|
|
|
|
|
ASSERT(tp);
|
2009-06-08 17:33:32 +04:00
|
|
|
ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
|
2005-04-17 02:20:36 +04:00
|
|
|
qtrx = NULL;
|
|
|
|
|
|
|
|
if (tp->t_dqinfo == NULL)
|
|
|
|
xfs_trans_alloc_dqinfo(tp);
|
|
|
|
/*
|
|
|
|
* Find either the first free slot or the slot that belongs
|
|
|
|
* to this dquot.
|
|
|
|
*/
|
|
|
|
qtrx = xfs_trans_get_dqtrx(tp, dqp);
|
|
|
|
ASSERT(qtrx);
|
|
|
|
if (qtrx->qt_dquot == NULL)
|
|
|
|
qtrx->qt_dquot = dqp;
|
|
|
|
|
|
|
|
switch (field) {
|
|
|
|
|
|
|
|
/*
|
|
|
|
* regular disk blk reservation
|
|
|
|
*/
|
|
|
|
case XFS_TRANS_DQ_RES_BLKS:
|
|
|
|
qtrx->qt_blk_res += (ulong)delta;
|
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* inode reservation
|
|
|
|
*/
|
|
|
|
case XFS_TRANS_DQ_RES_INOS:
|
|
|
|
qtrx->qt_ino_res += (ulong)delta;
|
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* disk blocks used.
|
|
|
|
*/
|
|
|
|
case XFS_TRANS_DQ_BCOUNT:
|
|
|
|
qtrx->qt_bcount_delta += delta;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case XFS_TRANS_DQ_DELBCOUNT:
|
|
|
|
qtrx->qt_delbcnt_delta += delta;
|
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Inode Count
|
|
|
|
*/
|
|
|
|
case XFS_TRANS_DQ_ICOUNT:
|
|
|
|
if (qtrx->qt_ino_res && delta > 0) {
|
|
|
|
qtrx->qt_ino_res_used += (ulong)delta;
|
|
|
|
ASSERT(qtrx->qt_ino_res >= qtrx->qt_ino_res_used);
|
|
|
|
}
|
|
|
|
qtrx->qt_icount_delta += delta;
|
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* rtblk reservation
|
|
|
|
*/
|
|
|
|
case XFS_TRANS_DQ_RES_RTBLKS:
|
|
|
|
qtrx->qt_rtblk_res += (ulong)delta;
|
|
|
|
break;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* rtblk count
|
|
|
|
*/
|
|
|
|
case XFS_TRANS_DQ_RTBCOUNT:
|
|
|
|
if (qtrx->qt_rtblk_res && delta > 0) {
|
|
|
|
qtrx->qt_rtblk_res_used += (ulong)delta;
|
|
|
|
ASSERT(qtrx->qt_rtblk_res >= qtrx->qt_rtblk_res_used);
|
|
|
|
}
|
|
|
|
qtrx->qt_rtbcount_delta += delta;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case XFS_TRANS_DQ_DELRTBCOUNT:
|
|
|
|
qtrx->qt_delrtb_delta += delta;
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
ASSERT(0);
|
|
|
|
}
|
|
|
|
tp->t_flags |= XFS_TRANS_DQ_DIRTY;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
xfs: dquot log reservations are too small
During review of the separate project quota inode patches, it became
obvious that the dquot log reservation calculation underestimated
the number dquots that can be modified in a transaction. This has
it's roots way back in the Irix quota implementation.
That is, when quotas were first implemented in XFS, it only
supported user and project quotas as Irix did not have group quotas.
Hence the worst case operation involving dquot modification was
calculated to involve 2 user dquots and 1 project dquot or 1 user
dequot and 2 project dquots. i.e. 3 dquots. This was determined back
in 1996, and has remained unchanged ever since.
However, back in 2001, the Linux XFS port dropped all support for
project quota and implmented group quotas over the top. This was
effectively done with a search-and-replace of project with group,
and as such the log reservation was not changed. However, with the
advent of group quotas, chmod and rename now could modify more than
3 dquots in a single transaction - both could modify 4 dquots. Hence
this log reservation has been wrong for a long time.
In 2005, project quota support was reintroduced into Linux, but it
was implemented to be mutually exclusive to group quotas and so this
didn't add any new changes to the dquot log reservation. Hence when
project quotas were in use (rather than group quotas) the log
reservation was again valid, just like in the Irix days.
Now, with the addition of the separate project quota inode, group
and project quotas are no longer mutually exclusive, and hence
operations can now modify three dquots per inode where previously it
was only two. The worst case here is the rename transaction, which
can allocate/free space on two different directory inodes, and if
they have different uid/gid/prid configurations and are world
writeable, then rename can actually modify 6 different dquots now.
Further, the dquot log reservation doesn't take into account the
space used by the dquot log format structure that precedes the dquot
that is logged, and hence further underestimates the worst case
log space required by dquots during a transaction. This has been
missing since the first commit in 1996.
Hence the worst case log reservation needs to be increased from 3 to
6, and it needs to take into account a log format header for each of
those dquots.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
2013-07-10 01:04:01 +04:00
|
|
|
* Given an array of dqtrx structures, lock all the dquots associated and join
|
|
|
|
* them to the transaction, provided they have been modified. We know that the
|
2013-11-06 15:45:36 +04:00
|
|
|
* highest number of dquots of one type - usr, grp and prj - involved in a
|
|
|
|
* transaction is 3 so we don't need to make this very generic.
|
2005-04-17 02:20:36 +04:00
|
|
|
*/
|
|
|
|
STATIC void
|
|
|
|
xfs_trans_dqlockedjoin(
|
|
|
|
xfs_trans_t *tp,
|
|
|
|
xfs_dqtrx_t *q)
|
|
|
|
{
|
|
|
|
ASSERT(q[0].qt_dquot != NULL);
|
|
|
|
if (q[1].qt_dquot == NULL) {
|
|
|
|
xfs_dqlock(q[0].qt_dquot);
|
|
|
|
xfs_trans_dqjoin(tp, q[0].qt_dquot);
|
|
|
|
} else {
|
|
|
|
ASSERT(XFS_QM_TRANS_MAXDQS == 2);
|
|
|
|
xfs_dqlock2(q[0].qt_dquot, q[1].qt_dquot);
|
|
|
|
xfs_trans_dqjoin(tp, q[0].qt_dquot);
|
|
|
|
xfs_trans_dqjoin(tp, q[1].qt_dquot);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Called by xfs_trans_commit() and similar in spirit to
|
|
|
|
* xfs_trans_apply_sb_deltas().
|
|
|
|
* Go thru all the dquots belonging to this transaction and modify the
|
|
|
|
* INCORE dquot to reflect the actual usages.
|
|
|
|
* Unreserve just the reservations done by this transaction.
|
|
|
|
* dquot is still left locked at exit.
|
|
|
|
*/
|
2009-06-08 17:33:32 +04:00
|
|
|
void
|
2005-04-17 02:20:36 +04:00
|
|
|
xfs_trans_apply_dquot_deltas(
|
2013-03-18 18:51:45 +04:00
|
|
|
struct xfs_trans *tp)
|
2005-04-17 02:20:36 +04:00
|
|
|
{
|
|
|
|
int i, j;
|
2013-03-18 18:51:45 +04:00
|
|
|
struct xfs_dquot *dqp;
|
|
|
|
struct xfs_dqtrx *qtrx, *qa;
|
|
|
|
struct xfs_disk_dquot *d;
|
2005-04-17 02:20:36 +04:00
|
|
|
long totalbdelta;
|
|
|
|
long totalrtbdelta;
|
|
|
|
|
2009-06-08 17:33:32 +04:00
|
|
|
if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY))
|
2005-04-17 02:20:36 +04:00
|
|
|
return;
|
|
|
|
|
|
|
|
ASSERT(tp->t_dqinfo);
|
2013-06-28 02:25:09 +04:00
|
|
|
for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) {
|
|
|
|
qa = tp->t_dqinfo->dqs[j];
|
|
|
|
if (qa[0].qt_dquot == NULL)
|
2005-04-17 02:20:36 +04:00
|
|
|
continue;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Lock all of the dquots and join them to the transaction.
|
|
|
|
*/
|
|
|
|
xfs_trans_dqlockedjoin(tp, qa);
|
|
|
|
|
|
|
|
for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
|
|
|
|
qtrx = &qa[i];
|
|
|
|
/*
|
|
|
|
* The array of dquots is filled
|
|
|
|
* sequentially, not sparsely.
|
|
|
|
*/
|
|
|
|
if ((dqp = qtrx->qt_dquot) == NULL)
|
|
|
|
break;
|
|
|
|
|
|
|
|
ASSERT(XFS_DQ_IS_LOCKED(dqp));
|
2010-04-20 11:01:53 +04:00
|
|
|
ASSERT(dqp->q_transp == tp);
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* adjust the actual number of blocks used
|
|
|
|
*/
|
|
|
|
d = &dqp->q_core;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The issue here is - sometimes we don't make a blkquota
|
|
|
|
* reservation intentionally to be fair to users
|
|
|
|
* (when the amount is small). On the other hand,
|
|
|
|
* delayed allocs do make reservations, but that's
|
|
|
|
* outside of a transaction, so we have no
|
|
|
|
* idea how much was really reserved.
|
|
|
|
* So, here we've accumulated delayed allocation blks and
|
|
|
|
* non-delay blks. The assumption is that the
|
|
|
|
* delayed ones are always reserved (outside of a
|
|
|
|
* transaction), and the others may or may not have
|
|
|
|
* quota reservations.
|
|
|
|
*/
|
|
|
|
totalbdelta = qtrx->qt_bcount_delta +
|
|
|
|
qtrx->qt_delbcnt_delta;
|
|
|
|
totalrtbdelta = qtrx->qt_rtbcount_delta +
|
|
|
|
qtrx->qt_delrtb_delta;
|
2011-07-13 15:43:50 +04:00
|
|
|
#ifdef DEBUG
|
2005-04-17 02:20:36 +04:00
|
|
|
if (totalbdelta < 0)
|
2005-11-02 07:01:12 +03:00
|
|
|
ASSERT(be64_to_cpu(d->d_bcount) >=
|
2011-07-13 15:43:50 +04:00
|
|
|
-totalbdelta);
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
if (totalrtbdelta < 0)
|
2005-11-02 07:01:12 +03:00
|
|
|
ASSERT(be64_to_cpu(d->d_rtbcount) >=
|
2011-07-13 15:43:50 +04:00
|
|
|
-totalrtbdelta);
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
if (qtrx->qt_icount_delta < 0)
|
2005-11-02 07:01:12 +03:00
|
|
|
ASSERT(be64_to_cpu(d->d_icount) >=
|
2011-07-13 15:43:50 +04:00
|
|
|
-qtrx->qt_icount_delta);
|
2005-04-17 02:20:36 +04:00
|
|
|
#endif
|
|
|
|
if (totalbdelta)
|
2008-02-14 02:03:29 +03:00
|
|
|
be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta);
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
if (qtrx->qt_icount_delta)
|
2008-02-14 02:03:29 +03:00
|
|
|
be64_add_cpu(&d->d_icount, (xfs_qcnt_t)qtrx->qt_icount_delta);
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
if (totalrtbdelta)
|
2008-02-14 02:03:29 +03:00
|
|
|
be64_add_cpu(&d->d_rtbcount, (xfs_qcnt_t)totalrtbdelta);
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Get any default limits in use.
|
|
|
|
* Start/reset the timer(s) if needed.
|
|
|
|
*/
|
|
|
|
if (d->d_id) {
|
2013-03-18 18:51:45 +04:00
|
|
|
xfs_qm_adjust_dqlimits(tp->t_mountp, dqp);
|
2005-04-17 02:20:36 +04:00
|
|
|
xfs_qm_adjust_dqtimers(tp->t_mountp, d);
|
|
|
|
}
|
|
|
|
|
|
|
|
dqp->dq_flags |= XFS_DQ_DIRTY;
|
|
|
|
/*
|
|
|
|
* add this to the list of items to get logged
|
|
|
|
*/
|
|
|
|
xfs_trans_log_dquot(tp, dqp);
|
|
|
|
/*
|
|
|
|
* Take off what's left of the original reservation.
|
|
|
|
* In case of delayed allocations, there's no
|
|
|
|
* reservation that a transaction structure knows of.
|
|
|
|
*/
|
|
|
|
if (qtrx->qt_blk_res != 0) {
|
xfs: fix quota block reservation leak when tp allocates and frees blocks
Al Viro reports that generic/231 fails frequently on XFS and bisected
the problem to the following commit:
5d11fb4b xfs: rework zero range to prevent invalid i_size updates
... which is just the first commit that happens to cause fsx to
reproduce the problem. fsx reproduces via zero range calls. The
aforementioned commit overhauls zero range to use hole punch and
fallocate. As it turns out, the problem is reproducible on demand using
basic hole punch as follows:
$ mkfs.xfs -f -m crc=1,finobt=1 <dev>
$ mount <dev> /mnt -o uquota
$ xfs_io -f -c "falloc 0 50m" /mnt/file
$ for i in $(seq 1 20); do xfs_io -c "fpunch ${i}m 32k" /mnt/file; done
$ rm -f /mnt/file
$ repquota -us /mnt
...
User used soft hard grace used soft hard grace
----------------------------------------------------------------------
root -- 32K 0K 0K 3 0 0
A file is allocated with a single 50m extent. The extent count increases
via hole punches until the bmap converts to btree format. The file is
removed but quota reports 32k of space usage for the user. This
reservation is effectively leaked for the lifetime of the mount.
The reason this occurs is because the quota block reservation tracking
is confused when a transaction happens to free and allocate blocks at
the same time. Consider the following sequence of events:
- tp is allocated from xfs_free_file_space() and reserves several blocks
for btree management. Blocks are reserved against the dquot and marked
as such in the transaction (qtrx->qt_blk_res).
- 8 blocks are accounted free when the 32k range is punched out.
xfs_trans_mod_dquot() is called with XFS_TRANS_DQ_BCOUNT and sets
->qt_bcount_delta to -8.
- Subsequently, a block is allocated against the same transaction by
xfs_bmap_extents_to_btree() for btree conversion. A call to
xfs_trans_mod_dquot() increases qt_blk_res_used to 1 and qt_bcount_delta
to -7.
- The transaction is dup'd and committed by xfs_bmap_finish().
xfs_trans_dup_dqinfo() sets the first transaction up such that it has a
matching qt_blk_res and qt_blk_res_used of 1. The remaining unused
reservation is transferred to the duplicate tp.
When the transactions are committed, the dquots are fixed up in
xfs_trans_apply_dquot_deltas() according to one of two methods:
1.) If the transaction holds a block reservation (->qt_blk_res != 0),
_only_ the unused portion reservation is unaccounted from the dquot.
Note that the tp duplication behavior of xfs_bmap_finish() makes it such
that qt_blk_res is typically 0 for tp's with unused reservation.
2.) Otherwise, the dquot is fixed up based on the block delta
(->qt_bcount_delta) created by the transaction.
Therefore, if a transaction has a negative qt_bcount_delta and positive
qt_blk_res_used, the former set of blocks that have been removed from
the file are never factored out of the in-core dquot reservation.
Instead, *_apply_dquot_deltas() sees 1 block used out of a 1 block
reservation and believes there is nothing to fix up. The on-disk
d_bcount is updated independently from qt_bcount_delta, and thus is
correct (and allows the quota usage to correct on remount).
To deal with this situation, we effectively want the "used reservation"
part of the transaction to be consistent with any freed blocks with
respect to quota tracking. For example, if 8 blocks are freed, the
subsequent single block allocation does not need to consume the initial
reservation made by the tp. Instead, it simply borrows one from the
previously freed. One possible implementation of such borrowing is to
avoid the blks_res_used increment when bcount_delta is negative. This
alone is flawed logic in that it only handles the case where blocks are
freed before allocated, however.
Rather than add more complexity to manage synchronization between
bcount_delta and blks_res_used, kill the latter entirely. blk_res_used
is only updated in one place and always in sync with delta_bcount.
Therefore, the net block reservation consumption of the transaction is
always available from bcount_delta. Calculate the reservation
consumption on the fly where necessary based on whether the tp has a
reservation and results in a positive net block delta on the inode.
Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2015-06-01 00:15:37 +03:00
|
|
|
ulong blk_res_used = 0;
|
|
|
|
|
|
|
|
if (qtrx->qt_bcount_delta > 0)
|
|
|
|
blk_res_used = qtrx->qt_bcount_delta;
|
|
|
|
|
|
|
|
if (qtrx->qt_blk_res != blk_res_used) {
|
|
|
|
if (qtrx->qt_blk_res > blk_res_used)
|
2005-04-17 02:20:36 +04:00
|
|
|
dqp->q_res_bcount -= (xfs_qcnt_t)
|
|
|
|
(qtrx->qt_blk_res -
|
xfs: fix quota block reservation leak when tp allocates and frees blocks
Al Viro reports that generic/231 fails frequently on XFS and bisected
the problem to the following commit:
5d11fb4b xfs: rework zero range to prevent invalid i_size updates
... which is just the first commit that happens to cause fsx to
reproduce the problem. fsx reproduces via zero range calls. The
aforementioned commit overhauls zero range to use hole punch and
fallocate. As it turns out, the problem is reproducible on demand using
basic hole punch as follows:
$ mkfs.xfs -f -m crc=1,finobt=1 <dev>
$ mount <dev> /mnt -o uquota
$ xfs_io -f -c "falloc 0 50m" /mnt/file
$ for i in $(seq 1 20); do xfs_io -c "fpunch ${i}m 32k" /mnt/file; done
$ rm -f /mnt/file
$ repquota -us /mnt
...
User used soft hard grace used soft hard grace
----------------------------------------------------------------------
root -- 32K 0K 0K 3 0 0
A file is allocated with a single 50m extent. The extent count increases
via hole punches until the bmap converts to btree format. The file is
removed but quota reports 32k of space usage for the user. This
reservation is effectively leaked for the lifetime of the mount.
The reason this occurs is because the quota block reservation tracking
is confused when a transaction happens to free and allocate blocks at
the same time. Consider the following sequence of events:
- tp is allocated from xfs_free_file_space() and reserves several blocks
for btree management. Blocks are reserved against the dquot and marked
as such in the transaction (qtrx->qt_blk_res).
- 8 blocks are accounted free when the 32k range is punched out.
xfs_trans_mod_dquot() is called with XFS_TRANS_DQ_BCOUNT and sets
->qt_bcount_delta to -8.
- Subsequently, a block is allocated against the same transaction by
xfs_bmap_extents_to_btree() for btree conversion. A call to
xfs_trans_mod_dquot() increases qt_blk_res_used to 1 and qt_bcount_delta
to -7.
- The transaction is dup'd and committed by xfs_bmap_finish().
xfs_trans_dup_dqinfo() sets the first transaction up such that it has a
matching qt_blk_res and qt_blk_res_used of 1. The remaining unused
reservation is transferred to the duplicate tp.
When the transactions are committed, the dquots are fixed up in
xfs_trans_apply_dquot_deltas() according to one of two methods:
1.) If the transaction holds a block reservation (->qt_blk_res != 0),
_only_ the unused portion reservation is unaccounted from the dquot.
Note that the tp duplication behavior of xfs_bmap_finish() makes it such
that qt_blk_res is typically 0 for tp's with unused reservation.
2.) Otherwise, the dquot is fixed up based on the block delta
(->qt_bcount_delta) created by the transaction.
Therefore, if a transaction has a negative qt_bcount_delta and positive
qt_blk_res_used, the former set of blocks that have been removed from
the file are never factored out of the in-core dquot reservation.
Instead, *_apply_dquot_deltas() sees 1 block used out of a 1 block
reservation and believes there is nothing to fix up. The on-disk
d_bcount is updated independently from qt_bcount_delta, and thus is
correct (and allows the quota usage to correct on remount).
To deal with this situation, we effectively want the "used reservation"
part of the transaction to be consistent with any freed blocks with
respect to quota tracking. For example, if 8 blocks are freed, the
subsequent single block allocation does not need to consume the initial
reservation made by the tp. Instead, it simply borrows one from the
previously freed. One possible implementation of such borrowing is to
avoid the blks_res_used increment when bcount_delta is negative. This
alone is flawed logic in that it only handles the case where blocks are
freed before allocated, however.
Rather than add more complexity to manage synchronization between
bcount_delta and blks_res_used, kill the latter entirely. blk_res_used
is only updated in one place and always in sync with delta_bcount.
Therefore, the net block reservation consumption of the transaction is
always available from bcount_delta. Calculate the reservation
consumption on the fly where necessary based on whether the tp has a
reservation and results in a positive net block delta on the inode.
Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2015-06-01 00:15:37 +03:00
|
|
|
blk_res_used);
|
2005-04-17 02:20:36 +04:00
|
|
|
else
|
|
|
|
dqp->q_res_bcount -= (xfs_qcnt_t)
|
xfs: fix quota block reservation leak when tp allocates and frees blocks
Al Viro reports that generic/231 fails frequently on XFS and bisected
the problem to the following commit:
5d11fb4b xfs: rework zero range to prevent invalid i_size updates
... which is just the first commit that happens to cause fsx to
reproduce the problem. fsx reproduces via zero range calls. The
aforementioned commit overhauls zero range to use hole punch and
fallocate. As it turns out, the problem is reproducible on demand using
basic hole punch as follows:
$ mkfs.xfs -f -m crc=1,finobt=1 <dev>
$ mount <dev> /mnt -o uquota
$ xfs_io -f -c "falloc 0 50m" /mnt/file
$ for i in $(seq 1 20); do xfs_io -c "fpunch ${i}m 32k" /mnt/file; done
$ rm -f /mnt/file
$ repquota -us /mnt
...
User used soft hard grace used soft hard grace
----------------------------------------------------------------------
root -- 32K 0K 0K 3 0 0
A file is allocated with a single 50m extent. The extent count increases
via hole punches until the bmap converts to btree format. The file is
removed but quota reports 32k of space usage for the user. This
reservation is effectively leaked for the lifetime of the mount.
The reason this occurs is because the quota block reservation tracking
is confused when a transaction happens to free and allocate blocks at
the same time. Consider the following sequence of events:
- tp is allocated from xfs_free_file_space() and reserves several blocks
for btree management. Blocks are reserved against the dquot and marked
as such in the transaction (qtrx->qt_blk_res).
- 8 blocks are accounted free when the 32k range is punched out.
xfs_trans_mod_dquot() is called with XFS_TRANS_DQ_BCOUNT and sets
->qt_bcount_delta to -8.
- Subsequently, a block is allocated against the same transaction by
xfs_bmap_extents_to_btree() for btree conversion. A call to
xfs_trans_mod_dquot() increases qt_blk_res_used to 1 and qt_bcount_delta
to -7.
- The transaction is dup'd and committed by xfs_bmap_finish().
xfs_trans_dup_dqinfo() sets the first transaction up such that it has a
matching qt_blk_res and qt_blk_res_used of 1. The remaining unused
reservation is transferred to the duplicate tp.
When the transactions are committed, the dquots are fixed up in
xfs_trans_apply_dquot_deltas() according to one of two methods:
1.) If the transaction holds a block reservation (->qt_blk_res != 0),
_only_ the unused portion reservation is unaccounted from the dquot.
Note that the tp duplication behavior of xfs_bmap_finish() makes it such
that qt_blk_res is typically 0 for tp's with unused reservation.
2.) Otherwise, the dquot is fixed up based on the block delta
(->qt_bcount_delta) created by the transaction.
Therefore, if a transaction has a negative qt_bcount_delta and positive
qt_blk_res_used, the former set of blocks that have been removed from
the file are never factored out of the in-core dquot reservation.
Instead, *_apply_dquot_deltas() sees 1 block used out of a 1 block
reservation and believes there is nothing to fix up. The on-disk
d_bcount is updated independently from qt_bcount_delta, and thus is
correct (and allows the quota usage to correct on remount).
To deal with this situation, we effectively want the "used reservation"
part of the transaction to be consistent with any freed blocks with
respect to quota tracking. For example, if 8 blocks are freed, the
subsequent single block allocation does not need to consume the initial
reservation made by the tp. Instead, it simply borrows one from the
previously freed. One possible implementation of such borrowing is to
avoid the blks_res_used increment when bcount_delta is negative. This
alone is flawed logic in that it only handles the case where blocks are
freed before allocated, however.
Rather than add more complexity to manage synchronization between
bcount_delta and blks_res_used, kill the latter entirely. blk_res_used
is only updated in one place and always in sync with delta_bcount.
Therefore, the net block reservation consumption of the transaction is
always available from bcount_delta. Calculate the reservation
consumption on the fly where necessary based on whether the tp has a
reservation and results in a positive net block delta on the inode.
Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
2015-06-01 00:15:37 +03:00
|
|
|
(blk_res_used -
|
2005-04-17 02:20:36 +04:00
|
|
|
qtrx->qt_blk_res);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* These blks were never reserved, either inside
|
|
|
|
* a transaction or outside one (in a delayed
|
|
|
|
* allocation). Also, this isn't always a
|
|
|
|
* negative number since we sometimes
|
|
|
|
* deliberately skip quota reservations.
|
|
|
|
*/
|
|
|
|
if (qtrx->qt_bcount_delta) {
|
|
|
|
dqp->q_res_bcount +=
|
|
|
|
(xfs_qcnt_t)qtrx->qt_bcount_delta;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Adjust the RT reservation.
|
|
|
|
*/
|
|
|
|
if (qtrx->qt_rtblk_res != 0) {
|
2005-06-21 09:48:47 +04:00
|
|
|
if (qtrx->qt_rtblk_res != qtrx->qt_rtblk_res_used) {
|
2005-04-17 02:20:36 +04:00
|
|
|
if (qtrx->qt_rtblk_res >
|
|
|
|
qtrx->qt_rtblk_res_used)
|
|
|
|
dqp->q_res_rtbcount -= (xfs_qcnt_t)
|
|
|
|
(qtrx->qt_rtblk_res -
|
|
|
|
qtrx->qt_rtblk_res_used);
|
|
|
|
else
|
|
|
|
dqp->q_res_rtbcount -= (xfs_qcnt_t)
|
|
|
|
(qtrx->qt_rtblk_res_used -
|
|
|
|
qtrx->qt_rtblk_res);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (qtrx->qt_rtbcount_delta)
|
|
|
|
dqp->q_res_rtbcount +=
|
|
|
|
(xfs_qcnt_t)qtrx->qt_rtbcount_delta;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Adjust the inode reservation.
|
|
|
|
*/
|
|
|
|
if (qtrx->qt_ino_res != 0) {
|
|
|
|
ASSERT(qtrx->qt_ino_res >=
|
|
|
|
qtrx->qt_ino_res_used);
|
|
|
|
if (qtrx->qt_ino_res > qtrx->qt_ino_res_used)
|
|
|
|
dqp->q_res_icount -= (xfs_qcnt_t)
|
|
|
|
(qtrx->qt_ino_res -
|
|
|
|
qtrx->qt_ino_res_used);
|
|
|
|
} else {
|
|
|
|
if (qtrx->qt_icount_delta)
|
|
|
|
dqp->q_res_icount +=
|
|
|
|
(xfs_qcnt_t)qtrx->qt_icount_delta;
|
|
|
|
}
|
|
|
|
|
|
|
|
ASSERT(dqp->q_res_bcount >=
|
2005-11-02 07:01:12 +03:00
|
|
|
be64_to_cpu(dqp->q_core.d_bcount));
|
2005-04-17 02:20:36 +04:00
|
|
|
ASSERT(dqp->q_res_icount >=
|
2005-11-02 07:01:12 +03:00
|
|
|
be64_to_cpu(dqp->q_core.d_icount));
|
2005-04-17 02:20:36 +04:00
|
|
|
ASSERT(dqp->q_res_rtbcount >=
|
2005-11-02 07:01:12 +03:00
|
|
|
be64_to_cpu(dqp->q_core.d_rtbcount));
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Release the reservations, and adjust the dquots accordingly.
|
|
|
|
* This is called only when the transaction is being aborted. If by
|
|
|
|
* any chance we have done dquot modifications incore (ie. deltas) already,
|
|
|
|
* we simply throw those away, since that's the expected behavior
|
|
|
|
* when a transaction is curtailed without a commit.
|
|
|
|
*/
|
2009-06-08 17:33:32 +04:00
|
|
|
void
|
2005-04-17 02:20:36 +04:00
|
|
|
xfs_trans_unreserve_and_mod_dquots(
|
|
|
|
xfs_trans_t *tp)
|
|
|
|
{
|
|
|
|
int i, j;
|
|
|
|
xfs_dquot_t *dqp;
|
|
|
|
xfs_dqtrx_t *qtrx, *qa;
|
2012-11-13 03:32:59 +04:00
|
|
|
bool locked;
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY))
|
|
|
|
return;
|
|
|
|
|
2013-06-28 02:25:09 +04:00
|
|
|
for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) {
|
|
|
|
qa = tp->t_dqinfo->dqs[j];
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
|
|
|
|
qtrx = &qa[i];
|
|
|
|
/*
|
|
|
|
* We assume that the array of dquots is filled
|
|
|
|
* sequentially, not sparsely.
|
|
|
|
*/
|
|
|
|
if ((dqp = qtrx->qt_dquot) == NULL)
|
|
|
|
break;
|
|
|
|
/*
|
|
|
|
* Unreserve the original reservation. We don't care
|
|
|
|
* about the number of blocks used field, or deltas.
|
|
|
|
* Also we don't bother to zero the fields.
|
|
|
|
*/
|
2012-11-13 03:32:59 +04:00
|
|
|
locked = false;
|
2005-04-17 02:20:36 +04:00
|
|
|
if (qtrx->qt_blk_res) {
|
|
|
|
xfs_dqlock(dqp);
|
2012-11-13 03:32:59 +04:00
|
|
|
locked = true;
|
2005-04-17 02:20:36 +04:00
|
|
|
dqp->q_res_bcount -=
|
|
|
|
(xfs_qcnt_t)qtrx->qt_blk_res;
|
|
|
|
}
|
|
|
|
if (qtrx->qt_ino_res) {
|
|
|
|
if (!locked) {
|
|
|
|
xfs_dqlock(dqp);
|
2012-11-13 03:32:59 +04:00
|
|
|
locked = true;
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
dqp->q_res_icount -=
|
|
|
|
(xfs_qcnt_t)qtrx->qt_ino_res;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (qtrx->qt_rtblk_res) {
|
|
|
|
if (!locked) {
|
|
|
|
xfs_dqlock(dqp);
|
2012-11-13 03:32:59 +04:00
|
|
|
locked = true;
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
dqp->q_res_rtbcount -=
|
|
|
|
(xfs_qcnt_t)qtrx->qt_rtblk_res;
|
|
|
|
}
|
|
|
|
if (locked)
|
|
|
|
xfs_dqunlock(dqp);
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2010-01-18 01:36:19 +03:00
|
|
|
STATIC void
|
|
|
|
xfs_quota_warn(
|
|
|
|
struct xfs_mount *mp,
|
|
|
|
struct xfs_dquot *dqp,
|
|
|
|
int type)
|
|
|
|
{
|
2016-01-04 08:10:42 +03:00
|
|
|
enum quota_type qtype;
|
|
|
|
|
2010-01-18 01:36:19 +03:00
|
|
|
if (dqp->dq_flags & XFS_DQ_PROJ)
|
2016-01-04 08:10:42 +03:00
|
|
|
qtype = PRJQUOTA;
|
|
|
|
else if (dqp->dq_flags & XFS_DQ_USER)
|
|
|
|
qtype = USRQUOTA;
|
|
|
|
else
|
|
|
|
qtype = GRPQUOTA;
|
|
|
|
|
|
|
|
quota_send_warning(make_kqid(&init_user_ns, qtype,
|
2012-09-16 13:32:43 +04:00
|
|
|
be32_to_cpu(dqp->q_core.d_id)),
|
|
|
|
mp->m_super->s_dev, type);
|
2010-01-18 01:36:19 +03:00
|
|
|
}
|
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
/*
|
|
|
|
* This reserves disk blocks and inodes against a dquot.
|
|
|
|
* Flags indicate if the dquot is to be locked here and also
|
|
|
|
* if the blk reservation is for RT or regular blocks.
|
|
|
|
* Sending in XFS_QMOPT_FORCE_RES flag skips the quota check.
|
|
|
|
*/
|
|
|
|
STATIC int
|
|
|
|
xfs_trans_dqresv(
|
|
|
|
xfs_trans_t *tp,
|
|
|
|
xfs_mount_t *mp,
|
|
|
|
xfs_dquot_t *dqp,
|
|
|
|
long nblks,
|
|
|
|
long ninos,
|
|
|
|
uint flags)
|
|
|
|
{
|
|
|
|
xfs_qcnt_t hardlimit;
|
|
|
|
xfs_qcnt_t softlimit;
|
2005-06-21 09:48:47 +04:00
|
|
|
time_t timer;
|
|
|
|
xfs_qwarncnt_t warns;
|
|
|
|
xfs_qwarncnt_t warnlimit;
|
2012-02-06 16:51:05 +04:00
|
|
|
xfs_qcnt_t total_count;
|
2005-04-17 02:20:36 +04:00
|
|
|
xfs_qcnt_t *resbcountp;
|
|
|
|
xfs_quotainfo_t *q = mp->m_quotainfo;
|
2016-02-08 03:27:55 +03:00
|
|
|
struct xfs_def_quota *defq;
|
2005-04-17 02:20:36 +04:00
|
|
|
|
2009-02-08 23:51:42 +03:00
|
|
|
|
|
|
|
xfs_dqlock(dqp);
|
|
|
|
|
2016-02-08 03:27:55 +03:00
|
|
|
defq = xfs_get_defquota(dqp, q);
|
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
if (flags & XFS_TRANS_DQ_RES_BLKS) {
|
2005-11-02 07:01:12 +03:00
|
|
|
hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
|
2005-04-17 02:20:36 +04:00
|
|
|
if (!hardlimit)
|
2016-02-08 03:27:55 +03:00
|
|
|
hardlimit = defq->bhardlimit;
|
2005-11-02 07:01:12 +03:00
|
|
|
softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit);
|
2005-04-17 02:20:36 +04:00
|
|
|
if (!softlimit)
|
2016-02-08 03:27:55 +03:00
|
|
|
softlimit = defq->bsoftlimit;
|
2005-11-02 07:01:12 +03:00
|
|
|
timer = be32_to_cpu(dqp->q_core.d_btimer);
|
|
|
|
warns = be16_to_cpu(dqp->q_core.d_bwarns);
|
2010-04-20 11:01:30 +04:00
|
|
|
warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit;
|
2005-04-17 02:20:36 +04:00
|
|
|
resbcountp = &dqp->q_res_bcount;
|
|
|
|
} else {
|
|
|
|
ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
|
2005-11-02 07:01:12 +03:00
|
|
|
hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit);
|
2005-04-17 02:20:36 +04:00
|
|
|
if (!hardlimit)
|
2016-02-08 03:27:55 +03:00
|
|
|
hardlimit = defq->rtbhardlimit;
|
2005-11-02 07:01:12 +03:00
|
|
|
softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit);
|
2005-04-17 02:20:36 +04:00
|
|
|
if (!softlimit)
|
2016-02-08 03:27:55 +03:00
|
|
|
softlimit = defq->rtbsoftlimit;
|
2005-11-02 07:01:12 +03:00
|
|
|
timer = be32_to_cpu(dqp->q_core.d_rtbtimer);
|
|
|
|
warns = be16_to_cpu(dqp->q_core.d_rtbwarns);
|
2010-04-20 11:01:30 +04:00
|
|
|
warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit;
|
2005-04-17 02:20:36 +04:00
|
|
|
resbcountp = &dqp->q_res_rtbcount;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((flags & XFS_QMOPT_FORCE_RES) == 0 &&
|
|
|
|
dqp->q_core.d_id &&
|
2007-05-08 07:49:33 +04:00
|
|
|
((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) ||
|
2013-06-28 02:25:10 +04:00
|
|
|
(XFS_IS_GQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISGDQ(dqp)) ||
|
|
|
|
(XFS_IS_PQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISPDQ(dqp)))) {
|
2005-04-17 02:20:36 +04:00
|
|
|
if (nblks > 0) {
|
|
|
|
/*
|
|
|
|
* dquot is locked already. See if we'd go over the
|
|
|
|
* hardlimit or exceed the timelimit if we allocate
|
|
|
|
* nblks.
|
|
|
|
*/
|
2012-02-06 16:51:05 +04:00
|
|
|
total_count = *resbcountp + nblks;
|
|
|
|
if (hardlimit && total_count > hardlimit) {
|
2010-01-18 01:36:19 +03:00
|
|
|
xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
|
2005-04-17 02:20:36 +04:00
|
|
|
goto error_return;
|
2010-01-18 01:36:19 +03:00
|
|
|
}
|
2012-02-06 16:51:05 +04:00
|
|
|
if (softlimit && total_count > softlimit) {
|
2010-01-18 01:36:19 +03:00
|
|
|
if ((timer != 0 && get_seconds() > timer) ||
|
|
|
|
(warns != 0 && warns >= warnlimit)) {
|
|
|
|
xfs_quota_warn(mp, dqp,
|
|
|
|
QUOTA_NL_BSOFTLONGWARN);
|
|
|
|
goto error_return;
|
|
|
|
}
|
|
|
|
|
|
|
|
xfs_quota_warn(mp, dqp, QUOTA_NL_BSOFTWARN);
|
|
|
|
}
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
if (ninos > 0) {
|
2012-02-06 16:51:05 +04:00
|
|
|
total_count = be64_to_cpu(dqp->q_core.d_icount) + ninos;
|
2005-11-02 07:01:12 +03:00
|
|
|
timer = be32_to_cpu(dqp->q_core.d_itimer);
|
|
|
|
warns = be16_to_cpu(dqp->q_core.d_iwarns);
|
2010-04-20 11:01:30 +04:00
|
|
|
warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
|
2005-11-02 07:01:12 +03:00
|
|
|
hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
|
2005-04-17 02:20:36 +04:00
|
|
|
if (!hardlimit)
|
2016-02-08 03:27:55 +03:00
|
|
|
hardlimit = defq->ihardlimit;
|
2005-11-02 07:01:12 +03:00
|
|
|
softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
|
2005-04-17 02:20:36 +04:00
|
|
|
if (!softlimit)
|
2016-02-08 03:27:55 +03:00
|
|
|
softlimit = defq->isoftlimit;
|
2010-01-14 01:05:49 +03:00
|
|
|
|
2012-02-06 16:51:05 +04:00
|
|
|
if (hardlimit && total_count > hardlimit) {
|
2010-01-18 01:36:19 +03:00
|
|
|
xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
|
2005-04-17 02:20:36 +04:00
|
|
|
goto error_return;
|
2010-01-18 01:36:19 +03:00
|
|
|
}
|
2012-02-06 16:51:05 +04:00
|
|
|
if (softlimit && total_count > softlimit) {
|
2010-01-18 01:36:19 +03:00
|
|
|
if ((timer != 0 && get_seconds() > timer) ||
|
|
|
|
(warns != 0 && warns >= warnlimit)) {
|
|
|
|
xfs_quota_warn(mp, dqp,
|
|
|
|
QUOTA_NL_ISOFTLONGWARN);
|
|
|
|
goto error_return;
|
|
|
|
}
|
|
|
|
xfs_quota_warn(mp, dqp, QUOTA_NL_ISOFTWARN);
|
|
|
|
}
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Change the reservation, but not the actual usage.
|
|
|
|
* Note that q_res_bcount = q_core.d_bcount + resv
|
|
|
|
*/
|
|
|
|
(*resbcountp) += (xfs_qcnt_t)nblks;
|
|
|
|
if (ninos != 0)
|
|
|
|
dqp->q_res_icount += (xfs_qcnt_t)ninos;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* note the reservation amt in the trans struct too,
|
|
|
|
* so that the transaction knows how much was reserved by
|
|
|
|
* it against this particular dquot.
|
|
|
|
* We don't do this when we are reserving for a delayed allocation,
|
|
|
|
* because we don't have the luxury of a transaction envelope then.
|
|
|
|
*/
|
|
|
|
if (tp) {
|
|
|
|
ASSERT(tp->t_dqinfo);
|
|
|
|
ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
|
|
|
|
if (nblks != 0)
|
|
|
|
xfs_trans_mod_dquot(tp, dqp,
|
|
|
|
flags & XFS_QMOPT_RESBLK_MASK,
|
|
|
|
nblks);
|
|
|
|
if (ninos != 0)
|
|
|
|
xfs_trans_mod_dquot(tp, dqp,
|
|
|
|
XFS_TRANS_DQ_RES_INOS,
|
|
|
|
ninos);
|
|
|
|
}
|
2005-11-02 07:01:12 +03:00
|
|
|
ASSERT(dqp->q_res_bcount >= be64_to_cpu(dqp->q_core.d_bcount));
|
|
|
|
ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount));
|
|
|
|
ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount));
|
2005-04-17 02:20:36 +04:00
|
|
|
|
2010-01-14 01:05:49 +03:00
|
|
|
xfs_dqunlock(dqp);
|
|
|
|
return 0;
|
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
error_return:
|
2009-02-08 23:51:42 +03:00
|
|
|
xfs_dqunlock(dqp);
|
2010-01-14 01:05:49 +03:00
|
|
|
if (flags & XFS_QMOPT_ENOSPC)
|
2014-06-25 08:58:08 +04:00
|
|
|
return -ENOSPC;
|
|
|
|
return -EDQUOT;
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
2006-03-31 07:04:49 +04:00
|
|
|
* Given dquot(s), make disk block and/or inode reservations against them.
|
2013-07-11 09:00:40 +04:00
|
|
|
* The fact that this does the reservation against user, group and
|
|
|
|
* project quotas is important, because this follows a all-or-nothing
|
2005-04-17 02:20:36 +04:00
|
|
|
* approach.
|
|
|
|
*
|
2009-02-08 23:51:42 +03:00
|
|
|
* flags = XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown.
|
2006-03-31 07:04:49 +04:00
|
|
|
* XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT. Used by pquota.
|
2005-04-17 02:20:36 +04:00
|
|
|
* XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks
|
|
|
|
* XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks
|
|
|
|
* dquots are unlocked on return, if they were not locked by caller.
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
xfs_trans_reserve_quota_bydquots(
|
2013-06-28 02:25:07 +04:00
|
|
|
struct xfs_trans *tp,
|
|
|
|
struct xfs_mount *mp,
|
|
|
|
struct xfs_dquot *udqp,
|
|
|
|
struct xfs_dquot *gdqp,
|
2013-07-11 09:00:40 +04:00
|
|
|
struct xfs_dquot *pdqp,
|
2013-06-28 02:25:07 +04:00
|
|
|
long nblks,
|
|
|
|
long ninos,
|
|
|
|
uint flags)
|
2005-04-17 02:20:36 +04:00
|
|
|
{
|
2013-06-28 02:25:07 +04:00
|
|
|
int error;
|
2005-04-17 02:20:36 +04:00
|
|
|
|
2009-06-08 17:33:32 +04:00
|
|
|
if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
|
2006-03-31 07:04:49 +04:00
|
|
|
return 0;
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
if (tp && tp->t_dqinfo == NULL)
|
|
|
|
xfs_trans_alloc_dqinfo(tp);
|
|
|
|
|
|
|
|
ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
|
|
|
|
|
|
|
|
if (udqp) {
|
2006-03-31 07:04:49 +04:00
|
|
|
error = xfs_trans_dqresv(tp, mp, udqp, nblks, ninos,
|
|
|
|
(flags & ~XFS_QMOPT_ENOSPC));
|
|
|
|
if (error)
|
|
|
|
return error;
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
if (gdqp) {
|
2006-03-31 07:04:49 +04:00
|
|
|
error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags);
|
2013-06-28 02:25:07 +04:00
|
|
|
if (error)
|
|
|
|
goto unwind_usr;
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
|
2013-07-11 09:00:40 +04:00
|
|
|
if (pdqp) {
|
|
|
|
error = xfs_trans_dqresv(tp, mp, pdqp, nblks, ninos, flags);
|
|
|
|
if (error)
|
|
|
|
goto unwind_grp;
|
|
|
|
}
|
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
/*
|
2006-03-29 02:55:14 +04:00
|
|
|
* Didn't change anything critical, so, no need to log
|
2005-04-17 02:20:36 +04:00
|
|
|
*/
|
2006-03-31 07:04:49 +04:00
|
|
|
return 0;
|
2013-06-28 02:25:07 +04:00
|
|
|
|
2013-07-11 09:00:40 +04:00
|
|
|
unwind_grp:
|
|
|
|
flags |= XFS_QMOPT_FORCE_RES;
|
|
|
|
if (gdqp)
|
|
|
|
xfs_trans_dqresv(tp, mp, gdqp, -nblks, -ninos, flags);
|
2013-06-28 02:25:07 +04:00
|
|
|
unwind_usr:
|
|
|
|
flags |= XFS_QMOPT_FORCE_RES;
|
|
|
|
if (udqp)
|
|
|
|
xfs_trans_dqresv(tp, mp, udqp, -nblks, -ninos, flags);
|
|
|
|
return error;
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Lock the dquot and change the reservation if we can.
|
|
|
|
* This doesn't change the actual usage, just the reservation.
|
|
|
|
* The inode sent in is locked.
|
|
|
|
*/
|
2009-06-08 17:33:32 +04:00
|
|
|
int
|
2005-04-17 02:20:36 +04:00
|
|
|
xfs_trans_reserve_quota_nblks(
|
2009-06-08 17:33:32 +04:00
|
|
|
struct xfs_trans *tp,
|
|
|
|
struct xfs_inode *ip,
|
|
|
|
long nblks,
|
|
|
|
long ninos,
|
|
|
|
uint flags)
|
2005-04-17 02:20:36 +04:00
|
|
|
{
|
2009-06-08 17:33:32 +04:00
|
|
|
struct xfs_mount *mp = ip->i_mount;
|
2005-04-17 02:20:36 +04:00
|
|
|
|
2009-06-08 17:33:32 +04:00
|
|
|
if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
|
2006-03-31 07:04:49 +04:00
|
|
|
return 0;
|
|
|
|
if (XFS_IS_PQUOTA_ON(mp))
|
|
|
|
flags |= XFS_QMOPT_ENOSPC;
|
2005-04-17 02:20:36 +04:00
|
|
|
|
2013-06-28 02:25:04 +04:00
|
|
|
ASSERT(!xfs_is_quota_inode(&mp->m_sb, ip->i_ino));
|
2005-04-17 02:20:36 +04:00
|
|
|
|
2008-04-22 11:34:00 +04:00
|
|
|
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
|
2006-03-31 07:04:49 +04:00
|
|
|
ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
|
|
|
|
XFS_TRANS_DQ_RES_RTBLKS ||
|
|
|
|
(flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
|
|
|
|
XFS_TRANS_DQ_RES_BLKS);
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Reserve nblks against these dquots, with trans as the mediator.
|
|
|
|
*/
|
2009-06-08 17:33:32 +04:00
|
|
|
return xfs_trans_reserve_quota_bydquots(tp, mp,
|
|
|
|
ip->i_udquot, ip->i_gdquot,
|
2013-07-11 09:00:40 +04:00
|
|
|
ip->i_pdquot,
|
2009-06-08 17:33:32 +04:00
|
|
|
nblks, ninos, flags);
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This routine is called to allocate a quotaoff log item.
|
|
|
|
*/
|
|
|
|
xfs_qoff_logitem_t *
|
|
|
|
xfs_trans_get_qoff_item(
|
|
|
|
xfs_trans_t *tp,
|
|
|
|
xfs_qoff_logitem_t *startqoff,
|
|
|
|
uint flags)
|
|
|
|
{
|
|
|
|
xfs_qoff_logitem_t *q;
|
|
|
|
|
|
|
|
ASSERT(tp != NULL);
|
|
|
|
|
|
|
|
q = xfs_qm_qoff_logitem_init(tp->t_mountp, startqoff, flags);
|
|
|
|
ASSERT(q != NULL);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get a log_item_desc to point at the new item.
|
|
|
|
*/
|
2010-06-23 12:11:15 +04:00
|
|
|
xfs_trans_add_item(tp, &q->qql_item);
|
|
|
|
return q;
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This is called to mark the quotaoff logitem as needing
|
|
|
|
* to be logged when the transaction is committed. The logitem must
|
|
|
|
* already be associated with the given transaction.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
xfs_trans_log_quotaoff_item(
|
|
|
|
xfs_trans_t *tp,
|
|
|
|
xfs_qoff_logitem_t *qlp)
|
|
|
|
{
|
|
|
|
tp->t_flags |= XFS_TRANS_DIRTY;
|
2010-06-23 12:11:15 +04:00
|
|
|
qlp->qql_item.li_desc->lid_flags |= XFS_LID_DIRTY;
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
STATIC void
|
|
|
|
xfs_trans_alloc_dqinfo(
|
|
|
|
xfs_trans_t *tp)
|
|
|
|
{
|
2012-03-13 12:52:37 +04:00
|
|
|
tp->t_dqinfo = kmem_zone_zalloc(xfs_qm_dqtrxzone, KM_SLEEP);
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
|
2009-06-08 17:33:32 +04:00
|
|
|
void
|
2005-04-17 02:20:36 +04:00
|
|
|
xfs_trans_free_dqinfo(
|
|
|
|
xfs_trans_t *tp)
|
|
|
|
{
|
|
|
|
if (!tp->t_dqinfo)
|
|
|
|
return;
|
2012-03-13 12:52:37 +04:00
|
|
|
kmem_zone_free(xfs_qm_dqtrxzone, tp->t_dqinfo);
|
2009-06-08 17:33:32 +04:00
|
|
|
tp->t_dqinfo = NULL;
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|