2005-04-17 02:20:36 +04:00
|
|
|
/*
|
2005-11-02 06:58:39 +03:00
|
|
|
* Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
|
|
|
|
* All Rights Reserved.
|
2005-04-17 02:20:36 +04:00
|
|
|
*
|
2005-11-02 06:58:39 +03:00
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU General Public License as
|
2005-04-17 02:20:36 +04:00
|
|
|
* published by the Free Software Foundation.
|
|
|
|
*
|
2005-11-02 06:58:39 +03:00
|
|
|
* This program is distributed in the hope that it would be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
2005-04-17 02:20:36 +04:00
|
|
|
*
|
2005-11-02 06:58:39 +03:00
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write the Free Software Foundation,
|
|
|
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
2005-04-17 02:20:36 +04:00
|
|
|
*/
|
|
|
|
#ifndef __XFS_TRANS_H__
|
|
|
|
#define __XFS_TRANS_H__
|
|
|
|
|
2008-10-30 09:05:38 +03:00
|
|
|
struct xfs_log_item;
|
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
/*
|
|
|
|
* This is the structure written in the log at the head of
|
|
|
|
* every transaction. It identifies the type and id of the
|
|
|
|
* transaction, and contains the number of items logged by
|
|
|
|
* the transaction so we know how many to expect during recovery.
|
|
|
|
*
|
|
|
|
* Do not change the below structure without redoing the code in
|
|
|
|
* xlog_recover_add_to_trans() and xlog_recover_add_to_cont_trans().
|
|
|
|
*/
|
|
|
|
typedef struct xfs_trans_header {
|
|
|
|
uint th_magic; /* magic number */
|
|
|
|
uint th_type; /* transaction type */
|
|
|
|
__int32_t th_tid; /* transaction id (unused) */
|
|
|
|
uint th_num_items; /* num items logged by trans */
|
|
|
|
} xfs_trans_header_t;
|
|
|
|
|
|
|
|
#define XFS_TRANS_HEADER_MAGIC 0x5452414e /* TRAN */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Log item types.
|
|
|
|
*/
|
|
|
|
#define XFS_LI_EFI 0x1236
|
|
|
|
#define XFS_LI_EFD 0x1237
|
|
|
|
#define XFS_LI_IUNLINK 0x1238
|
|
|
|
#define XFS_LI_INODE 0x123b /* aligned ino chunks, var-size ibufs */
|
|
|
|
#define XFS_LI_BUF 0x123c /* v2 bufs, variable sized inode bufs */
|
|
|
|
#define XFS_LI_DQUOT 0x123d
|
|
|
|
#define XFS_LI_QUOTAOFF 0x123e
|
|
|
|
|
2010-04-13 09:06:46 +04:00
|
|
|
#define XFS_LI_TYPE_DESC \
|
|
|
|
{ XFS_LI_EFI, "XFS_LI_EFI" }, \
|
|
|
|
{ XFS_LI_EFD, "XFS_LI_EFD" }, \
|
|
|
|
{ XFS_LI_IUNLINK, "XFS_LI_IUNLINK" }, \
|
|
|
|
{ XFS_LI_INODE, "XFS_LI_INODE" }, \
|
|
|
|
{ XFS_LI_BUF, "XFS_LI_BUF" }, \
|
|
|
|
{ XFS_LI_DQUOT, "XFS_LI_DQUOT" }, \
|
|
|
|
{ XFS_LI_QUOTAOFF, "XFS_LI_QUOTAOFF" }
|
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
/*
|
|
|
|
* Transaction types. Used to distinguish types of buffers.
|
|
|
|
*/
|
|
|
|
#define XFS_TRANS_SETATTR_NOT_SIZE 1
|
|
|
|
#define XFS_TRANS_SETATTR_SIZE 2
|
|
|
|
#define XFS_TRANS_INACTIVE 3
|
|
|
|
#define XFS_TRANS_CREATE 4
|
|
|
|
#define XFS_TRANS_CREATE_TRUNC 5
|
|
|
|
#define XFS_TRANS_TRUNCATE_FILE 6
|
|
|
|
#define XFS_TRANS_REMOVE 7
|
|
|
|
#define XFS_TRANS_LINK 8
|
|
|
|
#define XFS_TRANS_RENAME 9
|
|
|
|
#define XFS_TRANS_MKDIR 10
|
|
|
|
#define XFS_TRANS_RMDIR 11
|
|
|
|
#define XFS_TRANS_SYMLINK 12
|
|
|
|
#define XFS_TRANS_SET_DMATTRS 13
|
|
|
|
#define XFS_TRANS_GROWFS 14
|
|
|
|
#define XFS_TRANS_STRAT_WRITE 15
|
|
|
|
#define XFS_TRANS_DIOSTRAT 16
|
2009-09-01 04:00:31 +04:00
|
|
|
/* 17 was XFS_TRANS_WRITE_SYNC */
|
2005-04-17 02:20:36 +04:00
|
|
|
#define XFS_TRANS_WRITEID 18
|
|
|
|
#define XFS_TRANS_ADDAFORK 19
|
|
|
|
#define XFS_TRANS_ATTRINVAL 20
|
|
|
|
#define XFS_TRANS_ATRUNCATE 21
|
|
|
|
#define XFS_TRANS_ATTR_SET 22
|
|
|
|
#define XFS_TRANS_ATTR_RM 23
|
|
|
|
#define XFS_TRANS_ATTR_FLAG 24
|
|
|
|
#define XFS_TRANS_CLEAR_AGI_BUCKET 25
|
|
|
|
#define XFS_TRANS_QM_SBCHANGE 26
|
|
|
|
/*
|
|
|
|
* Dummy entries since we use the transaction type to index into the
|
|
|
|
* trans_type[] in xlog_recover_print_trans_head()
|
|
|
|
*/
|
|
|
|
#define XFS_TRANS_DUMMY1 27
|
|
|
|
#define XFS_TRANS_DUMMY2 28
|
|
|
|
#define XFS_TRANS_QM_QUOTAOFF 29
|
|
|
|
#define XFS_TRANS_QM_DQALLOC 30
|
|
|
|
#define XFS_TRANS_QM_SETQLIM 31
|
|
|
|
#define XFS_TRANS_QM_DQCLUSTER 32
|
|
|
|
#define XFS_TRANS_QM_QINOCREATE 33
|
|
|
|
#define XFS_TRANS_QM_QUOTAOFF_END 34
|
|
|
|
#define XFS_TRANS_SB_UNIT 35
|
|
|
|
#define XFS_TRANS_FSYNC_TS 36
|
|
|
|
#define XFS_TRANS_GROWFSRT_ALLOC 37
|
|
|
|
#define XFS_TRANS_GROWFSRT_ZERO 38
|
|
|
|
#define XFS_TRANS_GROWFSRT_FREE 39
|
|
|
|
#define XFS_TRANS_SWAPEXT 40
|
[XFS] Lazy Superblock Counters
When we have a couple of hundred transactions on the fly at once, they all
typically modify the on disk superblock in some way.
create/unclink/mkdir/rmdir modify inode counts, allocation/freeing modify
free block counts.
When these counts are modified in a transaction, they must eventually lock
the superblock buffer and apply the mods. The buffer then remains locked
until the transaction is committed into the incore log buffer. The result
of this is that with enough transactions on the fly the incore superblock
buffer becomes a bottleneck.
The result of contention on the incore superblock buffer is that
transaction rates fall - the more pressure that is put on the superblock
buffer, the slower things go.
The key to removing the contention is to not require the superblock fields
in question to be locked. We do that by not marking the superblock dirty
in the transaction. IOWs, we modify the incore superblock but do not
modify the cached superblock buffer. In short, we do not log superblock
modifications to critical fields in the superblock on every transaction.
In fact we only do it just before we write the superblock to disk every
sync period or just before unmount.
This creates an interesting problem - if we don't log or write out the
fields in every transaction, then how do the values get recovered after a
crash? the answer is simple - we keep enough duplicate, logged information
in other structures that we can reconstruct the correct count after log
recovery has been performed.
It is the AGF and AGI structures that contain the duplicate information;
after recovery, we walk every AGI and AGF and sum their individual
counters to get the correct value, and we do a transaction into the log to
correct them. An optimisation of this is that if we have a clean unmount
record, we know the value in the superblock is correct, so we can avoid
the summation walk under normal conditions and so mount/recovery times do
not change under normal operation.
One wrinkle that was discovered during development was that the blocks
used in the freespace btrees are never accounted for in the AGF counters.
This was once a valid optimisation to make; when the filesystem is full,
the free space btrees are empty and consume no space. Hence when it
matters, the "accounting" is correct. But that means the when we do the
AGF summations, we would not have a correct count and xfs_check would
complain. Hence a new counter was added to track the number of blocks used
by the free space btrees. This is an *on-disk format change*.
As a result of this, lazy superblock counters are a mkfs option and at the
moment on linux there is no way to convert an old filesystem. This is
possible - xfs_db can be used to twiddle the right bits and then
xfs_repair will do the format conversion for you. Similarly, you can
convert backwards as well. At some point we'll add functionality to
xfs_admin to do the bit twiddling easily....
SGI-PV: 964999
SGI-Modid: xfs-linux-melb:xfs-kern:28652a
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
2007-05-24 09:26:31 +04:00
|
|
|
#define XFS_TRANS_SB_COUNT 41
|
|
|
|
#define XFS_TRANS_TYPE_MAX 41
|
2005-04-17 02:20:36 +04:00
|
|
|
/* new transaction types need to be reflected in xfs_logprint(8) */
|
|
|
|
|
2009-12-15 02:14:59 +03:00
|
|
|
#define XFS_TRANS_TYPES \
|
|
|
|
{ XFS_TRANS_SETATTR_NOT_SIZE, "SETATTR_NOT_SIZE" }, \
|
|
|
|
{ XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \
|
|
|
|
{ XFS_TRANS_INACTIVE, "INACTIVE" }, \
|
|
|
|
{ XFS_TRANS_CREATE, "CREATE" }, \
|
|
|
|
{ XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \
|
|
|
|
{ XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \
|
|
|
|
{ XFS_TRANS_REMOVE, "REMOVE" }, \
|
|
|
|
{ XFS_TRANS_LINK, "LINK" }, \
|
|
|
|
{ XFS_TRANS_RENAME, "RENAME" }, \
|
|
|
|
{ XFS_TRANS_MKDIR, "MKDIR" }, \
|
|
|
|
{ XFS_TRANS_RMDIR, "RMDIR" }, \
|
|
|
|
{ XFS_TRANS_SYMLINK, "SYMLINK" }, \
|
|
|
|
{ XFS_TRANS_SET_DMATTRS, "SET_DMATTRS" }, \
|
|
|
|
{ XFS_TRANS_GROWFS, "GROWFS" }, \
|
|
|
|
{ XFS_TRANS_STRAT_WRITE, "STRAT_WRITE" }, \
|
|
|
|
{ XFS_TRANS_DIOSTRAT, "DIOSTRAT" }, \
|
|
|
|
{ XFS_TRANS_WRITEID, "WRITEID" }, \
|
|
|
|
{ XFS_TRANS_ADDAFORK, "ADDAFORK" }, \
|
|
|
|
{ XFS_TRANS_ATTRINVAL, "ATTRINVAL" }, \
|
|
|
|
{ XFS_TRANS_ATRUNCATE, "ATRUNCATE" }, \
|
|
|
|
{ XFS_TRANS_ATTR_SET, "ATTR_SET" }, \
|
|
|
|
{ XFS_TRANS_ATTR_RM, "ATTR_RM" }, \
|
|
|
|
{ XFS_TRANS_ATTR_FLAG, "ATTR_FLAG" }, \
|
|
|
|
{ XFS_TRANS_CLEAR_AGI_BUCKET, "CLEAR_AGI_BUCKET" }, \
|
|
|
|
{ XFS_TRANS_QM_SBCHANGE, "QM_SBCHANGE" }, \
|
|
|
|
{ XFS_TRANS_QM_QUOTAOFF, "QM_QUOTAOFF" }, \
|
|
|
|
{ XFS_TRANS_QM_DQALLOC, "QM_DQALLOC" }, \
|
|
|
|
{ XFS_TRANS_QM_SETQLIM, "QM_SETQLIM" }, \
|
|
|
|
{ XFS_TRANS_QM_DQCLUSTER, "QM_DQCLUSTER" }, \
|
|
|
|
{ XFS_TRANS_QM_QINOCREATE, "QM_QINOCREATE" }, \
|
|
|
|
{ XFS_TRANS_QM_QUOTAOFF_END, "QM_QOFF_END" }, \
|
|
|
|
{ XFS_TRANS_SB_UNIT, "SB_UNIT" }, \
|
|
|
|
{ XFS_TRANS_FSYNC_TS, "FSYNC_TS" }, \
|
|
|
|
{ XFS_TRANS_GROWFSRT_ALLOC, "GROWFSRT_ALLOC" }, \
|
|
|
|
{ XFS_TRANS_GROWFSRT_ZERO, "GROWFSRT_ZERO" }, \
|
|
|
|
{ XFS_TRANS_GROWFSRT_FREE, "GROWFSRT_FREE" }, \
|
|
|
|
{ XFS_TRANS_SWAPEXT, "SWAPEXT" }, \
|
|
|
|
{ XFS_TRANS_SB_COUNT, "SB_COUNT" }, \
|
|
|
|
{ XFS_TRANS_DUMMY1, "DUMMY1" }, \
|
|
|
|
{ XFS_TRANS_DUMMY2, "DUMMY2" }, \
|
|
|
|
{ XLOG_UNMOUNT_REC_TYPE, "UNMOUNT" }
|
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
/*
|
|
|
|
* This structure is used to track log items associated with
|
|
|
|
* a transaction. It points to the log item and keeps some
|
|
|
|
* flags to track the state of the log item. It also tracks
|
|
|
|
* the amount of space needed to log the item it describes
|
|
|
|
* once we get to commit processing (see xfs_trans_commit()).
|
|
|
|
*/
|
|
|
|
typedef struct xfs_log_item_desc {
|
2008-10-30 09:05:38 +03:00
|
|
|
struct xfs_log_item *lid_item;
|
2005-04-17 02:20:36 +04:00
|
|
|
ushort lid_size;
|
|
|
|
unsigned char lid_flags;
|
|
|
|
unsigned char lid_index;
|
|
|
|
} xfs_log_item_desc_t;
|
|
|
|
|
|
|
|
#define XFS_LID_DIRTY 0x1
|
|
|
|
#define XFS_LID_PINNED 0x2
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This structure is used to maintain a chunk list of log_item_desc
|
|
|
|
* structures. The free field is a bitmask indicating which descriptors
|
|
|
|
* in this chunk's array are free. The unused field is the first value
|
|
|
|
* not used since this chunk was allocated.
|
|
|
|
*/
|
|
|
|
#define XFS_LIC_NUM_SLOTS 15
|
|
|
|
typedef struct xfs_log_item_chunk {
|
|
|
|
struct xfs_log_item_chunk *lic_next;
|
|
|
|
ushort lic_free;
|
|
|
|
ushort lic_unused;
|
|
|
|
xfs_log_item_desc_t lic_descs[XFS_LIC_NUM_SLOTS];
|
|
|
|
} xfs_log_item_chunk_t;
|
|
|
|
|
|
|
|
#define XFS_LIC_MAX_SLOT (XFS_LIC_NUM_SLOTS - 1)
|
|
|
|
#define XFS_LIC_FREEMASK ((1 << XFS_LIC_NUM_SLOTS) - 1)
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize the given chunk. Set the chunk's free descriptor mask
|
|
|
|
* to indicate that all descriptors are free. The caller gets to set
|
|
|
|
* lic_unused to the right value (0 matches all free). The
|
|
|
|
* lic_descs.lid_index values are set up as each desc is allocated.
|
|
|
|
*/
|
2005-11-02 06:38:42 +03:00
|
|
|
static inline void xfs_lic_init(xfs_log_item_chunk_t *cp)
|
|
|
|
{
|
|
|
|
cp->lic_free = XFS_LIC_FREEMASK;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void xfs_lic_init_slot(xfs_log_item_chunk_t *cp, int slot)
|
|
|
|
{
|
|
|
|
cp->lic_descs[slot].lid_index = (unsigned char)(slot);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int xfs_lic_vacancy(xfs_log_item_chunk_t *cp)
|
|
|
|
{
|
|
|
|
return cp->lic_free & XFS_LIC_FREEMASK;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void xfs_lic_all_free(xfs_log_item_chunk_t *cp)
|
|
|
|
{
|
|
|
|
cp->lic_free = XFS_LIC_FREEMASK;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int xfs_lic_are_all_free(xfs_log_item_chunk_t *cp)
|
|
|
|
{
|
|
|
|
return ((cp->lic_free & XFS_LIC_FREEMASK) == XFS_LIC_FREEMASK);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int xfs_lic_isfree(xfs_log_item_chunk_t *cp, int slot)
|
|
|
|
{
|
|
|
|
return (cp->lic_free & (1 << slot));
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void xfs_lic_claim(xfs_log_item_chunk_t *cp, int slot)
|
|
|
|
{
|
|
|
|
cp->lic_free &= ~(1 << slot);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void xfs_lic_relse(xfs_log_item_chunk_t *cp, int slot)
|
|
|
|
{
|
|
|
|
cp->lic_free |= 1 << slot;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline xfs_log_item_desc_t *
|
|
|
|
xfs_lic_slot(xfs_log_item_chunk_t *cp, int slot)
|
|
|
|
{
|
|
|
|
return &(cp->lic_descs[slot]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp)
|
|
|
|
{
|
|
|
|
return (uint)dp->lid_index;
|
|
|
|
}
|
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
/*
|
|
|
|
* Calculate the address of a chunk given a descriptor pointer:
|
|
|
|
* dp - dp->lid_index give the address of the start of the lic_descs array.
|
|
|
|
* From this we subtract the offset of the lic_descs field in a chunk.
|
|
|
|
* All of this yields the address of the chunk, which is
|
|
|
|
* cast to a chunk pointer.
|
|
|
|
*/
|
2005-11-02 06:38:42 +03:00
|
|
|
static inline xfs_log_item_chunk_t *
|
|
|
|
xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
|
|
|
|
{
|
|
|
|
return (xfs_log_item_chunk_t*) \
|
|
|
|
(((xfs_caddr_t)((dp) - (dp)->lid_index)) - \
|
|
|
|
(xfs_caddr_t)(((xfs_log_item_chunk_t*)0)->lic_descs));
|
|
|
|
}
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
#define XFS_TRANS_MAGIC 0x5452414E /* 'TRAN' */
|
|
|
|
/*
|
|
|
|
* Values for t_flags.
|
|
|
|
*/
|
|
|
|
#define XFS_TRANS_DIRTY 0x01 /* something needs to be logged */
|
|
|
|
#define XFS_TRANS_SB_DIRTY 0x02 /* superblock is modified */
|
|
|
|
#define XFS_TRANS_PERM_LOG_RES 0x04 /* xact took a permanent log res */
|
|
|
|
#define XFS_TRANS_SYNC 0x08 /* make commit synchronous */
|
|
|
|
#define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */
|
|
|
|
#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Values for call flags parameter.
|
|
|
|
*/
|
|
|
|
#define XFS_TRANS_NOSLEEP 0x1
|
|
|
|
#define XFS_TRANS_WAIT 0x2
|
|
|
|
#define XFS_TRANS_RELEASE_LOG_RES 0x4
|
|
|
|
#define XFS_TRANS_ABORT 0x8
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Field values for xfs_trans_mod_sb.
|
|
|
|
*/
|
|
|
|
#define XFS_TRANS_SB_ICOUNT 0x00000001
|
|
|
|
#define XFS_TRANS_SB_IFREE 0x00000002
|
|
|
|
#define XFS_TRANS_SB_FDBLOCKS 0x00000004
|
|
|
|
#define XFS_TRANS_SB_RES_FDBLOCKS 0x00000008
|
|
|
|
#define XFS_TRANS_SB_FREXTENTS 0x00000010
|
|
|
|
#define XFS_TRANS_SB_RES_FREXTENTS 0x00000020
|
|
|
|
#define XFS_TRANS_SB_DBLOCKS 0x00000040
|
|
|
|
#define XFS_TRANS_SB_AGCOUNT 0x00000080
|
|
|
|
#define XFS_TRANS_SB_IMAXPCT 0x00000100
|
|
|
|
#define XFS_TRANS_SB_REXTSIZE 0x00000200
|
|
|
|
#define XFS_TRANS_SB_RBMBLOCKS 0x00000400
|
|
|
|
#define XFS_TRANS_SB_RBLOCKS 0x00000800
|
|
|
|
#define XFS_TRANS_SB_REXTENTS 0x00001000
|
|
|
|
#define XFS_TRANS_SB_REXTSLOG 0x00002000
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Various log reservation values.
|
|
|
|
* These are based on the size of the file system block
|
|
|
|
* because that is what most transactions manipulate.
|
|
|
|
* Each adds in an additional 128 bytes per item logged to
|
|
|
|
* try to account for the overhead of the transaction mechanism.
|
|
|
|
*
|
|
|
|
* Note:
|
|
|
|
* Most of the reservations underestimate the number of allocation
|
|
|
|
* groups into which they could free extents in the xfs_bmap_finish()
|
|
|
|
* call. This is because the number in the worst case is quite high
|
|
|
|
* and quite unusual. In order to fix this we need to change
|
|
|
|
* xfs_bmap_finish() to free extents in only a single AG at a time.
|
|
|
|
* This will require changes to the EFI code as well, however, so that
|
|
|
|
* the EFI for the extents not freed is logged again in each transaction.
|
|
|
|
* See bug 261917.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Per-extent log reservation for the allocation btree changes
|
|
|
|
* involved in freeing or allocating an extent.
|
|
|
|
* 2 trees * (2 blocks/level * max depth - 1) * block size
|
|
|
|
*/
|
|
|
|
#define XFS_ALLOCFREE_LOG_RES(mp,nx) \
|
|
|
|
((nx) * (2 * XFS_FSB_TO_B((mp), 2 * XFS_AG_MAXLEVELS(mp) - 1)))
|
|
|
|
#define XFS_ALLOCFREE_LOG_COUNT(mp,nx) \
|
|
|
|
((nx) * (2 * (2 * XFS_AG_MAXLEVELS(mp) - 1)))
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Per-directory log reservation for any directory change.
|
|
|
|
* dir blocks: (1 btree block per level + data block + free block) * dblock size
|
|
|
|
* bmap btree: (levels + 2) * max depth * block size
|
|
|
|
* v2 directory blocks can be fragmented below the dirblksize down to the fsb
|
|
|
|
* size, so account for that in the DAENTER macros.
|
|
|
|
*/
|
|
|
|
#define XFS_DIROP_LOG_RES(mp) \
|
|
|
|
(XFS_FSB_TO_B(mp, XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK)) + \
|
|
|
|
(XFS_FSB_TO_B(mp, XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1)))
|
|
|
|
#define XFS_DIROP_LOG_COUNT(mp) \
|
|
|
|
(XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \
|
|
|
|
XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In a write transaction we can allocate a maximum of 2
|
|
|
|
* extents. This gives:
|
|
|
|
* the inode getting the new extents: inode size
|
2009-03-29 11:55:42 +04:00
|
|
|
* the inode's bmap btree: max depth * block size
|
2005-04-17 02:20:36 +04:00
|
|
|
* the agfs of the ags from which the extents are allocated: 2 * sector
|
|
|
|
* the superblock free block counter: sector size
|
|
|
|
* the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
|
|
|
|
* And the bmap_finish transaction can free bmap blocks in a join:
|
|
|
|
* the agfs of the ags containing the blocks: 2 * sector size
|
|
|
|
* the agfls of the ags containing the blocks: 2 * sector size
|
|
|
|
* the super block free block counter: sector size
|
|
|
|
* the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
|
|
|
|
*/
|
|
|
|
#define XFS_CALC_WRITE_LOG_RES(mp) \
|
|
|
|
(MAX( \
|
|
|
|
((mp)->m_sb.sb_inodesize + \
|
|
|
|
XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \
|
|
|
|
(2 * (mp)->m_sb.sb_sectsize) + \
|
|
|
|
(mp)->m_sb.sb_sectsize + \
|
|
|
|
XFS_ALLOCFREE_LOG_RES(mp, 2) + \
|
|
|
|
(128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))),\
|
|
|
|
((2 * (mp)->m_sb.sb_sectsize) + \
|
|
|
|
(2 * (mp)->m_sb.sb_sectsize) + \
|
|
|
|
(mp)->m_sb.sb_sectsize + \
|
|
|
|
XFS_ALLOCFREE_LOG_RES(mp, 2) + \
|
|
|
|
(128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
|
|
|
|
|
|
|
|
#define XFS_WRITE_LOG_RES(mp) ((mp)->m_reservations.tr_write)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In truncating a file we free up to two extents at once. We can modify:
|
|
|
|
* the inode being truncated: inode size
|
2009-03-29 11:55:42 +04:00
|
|
|
* the inode's bmap btree: (max depth + 1) * block size
|
2005-04-17 02:20:36 +04:00
|
|
|
* And the bmap_finish transaction can free the blocks and bmap blocks:
|
|
|
|
* the agf for each of the ags: 4 * sector size
|
|
|
|
* the agfl for each of the ags: 4 * sector size
|
|
|
|
* the super block to reflect the freed blocks: sector size
|
|
|
|
* worst case split in allocation btrees per extent assuming 4 extents:
|
|
|
|
* 4 exts * 2 trees * (2 * max depth - 1) * block size
|
|
|
|
* the inode btree: max depth * blocksize
|
|
|
|
* the allocation btrees: 2 trees * (max depth - 1) * block size
|
|
|
|
*/
|
|
|
|
#define XFS_CALC_ITRUNCATE_LOG_RES(mp) \
|
|
|
|
(MAX( \
|
|
|
|
((mp)->m_sb.sb_inodesize + \
|
|
|
|
XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) + \
|
|
|
|
(128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \
|
|
|
|
((4 * (mp)->m_sb.sb_sectsize) + \
|
|
|
|
(4 * (mp)->m_sb.sb_sectsize) + \
|
|
|
|
(mp)->m_sb.sb_sectsize + \
|
|
|
|
XFS_ALLOCFREE_LOG_RES(mp, 4) + \
|
|
|
|
(128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))) + \
|
|
|
|
(128 * 5) + \
|
|
|
|
XFS_ALLOCFREE_LOG_RES(mp, 1) + \
|
2009-02-09 10:37:14 +03:00
|
|
|
(128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
|
2005-04-17 02:20:36 +04:00
|
|
|
XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
|
|
|
|
|
|
|
|
#define XFS_ITRUNCATE_LOG_RES(mp) ((mp)->m_reservations.tr_itruncate)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In renaming a files we can modify:
|
|
|
|
* the four inodes involved: 4 * inode size
|
|
|
|
* the two directory btrees: 2 * (max depth + v2) * dir block size
|
|
|
|
* the two directory bmap btrees: 2 * max depth * block size
|
|
|
|
* And the bmap_finish transaction can free dir and bmap blocks (two sets
|
|
|
|
* of bmap blocks) giving:
|
|
|
|
* the agf for the ags in which the blocks live: 3 * sector size
|
|
|
|
* the agfl for the ags in which the blocks live: 3 * sector size
|
|
|
|
* the superblock for the free block count: sector size
|
|
|
|
* the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
|
|
|
|
*/
|
|
|
|
#define XFS_CALC_RENAME_LOG_RES(mp) \
|
|
|
|
(MAX( \
|
|
|
|
((4 * (mp)->m_sb.sb_inodesize) + \
|
|
|
|
(2 * XFS_DIROP_LOG_RES(mp)) + \
|
|
|
|
(128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp)))), \
|
|
|
|
((3 * (mp)->m_sb.sb_sectsize) + \
|
|
|
|
(3 * (mp)->m_sb.sb_sectsize) + \
|
|
|
|
(mp)->m_sb.sb_sectsize + \
|
|
|
|
XFS_ALLOCFREE_LOG_RES(mp, 3) + \
|
|
|
|
(128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))))))
|
|
|
|
|
|
|
|
#define XFS_RENAME_LOG_RES(mp) ((mp)->m_reservations.tr_rename)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For creating a link to an inode:
|
|
|
|
* the parent directory inode: inode size
|
|
|
|
* the linked inode: inode size
|
|
|
|
* the directory btree could split: (max depth + v2) * dir block size
|
|
|
|
* the directory bmap btree could join or split: (max depth + v2) * blocksize
|
|
|
|
* And the bmap_finish transaction can free some bmap blocks giving:
|
|
|
|
* the agf for the ag in which the blocks live: sector size
|
|
|
|
* the agfl for the ag in which the blocks live: sector size
|
|
|
|
* the superblock for the free block count: sector size
|
|
|
|
* the allocation btrees: 2 trees * (2 * max depth - 1) * block size
|
|
|
|
*/
|
|
|
|
#define XFS_CALC_LINK_LOG_RES(mp) \
|
|
|
|
(MAX( \
|
|
|
|
((mp)->m_sb.sb_inodesize + \
|
|
|
|
(mp)->m_sb.sb_inodesize + \
|
|
|
|
XFS_DIROP_LOG_RES(mp) + \
|
|
|
|
(128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \
|
|
|
|
((mp)->m_sb.sb_sectsize + \
|
|
|
|
(mp)->m_sb.sb_sectsize + \
|
|
|
|
(mp)->m_sb.sb_sectsize + \
|
|
|
|
XFS_ALLOCFREE_LOG_RES(mp, 1) + \
|
|
|
|
(128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
|
|
|
|
|
|
|
|
#define XFS_LINK_LOG_RES(mp) ((mp)->m_reservations.tr_link)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For removing a directory entry we can modify:
|
|
|
|
* the parent directory inode: inode size
|
|
|
|
* the removed inode: inode size
|
|
|
|
* the directory btree could join: (max depth + v2) * dir block size
|
|
|
|
* the directory bmap btree could join or split: (max depth + v2) * blocksize
|
|
|
|
* And the bmap_finish transaction can free the dir and bmap blocks giving:
|
|
|
|
* the agf for the ag in which the blocks live: 2 * sector size
|
|
|
|
* the agfl for the ag in which the blocks live: 2 * sector size
|
|
|
|
* the superblock for the free block count: sector size
|
|
|
|
* the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
|
|
|
|
*/
|
|
|
|
#define XFS_CALC_REMOVE_LOG_RES(mp) \
|
|
|
|
(MAX( \
|
|
|
|
((mp)->m_sb.sb_inodesize + \
|
|
|
|
(mp)->m_sb.sb_inodesize + \
|
|
|
|
XFS_DIROP_LOG_RES(mp) + \
|
|
|
|
(128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \
|
|
|
|
((2 * (mp)->m_sb.sb_sectsize) + \
|
|
|
|
(2 * (mp)->m_sb.sb_sectsize) + \
|
|
|
|
(mp)->m_sb.sb_sectsize + \
|
|
|
|
XFS_ALLOCFREE_LOG_RES(mp, 2) + \
|
|
|
|
(128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
|
|
|
|
|
|
|
|
#define XFS_REMOVE_LOG_RES(mp) ((mp)->m_reservations.tr_remove)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For symlink we can modify:
|
|
|
|
* the parent directory inode: inode size
|
|
|
|
* the new inode: inode size
|
|
|
|
* the inode btree entry: 1 block
|
|
|
|
* the directory btree: (max depth + v2) * dir block size
|
2009-03-29 11:55:42 +04:00
|
|
|
* the directory inode's bmap btree: (max depth + v2) * block size
|
|
|
|
* the blocks for the symlink: 1 kB
|
2005-04-17 02:20:36 +04:00
|
|
|
* Or in the first xact we allocate some inodes giving:
|
|
|
|
* the agi and agf of the ag getting the new inodes: 2 * sectorsize
|
|
|
|
* the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
|
|
|
|
* the inode btree: max depth * blocksize
|
|
|
|
* the allocation btrees: 2 trees * (2 * max depth - 1) * block size
|
|
|
|
*/
|
|
|
|
#define XFS_CALC_SYMLINK_LOG_RES(mp) \
|
|
|
|
(MAX( \
|
|
|
|
((mp)->m_sb.sb_inodesize + \
|
|
|
|
(mp)->m_sb.sb_inodesize + \
|
|
|
|
XFS_FSB_TO_B(mp, 1) + \
|
|
|
|
XFS_DIROP_LOG_RES(mp) + \
|
|
|
|
1024 + \
|
|
|
|
(128 * (4 + XFS_DIROP_LOG_COUNT(mp)))), \
|
|
|
|
(2 * (mp)->m_sb.sb_sectsize + \
|
|
|
|
XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
|
2009-02-09 10:37:14 +03:00
|
|
|
XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \
|
2005-04-17 02:20:36 +04:00
|
|
|
XFS_ALLOCFREE_LOG_RES(mp, 1) + \
|
2009-02-09 10:37:14 +03:00
|
|
|
(128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
|
2005-04-17 02:20:36 +04:00
|
|
|
XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
|
|
|
|
|
|
|
|
#define XFS_SYMLINK_LOG_RES(mp) ((mp)->m_reservations.tr_symlink)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For create we can modify:
|
|
|
|
* the parent directory inode: inode size
|
|
|
|
* the new inode: inode size
|
|
|
|
* the inode btree entry: block size
|
|
|
|
* the superblock for the nlink flag: sector size
|
|
|
|
* the directory btree: (max depth + v2) * dir block size
|
2009-03-29 11:55:42 +04:00
|
|
|
* the directory inode's bmap btree: (max depth + v2) * block size
|
2005-04-17 02:20:36 +04:00
|
|
|
* Or in the first xact we allocate some inodes giving:
|
|
|
|
* the agi and agf of the ag getting the new inodes: 2 * sectorsize
|
|
|
|
* the superblock for the nlink flag: sector size
|
|
|
|
* the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
|
|
|
|
* the inode btree: max depth * blocksize
|
|
|
|
* the allocation btrees: 2 trees * (max depth - 1) * block size
|
|
|
|
*/
|
|
|
|
#define XFS_CALC_CREATE_LOG_RES(mp) \
|
|
|
|
(MAX( \
|
|
|
|
((mp)->m_sb.sb_inodesize + \
|
|
|
|
(mp)->m_sb.sb_inodesize + \
|
|
|
|
(mp)->m_sb.sb_sectsize + \
|
|
|
|
XFS_FSB_TO_B(mp, 1) + \
|
|
|
|
XFS_DIROP_LOG_RES(mp) + \
|
|
|
|
(128 * (3 + XFS_DIROP_LOG_COUNT(mp)))), \
|
|
|
|
(3 * (mp)->m_sb.sb_sectsize + \
|
|
|
|
XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
|
2009-02-09 10:37:14 +03:00
|
|
|
XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \
|
2005-04-17 02:20:36 +04:00
|
|
|
XFS_ALLOCFREE_LOG_RES(mp, 1) + \
|
2009-02-09 10:37:14 +03:00
|
|
|
(128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
|
2005-04-17 02:20:36 +04:00
|
|
|
XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
|
|
|
|
|
|
|
|
#define XFS_CREATE_LOG_RES(mp) ((mp)->m_reservations.tr_create)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Making a new directory is the same as creating a new file.
|
|
|
|
*/
|
|
|
|
#define XFS_CALC_MKDIR_LOG_RES(mp) XFS_CALC_CREATE_LOG_RES(mp)
|
|
|
|
|
|
|
|
#define XFS_MKDIR_LOG_RES(mp) ((mp)->m_reservations.tr_mkdir)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In freeing an inode we can modify:
|
|
|
|
* the inode being freed: inode size
|
|
|
|
* the super block free inode counter: sector size
|
|
|
|
* the agi hash list and counters: sector size
|
|
|
|
* the inode btree entry: block size
|
|
|
|
* the on disk inode before ours in the agi hash list: inode cluster size
|
|
|
|
* the inode btree: max depth * blocksize
|
|
|
|
* the allocation btrees: 2 trees * (max depth - 1) * block size
|
|
|
|
*/
|
|
|
|
#define XFS_CALC_IFREE_LOG_RES(mp) \
|
|
|
|
((mp)->m_sb.sb_inodesize + \
|
|
|
|
(mp)->m_sb.sb_sectsize + \
|
|
|
|
(mp)->m_sb.sb_sectsize + \
|
|
|
|
XFS_FSB_TO_B((mp), 1) + \
|
|
|
|
MAX((__uint16_t)XFS_FSB_TO_B((mp), 1), XFS_INODE_CLUSTER_SIZE(mp)) + \
|
|
|
|
(128 * 5) + \
|
|
|
|
XFS_ALLOCFREE_LOG_RES(mp, 1) + \
|
2009-02-09 10:37:14 +03:00
|
|
|
(128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
|
2005-04-17 02:20:36 +04:00
|
|
|
XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
|
|
|
|
|
|
|
|
|
|
|
|
#define XFS_IFREE_LOG_RES(mp) ((mp)->m_reservations.tr_ifree)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* When only changing the inode we log the inode and possibly the superblock
|
|
|
|
* We also add a bit of slop for the transaction stuff.
|
|
|
|
*/
|
|
|
|
#define XFS_CALC_ICHANGE_LOG_RES(mp) ((mp)->m_sb.sb_inodesize + \
|
|
|
|
(mp)->m_sb.sb_sectsize + 512)
|
|
|
|
|
|
|
|
#define XFS_ICHANGE_LOG_RES(mp) ((mp)->m_reservations.tr_ichange)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Growing the data section of the filesystem.
|
|
|
|
* superblock
|
|
|
|
* agi and agf
|
|
|
|
* allocation btrees
|
|
|
|
*/
|
|
|
|
#define XFS_CALC_GROWDATA_LOG_RES(mp) \
|
|
|
|
((mp)->m_sb.sb_sectsize * 3 + \
|
|
|
|
XFS_ALLOCFREE_LOG_RES(mp, 1) + \
|
|
|
|
(128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
|
|
|
|
|
|
|
|
#define XFS_GROWDATA_LOG_RES(mp) ((mp)->m_reservations.tr_growdata)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Growing the rt section of the filesystem.
|
|
|
|
* In the first set of transactions (ALLOC) we allocate space to the
|
|
|
|
* bitmap or summary files.
|
|
|
|
* superblock: sector size
|
|
|
|
* agf of the ag from which the extent is allocated: sector size
|
|
|
|
* bmap btree for bitmap/summary inode: max depth * blocksize
|
|
|
|
* bitmap/summary inode: inode size
|
|
|
|
* allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
|
|
|
|
*/
|
|
|
|
#define XFS_CALC_GROWRTALLOC_LOG_RES(mp) \
|
|
|
|
(2 * (mp)->m_sb.sb_sectsize + \
|
|
|
|
XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \
|
|
|
|
(mp)->m_sb.sb_inodesize + \
|
|
|
|
XFS_ALLOCFREE_LOG_RES(mp, 1) + \
|
|
|
|
(128 * \
|
|
|
|
(3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + \
|
|
|
|
XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
|
|
|
|
|
|
|
|
#define XFS_GROWRTALLOC_LOG_RES(mp) ((mp)->m_reservations.tr_growrtalloc)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Growing the rt section of the filesystem.
|
|
|
|
* In the second set of transactions (ZERO) we zero the new metadata blocks.
|
|
|
|
* one bitmap/summary block: blocksize
|
|
|
|
*/
|
|
|
|
#define XFS_CALC_GROWRTZERO_LOG_RES(mp) \
|
|
|
|
((mp)->m_sb.sb_blocksize + 128)
|
|
|
|
|
|
|
|
#define XFS_GROWRTZERO_LOG_RES(mp) ((mp)->m_reservations.tr_growrtzero)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Growing the rt section of the filesystem.
|
|
|
|
* In the third set of transactions (FREE) we update metadata without
|
|
|
|
* allocating any new blocks.
|
|
|
|
* superblock: sector size
|
|
|
|
* bitmap inode: inode size
|
|
|
|
* summary inode: inode size
|
|
|
|
* one bitmap block: blocksize
|
|
|
|
* summary blocks: new summary size
|
|
|
|
*/
|
|
|
|
#define XFS_CALC_GROWRTFREE_LOG_RES(mp) \
|
|
|
|
((mp)->m_sb.sb_sectsize + \
|
|
|
|
2 * (mp)->m_sb.sb_inodesize + \
|
|
|
|
(mp)->m_sb.sb_blocksize + \
|
|
|
|
(mp)->m_rsumsize + \
|
|
|
|
(128 * 5))
|
|
|
|
|
|
|
|
#define XFS_GROWRTFREE_LOG_RES(mp) ((mp)->m_reservations.tr_growrtfree)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Logging the inode modification timestamp on a synchronous write.
|
|
|
|
* inode
|
|
|
|
*/
|
|
|
|
#define XFS_CALC_SWRITE_LOG_RES(mp) \
|
|
|
|
((mp)->m_sb.sb_inodesize + 128)
|
|
|
|
|
|
|
|
#define XFS_SWRITE_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Logging the inode timestamps on an fsync -- same as SWRITE
|
|
|
|
* as long as SWRITE logs the entire inode core
|
|
|
|
*/
|
|
|
|
#define XFS_FSYNC_TS_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Logging the inode mode bits when writing a setuid/setgid file
|
|
|
|
* inode
|
|
|
|
*/
|
|
|
|
#define XFS_CALC_WRITEID_LOG_RES(mp) \
|
|
|
|
((mp)->m_sb.sb_inodesize + 128)
|
|
|
|
|
|
|
|
#define XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Converting the inode from non-attributed to attributed.
|
|
|
|
* the inode being converted: inode size
|
|
|
|
* agf block and superblock (for block allocation)
|
|
|
|
* the new block (directory sized)
|
|
|
|
* bmap blocks for the new directory block
|
|
|
|
* allocation btrees
|
|
|
|
*/
|
|
|
|
#define XFS_CALC_ADDAFORK_LOG_RES(mp) \
|
|
|
|
((mp)->m_sb.sb_inodesize + \
|
|
|
|
(mp)->m_sb.sb_sectsize * 2 + \
|
|
|
|
(mp)->m_dirblksize + \
|
2006-06-20 07:04:51 +04:00
|
|
|
XFS_FSB_TO_B(mp, (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1)) + \
|
2005-04-17 02:20:36 +04:00
|
|
|
XFS_ALLOCFREE_LOG_RES(mp, 1) + \
|
2006-06-20 07:04:51 +04:00
|
|
|
(128 * (4 + (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + \
|
2005-04-17 02:20:36 +04:00
|
|
|
XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
|
|
|
|
|
|
|
|
#define XFS_ADDAFORK_LOG_RES(mp) ((mp)->m_reservations.tr_addafork)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Removing the attribute fork of a file
|
|
|
|
* the inode being truncated: inode size
|
2009-03-29 11:55:42 +04:00
|
|
|
* the inode's bmap btree: max depth * block size
|
2005-04-17 02:20:36 +04:00
|
|
|
* And the bmap_finish transaction can free the blocks and bmap blocks:
|
|
|
|
* the agf for each of the ags: 4 * sector size
|
|
|
|
* the agfl for each of the ags: 4 * sector size
|
|
|
|
* the super block to reflect the freed blocks: sector size
|
|
|
|
* worst case split in allocation btrees per extent assuming 4 extents:
|
|
|
|
* 4 exts * 2 trees * (2 * max depth - 1) * block size
|
|
|
|
*/
|
|
|
|
#define XFS_CALC_ATTRINVAL_LOG_RES(mp) \
|
|
|
|
(MAX( \
|
|
|
|
((mp)->m_sb.sb_inodesize + \
|
|
|
|
XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \
|
|
|
|
(128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))), \
|
|
|
|
((4 * (mp)->m_sb.sb_sectsize) + \
|
|
|
|
(4 * (mp)->m_sb.sb_sectsize) + \
|
|
|
|
(mp)->m_sb.sb_sectsize + \
|
|
|
|
XFS_ALLOCFREE_LOG_RES(mp, 4) + \
|
|
|
|
(128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))))))
|
|
|
|
|
|
|
|
#define XFS_ATTRINVAL_LOG_RES(mp) ((mp)->m_reservations.tr_attrinval)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Setting an attribute.
|
|
|
|
* the inode getting the attribute
|
|
|
|
* the superblock for allocations
|
|
|
|
* the agfs extents are allocated from
|
|
|
|
* the attribute btree * max depth
|
|
|
|
* the inode allocation btree
|
|
|
|
* Since attribute transaction space is dependent on the size of the attribute,
|
|
|
|
* the calculation is done partially at mount time and partially at runtime.
|
|
|
|
*/
|
|
|
|
#define XFS_CALC_ATTRSET_LOG_RES(mp) \
|
|
|
|
((mp)->m_sb.sb_inodesize + \
|
|
|
|
(mp)->m_sb.sb_sectsize + \
|
|
|
|
XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \
|
|
|
|
(128 * (2 + XFS_DA_NODE_MAXDEPTH)))
|
|
|
|
|
|
|
|
#define XFS_ATTRSET_LOG_RES(mp, ext) \
|
|
|
|
((mp)->m_reservations.tr_attrset + \
|
|
|
|
(ext * (mp)->m_sb.sb_sectsize) + \
|
|
|
|
(ext * XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))) + \
|
|
|
|
(128 * (ext + (ext * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))))
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Removing an attribute.
|
|
|
|
* the inode: inode size
|
|
|
|
* the attribute btree could join: max depth * block size
|
|
|
|
* the inode bmap btree could join or split: max depth * block size
|
|
|
|
* And the bmap_finish transaction can free the attr blocks freed giving:
|
|
|
|
* the agf for the ag in which the blocks live: 2 * sector size
|
|
|
|
* the agfl for the ag in which the blocks live: 2 * sector size
|
|
|
|
* the superblock for the free block count: sector size
|
|
|
|
* the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
|
|
|
|
*/
|
|
|
|
#define XFS_CALC_ATTRRM_LOG_RES(mp) \
|
|
|
|
(MAX( \
|
|
|
|
((mp)->m_sb.sb_inodesize + \
|
|
|
|
XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \
|
|
|
|
XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \
|
|
|
|
(128 * (1 + XFS_DA_NODE_MAXDEPTH + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \
|
|
|
|
((2 * (mp)->m_sb.sb_sectsize) + \
|
|
|
|
(2 * (mp)->m_sb.sb_sectsize) + \
|
|
|
|
(mp)->m_sb.sb_sectsize + \
|
|
|
|
XFS_ALLOCFREE_LOG_RES(mp, 2) + \
|
|
|
|
(128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
|
|
|
|
|
|
|
|
#define XFS_ATTRRM_LOG_RES(mp) ((mp)->m_reservations.tr_attrrm)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Clearing a bad agino number in an agi hash bucket.
|
|
|
|
*/
|
|
|
|
#define XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp) \
|
|
|
|
((mp)->m_sb.sb_sectsize + 128)
|
|
|
|
|
|
|
|
#define XFS_CLEAR_AGI_BUCKET_LOG_RES(mp) ((mp)->m_reservations.tr_clearagi)
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Various log count values.
|
|
|
|
*/
|
|
|
|
#define XFS_DEFAULT_LOG_COUNT 1
|
|
|
|
#define XFS_DEFAULT_PERM_LOG_COUNT 2
|
|
|
|
#define XFS_ITRUNCATE_LOG_COUNT 2
|
|
|
|
#define XFS_INACTIVE_LOG_COUNT 2
|
|
|
|
#define XFS_CREATE_LOG_COUNT 2
|
|
|
|
#define XFS_MKDIR_LOG_COUNT 3
|
|
|
|
#define XFS_SYMLINK_LOG_COUNT 3
|
|
|
|
#define XFS_REMOVE_LOG_COUNT 2
|
|
|
|
#define XFS_LINK_LOG_COUNT 2
|
|
|
|
#define XFS_RENAME_LOG_COUNT 2
|
|
|
|
#define XFS_WRITE_LOG_COUNT 2
|
|
|
|
#define XFS_ADDAFORK_LOG_COUNT 2
|
|
|
|
#define XFS_ATTRINVAL_LOG_COUNT 1
|
|
|
|
#define XFS_ATTRSET_LOG_COUNT 3
|
|
|
|
#define XFS_ATTRRM_LOG_COUNT 3
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Here we centralize the specification of XFS meta-data buffer
|
|
|
|
* reference count values. This determine how hard the buffer
|
|
|
|
* cache tries to hold onto the buffer.
|
|
|
|
*/
|
|
|
|
#define XFS_AGF_REF 4
|
|
|
|
#define XFS_AGI_REF 4
|
|
|
|
#define XFS_AGFL_REF 3
|
|
|
|
#define XFS_INO_BTREE_REF 3
|
|
|
|
#define XFS_ALLOC_BTREE_REF 2
|
|
|
|
#define XFS_BMAP_BTREE_REF 2
|
|
|
|
#define XFS_DIR_BTREE_REF 2
|
|
|
|
#define XFS_ATTR_BTREE_REF 1
|
|
|
|
#define XFS_INO_REF 1
|
|
|
|
#define XFS_DQUOT_REF 1
|
|
|
|
|
|
|
|
#ifdef __KERNEL__
|
2008-10-30 09:05:38 +03:00
|
|
|
|
|
|
|
struct xfs_buf;
|
|
|
|
struct xfs_buftarg;
|
|
|
|
struct xfs_efd_log_item;
|
|
|
|
struct xfs_efi_log_item;
|
|
|
|
struct xfs_inode;
|
|
|
|
struct xfs_item_ops;
|
|
|
|
struct xfs_log_iovec;
|
|
|
|
struct xfs_log_item_desc;
|
|
|
|
struct xfs_mount;
|
|
|
|
struct xfs_trans;
|
|
|
|
struct xfs_dquot_acct;
|
xfs: Improve scalability of busy extent tracking
When we free a metadata extent, we record it in the per-AG busy
extent array so that it is not re-used before the freeing
transaction hits the disk. This array is fixed size, so when it
overflows we make further allocation transactions synchronous
because we cannot track more freed extents until those transactions
hit the disk and are completed. Under heavy mixed allocation and
freeing workloads with large log buffers, we can overflow this array
quite easily.
Further, the array is sparsely populated, which means that inserts
need to search for a free slot, and array searches often have to
search many more slots that are actually used to check all the
busy extents. Quite inefficient, really.
To enable this aspect of extent freeing to scale better, we need
a structure that can grow dynamically. While in other areas of
XFS we have used radix trees, the extents being freed are at random
locations on disk so are better suited to being indexed by an rbtree.
So, use a per-AG rbtree indexed by block number to track busy
extents. This incures a memory allocation when marking an extent
busy, but should not occur too often in low memory situations. This
should scale to an arbitrary number of extents so should not be a
limitation for features such as in-memory aggregation of
transactions.
However, there are still situations where we can't avoid allocating
busy extents (such as allocation from the AGFL). To minimise the
overhead of such occurences, we need to avoid doing a synchronous
log force while holding the AGF locked to ensure that the previous
transactions are safely on disk before we use the extent. We can do
this by marking the transaction doing the allocation as synchronous
rather issuing a log force.
Because of the locking involved and the ordering of transactions,
the synchronous transaction provides the same guarantees as a
synchronous log force because it ensures that all the prior
transactions are already on disk when the synchronous transaction
hits the disk. i.e. it preserves the free->allocate order of the
extent correctly in recovery.
By doing this, we avoid holding the AGF locked while log writes are
in progress, hence reducing the length of time the lock is held and
therefore we increase the rate at which we can allocate and free
from the allocation group, thereby increasing overall throughput.
The only problem with this approach is that when a metadata buffer is
marked stale (e.g. a directory block is removed), then buffer remains
pinned and locked until the log goes to disk. The issue here is that
if that stale buffer is reallocated in a subsequent transaction, the
attempt to lock that buffer in the transaction will hang waiting
the log to go to disk to unlock and unpin the buffer. Hence if
someone tries to lock a pinned, stale, locked buffer we need to
push on the log to get it unlocked ASAP. Effectively we are trading
off a guaranteed log force for a much less common trigger for log
force to occur.
Ideally we should not reallocate busy extents. That is a much more
complex fix to the problem as it involves direct intervention in the
allocation btree searches in many places. This is left to a future
set of modifications.
Finally, now that we track busy extents in allocated memory, we
don't need the descriptors in the transaction structure to point to
them. We can replace the complex busy chunk infrastructure with a
simple linked list of busy extents. This allows us to remove a large
chunk of code, making the overall change a net reduction in code
size.
Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
2010-05-21 06:07:08 +04:00
|
|
|
struct xfs_busy_extent;
|
2008-10-30 09:05:38 +03:00
|
|
|
|
|
|
|
typedef struct xfs_log_item {
|
|
|
|
struct list_head li_ail; /* AIL pointers */
|
|
|
|
xfs_lsn_t li_lsn; /* last on-disk lsn */
|
|
|
|
struct xfs_log_item_desc *li_desc; /* ptr to current desc*/
|
|
|
|
struct xfs_mount *li_mountp; /* ptr to fs mount */
|
2008-10-30 09:39:46 +03:00
|
|
|
struct xfs_ail *li_ailp; /* ptr to AIL */
|
2008-10-30 09:05:38 +03:00
|
|
|
uint li_type; /* item type */
|
|
|
|
uint li_flags; /* misc flags */
|
|
|
|
struct xfs_log_item *li_bio_list; /* buffer item list */
|
|
|
|
void (*li_cb)(struct xfs_buf *,
|
|
|
|
struct xfs_log_item *);
|
|
|
|
/* buffer item iodone */
|
|
|
|
/* callback func */
|
|
|
|
struct xfs_item_ops *li_ops; /* function list */
|
|
|
|
} xfs_log_item_t;
|
|
|
|
|
|
|
|
#define XFS_LI_IN_AIL 0x1
|
|
|
|
#define XFS_LI_ABORTED 0x2
|
|
|
|
|
2009-12-15 02:14:59 +03:00
|
|
|
#define XFS_LI_FLAGS \
|
|
|
|
{ XFS_LI_IN_AIL, "IN_AIL" }, \
|
|
|
|
{ XFS_LI_ABORTED, "ABORTED" }
|
|
|
|
|
2008-10-30 09:05:38 +03:00
|
|
|
typedef struct xfs_item_ops {
|
|
|
|
uint (*iop_size)(xfs_log_item_t *);
|
|
|
|
void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
|
|
|
|
void (*iop_pin)(xfs_log_item_t *);
|
2010-03-08 03:26:03 +03:00
|
|
|
void (*iop_unpin)(xfs_log_item_t *);
|
2008-10-30 09:05:38 +03:00
|
|
|
void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *);
|
|
|
|
uint (*iop_trylock)(xfs_log_item_t *);
|
|
|
|
void (*iop_unlock)(xfs_log_item_t *);
|
|
|
|
xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t);
|
|
|
|
void (*iop_push)(xfs_log_item_t *);
|
|
|
|
void (*iop_pushbuf)(xfs_log_item_t *);
|
|
|
|
void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
|
|
|
|
} xfs_item_ops_t;
|
|
|
|
|
|
|
|
#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip)
|
|
|
|
#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp)
|
|
|
|
#define IOP_PIN(ip) (*(ip)->li_ops->iop_pin)(ip)
|
2010-03-08 03:26:03 +03:00
|
|
|
#define IOP_UNPIN(ip) (*(ip)->li_ops->iop_unpin)(ip)
|
2008-10-30 09:05:38 +03:00
|
|
|
#define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp)
|
|
|
|
#define IOP_TRYLOCK(ip) (*(ip)->li_ops->iop_trylock)(ip)
|
|
|
|
#define IOP_UNLOCK(ip) (*(ip)->li_ops->iop_unlock)(ip)
|
|
|
|
#define IOP_COMMITTED(ip, lsn) (*(ip)->li_ops->iop_committed)(ip, lsn)
|
|
|
|
#define IOP_PUSH(ip) (*(ip)->li_ops->iop_push)(ip)
|
|
|
|
#define IOP_PUSHBUF(ip) (*(ip)->li_ops->iop_pushbuf)(ip)
|
|
|
|
#define IOP_COMMITTING(ip, lsn) (*(ip)->li_ops->iop_committing)(ip, lsn)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Return values for the IOP_TRYLOCK() routines.
|
|
|
|
*/
|
|
|
|
#define XFS_ITEM_SUCCESS 0
|
|
|
|
#define XFS_ITEM_PINNED 1
|
|
|
|
#define XFS_ITEM_LOCKED 2
|
2010-02-02 02:13:42 +03:00
|
|
|
#define XFS_ITEM_PUSHBUF 3
|
2008-10-30 09:05:38 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* This is the type of function which can be given to xfs_trans_callback()
|
|
|
|
* to be called upon the transaction's commit to disk.
|
|
|
|
*/
|
|
|
|
typedef void (*xfs_trans_callback_t)(struct xfs_trans *, void *);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This is the structure maintained for every active transaction.
|
|
|
|
*/
|
|
|
|
typedef struct xfs_trans {
|
|
|
|
unsigned int t_magic; /* magic number */
|
|
|
|
xfs_log_callback_t t_logcb; /* log callback struct */
|
|
|
|
unsigned int t_type; /* transaction type */
|
|
|
|
unsigned int t_log_res; /* amt of log space resvd */
|
|
|
|
unsigned int t_log_count; /* count for perm log res */
|
|
|
|
unsigned int t_blk_res; /* # of blocks resvd */
|
|
|
|
unsigned int t_blk_res_used; /* # of resvd blocks used */
|
|
|
|
unsigned int t_rtx_res; /* # of rt extents resvd */
|
|
|
|
unsigned int t_rtx_res_used; /* # of resvd rt extents used */
|
2010-02-16 02:34:54 +03:00
|
|
|
struct xlog_ticket *t_ticket; /* log mgr ticket */
|
2008-10-30 09:05:38 +03:00
|
|
|
xfs_lsn_t t_lsn; /* log seq num of start of
|
|
|
|
* transaction. */
|
|
|
|
xfs_lsn_t t_commit_lsn; /* log seq num of end of
|
|
|
|
* transaction. */
|
|
|
|
struct xfs_mount *t_mountp; /* ptr to fs mount struct */
|
|
|
|
struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */
|
|
|
|
xfs_trans_callback_t t_callback; /* transaction callback */
|
|
|
|
void *t_callarg; /* callback arg */
|
|
|
|
unsigned int t_flags; /* misc flags */
|
|
|
|
int64_t t_icount_delta; /* superblock icount change */
|
|
|
|
int64_t t_ifree_delta; /* superblock ifree change */
|
|
|
|
int64_t t_fdblocks_delta; /* superblock fdblocks chg */
|
|
|
|
int64_t t_res_fdblocks_delta; /* on-disk only chg */
|
|
|
|
int64_t t_frextents_delta;/* superblock freextents chg*/
|
|
|
|
int64_t t_res_frextents_delta; /* on-disk only chg */
|
|
|
|
#ifdef DEBUG
|
|
|
|
int64_t t_ag_freeblks_delta; /* debugging counter */
|
|
|
|
int64_t t_ag_flist_delta; /* debugging counter */
|
|
|
|
int64_t t_ag_btree_delta; /* debugging counter */
|
|
|
|
#endif
|
|
|
|
int64_t t_dblocks_delta;/* superblock dblocks change */
|
|
|
|
int64_t t_agcount_delta;/* superblock agcount change */
|
|
|
|
int64_t t_imaxpct_delta;/* superblock imaxpct change */
|
|
|
|
int64_t t_rextsize_delta;/* superblock rextsize chg */
|
|
|
|
int64_t t_rbmblocks_delta;/* superblock rbmblocks chg */
|
|
|
|
int64_t t_rblocks_delta;/* superblock rblocks change */
|
|
|
|
int64_t t_rextents_delta;/* superblocks rextents chg */
|
|
|
|
int64_t t_rextslog_delta;/* superblocks rextslog chg */
|
|
|
|
unsigned int t_items_free; /* log item descs free */
|
|
|
|
xfs_log_item_chunk_t t_items; /* first log item desc chunk */
|
|
|
|
xfs_trans_header_t t_header; /* header for in-log trans */
|
xfs: Improve scalability of busy extent tracking
When we free a metadata extent, we record it in the per-AG busy
extent array so that it is not re-used before the freeing
transaction hits the disk. This array is fixed size, so when it
overflows we make further allocation transactions synchronous
because we cannot track more freed extents until those transactions
hit the disk and are completed. Under heavy mixed allocation and
freeing workloads with large log buffers, we can overflow this array
quite easily.
Further, the array is sparsely populated, which means that inserts
need to search for a free slot, and array searches often have to
search many more slots that are actually used to check all the
busy extents. Quite inefficient, really.
To enable this aspect of extent freeing to scale better, we need
a structure that can grow dynamically. While in other areas of
XFS we have used radix trees, the extents being freed are at random
locations on disk so are better suited to being indexed by an rbtree.
So, use a per-AG rbtree indexed by block number to track busy
extents. This incures a memory allocation when marking an extent
busy, but should not occur too often in low memory situations. This
should scale to an arbitrary number of extents so should not be a
limitation for features such as in-memory aggregation of
transactions.
However, there are still situations where we can't avoid allocating
busy extents (such as allocation from the AGFL). To minimise the
overhead of such occurences, we need to avoid doing a synchronous
log force while holding the AGF locked to ensure that the previous
transactions are safely on disk before we use the extent. We can do
this by marking the transaction doing the allocation as synchronous
rather issuing a log force.
Because of the locking involved and the ordering of transactions,
the synchronous transaction provides the same guarantees as a
synchronous log force because it ensures that all the prior
transactions are already on disk when the synchronous transaction
hits the disk. i.e. it preserves the free->allocate order of the
extent correctly in recovery.
By doing this, we avoid holding the AGF locked while log writes are
in progress, hence reducing the length of time the lock is held and
therefore we increase the rate at which we can allocate and free
from the allocation group, thereby increasing overall throughput.
The only problem with this approach is that when a metadata buffer is
marked stale (e.g. a directory block is removed), then buffer remains
pinned and locked until the log goes to disk. The issue here is that
if that stale buffer is reallocated in a subsequent transaction, the
attempt to lock that buffer in the transaction will hang waiting
the log to go to disk to unlock and unpin the buffer. Hence if
someone tries to lock a pinned, stale, locked buffer we need to
push on the log to get it unlocked ASAP. Effectively we are trading
off a guaranteed log force for a much less common trigger for log
force to occur.
Ideally we should not reallocate busy extents. That is a much more
complex fix to the problem as it involves direct intervention in the
allocation btree searches in many places. This is left to a future
set of modifications.
Finally, now that we track busy extents in allocated memory, we
don't need the descriptors in the transaction structure to point to
them. We can replace the complex busy chunk infrastructure with a
simple linked list of busy extents. This allows us to remove a large
chunk of code, making the overall change a net reduction in code
size.
Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
2010-05-21 06:07:08 +04:00
|
|
|
struct list_head t_busy; /* list of busy extents */
|
2008-10-30 09:05:38 +03:00
|
|
|
unsigned long t_pflags; /* saved process flags state */
|
|
|
|
} xfs_trans_t;
|
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
/*
|
|
|
|
* XFS transaction mechanism exported interfaces that are
|
|
|
|
* actually macros.
|
|
|
|
*/
|
|
|
|
#define xfs_trans_get_log_res(tp) ((tp)->t_log_res)
|
|
|
|
#define xfs_trans_get_log_count(tp) ((tp)->t_log_count)
|
|
|
|
#define xfs_trans_get_block_res(tp) ((tp)->t_blk_res)
|
|
|
|
#define xfs_trans_set_sync(tp) ((tp)->t_flags |= XFS_TRANS_SYNC)
|
|
|
|
|
|
|
|
#ifdef DEBUG
|
2007-02-10 10:36:10 +03:00
|
|
|
#define xfs_trans_agblocks_delta(tp, d) ((tp)->t_ag_freeblks_delta += (int64_t)d)
|
|
|
|
#define xfs_trans_agflist_delta(tp, d) ((tp)->t_ag_flist_delta += (int64_t)d)
|
|
|
|
#define xfs_trans_agbtree_delta(tp, d) ((tp)->t_ag_btree_delta += (int64_t)d)
|
2005-04-17 02:20:36 +04:00
|
|
|
#else
|
|
|
|
#define xfs_trans_agblocks_delta(tp, d)
|
|
|
|
#define xfs_trans_agflist_delta(tp, d)
|
|
|
|
#define xfs_trans_agbtree_delta(tp, d)
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* XFS transaction mechanism exported interfaces.
|
|
|
|
*/
|
|
|
|
xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint);
|
2009-10-19 08:00:03 +04:00
|
|
|
xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, uint);
|
2005-04-17 02:20:36 +04:00
|
|
|
xfs_trans_t *xfs_trans_dup(xfs_trans_t *);
|
|
|
|
int xfs_trans_reserve(xfs_trans_t *, uint, uint, uint,
|
|
|
|
uint, uint);
|
2007-02-10 10:36:10 +03:00
|
|
|
void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
|
2005-04-17 02:20:36 +04:00
|
|
|
struct xfs_buf *xfs_trans_get_buf(xfs_trans_t *, struct xfs_buftarg *, xfs_daddr_t,
|
|
|
|
int, uint);
|
|
|
|
int xfs_trans_read_buf(struct xfs_mount *, xfs_trans_t *,
|
|
|
|
struct xfs_buftarg *, xfs_daddr_t, int, uint,
|
|
|
|
struct xfs_buf **);
|
|
|
|
struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int);
|
|
|
|
|
|
|
|
void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *);
|
|
|
|
void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *);
|
|
|
|
void xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *);
|
2005-09-05 02:29:01 +04:00
|
|
|
void xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *);
|
2005-04-17 02:20:36 +04:00
|
|
|
void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *);
|
|
|
|
void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
|
|
|
|
void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *);
|
|
|
|
void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
|
|
|
|
void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
|
|
|
|
int xfs_trans_iget(struct xfs_mount *, xfs_trans_t *,
|
|
|
|
xfs_ino_t , uint, uint, struct xfs_inode **);
|
|
|
|
void xfs_trans_ijoin(xfs_trans_t *, struct xfs_inode *, uint);
|
|
|
|
void xfs_trans_ihold(xfs_trans_t *, struct xfs_inode *);
|
|
|
|
void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint);
|
|
|
|
void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
|
|
|
|
struct xfs_efi_log_item *xfs_trans_get_efi(xfs_trans_t *, uint);
|
|
|
|
void xfs_efi_release(struct xfs_efi_log_item *, uint);
|
|
|
|
void xfs_trans_log_efi_extent(xfs_trans_t *,
|
|
|
|
struct xfs_efi_log_item *,
|
|
|
|
xfs_fsblock_t,
|
|
|
|
xfs_extlen_t);
|
|
|
|
struct xfs_efd_log_item *xfs_trans_get_efd(xfs_trans_t *,
|
|
|
|
struct xfs_efi_log_item *,
|
|
|
|
uint);
|
|
|
|
void xfs_trans_log_efd_extent(xfs_trans_t *,
|
|
|
|
struct xfs_efd_log_item *,
|
|
|
|
xfs_fsblock_t,
|
|
|
|
xfs_extlen_t);
|
2005-11-02 02:26:59 +03:00
|
|
|
int _xfs_trans_commit(xfs_trans_t *,
|
|
|
|
uint flags,
|
|
|
|
int *);
|
2007-05-08 07:48:42 +04:00
|
|
|
#define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL)
|
2005-04-17 02:20:36 +04:00
|
|
|
void xfs_trans_cancel(xfs_trans_t *, int);
|
[XFS] Move AIL pushing into it's own thread
When many hundreds to thousands of threads all try to do simultaneous
transactions and the log is in a tail-pushing situation (i.e. full), we
can get multiple threads walking the AIL list and contending on the AIL
lock.
The AIL push is, in effect, a simple I/O dispatch algorithm complicated by
the ordering constraints placed on it by the transaction subsystem. It
really does not need multiple threads to push on it - even when only a
single CPU is pushing the AIL, it can push the I/O out far faster that
pretty much any disk subsystem can handle.
So, to avoid contention problems stemming from multiple list walkers, move
the list walk off into another thread and simply provide a "target" to
push to. When a thread requires a push, it sets the target and wakes the
push thread, then goes to sleep waiting for the required amount of space
to become available in the log.
This mechanism should also be a lot fairer under heavy load as the waiters
will queue in arrival order, rather than queuing in "who completed a push
first" order.
Also, by moving the pushing to a separate thread we can do more
effectively overload detection and prevention as we can keep context from
loop iteration to loop iteration. That is, we can push only part of the
list each loop and not have to loop back to the start of the list every
time we run. This should also help by reducing the number of items we try
to lock and/or push items that we cannot move.
Note that this patch is not intended to solve the inefficiencies in the
AIL structure and the associated issues with extremely large list
contents. That needs to be addresses separately; parallel access would
cause problems to any new structure as well, so I'm only aiming to isolate
the structure from unbounded parallelism here.
SGI-PV: 972759
SGI-Modid: xfs-linux-melb:xfs-kern:30371a
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
2008-02-05 04:13:32 +03:00
|
|
|
int xfs_trans_ail_init(struct xfs_mount *);
|
|
|
|
void xfs_trans_ail_destroy(struct xfs_mount *);
|
2005-04-17 02:20:36 +04:00
|
|
|
|
2007-11-23 08:28:09 +03:00
|
|
|
extern kmem_zone_t *xfs_trans_zone;
|
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
#endif /* __KERNEL__ */
|
|
|
|
|
2008-10-30 09:05:38 +03:00
|
|
|
void xfs_trans_init(struct xfs_mount *);
|
|
|
|
int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);
|
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
#endif /* __XFS_TRANS_H__ */
|