xfs: support discontiguous buffers in the xfs_buf_log_item

Log the changes to each segment of a discontiguous buffer in separate
buffer format structures. This means log recovery will recover all the
changes on a per segment basis without requiring any knowledge of the
fact that it was logged from a compound buffer.

To do this, we need to be able to determine what buffer segment any
given offset into the compound buffer sits over. This enables us to
translate the dirty bitmap into the number of separate buffer format
structures required.

We also need to be able to determine the number of bitmap elements
that a given buffer segment has, as this determines the size of the
buffer format structure. Hence we need to be able to determine both
the start offset into the buffer and the length of a given segment to
be able to calculate this.

With this information, we can preallocate, build and format the
correct log vector array for each segment in a compound buffer to
appear exactly the same as individually logged buffers in the log.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ben Myers <bpm@sgi.com>
This commit is contained in:
Dave Chinner 2012-06-22 18:50:12 +10:00 коммит произвёл Ben Myers
Родитель de2a4f5919
Коммит 372cc85ec6
2 изменённых файлов: 300 добавлений и 149 удалений

Просмотреть файл

@ -153,33 +153,25 @@ STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp);
* If the XFS_BLI_STALE flag has been set, then log nothing. * If the XFS_BLI_STALE flag has been set, then log nothing.
*/ */
STATIC uint STATIC uint
xfs_buf_item_size( xfs_buf_item_size_segment(
struct xfs_log_item *lip) struct xfs_buf_log_item *bip,
struct xfs_buf_log_format *blfp)
{ {
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
struct xfs_buf *bp = bip->bli_buf; struct xfs_buf *bp = bip->bli_buf;
uint nvecs; uint nvecs;
int next_bit; int next_bit;
int last_bit; int last_bit;
ASSERT(atomic_read(&bip->bli_refcount) > 0); last_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
if (bip->bli_flags & XFS_BLI_STALE) { if (last_bit == -1)
/* return 0;
* The buffer is stale, so all we need to log
* is the buf log format structure with the /*
* cancel flag in it. * initial count for a dirty buffer is 2 vectors - the format structure
*/ * and the first dirty region.
trace_xfs_buf_item_size_stale(bip); */
ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); nvecs = 2;
return 1;
}
ASSERT(bip->bli_flags & XFS_BLI_LOGGED);
nvecs = 1;
last_bit = xfs_next_bit(bip->bli_format.blf_data_map,
bip->bli_format.blf_map_size, 0);
ASSERT(last_bit != -1);
nvecs++;
while (last_bit != -1) { while (last_bit != -1) {
/* /*
* This takes the bit number to start looking from and * This takes the bit number to start looking from and
@ -187,16 +179,15 @@ xfs_buf_item_size(
* if there are no more bits set or the start bit is * if there are no more bits set or the start bit is
* beyond the end of the bitmap. * beyond the end of the bitmap.
*/ */
next_bit = xfs_next_bit(bip->bli_format.blf_data_map, next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size,
bip->bli_format.blf_map_size, last_bit + 1);
last_bit + 1);
/* /*
* If we run out of bits, leave the loop, * If we run out of bits, leave the loop,
* else if we find a new set of bits bump the number of vecs, * else if we find a new set of bits bump the number of vecs,
* else keep scanning the current set of bits. * else keep scanning the current set of bits.
*/ */
if (next_bit == -1) { if (next_bit == -1) {
last_bit = -1; break;
} else if (next_bit != last_bit + 1) { } else if (next_bit != last_bit + 1) {
last_bit = next_bit; last_bit = next_bit;
nvecs++; nvecs++;
@ -210,10 +201,180 @@ xfs_buf_item_size(
} }
} }
return nvecs;
}
/*
* This returns the number of log iovecs needed to log the given buf log item.
*
* It calculates this as 1 iovec for the buf log format structure and 1 for each
* stretch of non-contiguous chunks to be logged. Contiguous chunks are logged
* in a single iovec.
*
* Discontiguous buffers need a format structure per region that is being
* logged. This makes the changes in the buffer appear to log recovery as though
* they came from separate buffers, just like would occur if multiple buffers
* were used instead of a single discontiguous buffer. This enables
* discontiguous buffers to be in-memory constructs, completely transparent to
* what ends up on disk.
*
* If the XFS_BLI_STALE flag has been set, then log nothing but the buf log
* format structures.
*/
STATIC uint
xfs_buf_item_size(
struct xfs_log_item *lip)
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
uint nvecs;
int i;
ASSERT(atomic_read(&bip->bli_refcount) > 0);
if (bip->bli_flags & XFS_BLI_STALE) {
/*
* The buffer is stale, so all we need to log
* is the buf log format structure with the
* cancel flag in it.
*/
trace_xfs_buf_item_size_stale(bip);
ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
return bip->bli_format_count;
}
ASSERT(bip->bli_flags & XFS_BLI_LOGGED);
/*
* the vector count is based on the number of buffer vectors we have
* dirty bits in. This will only be greater than one when we have a
* compound buffer with more than one segment dirty. Hence for compound
* buffers we need to track which segment the dirty bits correspond to,
* and when we move from one segment to the next increment the vector
* count for the extra buf log format structure that will need to be
* written.
*/
nvecs = 0;
for (i = 0; i < bip->bli_format_count; i++) {
nvecs += xfs_buf_item_size_segment(bip, &bip->bli_formats[i]);
}
trace_xfs_buf_item_size(bip); trace_xfs_buf_item_size(bip);
return nvecs; return nvecs;
} }
/*
 * Fill in the log iovecs for a single segment of a buffer.
 *
 * @bip:	buf log item the segment belongs to
 * @vecp:	first unused log iovec
 * @offset:	offset of the start of this segment within the buffer, added
 *		to the chunk offset before it is handed to xfs_buf_offset()
 * @blfp:	buf log format structure describing this segment
 *
 * The first iovec points at the format structure itself, the following ones
 * at each run of contiguous dirty chunks recorded in blfp->blf_data_map.
 * blfp->blf_size is set to the number of vectors accounted to this segment.
 *
 * Returns the next unused log iovec.
 */
static struct xfs_log_iovec *
xfs_buf_item_format_segment(
	struct xfs_buf_log_item	*bip,
	struct xfs_log_iovec	*vecp,
	uint			offset,
	struct xfs_buf_log_format *blfp)
{
	struct xfs_buf	*bp = bip->bli_buf;
	uint		base_size;
	uint		nvecs;
	int		first_bit;
	int		last_bit;
	int		next_bit;
	uint		nbits;
	uint		buffer_offset;

	/* copy the flags across from the base format item */
	blfp->blf_flags = bip->bli_format.blf_flags;

	/*
	 * A segment that has no dirty bits in a non-stale buffer contributes
	 * no vectors at all: xfs_buf_item_size_segment() counts zero for it,
	 * so emitting even the format structure here would run past the end
	 * of the iovec array that was sized from that count.
	 */
	first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
	if (!(bip->bli_flags & XFS_BLI_STALE) && first_bit == -1) {
		blfp->blf_size = 0;
		return vecp;
	}

	/*
	 * Base size is the actual size of the ondisk structure - it reflects
	 * the actual size of the dirty bitmap rather than the size of the in
	 * memory structure.
	 */
	base_size = offsetof(struct xfs_buf_log_format, blf_data_map) +
			(blfp->blf_map_size * sizeof(blfp->blf_data_map[0]));
	vecp->i_addr = blfp;
	vecp->i_len = base_size;
	vecp->i_type = XLOG_REG_TYPE_BFORMAT;
	vecp++;
	nvecs = 1;

	if (bip->bli_flags & XFS_BLI_STALE) {
		/*
		 * The buffer is stale, so all we need to log
		 * is the buf log format structure with the
		 * cancel flag in it.
		 */
		trace_xfs_buf_item_format_stale(bip);
		ASSERT(blfp->blf_flags & XFS_BLF_CANCEL);
		blfp->blf_size = nvecs;
		return vecp;
	}

	/*
	 * Fill in an iovec for each set of contiguous chunks.
	 */
	last_bit = first_bit;
	nbits = 1;
	for (;;) {
		/*
		 * This takes the bit number to start looking from and
		 * returns the next set bit from there.  It returns -1
		 * if there are no more bits set or the start bit is
		 * beyond the end of the bitmap.
		 */
		next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size,
					(uint)last_bit + 1);
		/*
		 * If we run out of bits fill in the last iovec and get
		 * out of the loop.
		 * Else if we start a new set of bits then fill in the
		 * iovec for the series we were looking at and start
		 * counting the bits in the new one.
		 * Else we're still in the same set of bits so just
		 * keep counting and scanning.
		 */
		if (next_bit == -1) {
			buffer_offset = offset + first_bit * XFS_BLF_CHUNK;
			vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
			vecp->i_len = nbits * XFS_BLF_CHUNK;
			vecp->i_type = XLOG_REG_TYPE_BCHUNK;
			nvecs++;
			break;
		} else if (next_bit != last_bit + 1) {
			buffer_offset = offset + first_bit * XFS_BLF_CHUNK;
			vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
			vecp->i_len = nbits * XFS_BLF_CHUNK;
			vecp->i_type = XLOG_REG_TYPE_BCHUNK;
			nvecs++;
			vecp++;
			first_bit = next_bit;
			last_bit = next_bit;
			nbits = 1;
		} else if (xfs_buf_offset(bp, offset +
					      (next_bit << XFS_BLF_SHIFT)) !=
			   (xfs_buf_offset(bp, offset +
					       (last_bit << XFS_BLF_SHIFT)) +
			    XFS_BLF_CHUNK)) {
			/*
			 * The chunks are adjacent in the bitmap but not in
			 * memory, so the run has to be split across iovecs.
			 */
			buffer_offset = offset + first_bit * XFS_BLF_CHUNK;
			vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
			vecp->i_len = nbits * XFS_BLF_CHUNK;
			vecp->i_type = XLOG_REG_TYPE_BCHUNK;
			/*
			 * You would think we need to bump the nvecs here too,
			 * but we do not: this number is used by recovery, and
			 * it gets confused by the boundary split here.
			 */
			vecp++;
			first_bit = next_bit;
			last_bit = next_bit;
			nbits = 1;
		} else {
			last_bit++;
			nbits++;
		}
	}

	/*
	 * Record the count in this segment's own format structure, not in
	 * the embedded one, which only describes the first segment.
	 */
	blfp->blf_size = nvecs;
	return vecp;
}
/* /*
* This is called to fill in the vector of log iovecs for the * This is called to fill in the vector of log iovecs for the
* given log buf item. It fills the first entry with a buf log * given log buf item. It fills the first entry with a buf log
@ -226,33 +387,14 @@ xfs_buf_item_format(
struct xfs_log_iovec *vecp) struct xfs_log_iovec *vecp)
{ {
struct xfs_buf_log_item *bip = BUF_ITEM(lip); struct xfs_buf_log_item *bip = BUF_ITEM(lip);
struct xfs_buf *bp = bip->bli_buf; struct xfs_buf *bp = bip->bli_buf;
uint base_size; uint offset = 0;
uint nvecs; int i;
int first_bit;
int last_bit;
int next_bit;
uint nbits;
uint buffer_offset;
ASSERT(atomic_read(&bip->bli_refcount) > 0); ASSERT(atomic_read(&bip->bli_refcount) > 0);
ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
(bip->bli_flags & XFS_BLI_STALE)); (bip->bli_flags & XFS_BLI_STALE));
/*
* Base size is the actual size of the ondisk structure - it reflects
* the actual size of the dirty bitmap rather than the size of the in
* memory structure.
*/
base_size = offsetof(struct xfs_buf_log_format, blf_data_map) +
(bip->bli_format.blf_map_size *
sizeof(bip->bli_format.blf_data_map[0]));
vecp->i_addr = &bip->bli_format;
vecp->i_len = base_size;
vecp->i_type = XLOG_REG_TYPE_BFORMAT;
vecp++;
nvecs = 1;
/* /*
* If it is an inode buffer, transfer the in-memory state to the * If it is an inode buffer, transfer the in-memory state to the
* format flags and clear the in-memory state. We do not transfer * format flags and clear the in-memory state. We do not transfer
@ -267,85 +409,12 @@ xfs_buf_item_format(
bip->bli_flags &= ~XFS_BLI_INODE_BUF; bip->bli_flags &= ~XFS_BLI_INODE_BUF;
} }
if (bip->bli_flags & XFS_BLI_STALE) { for (i = 0; i < bip->bli_format_count; i++) {
/* vecp = xfs_buf_item_format_segment(bip, vecp, offset,
* The buffer is stale, so all we need to log &bip->bli_formats[i]);
* is the buf log format structure with the offset += bp->b_maps[i].bm_len;
* cancel flag in it.
*/
trace_xfs_buf_item_format_stale(bip);
ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
bip->bli_format.blf_size = nvecs;
return;
} }
/*
* Fill in an iovec for each set of contiguous chunks.
*/
first_bit = xfs_next_bit(bip->bli_format.blf_data_map,
bip->bli_format.blf_map_size, 0);
ASSERT(first_bit != -1);
last_bit = first_bit;
nbits = 1;
for (;;) {
/*
* This takes the bit number to start looking from and
* returns the next set bit from there. It returns -1
* if there are no more bits set or the start bit is
* beyond the end of the bitmap.
*/
next_bit = xfs_next_bit(bip->bli_format.blf_data_map,
bip->bli_format.blf_map_size,
(uint)last_bit + 1);
/*
* If we run out of bits fill in the last iovec and get
* out of the loop.
* Else if we start a new set of bits then fill in the
* iovec for the series we were looking at and start
* counting the bits in the new one.
* Else we're still in the same set of bits so just
* keep counting and scanning.
*/
if (next_bit == -1) {
buffer_offset = first_bit * XFS_BLF_CHUNK;
vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
vecp->i_len = nbits * XFS_BLF_CHUNK;
vecp->i_type = XLOG_REG_TYPE_BCHUNK;
nvecs++;
break;
} else if (next_bit != last_bit + 1) {
buffer_offset = first_bit * XFS_BLF_CHUNK;
vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
vecp->i_len = nbits * XFS_BLF_CHUNK;
vecp->i_type = XLOG_REG_TYPE_BCHUNK;
nvecs++;
vecp++;
first_bit = next_bit;
last_bit = next_bit;
nbits = 1;
} else if (xfs_buf_offset(bp, next_bit << XFS_BLF_SHIFT) !=
(xfs_buf_offset(bp, last_bit << XFS_BLF_SHIFT) +
XFS_BLF_CHUNK)) {
buffer_offset = first_bit * XFS_BLF_CHUNK;
vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
vecp->i_len = nbits * XFS_BLF_CHUNK;
vecp->i_type = XLOG_REG_TYPE_BCHUNK;
/* You would think we need to bump the nvecs here too, but we do not
* this number is used by recovery, and it gets confused by the boundary
* split here
* nvecs++;
*/
vecp++;
first_bit = next_bit;
last_bit = next_bit;
nbits = 1;
} else {
last_bit++;
nbits++;
}
}
bip->bli_format.blf_size = nvecs;
/* /*
* Check to make sure everything is consistent. * Check to make sure everything is consistent.
*/ */
@ -620,6 +689,35 @@ static const struct xfs_item_ops xfs_buf_item_ops = {
.iop_committing = xfs_buf_item_committing .iop_committing = xfs_buf_item_committing
}; };
/*
 * Set up bip->bli_formats to refer to @count buf log format structures and
 * record the count in bip->bli_format_count.
 *
 * A single format uses the structure embedded in the log item; any other
 * count gets a zeroed array from kmem_zalloc().
 *
 * Returns 0 on success, or ENOMEM if the array could not be allocated.
 */
STATIC int
xfs_buf_item_get_format(
	struct xfs_buf_log_item	*bip,
	int			count)
{
	ASSERT(bip->bli_formats == NULL);
	bip->bli_format_count = count;

	if (count != 1) {
		bip->bli_formats = kmem_zalloc(
				count * sizeof(struct xfs_buf_log_format),
				KM_SLEEP);
		return bip->bli_formats ? 0 : ENOMEM;
	}

	/* the common single-segment case needs no allocation */
	bip->bli_formats = &bip->bli_format;
	return 0;
}
/*
 * Release the format array referenced by bip->bli_formats.
 *
 * Nothing is done when bli_formats points at the format structure embedded
 * in the log item; otherwise the array is freed and the pointer cleared.
 */
STATIC void
xfs_buf_item_free_format(
	struct xfs_buf_log_item	*bip)
{
	/* the embedded format is part of the item itself */
	if (bip->bli_formats == &bip->bli_format)
		return;

	kmem_free(bip->bli_formats);
	bip->bli_formats = NULL;
}
/* /*
* Allocate a new buf log item to go with the given buffer. * Allocate a new buf log item to go with the given buffer.
@ -637,6 +735,8 @@ xfs_buf_item_init(
xfs_buf_log_item_t *bip; xfs_buf_log_item_t *bip;
int chunks; int chunks;
int map_size; int map_size;
int error;
int i;
/* /*
* Check to see if there is already a buf log item for * Check to see if there is already a buf log item for
@ -648,25 +748,33 @@ xfs_buf_item_init(
if (lip != NULL && lip->li_type == XFS_LI_BUF) if (lip != NULL && lip->li_type == XFS_LI_BUF)
return; return;
/* bip = kmem_zone_zalloc(xfs_buf_item_zone, KM_SLEEP);
* chunks is the number of XFS_BLF_CHUNK size pieces
* the buffer can be divided into. Make sure not to
* truncate any pieces. map_size is the size of the
* bitmap needed to describe the chunks of the buffer.
*/
chunks = (int)((BBTOB(bp->b_length) + (XFS_BLF_CHUNK - 1)) >>
XFS_BLF_SHIFT);
map_size = (int)((chunks + NBWORD) >> BIT_TO_WORD_SHIFT);
bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone,
KM_SLEEP);
xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops); xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops);
bip->bli_buf = bp; bip->bli_buf = bp;
xfs_buf_hold(bp); xfs_buf_hold(bp);
bip->bli_format.blf_type = XFS_LI_BUF;
bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); /*
bip->bli_format.blf_len = (ushort)bp->b_length; * chunks is the number of XFS_BLF_CHUNK size pieces the buffer
bip->bli_format.blf_map_size = map_size; * can be divided into. Make sure not to truncate any pieces.
* map_size is the size of the bitmap needed to describe the
* chunks of the buffer.
*
* Discontiguous buffer support follows the layout of the underlying
* buffer. This makes the implementation as simple as possible.
*/
error = xfs_buf_item_get_format(bip, bp->b_map_count);
ASSERT(error == 0);
for (i = 0; i < bip->bli_format_count; i++) {
chunks = DIV_ROUND_UP(BBTOB(bp->b_maps[i].bm_len),
XFS_BLF_CHUNK);
map_size = DIV_ROUND_UP(chunks, NBWORD);
bip->bli_formats[i].blf_type = XFS_LI_BUF;
bip->bli_formats[i].blf_blkno = bp->b_maps[i].bm_bn;
bip->bli_formats[i].blf_len = bp->b_maps[i].bm_len;
bip->bli_formats[i].blf_map_size = map_size;
}
#ifdef XFS_TRANS_DEBUG #ifdef XFS_TRANS_DEBUG
/* /*
@ -697,10 +805,11 @@ xfs_buf_item_init(
* item's bitmap. * item's bitmap.
*/ */
void void
xfs_buf_item_log( xfs_buf_item_log_segment(
xfs_buf_log_item_t *bip, struct xfs_buf_log_item *bip,
uint first, uint first,
uint last) uint last,
uint *map)
{ {
uint first_bit; uint first_bit;
uint last_bit; uint last_bit;
@ -712,12 +821,6 @@ xfs_buf_item_log(
uint end_bit; uint end_bit;
uint mask; uint mask;
/*
* Mark the item as having some dirty data for
* quick reference in xfs_buf_item_dirty.
*/
bip->bli_flags |= XFS_BLI_DIRTY;
/* /*
* Convert byte offsets to bit numbers. * Convert byte offsets to bit numbers.
*/ */
@ -734,7 +837,7 @@ xfs_buf_item_log(
* to set a bit in. * to set a bit in.
*/ */
word_num = first_bit >> BIT_TO_WORD_SHIFT; word_num = first_bit >> BIT_TO_WORD_SHIFT;
wordp = &(bip->bli_format.blf_data_map[word_num]); wordp = &map[word_num];
/* /*
* Calculate the starting bit in the first word. * Calculate the starting bit in the first word.
@ -781,6 +884,51 @@ xfs_buf_item_log(
xfs_buf_item_log_debug(bip, first, last); xfs_buf_item_log_debug(bip, first, last);
} }
/*
 * Mark bytes first through last inclusive as dirty in the buf
 * item's bitmap.
 *
 * @first and @last are byte offsets into the whole buffer. The range is
 * split at segment boundaries, and each piece is marked in the bitmap of the
 * format structure for the segment it falls in.
 */
void
xfs_buf_item_log(
	xfs_buf_log_item_t	*bip,
	uint			first,
	uint			last)
{
	int			i;
	uint			start;		/* first byte of segment */
	uint			end;		/* last byte of segment */
	uint			seg_first;
	uint			seg_last;
	struct xfs_buf		*bp = bip->bli_buf;

	/*
	 * Mark the item as having some dirty data for
	 * quick reference in xfs_buf_item_dirty.
	 */
	bip->bli_flags |= XFS_BLI_DIRTY;

	/*
	 * walk each buffer segment and mark them dirty appropriately.
	 */
	start = 0;
	for (i = 0; i < bip->bli_format_count; i++) {
		if (start > last)
			break;

		/* the range is inclusive, so is the end of the segment */
		end = start + BBTOB(bp->b_maps[i].bm_len) - 1;

		/* skip to the segment that includes the first byte to log */
		if (first > end) {
			start += BBTOB(bp->b_maps[i].bm_len);
			continue;
		}

		/* trim the range to this segment */
		seg_first = first < start ? start : first;
		seg_last = end > last ? last : end;

		/*
		 * Each segment has its own bitmap, so the offsets handed
		 * down must be relative to the start of the segment rather
		 * than to the start of the buffer.
		 */
		xfs_buf_item_log_segment(bip, seg_first - start,
					 seg_last - start,
					 &bip->bli_formats[i].blf_data_map[0]);

		/* bm_len is converted with BBTOB() everywhere it is a byte count */
		start += BBTOB(bp->b_maps[i].bm_len);
	}
}
/* /*
* Return 1 if the buffer has some data that has been logged (at any * Return 1 if the buffer has some data that has been logged (at any
@ -802,6 +950,7 @@ xfs_buf_item_free(
kmem_free(bip->bli_logged); kmem_free(bip->bli_logged);
#endif /* XFS_TRANS_DEBUG */ #endif /* XFS_TRANS_DEBUG */
xfs_buf_item_free_format(bip);
kmem_zone_free(xfs_buf_item_zone, bip); kmem_zone_free(xfs_buf_item_zone, bip);
} }

Просмотреть файл

@ -102,6 +102,8 @@ typedef struct xfs_buf_log_item {
char *bli_orig; /* original buffer copy */ char *bli_orig; /* original buffer copy */
char *bli_logged; /* bytes logged (bitmap) */ char *bli_logged; /* bytes logged (bitmap) */
#endif #endif
int bli_format_count; /* count of headers */
struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */
struct xfs_buf_log_format bli_format; /* embedded in-log header */ struct xfs_buf_log_format bli_format; /* embedded in-log header */
} xfs_buf_log_item_t; } xfs_buf_log_item_t;