ocfs2: Validate metadata only when it's read from disk.

Add an optional validation hook to ocfs2_read_blocks().  Now the
validation function is only called when a block was actually read off of
disk.  It is not called when the buffer was in cache.

We add a buffer state bit BH_NeedsValidate to flag these buffers.  It
must always be one higher than the last JBD2 buffer state bit.

The dinode, dirblock, extent_block, and xattr_block validators are
lifted to this scheme directly.  The group_descriptor validator needs to
be split into two pieces.  The first part only needs the gd buffer and
is passed to ocfs2_read_block().  The second part requires the dinode as
well, and is called every time.  It's only 3 compares, so it's tiny.
This also allows us to clean up the non-fatal gd check used by resize.c.
It now has no magic argument.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Signed-off-by: Mark Fasheh <mfasheh@suse.com>
This commit is contained in:
Joel Becker 2008-11-13 14:49:19 -08:00 коммит произвёл Mark Fasheh
Родитель 4ae1d69bed
Коммит 970e4936d7
10 изменённых файлов: 149 добавлений и 97 удалений

Просмотреть файл

@ -684,6 +684,9 @@ static int ocfs2_validate_extent_block(struct super_block *sb,
struct ocfs2_extent_block *eb =
(struct ocfs2_extent_block *)bh->b_data;
mlog(0, "Validating extent block %llu\n",
(unsigned long long)bh->b_blocknr);
if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
ocfs2_error(sb,
"Extent block #%llu has bad signature %.*s",
@ -719,21 +722,13 @@ int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
int rc;
struct buffer_head *tmp = *bh;
rc = ocfs2_read_block(inode, eb_blkno, &tmp);
if (rc)
goto out;
rc = ocfs2_validate_extent_block(inode->i_sb, tmp);
if (rc) {
brelse(tmp);
goto out;
}
rc = ocfs2_read_block(inode, eb_blkno, &tmp,
ocfs2_validate_extent_block);
/* If ocfs2_read_block() got us a new bh, pass it up. */
if (!*bh)
if (!rc && !*bh)
*bh = tmp;
out:
return rc;
}

Просмотреть файл

@ -39,6 +39,19 @@
#include "buffer_head_io.h"
/*
* Bits on bh->b_state used by ocfs2.
*
* These MUST be after the JBD2 bits. Currently BH_Unshadow is the last
* JBD2 bit.
*/
enum ocfs2_state_bits {
BH_NeedsValidate = BH_Unshadow + 1,
};
/* Expand the magic b_state functions */
BUFFER_FNS(NeedsValidate, needs_validate);
int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
struct inode *inode)
{
@ -166,7 +179,9 @@ bail:
}
int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
struct buffer_head *bhs[], int flags)
struct buffer_head *bhs[], int flags,
int (*validate)(struct super_block *sb,
struct buffer_head *bh))
{
int status = 0;
int i, ignore_cache = 0;
@ -298,6 +313,8 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
clear_buffer_uptodate(bh);
get_bh(bh); /* for end_buffer_read_sync() */
if (validate)
set_buffer_needs_validate(bh);
bh->b_end_io = end_buffer_read_sync;
submit_bh(READ, bh);
continue;
@ -328,6 +345,20 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
bhs[i] = NULL;
continue;
}
if (buffer_needs_validate(bh)) {
/* We never set NeedsValidate if the
* buffer was held by the journal, so
* that better not have changed */
BUG_ON(buffer_jbd(bh));
clear_buffer_needs_validate(bh);
status = validate(inode->i_sb, bh);
if (status) {
put_bh(bh);
bhs[i] = NULL;
continue;
}
}
}
/* Always set the buffer in the cache, even if it was

Просмотреть файл

@ -31,21 +31,24 @@
void ocfs2_end_buffer_io_sync(struct buffer_head *bh,
int uptodate);
static inline int ocfs2_read_block(struct inode *inode,
u64 off,
struct buffer_head **bh);
int ocfs2_write_block(struct ocfs2_super *osb,
struct buffer_head *bh,
struct inode *inode);
int ocfs2_read_blocks(struct inode *inode,
u64 block,
int nr,
struct buffer_head *bhs[],
int flags);
int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
unsigned int nr, struct buffer_head *bhs[]);
/*
* If not NULL, validate() will be called on a buffer that is freshly
* read from disk. It will not be called if the buffer was in cache.
* Note that if validate() is being used for this buffer, it needs to
* be set even for a READAHEAD call, as it marks the buffer for later
* validation.
*/
int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
struct buffer_head *bhs[], int flags,
int (*validate)(struct super_block *sb,
struct buffer_head *bh));
int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
struct buffer_head *bh);
@ -53,7 +56,9 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
#define OCFS2_BH_READAHEAD 8
static inline int ocfs2_read_block(struct inode *inode, u64 off,
struct buffer_head **bh)
struct buffer_head **bh,
int (*validate)(struct super_block *sb,
struct buffer_head *bh))
{
int status = 0;
@ -63,7 +68,7 @@ static inline int ocfs2_read_block(struct inode *inode, u64 off,
goto bail;
}
status = ocfs2_read_blocks(inode, off, 1, bh, 0);
status = ocfs2_read_blocks(inode, off, 1, bh, 0, validate);
bail:
return status;

Просмотреть файл

@ -214,6 +214,8 @@ static int ocfs2_validate_dir_block(struct super_block *sb,
* Nothing yet. We don't validate dirents here, that's handled
* in-place when the code walks them.
*/
mlog(0, "Validating dirblock %llu\n",
(unsigned long long)bh->b_blocknr);
return 0;
}
@ -255,20 +257,13 @@ static int ocfs2_read_dir_block(struct inode *inode, u64 v_block,
goto out;
}
rc = ocfs2_read_blocks(inode, p_blkno, 1, &tmp, flags);
rc = ocfs2_read_blocks(inode, p_blkno, 1, &tmp, flags,
ocfs2_validate_dir_block);
if (rc) {
mlog_errno(rc);
goto out;
}
if (!(flags & OCFS2_BH_READAHEAD)) {
rc = ocfs2_validate_dir_block(inode->i_sb, tmp);
if (rc) {
brelse(tmp);
goto out;
}
}
/* If ocfs2_read_blocks() got us a new bh, pass it up. */
if (!*bh)
*bh = tmp;

Просмотреть файл

@ -1255,6 +1255,9 @@ int ocfs2_validate_inode_block(struct super_block *sb,
int rc = -EINVAL;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
mlog(0, "Validating dinode %llu\n",
(unsigned long long)bh->b_blocknr);
BUG_ON(!buffer_uptodate(bh));
if (!OCFS2_IS_VALID_DINODE(di)) {
@ -1300,23 +1303,12 @@ int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
struct buffer_head *tmp = *bh;
rc = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, &tmp,
flags);
if (rc)
goto out;
if (!(flags & OCFS2_BH_READAHEAD)) {
rc = ocfs2_validate_inode_block(inode->i_sb, tmp);
if (rc) {
brelse(tmp);
goto out;
}
}
flags, ocfs2_validate_inode_block);
/* If ocfs2_read_blocks() got us a new bh, pass it up. */
if (!*bh)
if (!rc && !*bh)
*bh = tmp;
out:
return rc;
}

Просмотреть файл

@ -394,7 +394,7 @@ static int ocfs2_check_new_group(struct inode *inode,
(struct ocfs2_group_desc *)group_bh->b_data;
u16 cl_bpc = le16_to_cpu(di->id2.i_chain.cl_bpc);
ret = ocfs2_validate_group_descriptor(inode->i_sb, di, group_bh, 1);
ret = ocfs2_check_group_descriptor(inode->i_sb, di, group_bh);
if (ret)
goto out;

Просмотреть файл

@ -151,7 +151,7 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
* this is not true, the read of -1 (UINT64_MAX) will fail.
*/
ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh,
OCFS2_BH_IGNORE_CACHE);
OCFS2_BH_IGNORE_CACHE, NULL);
if (ret == 0) {
spin_lock(&osb->osb_lock);
ocfs2_update_slot_info(si);
@ -405,7 +405,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
bh = NULL; /* Acquire a fresh bh */
status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh,
OCFS2_BH_IGNORE_CACHE);
OCFS2_BH_IGNORE_CACHE, NULL);
if (status < 0) {
mlog_errno(status);
goto bail;

Просмотреть файл

@ -145,14 +145,6 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
}
int ocfs2_validate_group_descriptor(struct super_block *sb,
struct ocfs2_dinode *di,
struct buffer_head *bh,
int clean_error)
{
unsigned int max_bits;
struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
#define do_error(fmt, ...) \
do{ \
if (clean_error) \
@ -161,6 +153,12 @@ int ocfs2_validate_group_descriptor(struct super_block *sb,
ocfs2_error(sb, fmt, ##__VA_ARGS__); \
} while (0)
static int ocfs2_validate_gd_self(struct super_block *sb,
struct buffer_head *bh,
int clean_error)
{
struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
do_error("Group descriptor #%llu has bad signature %.*s",
(unsigned long long)bh->b_blocknr, 7,
@ -184,6 +182,35 @@ int ocfs2_validate_group_descriptor(struct super_block *sb,
return -EINVAL;
}
if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
do_error("Group descriptor #%llu has bit count %u but "
"claims that %u are free",
(unsigned long long)bh->b_blocknr,
le16_to_cpu(gd->bg_bits),
le16_to_cpu(gd->bg_free_bits_count));
return -EINVAL;
}
if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
do_error("Group descriptor #%llu has bit count %u but "
"max bitmap bits of %u",
(unsigned long long)bh->b_blocknr,
le16_to_cpu(gd->bg_bits),
8 * le16_to_cpu(gd->bg_size));
return -EINVAL;
}
return 0;
}
static int ocfs2_validate_gd_parent(struct super_block *sb,
struct ocfs2_dinode *di,
struct buffer_head *bh,
int clean_error)
{
unsigned int max_bits;
struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
if (di->i_blkno != gd->bg_parent_dinode) {
do_error("Group descriptor #%llu has bad parent "
"pointer (%llu, expected %llu)",
@ -209,26 +236,35 @@ int ocfs2_validate_group_descriptor(struct super_block *sb,
return -EINVAL;
}
if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
do_error("Group descriptor #%llu has bit count %u but "
"claims that %u are free",
(unsigned long long)bh->b_blocknr,
le16_to_cpu(gd->bg_bits),
le16_to_cpu(gd->bg_free_bits_count));
return -EINVAL;
return 0;
}
if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
do_error("Group descriptor #%llu has bit count %u but "
"max bitmap bits of %u",
(unsigned long long)bh->b_blocknr,
le16_to_cpu(gd->bg_bits),
8 * le16_to_cpu(gd->bg_size));
return -EINVAL;
}
#undef do_error
return 0;
/*
* This version only prints errors. It does not fail the filesystem, and
* exists only for resize.
*/
int ocfs2_check_group_descriptor(struct super_block *sb,
struct ocfs2_dinode *di,
struct buffer_head *bh)
{
int rc;
rc = ocfs2_validate_gd_self(sb, bh, 1);
if (!rc)
rc = ocfs2_validate_gd_parent(sb, di, bh, 1);
return rc;
}
static int ocfs2_validate_group_descriptor(struct super_block *sb,
struct buffer_head *bh)
{
mlog(0, "Validating group descriptor %llu\n",
(unsigned long long)bh->b_blocknr);
return ocfs2_validate_gd_self(sb, bh, 0);
}
int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
@ -237,11 +273,12 @@ int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
int rc;
struct buffer_head *tmp = *bh;
rc = ocfs2_read_block(inode, gd_blkno, &tmp);
rc = ocfs2_read_block(inode, gd_blkno, &tmp,
ocfs2_validate_group_descriptor);
if (rc)
goto out;
rc = ocfs2_validate_group_descriptor(inode->i_sb, di, tmp, 0);
rc = ocfs2_validate_gd_parent(inode->i_sb, di, tmp, 0);
if (rc) {
brelse(tmp);
goto out;

Просмотреть файл

@ -165,16 +165,15 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac);
u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster);
/*
* By default, ocfs2_validate_group_descriptor() calls ocfs2_error() when it
* By default, ocfs2_read_group_descriptor() calls ocfs2_error() when it
* finds a problem. A caller that wants to check a group descriptor
* without going readonly passes a nonzero clean_error. This is only
* resize, really. Everyone else should be using
* ocfs2_read_group_descriptor().
* without going readonly should read the block with ocfs2_read_block[s]()
* and then checking it with this function. This is only resize, really.
* Everyone else should be using ocfs2_read_group_descriptor().
*/
int ocfs2_validate_group_descriptor(struct super_block *sb,
int ocfs2_check_group_descriptor(struct super_block *sb,
struct ocfs2_dinode *di,
struct buffer_head *bh,
int clean_error);
struct buffer_head *bh);
/*
* Read a group descriptor block into *bh. If *bh is NULL, a bh will be
* allocated. This is a cached read. The descriptor will be validated with

Просмотреть файл

@ -266,7 +266,8 @@ static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
int rc;
rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno,
bucket->bu_blocks, bucket->bu_bhs, 0);
bucket->bu_blocks, bucket->bu_bhs, 0,
NULL);
if (rc)
ocfs2_xattr_bucket_relse(bucket);
return rc;
@ -359,12 +360,8 @@ static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
int rc;
struct buffer_head *tmp = *bh;
rc = ocfs2_read_block(inode, xb_blkno, &tmp);
if (!rc) {
rc = ocfs2_validate_xattr_block(inode->i_sb, tmp);
if (rc)
brelse(tmp);
}
rc = ocfs2_read_block(inode, xb_blkno, &tmp,
ocfs2_validate_xattr_block);
/* If ocfs2_read_block() got us a new bh, pass it up. */
if (!rc && !*bh)
@ -925,7 +922,7 @@ static int ocfs2_xattr_get_value_outside(struct inode *inode,
blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
/* Copy ocfs2_xattr_value */
for (i = 0; i < num_clusters * bpc; i++, blkno++) {
ret = ocfs2_read_block(inode, blkno, &bh);
ret = ocfs2_read_block(inode, blkno, &bh, NULL);
if (ret) {
mlog_errno(ret);
goto out;
@ -1174,7 +1171,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
for (i = 0; i < num_clusters * bpc; i++, blkno++) {
ret = ocfs2_read_block(inode, blkno, &bh);
ret = ocfs2_read_block(inode, blkno, &bh, NULL);
if (ret) {
mlog_errno(ret);
goto out;
@ -2206,7 +2203,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
base = xis->base;
credits += OCFS2_INODE_UPDATE_CREDITS;
} else {
int i, block_off;
int i, block_off = 0;
xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
xe = xbs->here;
name_offset = le16_to_cpu(xe->xe_name_offset);
@ -2840,6 +2837,7 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode,
break;
}
xe_name = bucket_block(bucket, block_off) + new_offset;
if (!memcmp(name, xe_name, name_len)) {
*xe_index = i;
@ -3598,7 +3596,7 @@ static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
goto out;
}
ret = ocfs2_read_block(inode, prev_blkno, &old_bh);
ret = ocfs2_read_block(inode, prev_blkno, &old_bh, NULL);
if (ret < 0) {
mlog_errno(ret);
brelse(new_bh);
@ -3990,7 +3988,7 @@ static int ocfs2_cp_xattr_cluster(struct inode *inode,
ocfs2_journal_dirty(handle, first_bh);
/* update the new bucket header. */
ret = ocfs2_read_block(inode, to_blk_start, &bh);
ret = ocfs2_read_block(inode, to_blk_start, &bh, NULL);
if (ret < 0) {
mlog_errno(ret);
goto out;
@ -4337,7 +4335,7 @@ static int ocfs2_add_new_xattr_bucket(struct inode *inode,
goto out;
}
ret = ocfs2_read_block(inode, p_blkno, &first_bh);
ret = ocfs2_read_block(inode, p_blkno, &first_bh, NULL);
if (ret) {
mlog_errno(ret);
goto out;
@ -4635,7 +4633,7 @@ static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
value_blk += header_bh->b_blocknr;
ret = ocfs2_read_block(inode, value_blk, &value_bh);
ret = ocfs2_read_block(inode, value_blk, &value_bh, NULL);
if (ret) {
mlog_errno(ret);
goto out;