Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (50 commits)
  jbd2: sparse pointer use of zero as null
  jbd2: Use round-jiffies() function for the "5 second" ext4/jbd2 wakeup
  jbd2: Mark jbd2 slabs as SLAB_TEMPORARY
  jbd2: add lockdep support
  ext4: Use the ext4_ext_actual_len() helper function
  ext4: fix uniniatilized extent splitting error
  ext4: Check for return value from sb_set_blocksize
  ext4: Add stripe= option to /proc/mounts
  ext4: Enable the multiblock allocator by default
  ext4: Add multi block allocator for ext4
  ext4: Add new functions for searching extent tree
  ext4: Add ext4_find_next_bit()
  ext4: fix up EXT4FS_DEBUG builds
  ext4: Fix ext4_show_options to show the correct mount options.
  ext4: Add EXT4_IOC_MIGRATE ioctl
  ext4: Add inode version support in ext4
  vfs: Add 64 bit i_version support
  ext4: Add the journal checksum feature
  jbd2: jbd2 stats through procfs
  ext4: Take read lock during overwrite case.
  ...
This commit is contained in:
Linus Torvalds 2008-01-29 22:43:38 +11:00
Родитель 6b11d8179d 4019191be7
Коммит 8cd226ca3f
46 изменённых файлов: 7682 добавлений и 807 удалений

Просмотреть файл

@ -86,9 +86,21 @@ Alex is working on a new set of patches right now.
When mounting an ext4 filesystem, the following option are accepted: When mounting an ext4 filesystem, the following option are accepted:
(*) == default (*) == default
extents ext4 will use extents to address file data. The extents (*) ext4 will use extents to address file data. The
file system will no longer be mountable by ext3. file system will no longer be mountable by ext3.
noextents ext4 will not use extents for newly created files
journal_checksum Enable checksumming of the journal transactions.
This will allow the recovery code in e2fsck and the
kernel to detect corruption in the kernel. It is a
compatible change and will be ignored by older kernels.
journal_async_commit Commit block can be written to disk without waiting
for descriptor blocks. If enabled older kernels cannot
mount the device. This will enable 'journal_checksum'
internally.
journal=update Update the ext4 file system's journal to the current journal=update Update the ext4 file system's journal to the current
format. format.
@ -196,6 +208,12 @@ nobh (a) cache disk block mapping information
"nobh" option tries to avoid associating buffer "nobh" option tries to avoid associating buffer
heads (supported only for "writeback" mode). heads (supported only for "writeback" mode).
mballoc (*) Use the multiple block allocator for block allocation
nomballoc Disable the multiple block allocator for block allocation.
stripe=n Number of filesystem blocks that mballoc will try
to use for allocation size and alignment. For RAID5/6
systems this should be the number of data
disks * RAID chunk size in file system blocks.
Data Mode Data Mode
--------- ---------

Просмотреть файл

@ -857,6 +857,45 @@ CPUs.
The "procs_blocked" line gives the number of processes currently blocked, The "procs_blocked" line gives the number of processes currently blocked,
waiting for I/O to complete. waiting for I/O to complete.
1.9 Ext4 file system parameters
------------------------------
Ext4 file systems have one directory per partition under /proc/fs/ext4/
# ls /proc/fs/ext4/hdc/
group_prealloc max_to_scan mb_groups mb_history min_to_scan order2_req
stats stream_req
mb_groups:
This file gives the details of the multiblock allocator's buddy cache of free blocks.
mb_history:
Multiblock allocation history.
stats:
This file indicates whether the multiblock allocator should start collecting
statistics. The statistics are shown during unmount.
group_prealloc:
The multiblock allocator normalizes the block allocation request to
group_prealloc filesystem blocks if we don't have a stripe value set.
The stripe value can be specified at mount time or during mke2fs.
max_to_scan:
How long multiblock allocator can look for a best extent (in found extents)
min_to_scan:
How long multiblock allocator must look for a best extent
order2_req:
The multiblock allocator uses a 2^N search using buddies only for requests
greater than or equal to order2_req. The request size is specified in file
system blocks. A value of 2 indicates that the buddy search is used only for
requests greater than or equal to 4 blocks.
stream_req:
Files smaller than stream_req are served by the stream allocator, whose
purpose is to pack requests as close to each other as possible to
produce smooth I/O traffic. A value of 16 indicates that files smaller than 16
filesystem blocks will use group based preallocation.
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
Summary Summary

Просмотреть файл

@ -236,6 +236,7 @@ config JBD_DEBUG
config JBD2 config JBD2
tristate tristate
select CRC32
help help
This is a generic journaling layer for block devices that support This is a generic journaling layer for block devices that support
both 32-bit and 64-bit block numbers. It is currently used by both 32-bit and 64-bit block numbers. It is currently used by

Просмотреть файл

@ -546,11 +546,11 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
dentry->d_op = &afs_fs_dentry_operations; dentry->d_op = &afs_fs_dentry_operations;
d_add(dentry, inode); d_add(dentry, inode);
_leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%lu }", _leave(" = 0 { vn=%u u=%u } -> { ino=%lu v=%llu }",
fid.vnode, fid.vnode,
fid.unique, fid.unique,
dentry->d_inode->i_ino, dentry->d_inode->i_ino,
dentry->d_inode->i_version); (unsigned long long)dentry->d_inode->i_version);
return NULL; return NULL;
} }
@ -630,9 +630,10 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
* been deleted and replaced, and the original vnode ID has * been deleted and replaced, and the original vnode ID has
* been reused */ * been reused */
if (fid.unique != vnode->fid.unique) { if (fid.unique != vnode->fid.unique) {
_debug("%s: file deleted (uq %u -> %u I:%lu)", _debug("%s: file deleted (uq %u -> %u I:%llu)",
dentry->d_name.name, fid.unique, dentry->d_name.name, fid.unique,
vnode->fid.unique, dentry->d_inode->i_version); vnode->fid.unique,
(unsigned long long)dentry->d_inode->i_version);
spin_lock(&vnode->lock); spin_lock(&vnode->lock);
set_bit(AFS_VNODE_DELETED, &vnode->flags); set_bit(AFS_VNODE_DELETED, &vnode->flags);
spin_unlock(&vnode->lock); spin_unlock(&vnode->lock);

Просмотреть файл

@ -301,7 +301,8 @@ int afs_getattr(struct vfsmount *mnt, struct dentry *dentry,
inode = dentry->d_inode; inode = dentry->d_inode;
_enter("{ ino=%lu v=%lu }", inode->i_ino, inode->i_version); _enter("{ ino=%lu v=%llu }", inode->i_ino,
(unsigned long long)inode->i_version);
generic_fillattr(inode, stat); generic_fillattr(inode, stat);
return 0; return 0;

Просмотреть файл

@ -3213,6 +3213,50 @@ static int buffer_cpu_notify(struct notifier_block *self,
return NOTIFY_OK; return NOTIFY_OK;
} }
/**
 * bh_uptodate_or_lock: Check a buffer's uptodate state, locking it if stale
 * @bh: struct buffer_head
 *
 * Returns 1 when the buffer is up-to-date; in that case the buffer is
 * left unlocked.  Returns 0 when the buffer is not up-to-date; in that
 * case the buffer is returned to the caller still locked.
 */
int bh_uptodate_or_lock(struct buffer_head *bh)
{
	/* Already up-to-date: nothing to lock. */
	if (buffer_uptodate(bh))
		return 1;

	lock_buffer(bh);

	/* Test again now that the lock is held; the state may have changed. */
	if (buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return 1;
	}

	/* Still not up-to-date: hand the buffer back locked. */
	return 0;
}
EXPORT_SYMBOL(bh_uptodate_or_lock);
/**
 * bh_submit_read: Issue a synchronous read for a locked buffer
 * @bh: struct buffer_head
 *
 * The buffer must be locked on entry (enforced with BUG_ON).  If it is
 * already up-to-date it is simply unlocked and zero is returned.
 * Otherwise a READ is submitted with end_buffer_read_sync as the
 * completion handler and the caller waits for it to finish.
 *
 * Returns zero on success and -EIO if the buffer is still not
 * up-to-date after the read completed.
 */
int bh_submit_read(struct buffer_head *bh)
{
	BUG_ON(!buffer_locked(bh));

	if (!buffer_uptodate(bh)) {
		/*
		 * Extra reference for the duration of the I/O
		 * NOTE(review): presumably dropped by the completion
		 * handler - confirm against end_buffer_read_sync().
		 */
		get_bh(bh);
		bh->b_end_io = end_buffer_read_sync;
		submit_bh(READ, bh);
		wait_on_buffer(bh);

		return buffer_uptodate(bh) ? 0 : -EIO;
	}

	/* Nothing to read; release the lock the caller handed us. */
	unlock_buffer(bh);
	return 0;
}
EXPORT_SYMBOL(bh_submit_read);
void __init buffer_init(void) void __init buffer_init(void)
{ {
int nrpages; int nrpages;

Просмотреть файл

@ -680,11 +680,31 @@ static int ext2_check_descriptors (struct super_block * sb)
static loff_t ext2_max_size(int bits) static loff_t ext2_max_size(int bits)
{ {
loff_t res = EXT2_NDIR_BLOCKS; loff_t res = EXT2_NDIR_BLOCKS;
/* This constant is calculated to be the largest file size for a int meta_blocks;
* dense, 4k-blocksize file such that the total number of loff_t upper_limit;
/* This is calculated to be the largest file size for a
* dense, file such that the total number of
* sectors in the file, including data and all indirect blocks, * sectors in the file, including data and all indirect blocks,
* does not exceed 2^32. */ * does not exceed 2^32 -1
const loff_t upper_limit = 0x1ff7fffd000LL; * __u32 i_blocks representing the total number of
* 512 bytes blocks of the file
*/
upper_limit = (1LL << 32) - 1;
/* total blocks in file system block size */
upper_limit >>= (bits - 9);
/* indirect blocks */
meta_blocks = 1;
/* double indirect blocks */
meta_blocks += 1 + (1LL << (bits-2));
/* tripple indirect blocks */
meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
upper_limit -= meta_blocks;
upper_limit <<= bits;
res += 1LL << (bits-2); res += 1LL << (bits-2);
res += 1LL << (2*(bits-2)); res += 1LL << (2*(bits-2));
@ -692,6 +712,10 @@ static loff_t ext2_max_size(int bits)
res <<= bits; res <<= bits;
if (res > upper_limit) if (res > upper_limit)
res = upper_limit; res = upper_limit;
if (res > MAX_LFS_FILESIZE)
res = MAX_LFS_FILESIZE;
return res; return res;
} }

Просмотреть файл

@ -1436,11 +1436,31 @@ static void ext3_orphan_cleanup (struct super_block * sb,
static loff_t ext3_max_size(int bits) static loff_t ext3_max_size(int bits)
{ {
loff_t res = EXT3_NDIR_BLOCKS; loff_t res = EXT3_NDIR_BLOCKS;
/* This constant is calculated to be the largest file size for a int meta_blocks;
* dense, 4k-blocksize file such that the total number of loff_t upper_limit;
/* This is calculated to be the largest file size for a
* dense, file such that the total number of
* sectors in the file, including data and all indirect blocks, * sectors in the file, including data and all indirect blocks,
* does not exceed 2^32. */ * does not exceed 2^32 -1
const loff_t upper_limit = 0x1ff7fffd000LL; * __u32 i_blocks representing the total number of
* 512 bytes blocks of the file
*/
upper_limit = (1LL << 32) - 1;
/* total blocks in file system block size */
upper_limit >>= (bits - 9);
/* indirect blocks */
meta_blocks = 1;
/* double indirect blocks */
meta_blocks += 1 + (1LL << (bits-2));
/* tripple indirect blocks */
meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
upper_limit -= meta_blocks;
upper_limit <<= bits;
res += 1LL << (bits-2); res += 1LL << (bits-2);
res += 1LL << (2*(bits-2)); res += 1LL << (2*(bits-2));
@ -1448,6 +1468,10 @@ static loff_t ext3_max_size(int bits)
res <<= bits; res <<= bits;
if (res > upper_limit) if (res > upper_limit)
res = upper_limit; res = upper_limit;
if (res > MAX_LFS_FILESIZE)
res = MAX_LFS_FILESIZE;
return res; return res;
} }

Просмотреть файл

@ -6,7 +6,7 @@ obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o
ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
ext4_jbd2.o ext4_jbd2.o migrate.o mballoc.o
ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o

Просмотреть файл

@ -29,7 +29,7 @@
* Calculate the block group number and offset, given a block number * Calculate the block group number and offset, given a block number
*/ */
void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
unsigned long *blockgrpp, ext4_grpblk_t *offsetp) ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp)
{ {
struct ext4_super_block *es = EXT4_SB(sb)->s_es; struct ext4_super_block *es = EXT4_SB(sb)->s_es;
ext4_grpblk_t offset; ext4_grpblk_t offset;
@ -46,7 +46,7 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
/* Initializes an uninitialized block bitmap if given, and returns the /* Initializes an uninitialized block bitmap if given, and returns the
* number of blocks free in the group. */ * number of blocks free in the group. */
unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
int block_group, struct ext4_group_desc *gdp) ext4_group_t block_group, struct ext4_group_desc *gdp)
{ {
unsigned long start; unsigned long start;
int bit, bit_max; int bit, bit_max;
@ -60,7 +60,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
* essentially implementing a per-group read-only flag. */ * essentially implementing a per-group read-only flag. */
if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
ext4_error(sb, __FUNCTION__, ext4_error(sb, __FUNCTION__,
"Checksum bad for group %u\n", block_group); "Checksum bad for group %lu\n", block_group);
gdp->bg_free_blocks_count = 0; gdp->bg_free_blocks_count = 0;
gdp->bg_free_inodes_count = 0; gdp->bg_free_inodes_count = 0;
gdp->bg_itable_unused = 0; gdp->bg_itable_unused = 0;
@ -153,7 +153,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
* group descriptor * group descriptor
*/ */
struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
unsigned int block_group, ext4_group_t block_group,
struct buffer_head ** bh) struct buffer_head ** bh)
{ {
unsigned long group_desc; unsigned long group_desc;
@ -164,7 +164,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
if (block_group >= sbi->s_groups_count) { if (block_group >= sbi->s_groups_count) {
ext4_error (sb, "ext4_get_group_desc", ext4_error (sb, "ext4_get_group_desc",
"block_group >= groups_count - " "block_group >= groups_count - "
"block_group = %d, groups_count = %lu", "block_group = %lu, groups_count = %lu",
block_group, sbi->s_groups_count); block_group, sbi->s_groups_count);
return NULL; return NULL;
@ -176,7 +176,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
if (!sbi->s_group_desc[group_desc]) { if (!sbi->s_group_desc[group_desc]) {
ext4_error (sb, "ext4_get_group_desc", ext4_error (sb, "ext4_get_group_desc",
"Group descriptor not loaded - " "Group descriptor not loaded - "
"block_group = %d, group_desc = %lu, desc = %lu", "block_group = %lu, group_desc = %lu, desc = %lu",
block_group, group_desc, offset); block_group, group_desc, offset);
return NULL; return NULL;
} }
@ -189,18 +189,70 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
return desc; return desc;
} }
/**
 * ext4_valid_block_bitmap()
 * @sb:			super block
 * @desc:		group descriptor of @block_group
 * @block_group:	group the bitmap in @bh belongs to
 * @bh:			buffer holding the group's block bitmap
 *
 * Check that the block bitmap marks the group's own metadata as in use:
 * the block bitmap block, the inode bitmap block and every block of the
 * inode table must have their bits set.
 *
 * With the FLEX_BG feature the check is skipped and the bitmap is
 * reported as valid.
 *
 * Return 1 if the bitmap passes the check, 0 (after reporting through
 * ext4_error()) if one of the expected bits is clear.
 */
static int ext4_valid_block_bitmap(struct super_block *sb,
					struct ext4_group_desc *desc,
					unsigned int block_group,
					struct buffer_head *bh)
{
	ext4_grpblk_t offset;
	ext4_grpblk_t next_zero_bit;
	ext4_fsblk_t bitmap_blk;
	ext4_fsblk_t group_first_block;

	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
		/* with FLEX_BG, the inode/block bitmaps and itable
		 * blocks may not be in the group at all
		 * so the bitmap validation will be skipped for those groups
		 * or it has to also read the block group where the bitmaps
		 * are located to verify they are set.
		 */
		return 1;
	}
	group_first_block = ext4_group_first_block_no(sb, block_group);

	/* check whether block bitmap block number is set */
	bitmap_blk = ext4_block_bitmap(sb, desc);
	offset = bitmap_blk - group_first_block;
	if (!ext4_test_bit(offset, bh->b_data))
		/* block bitmap block is not marked in use */
		goto err_out;

	/* check whether the inode bitmap block number is set */
	bitmap_blk = ext4_inode_bitmap(sb, desc);
	offset = bitmap_blk - group_first_block;
	if (!ext4_test_bit(offset, bh->b_data))
		/* inode bitmap block is not marked in use */
		goto err_out;

	/* check whether the inode table block number is set */
	bitmap_blk = ext4_inode_table(sb, desc);
	offset = bitmap_blk - group_first_block;
	/* search the inode table's bit range for the first clear bit */
	next_zero_bit = ext4_find_next_zero_bit(bh->b_data,
				offset + EXT4_SB(sb)->s_itb_per_group,
				offset);
	if (next_zero_bit >= offset + EXT4_SB(sb)->s_itb_per_group)
		/* good bitmap for inode tables */
		return 1;

err_out:
	/*
	 * bitmap_blk holds the metadata block whose check failed (for the
	 * inode table: the first block of the table).  block_group is
	 * unsigned, so print it with %u; cast the block number explicitly
	 * as read_block_bitmap() does for the same format.
	 */
	ext4_error(sb, __FUNCTION__,
			"Invalid block bitmap - "
			"block_group = %u, block = %llu",
			block_group, (unsigned long long)bitmap_blk);
	return 0;
}
/** /**
* read_block_bitmap() * read_block_bitmap()
* @sb: super block * @sb: super block
* @block_group: given block group * @block_group: given block group
* *
* Read the bitmap for a given block_group, reading into the specified * Read the bitmap for a given block_group,and validate the
* slot in the superblock's bitmap cache. * bits for block/inode/inode tables are set in the bitmaps
* *
* Return buffer_head on success or NULL in case of failure. * Return buffer_head on success or NULL in case of failure.
*/ */
struct buffer_head * struct buffer_head *
read_block_bitmap(struct super_block *sb, unsigned int block_group) read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
{ {
struct ext4_group_desc * desc; struct ext4_group_desc * desc;
struct buffer_head * bh = NULL; struct buffer_head * bh = NULL;
@ -210,25 +262,36 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
if (!desc) if (!desc)
return NULL; return NULL;
bitmap_blk = ext4_block_bitmap(sb, desc); bitmap_blk = ext4_block_bitmap(sb, desc);
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { bh = sb_getblk(sb, bitmap_blk);
bh = sb_getblk(sb, bitmap_blk); if (unlikely(!bh)) {
if (!buffer_uptodate(bh)) { ext4_error(sb, __FUNCTION__,
lock_buffer(bh);
if (!buffer_uptodate(bh)) {
ext4_init_block_bitmap(sb, bh, block_group,
desc);
set_buffer_uptodate(bh);
}
unlock_buffer(bh);
}
} else {
bh = sb_bread(sb, bitmap_blk);
}
if (!bh)
ext4_error (sb, __FUNCTION__,
"Cannot read block bitmap - " "Cannot read block bitmap - "
"block_group = %d, block_bitmap = %llu", "block_group = %d, block_bitmap = %llu",
block_group, bitmap_blk); (int)block_group, (unsigned long long)bitmap_blk);
return NULL;
}
if (bh_uptodate_or_lock(bh))
return bh;
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
ext4_init_block_bitmap(sb, bh, block_group, desc);
set_buffer_uptodate(bh);
unlock_buffer(bh);
return bh;
}
if (bh_submit_read(bh) < 0) {
put_bh(bh);
ext4_error(sb, __FUNCTION__,
"Cannot read block bitmap - "
"block_group = %d, block_bitmap = %llu",
(int)block_group, (unsigned long long)bitmap_blk);
return NULL;
}
if (!ext4_valid_block_bitmap(sb, desc, block_group, bh)) {
put_bh(bh);
return NULL;
}
return bh; return bh;
} }
/* /*
@ -320,7 +383,7 @@ restart:
*/ */
static int static int
goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal, goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal,
unsigned int group, struct super_block * sb) ext4_group_t group, struct super_block *sb)
{ {
ext4_fsblk_t group_first_block, group_last_block; ext4_fsblk_t group_first_block, group_last_block;
@ -463,7 +526,7 @@ static inline int rsv_is_empty(struct ext4_reserve_window *rsv)
* when setting the reservation window size through ioctl before the file * when setting the reservation window size through ioctl before the file
* is open for write (needs block allocation). * is open for write (needs block allocation).
* *
* Needs truncate_mutex protection prior to call this function. * Needs down_write(i_data_sem) protection prior to call this function.
*/ */
void ext4_init_block_alloc_info(struct inode *inode) void ext4_init_block_alloc_info(struct inode *inode)
{ {
@ -514,6 +577,8 @@ void ext4_discard_reservation(struct inode *inode)
struct ext4_reserve_window_node *rsv; struct ext4_reserve_window_node *rsv;
spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock; spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock;
ext4_mb_discard_inode_preallocations(inode);
if (!block_i) if (!block_i)
return; return;
@ -540,7 +605,7 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
{ {
struct buffer_head *bitmap_bh = NULL; struct buffer_head *bitmap_bh = NULL;
struct buffer_head *gd_bh; struct buffer_head *gd_bh;
unsigned long block_group; ext4_group_t block_group;
ext4_grpblk_t bit; ext4_grpblk_t bit;
unsigned long i; unsigned long i;
unsigned long overflow; unsigned long overflow;
@ -587,11 +652,13 @@ do_more:
in_range(ext4_inode_bitmap(sb, desc), block, count) || in_range(ext4_inode_bitmap(sb, desc), block, count) ||
in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) ||
in_range(block + count - 1, ext4_inode_table(sb, desc), in_range(block + count - 1, ext4_inode_table(sb, desc),
sbi->s_itb_per_group)) sbi->s_itb_per_group)) {
ext4_error (sb, "ext4_free_blocks", ext4_error (sb, "ext4_free_blocks",
"Freeing blocks in system zones - " "Freeing blocks in system zones - "
"Block = %llu, count = %lu", "Block = %llu, count = %lu",
block, count); block, count);
goto error_return;
}
/* /*
* We are about to start releasing blocks in the bitmap, * We are about to start releasing blocks in the bitmap,
@ -720,19 +787,29 @@ error_return:
* @inode: inode * @inode: inode
* @block: start physical block to free * @block: start physical block to free
* @count: number of blocks to count * @count: number of blocks to count
* @metadata: Are these metadata blocks
*/ */
void ext4_free_blocks(handle_t *handle, struct inode *inode, void ext4_free_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t block, unsigned long count) ext4_fsblk_t block, unsigned long count,
int metadata)
{ {
struct super_block * sb; struct super_block * sb;
unsigned long dquot_freed_blocks; unsigned long dquot_freed_blocks;
/* this isn't the right place to decide whether block is metadata
* inode.c/extents.c knows better, but for safety ... */
if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
ext4_should_journal_data(inode))
metadata = 1;
sb = inode->i_sb; sb = inode->i_sb;
if (!sb) {
printk ("ext4_free_blocks: nonexistent device"); if (!test_opt(sb, MBALLOC) || !EXT4_SB(sb)->s_group_info)
return; ext4_free_blocks_sb(handle, sb, block, count,
} &dquot_freed_blocks);
ext4_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); else
ext4_mb_free_blocks(handle, inode, block, count,
metadata, &dquot_freed_blocks);
if (dquot_freed_blocks) if (dquot_freed_blocks)
DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
return; return;
@ -920,9 +997,10 @@ claim_block(spinlock_t *lock, ext4_grpblk_t block, struct buffer_head *bh)
* ext4_journal_release_buffer(), else we'll run out of credits. * ext4_journal_release_buffer(), else we'll run out of credits.
*/ */
static ext4_grpblk_t static ext4_grpblk_t
ext4_try_to_allocate(struct super_block *sb, handle_t *handle, int group, ext4_try_to_allocate(struct super_block *sb, handle_t *handle,
struct buffer_head *bitmap_bh, ext4_grpblk_t grp_goal, ext4_group_t group, struct buffer_head *bitmap_bh,
unsigned long *count, struct ext4_reserve_window *my_rsv) ext4_grpblk_t grp_goal, unsigned long *count,
struct ext4_reserve_window *my_rsv)
{ {
ext4_fsblk_t group_first_block; ext4_fsblk_t group_first_block;
ext4_grpblk_t start, end; ext4_grpblk_t start, end;
@ -1156,7 +1234,7 @@ static int find_next_reservable_window(
*/ */
static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv, static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv,
ext4_grpblk_t grp_goal, struct super_block *sb, ext4_grpblk_t grp_goal, struct super_block *sb,
unsigned int group, struct buffer_head *bitmap_bh) ext4_group_t group, struct buffer_head *bitmap_bh)
{ {
struct ext4_reserve_window_node *search_head; struct ext4_reserve_window_node *search_head;
ext4_fsblk_t group_first_block, group_end_block, start_block; ext4_fsblk_t group_first_block, group_end_block, start_block;
@ -1354,7 +1432,7 @@ static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv,
*/ */
static ext4_grpblk_t static ext4_grpblk_t
ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
unsigned int group, struct buffer_head *bitmap_bh, ext4_group_t group, struct buffer_head *bitmap_bh,
ext4_grpblk_t grp_goal, ext4_grpblk_t grp_goal,
struct ext4_reserve_window_node * my_rsv, struct ext4_reserve_window_node * my_rsv,
unsigned long *count, int *errp) unsigned long *count, int *errp)
@ -1510,7 +1588,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
} }
/** /**
* ext4_new_blocks() -- core block(s) allocation function * ext4_new_blocks_old() -- core block(s) allocation function
* @handle: handle to this transaction * @handle: handle to this transaction
* @inode: file inode * @inode: file inode
* @goal: given target block(filesystem wide) * @goal: given target block(filesystem wide)
@ -1523,17 +1601,17 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
* any specific goal block. * any specific goal block.
* *
*/ */
ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, unsigned long *count, int *errp) ext4_fsblk_t goal, unsigned long *count, int *errp)
{ {
struct buffer_head *bitmap_bh = NULL; struct buffer_head *bitmap_bh = NULL;
struct buffer_head *gdp_bh; struct buffer_head *gdp_bh;
unsigned long group_no; ext4_group_t group_no;
int goal_group; ext4_group_t goal_group;
ext4_grpblk_t grp_target_blk; /* blockgroup relative goal block */ ext4_grpblk_t grp_target_blk; /* blockgroup relative goal block */
ext4_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/ ext4_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/
ext4_fsblk_t ret_block; /* filesyetem-wide allocated block */ ext4_fsblk_t ret_block; /* filesyetem-wide allocated block */
int bgi; /* blockgroup iteration index */ ext4_group_t bgi; /* blockgroup iteration index */
int fatal = 0, err; int fatal = 0, err;
int performed_allocation = 0; int performed_allocation = 0;
ext4_grpblk_t free_blocks; /* number of free blocks in a group */ ext4_grpblk_t free_blocks; /* number of free blocks in a group */
@ -1544,10 +1622,7 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
struct ext4_reserve_window_node *my_rsv = NULL; struct ext4_reserve_window_node *my_rsv = NULL;
struct ext4_block_alloc_info *block_i; struct ext4_block_alloc_info *block_i;
unsigned short windowsz = 0; unsigned short windowsz = 0;
#ifdef EXT4FS_DEBUG ext4_group_t ngroups;
static int goal_hits, goal_attempts;
#endif
unsigned long ngroups;
unsigned long num = *count; unsigned long num = *count;
*errp = -ENOSPC; *errp = -ENOSPC;
@ -1567,7 +1642,7 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
sbi = EXT4_SB(sb); sbi = EXT4_SB(sb);
es = EXT4_SB(sb)->s_es; es = EXT4_SB(sb)->s_es;
ext4_debug("goal=%lu.\n", goal); ext4_debug("goal=%llu.\n", goal);
/* /*
* Allocate a block from reservation only when * Allocate a block from reservation only when
* filesystem is mounted with reservation(default,-o reservation), and * filesystem is mounted with reservation(default,-o reservation), and
@ -1677,7 +1752,7 @@ retry_alloc:
allocated: allocated:
ext4_debug("using block group %d(%d)\n", ext4_debug("using block group %lu(%d)\n",
group_no, gdp->bg_free_blocks_count); group_no, gdp->bg_free_blocks_count);
BUFFER_TRACE(gdp_bh, "get_write_access"); BUFFER_TRACE(gdp_bh, "get_write_access");
@ -1692,11 +1767,13 @@ allocated:
in_range(ret_block, ext4_inode_table(sb, gdp), in_range(ret_block, ext4_inode_table(sb, gdp),
EXT4_SB(sb)->s_itb_per_group) || EXT4_SB(sb)->s_itb_per_group) ||
in_range(ret_block + num - 1, ext4_inode_table(sb, gdp), in_range(ret_block + num - 1, ext4_inode_table(sb, gdp),
EXT4_SB(sb)->s_itb_per_group)) EXT4_SB(sb)->s_itb_per_group)) {
ext4_error(sb, "ext4_new_block", ext4_error(sb, "ext4_new_block",
"Allocating block in system zone - " "Allocating block in system zone - "
"blocks from %llu, length %lu", "blocks from %llu, length %lu",
ret_block, num); ret_block, num);
goto out;
}
performed_allocation = 1; performed_allocation = 1;
@ -1743,9 +1820,6 @@ allocated:
* list of some description. We don't know in advance whether * list of some description. We don't know in advance whether
* the caller wants to use it as metadata or data. * the caller wants to use it as metadata or data.
*/ */
ext4_debug("allocating block %lu. Goal hits %d of %d.\n",
ret_block, goal_hits, goal_attempts);
spin_lock(sb_bgl_lock(sbi, group_no)); spin_lock(sb_bgl_lock(sbi, group_no));
if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
@ -1787,13 +1861,46 @@ out:
} }
ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode, ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, int *errp) ext4_fsblk_t goal, int *errp)
{ {
unsigned long count = 1; struct ext4_allocation_request ar;
ext4_fsblk_t ret;
return ext4_new_blocks(handle, inode, goal, &count, errp); if (!test_opt(inode->i_sb, MBALLOC)) {
unsigned long count = 1;
ret = ext4_new_blocks_old(handle, inode, goal, &count, errp);
return ret;
}
memset(&ar, 0, sizeof(ar));
ar.inode = inode;
ar.goal = goal;
ar.len = 1;
ret = ext4_mb_new_blocks(handle, &ar, errp);
return ret;
} }
/*
 * ext4_new_blocks() -- allocate up to *count blocks near @goal
 *
 * Dispatches on the MBALLOC mount option: without it the request goes
 * to ext4_new_blocks_old() unchanged; with it the request is packed
 * into an ext4_allocation_request and handed to ext4_mb_new_blocks(),
 * after which *count is updated from the request's len field.
 *
 * Returns whatever the selected allocator returns; *errp is filled in
 * by that allocator.
 */
ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
			ext4_fsblk_t goal, unsigned long *count, int *errp)
{
	struct ext4_allocation_request req;
	ext4_fsblk_t first_blk;

	/* Multiblock allocator not enabled: use the old allocator. */
	if (!test_opt(inode->i_sb, MBALLOC))
		return ext4_new_blocks_old(handle, inode, goal, count, errp);

	/* Start from an all-zero request and fill in what we know. */
	memset(&req, 0, sizeof(req));
	req.len = *count;
	req.goal = goal;
	req.inode = inode;

	first_blk = ext4_mb_new_blocks(handle, &req, errp);

	/* Report the length recorded in the request back to the caller. */
	*count = req.len;
	return first_blk;
}
/** /**
* ext4_count_free_blocks() -- count filesystem free blocks * ext4_count_free_blocks() -- count filesystem free blocks
* @sb: superblock * @sb: superblock
@ -1804,8 +1911,8 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
{ {
ext4_fsblk_t desc_count; ext4_fsblk_t desc_count;
struct ext4_group_desc *gdp; struct ext4_group_desc *gdp;
int i; ext4_group_t i;
unsigned long ngroups = EXT4_SB(sb)->s_groups_count; ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
#ifdef EXT4FS_DEBUG #ifdef EXT4FS_DEBUG
struct ext4_super_block *es; struct ext4_super_block *es;
ext4_fsblk_t bitmap_count; ext4_fsblk_t bitmap_count;
@ -1829,14 +1936,14 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
continue; continue;
x = ext4_count_free(bitmap_bh, sb->s_blocksize); x = ext4_count_free(bitmap_bh, sb->s_blocksize);
printk("group %d: stored = %d, counted = %lu\n", printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
i, le16_to_cpu(gdp->bg_free_blocks_count), x); i, le16_to_cpu(gdp->bg_free_blocks_count), x);
bitmap_count += x; bitmap_count += x;
} }
brelse(bitmap_bh); brelse(bitmap_bh);
printk("ext4_count_free_blocks: stored = %llu" printk("ext4_count_free_blocks: stored = %llu"
", computed = %llu, %llu\n", ", computed = %llu, %llu\n",
EXT4_FREE_BLOCKS_COUNT(es), ext4_free_blocks_count(es),
desc_count, bitmap_count); desc_count, bitmap_count);
return bitmap_count; return bitmap_count;
#else #else
@ -1853,7 +1960,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
#endif #endif
} }
static inline int test_root(int a, int b) static inline int test_root(ext4_group_t a, int b)
{ {
int num = b; int num = b;
@ -1862,7 +1969,7 @@ static inline int test_root(int a, int b)
return num == a; return num == a;
} }
static int ext4_group_sparse(int group) static int ext4_group_sparse(ext4_group_t group)
{ {
if (group <= 1) if (group <= 1)
return 1; return 1;
@ -1880,7 +1987,7 @@ static int ext4_group_sparse(int group)
* Return the number of blocks used by the superblock (primary or backup) * Return the number of blocks used by the superblock (primary or backup)
* in this group. Currently this will be only 0 or 1. * in this group. Currently this will be only 0 or 1.
*/ */
int ext4_bg_has_super(struct super_block *sb, int group) int ext4_bg_has_super(struct super_block *sb, ext4_group_t group)
{ {
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) && EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) &&
@ -1889,18 +1996,20 @@ int ext4_bg_has_super(struct super_block *sb, int group)
return 1; return 1;
} }
static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb, int group) static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb,
ext4_group_t group)
{ {
unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb); unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb);
unsigned long first = metagroup * EXT4_DESC_PER_BLOCK(sb); ext4_group_t first = metagroup * EXT4_DESC_PER_BLOCK(sb);
unsigned long last = first + EXT4_DESC_PER_BLOCK(sb) - 1; ext4_group_t last = first + EXT4_DESC_PER_BLOCK(sb) - 1;
if (group == first || group == first + 1 || group == last) if (group == first || group == first + 1 || group == last)
return 1; return 1;
return 0; return 0;
} }
static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, int group) static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
ext4_group_t group)
{ {
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) && EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) &&
@ -1918,7 +2027,7 @@ static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, int group)
* (primary or backup) in this group. In the future there may be a * (primary or backup) in this group. In the future there may be a
* different number of descriptor blocks in each group. * different number of descriptor blocks in each group.
*/ */
unsigned long ext4_bg_num_gdb(struct super_block *sb, int group) unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
{ {
unsigned long first_meta_bg = unsigned long first_meta_bg =
le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg); le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);

Просмотреть файл

@ -67,7 +67,7 @@ int ext4_check_dir_entry (const char * function, struct inode * dir,
unsigned long offset) unsigned long offset)
{ {
const char * error_msg = NULL; const char * error_msg = NULL;
const int rlen = le16_to_cpu(de->rec_len); const int rlen = ext4_rec_len_from_disk(de->rec_len);
if (rlen < EXT4_DIR_REC_LEN(1)) if (rlen < EXT4_DIR_REC_LEN(1))
error_msg = "rec_len is smaller than minimal"; error_msg = "rec_len is smaller than minimal";
@ -124,7 +124,7 @@ static int ext4_readdir(struct file * filp,
offset = filp->f_pos & (sb->s_blocksize - 1); offset = filp->f_pos & (sb->s_blocksize - 1);
while (!error && !stored && filp->f_pos < inode->i_size) { while (!error && !stored && filp->f_pos < inode->i_size) {
unsigned long blk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb); ext4_lblk_t blk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb);
struct buffer_head map_bh; struct buffer_head map_bh;
struct buffer_head *bh = NULL; struct buffer_head *bh = NULL;
@ -172,10 +172,10 @@ revalidate:
* least that it is non-zero. A * least that it is non-zero. A
* failure will be detected in the * failure will be detected in the
* dirent test below. */ * dirent test below. */
if (le16_to_cpu(de->rec_len) < if (ext4_rec_len_from_disk(de->rec_len)
EXT4_DIR_REC_LEN(1)) < EXT4_DIR_REC_LEN(1))
break; break;
i += le16_to_cpu(de->rec_len); i += ext4_rec_len_from_disk(de->rec_len);
} }
offset = i; offset = i;
filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
@ -197,7 +197,7 @@ revalidate:
ret = stored; ret = stored;
goto out; goto out;
} }
offset += le16_to_cpu(de->rec_len); offset += ext4_rec_len_from_disk(de->rec_len);
if (le32_to_cpu(de->inode)) { if (le32_to_cpu(de->inode)) {
/* We might block in the next section /* We might block in the next section
* if the data destination is * if the data destination is
@ -219,7 +219,7 @@ revalidate:
goto revalidate; goto revalidate;
stored ++; stored ++;
} }
filp->f_pos += le16_to_cpu(de->rec_len); filp->f_pos += ext4_rec_len_from_disk(de->rec_len);
} }
offset = 0; offset = 0;
brelse (bh); brelse (bh);

Просмотреть файл

@ -61,7 +61,7 @@ static ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
* idx_pblock: * idx_pblock:
* combine low and high parts of a leaf physical block number into ext4_fsblk_t * combine low and high parts of a leaf physical block number into ext4_fsblk_t
*/ */
static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
{ {
ext4_fsblk_t block; ext4_fsblk_t block;
@ -75,7 +75,7 @@ static ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
* stores a large physical block number into an extent struct, * stores a large physical block number into an extent struct,
* breaking it into parts * breaking it into parts
*/ */
static void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
{ {
ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
@ -144,7 +144,7 @@ static int ext4_ext_dirty(handle_t *handle, struct inode *inode,
static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
struct ext4_ext_path *path, struct ext4_ext_path *path,
ext4_fsblk_t block) ext4_lblk_t block)
{ {
struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_inode_info *ei = EXT4_I(inode);
ext4_fsblk_t bg_start; ext4_fsblk_t bg_start;
@ -367,13 +367,14 @@ static void ext4_ext_drop_refs(struct ext4_ext_path *path)
* the header must be checked before calling this * the header must be checked before calling this
*/ */
static void static void
ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int block) ext4_ext_binsearch_idx(struct inode *inode,
struct ext4_ext_path *path, ext4_lblk_t block)
{ {
struct ext4_extent_header *eh = path->p_hdr; struct ext4_extent_header *eh = path->p_hdr;
struct ext4_extent_idx *r, *l, *m; struct ext4_extent_idx *r, *l, *m;
ext_debug("binsearch for %d(idx): ", block); ext_debug("binsearch for %u(idx): ", block);
l = EXT_FIRST_INDEX(eh) + 1; l = EXT_FIRST_INDEX(eh) + 1;
r = EXT_LAST_INDEX(eh); r = EXT_LAST_INDEX(eh);
@ -425,7 +426,8 @@ ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int bloc
* the header must be checked before calling this * the header must be checked before calling this
*/ */
static void static void
ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block) ext4_ext_binsearch(struct inode *inode,
struct ext4_ext_path *path, ext4_lblk_t block)
{ {
struct ext4_extent_header *eh = path->p_hdr; struct ext4_extent_header *eh = path->p_hdr;
struct ext4_extent *r, *l, *m; struct ext4_extent *r, *l, *m;
@ -438,7 +440,7 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block)
return; return;
} }
ext_debug("binsearch for %d: ", block); ext_debug("binsearch for %u: ", block);
l = EXT_FIRST_EXTENT(eh) + 1; l = EXT_FIRST_EXTENT(eh) + 1;
r = EXT_LAST_EXTENT(eh); r = EXT_LAST_EXTENT(eh);
@ -494,7 +496,8 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
} }
struct ext4_ext_path * struct ext4_ext_path *
ext4_ext_find_extent(struct inode *inode, int block, struct ext4_ext_path *path) ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
struct ext4_ext_path *path)
{ {
struct ext4_extent_header *eh; struct ext4_extent_header *eh;
struct buffer_head *bh; struct buffer_head *bh;
@ -763,7 +766,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
while (k--) { while (k--) {
oldblock = newblock; oldblock = newblock;
newblock = ablocks[--a]; newblock = ablocks[--a];
bh = sb_getblk(inode->i_sb, (ext4_fsblk_t)newblock); bh = sb_getblk(inode->i_sb, newblock);
if (!bh) { if (!bh) {
err = -EIO; err = -EIO;
goto cleanup; goto cleanup;
@ -783,9 +786,8 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
fidx->ei_block = border; fidx->ei_block = border;
ext4_idx_store_pblock(fidx, oldblock); ext4_idx_store_pblock(fidx, oldblock);
ext_debug("int.index at %d (block %llu): %lu -> %llu\n", i, ext_debug("int.index at %d (block %llu): %u -> %llu\n",
newblock, (unsigned long) le32_to_cpu(border), i, newblock, le32_to_cpu(border), oldblock);
oldblock);
/* copy indexes */ /* copy indexes */
m = 0; m = 0;
path[i].p_idx++; path[i].p_idx++;
@ -851,7 +853,7 @@ cleanup:
for (i = 0; i < depth; i++) { for (i = 0; i < depth; i++) {
if (!ablocks[i]) if (!ablocks[i])
continue; continue;
ext4_free_blocks(handle, inode, ablocks[i], 1); ext4_free_blocks(handle, inode, ablocks[i], 1, 1);
} }
} }
kfree(ablocks); kfree(ablocks);
@ -979,8 +981,8 @@ repeat:
/* refill path */ /* refill path */
ext4_ext_drop_refs(path); ext4_ext_drop_refs(path);
path = ext4_ext_find_extent(inode, path = ext4_ext_find_extent(inode,
le32_to_cpu(newext->ee_block), (ext4_lblk_t)le32_to_cpu(newext->ee_block),
path); path);
if (IS_ERR(path)) if (IS_ERR(path))
err = PTR_ERR(path); err = PTR_ERR(path);
} else { } else {
@ -992,8 +994,8 @@ repeat:
/* refill path */ /* refill path */
ext4_ext_drop_refs(path); ext4_ext_drop_refs(path);
path = ext4_ext_find_extent(inode, path = ext4_ext_find_extent(inode,
le32_to_cpu(newext->ee_block), (ext4_lblk_t)le32_to_cpu(newext->ee_block),
path); path);
if (IS_ERR(path)) { if (IS_ERR(path)) {
err = PTR_ERR(path); err = PTR_ERR(path);
goto out; goto out;
@ -1014,6 +1016,150 @@ out:
return err; return err;
} }
/*
 * ext4_ext_search_left:
 * Look up the nearest allocated block to the left of *logical.
 *
 * When such a block exists, its logical number is written to *logical
 * and its physical address to *phys.  When there is none (the path
 * holds no extent at depth 0, or *logical lies before the extent in
 * the path), *phys is set to 0 and *logical is left untouched.
 *
 * Every return path yields 0.
 */
int
ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
			ext4_lblk_t *logical, ext4_fsblk_t *phys)
{
	struct ext4_extent *leaf_ex;
	int level, nblocks;

	BUG_ON(path == NULL);
	level = path->p_depth;
	*phys = 0;

	/* no extent recorded in a depth-0 tree: nothing to the left */
	if (level == 0 && path->p_ext == NULL)
		return 0;

	leaf_ex = path[level].p_ext;
	nblocks = ext4_ext_get_actual_len(leaf_ex);

	if (*logical >= le32_to_cpu(leaf_ex->ee_block)) {
		/*
		 * Common case: the extent in the path sits entirely
		 * below *logical, so its last block is the answer.
		 */
		BUG_ON(*logical < (le32_to_cpu(leaf_ex->ee_block) + nblocks));
		*logical = le32_to_cpu(leaf_ex->ee_block) + nblocks - 1;
		*phys = ext_pblock(leaf_ex) + nblocks - 1;
		return 0;
	}

	/*
	 * *logical precedes the extent in the path.  That is only
	 * expected when the extent is the first entry of its leaf and
	 * every index above it is the first entry of its block.
	 */
	BUG_ON(EXT_FIRST_EXTENT(path[level].p_hdr) != leaf_ex);
	for (level--; level >= 0; level--)
		BUG_ON(path[level].p_idx != EXT_FIRST_INDEX(path[level].p_hdr));

	return 0;
}
/*
 * ext4_ext_search_right:
 * Search for the closest allocated block to the right of *logical.
 *
 * @inode:   inode whose extent tree is being searched
 * @path:    path into the extent tree (must not be NULL)
 * @logical: in: block to search from; out: logical number of the
 *           closest allocated block to the right, when one is found
 * @phys:    out: physical address of that block, or 0 when *logical is
 *           the largest allocated block (nothing lies to the right)
 *
 * Returns 0 on success (a result of "nothing found" is reported through
 * *phys == 0 with *logical left untouched), or -EIO when a tree block
 * cannot be read or fails its header check.
 */
int
ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
			ext4_lblk_t *logical, ext4_fsblk_t *phys)
{
	struct buffer_head *bh = NULL;
	struct ext4_extent_header *eh;
	struct ext4_extent_idx *ix;
	struct ext4_extent *ex;
	ext4_fsblk_t block;
	int depth, ee_len;

	BUG_ON(path == NULL);
	depth = path->p_depth;
	*phys = 0;

	/* no extent recorded in a depth-0 tree: nothing to the right */
	if (depth == 0 && path->p_ext == NULL)
		return 0;

	/* usually the extent in the path covers blocks smaller
	 * than *logical, but it can be that the extent is the
	 * first one in the file */

	ex = path[depth].p_ext;
	ee_len = ext4_ext_get_actual_len(ex);
	if (*logical < le32_to_cpu(ex->ee_block)) {
		/* *logical precedes the first extent: that extent is
		 * the closest allocated block to the right */
		BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex);
		while (--depth >= 0) {
			ix = path[depth].p_idx;
			BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr));
		}
		*logical = le32_to_cpu(ex->ee_block);
		*phys = ext_pblock(ex);
		return 0;
	}

	BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len));

	if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
		/* next allocated block in this leaf */
		ex++;
		*logical = le32_to_cpu(ex->ee_block);
		*phys = ext_pblock(ex);
		return 0;
	}

	/* go up and search for index to the right */
	while (--depth >= 0) {
		ix = path[depth].p_idx;
		if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
			break;
	}

	if (depth < 0) {
		/* we've gone up to the root and
		 * found no index to the right */
		return 0;
	}

	/* we've found index to the right, let's
	 * follow it and find the closest allocated
	 * block to the right */
	ix++;
	block = idx_pblock(ix);
	while (++depth < path->p_depth) {
		bh = sb_bread(inode->i_sb, block);
		if (bh == NULL)
			return -EIO;
		eh = ext_block_hdr(bh);
		/*
		 * 'depth' here is a path level (0 at the root, p_depth
		 * at the leaf), while the header check below is given
		 * 0 for the leaf.  Convert before checking, exactly as
		 * the leaf check after this loop does; passing the raw
		 * path level would make valid index blocks fail.
		 */
		if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) {
			put_bh(bh);
			return -EIO;
		}
		/* keep descending along the leftmost index */
		ix = EXT_FIRST_INDEX(eh);
		block = idx_pblock(ix);
		put_bh(bh);
	}

	/* 'block' now addresses the leaf; depth == path->p_depth here */
	bh = sb_bread(inode->i_sb, block);
	if (bh == NULL)
		return -EIO;
	eh = ext_block_hdr(bh);
	if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) {
		put_bh(bh);
		return -EIO;
	}
	ex = EXT_FIRST_EXTENT(eh);
	*logical = le32_to_cpu(ex->ee_block);
	*phys = ext_pblock(ex);
	put_bh(bh);
	return 0;

}
/* /*
* ext4_ext_next_allocated_block: * ext4_ext_next_allocated_block:
* returns allocated block in subsequent extent or EXT_MAX_BLOCK. * returns allocated block in subsequent extent or EXT_MAX_BLOCK.
@ -1021,7 +1167,7 @@ out:
* allocated block. Thus, index entries have to be consistent * allocated block. Thus, index entries have to be consistent
* with leaves. * with leaves.
*/ */
static unsigned long static ext4_lblk_t
ext4_ext_next_allocated_block(struct ext4_ext_path *path) ext4_ext_next_allocated_block(struct ext4_ext_path *path)
{ {
int depth; int depth;
@ -1054,7 +1200,7 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path)
* ext4_ext_next_leaf_block: * ext4_ext_next_leaf_block:
* returns first allocated block from next leaf or EXT_MAX_BLOCK * returns first allocated block from next leaf or EXT_MAX_BLOCK
*/ */
static unsigned ext4_ext_next_leaf_block(struct inode *inode, static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode,
struct ext4_ext_path *path) struct ext4_ext_path *path)
{ {
int depth; int depth;
@ -1072,7 +1218,8 @@ static unsigned ext4_ext_next_leaf_block(struct inode *inode,
while (depth >= 0) { while (depth >= 0) {
if (path[depth].p_idx != if (path[depth].p_idx !=
EXT_LAST_INDEX(path[depth].p_hdr)) EXT_LAST_INDEX(path[depth].p_hdr))
return le32_to_cpu(path[depth].p_idx[1].ei_block); return (ext4_lblk_t)
le32_to_cpu(path[depth].p_idx[1].ei_block);
depth--; depth--;
} }
@ -1085,7 +1232,7 @@ static unsigned ext4_ext_next_leaf_block(struct inode *inode,
* then we have to correct all indexes above. * then we have to correct all indexes above.
* TODO: do we need to correct tree in all cases? * TODO: do we need to correct tree in all cases?
*/ */
int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path) struct ext4_ext_path *path)
{ {
struct ext4_extent_header *eh; struct ext4_extent_header *eh;
@ -1171,7 +1318,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
if (ext1_ee_len + ext2_ee_len > max_len) if (ext1_ee_len + ext2_ee_len > max_len)
return 0; return 0;
#ifdef AGGRESSIVE_TEST #ifdef AGGRESSIVE_TEST
if (le16_to_cpu(ex1->ee_len) >= 4) if (ext1_ee_len >= 4)
return 0; return 0;
#endif #endif
@ -1239,7 +1386,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode,
struct ext4_extent *newext, struct ext4_extent *newext,
struct ext4_ext_path *path) struct ext4_ext_path *path)
{ {
unsigned long b1, b2; ext4_lblk_t b1, b2;
unsigned int depth, len1; unsigned int depth, len1;
unsigned int ret = 0; unsigned int ret = 0;
@ -1260,7 +1407,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode,
goto out; goto out;
} }
/* check for wrap through zero */ /* check for wrap through zero on extent logical start block*/
if (b1 + len1 < b1) { if (b1 + len1 < b1) {
len1 = EXT_MAX_BLOCK - b1; len1 = EXT_MAX_BLOCK - b1;
newext->ee_len = cpu_to_le16(len1); newext->ee_len = cpu_to_le16(len1);
@ -1290,7 +1437,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
struct ext4_extent *ex, *fex; struct ext4_extent *ex, *fex;
struct ext4_extent *nearex; /* nearest extent */ struct ext4_extent *nearex; /* nearest extent */
struct ext4_ext_path *npath = NULL; struct ext4_ext_path *npath = NULL;
int depth, len, err, next; int depth, len, err;
ext4_lblk_t next;
unsigned uninitialized = 0; unsigned uninitialized = 0;
BUG_ON(ext4_ext_get_actual_len(newext) == 0); BUG_ON(ext4_ext_get_actual_len(newext) == 0);
@ -1435,114 +1583,8 @@ cleanup:
return err; return err;
} }
/*
 * ext4_ext_walk_space:
 * Walk the logical range starting at @block and spanning @num blocks,
 * handing each piece of it to @func as an ext4_ext_cache record: either
 * an existing extent (EXT4_EXT_CACHE_EXTENT) or a hole
 * (EXT4_EXT_CACHE_GAP, with ec_start == 0).
 *
 * The value returned by @func steers the walk:
 *   < 0         - stop and return that value
 *   EXT_REPEAT  - look the same block up again
 *   EXT_BREAK   - stop and return 0
 *   other       - continue after the piece described by the record
 *
 * Returns 0, a negative value from @func, or the error reported by
 * ext4_ext_find_extent().
 */
int ext4_ext_walk_space(struct inode *inode, unsigned long block,
			unsigned long num, ext_prepare_callback func,
			void *cbdata)
{
	struct ext4_ext_path *path = NULL;
	struct ext4_ext_cache piece;
	struct ext4_extent *ex;
	unsigned long next, start = 0, end = 0;
	unsigned long stop = block + num;
	int depth, mapped, rc = 0;

	BUG_ON(func == NULL);
	BUG_ON(inode == NULL);

	while (block < stop && block != EXT_MAX_BLOCK) {
		/* blocks still to be visited */
		num = stop - block;

		/* look up the extent closest to this block */
		path = ext4_ext_find_extent(inode, block, path);
		if (IS_ERR(path)) {
			rc = PTR_ERR(path);
			path = NULL;
			break;
		}

		depth = ext_depth(inode);
		BUG_ON(path[depth].p_hdr == NULL);
		ex = path[depth].p_ext;
		next = ext4_ext_next_allocated_block(path);

		/* each reachable case reports a range beginning at @block */
		start = block;
		mapped = 0;
		if (ex == NULL) {
			/* no extent at all: the whole request is a hole */
			end = block + num;
		} else if (le32_to_cpu(ex->ee_block) > block) {
			/* hole in front of the extent that was found */
			end = le32_to_cpu(ex->ee_block);
			end = (block + num < end) ? block + num : end;
		} else if (block >= le32_to_cpu(ex->ee_block)
					+ ext4_ext_get_actual_len(ex)) {
			/* hole behind the extent, up to the next one */
			end = block + num;
			end = (end >= next) ? next : end;
		} else if (block >= le32_to_cpu(ex->ee_block)) {
			/* the request starts inside the extent */
			end = le32_to_cpu(ex->ee_block)
				+ ext4_ext_get_actual_len(ex);
			end = (block + num < end) ? block + num : end;
			mapped = 1;
		} else {
			BUG();
		}
		BUG_ON(end <= start);

		if (mapped) {
			/* report the extent as a whole */
			piece.ec_block = le32_to_cpu(ex->ee_block);
			piece.ec_len = ext4_ext_get_actual_len(ex);
			piece.ec_start = ext_pblock(ex);
			piece.ec_type = EXT4_EXT_CACHE_EXTENT;
		} else {
			piece.ec_block = start;
			piece.ec_len = end - start;
			piece.ec_start = 0;
			piece.ec_type = EXT4_EXT_CACHE_GAP;
		}

		BUG_ON(piece.ec_len == 0);
		rc = func(inode, path, &piece, cbdata);
		ext4_ext_drop_refs(path);

		if (rc < 0)
			break;
		if (rc == EXT_REPEAT)
			continue;
		if (rc == EXT_BREAK) {
			rc = 0;
			break;
		}

		/* the tree depth changed: the path has to be reallocated */
		if (ext_depth(inode) != depth) {
			kfree(path);
			path = NULL;
		}

		/* resume right after the piece described by the record */
		block = piece.ec_block + piece.ec_len;
	}

	if (path != NULL) {
		ext4_ext_drop_refs(path);
		kfree(path);
	}

	return rc;
}
static void static void
ext4_ext_put_in_cache(struct inode *inode, __u32 block, ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
__u32 len, ext4_fsblk_t start, int type) __u32 len, ext4_fsblk_t start, int type)
{ {
struct ext4_ext_cache *cex; struct ext4_ext_cache *cex;
@ -1561,10 +1603,11 @@ ext4_ext_put_in_cache(struct inode *inode, __u32 block,
*/ */
static void static void
ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
unsigned long block) ext4_lblk_t block)
{ {
int depth = ext_depth(inode); int depth = ext_depth(inode);
unsigned long lblock, len; unsigned long len;
ext4_lblk_t lblock;
struct ext4_extent *ex; struct ext4_extent *ex;
ex = path[depth].p_ext; ex = path[depth].p_ext;
@ -1576,32 +1619,34 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
} else if (block < le32_to_cpu(ex->ee_block)) { } else if (block < le32_to_cpu(ex->ee_block)) {
lblock = block; lblock = block;
len = le32_to_cpu(ex->ee_block) - block; len = le32_to_cpu(ex->ee_block) - block;
ext_debug("cache gap(before): %lu [%lu:%lu]", ext_debug("cache gap(before): %u [%u:%u]",
(unsigned long) block, block,
(unsigned long) le32_to_cpu(ex->ee_block), le32_to_cpu(ex->ee_block),
(unsigned long) ext4_ext_get_actual_len(ex)); ext4_ext_get_actual_len(ex));
} else if (block >= le32_to_cpu(ex->ee_block) } else if (block >= le32_to_cpu(ex->ee_block)
+ ext4_ext_get_actual_len(ex)) { + ext4_ext_get_actual_len(ex)) {
ext4_lblk_t next;
lblock = le32_to_cpu(ex->ee_block) lblock = le32_to_cpu(ex->ee_block)
+ ext4_ext_get_actual_len(ex); + ext4_ext_get_actual_len(ex);
len = ext4_ext_next_allocated_block(path);
ext_debug("cache gap(after): [%lu:%lu] %lu", next = ext4_ext_next_allocated_block(path);
(unsigned long) le32_to_cpu(ex->ee_block), ext_debug("cache gap(after): [%u:%u] %u",
(unsigned long) ext4_ext_get_actual_len(ex), le32_to_cpu(ex->ee_block),
(unsigned long) block); ext4_ext_get_actual_len(ex),
BUG_ON(len == lblock); block);
len = len - lblock; BUG_ON(next == lblock);
len = next - lblock;
} else { } else {
lblock = len = 0; lblock = len = 0;
BUG(); BUG();
} }
ext_debug(" -> %lu:%lu\n", (unsigned long) lblock, len); ext_debug(" -> %u:%lu\n", lblock, len);
ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP); ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP);
} }
static int static int
ext4_ext_in_cache(struct inode *inode, unsigned long block, ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
struct ext4_extent *ex) struct ext4_extent *ex)
{ {
struct ext4_ext_cache *cex; struct ext4_ext_cache *cex;
@ -1618,11 +1663,9 @@ ext4_ext_in_cache(struct inode *inode, unsigned long block,
ex->ee_block = cpu_to_le32(cex->ec_block); ex->ee_block = cpu_to_le32(cex->ec_block);
ext4_ext_store_pblock(ex, cex->ec_start); ext4_ext_store_pblock(ex, cex->ec_start);
ex->ee_len = cpu_to_le16(cex->ec_len); ex->ee_len = cpu_to_le16(cex->ec_len);
ext_debug("%lu cached by %lu:%lu:%llu\n", ext_debug("%u cached by %u:%u:%llu\n",
(unsigned long) block, block,
(unsigned long) cex->ec_block, cex->ec_block, cex->ec_len, cex->ec_start);
(unsigned long) cex->ec_len,
cex->ec_start);
return cex->ec_type; return cex->ec_type;
} }
@ -1636,7 +1679,7 @@ ext4_ext_in_cache(struct inode *inode, unsigned long block,
* It's used in truncate case only, thus all requests are for * It's used in truncate case only, thus all requests are for
* last index in the block only. * last index in the block only.
*/ */
int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path) struct ext4_ext_path *path)
{ {
struct buffer_head *bh; struct buffer_head *bh;
@ -1657,7 +1700,7 @@ int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
ext_debug("index is empty, remove it, free block %llu\n", leaf); ext_debug("index is empty, remove it, free block %llu\n", leaf);
bh = sb_find_get_block(inode->i_sb, leaf); bh = sb_find_get_block(inode->i_sb, leaf);
ext4_forget(handle, 1, inode, bh, leaf); ext4_forget(handle, 1, inode, bh, leaf);
ext4_free_blocks(handle, inode, leaf, 1); ext4_free_blocks(handle, inode, leaf, 1, 1);
return err; return err;
} }
@ -1666,7 +1709,7 @@ int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
* This routine returns max. credits that the extent tree can consume. * This routine returns max. credits that the extent tree can consume.
* It should be OK for low-performance paths like ->writepage() * It should be OK for low-performance paths like ->writepage()
* To allow many writing processes to fit into a single transaction, * To allow many writing processes to fit into a single transaction,
* the caller should calculate credits under truncate_mutex and * the caller should calculate credits under i_data_sem and
* pass the actual path. * pass the actual path.
*/ */
int ext4_ext_calc_credits_for_insert(struct inode *inode, int ext4_ext_calc_credits_for_insert(struct inode *inode,
@ -1714,12 +1757,14 @@ int ext4_ext_calc_credits_for_insert(struct inode *inode,
static int ext4_remove_blocks(handle_t *handle, struct inode *inode, static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
struct ext4_extent *ex, struct ext4_extent *ex,
unsigned long from, unsigned long to) ext4_lblk_t from, ext4_lblk_t to)
{ {
struct buffer_head *bh; struct buffer_head *bh;
unsigned short ee_len = ext4_ext_get_actual_len(ex); unsigned short ee_len = ext4_ext_get_actual_len(ex);
int i; int i, metadata = 0;
if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
metadata = 1;
#ifdef EXTENTS_STATS #ifdef EXTENTS_STATS
{ {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@ -1738,42 +1783,45 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
if (from >= le32_to_cpu(ex->ee_block) if (from >= le32_to_cpu(ex->ee_block)
&& to == le32_to_cpu(ex->ee_block) + ee_len - 1) { && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
/* tail removal */ /* tail removal */
unsigned long num; ext4_lblk_t num;
ext4_fsblk_t start; ext4_fsblk_t start;
num = le32_to_cpu(ex->ee_block) + ee_len - from; num = le32_to_cpu(ex->ee_block) + ee_len - from;
start = ext_pblock(ex) + ee_len - num; start = ext_pblock(ex) + ee_len - num;
ext_debug("free last %lu blocks starting %llu\n", num, start); ext_debug("free last %u blocks starting %llu\n", num, start);
for (i = 0; i < num; i++) { for (i = 0; i < num; i++) {
bh = sb_find_get_block(inode->i_sb, start + i); bh = sb_find_get_block(inode->i_sb, start + i);
ext4_forget(handle, 0, inode, bh, start + i); ext4_forget(handle, 0, inode, bh, start + i);
} }
ext4_free_blocks(handle, inode, start, num); ext4_free_blocks(handle, inode, start, num, metadata);
} else if (from == le32_to_cpu(ex->ee_block) } else if (from == le32_to_cpu(ex->ee_block)
&& to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
printk("strange request: removal %lu-%lu from %u:%u\n", printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
from, to, le32_to_cpu(ex->ee_block), ee_len); from, to, le32_to_cpu(ex->ee_block), ee_len);
} else { } else {
printk("strange request: removal(2) %lu-%lu from %u:%u\n", printk(KERN_INFO "strange request: removal(2) "
from, to, le32_to_cpu(ex->ee_block), ee_len); "%u-%u from %u:%u\n",
from, to, le32_to_cpu(ex->ee_block), ee_len);
} }
return 0; return 0;
} }
static int static int
ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path, unsigned long start) struct ext4_ext_path *path, ext4_lblk_t start)
{ {
int err = 0, correct_index = 0; int err = 0, correct_index = 0;
int depth = ext_depth(inode), credits; int depth = ext_depth(inode), credits;
struct ext4_extent_header *eh; struct ext4_extent_header *eh;
unsigned a, b, block, num; ext4_lblk_t a, b, block;
unsigned long ex_ee_block; unsigned num;
ext4_lblk_t ex_ee_block;
unsigned short ex_ee_len; unsigned short ex_ee_len;
unsigned uninitialized = 0; unsigned uninitialized = 0;
struct ext4_extent *ex; struct ext4_extent *ex;
/* the header must be checked already in ext4_ext_remove_space() */ /* the header must be checked already in ext4_ext_remove_space() */
ext_debug("truncate since %lu in leaf\n", start); ext_debug("truncate since %u in leaf\n", start);
if (!path[depth].p_hdr) if (!path[depth].p_hdr)
path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
eh = path[depth].p_hdr; eh = path[depth].p_hdr;
@ -1904,7 +1952,7 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path)
return 1; return 1;
} }
int ext4_ext_remove_space(struct inode *inode, unsigned long start) static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
{ {
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
int depth = ext_depth(inode); int depth = ext_depth(inode);
@ -1912,7 +1960,7 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start)
handle_t *handle; handle_t *handle;
int i = 0, err = 0; int i = 0, err = 0;
ext_debug("truncate since %lu\n", start); ext_debug("truncate since %u\n", start);
/* probably first extent we're gonna free will be last in block */ /* probably first extent we're gonna free will be last in block */
handle = ext4_journal_start(inode, depth + 1); handle = ext4_journal_start(inode, depth + 1);
@ -2094,17 +2142,19 @@ void ext4_ext_release(struct super_block *sb)
* b> Splits in two extents: Write is happening at either end of the extent * b> Splits in two extents: Write is happening at either end of the extent
* c> Splits in three extents: Somone is writing in middle of the extent * c> Splits in three extents: Somone is writing in middle of the extent
*/ */
int ext4_ext_convert_to_initialized(handle_t *handle, struct inode *inode, static int ext4_ext_convert_to_initialized(handle_t *handle,
struct ext4_ext_path *path, struct inode *inode,
ext4_fsblk_t iblock, struct ext4_ext_path *path,
unsigned long max_blocks) ext4_lblk_t iblock,
unsigned long max_blocks)
{ {
struct ext4_extent *ex, newex; struct ext4_extent *ex, newex;
struct ext4_extent *ex1 = NULL; struct ext4_extent *ex1 = NULL;
struct ext4_extent *ex2 = NULL; struct ext4_extent *ex2 = NULL;
struct ext4_extent *ex3 = NULL; struct ext4_extent *ex3 = NULL;
struct ext4_extent_header *eh; struct ext4_extent_header *eh;
unsigned int allocated, ee_block, ee_len, depth; ext4_lblk_t ee_block;
unsigned int allocated, ee_len, depth;
ext4_fsblk_t newblock; ext4_fsblk_t newblock;
int err = 0; int err = 0;
int ret = 0; int ret = 0;
@ -2225,8 +2275,13 @@ out:
return err ? err : allocated; return err ? err : allocated;
} }
/*
 * Must be called with EXT4_I(inode)->i_data_sem held:
 * down_read(&EXT4_I(inode)->i_data_sem) when no file system block is
 * being allocated (i.e. create is zero), and
 * down_write(&EXT4_I(inode)->i_data_sem) otherwise.
 */
int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t iblock, ext4_lblk_t iblock,
unsigned long max_blocks, struct buffer_head *bh_result, unsigned long max_blocks, struct buffer_head *bh_result,
int create, int extend_disksize) int create, int extend_disksize)
{ {
@ -2236,11 +2291,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, newblock; ext4_fsblk_t goal, newblock;
int err = 0, depth, ret; int err = 0, depth, ret;
unsigned long allocated = 0; unsigned long allocated = 0;
struct ext4_allocation_request ar;
__clear_bit(BH_New, &bh_result->b_state); __clear_bit(BH_New, &bh_result->b_state);
ext_debug("blocks %d/%lu requested for inode %u\n", (int) iblock, ext_debug("blocks %u/%lu requested for inode %u\n",
max_blocks, (unsigned) inode->i_ino); iblock, max_blocks, inode->i_ino);
mutex_lock(&EXT4_I(inode)->truncate_mutex);
/* check in cache */ /* check in cache */
goal = ext4_ext_in_cache(inode, iblock, &newex); goal = ext4_ext_in_cache(inode, iblock, &newex);
@ -2260,7 +2315,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
- le32_to_cpu(newex.ee_block) - le32_to_cpu(newex.ee_block)
+ ext_pblock(&newex); + ext_pblock(&newex);
/* number of remaining blocks in the extent */ /* number of remaining blocks in the extent */
allocated = le16_to_cpu(newex.ee_len) - allocated = ext4_ext_get_actual_len(&newex) -
(iblock - le32_to_cpu(newex.ee_block)); (iblock - le32_to_cpu(newex.ee_block));
goto out; goto out;
} else { } else {
@ -2288,7 +2343,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
ex = path[depth].p_ext; ex = path[depth].p_ext;
if (ex) { if (ex) {
unsigned long ee_block = le32_to_cpu(ex->ee_block); ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
ext4_fsblk_t ee_start = ext_pblock(ex); ext4_fsblk_t ee_start = ext_pblock(ex);
unsigned short ee_len; unsigned short ee_len;
@ -2302,7 +2357,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
newblock = iblock - ee_block + ee_start; newblock = iblock - ee_block + ee_start;
/* number of remaining blocks in the extent */ /* number of remaining blocks in the extent */
allocated = ee_len - (iblock - ee_block); allocated = ee_len - (iblock - ee_block);
ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock, ext_debug("%u fit into %lu:%d -> %llu\n", iblock,
ee_block, ee_len, newblock); ee_block, ee_len, newblock);
/* Do not put uninitialized extent in the cache */ /* Do not put uninitialized extent in the cache */
@ -2320,9 +2375,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
ret = ext4_ext_convert_to_initialized(handle, inode, ret = ext4_ext_convert_to_initialized(handle, inode,
path, iblock, path, iblock,
max_blocks); max_blocks);
if (ret <= 0) if (ret <= 0) {
err = ret;
goto out2; goto out2;
else } else
allocated = ret; allocated = ret;
goto outnew; goto outnew;
} }
@ -2347,8 +2403,15 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info)) if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info))
ext4_init_block_alloc_info(inode); ext4_init_block_alloc_info(inode);
/* allocate new block */ /* find neighbour allocated blocks */
goal = ext4_ext_find_goal(inode, path, iblock); ar.lleft = iblock;
err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
if (err)
goto out2;
ar.lright = iblock;
err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright);
if (err)
goto out2;
/* /*
* See if request is beyond maximum number of blocks we can have in * See if request is beyond maximum number of blocks we can have in
@ -2368,10 +2431,21 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
newex.ee_len = cpu_to_le16(max_blocks); newex.ee_len = cpu_to_le16(max_blocks);
err = ext4_ext_check_overlap(inode, &newex, path); err = ext4_ext_check_overlap(inode, &newex, path);
if (err) if (err)
allocated = le16_to_cpu(newex.ee_len); allocated = ext4_ext_get_actual_len(&newex);
else else
allocated = max_blocks; allocated = max_blocks;
newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err);
/* allocate new block */
ar.inode = inode;
ar.goal = ext4_ext_find_goal(inode, path, iblock);
ar.logical = iblock;
ar.len = allocated;
if (S_ISREG(inode->i_mode))
ar.flags = EXT4_MB_HINT_DATA;
else
/* disable in-core preallocation for non-regular files */
ar.flags = 0;
newblock = ext4_mb_new_blocks(handle, &ar, &err);
if (!newblock) if (!newblock)
goto out2; goto out2;
ext_debug("allocate new block: goal %llu, found %llu/%lu\n", ext_debug("allocate new block: goal %llu, found %llu/%lu\n",
@ -2379,14 +2453,17 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
/* try to insert new extent into found leaf and return */ /* try to insert new extent into found leaf and return */
ext4_ext_store_pblock(&newex, newblock); ext4_ext_store_pblock(&newex, newblock);
newex.ee_len = cpu_to_le16(allocated); newex.ee_len = cpu_to_le16(ar.len);
if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */ if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */
ext4_ext_mark_uninitialized(&newex); ext4_ext_mark_uninitialized(&newex);
err = ext4_ext_insert_extent(handle, inode, path, &newex); err = ext4_ext_insert_extent(handle, inode, path, &newex);
if (err) { if (err) {
/* free data blocks we just allocated */ /* free data blocks we just allocated */
/* not a good idea to call discard here directly,
* but otherwise we'd need to call it every free() */
ext4_mb_discard_inode_preallocations(inode);
ext4_free_blocks(handle, inode, ext_pblock(&newex), ext4_free_blocks(handle, inode, ext_pblock(&newex),
le16_to_cpu(newex.ee_len)); ext4_ext_get_actual_len(&newex), 0);
goto out2; goto out2;
} }
@ -2395,6 +2472,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
/* previous routine could use block we allocated */ /* previous routine could use block we allocated */
newblock = ext_pblock(&newex); newblock = ext_pblock(&newex);
allocated = ext4_ext_get_actual_len(&newex);
outnew: outnew:
__set_bit(BH_New, &bh_result->b_state); __set_bit(BH_New, &bh_result->b_state);
@ -2414,8 +2492,6 @@ out2:
ext4_ext_drop_refs(path); ext4_ext_drop_refs(path);
kfree(path); kfree(path);
} }
mutex_unlock(&EXT4_I(inode)->truncate_mutex);
return err ? err : allocated; return err ? err : allocated;
} }
@ -2423,7 +2499,7 @@ void ext4_ext_truncate(struct inode * inode, struct page *page)
{ {
struct address_space *mapping = inode->i_mapping; struct address_space *mapping = inode->i_mapping;
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
unsigned long last_block; ext4_lblk_t last_block;
handle_t *handle; handle_t *handle;
int err = 0; int err = 0;
@ -2445,9 +2521,11 @@ void ext4_ext_truncate(struct inode * inode, struct page *page)
if (page) if (page)
ext4_block_truncate_page(handle, page, mapping, inode->i_size); ext4_block_truncate_page(handle, page, mapping, inode->i_size);
mutex_lock(&EXT4_I(inode)->truncate_mutex); down_write(&EXT4_I(inode)->i_data_sem);
ext4_ext_invalidate_cache(inode); ext4_ext_invalidate_cache(inode);
ext4_mb_discard_inode_preallocations(inode);
/* /*
* TODO: optimization is possible here. * TODO: optimization is possible here.
* Probably we need not scan at all, * Probably we need not scan at all,
@ -2481,7 +2559,7 @@ out_stop:
if (inode->i_nlink) if (inode->i_nlink)
ext4_orphan_del(handle, inode); ext4_orphan_del(handle, inode);
mutex_unlock(&EXT4_I(inode)->truncate_mutex); up_write(&EXT4_I(inode)->i_data_sem);
ext4_journal_stop(handle); ext4_journal_stop(handle);
} }
@ -2516,7 +2594,8 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
{ {
handle_t *handle; handle_t *handle;
ext4_fsblk_t block, max_blocks; ext4_lblk_t block;
unsigned long max_blocks;
ext4_fsblk_t nblocks = 0; ext4_fsblk_t nblocks = 0;
int ret = 0; int ret = 0;
int ret2 = 0; int ret2 = 0;
@ -2544,6 +2623,7 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
* modify 1 super block, 1 block bitmap and 1 group descriptor. * modify 1 super block, 1 block bitmap and 1 group descriptor.
*/ */
credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3; credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3;
down_write((&EXT4_I(inode)->i_data_sem));
retry: retry:
while (ret >= 0 && ret < max_blocks) { while (ret >= 0 && ret < max_blocks) {
block = block + ret; block = block + ret;
@ -2557,12 +2637,12 @@ retry:
ret = ext4_ext_get_blocks(handle, inode, block, ret = ext4_ext_get_blocks(handle, inode, block,
max_blocks, &map_bh, max_blocks, &map_bh,
EXT4_CREATE_UNINITIALIZED_EXT, 0); EXT4_CREATE_UNINITIALIZED_EXT, 0);
WARN_ON(!ret); WARN_ON(ret <= 0);
if (!ret) { if (ret <= 0) {
ext4_error(inode->i_sb, "ext4_fallocate", ext4_error(inode->i_sb, "ext4_fallocate",
"ext4_ext_get_blocks returned 0! inode#%lu" "ext4_ext_get_blocks returned error: "
", block=%llu, max_blocks=%llu", "inode#%lu, block=%u, max_blocks=%lu",
inode->i_ino, block, max_blocks); inode->i_ino, block, max_blocks);
ret = -EIO; ret = -EIO;
ext4_mark_inode_dirty(handle, inode); ext4_mark_inode_dirty(handle, inode);
ret2 = ext4_journal_stop(handle); ret2 = ext4_journal_stop(handle);
@ -2600,6 +2680,7 @@ retry:
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry; goto retry;
up_write((&EXT4_I(inode)->i_data_sem));
/* /*
* Time to update the file size. * Time to update the file size.
* Update only when preallocation was requested beyond the file size. * Update only when preallocation was requested beyond the file size.

Просмотреть файл

@ -37,9 +37,9 @@ static int ext4_release_file (struct inode * inode, struct file * filp)
if ((filp->f_mode & FMODE_WRITE) && if ((filp->f_mode & FMODE_WRITE) &&
(atomic_read(&inode->i_writecount) == 1)) (atomic_read(&inode->i_writecount) == 1))
{ {
mutex_lock(&EXT4_I(inode)->truncate_mutex); down_write(&EXT4_I(inode)->i_data_sem);
ext4_discard_reservation(inode); ext4_discard_reservation(inode);
mutex_unlock(&EXT4_I(inode)->truncate_mutex); up_write(&EXT4_I(inode)->i_data_sem);
} }
if (is_dx(inode) && filp->private_data) if (is_dx(inode) && filp->private_data)
ext4_htree_free_dir_info(filp->private_data); ext4_htree_free_dir_info(filp->private_data);
@ -56,8 +56,25 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
ssize_t ret; ssize_t ret;
int err; int err;
ret = generic_file_aio_write(iocb, iov, nr_segs, pos); /*
* If we have encountered a bitmap-format file, the size limit
* is smaller than s_maxbytes, which is for extent-mapped files.
*/
if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
size_t length = iov_length(iov, nr_segs);
if (pos > sbi->s_bitmap_maxbytes)
return -EFBIG;
if (pos + length > sbi->s_bitmap_maxbytes) {
nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
sbi->s_bitmap_maxbytes - pos);
}
}
ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
/* /*
* Skip flushing if there was an error, or if nothing was written. * Skip flushing if there was an error, or if nothing was written.
*/ */

Просмотреть файл

@ -14,14 +14,16 @@ extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group,
extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group,
struct ext4_group_desc *gdp); struct ext4_group_desc *gdp);
struct buffer_head *read_block_bitmap(struct super_block *sb, struct buffer_head *read_block_bitmap(struct super_block *sb,
unsigned int block_group); ext4_group_t block_group);
extern unsigned ext4_init_block_bitmap(struct super_block *sb, extern unsigned ext4_init_block_bitmap(struct super_block *sb,
struct buffer_head *bh, int group, struct buffer_head *bh,
ext4_group_t group,
struct ext4_group_desc *desc); struct ext4_group_desc *desc);
#define ext4_free_blocks_after_init(sb, group, desc) \ #define ext4_free_blocks_after_init(sb, group, desc) \
ext4_init_block_bitmap(sb, NULL, group, desc) ext4_init_block_bitmap(sb, NULL, group, desc)
extern unsigned ext4_init_inode_bitmap(struct super_block *sb, extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
struct buffer_head *bh, int group, struct buffer_head *bh,
ext4_group_t group,
struct ext4_group_desc *desc); struct ext4_group_desc *desc);
extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap); extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
#endif /* _LINUX_EXT4_GROUP_H */ #endif /* _LINUX_EXT4_GROUP_H */

Просмотреть файл

@ -64,8 +64,8 @@ void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
} }
/* Initializes an uninitialized inode bitmap */ /* Initializes an uninitialized inode bitmap */
unsigned ext4_init_inode_bitmap(struct super_block *sb, unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
struct buffer_head *bh, int block_group, ext4_group_t block_group,
struct ext4_group_desc *gdp) struct ext4_group_desc *gdp)
{ {
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
@ -75,7 +75,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb,
/* If checksum is bad mark all blocks and inodes use to prevent /* If checksum is bad mark all blocks and inodes use to prevent
* allocation, essentially implementing a per-group read-only flag. */ * allocation, essentially implementing a per-group read-only flag. */
if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
ext4_error(sb, __FUNCTION__, "Checksum bad for group %u\n", ext4_error(sb, __FUNCTION__, "Checksum bad for group %lu\n",
block_group); block_group);
gdp->bg_free_blocks_count = 0; gdp->bg_free_blocks_count = 0;
gdp->bg_free_inodes_count = 0; gdp->bg_free_inodes_count = 0;
@ -98,7 +98,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb,
* Return buffer_head of bitmap on success or NULL. * Return buffer_head of bitmap on success or NULL.
*/ */
static struct buffer_head * static struct buffer_head *
read_inode_bitmap(struct super_block * sb, unsigned long block_group) read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
{ {
struct ext4_group_desc *desc; struct ext4_group_desc *desc;
struct buffer_head *bh = NULL; struct buffer_head *bh = NULL;
@ -152,7 +152,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
unsigned long ino; unsigned long ino;
struct buffer_head *bitmap_bh = NULL; struct buffer_head *bitmap_bh = NULL;
struct buffer_head *bh2; struct buffer_head *bh2;
unsigned long block_group; ext4_group_t block_group;
unsigned long bit; unsigned long bit;
struct ext4_group_desc * gdp; struct ext4_group_desc * gdp;
struct ext4_super_block * es; struct ext4_super_block * es;
@ -260,12 +260,14 @@ error_return:
* For other inodes, search forward from the parent directory\'s block * For other inodes, search forward from the parent directory\'s block
* group to find a free inode. * group to find a free inode.
*/ */
static int find_group_dir(struct super_block *sb, struct inode *parent) static int find_group_dir(struct super_block *sb, struct inode *parent,
ext4_group_t *best_group)
{ {
int ngroups = EXT4_SB(sb)->s_groups_count; ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
unsigned int freei, avefreei; unsigned int freei, avefreei;
struct ext4_group_desc *desc, *best_desc = NULL; struct ext4_group_desc *desc, *best_desc = NULL;
int group, best_group = -1; ext4_group_t group;
int ret = -1;
freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter); freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter);
avefreei = freei / ngroups; avefreei = freei / ngroups;
@ -279,11 +281,12 @@ static int find_group_dir(struct super_block *sb, struct inode *parent)
if (!best_desc || if (!best_desc ||
(le16_to_cpu(desc->bg_free_blocks_count) > (le16_to_cpu(desc->bg_free_blocks_count) >
le16_to_cpu(best_desc->bg_free_blocks_count))) { le16_to_cpu(best_desc->bg_free_blocks_count))) {
best_group = group; *best_group = group;
best_desc = desc; best_desc = desc;
ret = 0;
} }
} }
return best_group; return ret;
} }
/* /*
@ -314,12 +317,13 @@ static int find_group_dir(struct super_block *sb, struct inode *parent)
#define INODE_COST 64 #define INODE_COST 64
#define BLOCK_COST 256 #define BLOCK_COST 256
static int find_group_orlov(struct super_block *sb, struct inode *parent) static int find_group_orlov(struct super_block *sb, struct inode *parent,
ext4_group_t *group)
{ {
int parent_group = EXT4_I(parent)->i_block_group; ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es; struct ext4_super_block *es = sbi->s_es;
int ngroups = sbi->s_groups_count; ext4_group_t ngroups = sbi->s_groups_count;
int inodes_per_group = EXT4_INODES_PER_GROUP(sb); int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
unsigned int freei, avefreei; unsigned int freei, avefreei;
ext4_fsblk_t freeb, avefreeb; ext4_fsblk_t freeb, avefreeb;
@ -327,7 +331,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
unsigned int ndirs; unsigned int ndirs;
int max_debt, max_dirs, min_inodes; int max_debt, max_dirs, min_inodes;
ext4_grpblk_t min_blocks; ext4_grpblk_t min_blocks;
int group = -1, i; ext4_group_t i;
struct ext4_group_desc *desc; struct ext4_group_desc *desc;
freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
@ -340,13 +344,14 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
if ((parent == sb->s_root->d_inode) || if ((parent == sb->s_root->d_inode) ||
(EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL)) { (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL)) {
int best_ndir = inodes_per_group; int best_ndir = inodes_per_group;
int best_group = -1; ext4_group_t grp;
int ret = -1;
get_random_bytes(&group, sizeof(group)); get_random_bytes(&grp, sizeof(grp));
parent_group = (unsigned)group % ngroups; parent_group = (unsigned)grp % ngroups;
for (i = 0; i < ngroups; i++) { for (i = 0; i < ngroups; i++) {
group = (parent_group + i) % ngroups; grp = (parent_group + i) % ngroups;
desc = ext4_get_group_desc (sb, group, NULL); desc = ext4_get_group_desc(sb, grp, NULL);
if (!desc || !desc->bg_free_inodes_count) if (!desc || !desc->bg_free_inodes_count)
continue; continue;
if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir) if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir)
@ -355,11 +360,12 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
continue; continue;
if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb) if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb)
continue; continue;
best_group = group; *group = grp;
ret = 0;
best_ndir = le16_to_cpu(desc->bg_used_dirs_count); best_ndir = le16_to_cpu(desc->bg_used_dirs_count);
} }
if (best_group >= 0) if (ret == 0)
return best_group; return ret;
goto fallback; goto fallback;
} }
@ -380,8 +386,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
max_debt = 1; max_debt = 1;
for (i = 0; i < ngroups; i++) { for (i = 0; i < ngroups; i++) {
group = (parent_group + i) % ngroups; *group = (parent_group + i) % ngroups;
desc = ext4_get_group_desc (sb, group, NULL); desc = ext4_get_group_desc(sb, *group, NULL);
if (!desc || !desc->bg_free_inodes_count) if (!desc || !desc->bg_free_inodes_count)
continue; continue;
if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs) if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
@ -390,17 +396,16 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
continue; continue;
if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks) if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks)
continue; continue;
return group; return 0;
} }
fallback: fallback:
for (i = 0; i < ngroups; i++) { for (i = 0; i < ngroups; i++) {
group = (parent_group + i) % ngroups; *group = (parent_group + i) % ngroups;
desc = ext4_get_group_desc (sb, group, NULL); desc = ext4_get_group_desc(sb, *group, NULL);
if (!desc || !desc->bg_free_inodes_count) if (desc && desc->bg_free_inodes_count &&
continue; le16_to_cpu(desc->bg_free_inodes_count) >= avefreei)
if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei) return 0;
return group;
} }
if (avefreei) { if (avefreei) {
@ -415,21 +420,22 @@ fallback:
return -1; return -1;
} }
static int find_group_other(struct super_block *sb, struct inode *parent) static int find_group_other(struct super_block *sb, struct inode *parent,
ext4_group_t *group)
{ {
int parent_group = EXT4_I(parent)->i_block_group; ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
int ngroups = EXT4_SB(sb)->s_groups_count; ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
struct ext4_group_desc *desc; struct ext4_group_desc *desc;
int group, i; ext4_group_t i;
/* /*
* Try to place the inode in its parent directory * Try to place the inode in its parent directory
*/ */
group = parent_group; *group = parent_group;
desc = ext4_get_group_desc (sb, group, NULL); desc = ext4_get_group_desc(sb, *group, NULL);
if (desc && le16_to_cpu(desc->bg_free_inodes_count) && if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
le16_to_cpu(desc->bg_free_blocks_count)) le16_to_cpu(desc->bg_free_blocks_count))
return group; return 0;
/* /*
* We're going to place this inode in a different blockgroup from its * We're going to place this inode in a different blockgroup from its
@ -440,33 +446,33 @@ static int find_group_other(struct super_block *sb, struct inode *parent)
* *
* So add our directory's i_ino into the starting point for the hash. * So add our directory's i_ino into the starting point for the hash.
*/ */
group = (group + parent->i_ino) % ngroups; *group = (*group + parent->i_ino) % ngroups;
/* /*
* Use a quadratic hash to find a group with a free inode and some free * Use a quadratic hash to find a group with a free inode and some free
* blocks. * blocks.
*/ */
for (i = 1; i < ngroups; i <<= 1) { for (i = 1; i < ngroups; i <<= 1) {
group += i; *group += i;
if (group >= ngroups) if (*group >= ngroups)
group -= ngroups; *group -= ngroups;
desc = ext4_get_group_desc (sb, group, NULL); desc = ext4_get_group_desc(sb, *group, NULL);
if (desc && le16_to_cpu(desc->bg_free_inodes_count) && if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
le16_to_cpu(desc->bg_free_blocks_count)) le16_to_cpu(desc->bg_free_blocks_count))
return group; return 0;
} }
/* /*
* That failed: try linear search for a free inode, even if that group * That failed: try linear search for a free inode, even if that group
* has no free blocks. * has no free blocks.
*/ */
group = parent_group; *group = parent_group;
for (i = 0; i < ngroups; i++) { for (i = 0; i < ngroups; i++) {
if (++group >= ngroups) if (++*group >= ngroups)
group = 0; *group = 0;
desc = ext4_get_group_desc (sb, group, NULL); desc = ext4_get_group_desc(sb, *group, NULL);
if (desc && le16_to_cpu(desc->bg_free_inodes_count)) if (desc && le16_to_cpu(desc->bg_free_inodes_count))
return group; return 0;
} }
return -1; return -1;
@ -487,16 +493,17 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
struct super_block *sb; struct super_block *sb;
struct buffer_head *bitmap_bh = NULL; struct buffer_head *bitmap_bh = NULL;
struct buffer_head *bh2; struct buffer_head *bh2;
int group; ext4_group_t group = 0;
unsigned long ino = 0; unsigned long ino = 0;
struct inode * inode; struct inode * inode;
struct ext4_group_desc * gdp = NULL; struct ext4_group_desc * gdp = NULL;
struct ext4_super_block * es; struct ext4_super_block * es;
struct ext4_inode_info *ei; struct ext4_inode_info *ei;
struct ext4_sb_info *sbi; struct ext4_sb_info *sbi;
int err = 0; int ret2, err = 0;
struct inode *ret; struct inode *ret;
int i, free = 0; ext4_group_t i;
int free = 0;
/* Cannot create files in a deleted directory */ /* Cannot create files in a deleted directory */
if (!dir || !dir->i_nlink) if (!dir || !dir->i_nlink)
@ -512,14 +519,14 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
es = sbi->s_es; es = sbi->s_es;
if (S_ISDIR(mode)) { if (S_ISDIR(mode)) {
if (test_opt (sb, OLDALLOC)) if (test_opt (sb, OLDALLOC))
group = find_group_dir(sb, dir); ret2 = find_group_dir(sb, dir, &group);
else else
group = find_group_orlov(sb, dir); ret2 = find_group_orlov(sb, dir, &group);
} else } else
group = find_group_other(sb, dir); ret2 = find_group_other(sb, dir, &group);
err = -ENOSPC; err = -ENOSPC;
if (group == -1) if (ret2 == -1)
goto out; goto out;
for (i = 0; i < sbi->s_groups_count; i++) { for (i = 0; i < sbi->s_groups_count; i++) {
@ -583,7 +590,7 @@ got:
ino > EXT4_INODES_PER_GROUP(sb)) { ino > EXT4_INODES_PER_GROUP(sb)) {
ext4_error(sb, __FUNCTION__, ext4_error(sb, __FUNCTION__,
"reserved inode or inode > inodes count - " "reserved inode or inode > inodes count - "
"block_group = %d, inode=%lu", group, "block_group = %lu, inode=%lu", group,
ino + group * EXT4_INODES_PER_GROUP(sb)); ino + group * EXT4_INODES_PER_GROUP(sb));
err = -EIO; err = -EIO;
goto fail; goto fail;
@ -702,7 +709,6 @@ got:
if (!S_ISDIR(mode)) if (!S_ISDIR(mode))
ei->i_flags &= ~EXT4_DIRSYNC_FL; ei->i_flags &= ~EXT4_DIRSYNC_FL;
ei->i_file_acl = 0; ei->i_file_acl = 0;
ei->i_dir_acl = 0;
ei->i_dtime = 0; ei->i_dtime = 0;
ei->i_block_alloc_info = NULL; ei->i_block_alloc_info = NULL;
ei->i_block_group = group; ei->i_block_group = group;
@ -741,13 +747,10 @@ got:
if (test_opt(sb, EXTENTS)) { if (test_opt(sb, EXTENTS)) {
EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
ext4_ext_tree_init(handle, inode); ext4_ext_tree_init(handle, inode);
if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { err = ext4_update_incompat_feature(handle, sb,
err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); EXT4_FEATURE_INCOMPAT_EXTENTS);
if (err) goto fail; if (err)
EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS); goto fail;
BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "call ext4_journal_dirty_metadata");
err = ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
}
} }
ext4_debug("allocating inode %lu\n", inode->i_ino); ext4_debug("allocating inode %lu\n", inode->i_ino);
@ -777,7 +780,7 @@ fail_drop:
struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
{ {
unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count); unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count);
unsigned long block_group; ext4_group_t block_group;
int bit; int bit;
struct buffer_head *bitmap_bh = NULL; struct buffer_head *bitmap_bh = NULL;
struct inode *inode = NULL; struct inode *inode = NULL;
@ -833,7 +836,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
{ {
unsigned long desc_count; unsigned long desc_count;
struct ext4_group_desc *gdp; struct ext4_group_desc *gdp;
int i; ext4_group_t i;
#ifdef EXT4FS_DEBUG #ifdef EXT4FS_DEBUG
struct ext4_super_block *es; struct ext4_super_block *es;
unsigned long bitmap_count, x; unsigned long bitmap_count, x;
@ -854,7 +857,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
continue; continue;
x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8);
printk("group %d: stored = %d, counted = %lu\n", printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
i, le16_to_cpu(gdp->bg_free_inodes_count), x); i, le16_to_cpu(gdp->bg_free_inodes_count), x);
bitmap_count += x; bitmap_count += x;
} }
@ -879,7 +882,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
unsigned long ext4_count_dirs (struct super_block * sb) unsigned long ext4_count_dirs (struct super_block * sb)
{ {
unsigned long count = 0; unsigned long count = 0;
int i; ext4_group_t i;
for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) {
struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL); struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL);

Просмотреть файл

@ -105,7 +105,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
*/ */
static unsigned long blocks_for_truncate(struct inode *inode) static unsigned long blocks_for_truncate(struct inode *inode)
{ {
unsigned long needed; ext4_lblk_t needed;
needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9);
@ -243,13 +243,6 @@ static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v)
p->bh = bh; p->bh = bh;
} }
static int verify_chain(Indirect *from, Indirect *to)
{
while (from <= to && from->key == *from->p)
from++;
return (from > to);
}
/** /**
* ext4_block_to_path - parse the block number into array of offsets * ext4_block_to_path - parse the block number into array of offsets
* @inode: inode in question (we are only interested in its superblock) * @inode: inode in question (we are only interested in its superblock)
@ -282,7 +275,8 @@ static int verify_chain(Indirect *from, Indirect *to)
*/ */
static int ext4_block_to_path(struct inode *inode, static int ext4_block_to_path(struct inode *inode,
long i_block, int offsets[4], int *boundary) ext4_lblk_t i_block,
ext4_lblk_t offsets[4], int *boundary)
{ {
int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb);
int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb);
@ -313,7 +307,10 @@ static int ext4_block_to_path(struct inode *inode,
offsets[n++] = i_block & (ptrs - 1); offsets[n++] = i_block & (ptrs - 1);
final = ptrs; final = ptrs;
} else { } else {
ext4_warning(inode->i_sb, "ext4_block_to_path", "block > big"); ext4_warning(inode->i_sb, "ext4_block_to_path",
"block %lu > max",
i_block + direct_blocks +
indirect_blocks + double_blocks);
} }
if (boundary) if (boundary)
*boundary = final - 1 - (i_block & (ptrs - 1)); *boundary = final - 1 - (i_block & (ptrs - 1));
@ -344,12 +341,14 @@ static int ext4_block_to_path(struct inode *inode,
* (pointer to last triple returned, *@err == 0) * (pointer to last triple returned, *@err == 0)
* or when it gets an IO error reading an indirect block * or when it gets an IO error reading an indirect block
* (ditto, *@err == -EIO) * (ditto, *@err == -EIO)
* or when it notices that chain had been changed while it was reading
* (ditto, *@err == -EAGAIN)
* or when it reads all @depth-1 indirect blocks successfully and finds * or when it reads all @depth-1 indirect blocks successfully and finds
* the whole chain, all way to the data (returns %NULL, *err == 0). * the whole chain, all way to the data (returns %NULL, *err == 0).
*
* Need to be called with
* down_read(&EXT4_I(inode)->i_data_sem)
*/ */
static Indirect *ext4_get_branch(struct inode *inode, int depth, int *offsets, static Indirect *ext4_get_branch(struct inode *inode, int depth,
ext4_lblk_t *offsets,
Indirect chain[4], int *err) Indirect chain[4], int *err)
{ {
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
@ -365,9 +364,6 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, int *offsets,
bh = sb_bread(sb, le32_to_cpu(p->key)); bh = sb_bread(sb, le32_to_cpu(p->key));
if (!bh) if (!bh)
goto failure; goto failure;
/* Reader: pointers */
if (!verify_chain(chain, p))
goto changed;
add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); add_chain(++p, bh, (__le32*)bh->b_data + *++offsets);
/* Reader: end */ /* Reader: end */
if (!p->key) if (!p->key)
@ -375,10 +371,6 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, int *offsets,
} }
return NULL; return NULL;
changed:
brelse(bh);
*err = -EAGAIN;
goto no_block;
failure: failure:
*err = -EIO; *err = -EIO;
no_block: no_block:
@ -445,7 +437,7 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind)
* stores it in *@goal and returns zero. * stores it in *@goal and returns zero.
*/ */
static ext4_fsblk_t ext4_find_goal(struct inode *inode, long block, static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block,
Indirect chain[4], Indirect *partial) Indirect chain[4], Indirect *partial)
{ {
struct ext4_block_alloc_info *block_i; struct ext4_block_alloc_info *block_i;
@ -559,7 +551,7 @@ static int ext4_alloc_blocks(handle_t *handle, struct inode *inode,
return ret; return ret;
failed_out: failed_out:
for (i = 0; i <index; i++) for (i = 0; i <index; i++)
ext4_free_blocks(handle, inode, new_blocks[i], 1); ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
return ret; return ret;
} }
@ -590,7 +582,7 @@ failed_out:
*/ */
static int ext4_alloc_branch(handle_t *handle, struct inode *inode, static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
int indirect_blks, int *blks, ext4_fsblk_t goal, int indirect_blks, int *blks, ext4_fsblk_t goal,
int *offsets, Indirect *branch) ext4_lblk_t *offsets, Indirect *branch)
{ {
int blocksize = inode->i_sb->s_blocksize; int blocksize = inode->i_sb->s_blocksize;
int i, n = 0; int i, n = 0;
@ -658,9 +650,9 @@ failed:
ext4_journal_forget(handle, branch[i].bh); ext4_journal_forget(handle, branch[i].bh);
} }
for (i = 0; i <indirect_blks; i++) for (i = 0; i <indirect_blks; i++)
ext4_free_blocks(handle, inode, new_blocks[i], 1); ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
ext4_free_blocks(handle, inode, new_blocks[i], num); ext4_free_blocks(handle, inode, new_blocks[i], num, 0);
return err; return err;
} }
@ -680,7 +672,7 @@ failed:
* chain to new block and return 0. * chain to new block and return 0.
*/ */
static int ext4_splice_branch(handle_t *handle, struct inode *inode, static int ext4_splice_branch(handle_t *handle, struct inode *inode,
long block, Indirect *where, int num, int blks) ext4_lblk_t block, Indirect *where, int num, int blks)
{ {
int i; int i;
int err = 0; int err = 0;
@ -757,9 +749,10 @@ err_out:
for (i = 1; i <= num; i++) { for (i = 1; i <= num; i++) {
BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget"); BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget");
ext4_journal_forget(handle, where[i].bh); ext4_journal_forget(handle, where[i].bh);
ext4_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1); ext4_free_blocks(handle, inode,
le32_to_cpu(where[i-1].key), 1, 0);
} }
ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks); ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks, 0);
return err; return err;
} }
@ -782,14 +775,19 @@ err_out:
* return > 0, # of blocks mapped or allocated. * return > 0, # of blocks mapped or allocated.
* return = 0, if plain lookup failed. * return = 0, if plain lookup failed.
* return < 0, error case. * return < 0, error case.
*
*
* Need to be called with
* down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block
* (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem)
*/ */
int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
sector_t iblock, unsigned long maxblocks, ext4_lblk_t iblock, unsigned long maxblocks,
struct buffer_head *bh_result, struct buffer_head *bh_result,
int create, int extend_disksize) int create, int extend_disksize)
{ {
int err = -EIO; int err = -EIO;
int offsets[4]; ext4_lblk_t offsets[4];
Indirect chain[4]; Indirect chain[4];
Indirect *partial; Indirect *partial;
ext4_fsblk_t goal; ext4_fsblk_t goal;
@ -803,7 +801,8 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
J_ASSERT(handle != NULL || create == 0); J_ASSERT(handle != NULL || create == 0);
depth = ext4_block_to_path(inode,iblock,offsets,&blocks_to_boundary); depth = ext4_block_to_path(inode, iblock, offsets,
&blocks_to_boundary);
if (depth == 0) if (depth == 0)
goto out; goto out;
@ -819,18 +818,6 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
while (count < maxblocks && count <= blocks_to_boundary) { while (count < maxblocks && count <= blocks_to_boundary) {
ext4_fsblk_t blk; ext4_fsblk_t blk;
if (!verify_chain(chain, partial)) {
/*
* Indirect block might be removed by
* truncate while we were reading it.
* Handling of that case: forget what we've
* got now. Flag the err as EAGAIN, so it
* will reread.
*/
err = -EAGAIN;
count = 0;
break;
}
blk = le32_to_cpu(*(chain[depth-1].p + count)); blk = le32_to_cpu(*(chain[depth-1].p + count));
if (blk == first_block + count) if (blk == first_block + count)
@ -838,44 +825,13 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
else else
break; break;
} }
if (err != -EAGAIN) goto got_it;
goto got_it;
} }
/* Next simple case - plain lookup or failed read of indirect block */ /* Next simple case - plain lookup or failed read of indirect block */
if (!create || err == -EIO) if (!create || err == -EIO)
goto cleanup; goto cleanup;
mutex_lock(&ei->truncate_mutex);
/*
* If the indirect block is missing while we are reading
* the chain(ext4_get_branch() returns -EAGAIN err), or
* if the chain has been changed after we grab the semaphore,
* (either because another process truncated this branch, or
* another get_block allocated this branch) re-grab the chain to see if
* the request block has been allocated or not.
*
* Since we already block the truncate/other get_block
* at this point, we will have the current copy of the chain when we
* splice the branch into the tree.
*/
if (err == -EAGAIN || !verify_chain(chain, partial)) {
while (partial > chain) {
brelse(partial->bh);
partial--;
}
partial = ext4_get_branch(inode, depth, offsets, chain, &err);
if (!partial) {
count++;
mutex_unlock(&ei->truncate_mutex);
if (err)
goto cleanup;
clear_buffer_new(bh_result);
goto got_it;
}
}
/* /*
* Okay, we need to do block allocation. Lazily initialize the block * Okay, we need to do block allocation. Lazily initialize the block
* allocation info here if necessary * allocation info here if necessary
@ -911,13 +867,12 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
err = ext4_splice_branch(handle, inode, iblock, err = ext4_splice_branch(handle, inode, iblock,
partial, indirect_blks, count); partial, indirect_blks, count);
/* /*
* i_disksize growing is protected by truncate_mutex. Don't forget to * i_disksize growing is protected by i_data_sem. Don't forget to
* protect it if you're about to implement concurrent * protect it if you're about to implement concurrent
* ext4_get_block() -bzzz * ext4_get_block() -bzzz
*/ */
if (!err && extend_disksize && inode->i_size > ei->i_disksize) if (!err && extend_disksize && inode->i_size > ei->i_disksize)
ei->i_disksize = inode->i_size; ei->i_disksize = inode->i_size;
mutex_unlock(&ei->truncate_mutex);
if (err) if (err)
goto cleanup; goto cleanup;
@ -942,6 +897,47 @@ out:
#define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32) #define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32)
/*
 * ext4_get_blocks_wrap - map (and optionally allocate) blocks for an inode.
 *
 * Dispatches to the extent-based or the indirect-block based mapper
 * depending on EXT4_EXTENTS_FL, taking i_data_sem around each call.
 *
 * A pure lookup is always attempted first under the read lock.  Only
 * when the caller asked for allocation (@create != 0) and the lookup did
 * not return a positive count is the mapper called again, this time under
 * the write lock and with the caller's @create / @extend_disksize values.
 *
 * Returns whatever the selected mapper returned on the last call made.
 */
int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
			unsigned long max_blocks, struct buffer_head *bh,
			int create, int extend_disksize)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	int mapped;

	/*
	 * First pass: plain lookup, no new file system block is requested,
	 * so holding i_data_sem for reading is sufficient.
	 */
	down_read(&ei->i_data_sem);
	if (ei->i_flags & EXT4_EXTENTS_FL)
		mapped = ext4_ext_get_blocks(handle, inode, block, max_blocks,
					     bh, 0, 0);
	else
		mapped = ext4_get_blocks_handle(handle, inode, block,
						max_blocks, bh, 0, 0);
	up_read(&ei->i_data_sem);

	if (create && mapped <= 0) {
		/*
		 * Second pass: new blocks have to be allocated, which
		 * updates i_data, so the semaphore is taken for writing.
		 */
		down_write(&ei->i_data_sem);

		/*
		 * The extents flag is tested again rather than reused from
		 * the first pass: migrate could have changed the inode type
		 * while the semaphore was dropped.
		 */
		if (ei->i_flags & EXT4_EXTENTS_FL)
			mapped = ext4_ext_get_blocks(handle, inode, block,
						     max_blocks, bh, create,
						     extend_disksize);
		else
			mapped = ext4_get_blocks_handle(handle, inode, block,
							max_blocks, bh, create,
							extend_disksize);
		up_write(&ei->i_data_sem);
	}

	return mapped;
}
static int ext4_get_block(struct inode *inode, sector_t iblock, static int ext4_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create) struct buffer_head *bh_result, int create)
{ {
@ -996,7 +992,7 @@ get_block:
* `handle' can be NULL if create is zero * `handle' can be NULL if create is zero
*/ */
struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
long block, int create, int *errp) ext4_lblk_t block, int create, int *errp)
{ {
struct buffer_head dummy; struct buffer_head dummy;
int fatal = 0, err; int fatal = 0, err;
@ -1063,7 +1059,7 @@ err:
} }
struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
int block, int create, int *err) ext4_lblk_t block, int create, int *err)
{ {
struct buffer_head * bh; struct buffer_head * bh;
@ -1446,7 +1442,7 @@ static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
* ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ... * ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ...
* *
* Same applies to ext4_get_block(). We will deadlock on various things like * Same applies to ext4_get_block(). We will deadlock on various things like
* lock_journal and i_truncate_mutex. * lock_journal and i_data_sem
* *
* Setting PF_MEMALLOC here doesn't work - too many internal memory * Setting PF_MEMALLOC here doesn't work - too many internal memory
* allocations fail. * allocations fail.
@ -1828,7 +1824,8 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page,
{ {
ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
unsigned offset = from & (PAGE_CACHE_SIZE-1); unsigned offset = from & (PAGE_CACHE_SIZE-1);
unsigned blocksize, iblock, length, pos; unsigned blocksize, length, pos;
ext4_lblk_t iblock;
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
struct buffer_head *bh; struct buffer_head *bh;
int err = 0; int err = 0;
@ -1964,7 +1961,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q)
* (no partially truncated stuff there). */ * (no partially truncated stuff there). */
static Indirect *ext4_find_shared(struct inode *inode, int depth, static Indirect *ext4_find_shared(struct inode *inode, int depth,
int offsets[4], Indirect chain[4], __le32 *top) ext4_lblk_t offsets[4], Indirect chain[4], __le32 *top)
{ {
Indirect *partial, *p; Indirect *partial, *p;
int k, err; int k, err;
@ -2048,15 +2045,15 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
for (p = first; p < last; p++) { for (p = first; p < last; p++) {
u32 nr = le32_to_cpu(*p); u32 nr = le32_to_cpu(*p);
if (nr) { if (nr) {
struct buffer_head *bh; struct buffer_head *tbh;
*p = 0; *p = 0;
bh = sb_find_get_block(inode->i_sb, nr); tbh = sb_find_get_block(inode->i_sb, nr);
ext4_forget(handle, 0, inode, bh, nr); ext4_forget(handle, 0, inode, tbh, nr);
} }
} }
ext4_free_blocks(handle, inode, block_to_free, count); ext4_free_blocks(handle, inode, block_to_free, count, 0);
} }
/** /**
@ -2229,7 +2226,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
ext4_journal_test_restart(handle, inode); ext4_journal_test_restart(handle, inode);
} }
ext4_free_blocks(handle, inode, nr, 1); ext4_free_blocks(handle, inode, nr, 1, 1);
if (parent_bh) { if (parent_bh) {
/* /*
@ -2289,12 +2286,12 @@ void ext4_truncate(struct inode *inode)
__le32 *i_data = ei->i_data; __le32 *i_data = ei->i_data;
int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb);
struct address_space *mapping = inode->i_mapping; struct address_space *mapping = inode->i_mapping;
int offsets[4]; ext4_lblk_t offsets[4];
Indirect chain[4]; Indirect chain[4];
Indirect *partial; Indirect *partial;
__le32 nr = 0; __le32 nr = 0;
int n; int n;
long last_block; ext4_lblk_t last_block;
unsigned blocksize = inode->i_sb->s_blocksize; unsigned blocksize = inode->i_sb->s_blocksize;
struct page *page; struct page *page;
@ -2320,8 +2317,10 @@ void ext4_truncate(struct inode *inode)
return; return;
} }
if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
return ext4_ext_truncate(inode, page); ext4_ext_truncate(inode, page);
return;
}
handle = start_transaction(inode); handle = start_transaction(inode);
if (IS_ERR(handle)) { if (IS_ERR(handle)) {
@ -2369,7 +2368,7 @@ void ext4_truncate(struct inode *inode)
* From here we block out all ext4_get_block() callers who want to * From here we block out all ext4_get_block() callers who want to
* modify the block allocation tree. * modify the block allocation tree.
*/ */
mutex_lock(&ei->truncate_mutex); down_write(&ei->i_data_sem);
if (n == 1) { /* direct blocks */ if (n == 1) { /* direct blocks */
ext4_free_data(handle, inode, NULL, i_data+offsets[0], ext4_free_data(handle, inode, NULL, i_data+offsets[0],
@ -2433,7 +2432,7 @@ do_indirects:
ext4_discard_reservation(inode); ext4_discard_reservation(inode);
mutex_unlock(&ei->truncate_mutex); up_write(&ei->i_data_sem);
inode->i_mtime = inode->i_ctime = ext4_current_time(inode); inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode); ext4_mark_inode_dirty(handle, inode);
@ -2460,7 +2459,8 @@ out_stop:
static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
unsigned long ino, struct ext4_iloc *iloc) unsigned long ino, struct ext4_iloc *iloc)
{ {
unsigned long desc, group_desc, block_group; unsigned long desc, group_desc;
ext4_group_t block_group;
unsigned long offset; unsigned long offset;
ext4_fsblk_t block; ext4_fsblk_t block;
struct buffer_head *bh; struct buffer_head *bh;
@ -2547,7 +2547,7 @@ static int __ext4_get_inode_loc(struct inode *inode,
struct ext4_group_desc *desc; struct ext4_group_desc *desc;
int inodes_per_buffer; int inodes_per_buffer;
int inode_offset, i; int inode_offset, i;
int block_group; ext4_group_t block_group;
int start; int start;
block_group = (inode->i_ino - 1) / block_group = (inode->i_ino - 1) /
@ -2660,6 +2660,28 @@ void ext4_get_inode_flags(struct ext4_inode_info *ei)
if (flags & S_DIRSYNC) if (flags & S_DIRSYNC)
ei->i_flags |= EXT4_DIRSYNC_FL; ei->i_flags |= EXT4_DIRSYNC_FL;
} }
/*
 * ext4_inode_blocks - decode the on-disk block count of an inode.
 *
 * Without the HUGE_FILE ro-compat feature only the low 32 bits
 * (i_blocks_lo) are used.  With the feature, i_blocks_high supplies bits
 * 32..47; if the inode additionally carries EXT4_HUGE_FILE_FL the stored
 * value is in file system block units and is scaled by
 * (i_blkbits - 9) before being returned.
 */
static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
					struct ext4_inode_info *ei)
{
	struct inode *inode = &(ei->vfs_inode);
	struct super_block *sb = inode->i_sb;
	blkcnt_t nblocks;

	/* Old layout: a single 32 bit field */
	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
		return le32_to_cpu(raw_inode->i_blocks_lo);

	/* Combined 48 bit field: high 16 bits above the low 32 bits */
	nblocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 |
			le32_to_cpu(raw_inode->i_blocks_lo);

	/* Stored in file system block size units, convert before returning */
	if (ei->i_flags & EXT4_HUGE_FILE_FL)
		nblocks <<= (inode->i_blkbits - 9);

	return nblocks;
}
void ext4_read_inode(struct inode * inode) void ext4_read_inode(struct inode * inode)
{ {
@ -2687,7 +2709,6 @@ void ext4_read_inode(struct inode * inode)
inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
} }
inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
inode->i_size = le32_to_cpu(raw_inode->i_size);
ei->i_state = 0; ei->i_state = 0;
ei->i_dir_start_lookup = 0; ei->i_dir_start_lookup = 0;
@ -2709,19 +2730,15 @@ void ext4_read_inode(struct inode * inode)
* recovery code: that's fine, we're about to complete * recovery code: that's fine, we're about to complete
* the process of deleting those. */ * the process of deleting those. */
} }
inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
ei->i_flags = le32_to_cpu(raw_inode->i_flags); ei->i_flags = le32_to_cpu(raw_inode->i_flags);
ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
cpu_to_le32(EXT4_OS_HURD)) cpu_to_le32(EXT4_OS_HURD)) {
ei->i_file_acl |= ei->i_file_acl |=
((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
if (!S_ISREG(inode->i_mode)) {
ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl);
} else {
inode->i_size |=
((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32;
} }
inode->i_size = ext4_isize(raw_inode);
ei->i_disksize = inode->i_size; ei->i_disksize = inode->i_size;
inode->i_generation = le32_to_cpu(raw_inode->i_generation); inode->i_generation = le32_to_cpu(raw_inode->i_generation);
ei->i_block_group = iloc.block_group; ei->i_block_group = iloc.block_group;
@ -2765,6 +2782,13 @@ void ext4_read_inode(struct inode * inode)
EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode); EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
inode->i_version = le32_to_cpu(raw_inode->i_disk_version);
if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
inode->i_version |=
(__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
}
if (S_ISREG(inode->i_mode)) { if (S_ISREG(inode->i_mode)) {
inode->i_op = &ext4_file_inode_operations; inode->i_op = &ext4_file_inode_operations;
inode->i_fop = &ext4_file_operations; inode->i_fop = &ext4_file_operations;
@ -2797,6 +2821,55 @@ bad_inode:
return; return;
} }
/*
 * ext4_inode_blocks_set - encode inode->i_blocks into the raw inode.
 *
 * Three encodings are used, chosen by the magnitude of i_blocks:
 *   - fits in 32 bits: stored in i_blocks_lo, i_blocks_high zeroed;
 *   - fits in 48 bits: split across i_blocks_lo / i_blocks_high;
 *   - larger: shifted right by (i_blkbits - 9) first and the inode is
 *     flagged with EXT4_HUGE_FILE_FL.
 * The two wider encodings first call ext4_update_rocompat_feature() for
 * EXT4_FEATURE_RO_COMPAT_HUGE_FILE; if that fails, its error is returned
 * and the raw inode is left unmodified.
 *
 * Returns 0 on success or the error from ext4_update_rocompat_feature().
 */
static int ext4_inode_blocks_set(handle_t *handle,
				struct ext4_inode *raw_inode,
				struct ext4_inode_info *ei)
{
	struct inode *inode = &(ei->vfs_inode);
	struct super_block *sb = inode->i_sb;
	u64 nblocks = inode->i_blocks;
	int err;

	if (nblocks <= ~0U) {
		/*
		 * i_blocks can be represented in a 32 bit variable
		 * as multiple of 512 bytes
		 */
		raw_inode->i_blocks_lo = cpu_to_le32(nblocks);
		raw_inode->i_blocks_high = 0;
		ei->i_flags &= ~EXT4_HUGE_FILE_FL;
		return 0;
	}

	/* Anything wider than 32 bits needs the huge_file feature */
	err = ext4_update_rocompat_feature(handle, sb,
					   EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
	if (err)
		return err;

	if (nblocks <= 0xffffffffffffULL) {
		/*
		 * i_blocks can be represented in a 48 bit variable
		 * as multiple of 512 bytes
		 */
		ei->i_flags &= ~EXT4_HUGE_FILE_FL;
	} else {
		/*
		 * i_blocks should be represented in a 48 bit variable
		 * as multiple of file system block size
		 */
		ei->i_flags |= EXT4_HUGE_FILE_FL;
		nblocks = nblocks >> (inode->i_blkbits - 9);
	}

	/* i_blocks is stored in the split 48 bit fields */
	raw_inode->i_blocks_lo = cpu_to_le32(nblocks);
	raw_inode->i_blocks_high = cpu_to_le16(nblocks >> 32);
	return 0;
}
/* /*
* Post the struct inode info into an on-disk inode location in the * Post the struct inode info into an on-disk inode location in the
* buffer-cache. This gobbles the caller's reference to the * buffer-cache. This gobbles the caller's reference to the
@ -2845,47 +2918,42 @@ static int ext4_do_update_inode(handle_t *handle,
raw_inode->i_gid_high = 0; raw_inode->i_gid_high = 0;
} }
raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
raw_inode->i_size = cpu_to_le32(ei->i_disksize);
EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode); EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode); EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); if (ext4_inode_blocks_set(handle, raw_inode, ei))
goto out_brelse;
raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
raw_inode->i_flags = cpu_to_le32(ei->i_flags); raw_inode->i_flags = cpu_to_le32(ei->i_flags);
if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
cpu_to_le32(EXT4_OS_HURD)) cpu_to_le32(EXT4_OS_HURD))
raw_inode->i_file_acl_high = raw_inode->i_file_acl_high =
cpu_to_le16(ei->i_file_acl >> 32); cpu_to_le16(ei->i_file_acl >> 32);
raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl); raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
if (!S_ISREG(inode->i_mode)) { ext4_isize_set(raw_inode, ei->i_disksize);
raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); if (ei->i_disksize > 0x7fffffffULL) {
} else { struct super_block *sb = inode->i_sb;
raw_inode->i_size_high = if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
cpu_to_le32(ei->i_disksize >> 32); EXT4_FEATURE_RO_COMPAT_LARGE_FILE) ||
if (ei->i_disksize > 0x7fffffffULL) { EXT4_SB(sb)->s_es->s_rev_level ==
struct super_block *sb = inode->i_sb; cpu_to_le32(EXT4_GOOD_OLD_REV)) {
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, /* If this is the first large file
EXT4_FEATURE_RO_COMPAT_LARGE_FILE) || * created, add a flag to the superblock.
EXT4_SB(sb)->s_es->s_rev_level == */
cpu_to_le32(EXT4_GOOD_OLD_REV)) { err = ext4_journal_get_write_access(handle,
/* If this is the first large file EXT4_SB(sb)->s_sbh);
* created, add a flag to the superblock. if (err)
*/ goto out_brelse;
err = ext4_journal_get_write_access(handle, ext4_update_dynamic_rev(sb);
EXT4_SB(sb)->s_sbh); EXT4_SET_RO_COMPAT_FEATURE(sb,
if (err)
goto out_brelse;
ext4_update_dynamic_rev(sb);
EXT4_SET_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_LARGE_FILE); EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
sb->s_dirt = 1; sb->s_dirt = 1;
handle->h_sync = 1; handle->h_sync = 1;
err = ext4_journal_dirty_metadata(handle, err = ext4_journal_dirty_metadata(handle,
EXT4_SB(sb)->s_sbh); EXT4_SB(sb)->s_sbh);
}
} }
} }
raw_inode->i_generation = cpu_to_le32(inode->i_generation); raw_inode->i_generation = cpu_to_le32(inode->i_generation);
@ -2903,8 +2971,14 @@ static int ext4_do_update_inode(handle_t *handle,
} else for (block = 0; block < EXT4_N_BLOCKS; block++) } else for (block = 0; block < EXT4_N_BLOCKS; block++)
raw_inode->i_block[block] = ei->i_data[block]; raw_inode->i_block[block] = ei->i_data[block];
if (ei->i_extra_isize) raw_inode->i_disk_version = cpu_to_le32(inode->i_version);
if (ei->i_extra_isize) {
if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
raw_inode->i_version_hi =
cpu_to_le32(inode->i_version >> 32);
raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize);
}
BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata");
rc = ext4_journal_dirty_metadata(handle, bh); rc = ext4_journal_dirty_metadata(handle, bh);
@ -3024,6 +3098,17 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
ext4_journal_stop(handle); ext4_journal_stop(handle);
} }
if (attr->ia_valid & ATTR_SIZE) {
if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
if (attr->ia_size > sbi->s_bitmap_maxbytes) {
error = -EFBIG;
goto err_out;
}
}
}
if (S_ISREG(inode->i_mode) && if (S_ISREG(inode->i_mode) &&
attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) {
handle_t *handle; handle_t *handle;
@ -3120,6 +3205,9 @@ int ext4_mark_iloc_dirty(handle_t *handle,
{ {
int err = 0; int err = 0;
if (test_opt(inode->i_sb, I_VERSION))
inode_inc_iversion(inode);
/* the do_update_inode consumes one bh->b_count */ /* the do_update_inode consumes one bh->b_count */
get_bh(iloc->bh); get_bh(iloc->bh);
@ -3158,8 +3246,10 @@ ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
* Expand an inode by new_extra_isize bytes. * Expand an inode by new_extra_isize bytes.
* Returns 0 on success or negative error number on failure. * Returns 0 on success or negative error number on failure.
*/ */
int ext4_expand_extra_isize(struct inode *inode, unsigned int new_extra_isize, static int ext4_expand_extra_isize(struct inode *inode,
struct ext4_iloc iloc, handle_t *handle) unsigned int new_extra_isize,
struct ext4_iloc iloc,
handle_t *handle)
{ {
struct ext4_inode *raw_inode; struct ext4_inode *raw_inode;
struct ext4_xattr_ibody_header *header; struct ext4_xattr_ibody_header *header;

Просмотреть файл

@ -199,7 +199,7 @@ flags_err:
* need to allocate reservation structure for this inode * need to allocate reservation structure for this inode
* before set the window size * before set the window size
*/ */
mutex_lock(&ei->truncate_mutex); down_write(&ei->i_data_sem);
if (!ei->i_block_alloc_info) if (!ei->i_block_alloc_info)
ext4_init_block_alloc_info(inode); ext4_init_block_alloc_info(inode);
@ -207,7 +207,7 @@ flags_err:
struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node;
rsv->rsv_goal_size = rsv_window_size; rsv->rsv_goal_size = rsv_window_size;
} }
mutex_unlock(&ei->truncate_mutex); up_write(&ei->i_data_sem);
return 0; return 0;
} }
case EXT4_IOC_GROUP_EXTEND: { case EXT4_IOC_GROUP_EXTEND: {
@ -254,6 +254,9 @@ flags_err:
return err; return err;
} }
case EXT4_IOC_MIGRATE:
return ext4_ext_migrate(inode, filp, cmd, arg);
default: default:
return -ENOTTY; return -ENOTTY;
} }

4552
fs/ext4/mballoc.c Normal file

Разница между файлами не показана из-за своего большого размера Загрузить разницу

560
fs/ext4/migrate.c Normal file
Просмотреть файл

@ -0,0 +1,560 @@
/*
* Copyright IBM Corporation, 2007
* Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2.1 of the GNU Lesser General Public License
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*
*/
#include <linux/module.h>
#include <linux/ext4_jbd2.h>
#include <linux/ext4_fs_extents.h>
/*
 * The contiguous blocks details which can be
 * represented by a single extent
 *
 * A first_pblock value of 0 marks the range as empty: finish_range()
 * returns early in that case and resets first_pblock to 0 once the
 * range has been consumed.
 */
struct list_blocks_struct {
	/* first and last file block number covered by the range */
	ext4_lblk_t first_block, last_block;
	/* on-disk block numbers backing first_block and last_block */
	ext4_fsblk_t first_pblock, last_pblock;
};
/*
 * finish_range - insert the range accumulated in @lb as one extent.
 *
 * Builds an extent from @lb (start block, length, physical start) and
 * inserts it into @inode's extent tree, extending or restarting the
 * journal handle as needed to obtain the credits for the insert.
 *
 * An empty range (first_pblock == 0) is a no-op returning 0.  In every
 * other case @lb is marked empty again before returning, whether or not
 * the insert succeeded.
 *
 * Returns 0 on success or a negative error code.
 *
 * NOTE(review): the path returned by ext4_ext_find_extent() is not
 * released anywhere in this function - this looks like a leak unless
 * ownership is taken elsewhere; confirm against the extent code.
 */
static int finish_range(handle_t *handle, struct inode *inode,
				struct list_blocks_struct *lb)
{
	struct ext4_ext_path *path;
	struct ext4_extent newext;
	int credits;
	int retval = 0;

	if (!lb->first_pblock)
		return 0;

	/* Describe the range as an extent to add to the temp inode */
	newext.ee_block = cpu_to_le32(lb->first_block);
	newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1);
	ext4_ext_store_pblock(&newext, lb->first_pblock);

	path = ext4_ext_find_extent(inode, lb->first_block, NULL);
	if (IS_ERR(path)) {
		retval = PTR_ERR(path);
		goto err_out;
	}

	/*
	 * Credits needed for inserting this extent.  As this runs in a
	 * loop, extra credits may pile up on the handle; the restart
	 * below keeps that accumulation bounded.
	 */
	credits = ext4_ext_calc_credits_for_insert(inode, path);
	if (credits) {
		/* Too many credits accumulated already: start afresh */
		if (handle->h_buffer_credits >= EXT4_RESERVE_TRANS_BLOCKS) {
			retval = ext4_journal_restart(handle, credits);
			if (retval)
				goto err_out;
		}

		/* Try to extend; if that is refused, restart the journal */
		if (ext4_journal_extend(handle, credits) != 0) {
			retval = ext4_journal_restart(handle, credits);
			if (retval)
				goto err_out;
		}
	}

	retval = ext4_ext_insert_extent(handle, inode, path, &newext);

err_out:
	/* The range is consumed (or abandoned) either way */
	lb->first_pblock = 0;
	return retval;
}
/*
 * update_extent_range - account one mapped block in the current range.
 *
 * If (@blk_num, @pblock) directly follows the last block of the range
 * held in @lb, both logically and physically, the range simply grows by
 * one block.  Otherwise the current range is flushed via finish_range()
 * and a new single-block range is started at (@blk_num, @pblock); the new
 * range is started even when the flush reported an error.
 *
 * Returns 0, or the error returned by finish_range().
 */
static int update_extent_range(handle_t *handle, struct inode *inode,
				ext4_fsblk_t pblock, ext4_lblk_t blk_num,
				struct list_blocks_struct *lb)
{
	int err = 0;
	int extends_range = lb->first_pblock &&
			    (lb->last_pblock + 1 == pblock) &&
			    (lb->last_block + 1 == blk_num);

	if (!extends_range) {
		/* Not contiguous: close the old range, open a new one */
		err = finish_range(handle, inode, lb);
		lb->first_pblock = pblock;
		lb->first_block = blk_num;
	}

	/* In both cases the given block becomes the tail of the range */
	lb->last_pblock = pblock;
	lb->last_block = blk_num;
	return err;
}
/*
 * update_ind_extent_range - walk one indirect block.
 *
 * @pblock is the block holding the pointers (0 if it is a hole) and
 * *@blk_nump is the file block number of its first slot.  Every non-zero
 * slot is fed to update_extent_range().  On return *@blk_nump has been
 * advanced past the slots that were processed; for a hole it is advanced
 * by a whole block worth of slots.
 *
 * Returns 0, -EIO if the block cannot be read, or the first error from
 * update_extent_range().
 */
static int update_ind_extent_range(handle_t *handle, struct inode *inode,
				   ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
				   struct list_blocks_struct *lb)
{
	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
	ext4_lblk_t cur_blk = *blk_nump;
	struct buffer_head *bh;
	__le32 *slots;
	int idx, err = 0;

	if (!pblock) {
		/* Hole: only the file block number moves forward */
		*blk_nump += max_entries;
		return 0;
	}

	bh = sb_bread(inode->i_sb, pblock);
	if (!bh)
		return -EIO;

	slots = (__le32 *)bh->b_data;
	for (idx = 0; idx < max_entries; idx++, cur_blk++) {
		if (!slots[idx])
			continue;
		err = update_extent_range(handle, inode,
					  le32_to_cpu(slots[idx]),
					  cur_blk, lb);
		if (err)
			break;
	}

	/* Report how far the walk got */
	*blk_nump = cur_blk;
	put_bh(bh);
	return err;
}
/*
 * update_dind_extent_range - walk one double indirect block.
 *
 * Each non-zero slot of @pblock names an indirect block which is handed
 * to update_ind_extent_range(); each zero slot skips one indirect block
 * worth of file block numbers.  If @pblock itself is 0 the whole double
 * indirect span is skipped.  *@blk_nump is updated to the file block
 * number reached.
 *
 * Returns 0, -EIO if the block cannot be read, or the first error from
 * update_ind_extent_range().
 */
static int update_dind_extent_range(handle_t *handle, struct inode *inode,
				    ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
				    struct list_blocks_struct *lb)
{
	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
	ext4_lblk_t cur_blk = *blk_nump;
	struct buffer_head *bh;
	__le32 *slots;
	int idx, err = 0;

	if (!pblock) {
		/* Hole: only the file block number moves forward */
		*blk_nump += max_entries * max_entries;
		return 0;
	}

	bh = sb_bread(inode->i_sb, pblock);
	if (!bh)
		return -EIO;

	slots = (__le32 *)bh->b_data;
	for (idx = 0; idx < max_entries; idx++) {
		if (!slots[idx]) {
			/* Missing indirect block: skip its block numbers */
			cur_blk += max_entries;
			continue;
		}
		err = update_ind_extent_range(handle, inode,
					      le32_to_cpu(slots[idx]),
					      &cur_blk, lb);
		if (err)
			break;
	}

	/* Report how far the walk got */
	*blk_nump = cur_blk;
	put_bh(bh);
	return err;
}
/*
 * update_tind_extent_range - walk one triple indirect block.
 *
 * Each non-zero slot of @pblock names a double indirect block which is
 * handed to update_dind_extent_range(); each zero slot skips one double
 * indirect block worth of file block numbers.  If @pblock itself is 0 the
 * whole triple indirect span is skipped.  *@blk_nump is updated to the
 * file block number reached.
 *
 * Returns 0, -EIO if the block cannot be read, or the first error from
 * update_dind_extent_range().
 */
static int update_tind_extent_range(handle_t *handle, struct inode *inode,
				    ext4_fsblk_t pblock, ext4_lblk_t *blk_nump,
				    struct list_blocks_struct *lb)
{
	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
	ext4_lblk_t cur_blk = *blk_nump;
	struct buffer_head *bh;
	__le32 *slots;
	int idx, err = 0;

	if (!pblock) {
		/* Hole: only the file block number moves forward */
		*blk_nump += max_entries * max_entries * max_entries;
		return 0;
	}

	bh = sb_bread(inode->i_sb, pblock);
	if (!bh)
		return -EIO;

	slots = (__le32 *)bh->b_data;
	for (idx = 0; idx < max_entries; idx++) {
		if (!slots[idx]) {
			/* Missing double indirect block: skip its numbers */
			cur_blk += max_entries * max_entries;
			continue;
		}
		err = update_dind_extent_range(handle, inode,
					       le32_to_cpu(slots[idx]),
					       &cur_blk, lb);
		if (err)
			break;
	}

	/* Report how far the walk got */
	*blk_nump = cur_blk;
	put_bh(bh);
	return err;
}
/*
 * free_dind_blocks - release a double indirect block and what it lists.
 *
 * Reads the block whose (little-endian) number is @i_data, frees every
 * block referenced by a non-zero slot in it, and finally frees the block
 * itself.
 *
 * Returns 0, or -EIO if the block cannot be read (nothing is freed then).
 */
static int free_dind_blocks(handle_t *handle,
				struct inode *inode, __le32 i_data)
{
	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
	struct buffer_head *bh;
	__le32 *slots;
	int idx;

	bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
	if (!bh)
		return -EIO;

	slots = (__le32 *)bh->b_data;
	for (idx = 0; idx < max_entries; idx++) {
		if (!slots[idx])
			continue;
		ext4_free_blocks(handle, inode,
				 le32_to_cpu(slots[idx]), 1, 1);
	}
	put_bh(bh);

	/* The referenced blocks are gone, now drop the container block */
	ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
	return 0;
}
/*
 * free_tind_blocks - release a triple indirect block and what it lists.
 *
 * Reads the block whose (little-endian) number is @i_data and calls
 * free_dind_blocks() for every non-zero slot.  The block itself is freed
 * only if all of those calls succeeded.
 *
 * Returns 0, -EIO if the block cannot be read, or the first error from
 * free_dind_blocks().
 */
static int free_tind_blocks(handle_t *handle,
				struct inode *inode, __le32 i_data)
{
	unsigned long max_entries = inode->i_sb->s_blocksize >> 2;
	struct buffer_head *bh;
	__le32 *slots;
	int idx, err = 0;

	bh = sb_bread(inode->i_sb, le32_to_cpu(i_data));
	if (!bh)
		return -EIO;

	slots = (__le32 *)bh->b_data;
	for (idx = 0; idx < max_entries; idx++) {
		if (!slots[idx])
			continue;
		err = free_dind_blocks(handle, inode, slots[idx]);
		if (err)
			break;
	}
	put_bh(bh);

	/* On failure the triple indirect block itself is left allocated */
	if (err)
		return err;

	ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
	return 0;
}
/*
 * free_ind_block - free the indirect mapping blocks of @inode.
 *
 * Frees the single indirect block directly, and the double and triple
 * indirect blocks through free_dind_blocks() / free_tind_blocks().  Slots
 * that are zero in i_data are skipped.
 *
 * Returns 0, or the first error reported by one of the helpers.
 */
static int free_ind_block(handle_t *handle, struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	__le32 *slots = ei->i_data;
	int err;

	if (slots[EXT4_IND_BLOCK])
		ext4_free_blocks(handle, inode,
				 le32_to_cpu(slots[EXT4_IND_BLOCK]), 1, 1);

	if (slots[EXT4_DIND_BLOCK]) {
		err = free_dind_blocks(handle, inode, slots[EXT4_DIND_BLOCK]);
		if (err)
			return err;
	}

	if (slots[EXT4_TIND_BLOCK]) {
		err = free_tind_blocks(handle, inode, slots[EXT4_TIND_BLOCK]);
		if (err)
			return err;
	}

	return 0;
}
/*
 * ext4_ext_swap_inode_data - switch @inode over to the extent map that
 * was built in @tmp_inode.
 *
 * Frees the old indirect blocks of @inode, makes sure the handle has a
 * credit for writing the inode, then sets EXT4_EXTENTS_FL, copies i_data
 * from @tmp_inode and adds @tmp_inode's i_blocks to @inode's.
 *
 * The incoming value of @retval is not used; it is overwritten at once.
 *
 * Returns 0 on success, or the error from free_ind_block() or
 * ext4_journal_restart().
 */
static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
				struct inode *tmp_inode, int retval)
{
	struct ext4_inode_info *dst = EXT4_I(inode);
	struct ext4_inode_info *src = EXT4_I(tmp_inode);

	retval = free_ind_block(handle, inode);
	if (retval)
		return retval;

	/*
	 * One credit accounted for writing the
	 * i_data field of the original inode.
	 * Fall back to restarting the handle if it cannot be extended.
	 */
	retval = ext4_journal_extend(handle, 1);
	if (retval != 0) {
		retval = ext4_journal_restart(handle, 1);
		if (retval)
			return retval;
	}

	/*
	 * The extent map has been built in the tmp inode.
	 * Now copy its i_data across.
	 */
	dst->i_flags |= EXT4_EXTENTS_FL;
	memcpy(dst->i_data, src->i_data, sizeof(dst->i_data));

	/*
	 * Account the blocks that got allocated for extent index blocks
	 * while the extents were added.
	 *
	 * The original inode's i_blocks need not be updated through the
	 * quota APIs for these: the quota update already happened via
	 * tmp_inode.
	 */
	spin_lock(&inode->i_lock);
	inode->i_blocks += tmp_inode->i_blocks;
	spin_unlock(&inode->i_lock);

	ext4_mark_inode_dirty(handle, inode);
	return 0;
}
/*
 * Free the extent tree block that index entry @ix points to, after
 * recursively freeing every index block below it.
 *
 * Returns -EIO if the block cannot be read, otherwise 0 or the first error
 * returned by a recursive call.  The block read here is freed even when a
 * child reported an error.
 */
static int free_ext_idx(handle_t *handle, struct inode *inode,
					struct ext4_extent_idx *ix)
{
	struct buffer_head *bh;
	struct ext4_extent_header *hdr;
	struct ext4_extent_idx *child;
	ext4_fsblk_t block = idx_pblock(ix);
	int n, err = 0;

	bh = sb_bread(inode->i_sb, block);
	if (!bh)
		return -EIO;

	hdr = (struct ext4_extent_header *)bh->b_data;
	if (hdr->eh_depth != 0) {
		/* Non-zero depth: the entries of this block are indexes too. */
		child = EXT_FIRST_INDEX(hdr);
		n = 0;
		while (n < le16_to_cpu(hdr->eh_entries)) {
			err = free_ext_idx(handle, inode, child);
			if (err)
				break;
			n++;
			child++;
		}
	}

	put_bh(bh);
	ext4_free_blocks(handle, inode, block, 1, 1);
	return err;
}
/*
 * Free only the extent metadata blocks of @inode, starting from the
 * extent header stored in its i_data area.
 *
 * Returns 0 on success or the first error from free_ext_idx().
 */
static int free_ext_block(handle_t *handle, struct inode *inode)
{
	struct ext4_extent_header *root =
		(struct ext4_extent_header *)EXT4_I(inode)->i_data;
	struct ext4_extent_idx *first;
	int n, err = 0;

	/* Depth zero: no extra blocks were allocated for extent metadata. */
	if (root->eh_depth == 0)
		return 0;

	first = EXT_FIRST_INDEX(root);
	for (n = 0; n < le16_to_cpu(root->eh_entries); n++) {
		err = free_ext_idx(handle, inode, first + n);
		if (err)
			break;
	}

	return err;
}
/*
 * ext4_ext_migrate - convert an indirect-block mapped inode to extent format
 * @inode: inode to convert
 * @filp:  not used by this function
 * @cmd:   not used by this function
 * @arg:   not used by this function
 *
 * A temporary inode is created and an extent map describing the data
 * blocks of @inode is built in it.  On success the extent map is moved
 * into @inode and the old indirect blocks are freed; on failure the extent
 * metadata built so far is freed again.  In both cases the temporary inode
 * is released at the end.
 *
 * Returns 0 on success, -EINVAL if the filesystem is mounted without the
 * extents option or @inode already uses extents, or a negative error code
 * from one of the helpers.
 */
int ext4_ext_migrate(struct inode *inode, struct file *filp,
				unsigned int cmd, unsigned long arg)
{
	handle_t *handle;
	int retval = 0, i;
	__le32 *i_data;
	ext4_lblk_t blk_count = 0;
	struct ext4_inode_info *ei;
	struct inode *tmp_inode = NULL;
	struct list_blocks_struct lb;
	unsigned long max_entries;

	if (!test_opt(inode->i_sb, EXTENTS))
		/*
		 * if mounted with noextents we don't allow the migrate
		 */
		return -EINVAL;

	if ((EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
		return -EINVAL;

	down_write(&EXT4_I(inode)->i_data_sem);
	handle = ext4_journal_start(inode,
					EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
					2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)
					+ 1);
	if (IS_ERR(handle)) {
		/*
		 * No handle and no tmp inode exist yet, so the common
		 * cleanup below must not run; only drop the semaphore.
		 */
		retval = PTR_ERR(handle);
		goto out_unlock;
	}
	tmp_inode = ext4_new_inode(handle,
				inode->i_sb->s_root->d_inode,
				S_IFREG);
	if (IS_ERR(tmp_inode)) {
		/*
		 * NOTE(review): the error code carried by tmp_inode is
		 * discarded and -ENOMEM is reported instead; kept as-is so
		 * callers see the same value as before.
		 */
		retval = -ENOMEM;
		ext4_journal_stop(handle);
		tmp_inode = NULL;
		/* The handle is stopped and there is no tmp inode to undo. */
		goto out_unlock;
	}
	i_size_write(tmp_inode, i_size_read(inode));
	/*
	 * We don't want the inode to be reclaimed
	 * if we got interrupted in between. We have
	 * this tmp inode carrying reference to the
	 * data blocks of the original file. We set
	 * the i_nlink to zero at the last stage after
	 * switching the original file to extent format
	 */
	tmp_inode->i_nlink = 1;

	ext4_ext_tree_init(handle, tmp_inode);
	ext4_orphan_add(handle, tmp_inode);
	ext4_journal_stop(handle);

	ei = EXT4_I(inode);
	i_data = ei->i_data;
	memset(&lb, 0, sizeof(lb));

	/* 32 bit block address 4 bytes */
	max_entries = inode->i_sb->s_blocksize >> 2;

	/*
	 * start with one credit accounted for
	 * superblock modification.
	 *
	 * For the tmp_inode we have already committed the
	 * transaction that created the inode. Later, as and
	 * when we add extents, we extend the journal
	 */
	handle = ext4_journal_start(inode, 1);
	if (IS_ERR(handle)) {
		/*
		 * Without a handle none of the journalled cleanup below can
		 * run.  No extent has been added to the tmp inode yet, so
		 * just drop its link count and let the final iput() dispose
		 * of it, as on the normal exit path.
		 */
		retval = PTR_ERR(handle);
		tmp_inode->i_nlink = 0;
		goto out_unlock;
	}
	for (i = 0; i < EXT4_NDIR_BLOCKS; i++, blk_count++) {
		if (i_data[i]) {
			retval = update_extent_range(handle, tmp_inode,
						le32_to_cpu(i_data[i]),
						blk_count, &lb);
			if (retval)
				goto err_out;
		}
	}
	if (i_data[EXT4_IND_BLOCK]) {
		retval = update_ind_extent_range(handle, tmp_inode,
					le32_to_cpu(i_data[EXT4_IND_BLOCK]),
					&blk_count, &lb);
		if (retval)
			goto err_out;
	} else
		/* no indirect block: skip the logical range it would map */
		blk_count += max_entries;
	if (i_data[EXT4_DIND_BLOCK]) {
		retval = update_dind_extent_range(handle, tmp_inode,
					le32_to_cpu(i_data[EXT4_DIND_BLOCK]),
					&blk_count, &lb);
		if (retval)
			goto err_out;
	} else
		blk_count += max_entries * max_entries;
	if (i_data[EXT4_TIND_BLOCK]) {
		retval = update_tind_extent_range(handle, tmp_inode,
					le32_to_cpu(i_data[EXT4_TIND_BLOCK]),
					&blk_count, &lb);
		if (retval)
			goto err_out;
	}
	/*
	 * Build the last extent
	 */
	retval = finish_range(handle, tmp_inode, &lb);
err_out:
	/*
	 * Reached only with a started handle and a valid tmp_inode.
	 *
	 * We are either freeing extent information or indirect
	 * blocks. During this we touch superblock, group descriptor
	 * and block bitmap. Later we mark the tmp_inode dirty
	 * via ext4_ext_tree_init. So allocate a credit of 4
	 * We may update quota (user and group).
	 *
	 * FIXME!! we may be touching bitmaps in different block groups.
	 */
	if (ext4_journal_extend(handle,
			4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)) != 0)
		ext4_journal_restart(handle,
				4 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb));
	if (retval)
		/*
		 * Failure case delete the extent information with the
		 * tmp_inode
		 */
		free_ext_block(handle, tmp_inode);
	else
		retval = ext4_ext_swap_inode_data(handle, inode,
							tmp_inode, retval);

	/*
	 * Mark the tmp_inode as of size zero
	 */
	i_size_write(tmp_inode, 0);

	/*
	 * set the i_blocks count to zero
	 * so that the ext4_delete_inode does the
	 * right job
	 *
	 * We don't need to take the i_lock because
	 * the inode is not visible to user space.
	 */
	tmp_inode->i_blocks = 0;

	/* Reset the extent details */
	ext4_ext_tree_init(handle, tmp_inode);

	/*
	 * Set the i_nlink to zero so that
	 * generic_drop_inode really deletes the
	 * inode
	 */
	tmp_inode->i_nlink = 0;

	ext4_journal_stop(handle);
out_unlock:
	up_write(&EXT4_I(inode)->i_data_sem);

	if (tmp_inode)
		iput(tmp_inode);

	return retval;
}

Просмотреть файл

@ -51,7 +51,7 @@
static struct buffer_head *ext4_append(handle_t *handle, static struct buffer_head *ext4_append(handle_t *handle,
struct inode *inode, struct inode *inode,
u32 *block, int *err) ext4_lblk_t *block, int *err)
{ {
struct buffer_head *bh; struct buffer_head *bh;
@ -144,8 +144,8 @@ struct dx_map_entry
u16 size; u16 size;
}; };
static inline unsigned dx_get_block (struct dx_entry *entry); static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
static void dx_set_block (struct dx_entry *entry, unsigned value); static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
static inline unsigned dx_get_hash (struct dx_entry *entry); static inline unsigned dx_get_hash (struct dx_entry *entry);
static void dx_set_hash (struct dx_entry *entry, unsigned value); static void dx_set_hash (struct dx_entry *entry, unsigned value);
static unsigned dx_get_count (struct dx_entry *entries); static unsigned dx_get_count (struct dx_entry *entries);
@ -166,7 +166,8 @@ static void dx_sort_map(struct dx_map_entry *map, unsigned count);
static struct ext4_dir_entry_2 *dx_move_dirents (char *from, char *to, static struct ext4_dir_entry_2 *dx_move_dirents (char *from, char *to,
struct dx_map_entry *offsets, int count); struct dx_map_entry *offsets, int count);
static struct ext4_dir_entry_2* dx_pack_dirents (char *base, int size); static struct ext4_dir_entry_2* dx_pack_dirents (char *base, int size);
static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); static void dx_insert_block(struct dx_frame *frame,
u32 hash, ext4_lblk_t block);
static int ext4_htree_next_block(struct inode *dir, __u32 hash, static int ext4_htree_next_block(struct inode *dir, __u32 hash,
struct dx_frame *frame, struct dx_frame *frame,
struct dx_frame *frames, struct dx_frame *frames,
@ -181,12 +182,12 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
* Mask them off for now. * Mask them off for now.
*/ */
static inline unsigned dx_get_block (struct dx_entry *entry) static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
{ {
return le32_to_cpu(entry->block) & 0x00ffffff; return le32_to_cpu(entry->block) & 0x00ffffff;
} }
static inline void dx_set_block (struct dx_entry *entry, unsigned value) static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
{ {
entry->block = cpu_to_le32(value); entry->block = cpu_to_le32(value);
} }
@ -243,8 +244,8 @@ static void dx_show_index (char * label, struct dx_entry *entries)
int i, n = dx_get_count (entries); int i, n = dx_get_count (entries);
printk("%s index ", label); printk("%s index ", label);
for (i = 0; i < n; i++) { for (i = 0; i < n; i++) {
printk("%x->%u ", i? dx_get_hash(entries + i) : printk("%x->%lu ", i? dx_get_hash(entries + i) :
0, dx_get_block(entries + i)); 0, (unsigned long)dx_get_block(entries + i));
} }
printk("\n"); printk("\n");
} }
@ -280,7 +281,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_ent
space += EXT4_DIR_REC_LEN(de->name_len); space += EXT4_DIR_REC_LEN(de->name_len);
names++; names++;
} }
de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); de = ext4_next_entry(de);
} }
printk("(%i)\n", names); printk("(%i)\n", names);
return (struct stats) { names, space, 1 }; return (struct stats) { names, space, 1 };
@ -297,7 +298,8 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
printk("%i indexed blocks...\n", count); printk("%i indexed blocks...\n", count);
for (i = 0; i < count; i++, entries++) for (i = 0; i < count; i++, entries++)
{ {
u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0; ext4_lblk_t block = dx_get_block(entries);
ext4_lblk_t hash = i ? dx_get_hash(entries): 0;
u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
struct stats stats; struct stats stats;
printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range);
@ -551,7 +553,8 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
*/ */
static inline struct ext4_dir_entry_2 *ext4_next_entry(struct ext4_dir_entry_2 *p) static inline struct ext4_dir_entry_2 *ext4_next_entry(struct ext4_dir_entry_2 *p)
{ {
return (struct ext4_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len)); return (struct ext4_dir_entry_2 *)((char *)p +
ext4_rec_len_from_disk(p->rec_len));
} }
/* /*
@ -560,7 +563,7 @@ static inline struct ext4_dir_entry_2 *ext4_next_entry(struct ext4_dir_entry_2 *
* into the tree. If there is an error it is returned in err. * into the tree. If there is an error it is returned in err.
*/ */
static int htree_dirblock_to_tree(struct file *dir_file, static int htree_dirblock_to_tree(struct file *dir_file,
struct inode *dir, int block, struct inode *dir, ext4_lblk_t block,
struct dx_hash_info *hinfo, struct dx_hash_info *hinfo,
__u32 start_hash, __u32 start_minor_hash) __u32 start_hash, __u32 start_minor_hash)
{ {
@ -568,7 +571,8 @@ static int htree_dirblock_to_tree(struct file *dir_file,
struct ext4_dir_entry_2 *de, *top; struct ext4_dir_entry_2 *de, *top;
int err, count = 0; int err, count = 0;
dxtrace(printk("In htree dirblock_to_tree: block %d\n", block)); dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
(unsigned long)block));
if (!(bh = ext4_bread (NULL, dir, block, 0, &err))) if (!(bh = ext4_bread (NULL, dir, block, 0, &err)))
return err; return err;
@ -620,9 +624,9 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
struct ext4_dir_entry_2 *de; struct ext4_dir_entry_2 *de;
struct dx_frame frames[2], *frame; struct dx_frame frames[2], *frame;
struct inode *dir; struct inode *dir;
int block, err; ext4_lblk_t block;
int count = 0; int count = 0;
int ret; int ret, err;
__u32 hashval; __u32 hashval;
dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
@ -720,7 +724,7 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size,
cond_resched(); cond_resched();
} }
/* XXX: do we need to check rec_len == 0 case? -Chris */ /* XXX: do we need to check rec_len == 0 case? -Chris */
de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); de = ext4_next_entry(de);
} }
return count; return count;
} }
@ -752,7 +756,7 @@ static void dx_sort_map (struct dx_map_entry *map, unsigned count)
} while(more); } while(more);
} }
static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block) static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
{ {
struct dx_entry *entries = frame->entries; struct dx_entry *entries = frame->entries;
struct dx_entry *old = frame->at, *new = old + 1; struct dx_entry *old = frame->at, *new = old + 1;
@ -820,7 +824,7 @@ static inline int search_dirblock(struct buffer_head * bh,
return 1; return 1;
} }
/* prevent looping on a bad block */ /* prevent looping on a bad block */
de_len = le16_to_cpu(de->rec_len); de_len = ext4_rec_len_from_disk(de->rec_len);
if (de_len <= 0) if (de_len <= 0)
return -1; return -1;
offset += de_len; offset += de_len;
@ -847,23 +851,20 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry,
struct super_block * sb; struct super_block * sb;
struct buffer_head * bh_use[NAMEI_RA_SIZE]; struct buffer_head * bh_use[NAMEI_RA_SIZE];
struct buffer_head * bh, *ret = NULL; struct buffer_head * bh, *ret = NULL;
unsigned long start, block, b; ext4_lblk_t start, block, b;
int ra_max = 0; /* Number of bh's in the readahead int ra_max = 0; /* Number of bh's in the readahead
buffer, bh_use[] */ buffer, bh_use[] */
int ra_ptr = 0; /* Current index into readahead int ra_ptr = 0; /* Current index into readahead
buffer */ buffer */
int num = 0; int num = 0;
int nblocks, i, err; ext4_lblk_t nblocks;
int i, err;
struct inode *dir = dentry->d_parent->d_inode; struct inode *dir = dentry->d_parent->d_inode;
int namelen; int namelen;
const u8 *name;
unsigned blocksize;
*res_dir = NULL; *res_dir = NULL;
sb = dir->i_sb; sb = dir->i_sb;
blocksize = sb->s_blocksize;
namelen = dentry->d_name.len; namelen = dentry->d_name.len;
name = dentry->d_name.name;
if (namelen > EXT4_NAME_LEN) if (namelen > EXT4_NAME_LEN)
return NULL; return NULL;
if (is_dx(dir)) { if (is_dx(dir)) {
@ -914,7 +915,8 @@ restart:
if (!buffer_uptodate(bh)) { if (!buffer_uptodate(bh)) {
/* read error, skip block & hope for the best */ /* read error, skip block & hope for the best */
ext4_error(sb, __FUNCTION__, "reading directory #%lu " ext4_error(sb, __FUNCTION__, "reading directory #%lu "
"offset %lu", dir->i_ino, block); "offset %lu", dir->i_ino,
(unsigned long)block);
brelse(bh); brelse(bh);
goto next; goto next;
} }
@ -961,7 +963,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
struct dx_frame frames[2], *frame; struct dx_frame frames[2], *frame;
struct ext4_dir_entry_2 *de, *top; struct ext4_dir_entry_2 *de, *top;
struct buffer_head *bh; struct buffer_head *bh;
unsigned long block; ext4_lblk_t block;
int retval; int retval;
int namelen = dentry->d_name.len; int namelen = dentry->d_name.len;
const u8 *name = dentry->d_name.name; const u8 *name = dentry->d_name.name;
@ -1128,7 +1130,7 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count)
rec_len = EXT4_DIR_REC_LEN(de->name_len); rec_len = EXT4_DIR_REC_LEN(de->name_len);
memcpy (to, de, rec_len); memcpy (to, de, rec_len);
((struct ext4_dir_entry_2 *) to)->rec_len = ((struct ext4_dir_entry_2 *) to)->rec_len =
cpu_to_le16(rec_len); ext4_rec_len_to_disk(rec_len);
de->inode = 0; de->inode = 0;
map++; map++;
to += rec_len; to += rec_len;
@ -1147,13 +1149,12 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size)
prev = to = de; prev = to = de;
while ((char*)de < base + size) { while ((char*)de < base + size) {
next = (struct ext4_dir_entry_2 *) ((char *) de + next = ext4_next_entry(de);
le16_to_cpu(de->rec_len));
if (de->inode && de->name_len) { if (de->inode && de->name_len) {
rec_len = EXT4_DIR_REC_LEN(de->name_len); rec_len = EXT4_DIR_REC_LEN(de->name_len);
if (de > to) if (de > to)
memmove(to, de, rec_len); memmove(to, de, rec_len);
to->rec_len = cpu_to_le16(rec_len); to->rec_len = ext4_rec_len_to_disk(rec_len);
prev = to; prev = to;
to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len); to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len);
} }
@ -1174,7 +1175,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
unsigned blocksize = dir->i_sb->s_blocksize; unsigned blocksize = dir->i_sb->s_blocksize;
unsigned count, continued; unsigned count, continued;
struct buffer_head *bh2; struct buffer_head *bh2;
u32 newblock; ext4_lblk_t newblock;
u32 hash2; u32 hash2;
struct dx_map_entry *map; struct dx_map_entry *map;
char *data1 = (*bh)->b_data, *data2; char *data1 = (*bh)->b_data, *data2;
@ -1221,14 +1222,15 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
split = count - move; split = count - move;
hash2 = map[split].hash; hash2 = map[split].hash;
continued = hash2 == map[split - 1].hash; continued = hash2 == map[split - 1].hash;
dxtrace(printk("Split block %i at %x, %i/%i\n", dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n",
dx_get_block(frame->at), hash2, split, count-split)); (unsigned long)dx_get_block(frame->at),
hash2, split, count-split));
/* Fancy dance to stay within two buffers */ /* Fancy dance to stay within two buffers */
de2 = dx_move_dirents(data1, data2, map + split, count - split); de2 = dx_move_dirents(data1, data2, map + split, count - split);
de = dx_pack_dirents(data1,blocksize); de = dx_pack_dirents(data1,blocksize);
de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de);
de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2); de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2);
dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1));
dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1));
@ -1297,7 +1299,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
return -EEXIST; return -EEXIST;
} }
nlen = EXT4_DIR_REC_LEN(de->name_len); nlen = EXT4_DIR_REC_LEN(de->name_len);
rlen = le16_to_cpu(de->rec_len); rlen = ext4_rec_len_from_disk(de->rec_len);
if ((de->inode? rlen - nlen: rlen) >= reclen) if ((de->inode? rlen - nlen: rlen) >= reclen)
break; break;
de = (struct ext4_dir_entry_2 *)((char *)de + rlen); de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
@ -1316,11 +1318,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
/* By now the buffer is marked for journaling */ /* By now the buffer is marked for journaling */
nlen = EXT4_DIR_REC_LEN(de->name_len); nlen = EXT4_DIR_REC_LEN(de->name_len);
rlen = le16_to_cpu(de->rec_len); rlen = ext4_rec_len_from_disk(de->rec_len);
if (de->inode) { if (de->inode) {
struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen); struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen);
de1->rec_len = cpu_to_le16(rlen - nlen); de1->rec_len = ext4_rec_len_to_disk(rlen - nlen);
de->rec_len = cpu_to_le16(nlen); de->rec_len = ext4_rec_len_to_disk(nlen);
de = de1; de = de1;
} }
de->file_type = EXT4_FT_UNKNOWN; de->file_type = EXT4_FT_UNKNOWN;
@ -1374,7 +1376,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
int retval; int retval;
unsigned blocksize; unsigned blocksize;
struct dx_hash_info hinfo; struct dx_hash_info hinfo;
u32 block; ext4_lblk_t block;
struct fake_dirent *fde; struct fake_dirent *fde;
blocksize = dir->i_sb->s_blocksize; blocksize = dir->i_sb->s_blocksize;
@ -1397,17 +1399,18 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
/* The 0th block becomes the root, move the dirents out */ /* The 0th block becomes the root, move the dirents out */
fde = &root->dotdot; fde = &root->dotdot;
de = (struct ext4_dir_entry_2 *)((char *)fde + le16_to_cpu(fde->rec_len)); de = (struct ext4_dir_entry_2 *)((char *)fde +
ext4_rec_len_from_disk(fde->rec_len));
len = ((char *) root) + blocksize - (char *) de; len = ((char *) root) + blocksize - (char *) de;
memcpy (data1, de, len); memcpy (data1, de, len);
de = (struct ext4_dir_entry_2 *) data1; de = (struct ext4_dir_entry_2 *) data1;
top = data1 + len; top = data1 + len;
while ((char *)(de2=(void*)de+le16_to_cpu(de->rec_len)) < top) while ((char *)(de2 = ext4_next_entry(de)) < top)
de = de2; de = de2;
de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de);
/* Initialize the root; the dot dirents already exist */ /* Initialize the root; the dot dirents already exist */
de = (struct ext4_dir_entry_2 *) (&root->dotdot); de = (struct ext4_dir_entry_2 *) (&root->dotdot);
de->rec_len = cpu_to_le16(blocksize - EXT4_DIR_REC_LEN(2)); de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2));
memset (&root->info, 0, sizeof(root->info)); memset (&root->info, 0, sizeof(root->info));
root->info.info_length = sizeof(root->info); root->info.info_length = sizeof(root->info);
root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
@ -1454,7 +1457,7 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
int retval; int retval;
int dx_fallback=0; int dx_fallback=0;
unsigned blocksize; unsigned blocksize;
u32 block, blocks; ext4_lblk_t block, blocks;
sb = dir->i_sb; sb = dir->i_sb;
blocksize = sb->s_blocksize; blocksize = sb->s_blocksize;
@ -1487,7 +1490,7 @@ static int ext4_add_entry (handle_t *handle, struct dentry *dentry,
return retval; return retval;
de = (struct ext4_dir_entry_2 *) bh->b_data; de = (struct ext4_dir_entry_2 *) bh->b_data;
de->inode = 0; de->inode = 0;
de->rec_len = cpu_to_le16(blocksize); de->rec_len = ext4_rec_len_to_disk(blocksize);
return add_dirent_to_buf(handle, dentry, inode, de, bh); return add_dirent_to_buf(handle, dentry, inode, de, bh);
} }
@ -1531,7 +1534,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
dx_get_count(entries), dx_get_limit(entries))); dx_get_count(entries), dx_get_limit(entries)));
/* Need to split index? */ /* Need to split index? */
if (dx_get_count(entries) == dx_get_limit(entries)) { if (dx_get_count(entries) == dx_get_limit(entries)) {
u32 newblock; ext4_lblk_t newblock;
unsigned icount = dx_get_count(entries); unsigned icount = dx_get_count(entries);
int levels = frame - frames; int levels = frame - frames;
struct dx_entry *entries2; struct dx_entry *entries2;
@ -1550,7 +1553,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
goto cleanup; goto cleanup;
node2 = (struct dx_node *)(bh2->b_data); node2 = (struct dx_node *)(bh2->b_data);
entries2 = node2->entries; entries2 = node2->entries;
node2->fake.rec_len = cpu_to_le16(sb->s_blocksize); node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize);
node2->fake.inode = 0; node2->fake.inode = 0;
BUFFER_TRACE(frame->bh, "get_write_access"); BUFFER_TRACE(frame->bh, "get_write_access");
err = ext4_journal_get_write_access(handle, frame->bh); err = ext4_journal_get_write_access(handle, frame->bh);
@ -1648,9 +1651,9 @@ static int ext4_delete_entry (handle_t *handle,
BUFFER_TRACE(bh, "get_write_access"); BUFFER_TRACE(bh, "get_write_access");
ext4_journal_get_write_access(handle, bh); ext4_journal_get_write_access(handle, bh);
if (pde) if (pde)
pde->rec_len = pde->rec_len = ext4_rec_len_to_disk(
cpu_to_le16(le16_to_cpu(pde->rec_len) + ext4_rec_len_from_disk(pde->rec_len) +
le16_to_cpu(de->rec_len)); ext4_rec_len_from_disk(de->rec_len));
else else
de->inode = 0; de->inode = 0;
dir->i_version++; dir->i_version++;
@ -1658,10 +1661,9 @@ static int ext4_delete_entry (handle_t *handle,
ext4_journal_dirty_metadata(handle, bh); ext4_journal_dirty_metadata(handle, bh);
return 0; return 0;
} }
i += le16_to_cpu(de->rec_len); i += ext4_rec_len_from_disk(de->rec_len);
pde = de; pde = de;
de = (struct ext4_dir_entry_2 *) de = ext4_next_entry(de);
((char *) de + le16_to_cpu(de->rec_len));
} }
return -ENOENT; return -ENOENT;
} }
@ -1824,13 +1826,13 @@ retry:
de = (struct ext4_dir_entry_2 *) dir_block->b_data; de = (struct ext4_dir_entry_2 *) dir_block->b_data;
de->inode = cpu_to_le32(inode->i_ino); de->inode = cpu_to_le32(inode->i_ino);
de->name_len = 1; de->name_len = 1;
de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de->name_len)); de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len));
strcpy (de->name, "."); strcpy (de->name, ".");
ext4_set_de_type(dir->i_sb, de, S_IFDIR); ext4_set_de_type(dir->i_sb, de, S_IFDIR);
de = (struct ext4_dir_entry_2 *) de = ext4_next_entry(de);
((char *) de + le16_to_cpu(de->rec_len));
de->inode = cpu_to_le32(dir->i_ino); de->inode = cpu_to_le32(dir->i_ino);
de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT4_DIR_REC_LEN(1)); de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize -
EXT4_DIR_REC_LEN(1));
de->name_len = 2; de->name_len = 2;
strcpy (de->name, ".."); strcpy (de->name, "..");
ext4_set_de_type(dir->i_sb, de, S_IFDIR); ext4_set_de_type(dir->i_sb, de, S_IFDIR);
@ -1882,8 +1884,7 @@ static int empty_dir (struct inode * inode)
return 1; return 1;
} }
de = (struct ext4_dir_entry_2 *) bh->b_data; de = (struct ext4_dir_entry_2 *) bh->b_data;
de1 = (struct ext4_dir_entry_2 *) de1 = ext4_next_entry(de);
((char *) de + le16_to_cpu(de->rec_len));
if (le32_to_cpu(de->inode) != inode->i_ino || if (le32_to_cpu(de->inode) != inode->i_ino ||
!le32_to_cpu(de1->inode) || !le32_to_cpu(de1->inode) ||
strcmp (".", de->name) || strcmp (".", de->name) ||
@ -1894,9 +1895,9 @@ static int empty_dir (struct inode * inode)
brelse (bh); brelse (bh);
return 1; return 1;
} }
offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len); offset = ext4_rec_len_from_disk(de->rec_len) +
de = (struct ext4_dir_entry_2 *) ext4_rec_len_from_disk(de1->rec_len);
((char *) de1 + le16_to_cpu(de1->rec_len)); de = ext4_next_entry(de1);
while (offset < inode->i_size ) { while (offset < inode->i_size ) {
if (!bh || if (!bh ||
(void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
@ -1925,9 +1926,8 @@ static int empty_dir (struct inode * inode)
brelse (bh); brelse (bh);
return 0; return 0;
} }
offset += le16_to_cpu(de->rec_len); offset += ext4_rec_len_from_disk(de->rec_len);
de = (struct ext4_dir_entry_2 *) de = ext4_next_entry(de);
((char *) de + le16_to_cpu(de->rec_len));
} }
brelse (bh); brelse (bh);
return 1; return 1;
@ -2282,8 +2282,7 @@ retry:
} }
#define PARENT_INO(buffer) \ #define PARENT_INO(buffer) \
((struct ext4_dir_entry_2 *) ((char *) buffer + \ (ext4_next_entry((struct ext4_dir_entry_2 *)(buffer))->inode)
le16_to_cpu(((struct ext4_dir_entry_2 *) buffer)->rec_len)))->inode
/* /*
* Anybody can rename anything with this: the permission checks are left to the * Anybody can rename anything with this: the permission checks are left to the

Просмотреть файл

@ -28,7 +28,7 @@ static int verify_group_input(struct super_block *sb,
struct ext4_super_block *es = sbi->s_es; struct ext4_super_block *es = sbi->s_es;
ext4_fsblk_t start = ext4_blocks_count(es); ext4_fsblk_t start = ext4_blocks_count(es);
ext4_fsblk_t end = start + input->blocks_count; ext4_fsblk_t end = start + input->blocks_count;
unsigned group = input->group; ext4_group_t group = input->group;
ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group;
unsigned overhead = ext4_bg_has_super(sb, group) ? unsigned overhead = ext4_bg_has_super(sb, group) ?
(1 + ext4_bg_num_gdb(sb, group) + (1 + ext4_bg_num_gdb(sb, group) +
@ -206,7 +206,7 @@ static int setup_new_group_blocks(struct super_block *sb,
} }
if (ext4_bg_has_super(sb, input->group)) { if (ext4_bg_has_super(sb, input->group)) {
ext4_debug("mark backup superblock %#04lx (+0)\n", start); ext4_debug("mark backup superblock %#04llx (+0)\n", start);
ext4_set_bit(0, bh->b_data); ext4_set_bit(0, bh->b_data);
} }
@ -215,7 +215,7 @@ static int setup_new_group_blocks(struct super_block *sb,
i < gdblocks; i++, block++, bit++) { i < gdblocks; i++, block++, bit++) {
struct buffer_head *gdb; struct buffer_head *gdb;
ext4_debug("update backup group %#04lx (+%d)\n", block, bit); ext4_debug("update backup group %#04llx (+%d)\n", block, bit);
if ((err = extend_or_restart_transaction(handle, 1, bh))) if ((err = extend_or_restart_transaction(handle, 1, bh)))
goto exit_bh; goto exit_bh;
@ -243,7 +243,7 @@ static int setup_new_group_blocks(struct super_block *sb,
i < reserved_gdb; i++, block++, bit++) { i < reserved_gdb; i++, block++, bit++) {
struct buffer_head *gdb; struct buffer_head *gdb;
ext4_debug("clear reserved block %#04lx (+%d)\n", block, bit); ext4_debug("clear reserved block %#04llx (+%d)\n", block, bit);
if ((err = extend_or_restart_transaction(handle, 1, bh))) if ((err = extend_or_restart_transaction(handle, 1, bh)))
goto exit_bh; goto exit_bh;
@ -256,10 +256,10 @@ static int setup_new_group_blocks(struct super_block *sb,
ext4_set_bit(bit, bh->b_data); ext4_set_bit(bit, bh->b_data);
brelse(gdb); brelse(gdb);
} }
ext4_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap, ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
input->block_bitmap - start); input->block_bitmap - start);
ext4_set_bit(input->block_bitmap - start, bh->b_data); ext4_set_bit(input->block_bitmap - start, bh->b_data);
ext4_debug("mark inode bitmap %#04x (+%ld)\n", input->inode_bitmap, ext4_debug("mark inode bitmap %#04llx (+%llu)\n", input->inode_bitmap,
input->inode_bitmap - start); input->inode_bitmap - start);
ext4_set_bit(input->inode_bitmap - start, bh->b_data); ext4_set_bit(input->inode_bitmap - start, bh->b_data);
@ -268,7 +268,7 @@ static int setup_new_group_blocks(struct super_block *sb,
i < sbi->s_itb_per_group; i++, bit++, block++) { i < sbi->s_itb_per_group; i++, bit++, block++) {
struct buffer_head *it; struct buffer_head *it;
ext4_debug("clear inode block %#04lx (+%d)\n", block, bit); ext4_debug("clear inode block %#04llx (+%d)\n", block, bit);
if ((err = extend_or_restart_transaction(handle, 1, bh))) if ((err = extend_or_restart_transaction(handle, 1, bh)))
goto exit_bh; goto exit_bh;
@ -291,7 +291,7 @@ static int setup_new_group_blocks(struct super_block *sb,
brelse(bh); brelse(bh);
/* Mark unused entries in inode bitmap used */ /* Mark unused entries in inode bitmap used */
ext4_debug("clear inode bitmap %#04x (+%ld)\n", ext4_debug("clear inode bitmap %#04llx (+%llu)\n",
input->inode_bitmap, input->inode_bitmap - start); input->inode_bitmap, input->inode_bitmap - start);
if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) { if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) {
err = PTR_ERR(bh); err = PTR_ERR(bh);
@ -357,7 +357,7 @@ static int verify_reserved_gdb(struct super_block *sb,
struct buffer_head *primary) struct buffer_head *primary)
{ {
const ext4_fsblk_t blk = primary->b_blocknr; const ext4_fsblk_t blk = primary->b_blocknr;
const unsigned long end = EXT4_SB(sb)->s_groups_count; const ext4_group_t end = EXT4_SB(sb)->s_groups_count;
unsigned three = 1; unsigned three = 1;
unsigned five = 5; unsigned five = 5;
unsigned seven = 7; unsigned seven = 7;
@ -656,12 +656,12 @@ static void update_backups(struct super_block *sb,
int blk_off, char *data, int size) int blk_off, char *data, int size)
{ {
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
const unsigned long last = sbi->s_groups_count; const ext4_group_t last = sbi->s_groups_count;
const int bpg = EXT4_BLOCKS_PER_GROUP(sb); const int bpg = EXT4_BLOCKS_PER_GROUP(sb);
unsigned three = 1; unsigned three = 1;
unsigned five = 5; unsigned five = 5;
unsigned seven = 7; unsigned seven = 7;
unsigned group; ext4_group_t group;
int rest = sb->s_blocksize - size; int rest = sb->s_blocksize - size;
handle_t *handle; handle_t *handle;
int err = 0, err2; int err = 0, err2;
@ -716,7 +716,7 @@ static void update_backups(struct super_block *sb,
exit_err: exit_err:
if (err) { if (err) {
ext4_warning(sb, __FUNCTION__, ext4_warning(sb, __FUNCTION__,
"can't update backup for group %d (err %d), " "can't update backup for group %lu (err %d), "
"forcing fsck on next reboot", group, err); "forcing fsck on next reboot", group, err);
sbi->s_mount_state &= ~EXT4_VALID_FS; sbi->s_mount_state &= ~EXT4_VALID_FS;
sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS); sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
@ -952,7 +952,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
ext4_fsblk_t n_blocks_count) ext4_fsblk_t n_blocks_count)
{ {
ext4_fsblk_t o_blocks_count; ext4_fsblk_t o_blocks_count;
unsigned long o_groups_count; ext4_group_t o_groups_count;
ext4_grpblk_t last; ext4_grpblk_t last;
ext4_grpblk_t add; ext4_grpblk_t add;
struct buffer_head * bh; struct buffer_head * bh;
@ -1054,7 +1054,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
sb->s_dirt = 1; sb->s_dirt = 1;
unlock_super(sb); unlock_super(sb);
ext4_debug("freeing blocks %lu through %llu\n", o_blocks_count, ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
o_blocks_count + add); o_blocks_count + add);
ext4_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); ext4_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,

Просмотреть файл

@ -373,6 +373,66 @@ void ext4_update_dynamic_rev(struct super_block *sb)
*/ */
} }
/*
 * Turn on a COMPAT feature flag in the superblock under a journal handle.
 *
 * @handle: journal handle the superblock update is performed under
 * @sb:     super block whose feature set is updated
 * @compat: COMPAT feature bit(s) tested with EXT4_HAS_COMPAT_FEATURE()
 *
 * If the feature is already present nothing is touched and 0 is returned.
 * Otherwise write access to the superblock buffer is requested, the
 * feature is set, the superblock is marked dirty, h_sync is set on the
 * handle and the superblock buffer is journalled as dirty metadata.
 *
 * Returns 0 on success, or the error reported by
 * ext4_journal_get_write_access() or ext4_journal_dirty_metadata().
 */
int ext4_update_compat_feature(handle_t *handle,
					struct super_block *sb, __u32 compat)
{
	int err;

	/* Nothing to do when the feature is already enabled. */
	if (EXT4_HAS_COMPAT_FEATURE(sb, compat))
		return 0;

	err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
	if (err)
		return err;

	EXT4_SET_COMPAT_FEATURE(sb, compat);
	sb->s_dirt = 1;
	/* NOTE(review): presumably forces a synchronous commit - confirm. */
	handle->h_sync = 1;
	BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
			"call ext4_journal_dirty_metadata");
	return ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
}
/*
 * Turn on an RO_COMPAT feature flag in the superblock under a journal
 * handle.
 *
 * @handle:   journal handle the superblock update is performed under
 * @sb:       super block whose feature set is updated
 * @rocompat: RO_COMPAT feature bit(s) tested with
 *            EXT4_HAS_RO_COMPAT_FEATURE()
 *
 * If the feature is already present nothing is touched and 0 is returned.
 * Otherwise write access to the superblock buffer is requested, the
 * feature is set, the superblock is marked dirty, h_sync is set on the
 * handle and the superblock buffer is journalled as dirty metadata.
 *
 * Returns 0 on success, or the error reported by
 * ext4_journal_get_write_access() or ext4_journal_dirty_metadata().
 */
int ext4_update_rocompat_feature(handle_t *handle,
					struct super_block *sb, __u32 rocompat)
{
	int err;

	/* Nothing to do when the feature is already enabled. */
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, rocompat))
		return 0;

	err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
	if (err)
		return err;

	EXT4_SET_RO_COMPAT_FEATURE(sb, rocompat);
	sb->s_dirt = 1;
	/* NOTE(review): presumably forces a synchronous commit - confirm. */
	handle->h_sync = 1;
	BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
			"call ext4_journal_dirty_metadata");
	return ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
}
/*
 * Turn on an INCOMPAT feature flag in the superblock under a journal
 * handle.
 *
 * @handle:   journal handle the superblock update is performed under
 * @sb:       super block whose feature set is updated
 * @incompat: INCOMPAT feature bit(s) tested with
 *            EXT4_HAS_INCOMPAT_FEATURE()
 *
 * If the feature is already present nothing is touched and 0 is returned.
 * Otherwise write access to the superblock buffer is requested, the
 * feature is set, the superblock is marked dirty, h_sync is set on the
 * handle and the superblock buffer is journalled as dirty metadata.
 *
 * Returns 0 on success, or the error reported by
 * ext4_journal_get_write_access() or ext4_journal_dirty_metadata().
 */
int ext4_update_incompat_feature(handle_t *handle,
					struct super_block *sb, __u32 incompat)
{
	int err;

	/* Nothing to do when the feature is already enabled. */
	if (EXT4_HAS_INCOMPAT_FEATURE(sb, incompat))
		return 0;

	err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
	if (err)
		return err;

	EXT4_SET_INCOMPAT_FEATURE(sb, incompat);
	sb->s_dirt = 1;
	/* NOTE(review): presumably forces a synchronous commit - confirm. */
	handle->h_sync = 1;
	BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
			"call ext4_journal_dirty_metadata");
	return ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
}
/* /*
* Open the external journal device * Open the external journal device
*/ */
@ -443,6 +503,7 @@ static void ext4_put_super (struct super_block * sb)
struct ext4_super_block *es = sbi->s_es; struct ext4_super_block *es = sbi->s_es;
int i; int i;
ext4_mb_release(sb);
ext4_ext_release(sb); ext4_ext_release(sb);
ext4_xattr_put_super(sb); ext4_xattr_put_super(sb);
jbd2_journal_destroy(sbi->s_journal); jbd2_journal_destroy(sbi->s_journal);
@ -509,6 +570,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
ei->i_block_alloc_info = NULL; ei->i_block_alloc_info = NULL;
ei->vfs_inode.i_version = 1; ei->vfs_inode.i_version = 1;
memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
INIT_LIST_HEAD(&ei->i_prealloc_list);
spin_lock_init(&ei->i_prealloc_lock);
return &ei->vfs_inode; return &ei->vfs_inode;
} }
@ -533,7 +596,7 @@ static void init_once(struct kmem_cache *cachep, void *foo)
#ifdef CONFIG_EXT4DEV_FS_XATTR #ifdef CONFIG_EXT4DEV_FS_XATTR
init_rwsem(&ei->xattr_sem); init_rwsem(&ei->xattr_sem);
#endif #endif
mutex_init(&ei->truncate_mutex); init_rwsem(&ei->i_data_sem);
inode_init_once(&ei->vfs_inode); inode_init_once(&ei->vfs_inode);
} }
@ -605,18 +668,20 @@ static inline void ext4_show_quota_options(struct seq_file *seq, struct super_bl
*/ */
static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
{ {
int def_errors;
unsigned long def_mount_opts;
struct super_block *sb = vfs->mnt_sb; struct super_block *sb = vfs->mnt_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es; struct ext4_super_block *es = sbi->s_es;
unsigned long def_mount_opts;
def_mount_opts = le32_to_cpu(es->s_default_mount_opts); def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
def_errors = le16_to_cpu(es->s_errors);
if (sbi->s_sb_block != 1) if (sbi->s_sb_block != 1)
seq_printf(seq, ",sb=%llu", sbi->s_sb_block); seq_printf(seq, ",sb=%llu", sbi->s_sb_block);
if (test_opt(sb, MINIX_DF)) if (test_opt(sb, MINIX_DF))
seq_puts(seq, ",minixdf"); seq_puts(seq, ",minixdf");
if (test_opt(sb, GRPID)) if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS))
seq_puts(seq, ",grpid"); seq_puts(seq, ",grpid");
if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS)) if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS))
seq_puts(seq, ",nogrpid"); seq_puts(seq, ",nogrpid");
@ -628,34 +693,33 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) { le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) {
seq_printf(seq, ",resgid=%u", sbi->s_resgid); seq_printf(seq, ",resgid=%u", sbi->s_resgid);
} }
if (test_opt(sb, ERRORS_CONT)) { if (test_opt(sb, ERRORS_RO)) {
int def_errors = le16_to_cpu(es->s_errors);
if (def_errors == EXT4_ERRORS_PANIC || if (def_errors == EXT4_ERRORS_PANIC ||
def_errors == EXT4_ERRORS_RO) { def_errors == EXT4_ERRORS_CONTINUE) {
seq_puts(seq, ",errors=continue"); seq_puts(seq, ",errors=remount-ro");
} }
} }
if (test_opt(sb, ERRORS_RO)) if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
seq_puts(seq, ",errors=remount-ro"); seq_puts(seq, ",errors=continue");
if (test_opt(sb, ERRORS_PANIC)) if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
seq_puts(seq, ",errors=panic"); seq_puts(seq, ",errors=panic");
if (test_opt(sb, NO_UID32)) if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16))
seq_puts(seq, ",nouid32"); seq_puts(seq, ",nouid32");
if (test_opt(sb, DEBUG)) if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG))
seq_puts(seq, ",debug"); seq_puts(seq, ",debug");
if (test_opt(sb, OLDALLOC)) if (test_opt(sb, OLDALLOC))
seq_puts(seq, ",oldalloc"); seq_puts(seq, ",oldalloc");
#ifdef CONFIG_EXT4_FS_XATTR #ifdef CONFIG_EXT4DEV_FS_XATTR
if (test_opt(sb, XATTR_USER)) if (test_opt(sb, XATTR_USER) &&
!(def_mount_opts & EXT4_DEFM_XATTR_USER))
seq_puts(seq, ",user_xattr"); seq_puts(seq, ",user_xattr");
if (!test_opt(sb, XATTR_USER) && if (!test_opt(sb, XATTR_USER) &&
(def_mount_opts & EXT4_DEFM_XATTR_USER)) { (def_mount_opts & EXT4_DEFM_XATTR_USER)) {
seq_puts(seq, ",nouser_xattr"); seq_puts(seq, ",nouser_xattr");
} }
#endif #endif
#ifdef CONFIG_EXT4_FS_POSIX_ACL #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
if (test_opt(sb, POSIX_ACL)) if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
seq_puts(seq, ",acl"); seq_puts(seq, ",acl");
if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
seq_puts(seq, ",noacl"); seq_puts(seq, ",noacl");
@ -672,7 +736,17 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_puts(seq, ",nobh"); seq_puts(seq, ",nobh");
if (!test_opt(sb, EXTENTS)) if (!test_opt(sb, EXTENTS))
seq_puts(seq, ",noextents"); seq_puts(seq, ",noextents");
if (!test_opt(sb, MBALLOC))
seq_puts(seq, ",nomballoc");
if (test_opt(sb, I_VERSION))
seq_puts(seq, ",i_version");
if (sbi->s_stripe)
seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
/*
* journal mode get enabled in different ways
* So just print the value even if we didn't specify it
*/
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
seq_puts(seq, ",data=journal"); seq_puts(seq, ",data=journal");
else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
@ -681,7 +755,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_puts(seq, ",data=writeback"); seq_puts(seq, ",data=writeback");
ext4_show_quota_options(seq, sb); ext4_show_quota_options(seq, sb);
return 0; return 0;
} }
@ -809,11 +882,13 @@ enum {
Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
Opt_journal_checksum, Opt_journal_async_commit,
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
Opt_grpquota, Opt_extents, Opt_noextents, Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
Opt_mballoc, Opt_nomballoc, Opt_stripe,
}; };
static match_table_t tokens = { static match_table_t tokens = {
@ -848,6 +923,8 @@ static match_table_t tokens = {
{Opt_journal_update, "journal=update"}, {Opt_journal_update, "journal=update"},
{Opt_journal_inum, "journal=%u"}, {Opt_journal_inum, "journal=%u"},
{Opt_journal_dev, "journal_dev=%u"}, {Opt_journal_dev, "journal_dev=%u"},
{Opt_journal_checksum, "journal_checksum"},
{Opt_journal_async_commit, "journal_async_commit"},
{Opt_abort, "abort"}, {Opt_abort, "abort"},
{Opt_data_journal, "data=journal"}, {Opt_data_journal, "data=journal"},
{Opt_data_ordered, "data=ordered"}, {Opt_data_ordered, "data=ordered"},
@ -865,6 +942,10 @@ static match_table_t tokens = {
{Opt_barrier, "barrier=%u"}, {Opt_barrier, "barrier=%u"},
{Opt_extents, "extents"}, {Opt_extents, "extents"},
{Opt_noextents, "noextents"}, {Opt_noextents, "noextents"},
{Opt_i_version, "i_version"},
{Opt_mballoc, "mballoc"},
{Opt_nomballoc, "nomballoc"},
{Opt_stripe, "stripe=%u"},
{Opt_err, NULL}, {Opt_err, NULL},
{Opt_resize, "resize"}, {Opt_resize, "resize"},
}; };
@ -1035,6 +1116,13 @@ static int parse_options (char *options, struct super_block *sb,
return 0; return 0;
*journal_devnum = option; *journal_devnum = option;
break; break;
case Opt_journal_checksum:
set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
break;
case Opt_journal_async_commit:
set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT);
set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
break;
case Opt_noload: case Opt_noload:
set_opt (sbi->s_mount_opt, NOLOAD); set_opt (sbi->s_mount_opt, NOLOAD);
break; break;
@ -1203,6 +1291,23 @@ clear_qf_name:
case Opt_noextents: case Opt_noextents:
clear_opt (sbi->s_mount_opt, EXTENTS); clear_opt (sbi->s_mount_opt, EXTENTS);
break; break;
case Opt_i_version:
set_opt(sbi->s_mount_opt, I_VERSION);
sb->s_flags |= MS_I_VERSION;
break;
case Opt_mballoc:
set_opt(sbi->s_mount_opt, MBALLOC);
break;
case Opt_nomballoc:
clear_opt(sbi->s_mount_opt, MBALLOC);
break;
case Opt_stripe:
if (match_int(&args[0], &option))
return 0;
if (option < 0)
return 0;
sbi->s_stripe = option;
break;
default: default:
printk (KERN_ERR printk (KERN_ERR
"EXT4-fs: Unrecognized mount option \"%s\" " "EXT4-fs: Unrecognized mount option \"%s\" "
@ -1364,7 +1469,7 @@ static int ext4_check_descriptors (struct super_block * sb)
struct ext4_group_desc * gdp = NULL; struct ext4_group_desc * gdp = NULL;
int desc_block = 0; int desc_block = 0;
int flexbg_flag = 0; int flexbg_flag = 0;
int i; ext4_group_t i;
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
flexbg_flag = 1; flexbg_flag = 1;
@ -1386,7 +1491,7 @@ static int ext4_check_descriptors (struct super_block * sb)
if (block_bitmap < first_block || block_bitmap > last_block) if (block_bitmap < first_block || block_bitmap > last_block)
{ {
ext4_error (sb, "ext4_check_descriptors", ext4_error (sb, "ext4_check_descriptors",
"Block bitmap for group %d" "Block bitmap for group %lu"
" not in group (block %llu)!", " not in group (block %llu)!",
i, block_bitmap); i, block_bitmap);
return 0; return 0;
@ -1395,7 +1500,7 @@ static int ext4_check_descriptors (struct super_block * sb)
if (inode_bitmap < first_block || inode_bitmap > last_block) if (inode_bitmap < first_block || inode_bitmap > last_block)
{ {
ext4_error (sb, "ext4_check_descriptors", ext4_error (sb, "ext4_check_descriptors",
"Inode bitmap for group %d" "Inode bitmap for group %lu"
" not in group (block %llu)!", " not in group (block %llu)!",
i, inode_bitmap); i, inode_bitmap);
return 0; return 0;
@ -1405,17 +1510,16 @@ static int ext4_check_descriptors (struct super_block * sb)
inode_table + sbi->s_itb_per_group - 1 > last_block) inode_table + sbi->s_itb_per_group - 1 > last_block)
{ {
ext4_error (sb, "ext4_check_descriptors", ext4_error (sb, "ext4_check_descriptors",
"Inode table for group %d" "Inode table for group %lu"
" not in group (block %llu)!", " not in group (block %llu)!",
i, inode_table); i, inode_table);
return 0; return 0;
} }
if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
ext4_error(sb, __FUNCTION__, ext4_error(sb, __FUNCTION__,
"Checksum for group %d failed (%u!=%u)\n", i, "Checksum for group %lu failed (%u!=%u)\n",
le16_to_cpu(ext4_group_desc_csum(sbi, i, i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
gdp)), gdp)), le16_to_cpu(gdp->bg_checksum));
le16_to_cpu(gdp->bg_checksum));
return 0; return 0;
} }
if (!flexbg_flag) if (!flexbg_flag)
@ -1429,7 +1533,6 @@ static int ext4_check_descriptors (struct super_block * sb)
return 1; return 1;
} }
/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
* the superblock) which were deleted from all directories, but held open by * the superblock) which were deleted from all directories, but held open by
* a process at the time of a crash. We walk the list and try to delete these * a process at the time of a crash. We walk the list and try to delete these
@ -1542,20 +1645,95 @@ static void ext4_orphan_cleanup (struct super_block * sb,
#endif #endif
sb->s_flags = s_flags; /* Restore MS_RDONLY status */ sb->s_flags = s_flags; /* Restore MS_RDONLY status */
} }
/*
 * Largest byte offset usable by an extent-format file.
 *
 * Two independent ceilings are computed and the smaller one wins:
 *
 *  - the extent ceiling: the extent-start container (ee_block) is 32 bits
 *    wide, so 2^32 blocks of (1 << blkbits) bytes, less one byte;
 *  - the vfs ceiling: MAX_LFS_FILESIZE, or, when blkcnt_t is narrower
 *    than 64 bits, what a 32-bit count of 512-byte sectors can express,
 *    rounded down to whole filesystem blocks.
 *
 * The 48-bit on-disk i_blocks field is counted in filesystem blocks and is
 * therefore not a limiting factor here.
 *
 * Note, metadata overhead charged to the vfs i_blocks is *not* considered.
 */
static loff_t ext4_max_size(int blkbits)
{
	loff_t vfs_limit = MAX_LFS_FILESIZE;
	loff_t extent_limit;

	if (sizeof(blkcnt_t) < sizeof(u64)) {
		/*
		 * Small vfs i_blocks (no CONFIG_LSF): it holds at most
		 * 2^32 - 1 sectors of 512 bytes.  Convert that to whole
		 * filesystem blocks, then back to bytes.
		 */
		vfs_limit = ((1LL << 32) - 1) >> (blkbits - 9);
		vfs_limit <<= blkbits;
	}

	/* 2^32 logical blocks addressable through ee_block, in bytes - 1 */
	extent_limit = 1LL << 32;
	extent_limit <<= blkbits;
	extent_limit -= 1;

	/* Sanity check against vm- & vfs- imposed limits */
	return (extent_limit > vfs_limit) ? vfs_limit : extent_limit;
}
/* /*
* Maximal file size. There is a direct, and {,double-,triple-}indirect * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect
* block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks. * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
* We need to be 1 filesystem block less than the 2^32 sector limit. * We need to be 1 filesystem block less than the 2^48 sector limit.
*/ */
static loff_t ext4_max_size(int bits) static loff_t ext4_max_bitmap_size(int bits)
{ {
loff_t res = EXT4_NDIR_BLOCKS; loff_t res = EXT4_NDIR_BLOCKS;
/* This constant is calculated to be the largest file size for a int meta_blocks;
* dense, 4k-blocksize file such that the total number of loff_t upper_limit;
/* This is calculated to be the largest file size for a
* dense, bitmapped file such that the total number of
* sectors in the file, including data and all indirect blocks, * sectors in the file, including data and all indirect blocks,
* does not exceed 2^32. */ * does not exceed 2^48 -1
const loff_t upper_limit = 0x1ff7fffd000LL; * __u32 i_blocks_lo and _u16 i_blocks_high representing the
* total number of 512 bytes blocks of the file
*/
if (sizeof(blkcnt_t) < sizeof(u64)) {
/*
* CONFIG_LSF is not enabled implies the inode
* i_block represent total blocks in 512 bytes
* 32 == size of vfs inode i_blocks * 8
*/
upper_limit = (1LL << 32) - 1;
/* total blocks in file system block size */
upper_limit >>= (bits - 9);
} else {
/*
* We use 48 bit ext4_inode i_blocks
* With EXT4_HUGE_FILE_FL set the i_blocks
* represent total number of blocks in
* file system block size
*/
upper_limit = (1LL << 48) - 1;
}
/* indirect blocks */
meta_blocks = 1;
/* double indirect blocks */
meta_blocks += 1 + (1LL << (bits-2));
/* tripple indirect blocks */
meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
upper_limit -= meta_blocks;
upper_limit <<= bits;
res += 1LL << (bits-2); res += 1LL << (bits-2);
res += 1LL << (2*(bits-2)); res += 1LL << (2*(bits-2));
@ -1563,6 +1741,10 @@ static loff_t ext4_max_size(int bits)
res <<= bits; res <<= bits;
if (res > upper_limit) if (res > upper_limit)
res = upper_limit; res = upper_limit;
if (res > MAX_LFS_FILESIZE)
res = MAX_LFS_FILESIZE;
return res; return res;
} }
@ -1570,7 +1752,7 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb,
ext4_fsblk_t logical_sb_block, int nr) ext4_fsblk_t logical_sb_block, int nr)
{ {
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
unsigned long bg, first_meta_bg; ext4_group_t bg, first_meta_bg;
int has_super = 0; int has_super = 0;
first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
@ -1584,8 +1766,39 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb,
return (has_super + ext4_group_first_block_no(sb, bg)); return (has_super + ext4_group_first_block_no(sb, bg));
} }
/**
 * ext4_get_stripe_size: Get the stripe size.
 * @sbi: In memory super block info
 *
 * Candidates are considered in this order:
 *   1. the value given with the stripe= mount option (sbi->s_stripe),
 *   2. the super block s_raid_stripe_width,
 *   3. the super block s_raid_stride.
 *
 * The first candidate that is non-zero and not greater than the number of
 * blocks per group is returned. If none qualifies, 0 is returned.
 * Allocator needs it be less than blocks per group.
 *
 * An unset (zero) s_raid_stripe_width must not end the search: it would
 * trivially satisfy the "<= blocks per group" test and hide a valid
 * s_raid_stride, so zero candidates are skipped explicitly.
 */
static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
{
	unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
	unsigned long stripe_width =
			le32_to_cpu(sbi->s_es->s_raid_stripe_width);

	if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
		return sbi->s_stripe;

	if (stripe_width && stripe_width <= sbi->s_blocks_per_group)
		return stripe_width;

	if (stride && stride <= sbi->s_blocks_per_group)
		return stride;

	return 0;
}
static int ext4_fill_super (struct super_block *sb, void *data, int silent) static int ext4_fill_super (struct super_block *sb, void *data, int silent)
__releases(kernel_sem)
__acquires(kernel_sem)
{ {
struct buffer_head * bh; struct buffer_head * bh;
struct ext4_super_block *es = NULL; struct ext4_super_block *es = NULL;
@ -1599,7 +1812,6 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
unsigned long def_mount_opts; unsigned long def_mount_opts;
struct inode *root; struct inode *root;
int blocksize; int blocksize;
int hblock;
int db_count; int db_count;
int i; int i;
int needs_recovery; int needs_recovery;
@ -1624,6 +1836,11 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
goto out_fail; goto out_fail;
} }
if (!sb_set_blocksize(sb, blocksize)) {
printk(KERN_ERR "EXT4-fs: bad blocksize %d.\n", blocksize);
goto out_fail;
}
/* /*
* The ext4 superblock will not be buffer aligned for other than 1kB * The ext4 superblock will not be buffer aligned for other than 1kB
* block sizes. We need to calculate the offset from buffer start. * block sizes. We need to calculate the offset from buffer start.
@ -1674,10 +1891,10 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
set_opt(sbi->s_mount_opt, ERRORS_PANIC); set_opt(sbi->s_mount_opt, ERRORS_PANIC);
else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_RO) else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
set_opt(sbi->s_mount_opt, ERRORS_RO);
else
set_opt(sbi->s_mount_opt, ERRORS_CONT); set_opt(sbi->s_mount_opt, ERRORS_CONT);
else
set_opt(sbi->s_mount_opt, ERRORS_RO);
sbi->s_resuid = le16_to_cpu(es->s_def_resuid); sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
sbi->s_resgid = le16_to_cpu(es->s_def_resgid); sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
@ -1689,6 +1906,11 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
* User -o noextents to turn it off * User -o noextents to turn it off
*/ */
set_opt(sbi->s_mount_opt, EXTENTS); set_opt(sbi->s_mount_opt, EXTENTS);
/*
* turn on mballoc feature by default in ext4 filesystem
* User -o nomballoc to turn it off
*/
set_opt(sbi->s_mount_opt, MBALLOC);
if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
NULL, 0)) NULL, 0))
@ -1723,6 +1945,19 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
sb->s_id, le32_to_cpu(features)); sb->s_id, le32_to_cpu(features));
goto failed_mount; goto failed_mount;
} }
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
/*
* Large file size enabled file system can only be
* mount if kernel is build with CONFIG_LSF
*/
if (sizeof(root->i_blocks) < sizeof(u64) &&
!(sb->s_flags & MS_RDONLY)) {
printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge "
"files cannot be mounted read-write "
"without CONFIG_LSF.\n", sb->s_id);
goto failed_mount;
}
}
blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
if (blocksize < EXT4_MIN_BLOCK_SIZE || if (blocksize < EXT4_MIN_BLOCK_SIZE ||
@ -1733,20 +1968,16 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
goto failed_mount; goto failed_mount;
} }
hblock = bdev_hardsect_size(sb->s_bdev);
if (sb->s_blocksize != blocksize) { if (sb->s_blocksize != blocksize) {
/*
* Make sure the blocksize for the filesystem is larger /* Validate the filesystem blocksize */
* than the hardware sectorsize for the machine. if (!sb_set_blocksize(sb, blocksize)) {
*/ printk(KERN_ERR "EXT4-fs: bad block size %d.\n",
if (blocksize < hblock) { blocksize);
printk(KERN_ERR "EXT4-fs: blocksize %d too small for "
"device blocksize %d.\n", blocksize, hblock);
goto failed_mount; goto failed_mount;
} }
brelse (bh); brelse (bh);
sb_set_blocksize(sb, blocksize);
logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
offset = do_div(logical_sb_block, blocksize); offset = do_div(logical_sb_block, blocksize);
bh = sb_bread(sb, logical_sb_block); bh = sb_bread(sb, logical_sb_block);
@ -1764,6 +1995,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
} }
} }
sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits);
sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits); sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits);
if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
@ -1838,6 +2070,17 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
if (EXT4_BLOCKS_PER_GROUP(sb) == 0) if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
goto cantfind_ext4; goto cantfind_ext4;
/* ensure blocks_count calculation below doesn't sign-extend */
if (ext4_blocks_count(es) + EXT4_BLOCKS_PER_GROUP(sb) <
le32_to_cpu(es->s_first_data_block) + 1) {
printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu, "
"first data block %u, blocks per group %lu\n",
ext4_blocks_count(es),
le32_to_cpu(es->s_first_data_block),
EXT4_BLOCKS_PER_GROUP(sb));
goto failed_mount;
}
blocks_count = (ext4_blocks_count(es) - blocks_count = (ext4_blocks_count(es) -
le32_to_cpu(es->s_first_data_block) + le32_to_cpu(es->s_first_data_block) +
EXT4_BLOCKS_PER_GROUP(sb) - 1); EXT4_BLOCKS_PER_GROUP(sb) - 1);
@ -1900,6 +2143,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
sbi->s_rsv_window_head.rsv_goal_size = 0; sbi->s_rsv_window_head.rsv_goal_size = 0;
ext4_rsv_window_add(sb, &sbi->s_rsv_window_head); ext4_rsv_window_add(sb, &sbi->s_rsv_window_head);
sbi->s_stripe = ext4_get_stripe_size(sbi);
/* /*
* set up enough so that it can read an inode * set up enough so that it can read an inode
*/ */
@ -1944,6 +2189,21 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
goto failed_mount4; goto failed_mount4;
} }
if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
jbd2_journal_set_features(sbi->s_journal,
JBD2_FEATURE_COMPAT_CHECKSUM, 0,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
} else if (test_opt(sb, JOURNAL_CHECKSUM)) {
jbd2_journal_set_features(sbi->s_journal,
JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0);
jbd2_journal_clear_features(sbi->s_journal, 0, 0,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
} else {
jbd2_journal_clear_features(sbi->s_journal,
JBD2_FEATURE_COMPAT_CHECKSUM, 0,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
}
/* We have now updated the journal if required, so we can /* We have now updated the journal if required, so we can
* validate the data journaling mode. */ * validate the data journaling mode. */
switch (test_opt(sb, DATA_FLAGS)) { switch (test_opt(sb, DATA_FLAGS)) {
@ -2044,6 +2304,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
"writeback"); "writeback");
ext4_ext_init(sb); ext4_ext_init(sb);
ext4_mb_init(sb, needs_recovery);
lock_kernel(); lock_kernel();
return 0; return 0;
@ -2673,7 +2934,7 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf)
if (test_opt(sb, MINIX_DF)) { if (test_opt(sb, MINIX_DF)) {
sbi->s_overhead_last = 0; sbi->s_overhead_last = 0;
} else if (sbi->s_blocks_last != ext4_blocks_count(es)) { } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
unsigned long ngroups = sbi->s_groups_count, i; ext4_group_t ngroups = sbi->s_groups_count, i;
ext4_fsblk_t overhead = 0; ext4_fsblk_t overhead = 0;
smp_rmb(); smp_rmb();
@ -2909,7 +3170,7 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
size_t len, loff_t off) size_t len, loff_t off)
{ {
struct inode *inode = sb_dqopt(sb)->files[type]; struct inode *inode = sb_dqopt(sb)->files[type];
sector_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
int err = 0; int err = 0;
int offset = off & (sb->s_blocksize - 1); int offset = off & (sb->s_blocksize - 1);
int tocopy; int tocopy;
@ -2947,7 +3208,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
const char *data, size_t len, loff_t off) const char *data, size_t len, loff_t off)
{ {
struct inode *inode = sb_dqopt(sb)->files[type]; struct inode *inode = sb_dqopt(sb)->files[type];
sector_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
int err = 0; int err = 0;
int offset = off & (sb->s_blocksize - 1); int offset = off & (sb->s_blocksize - 1);
int tocopy; int tocopy;
@ -3002,7 +3263,6 @@ out:
i_size_write(inode, off+len-towrite); i_size_write(inode, off+len-towrite);
EXT4_I(inode)->i_disksize = inode->i_size; EXT4_I(inode)->i_disksize = inode->i_size;
} }
inode->i_version++;
inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_mtime = inode->i_ctime = CURRENT_TIME;
ext4_mark_inode_dirty(handle, inode); ext4_mark_inode_dirty(handle, inode);
mutex_unlock(&inode->i_mutex); mutex_unlock(&inode->i_mutex);
@ -3027,9 +3287,15 @@ static struct file_system_type ext4dev_fs_type = {
static int __init init_ext4_fs(void) static int __init init_ext4_fs(void)
{ {
int err = init_ext4_xattr(); int err;
err = init_ext4_mballoc();
if (err) if (err)
return err; return err;
err = init_ext4_xattr();
if (err)
goto out2;
err = init_inodecache(); err = init_inodecache();
if (err) if (err)
goto out1; goto out1;
@ -3041,6 +3307,8 @@ out:
destroy_inodecache(); destroy_inodecache();
out1: out1:
exit_ext4_xattr(); exit_ext4_xattr();
out2:
exit_ext4_mballoc();
return err; return err;
} }
@ -3049,6 +3317,7 @@ static void __exit exit_ext4_fs(void)
unregister_filesystem(&ext4dev_fs_type); unregister_filesystem(&ext4dev_fs_type);
destroy_inodecache(); destroy_inodecache();
exit_ext4_xattr(); exit_ext4_xattr();
exit_ext4_mballoc();
} }
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");

Просмотреть файл

@ -480,7 +480,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
ea_bdebug(bh, "refcount now=0; freeing"); ea_bdebug(bh, "refcount now=0; freeing");
if (ce) if (ce)
mb_cache_entry_free(ce); mb_cache_entry_free(ce);
ext4_free_blocks(handle, inode, bh->b_blocknr, 1); ext4_free_blocks(handle, inode, bh->b_blocknr, 1, 1);
get_bh(bh); get_bh(bh);
ext4_forget(handle, 1, inode, bh, bh->b_blocknr); ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
} else { } else {
@ -821,7 +821,7 @@ inserted:
new_bh = sb_getblk(sb, block); new_bh = sb_getblk(sb, block);
if (!new_bh) { if (!new_bh) {
getblk_failed: getblk_failed:
ext4_free_blocks(handle, inode, block, 1); ext4_free_blocks(handle, inode, block, 1, 1);
error = -EIO; error = -EIO;
goto cleanup; goto cleanup;
} }

Просмотреть файл

@ -1276,6 +1276,11 @@ void file_update_time(struct file *file)
sync_it = 1; sync_it = 1;
} }
if (IS_I_VERSION(inode)) {
inode_inc_iversion(inode);
sync_it = 1;
}
if (sync_it) if (sync_it)
mark_inode_dirty_sync(inode); mark_inode_dirty_sync(inode);
} }

Просмотреть файл

@ -232,7 +232,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
* Called under jbd_lock_bh_state(jh2bh(jh)), and drops it * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
*/ */
static int __process_buffer(journal_t *journal, struct journal_head *jh, static int __process_buffer(journal_t *journal, struct journal_head *jh,
struct buffer_head **bhs, int *batch_count) struct buffer_head **bhs, int *batch_count,
transaction_t *transaction)
{ {
struct buffer_head *bh = jh2bh(jh); struct buffer_head *bh = jh2bh(jh);
int ret = 0; int ret = 0;
@ -250,6 +251,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
transaction_t *t = jh->b_transaction; transaction_t *t = jh->b_transaction;
tid_t tid = t->t_tid; tid_t tid = t->t_tid;
transaction->t_chp_stats.cs_forced_to_close++;
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
jbd2_log_start_commit(journal, tid); jbd2_log_start_commit(journal, tid);
@ -279,6 +281,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
bhs[*batch_count] = bh; bhs[*batch_count] = bh;
__buffer_relink_io(jh); __buffer_relink_io(jh);
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
transaction->t_chp_stats.cs_written++;
(*batch_count)++; (*batch_count)++;
if (*batch_count == NR_BATCH) { if (*batch_count == NR_BATCH) {
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
@ -322,6 +325,8 @@ int jbd2_log_do_checkpoint(journal_t *journal)
if (!journal->j_checkpoint_transactions) if (!journal->j_checkpoint_transactions)
goto out; goto out;
transaction = journal->j_checkpoint_transactions; transaction = journal->j_checkpoint_transactions;
if (transaction->t_chp_stats.cs_chp_time == 0)
transaction->t_chp_stats.cs_chp_time = jiffies;
this_tid = transaction->t_tid; this_tid = transaction->t_tid;
restart: restart:
/* /*
@ -346,7 +351,8 @@ restart:
retry = 1; retry = 1;
break; break;
} }
retry = __process_buffer(journal, jh, bhs,&batch_count); retry = __process_buffer(journal, jh, bhs, &batch_count,
transaction);
if (!retry && lock_need_resched(&journal->j_list_lock)){ if (!retry && lock_need_resched(&journal->j_list_lock)){
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
retry = 1; retry = 1;
@ -602,15 +608,15 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
/* /*
* There is one special case to worry about: if we have just pulled the * There is one special case to worry about: if we have just pulled the
* buffer off a committing transaction's forget list, then even if the * buffer off a running or committing transaction's checkpoint list,
* checkpoint list is empty, the transaction obviously cannot be * then even if the checkpoint list is empty, the transaction obviously
* dropped! * cannot be dropped!
* *
* The locking here around j_committing_transaction is a bit sleazy. * The locking here around t_state is a bit sleazy.
* See the comment at the end of jbd2_journal_commit_transaction(). * See the comment at the end of jbd2_journal_commit_transaction().
*/ */
if (transaction == journal->j_committing_transaction) { if (transaction->t_state != T_FINISHED) {
JBUFFER_TRACE(jh, "belongs to committing transaction"); JBUFFER_TRACE(jh, "belongs to running/committing transaction");
goto out; goto out;
} }

Просмотреть файл

@ -20,6 +20,8 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <linux/jiffies.h>
#include <linux/crc32.h>
/* /*
* Default IO end handler for temporary BJ_IO buffer_heads. * Default IO end handler for temporary BJ_IO buffer_heads.
@ -92,19 +94,23 @@ static int inverted_lock(journal_t *journal, struct buffer_head *bh)
return 1; return 1;
} }
/* Done it all: now write the commit record. We should have /*
* Done it all: now submit the commit record. We should have
* cleaned up our previous buffers by now, so if we are in abort * cleaned up our previous buffers by now, so if we are in abort
* mode we can now just skip the rest of the journal write * mode we can now just skip the rest of the journal write
* entirely. * entirely.
* *
* Returns 1 if the journal needs to be aborted or 0 on success * Returns 1 if the journal needs to be aborted or 0 on success
*/ */
static int journal_write_commit_record(journal_t *journal, static int journal_submit_commit_record(journal_t *journal,
transaction_t *commit_transaction) transaction_t *commit_transaction,
struct buffer_head **cbh,
__u32 crc32_sum)
{ {
struct journal_head *descriptor; struct journal_head *descriptor;
struct commit_header *tmp;
struct buffer_head *bh; struct buffer_head *bh;
int i, ret; int ret;
int barrier_done = 0; int barrier_done = 0;
if (is_journal_aborted(journal)) if (is_journal_aborted(journal))
@ -116,21 +122,33 @@ static int journal_write_commit_record(journal_t *journal,
bh = jh2bh(descriptor); bh = jh2bh(descriptor);
/* AKPM: buglet - add `i' to tmp! */ tmp = (struct commit_header *)bh->b_data;
for (i = 0; i < bh->b_size; i += 512) { tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
journal_header_t *tmp = (journal_header_t*)bh->b_data; tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); if (JBD2_HAS_COMPAT_FEATURE(journal,
JBD2_FEATURE_COMPAT_CHECKSUM)) {
tmp->h_chksum_type = JBD2_CRC32_CHKSUM;
tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE;
tmp->h_chksum[0] = cpu_to_be32(crc32_sum);
} }
JBUFFER_TRACE(descriptor, "write commit block"); JBUFFER_TRACE(descriptor, "submit commit block");
lock_buffer(bh);
set_buffer_dirty(bh); set_buffer_dirty(bh);
if (journal->j_flags & JBD2_BARRIER) { set_buffer_uptodate(bh);
bh->b_end_io = journal_end_buffer_io_sync;
if (journal->j_flags & JBD2_BARRIER &&
!JBD2_HAS_COMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
set_buffer_ordered(bh); set_buffer_ordered(bh);
barrier_done = 1; barrier_done = 1;
} }
ret = sync_dirty_buffer(bh); ret = submit_bh(WRITE, bh);
/* is it possible for another commit to fail at roughly /* is it possible for another commit to fail at roughly
* the same time as this one? If so, we don't want to * the same time as this one? If so, we don't want to
* trust the barrier flag in the super, but instead want * trust the barrier flag in the super, but instead want
@ -151,14 +169,72 @@ static int journal_write_commit_record(journal_t *journal,
clear_buffer_ordered(bh); clear_buffer_ordered(bh);
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
set_buffer_dirty(bh); set_buffer_dirty(bh);
ret = sync_dirty_buffer(bh); ret = submit_bh(WRITE, bh);
} }
put_bh(bh); /* One for getblk() */ *cbh = bh;
jbd2_journal_put_journal_head(descriptor); return ret;
return (ret == -EIO);
} }
/*
 * Wait for a commit block that was queued by journal_submit_commit_record().
 * Keeping submission and waiting in separate functions is what lets the
 * commit record be written asynchronously.
 *
 * @bh: the commit block buffer handed back through the submit function's
 *      cbh argument.  It may be NULL: the caller starts with a NULL pointer
 *      and the submit function only fills it in once the write has actually
 *      been issued, so an early return there leaves nothing to wait for.
 *
 * Returns 0 on success (or when there is nothing to wait for) and -EIO when
 * the buffer is not uptodate after the I/O has finished.
 */
static int journal_wait_on_commit_record(struct buffer_head *bh)
{
	int ret = 0;

	/* No commit block was submitted: nothing to wait on or release. */
	if (!bh)
		return 0;

	clear_buffer_dirty(bh);
	wait_on_buffer(bh);

	if (unlikely(!buffer_uptodate(bh)))
		ret = -EIO;

	put_bh(bh);            /* One for getblk() */
	jbd2_journal_put_journal_head(bh2jh(bh));

	return ret;
}
/*
 * Wait for all submitted IO to complete.
 *
 * Walks commit_transaction->t_locked_list until it is empty, waiting on
 * every buffer that is still locked and unfiling buffers that are still on
 * the BJ_Locked list.
 *
 * Both callers in this file take journal->j_list_lock before calling and
 * release it afterwards; the lock is dropped here only around
 * wait_on_buffer().
 *
 * Returns 0, or -EIO if any waited-on buffer was not uptodate afterwards.
 */
static int journal_wait_on_locked_list(journal_t *journal,
transaction_t *commit_transaction)
{
int ret = 0;
struct journal_head *jh;
while (commit_transaction->t_locked_list) {
struct buffer_head *bh;
/* Always work on the entry just before the list head. */
jh = commit_transaction->t_locked_list->b_tprev;
bh = jh2bh(jh);
/* Pin the buffer so it cannot go away while the list lock is dropped. */
get_bh(bh);
if (buffer_locked(bh)) {
spin_unlock(&journal->j_list_lock);
wait_on_buffer(bh);
/* Remember the failure but keep draining the list. */
if (unlikely(!buffer_uptodate(bh)))
ret = -EIO;
spin_lock(&journal->j_list_lock);
}
/*
 * NOTE(review): the re-lock below implies inverted_lock() returns 0
 * with j_list_lock released - confirm against its definition.
 */
if (!inverted_lock(journal, bh)) {
put_bh(bh);
spin_lock(&journal->j_list_lock);
continue;
}
/* Re-check under the bh state lock: the buffer may have been refiled. */
if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
__jbd2_journal_unfile_buffer(jh);
jbd_unlock_bh_state(bh);
jbd2_journal_remove_journal_head(bh);
put_bh(bh);
} else {
jbd_unlock_bh_state(bh);
}
/* Drop the reference taken by get_bh() above. */
put_bh(bh);
cond_resched_lock(&journal->j_list_lock);
}
return ret;
}
static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
{ {
int i; int i;
@ -274,7 +350,21 @@ write_out_data:
journal_do_submit_data(wbuf, bufs); journal_do_submit_data(wbuf, bufs);
} }
static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag, static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
{
struct page *page = bh->b_page;
char *addr;
__u32 checksum;
addr = kmap_atomic(page, KM_USER0);
checksum = crc32_be(crc32_sum,
(void *)(addr + offset_in_page(bh->b_data)), bh->b_size);
kunmap_atomic(addr, KM_USER0);
return checksum;
}
static void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
unsigned long long block) unsigned long long block)
{ {
tag->t_blocknr = cpu_to_be32(block & (u32)~0); tag->t_blocknr = cpu_to_be32(block & (u32)~0);
@ -290,6 +380,7 @@ static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
*/ */
void jbd2_journal_commit_transaction(journal_t *journal) void jbd2_journal_commit_transaction(journal_t *journal)
{ {
struct transaction_stats_s stats;
transaction_t *commit_transaction; transaction_t *commit_transaction;
struct journal_head *jh, *new_jh, *descriptor; struct journal_head *jh, *new_jh, *descriptor;
struct buffer_head **wbuf = journal->j_wbuf; struct buffer_head **wbuf = journal->j_wbuf;
@ -305,6 +396,8 @@ void jbd2_journal_commit_transaction(journal_t *journal)
int tag_flag; int tag_flag;
int i; int i;
int tag_bytes = journal_tag_bytes(journal); int tag_bytes = journal_tag_bytes(journal);
struct buffer_head *cbh = NULL; /* For transactional checksums */
__u32 crc32_sum = ~0;
/* /*
* First job: lock down the current transaction and wait for * First job: lock down the current transaction and wait for
@ -337,6 +430,11 @@ void jbd2_journal_commit_transaction(journal_t *journal)
spin_lock(&journal->j_state_lock); spin_lock(&journal->j_state_lock);
commit_transaction->t_state = T_LOCKED; commit_transaction->t_state = T_LOCKED;
stats.u.run.rs_wait = commit_transaction->t_max_wait;
stats.u.run.rs_locked = jiffies;
stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
stats.u.run.rs_locked);
spin_lock(&commit_transaction->t_handle_lock); spin_lock(&commit_transaction->t_handle_lock);
while (commit_transaction->t_updates) { while (commit_transaction->t_updates) {
DEFINE_WAIT(wait); DEFINE_WAIT(wait);
@ -407,6 +505,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
*/ */
jbd2_journal_switch_revoke_table(journal); jbd2_journal_switch_revoke_table(journal);
stats.u.run.rs_flushing = jiffies;
stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked,
stats.u.run.rs_flushing);
commit_transaction->t_state = T_FLUSH; commit_transaction->t_state = T_FLUSH;
journal->j_committing_transaction = commit_transaction; journal->j_committing_transaction = commit_transaction;
journal->j_running_transaction = NULL; journal->j_running_transaction = NULL;
@ -440,38 +542,15 @@ void jbd2_journal_commit_transaction(journal_t *journal)
journal_submit_data_buffers(journal, commit_transaction); journal_submit_data_buffers(journal, commit_transaction);
/* /*
* Wait for all previously submitted IO to complete. * Wait for all previously submitted IO to complete if commit
* record is to be written synchronously.
*/ */
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
while (commit_transaction->t_locked_list) { if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
struct buffer_head *bh; JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT))
err = journal_wait_on_locked_list(journal,
commit_transaction);
jh = commit_transaction->t_locked_list->b_tprev;
bh = jh2bh(jh);
get_bh(bh);
if (buffer_locked(bh)) {
spin_unlock(&journal->j_list_lock);
wait_on_buffer(bh);
if (unlikely(!buffer_uptodate(bh)))
err = -EIO;
spin_lock(&journal->j_list_lock);
}
if (!inverted_lock(journal, bh)) {
put_bh(bh);
spin_lock(&journal->j_list_lock);
continue;
}
if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
__jbd2_journal_unfile_buffer(jh);
jbd_unlock_bh_state(bh);
jbd2_journal_remove_journal_head(bh);
put_bh(bh);
} else {
jbd_unlock_bh_state(bh);
}
put_bh(bh);
cond_resched_lock(&journal->j_list_lock);
}
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
if (err) if (err)
@ -498,6 +577,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
*/ */
commit_transaction->t_state = T_COMMIT; commit_transaction->t_state = T_COMMIT;
stats.u.run.rs_logging = jiffies;
stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing,
stats.u.run.rs_logging);
stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits;
stats.u.run.rs_blocks_logged = 0;
descriptor = NULL; descriptor = NULL;
bufs = 0; bufs = 0;
while (commit_transaction->t_buffers) { while (commit_transaction->t_buffers) {
@ -639,6 +724,15 @@ void jbd2_journal_commit_transaction(journal_t *journal)
start_journal_io: start_journal_io:
for (i = 0; i < bufs; i++) { for (i = 0; i < bufs; i++) {
struct buffer_head *bh = wbuf[i]; struct buffer_head *bh = wbuf[i];
/*
* Compute checksum.
*/
if (JBD2_HAS_COMPAT_FEATURE(journal,
JBD2_FEATURE_COMPAT_CHECKSUM)) {
crc32_sum =
jbd2_checksum_data(crc32_sum, bh);
}
lock_buffer(bh); lock_buffer(bh);
clear_buffer_dirty(bh); clear_buffer_dirty(bh);
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
@ -646,6 +740,7 @@ start_journal_io:
submit_bh(WRITE, bh); submit_bh(WRITE, bh);
} }
cond_resched(); cond_resched();
stats.u.run.rs_blocks_logged += bufs;
/* Force a new descriptor to be generated next /* Force a new descriptor to be generated next
time round the loop. */ time round the loop. */
@ -654,6 +749,23 @@ start_journal_io:
} }
} }
/* Done it all: now write the commit record asynchronously. */
if (JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
err = journal_submit_commit_record(journal, commit_transaction,
&cbh, crc32_sum);
if (err)
__jbd2_journal_abort_hard(journal);
spin_lock(&journal->j_list_lock);
err = journal_wait_on_locked_list(journal,
commit_transaction);
spin_unlock(&journal->j_list_lock);
if (err)
__jbd2_journal_abort_hard(journal);
}
/* Lo and behold: we have just managed to send a transaction to /* Lo and behold: we have just managed to send a transaction to
the log. Before we can commit it, wait for the IO so far to the log. Before we can commit it, wait for the IO so far to
complete. Control buffers being written are on the complete. Control buffers being written are on the
@ -753,8 +865,14 @@ wait_for_iobuf:
jbd_debug(3, "JBD: commit phase 6\n"); jbd_debug(3, "JBD: commit phase 6\n");
if (journal_write_commit_record(journal, commit_transaction)) if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
err = -EIO; JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
err = journal_submit_commit_record(journal, commit_transaction,
&cbh, crc32_sum);
if (err)
__jbd2_journal_abort_hard(journal);
}
err = journal_wait_on_commit_record(cbh);
if (err) if (err)
jbd2_journal_abort(journal, err); jbd2_journal_abort(journal, err);
@ -816,6 +934,7 @@ restart_loop:
cp_transaction = jh->b_cp_transaction; cp_transaction = jh->b_cp_transaction;
if (cp_transaction) { if (cp_transaction) {
JBUFFER_TRACE(jh, "remove from old cp transaction"); JBUFFER_TRACE(jh, "remove from old cp transaction");
cp_transaction->t_chp_stats.cs_dropped++;
__jbd2_journal_remove_checkpoint(jh); __jbd2_journal_remove_checkpoint(jh);
} }
@ -867,10 +986,10 @@ restart_loop:
} }
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
/* /*
* This is a bit sleazy. We borrow j_list_lock to protect * This is a bit sleazy. We use j_list_lock to protect transition
* journal->j_committing_transaction in __jbd2_journal_remove_checkpoint. * of a transaction into T_FINISHED state and calling
* Really, __jbd2_journal_remove_checkpoint should be using j_state_lock but * __jbd2_journal_drop_transaction(). Otherwise we could race with
* it's a bit hassle to hold that across __jbd2_journal_remove_checkpoint * other checkpointing code processing the transaction...
*/ */
spin_lock(&journal->j_state_lock); spin_lock(&journal->j_state_lock);
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
@ -890,6 +1009,36 @@ restart_loop:
J_ASSERT(commit_transaction->t_state == T_COMMIT); J_ASSERT(commit_transaction->t_state == T_COMMIT);
commit_transaction->t_start = jiffies;
stats.u.run.rs_logging = jbd2_time_diff(stats.u.run.rs_logging,
commit_transaction->t_start);
/*
* File the transaction for history
*/
stats.ts_type = JBD2_STATS_RUN;
stats.ts_tid = commit_transaction->t_tid;
stats.u.run.rs_handle_count = commit_transaction->t_handle_count;
spin_lock(&journal->j_history_lock);
memcpy(journal->j_history + journal->j_history_cur, &stats,
sizeof(stats));
if (++journal->j_history_cur == journal->j_history_max)
journal->j_history_cur = 0;
/*
* Calculate overall stats
*/
journal->j_stats.ts_tid++;
journal->j_stats.u.run.rs_wait += stats.u.run.rs_wait;
journal->j_stats.u.run.rs_running += stats.u.run.rs_running;
journal->j_stats.u.run.rs_locked += stats.u.run.rs_locked;
journal->j_stats.u.run.rs_flushing += stats.u.run.rs_flushing;
journal->j_stats.u.run.rs_logging += stats.u.run.rs_logging;
journal->j_stats.u.run.rs_handle_count += stats.u.run.rs_handle_count;
journal->j_stats.u.run.rs_blocks += stats.u.run.rs_blocks;
journal->j_stats.u.run.rs_blocks_logged += stats.u.run.rs_blocks_logged;
spin_unlock(&journal->j_history_lock);
commit_transaction->t_state = T_FINISHED; commit_transaction->t_state = T_FINISHED;
J_ASSERT(commit_transaction == journal->j_committing_transaction); J_ASSERT(commit_transaction == journal->j_committing_transaction);
journal->j_commit_sequence = commit_transaction->t_tid; journal->j_commit_sequence = commit_transaction->t_tid;

Просмотреть файл

@ -36,6 +36,7 @@
#include <linux/poison.h> #include <linux/poison.h>
#include <linux/proc_fs.h> #include <linux/proc_fs.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/page.h> #include <asm/page.h>
@ -640,6 +641,312 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
return jbd2_journal_add_journal_head(bh); return jbd2_journal_add_journal_head(bh);
} }
/*
 * Per-open state for the jbd2 statistics proc files.  Each open() takes a
 * private snapshot so that the seq_file iterators never touch live journal
 * data.
 */
struct jbd2_stats_proc_session {
/* journal the snapshot was taken from (read by the "info" show routine) */
journal_t *journal;
/* kmalloc'ed copy: the whole history ring, or the aggregate j_stats */
struct transaction_stats_s *stats;
/* index in the ring where iteration starts (j_history_cur at open time) */
int start;
/* number of slots in the copied history ring */
int max;
};
/*
 * Starting at @ts, find the next populated slot (ts_type != 0) in the
 * snapshot ring held by @s, wrapping from the end of the ring back to its
 * beginning.
 *
 * @first is non-zero for the very first lookup, where @ts is allowed to be
 * the ring's start slot.  On later lookups, arriving back at the start slot
 * means the whole ring has been visited.
 *
 * Returns the populated slot, or NULL once the walk is complete.
 */
static void *jbd2_history_skip_empty(struct jbd2_stats_proc_session *s,
				     struct transaction_stats_s *ts,
				     int first)
{
	struct transaction_stats_s *const ring = s->stats;
	struct transaction_stats_s *const ring_end = ring + s->max;
	struct transaction_stats_s *const origin = ring + s->start;

	if (ts == ring_end)
		ts = ring;
	if (ts == origin && !first)
		return NULL;

	while (!ts->ts_type) {
		if (++ts == ring_end)
			ts = ring;
		if (ts == origin)
			return NULL;
	}

	return ts;
}
/*
 * seq_file ->start for the "history" file.
 *
 * Position 0 is the header line (SEQ_START_TOKEN); position N > 0 is the
 * N-th populated record counted from the ring's start slot.  Returns NULL
 * when the position lies beyond the last populated record.
 */
static void *jbd2_seq_history_start(struct seq_file *seq, loff_t *pos)
{
	struct jbd2_stats_proc_session *s = seq->private;
	struct transaction_stats_s *ts;
	int remaining = *pos;

	if (!remaining)
		return SEQ_START_TOKEN;

	/* Record number 1 is the first populated slot at or after start. */
	ts = jbd2_history_skip_empty(s, s->stats + s->start, 1);

	/* Step forward once for every further record requested. */
	for (remaining--; ts && remaining; remaining--)
		ts = jbd2_history_skip_empty(s, ts + 1, 0);

	return ts;
}
/*
 * seq_file ->next for the "history" file: advance the position and return
 * the following populated record, or NULL at the end of the ring.
 */
static void *jbd2_seq_history_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct jbd2_stats_proc_session *s = seq->private;
	struct transaction_stats_s *cur = v;

	*pos += 1;

	/* After a real record, continue with the slot behind it. */
	if (v != SEQ_START_TOKEN)
		return jbd2_history_skip_empty(s, cur + 1, 0);

	/* After the header, begin with the ring's start slot. */
	return jbd2_history_skip_empty(s, s->stats + s->start, 1);
}
/*
 * seq_file ->show for the "history" file.
 *
 * Prints the column header for SEQ_START_TOKEN, an "R" line for a
 * JBD2_STATS_RUN record and a "C" line for a JBD2_STATS_CHECKPOINT record.
 * Time values are converted from jiffies to milliseconds.  Any other record
 * type trips J_ASSERT().  Always returns 0.
 */
static int jbd2_seq_history_show(struct seq_file *seq, void *v)
{
	struct transaction_stats_s *ts = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "%-4s %-5s %-5s %-5s %-5s %-5s %-5s %-6s %-5s "
				"%-5s %-5s %-5s %-5s %-5s\n", "R/C", "tid",
				"wait", "run", "lock", "flush", "log", "hndls",
				"block", "inlog", "ctime", "write", "drop",
				"close");
		return 0;
	}

	if (ts->ts_type == JBD2_STATS_RUN) {
		seq_printf(seq, "%-4s %-5lu %-5u %-5u %-5u %-5u %-5u "
				"%-6lu %-5lu %-5lu\n", "R", ts->ts_tid,
				jiffies_to_msecs(ts->u.run.rs_wait),
				jiffies_to_msecs(ts->u.run.rs_running),
				jiffies_to_msecs(ts->u.run.rs_locked),
				jiffies_to_msecs(ts->u.run.rs_flushing),
				jiffies_to_msecs(ts->u.run.rs_logging),
				ts->u.run.rs_handle_count,
				ts->u.run.rs_blocks,
				ts->u.run.rs_blocks_logged);
		return 0;
	}

	if (ts->ts_type == JBD2_STATS_CHECKPOINT) {
		seq_printf(seq, "%-4s %-5lu %48s %-5u %-5lu %-5lu %-5lu\n",
				"C", ts->ts_tid, " ",
				jiffies_to_msecs(ts->u.chp.cs_chp_time),
				ts->u.chp.cs_written, ts->u.chp.cs_dropped,
				ts->u.chp.cs_forced_to_close);
		return 0;
	}

	/* The iterators only hand out populated RUN/CHECKPOINT records. */
	J_ASSERT(0);
	return 0;
}
/*
 * seq_file ->stop for the "history" file.  Intentionally empty: the
 * iterators walk a private snapshot and take no locks in ->start.
 */
static void jbd2_seq_history_stop(struct seq_file *seq, void *v)
{
}
/* seq_file iterator callbacks for the per-journal "history" proc file. */
static struct seq_operations jbd2_seq_history_ops = {
.start = jbd2_seq_history_start,
.next = jbd2_seq_history_next,
.stop = jbd2_seq_history_stop,
.show = jbd2_seq_history_show,
};
static int jbd2_seq_history_open(struct inode *inode, struct file *file)
{
journal_t *journal = PDE(inode)->data;
struct jbd2_stats_proc_session *s;
int rc, size;
s = kmalloc(sizeof(*s), GFP_KERNEL);
if (s == NULL)
return -ENOMEM;
size = sizeof(struct transaction_stats_s) * journal->j_history_max;
s->stats = kmalloc(size, GFP_KERNEL);
if (s->stats == NULL) {
kfree(s);
return -ENOMEM;
}
spin_lock(&journal->j_history_lock);
memcpy(s->stats, journal->j_history, size);
s->max = journal->j_history_max;
s->start = journal->j_history_cur % s->max;
spin_unlock(&journal->j_history_lock);
rc = seq_open(file, &jbd2_seq_history_ops);
if (rc == 0) {
struct seq_file *m = file->private_data;
m->private = s;
} else {
kfree(s->stats);
kfree(s);
}
return rc;
}
/*
 * release() for the "history" proc file: free the snapshot and the session
 * allocated at open time, then let seq_file tear down its own state.
 */
static int jbd2_seq_history_release(struct inode *inode, struct file *file)
{
	struct seq_file *m = file->private_data;
	struct jbd2_stats_proc_session *session = m->private;

	kfree(session->stats);
	kfree(session);

	return seq_release(inode, file);
}
/*
 * File operations for the "history" proc file: custom open/release manage
 * the snapshot session, everything else is the generic seq_file code.
 */
static struct file_operations jbd2_seq_history_fops = {
.owner = THIS_MODULE,
.open = jbd2_seq_history_open,
.read = seq_read,
.llseek = seq_lseek,
.release = jbd2_seq_history_release,
};
/*
 * seq_file ->start for the "info" file.  The file consists of a single
 * record, so only position 0 yields anything.
 */
static void *jbd2_seq_info_start(struct seq_file *seq, loff_t *pos)
{
	if (*pos)
		return NULL;

	return SEQ_START_TOKEN;
}
/*
 * seq_file ->next for the "info" file.
 *
 * There is never a second record, so this always returns NULL.  The
 * position is still advanced, as the seq_file iterator contract requires of
 * every ->next call, so that a later ->start sees a non-zero position and
 * does not hand out the single record again.
 */
static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos)
{
	(*pos)++;
	return NULL;
}
static int jbd2_seq_info_show(struct seq_file *seq, void *v)
{
struct jbd2_stats_proc_session *s = seq->private;
if (v != SEQ_START_TOKEN)
return 0;
seq_printf(seq, "%lu transaction, each upto %u blocks\n",
s->stats->ts_tid,
s->journal->j_max_transaction_buffers);
if (s->stats->ts_tid == 0)
return 0;
seq_printf(seq, "average: \n %ums waiting for transaction\n",
jiffies_to_msecs(s->stats->u.run.rs_wait / s->stats->ts_tid));
seq_printf(seq, " %ums running transaction\n",
jiffies_to_msecs(s->stats->u.run.rs_running / s->stats->ts_tid));
seq_printf(seq, " %ums transaction was being locked\n",
jiffies_to_msecs(s->stats->u.run.rs_locked / s->stats->ts_tid));
seq_printf(seq, " %ums flushing data (in ordered mode)\n",
jiffies_to_msecs(s->stats->u.run.rs_flushing / s->stats->ts_tid));
seq_printf(seq, " %ums logging transaction\n",
jiffies_to_msecs(s->stats->u.run.rs_logging / s->stats->ts_tid));
seq_printf(seq, " %lu handles per transaction\n",
s->stats->u.run.rs_handle_count / s->stats->ts_tid);
seq_printf(seq, " %lu blocks per transaction\n",
s->stats->u.run.rs_blocks / s->stats->ts_tid);
seq_printf(seq, " %lu logged blocks per transaction\n",
s->stats->u.run.rs_blocks_logged / s->stats->ts_tid);
return 0;
}
/*
 * seq_file ->stop for the "info" file.  Intentionally empty: ->start
 * acquires nothing that would need releasing.
 */
static void jbd2_seq_info_stop(struct seq_file *seq, void *v)
{
}
/* seq_file iterator callbacks for the per-journal "info" proc file. */
static struct seq_operations jbd2_seq_info_ops = {
.start = jbd2_seq_info_start,
.next = jbd2_seq_info_next,
.stop = jbd2_seq_info_stop,
.show = jbd2_seq_info_show,
};
/*
 * open() for the "info" proc file.
 *
 * Allocates a session holding a private copy of the journal's aggregate
 * statistics (journal->j_stats), taken under j_history_lock, and attaches
 * it to the seq_file.  jbd2_seq_info_release() frees it again.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or the error
 * reported by seq_open().
 */
static int jbd2_seq_info_open(struct inode *inode, struct file *file)
{
	journal_t *journal = PDE(inode)->data;
	struct jbd2_stats_proc_session *s;
	struct seq_file *m;
	int rc;

	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (s == NULL)
		return -ENOMEM;

	s->stats = kmalloc(sizeof(struct transaction_stats_s), GFP_KERNEL);
	if (s->stats == NULL) {
		kfree(s);
		return -ENOMEM;
	}

	spin_lock(&journal->j_history_lock);
	memcpy(s->stats, &journal->j_stats,
	       sizeof(struct transaction_stats_s));
	s->journal = journal;
	spin_unlock(&journal->j_history_lock);

	rc = seq_open(file, &jbd2_seq_info_ops);
	if (rc != 0) {
		kfree(s->stats);
		kfree(s);
		return rc;
	}

	m = file->private_data;
	m->private = s;
	return rc;
}
/*
 * release() for the "info" proc file: free the statistics copy and the
 * session allocated at open time, then let seq_file clean up.
 */
static int jbd2_seq_info_release(struct inode *inode, struct file *file)
{
	struct seq_file *m = file->private_data;
	struct jbd2_stats_proc_session *session = m->private;

	kfree(session->stats);
	kfree(session);

	return seq_release(inode, file);
}
/*
 * File operations for the "info" proc file: custom open/release manage the
 * snapshot session, everything else is the generic seq_file code.
 */
static struct file_operations jbd2_seq_info_fops = {
.owner = THIS_MODULE,
.open = jbd2_seq_info_open,
.read = seq_read,
.llseek = seq_lseek,
.release = jbd2_seq_info_release,
};
/*
 * Parent proc directory under which every journal gets its own
 * sub-directory; NULL when it has not been (or could not be) created.
 */
static struct proc_dir_entry *proc_jbd2_stats;
/*
 * Create the per-journal proc directory, named after the journal's block
 * device, and populate it with the "history" and "info" files.
 *
 * Failures are not reported: when the directory cannot be created
 * journal->j_proc_entry stays NULL, and "info" is only created once
 * "history" exists.
 */
static void jbd2_stats_proc_init(journal_t *journal)
{
	char name[BDEVNAME_SIZE];
	struct proc_dir_entry *p;

	/*
	 * bdevname() formats the device name straight into the buffer it is
	 * given.  Do not feed its result back through snprintf() into the
	 * same buffer: source and destination would overlap, which is
	 * undefined.
	 */
	bdevname(journal->j_dev, name);

	journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats);
	if (!journal->j_proc_entry)
		return;

	p = create_proc_entry("history", S_IRUGO, journal->j_proc_entry);
	if (!p)
		return;
	p->proc_fops = &jbd2_seq_history_fops;
	p->data = journal;

	p = create_proc_entry("info", S_IRUGO, journal->j_proc_entry);
	if (p) {
		p->proc_fops = &jbd2_seq_info_fops;
		p->data = journal;
	}
}
/*
 * Remove the "info" and "history" files and then the per-journal proc
 * directory created by jbd2_stats_proc_init().
 */
static void jbd2_stats_proc_exit(journal_t *journal)
{
	char name[BDEVNAME_SIZE];

	/*
	 * bdevname() already writes the name into the buffer; copying it
	 * onto itself with snprintf() would be an overlapping (undefined)
	 * copy.
	 */
	bdevname(journal->j_dev, name);

	remove_proc_entry("info", journal->j_proc_entry);
	remove_proc_entry("history", journal->j_proc_entry);
	remove_proc_entry(name, proc_jbd2_stats);
}
/*
 * Set up the statistics state of a freshly allocated journal: the history
 * lock and a zeroed ring of 100 transaction_stats_s records.
 *
 * When the jbd2 proc directory does not exist no ring is allocated, and
 * when the allocation fails j_history_max is reset to 0.
 */
static void journal_init_stats(journal_t *journal)
{
	int size;

	/*
	 * Initialise the lock before any early return: the commit path and
	 * the proc open handlers take j_history_lock without first checking
	 * whether a history ring exists.
	 */
	spin_lock_init(&journal->j_history_lock);

	if (!proc_jbd2_stats)
		return;

	journal->j_history_max = 100;
	size = sizeof(struct transaction_stats_s) * journal->j_history_max;
	journal->j_history = kzalloc(size, GFP_KERNEL);
	if (!journal->j_history)
		journal->j_history_max = 0;
}
/* /*
* Management for journal control blocks: functions to create and * Management for journal control blocks: functions to create and
* destroy journal_t structures, and to initialise and read existing * destroy journal_t structures, and to initialise and read existing
@ -681,6 +988,9 @@ static journal_t * journal_init_common (void)
kfree(journal); kfree(journal);
goto fail; goto fail;
} }
journal_init_stats(journal);
return journal; return journal;
fail: fail:
return NULL; return NULL;
@ -735,6 +1045,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
journal->j_fs_dev = fs_dev; journal->j_fs_dev = fs_dev;
journal->j_blk_offset = start; journal->j_blk_offset = start;
journal->j_maxlen = len; journal->j_maxlen = len;
jbd2_stats_proc_init(journal);
bh = __getblk(journal->j_dev, start, journal->j_blocksize); bh = __getblk(journal->j_dev, start, journal->j_blocksize);
J_ASSERT(bh != NULL); J_ASSERT(bh != NULL);
@ -773,6 +1084,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits; journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits;
journal->j_blocksize = inode->i_sb->s_blocksize; journal->j_blocksize = inode->i_sb->s_blocksize;
jbd2_stats_proc_init(journal);
/* journal descriptor can store up to n blocks -bzzz */ /* journal descriptor can store up to n blocks -bzzz */
n = journal->j_blocksize / sizeof(journal_block_tag_t); n = journal->j_blocksize / sizeof(journal_block_tag_t);
@ -1153,6 +1465,8 @@ void jbd2_journal_destroy(journal_t *journal)
brelse(journal->j_sb_buffer); brelse(journal->j_sb_buffer);
} }
if (journal->j_proc_entry)
jbd2_stats_proc_exit(journal);
if (journal->j_inode) if (journal->j_inode)
iput(journal->j_inode); iput(journal->j_inode);
if (journal->j_revoke) if (journal->j_revoke)
@ -1264,6 +1578,32 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
return 1; return 1;
} }
/*
 * jbd2_journal_clear_features () - Clear the given journal features in the
 * in-memory superblock
 * @journal: Journal to act on.
 * @compat: bitmask of compatible features to clear
 * @ro: bitmask of features that force read-only mount to clear
 * @incompat: bitmask of incompatible features to clear
 *
 * Removes every bit set in the three masks from the corresponding feature
 * word of journal->j_superblock.  The masks are converted to big-endian
 * before being applied.
 */
void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
				 unsigned long ro, unsigned long incompat)
{
	journal_superblock_t *jsb = journal->j_superblock;

	jbd_debug(1, "Clear features 0x%lx/0x%lx/0x%lx\n",
		  compat, ro, incompat);

	jsb->s_feature_compat    &= ~cpu_to_be32(compat);
	jsb->s_feature_ro_compat &= ~cpu_to_be32(ro);
	jsb->s_feature_incompat  &= ~cpu_to_be32(incompat);
}
EXPORT_SYMBOL(jbd2_journal_clear_features);
/** /**
* int jbd2_journal_update_format () - Update on-disk journal structure. * int jbd2_journal_update_format () - Update on-disk journal structure.
@ -1633,7 +1973,7 @@ static int journal_init_jbd2_journal_head_cache(void)
jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head", jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head",
sizeof(struct journal_head), sizeof(struct journal_head),
0, /* offset */ 0, /* offset */
0, /* flags */ SLAB_TEMPORARY, /* flags */
NULL); /* ctor */ NULL); /* ctor */
retval = 0; retval = 0;
if (jbd2_journal_head_cache == 0) { if (jbd2_journal_head_cache == 0) {
@ -1900,6 +2240,28 @@ static void __exit jbd2_remove_debugfs_entry(void)
#endif #endif
#ifdef CONFIG_PROC_FS

#define JBD2_STATS_PROC_NAME "fs/jbd2"

/* Create the top-level jbd2 statistics directory in procfs. */
static void __init jbd2_create_jbd_stats_proc_entry(void)
{
	proc_jbd2_stats = proc_mkdir(JBD2_STATS_PROC_NAME, NULL);
}

/* Remove the top-level statistics directory, if it was ever created. */
static void __exit jbd2_remove_jbd_stats_proc_entry(void)
{
	if (!proc_jbd2_stats)
		return;

	remove_proc_entry(JBD2_STATS_PROC_NAME, NULL);
}

#else

/* Without procfs both helpers compile away to nothing. */
#define jbd2_create_jbd_stats_proc_entry() do {} while (0)
#define jbd2_remove_jbd_stats_proc_entry() do {} while (0)

#endif
struct kmem_cache *jbd2_handle_cache; struct kmem_cache *jbd2_handle_cache;
static int __init journal_init_handle_cache(void) static int __init journal_init_handle_cache(void)
@ -1907,7 +2269,7 @@ static int __init journal_init_handle_cache(void)
jbd2_handle_cache = kmem_cache_create("jbd2_journal_handle", jbd2_handle_cache = kmem_cache_create("jbd2_journal_handle",
sizeof(handle_t), sizeof(handle_t),
0, /* offset */ 0, /* offset */
0, /* flags */ SLAB_TEMPORARY, /* flags */
NULL); /* ctor */ NULL); /* ctor */
if (jbd2_handle_cache == NULL) { if (jbd2_handle_cache == NULL) {
printk(KERN_EMERG "JBD: failed to create handle cache\n"); printk(KERN_EMERG "JBD: failed to create handle cache\n");
@ -1955,6 +2317,7 @@ static int __init journal_init(void)
if (ret != 0) if (ret != 0)
jbd2_journal_destroy_caches(); jbd2_journal_destroy_caches();
jbd2_create_debugfs_entry(); jbd2_create_debugfs_entry();
jbd2_create_jbd_stats_proc_entry();
return ret; return ret;
} }
@ -1966,6 +2329,7 @@ static void __exit journal_exit(void)
printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
#endif #endif
jbd2_remove_debugfs_entry(); jbd2_remove_debugfs_entry();
jbd2_remove_jbd_stats_proc_entry();
jbd2_journal_destroy_caches(); jbd2_journal_destroy_caches();
} }

Просмотреть файл

@ -21,6 +21,7 @@
#include <linux/jbd2.h> #include <linux/jbd2.h>
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/crc32.h>
#endif #endif
/* /*
@ -316,6 +317,37 @@ static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag
return block; return block;
} }
/*
 * calc_chksums calculates the checksums for the blocks described in the
 * descriptor block.
 *
 * @journal:        journal being scanned
 * @bh:             the descriptor block
 * @next_log_block: in/out log cursor; advanced (with wrap-around) past
 *                  every data block that is read
 * @crc32_sum:      in/out running CRC32; updated with the descriptor block
 *                  first and then with each data block in log order
 *
 * Returns 0 on success and 1 if a data block could not be read.
 */
static int calc_chksums(journal_t *journal, struct buffer_head *bh,
			unsigned long *next_log_block, __u32 *crc32_sum)
{
	int i, num_blks, err;
	unsigned long io_block;
	struct buffer_head *obh;

	num_blks = count_tags(journal, bh);

	/* Calculate checksum of the descriptor block. */
	*crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);

	for (i = 0; i < num_blks; i++) {
		io_block = (*next_log_block)++;
		wrap(journal, *next_log_block);

		err = jread(&obh, journal, io_block);
		if (err) {
			printk(KERN_ERR "JBD: IO error %d recovering block "
				"%lu in log\n", err, io_block);
			return 1;
		}

		*crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
				      obh->b_size);

		/*
		 * Drop the buffer handed back by jread(); without this
		 * every checksummed data block leaks a buffer reference.
		 */
		put_bh(obh);
	}

	return 0;
}
static int do_one_pass(journal_t *journal, static int do_one_pass(journal_t *journal,
struct recovery_info *info, enum passtype pass) struct recovery_info *info, enum passtype pass)
{ {
@ -328,6 +360,7 @@ static int do_one_pass(journal_t *journal,
unsigned int sequence; unsigned int sequence;
int blocktype; int blocktype;
int tag_bytes = journal_tag_bytes(journal); int tag_bytes = journal_tag_bytes(journal);
__u32 crc32_sum = ~0; /* Transactional Checksums */
/* Precompute the maximum metadata descriptors in a descriptor block */ /* Precompute the maximum metadata descriptors in a descriptor block */
int MAX_BLOCKS_PER_DESC; int MAX_BLOCKS_PER_DESC;
@ -419,12 +452,26 @@ static int do_one_pass(journal_t *journal,
switch(blocktype) { switch(blocktype) {
case JBD2_DESCRIPTOR_BLOCK: case JBD2_DESCRIPTOR_BLOCK:
/* If it is a valid descriptor block, replay it /* If it is a valid descriptor block, replay it
* in pass REPLAY; otherwise, just skip over the * in pass REPLAY; if journal_checksums enabled, then
* blocks it describes. */ * calculate checksums in PASS_SCAN, otherwise,
* just skip over the blocks it describes. */
if (pass != PASS_REPLAY) { if (pass != PASS_REPLAY) {
if (pass == PASS_SCAN &&
JBD2_HAS_COMPAT_FEATURE(journal,
JBD2_FEATURE_COMPAT_CHECKSUM) &&
!info->end_transaction) {
if (calc_chksums(journal, bh,
&next_log_block,
&crc32_sum)) {
put_bh(bh);
break;
}
put_bh(bh);
continue;
}
next_log_block += count_tags(journal, bh); next_log_block += count_tags(journal, bh);
wrap(journal, next_log_block); wrap(journal, next_log_block);
brelse(bh); put_bh(bh);
continue; continue;
} }
@ -516,9 +563,96 @@ static int do_one_pass(journal_t *journal,
continue; continue;
case JBD2_COMMIT_BLOCK: case JBD2_COMMIT_BLOCK:
/* Found an expected commit block: not much to /* How to differentiate between interrupted commit
* do other than move on to the next sequence * and journal corruption ?
*
* {nth transaction}
* Checksum Verification Failed
* |
* ____________________
* | |
* async_commit sync_commit
* | |
* | GO TO NEXT "Journal Corruption"
* | TRANSACTION
* |
* {(n+1)th transaction}
* |
* _______|______________
* | |
* Commit block found Commit block not found
* | |
* "Journal Corruption" |
* _____________|_________
* | |
* nth trans corrupt OR nth trans
* and (n+1)th interrupted interrupted
* before commit block
* could reach the disk.
* (Cannot find the difference in above
* mentioned conditions. Hence assume
* "Interrupted Commit".)
*/
/* Found an expected commit block: if checksums
* are present verify them in PASS_SCAN; else not
* much to do other than move on to the next sequence
* number. */ * number. */
if (pass == PASS_SCAN &&
JBD2_HAS_COMPAT_FEATURE(journal,
JBD2_FEATURE_COMPAT_CHECKSUM)) {
int chksum_err, chksum_seen;
struct commit_header *cbh =
(struct commit_header *)bh->b_data;
unsigned found_chksum =
be32_to_cpu(cbh->h_chksum[0]);
chksum_err = chksum_seen = 0;
if (info->end_transaction) {
printk(KERN_ERR "JBD: Transaction %u "
"found to be corrupt.\n",
next_commit_ID - 1);
brelse(bh);
break;
}
if (crc32_sum == found_chksum &&
cbh->h_chksum_type == JBD2_CRC32_CHKSUM &&
cbh->h_chksum_size ==
JBD2_CRC32_CHKSUM_SIZE)
chksum_seen = 1;
else if (!(cbh->h_chksum_type == 0 &&
cbh->h_chksum_size == 0 &&
found_chksum == 0 &&
!chksum_seen))
/*
* If fs is mounted using an old kernel and then
* kernel with journal_chksum is used then we
* get a situation where the journal flag has
* checksum flag set but checksums are not
* present i.e chksum = 0, in the individual
* commit blocks.
* Hence to avoid checksum failures, in this
* situation, this extra check is added.
*/
chksum_err = 1;
if (chksum_err) {
info->end_transaction = next_commit_ID;
if (!JBD2_HAS_COMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){
printk(KERN_ERR
"JBD: Transaction %u "
"found to be corrupt.\n",
next_commit_ID);
brelse(bh);
break;
}
}
crc32_sum = ~0;
}
brelse(bh); brelse(bh);
next_commit_ID++; next_commit_ID++;
continue; continue;
@ -554,9 +688,10 @@ static int do_one_pass(journal_t *journal,
* transaction marks the end of the valid log. * transaction marks the end of the valid log.
*/ */
if (pass == PASS_SCAN) if (pass == PASS_SCAN) {
info->end_transaction = next_commit_ID; if (!info->end_transaction)
else { info->end_transaction = next_commit_ID;
} else {
/* It's really bad news if different passes end up at /* It's really bad news if different passes end up at
* different places (but possible due to IO errors). */ * different places (but possible due to IO errors). */
if (info->end_transaction != next_commit_ID) { if (info->end_transaction != next_commit_ID) {

Просмотреть файл

@ -171,13 +171,15 @@ int __init jbd2_journal_init_revoke_caches(void)
{ {
jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record", jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record",
sizeof(struct jbd2_revoke_record_s), sizeof(struct jbd2_revoke_record_s),
0, SLAB_HWCACHE_ALIGN, NULL); 0,
SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
NULL);
if (jbd2_revoke_record_cache == 0) if (jbd2_revoke_record_cache == 0)
return -ENOMEM; return -ENOMEM;
jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table", jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table",
sizeof(struct jbd2_revoke_table_s), sizeof(struct jbd2_revoke_table_s),
0, 0, NULL); 0, SLAB_TEMPORARY, NULL);
if (jbd2_revoke_table_cache == 0) { if (jbd2_revoke_table_cache == 0) {
kmem_cache_destroy(jbd2_revoke_record_cache); kmem_cache_destroy(jbd2_revoke_record_cache);
jbd2_revoke_record_cache = NULL; jbd2_revoke_record_cache = NULL;

Просмотреть файл

@ -54,11 +54,13 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
spin_lock_init(&transaction->t_handle_lock); spin_lock_init(&transaction->t_handle_lock);
/* Set up the commit timer for the new transaction. */ /* Set up the commit timer for the new transaction. */
journal->j_commit_timer.expires = transaction->t_expires; journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
add_timer(&journal->j_commit_timer); add_timer(&journal->j_commit_timer);
J_ASSERT(journal->j_running_transaction == NULL); J_ASSERT(journal->j_running_transaction == NULL);
journal->j_running_transaction = transaction; journal->j_running_transaction = transaction;
transaction->t_max_wait = 0;
transaction->t_start = jiffies;
return transaction; return transaction;
} }
@ -85,6 +87,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
int nblocks = handle->h_buffer_credits; int nblocks = handle->h_buffer_credits;
transaction_t *new_transaction = NULL; transaction_t *new_transaction = NULL;
int ret = 0; int ret = 0;
unsigned long ts = jiffies;
if (nblocks > journal->j_max_transaction_buffers) { if (nblocks > journal->j_max_transaction_buffers) {
printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
@ -217,6 +220,12 @@ repeat_locked:
/* OK, account for the buffers that this operation expects to /* OK, account for the buffers that this operation expects to
* use and add the handle to the running transaction. */ * use and add the handle to the running transaction. */
if (time_after(transaction->t_start, ts)) {
ts = jbd2_time_diff(ts, transaction->t_start);
if (ts > transaction->t_max_wait)
transaction->t_max_wait = ts;
}
handle->h_transaction = transaction; handle->h_transaction = transaction;
transaction->t_outstanding_credits += nblocks; transaction->t_outstanding_credits += nblocks;
transaction->t_updates++; transaction->t_updates++;
@ -232,6 +241,8 @@ out:
return ret; return ret;
} }
static struct lock_class_key jbd2_handle_key;
/* Allocate a new handle. This should probably be in a slab... */ /* Allocate a new handle. This should probably be in a slab... */
static handle_t *new_handle(int nblocks) static handle_t *new_handle(int nblocks)
{ {
@ -242,6 +253,9 @@ static handle_t *new_handle(int nblocks)
handle->h_buffer_credits = nblocks; handle->h_buffer_credits = nblocks;
handle->h_ref = 1; handle->h_ref = 1;
lockdep_init_map(&handle->h_lockdep_map, "jbd2_handle",
&jbd2_handle_key, 0);
return handle; return handle;
} }
@ -284,7 +298,11 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
jbd2_free_handle(handle); jbd2_free_handle(handle);
current->journal_info = NULL; current->journal_info = NULL;
handle = ERR_PTR(err); handle = ERR_PTR(err);
goto out;
} }
lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_);
out:
return handle; return handle;
} }
@ -1164,7 +1182,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
} }
/* That test should have eliminated the following case: */ /* That test should have eliminated the following case: */
J_ASSERT_JH(jh, jh->b_frozen_data == 0); J_ASSERT_JH(jh, jh->b_frozen_data == NULL);
JBUFFER_TRACE(jh, "file as BJ_Metadata"); JBUFFER_TRACE(jh, "file as BJ_Metadata");
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
@ -1410,6 +1428,8 @@ int jbd2_journal_stop(handle_t *handle)
spin_unlock(&journal->j_state_lock); spin_unlock(&journal->j_state_lock);
} }
lock_release(&handle->h_lockdep_map, 1, _THIS_IP_);
jbd2_free_handle(handle); jbd2_free_handle(handle);
return err; return err;
} }
@ -1512,7 +1532,7 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
if (jh->b_jlist != BJ_None) if (jh->b_jlist != BJ_None)
J_ASSERT_JH(jh, transaction != 0); J_ASSERT_JH(jh, transaction != NULL);
switch (jh->b_jlist) { switch (jh->b_jlist) {
case BJ_None: case BJ_None:
@ -1581,11 +1601,11 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
if (buffer_locked(bh) || buffer_dirty(bh)) if (buffer_locked(bh) || buffer_dirty(bh))
goto out; goto out;
if (jh->b_next_transaction != 0) if (jh->b_next_transaction != NULL)
goto out; goto out;
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
if (jh->b_transaction != 0 && jh->b_cp_transaction == 0) { if (jh->b_transaction != NULL && jh->b_cp_transaction == NULL) {
if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) { if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) {
/* A written-back ordered data buffer */ /* A written-back ordered data buffer */
JBUFFER_TRACE(jh, "release data"); JBUFFER_TRACE(jh, "release data");
@ -1593,7 +1613,7 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
jbd2_journal_remove_journal_head(bh); jbd2_journal_remove_journal_head(bh);
__brelse(bh); __brelse(bh);
} }
} else if (jh->b_cp_transaction != 0 && jh->b_transaction == 0) { } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
/* written-back checkpointed metadata buffer */ /* written-back checkpointed metadata buffer */
if (jh->b_jlist == BJ_None) { if (jh->b_jlist == BJ_None) {
JBUFFER_TRACE(jh, "remove from checkpoint list"); JBUFFER_TRACE(jh, "remove from checkpoint list");
@ -1953,7 +1973,7 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
J_ASSERT_JH(jh, jh->b_transaction == transaction || J_ASSERT_JH(jh, jh->b_transaction == transaction ||
jh->b_transaction == 0); jh->b_transaction == NULL);
if (jh->b_transaction && jh->b_jlist == jlist) if (jh->b_transaction && jh->b_jlist == jlist)
return; return;

Просмотреть файл

@ -446,6 +446,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
} }
return seg; return seg;
} }
EXPORT_SYMBOL(iov_shorten);
ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)

Просмотреть файл

@ -310,6 +310,8 @@ static inline int constant_fls(int x)
_find_first_zero_bit_le(p,sz) _find_first_zero_bit_le(p,sz)
#define ext2_find_next_zero_bit(p,sz,off) \ #define ext2_find_next_zero_bit(p,sz,off) \
_find_next_zero_bit_le(p,sz,off) _find_next_zero_bit_le(p,sz,off)
#define ext2_find_next_bit(p, sz, off) \
_find_next_bit_le(p, sz, off)
/* /*
* Minix is defined to use little-endian byte ordering. * Minix is defined to use little-endian byte ordering.

Просмотреть файл

@ -14,5 +14,7 @@
generic_find_first_zero_le_bit((unsigned long *)(addr), (size)) generic_find_first_zero_le_bit((unsigned long *)(addr), (size))
#define ext2_find_next_zero_bit(addr, size, off) \ #define ext2_find_next_zero_bit(addr, size, off) \
generic_find_next_zero_le_bit((unsigned long *)(addr), (size), (off)) generic_find_next_zero_le_bit((unsigned long *)(addr), (size), (off))
#define ext2_find_next_bit(addr, size, off) \
generic_find_next_le_bit((unsigned long *)(addr), (size), (off))
#endif /* _ASM_GENERIC_BITOPS_EXT2_NON_ATOMIC_H_ */ #endif /* _ASM_GENERIC_BITOPS_EXT2_NON_ATOMIC_H_ */

Просмотреть файл

@ -20,6 +20,8 @@
#define generic___test_and_clear_le_bit(nr, addr) __test_and_clear_bit(nr, addr) #define generic___test_and_clear_le_bit(nr, addr) __test_and_clear_bit(nr, addr)
#define generic_find_next_zero_le_bit(addr, size, offset) find_next_zero_bit(addr, size, offset) #define generic_find_next_zero_le_bit(addr, size, offset) find_next_zero_bit(addr, size, offset)
#define generic_find_next_le_bit(addr, size, offset) \
find_next_bit(addr, size, offset)
#elif defined(__BIG_ENDIAN) #elif defined(__BIG_ENDIAN)
@ -42,6 +44,8 @@
extern unsigned long generic_find_next_zero_le_bit(const unsigned long *addr, extern unsigned long generic_find_next_zero_le_bit(const unsigned long *addr,
unsigned long size, unsigned long offset); unsigned long size, unsigned long offset);
extern unsigned long generic_find_next_le_bit(const unsigned long *addr,
unsigned long size, unsigned long offset);
#else #else
#error "Please fix <asm/byteorder.h>" #error "Please fix <asm/byteorder.h>"

Просмотреть файл

@ -410,6 +410,8 @@ static inline int ext2_find_next_zero_bit(const void *vaddr, unsigned size,
res = ext2_find_first_zero_bit (p, size - 32 * (p - addr)); res = ext2_find_first_zero_bit (p, size - 32 * (p - addr));
return (p - addr) * 32 + res; return (p - addr) * 32 + res;
} }
#define ext2_find_next_bit(addr, size, off) \
generic_find_next_le_bit((unsigned long *)(addr), (size), (off))
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */

Просмотреть файл

@ -294,6 +294,8 @@ found_middle:
return result + ffz(__swab32(tmp)); return result + ffz(__swab32(tmp));
} }
#define ext2_find_next_bit(addr, size, off) \
generic_find_next_le_bit((unsigned long *)(addr), (size), (off))
#include <asm-generic/bitops/minix.h> #include <asm-generic/bitops/minix.h>
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */

Просмотреть файл

@ -359,6 +359,8 @@ static __inline__ int test_le_bit(unsigned long nr,
unsigned long generic_find_next_zero_le_bit(const unsigned long *addr, unsigned long generic_find_next_zero_le_bit(const unsigned long *addr,
unsigned long size, unsigned long offset); unsigned long size, unsigned long offset);
unsigned long generic_find_next_le_bit(const unsigned long *addr,
unsigned long size, unsigned long offset);
/* Bitmap functions for the ext2 filesystem */ /* Bitmap functions for the ext2 filesystem */
#define ext2_set_bit(nr,addr) \ #define ext2_set_bit(nr,addr) \
@ -378,6 +380,8 @@ unsigned long generic_find_next_zero_le_bit(const unsigned long *addr,
#define ext2_find_next_zero_bit(addr, size, off) \ #define ext2_find_next_zero_bit(addr, size, off) \
generic_find_next_zero_le_bit((unsigned long*)addr, size, off) generic_find_next_zero_le_bit((unsigned long*)addr, size, off)
/*
 * Arguments are parenthesized so that an expression argument (e.g. "p + 1")
 * is cast and passed as a whole rather than having the cast bind to its
 * first operand only.
 */
#define ext2_find_next_bit(addr, size, off) \
	generic_find_next_le_bit((unsigned long *)(addr), (size), (off))
/* Bitmap functions for the minix filesystem. */ /* Bitmap functions for the minix filesystem. */
#define minix_test_and_set_bit(nr,addr) \ #define minix_test_and_set_bit(nr,addr) \

Просмотреть файл

@ -772,6 +772,8 @@ static inline int sched_find_first_bit(unsigned long *b)
test_and_clear_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr) test_and_clear_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr)
#define ext2_test_bit(nr, addr) \ #define ext2_test_bit(nr, addr) \
test_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr) test_bit((nr)^(__BITOPS_WORDSIZE - 8), (unsigned long *)addr)
#define ext2_find_next_bit(addr, size, off) \
generic_find_next_le_bit((unsigned long *)(addr), (size), (off))
#ifndef __s390x__ #ifndef __s390x__

Просмотреть файл

@ -192,6 +192,8 @@ int sync_dirty_buffer(struct buffer_head *bh);
int submit_bh(int, struct buffer_head *); int submit_bh(int, struct buffer_head *);
void write_boundary_block(struct block_device *bdev, void write_boundary_block(struct block_device *bdev,
sector_t bblock, unsigned blocksize); sector_t bblock, unsigned blocksize);
int bh_uptodate_or_lock(struct buffer_head *bh);
int bh_submit_read(struct buffer_head *bh);
extern int buffer_heads_over_limit; extern int buffer_heads_over_limit;

Просмотреть файл

@ -20,6 +20,8 @@
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/magic.h> #include <linux/magic.h>
#include <linux/ext4_fs_i.h>
/* /*
* The second extended filesystem constants/structures * The second extended filesystem constants/structures
*/ */
@ -51,6 +53,50 @@
#define ext4_debug(f, a...) do {} while (0) #define ext4_debug(f, a...) do {} while (0)
#endif #endif
#define EXT4_MULTIBLOCK_ALLOCATOR 1
/* prefer goal again. length */
#define EXT4_MB_HINT_MERGE 1
/* blocks already reserved */
#define EXT4_MB_HINT_RESERVED 2
/* metadata is being allocated */
#define EXT4_MB_HINT_METADATA 4
/* first blocks in the file */
#define EXT4_MB_HINT_FIRST 8
/* search for the best chunk */
#define EXT4_MB_HINT_BEST 16
/* data is being allocated */
#define EXT4_MB_HINT_DATA 32
/* don't preallocate (for tails) */
#define EXT4_MB_HINT_NOPREALLOC 64
/* allocate for locality group */
#define EXT4_MB_HINT_GROUP_ALLOC 128
/* allocate goal blocks or none */
#define EXT4_MB_HINT_GOAL_ONLY 256
/* goal is meaningful */
#define EXT4_MB_HINT_TRY_GOAL 512
/*
 * Describes a single block allocation request.  A pointer to this structure
 * is what ext4_mb_new_blocks() takes as its request argument.
 */
struct ext4_allocation_request {
/* target inode for block we're allocating */
struct inode *inode;
/* logical block in target inode */
ext4_lblk_t logical;
/* physical target block (a hint) */
ext4_fsblk_t goal;
/* the closest logical allocated block to the left */
ext4_lblk_t lleft;
/* physical block corresponding to lleft */
ext4_fsblk_t pleft;
/* the closest logical allocated block to the right */
ext4_lblk_t lright;
/* physical block corresponding to lright */
ext4_fsblk_t pright;
/* how many blocks we want to allocate */
unsigned long len;
/* flags: a combination of the EXT4_MB_HINT_* values defined above */
unsigned long flags;
};
/* /*
* Special inodes numbers * Special inodes numbers
*/ */
@ -73,8 +119,8 @@
* Macro-instructions used to manage several block sizes * Macro-instructions used to manage several block sizes
*/ */
#define EXT4_MIN_BLOCK_SIZE 1024 #define EXT4_MIN_BLOCK_SIZE 1024
#define EXT4_MAX_BLOCK_SIZE 4096 #define EXT4_MAX_BLOCK_SIZE 65536
#define EXT4_MIN_BLOCK_LOG_SIZE 10 #define EXT4_MIN_BLOCK_LOG_SIZE 10
#ifdef __KERNEL__ #ifdef __KERNEL__
# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) # define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize)
#else #else
@ -118,6 +164,11 @@ struct ext4_group_desc
__le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */
__le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */ __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */
__le32 bg_inode_table_hi; /* Inodes table block MSB */ __le32 bg_inode_table_hi; /* Inodes table block MSB */
__le16 bg_free_blocks_count_hi;/* Free blocks count MSB */
__le16 bg_free_inodes_count_hi;/* Free inodes count MSB */
__le16 bg_used_dirs_count_hi; /* Directories count MSB */
__le16 bg_itable_unused_hi; /* Unused inodes count MSB */
__u32 bg_reserved2[3];
}; };
#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ #define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */
@ -178,8 +229,9 @@ struct ext4_group_desc
#define EXT4_NOTAIL_FL 0x00008000 /* file tail should not be merged */ #define EXT4_NOTAIL_FL 0x00008000 /* file tail should not be merged */
#define EXT4_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define EXT4_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ #define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ #define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
#define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ #define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
@ -237,6 +289,7 @@ struct ext4_new_group_data {
#endif #endif
#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) #define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
#define EXT4_IOC_MIGRATE _IO('f', 7)
/* /*
* ioctl commands in 32 bit emulation * ioctl commands in 32 bit emulation
@ -275,18 +328,18 @@ struct ext4_mount_options {
struct ext4_inode { struct ext4_inode {
__le16 i_mode; /* File mode */ __le16 i_mode; /* File mode */
__le16 i_uid; /* Low 16 bits of Owner Uid */ __le16 i_uid; /* Low 16 bits of Owner Uid */
__le32 i_size; /* Size in bytes */ __le32 i_size_lo; /* Size in bytes */
__le32 i_atime; /* Access time */ __le32 i_atime; /* Access time */
__le32 i_ctime; /* Inode Change time */ __le32 i_ctime; /* Inode Change time */
__le32 i_mtime; /* Modification time */ __le32 i_mtime; /* Modification time */
__le32 i_dtime; /* Deletion Time */ __le32 i_dtime; /* Deletion Time */
__le16 i_gid; /* Low 16 bits of Group Id */ __le16 i_gid; /* Low 16 bits of Group Id */
__le16 i_links_count; /* Links count */ __le16 i_links_count; /* Links count */
__le32 i_blocks; /* Blocks count */ __le32 i_blocks_lo; /* Blocks count */
__le32 i_flags; /* File flags */ __le32 i_flags; /* File flags */
union { union {
struct { struct {
__u32 l_i_reserved1; __le32 l_i_version;
} linux1; } linux1;
struct { struct {
__u32 h_i_translator; __u32 h_i_translator;
@ -297,12 +350,12 @@ struct ext4_inode {
} osd1; /* OS dependent 1 */ } osd1; /* OS dependent 1 */
__le32 i_block[EXT4_N_BLOCKS];/* Pointers to blocks */ __le32 i_block[EXT4_N_BLOCKS];/* Pointers to blocks */
__le32 i_generation; /* File version (for NFS) */ __le32 i_generation; /* File version (for NFS) */
__le32 i_file_acl; /* File ACL */ __le32 i_file_acl_lo; /* File ACL */
__le32 i_dir_acl; /* Directory ACL */ __le32 i_size_high;
__le32 i_obso_faddr; /* Obsoleted fragment address */ __le32 i_obso_faddr; /* Obsoleted fragment address */
union { union {
struct { struct {
__le16 l_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */ __le16 l_i_blocks_high; /* were l_i_reserved1 */
__le16 l_i_file_acl_high; __le16 l_i_file_acl_high;
__le16 l_i_uid_high; /* these 2 fields */ __le16 l_i_uid_high; /* these 2 fields */
__le16 l_i_gid_high; /* were reserved2[0] */ __le16 l_i_gid_high; /* were reserved2[0] */
@ -328,9 +381,9 @@ struct ext4_inode {
__le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */ __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
__le32 i_crtime; /* File Creation time */ __le32 i_crtime; /* File Creation time */
__le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */ __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
__le32 i_version_hi; /* high 32 bits for 64-bit version */
}; };
#define i_size_high i_dir_acl
#define EXT4_EPOCH_BITS 2 #define EXT4_EPOCH_BITS 2
#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1) #define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
@ -402,9 +455,12 @@ do { \
raw_inode->xtime ## _extra); \ raw_inode->xtime ## _extra); \
} while (0) } while (0)
#define i_disk_version osd1.linux1.l_i_version
#if defined(__KERNEL__) || defined(__linux__) #if defined(__KERNEL__) || defined(__linux__)
#define i_reserved1 osd1.linux1.l_i_reserved1 #define i_reserved1 osd1.linux1.l_i_reserved1
#define i_file_acl_high osd2.linux2.l_i_file_acl_high #define i_file_acl_high osd2.linux2.l_i_file_acl_high
#define i_blocks_high osd2.linux2.l_i_blocks_high
#define i_uid_low i_uid #define i_uid_low i_uid
#define i_gid_low i_gid #define i_gid_low i_gid
#define i_uid_high osd2.linux2.l_i_uid_high #define i_uid_high osd2.linux2.l_i_uid_high
@ -461,7 +517,10 @@ do { \
#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
#define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */ #define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */
#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */
/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */
#ifndef _LINUX_EXT2_FS_H #ifndef _LINUX_EXT2_FS_H
#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
@ -481,6 +540,7 @@ do { \
#define ext4_test_bit ext2_test_bit #define ext4_test_bit ext2_test_bit
#define ext4_find_first_zero_bit ext2_find_first_zero_bit #define ext4_find_first_zero_bit ext2_find_first_zero_bit
#define ext4_find_next_zero_bit ext2_find_next_zero_bit #define ext4_find_next_zero_bit ext2_find_next_zero_bit
#define ext4_find_next_bit ext2_find_next_bit
/* /*
* Maximal mount counts between two filesystem checks * Maximal mount counts between two filesystem checks
@ -671,6 +731,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 #define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
#define EXT4_FEATURE_RO_COMPAT_HUGE_FILE 0x0008
#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 #define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010
#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
@ -682,6 +743,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
#define EXT4_FEATURE_INCOMPAT_META_BG 0x0010 #define EXT4_FEATURE_INCOMPAT_META_BG 0x0010
#define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ #define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */
#define EXT4_FEATURE_INCOMPAT_64BIT 0x0080 #define EXT4_FEATURE_INCOMPAT_64BIT 0x0080
#define EXT4_FEATURE_INCOMPAT_MMP 0x0100
#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
#define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
@ -696,7 +758,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \ EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
EXT4_FEATURE_RO_COMPAT_BTREE_DIR) EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
EXT4_FEATURE_RO_COMPAT_HUGE_FILE)
/* /*
* Default values for user and/or group using reserved blocks * Default values for user and/or group using reserved blocks
@ -767,6 +830,26 @@ struct ext4_dir_entry_2 {
#define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1) #define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1)
#define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \ #define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
~EXT4_DIR_ROUND) ~EXT4_DIR_ROUND)
#define EXT4_MAX_REC_LEN ((1<<16)-1)
/*
 * Decode a directory entry's on-disk 16-bit record length.
 *
 * The stored value EXT4_MAX_REC_LEN stands for a record length of 1 << 16;
 * every other stored value is the length itself.
 */
static inline unsigned ext4_rec_len_from_disk(__le16 dlen)
{
	unsigned raw = le16_to_cpu(dlen);

	return (raw == EXT4_MAX_REC_LEN) ? (1 << 16) : raw;
}
/*
 * Encode a record length for the on-disk 16-bit field.
 *
 * A length of exactly 1 << 16 is stored as EXT4_MAX_REC_LEN; a length
 * greater than 1 << 16 is a BUG(); any other length is stored as-is.
 */
static inline __le16 ext4_rec_len_to_disk(unsigned len)
{
	if (len > (1 << 16))
		BUG();
	if (len == (1 << 16))
		return cpu_to_le16(EXT4_MAX_REC_LEN);
	return cpu_to_le16(len);
}
/* /*
* Hash Tree Directory indexing * Hash Tree Directory indexing
* (c) Daniel Phillips, 2001 * (c) Daniel Phillips, 2001
@ -810,7 +893,7 @@ struct ext4_iloc
{ {
struct buffer_head *bh; struct buffer_head *bh;
unsigned long offset; unsigned long offset;
unsigned long block_group; ext4_group_t block_group;
}; };
static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc) static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc)
@ -835,7 +918,7 @@ struct dir_private_info {
/* calculate the first block number of the group */ /* calculate the first block number of the group */
static inline ext4_fsblk_t static inline ext4_fsblk_t
ext4_group_first_block_no(struct super_block *sb, unsigned long group_no) ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
{ {
return group_no * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) + return group_no * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) +
le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
@ -866,21 +949,24 @@ extern unsigned int ext4_block_group(struct super_block *sb,
ext4_fsblk_t blocknr); ext4_fsblk_t blocknr);
extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb, extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb,
ext4_fsblk_t blocknr); ext4_fsblk_t blocknr);
extern int ext4_bg_has_super(struct super_block *sb, int group); extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
extern unsigned long ext4_bg_num_gdb(struct super_block *sb, int group); extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
ext4_group_t group);
extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode, extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, int *errp); ext4_fsblk_t goal, int *errp);
extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode, extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, unsigned long *count, int *errp); ext4_fsblk_t goal, unsigned long *count, int *errp);
extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, unsigned long *count, int *errp);
extern void ext4_free_blocks (handle_t *handle, struct inode *inode, extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
ext4_fsblk_t block, unsigned long count); ext4_fsblk_t block, unsigned long count, int metadata);
extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb,
ext4_fsblk_t block, unsigned long count, ext4_fsblk_t block, unsigned long count,
unsigned long *pdquot_freed_blocks); unsigned long *pdquot_freed_blocks);
extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *); extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *);
extern void ext4_check_blocks_bitmap (struct super_block *); extern void ext4_check_blocks_bitmap (struct super_block *);
extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
unsigned int block_group, ext4_group_t block_group,
struct buffer_head ** bh); struct buffer_head ** bh);
extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
extern void ext4_init_block_alloc_info(struct inode *); extern void ext4_init_block_alloc_info(struct inode *);
@ -911,15 +997,32 @@ extern unsigned long ext4_count_dirs (struct super_block *);
extern void ext4_check_inodes_bitmap (struct super_block *); extern void ext4_check_inodes_bitmap (struct super_block *);
extern unsigned long ext4_count_free (struct buffer_head *, unsigned); extern unsigned long ext4_count_free (struct buffer_head *, unsigned);
/* mballoc.c */
extern long ext4_mb_stats;
extern long ext4_mb_max_to_scan;
extern int ext4_mb_init(struct super_block *, int);
extern int ext4_mb_release(struct super_block *);
extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
struct ext4_allocation_request *, int *);
extern int ext4_mb_reserve_blocks(struct super_block *, int);
extern void ext4_mb_discard_inode_preallocations(struct inode *);
extern int __init init_ext4_mballoc(void);
extern void exit_ext4_mballoc(void);
extern void ext4_mb_free_blocks(handle_t *, struct inode *,
unsigned long, unsigned long, int, unsigned long *);
/* inode.c */ /* inode.c */
int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t blocknr); struct buffer_head *bh, ext4_fsblk_t blocknr);
struct buffer_head * ext4_getblk (handle_t *, struct inode *, long, int, int *); struct buffer_head *ext4_getblk(handle_t *, struct inode *,
struct buffer_head * ext4_bread (handle_t *, struct inode *, int, int, int *); ext4_lblk_t, int, int *);
struct buffer_head *ext4_bread(handle_t *, struct inode *,
ext4_lblk_t, int, int *);
int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result, ext4_lblk_t iblock, unsigned long maxblocks,
int create, int extend_disksize); struct buffer_head *bh_result,
int create, int extend_disksize);
extern void ext4_read_inode (struct inode *); extern void ext4_read_inode (struct inode *);
extern int ext4_write_inode (struct inode *, int); extern int ext4_write_inode (struct inode *, int);
@ -943,6 +1046,9 @@ extern int ext4_ioctl (struct inode *, struct file *, unsigned int,
unsigned long); unsigned long);
extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long); extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long);
/* migrate.c */
extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int,
unsigned long);
/* namei.c */ /* namei.c */
extern int ext4_orphan_add(handle_t *, struct inode *); extern int ext4_orphan_add(handle_t *, struct inode *);
extern int ext4_orphan_del(handle_t *, struct inode *); extern int ext4_orphan_del(handle_t *, struct inode *);
@ -965,6 +1071,12 @@ extern void ext4_abort (struct super_block *, const char *, const char *, ...)
extern void ext4_warning (struct super_block *, const char *, const char *, ...) extern void ext4_warning (struct super_block *, const char *, const char *, ...)
__attribute__ ((format (printf, 3, 4))); __attribute__ ((format (printf, 3, 4)));
extern void ext4_update_dynamic_rev (struct super_block *sb); extern void ext4_update_dynamic_rev (struct super_block *sb);
extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
__u32 compat);
extern int ext4_update_rocompat_feature(handle_t *handle,
struct super_block *sb, __u32 rocompat);
extern int ext4_update_incompat_feature(handle_t *handle,
struct super_block *sb, __u32 incompat);
extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
struct ext4_group_desc *bg); struct ext4_group_desc *bg);
extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
@ -1017,6 +1129,29 @@ static inline void ext4_r_blocks_count_set(struct ext4_super_block *es,
es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32); es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
} }
/*
 * Assemble the file size from the two little-endian 32-bit halves kept in
 * the raw inode: i_size_high supplies bits 32 and up, i_size_lo the rest.
 */
static inline loff_t ext4_isize(struct ext4_inode *raw_inode)
{
	/* Widen before shifting so the upper half is not lost. */
	loff_t upper = le32_to_cpu(raw_inode->i_size_high);

	return (upper << 32) | le32_to_cpu(raw_inode->i_size_lo);
}
/*
 * Store i_size into the raw inode as two little-endian 32-bit halves:
 * bits 32 and up go to i_size_high, the low bits to i_size_lo.
 */
static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
{
	raw_inode->i_size_high = cpu_to_le32(i_size >> 32);
	raw_inode->i_size_lo = cpu_to_le32(i_size);
}
/*
 * Look up the ext4_group_info entry for a block group.
 *
 * s_group_info is indexed in two steps: the group number shifted right by
 * EXT4_DESC_PER_BLOCK_BITS(sb) picks the first-level slot, and the group
 * number masked with EXT4_DESC_PER_BLOCK(sb) - 1 picks the entry inside it.
 * NOTE(review): the mask only works if EXT4_DESC_PER_BLOCK(sb) is a power
 * of two matching the shift count - confirm against the macro definitions.
 * No bounds checking is done on @group here.
 */
static inline
struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
						ext4_group_t group)
{
	struct ext4_group_info ***table = EXT4_SB(sb)->s_group_info;
	long outer = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
	long inner = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);

	return table[outer][inner];
}
#define ext4_std_error(sb, errno) \ #define ext4_std_error(sb, errno) \
@ -1048,7 +1183,7 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations;
extern int ext4_ext_tree_init(handle_t *handle, struct inode *); extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
extern int ext4_ext_writepage_trans_blocks(struct inode *, int); extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t iblock, ext4_lblk_t iblock,
unsigned long max_blocks, struct buffer_head *bh_result, unsigned long max_blocks, struct buffer_head *bh_result,
int create, int extend_disksize); int create, int extend_disksize);
extern void ext4_ext_truncate(struct inode *, struct page *); extern void ext4_ext_truncate(struct inode *, struct page *);
@ -1056,19 +1191,10 @@ extern void ext4_ext_init(struct super_block *);
extern void ext4_ext_release(struct super_block *); extern void ext4_ext_release(struct super_block *);
extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
loff_t len); loff_t len);
static inline int extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, sector_t block, unsigned long max_blocks,
unsigned long max_blocks, struct buffer_head *bh, struct buffer_head *bh, int create,
int create, int extend_disksize) int extend_disksize);
{
if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
return ext4_ext_get_blocks(handle, inode, block, max_blocks,
bh, create, extend_disksize);
return ext4_get_blocks_handle(handle, inode, block, max_blocks, bh,
create, extend_disksize);
}
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _LINUX_EXT4_FS_H */ #endif /* _LINUX_EXT4_FS_H */

Просмотреть файл

@ -124,20 +124,6 @@ struct ext4_ext_path {
#define EXT4_EXT_CACHE_GAP 1 #define EXT4_EXT_CACHE_GAP 1
#define EXT4_EXT_CACHE_EXTENT 2 #define EXT4_EXT_CACHE_EXTENT 2
/*
* to be called by ext4_ext_walk_space()
* negative retcode - error
* positive retcode - signal for ext4_ext_walk_space(), see below
* callback must return valid extent (passed or newly created)
*/
typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *,
struct ext4_ext_cache *,
void *);
#define EXT_CONTINUE 0
#define EXT_BREAK 1
#define EXT_REPEAT 2
#define EXT_MAX_BLOCK 0xffffffff #define EXT_MAX_BLOCK 0xffffffff
@ -226,6 +212,8 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
(le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN)); (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN));
} }
extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
extern int ext4_extent_tree_init(handle_t *, struct inode *); extern int ext4_extent_tree_init(handle_t *, struct inode *);
extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
extern int ext4_ext_try_to_merge(struct inode *inode, extern int ext4_ext_try_to_merge(struct inode *inode,
@ -233,8 +221,11 @@ extern int ext4_ext_try_to_merge(struct inode *inode,
struct ext4_extent *); struct ext4_extent *);
extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *); extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
extern struct ext4_ext_path * ext4_ext_find_extent(struct inode *, int, struct ext4_ext_path *); struct ext4_ext_path *);
extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
ext4_lblk_t *, ext4_fsblk_t *);
extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
ext4_lblk_t *, ext4_fsblk_t *);
#endif /* _LINUX_EXT4_EXTENTS */ #endif /* _LINUX_EXT4_EXTENTS */

Просмотреть файл

@ -27,6 +27,12 @@ typedef int ext4_grpblk_t;
/* data type for filesystem-wide blocks number */ /* data type for filesystem-wide blocks number */
typedef unsigned long long ext4_fsblk_t; typedef unsigned long long ext4_fsblk_t;
/* data type for file logical block number */
typedef __u32 ext4_lblk_t;
/* data type for block group number */
typedef unsigned long ext4_group_t;
struct ext4_reserve_window { struct ext4_reserve_window {
ext4_fsblk_t _rsv_start; /* First byte reserved */ ext4_fsblk_t _rsv_start; /* First byte reserved */
ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */ ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */
@ -48,7 +54,7 @@ struct ext4_block_alloc_info {
* most-recently-allocated block in this file. * most-recently-allocated block in this file.
* We use this for detecting linearly ascending allocation requests. * We use this for detecting linearly ascending allocation requests.
*/ */
__u32 last_alloc_logical_block; ext4_lblk_t last_alloc_logical_block;
/* /*
* Was i_next_alloc_goal in ext4_inode_info * Was i_next_alloc_goal in ext4_inode_info
* is the *physical* companion to i_next_alloc_block. * is the *physical* companion to i_next_alloc_block.
@ -67,7 +73,7 @@ struct ext4_block_alloc_info {
*/ */
struct ext4_ext_cache { struct ext4_ext_cache {
ext4_fsblk_t ec_start; ext4_fsblk_t ec_start;
__u32 ec_block; ext4_lblk_t ec_block;
__u32 ec_len; /* must be 32bit to return holes */ __u32 ec_len; /* must be 32bit to return holes */
__u32 ec_type; __u32 ec_type;
}; };
@ -79,7 +85,6 @@ struct ext4_inode_info {
__le32 i_data[15]; /* unconverted */ __le32 i_data[15]; /* unconverted */
__u32 i_flags; __u32 i_flags;
ext4_fsblk_t i_file_acl; ext4_fsblk_t i_file_acl;
__u32 i_dir_acl;
__u32 i_dtime; __u32 i_dtime;
/* /*
@ -89,13 +94,13 @@ struct ext4_inode_info {
* place a file's data blocks near its inode block, and new inodes * place a file's data blocks near its inode block, and new inodes
* near to their parent directory's inode. * near to their parent directory's inode.
*/ */
__u32 i_block_group; ext4_group_t i_block_group;
__u32 i_state; /* Dynamic state flags for ext4 */ __u32 i_state; /* Dynamic state flags for ext4 */
/* block reservation info */ /* block reservation info */
struct ext4_block_alloc_info *i_block_alloc_info; struct ext4_block_alloc_info *i_block_alloc_info;
__u32 i_dir_start_lookup; ext4_lblk_t i_dir_start_lookup;
#ifdef CONFIG_EXT4DEV_FS_XATTR #ifdef CONFIG_EXT4DEV_FS_XATTR
/* /*
* Extended attributes can be read independently of the main file * Extended attributes can be read independently of the main file
@ -134,16 +139,16 @@ struct ext4_inode_info {
__u16 i_extra_isize; __u16 i_extra_isize;
/* /*
* truncate_mutex is for serialising ext4_truncate() against * i_data_sem is for serialising ext4_truncate() against
* ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's
* data tree are chopped off during truncate. We can't do that in * data tree are chopped off during truncate. We can't do that in
* ext4 because whenever we perform intermediate commits during * ext4 because whenever we perform intermediate commits during
* truncate, the inode and all the metadata blocks *must* be in a * truncate, the inode and all the metadata blocks *must* be in a
* consistent state which allows truncation of the orphans to restart * consistent state which allows truncation of the orphans to restart
* during recovery. Hence we must fix the get_block-vs-truncate race * during recovery. Hence we must fix the get_block-vs-truncate race
* by other means, so we have truncate_mutex. * by other means, so we have i_data_sem.
*/ */
struct mutex truncate_mutex; struct rw_semaphore i_data_sem;
struct inode vfs_inode; struct inode vfs_inode;
unsigned long i_ext_generation; unsigned long i_ext_generation;
@ -153,6 +158,10 @@ struct ext4_inode_info {
* struct timespec i_{a,c,m}time in the generic inode. * struct timespec i_{a,c,m}time in the generic inode.
*/ */
struct timespec i_crtime; struct timespec i_crtime;
/* mballoc */
struct list_head i_prealloc_list;
spinlock_t i_prealloc_lock;
}; };
#endif /* _LINUX_EXT4_FS_I */ #endif /* _LINUX_EXT4_FS_I */

Просмотреть файл

@ -35,9 +35,10 @@ struct ext4_sb_info {
unsigned long s_itb_per_group; /* Number of inode table blocks per group */ unsigned long s_itb_per_group; /* Number of inode table blocks per group */
unsigned long s_gdb_count; /* Number of group descriptor blocks */ unsigned long s_gdb_count; /* Number of group descriptor blocks */
unsigned long s_desc_per_block; /* Number of group descriptors per block */ unsigned long s_desc_per_block; /* Number of group descriptors per block */
unsigned long s_groups_count; /* Number of groups in the fs */ ext4_group_t s_groups_count; /* Number of groups in the fs */
unsigned long s_overhead_last; /* Last calculated overhead */ unsigned long s_overhead_last; /* Last calculated overhead */
unsigned long s_blocks_last; /* Last seen block count */ unsigned long s_blocks_last; /* Last seen block count */
loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
struct buffer_head * s_sbh; /* Buffer containing the super block */ struct buffer_head * s_sbh; /* Buffer containing the super block */
struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */ struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */
struct buffer_head ** s_group_desc; struct buffer_head ** s_group_desc;
@ -90,6 +91,58 @@ struct ext4_sb_info {
unsigned long s_ext_blocks; unsigned long s_ext_blocks;
unsigned long s_ext_extents; unsigned long s_ext_extents;
#endif #endif
/* for buddy allocator */
struct ext4_group_info ***s_group_info;
struct inode *s_buddy_cache;
long s_blocks_reserved;
spinlock_t s_reserve_lock;
struct list_head s_active_transaction;
struct list_head s_closed_transaction;
struct list_head s_committed_transaction;
spinlock_t s_md_lock;
tid_t s_last_transaction;
unsigned short *s_mb_offsets, *s_mb_maxs;
/* tunables */
unsigned long s_stripe;
unsigned long s_mb_stream_request;
unsigned long s_mb_max_to_scan;
unsigned long s_mb_min_to_scan;
unsigned long s_mb_stats;
unsigned long s_mb_order2_reqs;
unsigned long s_mb_group_prealloc;
/* where last allocation was done - for stream allocation */
unsigned long s_mb_last_group;
unsigned long s_mb_last_start;
/* history to debug policy */
struct ext4_mb_history *s_mb_history;
int s_mb_history_cur;
int s_mb_history_max;
int s_mb_history_num;
struct proc_dir_entry *s_mb_proc;
spinlock_t s_mb_history_lock;
int s_mb_history_filter;
/* stats for buddy allocator */
spinlock_t s_mb_pa_lock;
atomic_t s_bal_reqs; /* number of reqs with len > 1 */
atomic_t s_bal_success; /* we found long enough chunks */
atomic_t s_bal_allocated; /* in blocks */
atomic_t s_bal_ex_scanned; /* total extents scanned */
atomic_t s_bal_goals; /* goal hits */
atomic_t s_bal_breaks; /* too long searches */
atomic_t s_bal_2orders; /* 2^order hits */
spinlock_t s_bal_lock;
unsigned long s_mb_buddies_generated;
unsigned long long s_mb_generation_time;
atomic_t s_mb_lost_chunks;
atomic_t s_mb_preallocated;
atomic_t s_mb_discarded;
/* locality groups */
struct ext4_locality_group *s_locality_groups;
}; };
#endif /* _LINUX_EXT4_FS_SB */ #endif /* _LINUX_EXT4_FS_SB */

Просмотреть файл

@ -124,6 +124,7 @@ extern int dir_notify_enable;
#define MS_SHARED (1<<20) /* change to shared */ #define MS_SHARED (1<<20) /* change to shared */
#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */ #define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */
#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */
#define MS_I_VERSION (1<<23) /* Update inode I_version field */
#define MS_ACTIVE (1<<30) #define MS_ACTIVE (1<<30)
#define MS_NOUSER (1<<31) #define MS_NOUSER (1<<31)
@ -173,6 +174,7 @@ extern int dir_notify_enable;
((inode)->i_flags & (S_SYNC|S_DIRSYNC))) ((inode)->i_flags & (S_SYNC|S_DIRSYNC)))
#define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK)
#define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME) #define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME)
#define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION)
#define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA)
#define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND)
@ -599,7 +601,7 @@ struct inode {
uid_t i_uid; uid_t i_uid;
gid_t i_gid; gid_t i_gid;
dev_t i_rdev; dev_t i_rdev;
unsigned long i_version; u64 i_version;
loff_t i_size; loff_t i_size;
#ifdef __NEED_I_SIZE_ORDERED #ifdef __NEED_I_SIZE_ORDERED
seqcount_t i_size_seqcount; seqcount_t i_size_seqcount;
@ -1394,6 +1396,21 @@ static inline void inode_dec_link_count(struct inode *inode)
mark_inode_dirty(inode); mark_inode_dirty(inode);
} }
/**
 * inode_inc_iversion - increments i_version
 * @inode: inode that needs to be updated
 *
 * Bumps the i_version field by one while holding the inode's i_lock.
 * It is meant to be called every time the inode is modified, and the
 * filesystem has to be mounted with the i_version flag; this helper does
 * not check the mount flag itself.
 */
static inline void inode_inc_iversion(struct inode *inode)
{
	spin_lock(&inode->i_lock);
	inode->i_version += 1;
	spin_unlock(&inode->i_lock);
}
extern void touch_atime(struct vfsmount *mnt, struct dentry *dentry); extern void touch_atime(struct vfsmount *mnt, struct dentry *dentry);
static inline void file_accessed(struct file *file) static inline void file_accessed(struct file *file)
{ {

Просмотреть файл

@ -149,6 +149,28 @@ typedef struct journal_header_s
__be32 h_sequence; __be32 h_sequence;
} journal_header_t; } journal_header_t;
/*
* Checksum types.
*/
/*
 * Numeric codes naming a checksum algorithm.
 * NOTE(review): presumably the values stored in commit_header.h_chksum_type
 * - confirm against the commit-block writer.
 */
#define JBD2_CRC32_CHKSUM 1
#define JBD2_MD5_CHKSUM 2
#define JBD2_SHA1_CHKSUM 3
/* NOTE(review): looks like the CRC32 checksum length in bytes - confirm. */
#define JBD2_CRC32_CHKSUM_SIZE 4
/*
 * Despite the name this is an element count, not a byte count: it is the
 * number of u32-sized words that fit in 32 bytes, and it sizes the __be32
 * h_chksum[] array in struct commit_header.
 */
#define JBD2_CHECKSUM_BYTES (32 / sizeof(u32))
/*
* Commit block header for storing transactional checksums:
*/
/*
 * Layout of a commit block header that can carry a transaction checksum.
 * The multi-byte fields are declared __be32, i.e. big-endian.
 * NOTE(review): the first three fields look like they mirror
 * journal_header_t - confirm against that definition.
 */
struct commit_header {
__be32 h_magic;
__be32 h_blocktype;
__be32 h_sequence;
/* NOTE(review): presumably one of the JBD2_*_CHKSUM codes - confirm. */
unsigned char h_chksum_type;
/* NOTE(review): presumably the checksum length, e.g. JBD2_CRC32_CHKSUM_SIZE - confirm. */
unsigned char h_chksum_size;
/* Two filler bytes so the checksum array starts 16 bytes into the struct. */
unsigned char h_padding[2];
/* Checksum storage: JBD2_CHECKSUM_BYTES 32-bit words. */
__be32 h_chksum[JBD2_CHECKSUM_BYTES];
};
/* /*
* The block tag: used to describe a single buffer in the journal. * The block tag: used to describe a single buffer in the journal.
@ -242,31 +264,25 @@ typedef struct journal_superblock_s
((j)->j_format_version >= 2 && \ ((j)->j_format_version >= 2 && \
((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask)))) ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask))))
#define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001 #define JBD2_FEATURE_COMPAT_CHECKSUM 0x00000001
#define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002
#define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001
#define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002
#define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004
/* Features known to this kernel version: */ /* Features known to this kernel version: */
#define JBD2_KNOWN_COMPAT_FEATURES 0 #define JBD2_KNOWN_COMPAT_FEATURES JBD2_FEATURE_COMPAT_CHECKSUM
#define JBD2_KNOWN_ROCOMPAT_FEATURES 0 #define JBD2_KNOWN_ROCOMPAT_FEATURES 0
#define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \ #define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \
JBD2_FEATURE_INCOMPAT_64BIT) JBD2_FEATURE_INCOMPAT_64BIT | \
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)
#ifdef __KERNEL__ #ifdef __KERNEL__
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/sched.h> #include <linux/sched.h>
#define JBD2_ASSERTIONS #define J_ASSERT(assert) BUG_ON(!(assert))
#ifdef JBD2_ASSERTIONS
#define J_ASSERT(assert) \
do { \
if (!(assert)) { \
printk (KERN_EMERG \
"Assertion failure in %s() at %s:%d: \"%s\"\n", \
__FUNCTION__, __FILE__, __LINE__, # assert); \
BUG(); \
} \
} while (0)
#if defined(CONFIG_BUFFER_DEBUG) #if defined(CONFIG_BUFFER_DEBUG)
void buffer_assertion_failure(struct buffer_head *bh); void buffer_assertion_failure(struct buffer_head *bh);
@ -282,10 +298,6 @@ void buffer_assertion_failure(struct buffer_head *bh);
#define J_ASSERT_JH(jh, expr) J_ASSERT(expr) #define J_ASSERT_JH(jh, expr) J_ASSERT(expr)
#endif #endif
#else
#define J_ASSERT(assert) do { } while (0)
#endif /* JBD2_ASSERTIONS */
#if defined(JBD2_PARANOID_IOFAIL) #if defined(JBD2_PARANOID_IOFAIL)
#define J_EXPECT(expr, why...) J_ASSERT(expr) #define J_EXPECT(expr, why...) J_ASSERT(expr)
#define J_EXPECT_BH(bh, expr, why...) J_ASSERT_BH(bh, expr) #define J_EXPECT_BH(bh, expr, why...) J_ASSERT_BH(bh, expr)
@ -406,9 +418,23 @@ struct handle_s
unsigned int h_sync: 1; /* sync-on-close */ unsigned int h_sync: 1; /* sync-on-close */
unsigned int h_jdata: 1; /* force data journaling */ unsigned int h_jdata: 1; /* force data journaling */
unsigned int h_aborted: 1; /* fatal error on handle */ unsigned int h_aborted: 1; /* fatal error on handle */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map h_lockdep_map;
#endif
}; };
/*
* Some stats for checkpoint phase
*/
/*
 * Counters collected for the checkpoint phase of a transaction.  An
 * instance is embedded in struct transaction_s as t_chp_stats and is one
 * arm of the union in struct transaction_stats_s.
 * NOTE(review): the per-field meanings below are inferred from the names
 * only - confirm against the checkpoint code.
 */
struct transaction_chp_stats_s {
/* presumably time spent checkpointing (units not visible here) */
unsigned long cs_chp_time;
/* presumably count of forced transaction closes */
unsigned long cs_forced_to_close;
/* presumably count of buffers written during checkpoint */
unsigned long cs_written;
/* presumably count of buffers dropped without writing */
unsigned long cs_dropped;
};
/* The transaction_t type is the guts of the journaling mechanism. It /* The transaction_t type is the guts of the journaling mechanism. It
* tracks a compound transaction through its various states: * tracks a compound transaction through its various states:
* *
@ -456,6 +482,8 @@ struct transaction_s
/* /*
* Transaction's current state * Transaction's current state
* [no locking - only kjournald2 alters this] * [no locking - only kjournald2 alters this]
* [j_list_lock] guards transition of a transaction into T_FINISHED
* state and subsequent call of __jbd2_journal_drop_transaction()
* FIXME: needs barriers * FIXME: needs barriers
* KLUDGE: [use j_state_lock] * KLUDGE: [use j_state_lock]
*/ */
@ -543,6 +571,21 @@ struct transaction_s
*/ */
spinlock_t t_handle_lock; spinlock_t t_handle_lock;
/*
* Longest time some handle had to wait for running transaction
*/
unsigned long t_max_wait;
/*
* When transaction started
*/
unsigned long t_start;
/*
* Checkpointing stats [j_checkpoint_sem]
*/
struct transaction_chp_stats_s t_chp_stats;
/* /*
* Number of outstanding updates running on this transaction * Number of outstanding updates running on this transaction
* [t_handle_lock] * [t_handle_lock]
@ -574,6 +617,39 @@ struct transaction_s
}; };
/*
 * Counters describing one transaction's run/commit life cycle.  Used as
 * the "run" arm of the union in struct transaction_stats_s.
 * NOTE(review): field meanings are inferred from the names (time spent in
 * each phase, then handle and block counts) - confirm against the commit
 * code that fills them in.
 */
struct transaction_run_stats_s {
unsigned long rs_wait;
unsigned long rs_running;
unsigned long rs_locked;
unsigned long rs_flushing;
unsigned long rs_logging;
unsigned long rs_handle_count;
unsigned long rs_blocks;
unsigned long rs_blocks_logged;
};
/*
 * One statistics record: a type tag, a transaction id and a union holding
 * either run-phase or checkpoint-phase counters.
 */
struct transaction_stats_s {
/* NOTE(review): presumably JBD2_STATS_RUN or JBD2_STATS_CHECKPOINT, selecting the union arm - confirm. */
int ts_type;
unsigned long ts_tid;
union {
struct transaction_run_stats_s run;
struct transaction_chp_stats_s chp;
} u;
};
/* Record type codes; presumably the values stored in ts_type - confirm. */
#define JBD2_STATS_RUN 1
#define JBD2_STATS_CHECKPOINT 2
/*
 * jbd2_time_diff - number of ticks elapsed from @start to @end.
 * @start: earlier sample of a free-running unsigned long tick counter
 * @end:   later sample of the same counter
 *
 * Unsigned subtraction is performed modulo the width of unsigned long, so
 * the result is the true elapsed count whether or not the counter wrapped
 * between the two samples.  The earlier form special-cased end < start by
 * adding MAX_JIFFY_OFFSET, which is not the wrap modulus of an unsigned
 * long counter and therefore produced a wrong interval after a wrap.
 *
 * Returns the elapsed tick count.
 */
static inline unsigned long
jbd2_time_diff(unsigned long start, unsigned long end)
{
	return end - start;
}
/** /**
* struct journal_s - The journal_s type is the concrete type associated with * struct journal_s - The journal_s type is the concrete type associated with
* journal_t. * journal_t.
@ -635,6 +711,12 @@ struct transaction_s
* @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the
* number that will fit in j_blocksize * number that will fit in j_blocksize
* @j_last_sync_writer: most recent pid which did a synchronous write * @j_last_sync_writer: most recent pid which did a synchronous write
* @j_history: Buffer storing the transactions statistics history
* @j_history_max: Maximum number of transactions in the statistics history
* @j_history_cur: Current number of transactions in the statistics history
* @j_history_lock: Protect the transactions statistics history
* @j_proc_entry: procfs entry for the jbd statistics directory
* @j_stats: Overall statistics
* @j_private: An opaque pointer to fs-private information. * @j_private: An opaque pointer to fs-private information.
*/ */
@ -826,6 +908,19 @@ struct journal_s
pid_t j_last_sync_writer; pid_t j_last_sync_writer;
/*
* Journal statistics
*/
struct transaction_stats_s *j_history;
int j_history_max;
int j_history_cur;
/*
* Protect the transactions statistics history
*/
spinlock_t j_history_lock;
struct proc_dir_entry *j_proc_entry;
struct transaction_stats_s j_stats;
/* /*
* An opaque pointer to fs-private information. ext3 puts its * An opaque pointer to fs-private information. ext3 puts its
* superblock pointer here * superblock pointer here
@ -932,6 +1027,8 @@ extern int jbd2_journal_check_available_features
(journal_t *, unsigned long, unsigned long, unsigned long); (journal_t *, unsigned long, unsigned long, unsigned long);
extern int jbd2_journal_set_features extern int jbd2_journal_set_features
(journal_t *, unsigned long, unsigned long, unsigned long); (journal_t *, unsigned long, unsigned long, unsigned long);
extern void jbd2_journal_clear_features
(journal_t *, unsigned long, unsigned long, unsigned long);
extern int jbd2_journal_create (journal_t *); extern int jbd2_journal_create (journal_t *);
extern int jbd2_journal_load (journal_t *journal); extern int jbd2_journal_load (journal_t *journal);
extern void jbd2_journal_destroy (journal_t *); extern void jbd2_journal_destroy (journal_t *);

Просмотреть файл

@ -178,4 +178,47 @@ found_middle_swap:
EXPORT_SYMBOL(generic_find_next_zero_le_bit); EXPORT_SYMBOL(generic_find_next_zero_le_bit);
/*
 * generic_find_next_le_bit - find the next set bit in a little-endian bitmap.
 * @addr:   start of the bitmap
 * @size:   bitmap length in bits
 * @offset: bit index at which to start searching
 *
 * Returns the index of the first set bit at or after @offset, or @size if
 * there is none (including when @offset >= @size).
 *
 * This variant lives in the big-endian section of the file, so words are
 * passed through ext2_swabp()/ext2_swab() before their bits are examined.
 * NOTE(review): those helpers are assumed to byte-swap a long - confirm
 * against their definitions earlier in the file.
 */
unsigned long generic_find_next_le_bit(const unsigned long *addr, unsigned
long size, unsigned long offset)
{
/* p: word holding @offset; result: bit index of that word's first bit. */
const unsigned long *p = addr + BITOP_WORD(offset);
unsigned long result = offset & ~(BITS_PER_LONG - 1);
unsigned long tmp;
if (offset >= size)
return size;
/* From here on, size counts the bits left starting at word p. */
size -= result;
offset &= (BITS_PER_LONG - 1UL);
if (offset) {
/* Partial first word: discard the bits below the start offset. */
tmp = ext2_swabp(p++);
tmp &= (~0UL << offset);
/* The first word is also the last one: apply the tail mask too. */
if (size < BITS_PER_LONG)
goto found_first;
if (tmp)
goto found_middle;
size -= BITS_PER_LONG;
result += BITS_PER_LONG;
}
/*
 * Whole words: test the raw value for non-zero and defer the swap to
 * found_middle_swap, so it is only done for the word that matches.
 */
while (size & ~(BITS_PER_LONG - 1)) {
tmp = *(p++);
if (tmp)
goto found_middle_swap;
result += BITS_PER_LONG;
size -= BITS_PER_LONG;
}
/* No trailing partial word: result now equals the original size. */
if (!size)
return result;
tmp = ext2_swabp(p);
found_first:
/* Keep only the low "size" bits that still belong to the bitmap. */
tmp &= (~0UL >> (BITS_PER_LONG - size));
if (tmp == 0UL) /* Are any bits set? */
return result + size; /* Nope. */
found_middle:
/* tmp is already swapped and known to be non-zero. */
return result + __ffs(tmp);
found_middle_swap:
/* tmp is a raw word from the full-word loop: swap before scanning. */
return result + __ffs(ext2_swab(tmp));
}
EXPORT_SYMBOL(generic_find_next_le_bit);
#endif /* __BIG_ENDIAN */ #endif /* __BIG_ENDIAN */