ext4: implementation of a new ioctl called EXT4_IOC_SWAP_BOOT
Add a new ioctl, EXT4_IOC_SWAP_BOOT, which swaps i_data and associated attributes (like i_blocks, i_size, i_flags, ...) of the specified inode with those of inode EXT4_BOOT_LOADER_INO (#5). This is typically used to store a boot loader in a secure part of the filesystem, where it can't be changed by a normal user by accident. The data blocks of the previous boot loader will be associated with the given inode. This user-space program is a simple example of the usage: int main(int argc, char *argv[]) { int fd; int err; if ( argc != 2 ) { printf("usage: ext4-swap-boot-inode FILE-TO-SWAP\n"); exit(1); } fd = open(argv[1], O_WRONLY); if ( fd < 0 ) { perror("open"); exit(1); } err = ioctl(fd, EXT4_IOC_SWAP_BOOT); if ( err < 0 ) { perror("ioctl"); exit(1); } close(fd); exit(0); } [ Modified by Theodore Ts'o to fix a number of bugs in the original code.] Signed-off-by: Dr. Tilmann Bubeck <t.bubeck@reinform.de> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
Родитель
f78ee70db4
Коммит
393d1d1d76
|
@ -587,6 +587,16 @@ Table of Ext4 specific ioctls
|
|||
bitmaps and inode table, the userspace tool thus
|
||||
just passes the new number of blocks.
|
||||
|
||||
EXT4_IOC_SWAP_BOOT Swap i_data and associated attributes
|
||||
(like i_blocks, i_size, i_flags, ...) from
|
||||
the specified inode with inode
|
||||
EXT4_BOOT_LOADER_INO (#5). This is typically
|
||||
used to store a boot loader in a secure part of
|
||||
the filesystem, where it can't be changed by a
|
||||
normal user by accident.
|
||||
The data blocks of the previous boot loader
|
||||
will be associated with the given inode.
|
||||
|
||||
..............................................................................
|
||||
|
||||
References
|
||||
|
|
|
@ -616,6 +616,7 @@ enum {
|
|||
#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
|
||||
#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)
|
||||
#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
|
||||
#define EXT4_IOC_SWAP_BOOT _IO('f', 17)
|
||||
|
||||
#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
|
||||
/*
|
||||
|
@ -1341,6 +1342,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
|
|||
return ino == EXT4_ROOT_INO ||
|
||||
ino == EXT4_USR_QUOTA_INO ||
|
||||
ino == EXT4_GRP_QUOTA_INO ||
|
||||
ino == EXT4_BOOT_LOADER_INO ||
|
||||
ino == EXT4_JOURNAL_INO ||
|
||||
ino == EXT4_RESIZE_INO ||
|
||||
(ino >= EXT4_FIRST_INO(sb) &&
|
||||
|
@ -2624,6 +2626,12 @@ extern int ext4_ind_migrate(struct inode *inode);
|
|||
|
||||
|
||||
/* move_extent.c */
|
||||
extern void ext4_double_down_write_data_sem(struct inode *first,
|
||||
struct inode *second);
|
||||
extern void ext4_double_up_write_data_sem(struct inode *orig_inode,
|
||||
struct inode *donor_inode);
|
||||
void ext4_inode_double_lock(struct inode *inode1, struct inode *inode2);
|
||||
void ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2);
|
||||
extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
|
||||
__u64 start_orig, __u64 start_donor,
|
||||
__u64 len, __u64 *moved_len);
|
||||
|
|
|
@ -4191,8 +4191,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
|
|||
* NeilBrown 1999oct15
|
||||
*/
|
||||
if (inode->i_nlink == 0) {
|
||||
if (inode->i_mode == 0 ||
|
||||
!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
|
||||
if ((inode->i_mode == 0 ||
|
||||
!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) &&
|
||||
ino != EXT4_BOOT_LOADER_INO) {
|
||||
/* this inode is deleted */
|
||||
ret = -ESTALE;
|
||||
goto bad_inode;
|
||||
|
@ -4200,7 +4201,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
|
|||
/* The only unlinked inodes we let through here have
|
||||
* valid i_mode and are being read by the orphan
|
||||
* recovery code: that's fine, we're about to complete
|
||||
* the process of deleting those. */
|
||||
* the process of deleting those.
|
||||
* OR it is the EXT4_BOOT_LOADER_INO which is
|
||||
* not initialized on a new filesystem. */
|
||||
}
|
||||
ei->i_flags = le32_to_cpu(raw_inode->i_flags);
|
||||
inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
|
||||
|
@ -4320,6 +4323,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
|
|||
else
|
||||
init_special_inode(inode, inode->i_mode,
|
||||
new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
|
||||
} else if (ino == EXT4_BOOT_LOADER_INO) {
|
||||
make_bad_inode(inode);
|
||||
} else {
|
||||
ret = -EIO;
|
||||
EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode);
|
||||
|
|
197
fs/ext4/ioctl.c
197
fs/ext4/ioctl.c
|
@ -17,9 +17,201 @@
|
|||
#include <asm/uaccess.h>
|
||||
#include "ext4_jbd2.h"
|
||||
#include "ext4.h"
|
||||
#include "ext4_extents.h"
|
||||
|
||||
#define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1)
|
||||
|
||||
/**
 * memswap - exchange the contents of two memory areas
 *
 * @a:   pointer to first memory area
 * @b:   pointer to second memory area
 * @len: number of bytes to swap
 *
 * The areas are exchanged one byte at a time, in ascending address
 * order.  A @len of zero leaves both areas untouched.
 */
static void memswap(void *a, void *b, size_t len)
{
	unsigned char *lhs = a;
	unsigned char *rhs = b;
	size_t i;

	for (i = 0; i < len; i++) {
		unsigned char saved = lhs[i];

		lhs[i] = rhs[i];
		rhs[i] = saved;
	}
}
|
||||
|
||||
/**
|
||||
* Swap i_data and associated attributes between @inode1 and @inode2.
|
||||
* This function is used for the primary swap between inode1 and inode2
|
||||
* and also to revert this primary swap in case of errors.
|
||||
*
|
||||
* Therefore you have to make sure, that calling this method twice
|
||||
* will revert all changes.
|
||||
*
|
||||
* @inode1: pointer to first inode
|
||||
* @inode2: pointer to second inode
|
||||
*/
|
||||
static void swap_inode_data(struct inode *inode1, struct inode *inode2)
|
||||
{
|
||||
loff_t isize;
|
||||
struct ext4_inode_info *ei1;
|
||||
struct ext4_inode_info *ei2;
|
||||
|
||||
ei1 = EXT4_I(inode1);
|
||||
ei2 = EXT4_I(inode2);
|
||||
|
||||
memswap(&inode1->i_flags, &inode2->i_flags, sizeof(inode1->i_flags));
|
||||
memswap(&inode1->i_version, &inode2->i_version,
|
||||
sizeof(inode1->i_version));
|
||||
memswap(&inode1->i_blocks, &inode2->i_blocks,
|
||||
sizeof(inode1->i_blocks));
|
||||
memswap(&inode1->i_bytes, &inode2->i_bytes, sizeof(inode1->i_bytes));
|
||||
memswap(&inode1->i_atime, &inode2->i_atime, sizeof(inode1->i_atime));
|
||||
memswap(&inode1->i_mtime, &inode2->i_mtime, sizeof(inode1->i_mtime));
|
||||
|
||||
memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data));
|
||||
memswap(&ei1->i_flags, &ei2->i_flags, sizeof(ei1->i_flags));
|
||||
memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize));
|
||||
memswap(&ei1->i_es_tree, &ei2->i_es_tree, sizeof(ei1->i_es_tree));
|
||||
memswap(&ei1->i_es_lru_nr, &ei2->i_es_lru_nr, sizeof(ei1->i_es_lru_nr));
|
||||
|
||||
isize = i_size_read(inode1);
|
||||
i_size_write(inode1, i_size_read(inode2));
|
||||
i_size_write(inode2, isize);
|
||||
}
|
||||
|
||||
/**
|
||||
* Swap the information from the given @inode and the inode
|
||||
* EXT4_BOOT_LOADER_INO. It will basically swap i_data and all other
|
||||
* important fields of the inodes.
|
||||
*
|
||||
* @sb: the super block of the filesystem
|
||||
* @inode: the inode to swap with EXT4_BOOT_LOADER_INO
|
||||
*
|
||||
*/
|
||||
static long swap_inode_boot_loader(struct super_block *sb,
|
||||
struct inode *inode)
|
||||
{
|
||||
handle_t *handle;
|
||||
int err;
|
||||
struct inode *inode_bl;
|
||||
struct ext4_inode_info *ei;
|
||||
struct ext4_inode_info *ei_bl;
|
||||
struct ext4_sb_info *sbi;
|
||||
|
||||
if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode)) {
|
||||
err = -EINVAL;
|
||||
goto swap_boot_out;
|
||||
}
|
||||
|
||||
if (!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN)) {
|
||||
err = -EPERM;
|
||||
goto swap_boot_out;
|
||||
}
|
||||
|
||||
sbi = EXT4_SB(sb);
|
||||
ei = EXT4_I(inode);
|
||||
|
||||
inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO);
|
||||
if (IS_ERR(inode_bl)) {
|
||||
err = PTR_ERR(inode_bl);
|
||||
goto swap_boot_out;
|
||||
}
|
||||
ei_bl = EXT4_I(inode_bl);
|
||||
|
||||
filemap_flush(inode->i_mapping);
|
||||
filemap_flush(inode_bl->i_mapping);
|
||||
|
||||
/* Protect orig inodes against a truncate and make sure,
|
||||
* that only 1 swap_inode_boot_loader is running. */
|
||||
ext4_inode_double_lock(inode, inode_bl);
|
||||
|
||||
truncate_inode_pages(&inode->i_data, 0);
|
||||
truncate_inode_pages(&inode_bl->i_data, 0);
|
||||
|
||||
/* Wait for all existing dio workers */
|
||||
ext4_inode_block_unlocked_dio(inode);
|
||||
ext4_inode_block_unlocked_dio(inode_bl);
|
||||
inode_dio_wait(inode);
|
||||
inode_dio_wait(inode_bl);
|
||||
|
||||
handle = ext4_journal_start(inode_bl, EXT4_HT_MOVE_EXTENTS, 2);
|
||||
if (IS_ERR(handle)) {
|
||||
err = -EINVAL;
|
||||
goto swap_boot_out;
|
||||
}
|
||||
|
||||
/* Protect extent tree against block allocations via delalloc */
|
||||
ext4_double_down_write_data_sem(inode, inode_bl);
|
||||
|
||||
if (inode_bl->i_nlink == 0) {
|
||||
/* this inode has never been used as a BOOT_LOADER */
|
||||
set_nlink(inode_bl, 1);
|
||||
i_uid_write(inode_bl, 0);
|
||||
i_gid_write(inode_bl, 0);
|
||||
inode_bl->i_flags = 0;
|
||||
ei_bl->i_flags = 0;
|
||||
inode_bl->i_version = 1;
|
||||
i_size_write(inode_bl, 0);
|
||||
inode_bl->i_mode = S_IFREG;
|
||||
if (EXT4_HAS_INCOMPAT_FEATURE(sb,
|
||||
EXT4_FEATURE_INCOMPAT_EXTENTS)) {
|
||||
ext4_set_inode_flag(inode_bl, EXT4_INODE_EXTENTS);
|
||||
ext4_ext_tree_init(handle, inode_bl);
|
||||
} else
|
||||
memset(ei_bl->i_data, 0, sizeof(ei_bl->i_data));
|
||||
}
|
||||
|
||||
swap_inode_data(inode, inode_bl);
|
||||
|
||||
inode->i_ctime = inode_bl->i_ctime = ext4_current_time(inode);
|
||||
|
||||
spin_lock(&sbi->s_next_gen_lock);
|
||||
inode->i_generation = sbi->s_next_generation++;
|
||||
inode_bl->i_generation = sbi->s_next_generation++;
|
||||
spin_unlock(&sbi->s_next_gen_lock);
|
||||
|
||||
ext4_discard_preallocations(inode);
|
||||
|
||||
err = ext4_mark_inode_dirty(handle, inode);
|
||||
if (err < 0) {
|
||||
ext4_warning(inode->i_sb,
|
||||
"couldn't mark inode #%lu dirty (err %d)",
|
||||
inode->i_ino, err);
|
||||
/* Revert all changes: */
|
||||
swap_inode_data(inode, inode_bl);
|
||||
} else {
|
||||
err = ext4_mark_inode_dirty(handle, inode_bl);
|
||||
if (err < 0) {
|
||||
ext4_warning(inode_bl->i_sb,
|
||||
"couldn't mark inode #%lu dirty (err %d)",
|
||||
inode_bl->i_ino, err);
|
||||
/* Revert all changes: */
|
||||
swap_inode_data(inode, inode_bl);
|
||||
ext4_mark_inode_dirty(handle, inode);
|
||||
}
|
||||
}
|
||||
|
||||
ext4_journal_stop(handle);
|
||||
|
||||
ext4_double_up_write_data_sem(inode, inode_bl);
|
||||
|
||||
ext4_inode_resume_unlocked_dio(inode);
|
||||
ext4_inode_resume_unlocked_dio(inode_bl);
|
||||
|
||||
ext4_inode_double_unlock(inode, inode_bl);
|
||||
|
||||
iput(inode_bl);
|
||||
|
||||
swap_boot_out:
|
||||
return err;
|
||||
}
|
||||
|
||||
long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
struct inode *inode = file_inode(filp);
|
||||
|
@ -353,6 +545,11 @@ group_add_out:
|
|||
return err;
|
||||
}
|
||||
|
||||
case EXT4_IOC_SWAP_BOOT:
|
||||
if (!(filp->f_mode & FMODE_WRITE))
|
||||
return -EBADF;
|
||||
return swap_inode_boot_loader(sb, inode);
|
||||
|
||||
case EXT4_IOC_RESIZE_FS: {
|
||||
ext4_fsblk_t n_blocks_count;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
|
|
|
@ -144,12 +144,13 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
|
|||
}
|
||||
|
||||
/**
|
||||
* double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
|
||||
* ext4_double_down_write_data_sem - Acquire two inodes' write lock
|
||||
* of i_data_sem
|
||||
*
|
||||
* Acquire write lock of i_data_sem of the two inodes
|
||||
*/
|
||||
static void
|
||||
double_down_write_data_sem(struct inode *first, struct inode *second)
|
||||
void
|
||||
ext4_double_down_write_data_sem(struct inode *first, struct inode *second)
|
||||
{
|
||||
if (first < second) {
|
||||
down_write(&EXT4_I(first)->i_data_sem);
|
||||
|
@ -162,14 +163,15 @@ double_down_write_data_sem(struct inode *first, struct inode *second)
|
|||
}
|
||||
|
||||
/**
|
||||
* double_up_write_data_sem - Release two inodes' write lock of i_data_sem
|
||||
* ext4_double_up_write_data_sem - Release two inodes' write lock of i_data_sem
|
||||
*
|
||||
* @orig_inode: original inode structure to be released its lock first
|
||||
* @donor_inode: donor inode structure to be released its lock second
|
||||
* Release write lock of i_data_sem of two inodes (orig and donor).
|
||||
*/
|
||||
static void
|
||||
double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
|
||||
void
|
||||
ext4_double_up_write_data_sem(struct inode *orig_inode,
|
||||
struct inode *donor_inode)
|
||||
{
|
||||
up_write(&EXT4_I(orig_inode)->i_data_sem);
|
||||
up_write(&EXT4_I(donor_inode)->i_data_sem);
|
||||
|
@ -976,7 +978,7 @@ again:
|
|||
* necessary, just swap data blocks between orig and donor.
|
||||
*/
|
||||
if (uninit) {
|
||||
double_down_write_data_sem(orig_inode, donor_inode);
|
||||
ext4_double_down_write_data_sem(orig_inode, donor_inode);
|
||||
/* If any of extents in range became initialized we have to
|
||||
* fallback to data copying */
|
||||
uninit = mext_check_coverage(orig_inode, orig_blk_offset,
|
||||
|
@ -990,7 +992,7 @@ again:
|
|||
goto drop_data_sem;
|
||||
|
||||
if (!uninit) {
|
||||
double_up_write_data_sem(orig_inode, donor_inode);
|
||||
ext4_double_up_write_data_sem(orig_inode, donor_inode);
|
||||
goto data_copy;
|
||||
}
|
||||
if ((page_has_private(pagep[0]) &&
|
||||
|
@ -1004,7 +1006,7 @@ again:
|
|||
donor_inode, orig_blk_offset,
|
||||
block_len_in_page, err);
|
||||
drop_data_sem:
|
||||
double_up_write_data_sem(orig_inode, donor_inode);
|
||||
ext4_double_up_write_data_sem(orig_inode, donor_inode);
|
||||
goto unlock_pages;
|
||||
}
|
||||
data_copy:
|
||||
|
@ -1065,11 +1067,11 @@ repair_branches:
|
|||
* Extents are swapped already, but we are not able to copy data.
|
||||
* Try to swap the extents back to their original places
|
||||
*/
|
||||
double_down_write_data_sem(orig_inode, donor_inode);
|
||||
ext4_double_down_write_data_sem(orig_inode, donor_inode);
|
||||
replaced_count = mext_replace_branches(handle, donor_inode, orig_inode,
|
||||
orig_blk_offset,
|
||||
block_len_in_page, &err2);
|
||||
double_up_write_data_sem(orig_inode, donor_inode);
|
||||
ext4_double_up_write_data_sem(orig_inode, donor_inode);
|
||||
if (replaced_count != block_len_in_page) {
|
||||
EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset),
|
||||
"Unable to copy data block,"
|
||||
|
@ -1209,15 +1211,15 @@ mext_check_arguments(struct inode *orig_inode,
|
|||
}
|
||||
|
||||
/**
|
||||
* mext_inode_double_lock - Lock i_mutex on both @inode1 and @inode2
|
||||
* ext4_inode_double_lock - Lock i_mutex on both @inode1 and @inode2
|
||||
*
|
||||
* @inode1: the inode structure
|
||||
* @inode2: the inode structure
|
||||
*
|
||||
* Lock two inodes' i_mutex
|
||||
*/
|
||||
static void
|
||||
mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
|
||||
void
|
||||
ext4_inode_double_lock(struct inode *inode1, struct inode *inode2)
|
||||
{
|
||||
BUG_ON(inode1 == inode2);
|
||||
if (inode1 < inode2) {
|
||||
|
@ -1230,15 +1232,15 @@ mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
|
|||
}
|
||||
|
||||
/**
|
||||
* mext_inode_double_unlock - Release i_mutex on both @inode1 and @inode2
|
||||
* ext4_inode_double_unlock - Release i_mutex on both @inode1 and @inode2
|
||||
*
|
||||
* @inode1: the inode that is released first
|
||||
* @inode2: the inode that is released second
|
||||
*
|
||||
*/
|
||||
|
||||
static void
|
||||
mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
|
||||
void
|
||||
ext4_inode_double_unlock(struct inode *inode1, struct inode *inode2)
|
||||
{
|
||||
mutex_unlock(&inode1->i_mutex);
|
||||
mutex_unlock(&inode2->i_mutex);
|
||||
|
@ -1333,7 +1335,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
|
|||
return -EINVAL;
|
||||
}
|
||||
/* Protect orig and donor inodes against a truncate */
|
||||
mext_inode_double_lock(orig_inode, donor_inode);
|
||||
ext4_inode_double_lock(orig_inode, donor_inode);
|
||||
|
||||
/* Wait for all existing dio workers */
|
||||
ext4_inode_block_unlocked_dio(orig_inode);
|
||||
|
@ -1342,7 +1344,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
|
|||
inode_dio_wait(donor_inode);
|
||||
|
||||
/* Protect extent tree against block allocations via delalloc */
|
||||
double_down_write_data_sem(orig_inode, donor_inode);
|
||||
ext4_double_down_write_data_sem(orig_inode, donor_inode);
|
||||
/* Check the filesystem environment whether move_extent can be done */
|
||||
ret = mext_check_arguments(orig_inode, donor_inode, orig_start,
|
||||
donor_start, &len);
|
||||
|
@ -1466,7 +1468,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
|
|||
* b. racing with ->readpage, ->write_begin, and ext4_get_block
|
||||
* in move_extent_per_page
|
||||
*/
|
||||
double_up_write_data_sem(orig_inode, donor_inode);
|
||||
ext4_double_up_write_data_sem(orig_inode, donor_inode);
|
||||
|
||||
while (orig_page_offset <= seq_end_page) {
|
||||
|
||||
|
@ -1500,7 +1502,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
|
|||
block_len_in_page = rest_blocks;
|
||||
}
|
||||
|
||||
double_down_write_data_sem(orig_inode, donor_inode);
|
||||
ext4_double_down_write_data_sem(orig_inode, donor_inode);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
|
@ -1538,10 +1540,10 @@ out:
|
|||
ext4_ext_drop_refs(holecheck_path);
|
||||
kfree(holecheck_path);
|
||||
}
|
||||
double_up_write_data_sem(orig_inode, donor_inode);
|
||||
ext4_double_up_write_data_sem(orig_inode, donor_inode);
|
||||
ext4_inode_resume_unlocked_dio(orig_inode);
|
||||
ext4_inode_resume_unlocked_dio(donor_inode);
|
||||
mext_inode_double_unlock(orig_inode, donor_inode);
|
||||
ext4_inode_double_unlock(orig_inode, donor_inode);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче