Merge branch 'for-linus' of git://git.open-osd.org/linux-open-osd
* 'for-linus' of git://git.open-osd.org/linux-open-osd:
  exofs: deprecate the commands pending counter
  exofs: Write sbi->s_nextid as part of the Create command
  exofs: Add option to mount by osdname
  exofs: Override read-ahead to align on stripe_size
  exofs: simple fsync race fix
  exofs: Optimize read_4_write
  exofs: Trivial: fix some indentation and debug prints
  exofs: Remove redundant unlikely()
This commit is contained in:
Коммит
1b506cfb6a
|
@ -104,7 +104,15 @@ Where:
|
||||||
exofs specific options: Options are separated by commas (,)
|
exofs specific options: Options are separated by commas (,)
|
||||||
pid=<integer> - The partition number to mount/create as
|
pid=<integer> - The partition number to mount/create as
|
||||||
container of the filesystem.
|
container of the filesystem.
|
||||||
This option is mandatory.
|
This option is mandatory. The integer can be given in
|
||||||
|
hex by prepending 0x to the number.
|
||||||
|
osdname=<id> - Mount by a device's osdname.
|
||||||
|
osdname is usually a 36 character uuid of the
|
||||||
|
form "d2683732-c906-4ee1-9dbd-c10c27bb40df".
|
||||||
|
It is one of the device's uuids specified in the
|
||||||
|
mkfs.exofs format command.
|
||||||
|
If this option is specified then the /dev/osdX
|
||||||
|
above can be empty and is ignored.
|
||||||
to=<integer> - Timeout in ticks for a single command.
|
to=<integer> - Timeout in ticks for a single command.
|
||||||
default is (60 * HZ) [for debugging only]
|
default is (60 * HZ) [for debugging only]
|
||||||
|
|
||||||
|
|
|
@ -53,10 +53,14 @@
|
||||||
#define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */
|
#define EXOFS_ROOT_ID 0x10002 /* object ID for root directory */
|
||||||
|
|
||||||
/* exofs Application specific page/attribute */
|
/* exofs Application specific page/attribute */
|
||||||
|
/* Inode attrs */
|
||||||
# define EXOFS_APAGE_FS_DATA (OSD_APAGE_APP_DEFINED_FIRST + 3)
|
# define EXOFS_APAGE_FS_DATA (OSD_APAGE_APP_DEFINED_FIRST + 3)
|
||||||
# define EXOFS_ATTR_INODE_DATA 1
|
# define EXOFS_ATTR_INODE_DATA 1
|
||||||
# define EXOFS_ATTR_INODE_FILE_LAYOUT 2
|
# define EXOFS_ATTR_INODE_FILE_LAYOUT 2
|
||||||
# define EXOFS_ATTR_INODE_DIR_LAYOUT 3
|
# define EXOFS_ATTR_INODE_DIR_LAYOUT 3
|
||||||
|
/* Partition attrs */
|
||||||
|
# define EXOFS_APAGE_SB_DATA (0xF0000000U + 3)
|
||||||
|
# define EXOFS_ATTR_SB_STATS 1
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The maximum number of files we can have is limited by the size of the
|
* The maximum number of files we can have is limited by the size of the
|
||||||
|
@ -86,8 +90,8 @@ enum {
|
||||||
*/
|
*/
|
||||||
enum {EXOFS_FSCB_VER = 1, EXOFS_DT_VER = 1};
|
enum {EXOFS_FSCB_VER = 1, EXOFS_DT_VER = 1};
|
||||||
struct exofs_fscb {
|
struct exofs_fscb {
|
||||||
__le64 s_nextid; /* Highest object ID used */
|
__le64 s_nextid; /* Only used after mkfs */
|
||||||
__le64 s_numfiles; /* Number of files on fs */
|
__le64 s_numfiles; /* Only used after mkfs */
|
||||||
__le32 s_version; /* == EXOFS_FSCB_VER */
|
__le32 s_version; /* == EXOFS_FSCB_VER */
|
||||||
__le16 s_magic; /* Magic signature */
|
__le16 s_magic; /* Magic signature */
|
||||||
__le16 s_newfs; /* Non-zero if this is a new fs */
|
__le16 s_newfs; /* Non-zero if this is a new fs */
|
||||||
|
@ -97,6 +101,16 @@ struct exofs_fscb {
|
||||||
__le64 s_dev_table_count; /* == 0 means no dev_table */
|
__le64 s_dev_table_count; /* == 0 means no dev_table */
|
||||||
} __packed;
|
} __packed;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This struct is set on the FS partition's attributes.
|
||||||
|
* [EXOFS_APAGE_SB_DATA, EXOFS_ATTR_SB_STATS] and is written together
|
||||||
|
* with the create command, to atomically persist the sb writeable information.
|
||||||
|
*/
|
||||||
|
struct exofs_sb_stats {
|
||||||
|
__le64 s_nextid; /* Highest object ID used */
|
||||||
|
__le64 s_numfiles; /* Number of files on fs */
|
||||||
|
} __packed;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Describes the raid used in the FS. It is part of the device table.
|
* Describes the raid used in the FS. It is part of the device table.
|
||||||
* This here is taken from the pNFS-objects definition. In exofs we
|
* This here is taken from the pNFS-objects definition. In exofs we
|
||||||
|
|
|
@ -124,7 +124,7 @@ out:
|
||||||
|
|
||||||
Ebadsize:
|
Ebadsize:
|
||||||
EXOFS_ERR("ERROR [exofs_check_page]: "
|
EXOFS_ERR("ERROR [exofs_check_page]: "
|
||||||
"size of directory #%lu is not a multiple of chunk size",
|
"size of directory(0x%lx) is not a multiple of chunk size\n",
|
||||||
dir->i_ino
|
dir->i_ino
|
||||||
);
|
);
|
||||||
goto fail;
|
goto fail;
|
||||||
|
@ -142,8 +142,8 @@ Espan:
|
||||||
goto bad_entry;
|
goto bad_entry;
|
||||||
bad_entry:
|
bad_entry:
|
||||||
EXOFS_ERR(
|
EXOFS_ERR(
|
||||||
"ERROR [exofs_check_page]: bad entry in directory #%lu: %s - "
|
"ERROR [exofs_check_page]: bad entry in directory(0x%lx): %s - "
|
||||||
"offset=%lu, inode=%llu, rec_len=%d, name_len=%d",
|
"offset=%lu, inode=0x%llu, rec_len=%d, name_len=%d\n",
|
||||||
dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
|
dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
|
||||||
_LLU(le64_to_cpu(p->inode_no)),
|
_LLU(le64_to_cpu(p->inode_no)),
|
||||||
rec_len, p->name_len);
|
rec_len, p->name_len);
|
||||||
|
@ -151,8 +151,8 @@ bad_entry:
|
||||||
Eend:
|
Eend:
|
||||||
p = (struct exofs_dir_entry *)(kaddr + offs);
|
p = (struct exofs_dir_entry *)(kaddr + offs);
|
||||||
EXOFS_ERR("ERROR [exofs_check_page]: "
|
EXOFS_ERR("ERROR [exofs_check_page]: "
|
||||||
"entry in directory #%lu spans the page boundary"
|
"entry in directory(0x%lx) spans the page boundary"
|
||||||
"offset=%lu, inode=%llu",
|
"offset=%lu, inode=0x%llx\n",
|
||||||
dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
|
dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
|
||||||
_LLU(le64_to_cpu(p->inode_no)));
|
_LLU(le64_to_cpu(p->inode_no)));
|
||||||
fail:
|
fail:
|
||||||
|
@ -261,9 +261,8 @@ exofs_readdir(struct file *filp, void *dirent, filldir_t filldir)
|
||||||
struct page *page = exofs_get_page(inode, n);
|
struct page *page = exofs_get_page(inode, n);
|
||||||
|
|
||||||
if (IS_ERR(page)) {
|
if (IS_ERR(page)) {
|
||||||
EXOFS_ERR("ERROR: "
|
EXOFS_ERR("ERROR: bad page in directory(0x%lx)\n",
|
||||||
"bad page in #%lu",
|
inode->i_ino);
|
||||||
inode->i_ino);
|
|
||||||
filp->f_pos += PAGE_CACHE_SIZE - offset;
|
filp->f_pos += PAGE_CACHE_SIZE - offset;
|
||||||
return PTR_ERR(page);
|
return PTR_ERR(page);
|
||||||
}
|
}
|
||||||
|
@ -283,7 +282,8 @@ exofs_readdir(struct file *filp, void *dirent, filldir_t filldir)
|
||||||
for (; (char *)de <= limit; de = exofs_next_entry(de)) {
|
for (; (char *)de <= limit; de = exofs_next_entry(de)) {
|
||||||
if (de->rec_len == 0) {
|
if (de->rec_len == 0) {
|
||||||
EXOFS_ERR("ERROR: "
|
EXOFS_ERR("ERROR: "
|
||||||
"zero-length directory entry");
|
"zero-length entry in directory(0x%lx)\n",
|
||||||
|
inode->i_ino);
|
||||||
exofs_put_page(page);
|
exofs_put_page(page);
|
||||||
return -EIO;
|
return -EIO;
|
||||||
}
|
}
|
||||||
|
@ -342,9 +342,9 @@ struct exofs_dir_entry *exofs_find_entry(struct inode *dir,
|
||||||
kaddr += exofs_last_byte(dir, n) - reclen;
|
kaddr += exofs_last_byte(dir, n) - reclen;
|
||||||
while ((char *) de <= kaddr) {
|
while ((char *) de <= kaddr) {
|
||||||
if (de->rec_len == 0) {
|
if (de->rec_len == 0) {
|
||||||
EXOFS_ERR(
|
EXOFS_ERR("ERROR: zero-length entry in "
|
||||||
"ERROR: exofs_find_entry: "
|
"directory(0x%lx)\n",
|
||||||
"zero-length directory entry");
|
dir->i_ino);
|
||||||
exofs_put_page(page);
|
exofs_put_page(page);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
@ -472,7 +472,8 @@ int exofs_add_link(struct dentry *dentry, struct inode *inode)
|
||||||
}
|
}
|
||||||
if (de->rec_len == 0) {
|
if (de->rec_len == 0) {
|
||||||
EXOFS_ERR("ERROR: exofs_add_link: "
|
EXOFS_ERR("ERROR: exofs_add_link: "
|
||||||
"zero-length directory entry");
|
"zero-length entry in directory(0x%lx)\n",
|
||||||
|
inode->i_ino);
|
||||||
err = -EIO;
|
err = -EIO;
|
||||||
goto out_unlock;
|
goto out_unlock;
|
||||||
}
|
}
|
||||||
|
@ -491,7 +492,8 @@ int exofs_add_link(struct dentry *dentry, struct inode *inode)
|
||||||
exofs_put_page(page);
|
exofs_put_page(page);
|
||||||
}
|
}
|
||||||
|
|
||||||
EXOFS_ERR("exofs_add_link: BAD dentry=%p or inode=%p", dentry, inode);
|
EXOFS_ERR("exofs_add_link: BAD dentry=%p or inode=0x%lx\n",
|
||||||
|
dentry, inode->i_ino);
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
got_it:
|
got_it:
|
||||||
|
@ -542,7 +544,8 @@ int exofs_delete_entry(struct exofs_dir_entry *dir, struct page *page)
|
||||||
while (de < dir) {
|
while (de < dir) {
|
||||||
if (de->rec_len == 0) {
|
if (de->rec_len == 0) {
|
||||||
EXOFS_ERR("ERROR: exofs_delete_entry:"
|
EXOFS_ERR("ERROR: exofs_delete_entry:"
|
||||||
"zero-length directory entry");
|
"zero-length entry in directory(0x%lx)\n",
|
||||||
|
inode->i_ino);
|
||||||
err = -EIO;
|
err = -EIO;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
|
@ -77,7 +77,7 @@ struct exofs_layout {
|
||||||
* our extension to the in-memory superblock
|
* our extension to the in-memory superblock
|
||||||
*/
|
*/
|
||||||
struct exofs_sb_info {
|
struct exofs_sb_info {
|
||||||
struct exofs_fscb s_fscb; /* Written often, pre-allocate*/
|
struct exofs_sb_stats s_ess; /* Written often, pre-allocate*/
|
||||||
int s_timeout; /* timeout for OSD operations */
|
int s_timeout; /* timeout for OSD operations */
|
||||||
uint64_t s_nextid; /* highest object ID used */
|
uint64_t s_nextid; /* highest object ID used */
|
||||||
uint32_t s_numfiles; /* number of files on fs */
|
uint32_t s_numfiles; /* number of files on fs */
|
||||||
|
@ -256,6 +256,8 @@ static inline int exofs_oi_read(struct exofs_i_info *oi,
|
||||||
}
|
}
|
||||||
|
|
||||||
/* inode.c */
|
/* inode.c */
|
||||||
|
unsigned exofs_max_io_pages(struct exofs_layout *layout,
|
||||||
|
unsigned expected_pages);
|
||||||
int exofs_setattr(struct dentry *, struct iattr *);
|
int exofs_setattr(struct dentry *, struct iattr *);
|
||||||
int exofs_write_begin(struct file *file, struct address_space *mapping,
|
int exofs_write_begin(struct file *file, struct address_space *mapping,
|
||||||
loff_t pos, unsigned len, unsigned flags,
|
loff_t pos, unsigned len, unsigned flags,
|
||||||
|
@ -279,7 +281,7 @@ int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *,
|
||||||
struct inode *);
|
struct inode *);
|
||||||
|
|
||||||
/* super.c */
|
/* super.c */
|
||||||
int exofs_sync_fs(struct super_block *sb, int wait);
|
int exofs_sbi_write_stats(struct exofs_sb_info *sbi);
|
||||||
|
|
||||||
/*********************
|
/*********************
|
||||||
* operation vectors *
|
* operation vectors *
|
||||||
|
|
|
@ -45,22 +45,8 @@ static int exofs_release_file(struct inode *inode, struct file *filp)
|
||||||
static int exofs_file_fsync(struct file *filp, int datasync)
|
static int exofs_file_fsync(struct file *filp, int datasync)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
struct inode *inode = filp->f_mapping->host;
|
|
||||||
struct super_block *sb;
|
|
||||||
|
|
||||||
if (!(inode->i_state & I_DIRTY))
|
|
||||||
return 0;
|
|
||||||
if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
ret = sync_inode_metadata(inode, 1);
|
|
||||||
|
|
||||||
/* This is a good place to write the sb */
|
|
||||||
/* TODO: Sechedule an sb-sync on create */
|
|
||||||
sb = inode->i_sb;
|
|
||||||
if (sb->s_dirt)
|
|
||||||
exofs_sync_fs(sb, 1);
|
|
||||||
|
|
||||||
|
ret = sync_inode_metadata(filp->f_mapping->host, 1);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -43,6 +43,17 @@ enum { BIO_MAX_PAGES_KMALLOC =
|
||||||
PAGE_SIZE / sizeof(struct page *),
|
PAGE_SIZE / sizeof(struct page *),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
unsigned exofs_max_io_pages(struct exofs_layout *layout,
|
||||||
|
unsigned expected_pages)
|
||||||
|
{
|
||||||
|
unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC);
|
||||||
|
|
||||||
|
/* TODO: easily support bio chaining */
|
||||||
|
pages = min_t(unsigned, pages,
|
||||||
|
layout->group_width * BIO_MAX_PAGES_KMALLOC);
|
||||||
|
return pages;
|
||||||
|
}
|
||||||
|
|
||||||
struct page_collect {
|
struct page_collect {
|
||||||
struct exofs_sb_info *sbi;
|
struct exofs_sb_info *sbi;
|
||||||
struct inode *inode;
|
struct inode *inode;
|
||||||
|
@ -97,8 +108,7 @@ static void _pcol_reset(struct page_collect *pcol)
|
||||||
|
|
||||||
static int pcol_try_alloc(struct page_collect *pcol)
|
static int pcol_try_alloc(struct page_collect *pcol)
|
||||||
{
|
{
|
||||||
unsigned pages = min_t(unsigned, pcol->expected_pages,
|
unsigned pages;
|
||||||
MAX_PAGES_KMALLOC);
|
|
||||||
|
|
||||||
if (!pcol->ios) { /* First time allocate io_state */
|
if (!pcol->ios) { /* First time allocate io_state */
|
||||||
int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios);
|
int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios);
|
||||||
|
@ -108,8 +118,7 @@ static int pcol_try_alloc(struct page_collect *pcol)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* TODO: easily support bio chaining */
|
/* TODO: easily support bio chaining */
|
||||||
pages = min_t(unsigned, pages,
|
pages = exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages);
|
||||||
pcol->sbi->layout.group_width * BIO_MAX_PAGES_KMALLOC);
|
|
||||||
|
|
||||||
for (; pages; pages >>= 1) {
|
for (; pages; pages >>= 1) {
|
||||||
pcol->pages = kmalloc(pages * sizeof(struct page *),
|
pcol->pages = kmalloc(pages * sizeof(struct page *),
|
||||||
|
@ -350,8 +359,10 @@ static int readpage_strip(void *data, struct page *page)
|
||||||
|
|
||||||
if (!pcol->read_4_write)
|
if (!pcol->read_4_write)
|
||||||
unlock_page(page);
|
unlock_page(page);
|
||||||
EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page,"
|
EXOFS_DBGMSG("readpage_strip(0x%lx) empty page len=%zx "
|
||||||
" splitting\n", inode->i_ino, page->index);
|
"read_4_write=%d index=0x%lx end_index=0x%lx "
|
||||||
|
"splitting\n", inode->i_ino, len,
|
||||||
|
pcol->read_4_write, page->index, end_index);
|
||||||
|
|
||||||
return read_exec(pcol);
|
return read_exec(pcol);
|
||||||
}
|
}
|
||||||
|
@ -722,11 +733,28 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
|
||||||
|
|
||||||
/* read modify write */
|
/* read modify write */
|
||||||
if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) {
|
if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) {
|
||||||
|
loff_t i_size = i_size_read(mapping->host);
|
||||||
|
pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
|
||||||
|
size_t rlen;
|
||||||
|
|
||||||
|
if (page->index < end_index)
|
||||||
|
rlen = PAGE_CACHE_SIZE;
|
||||||
|
else if (page->index == end_index)
|
||||||
|
rlen = i_size & ~PAGE_CACHE_MASK;
|
||||||
|
else
|
||||||
|
rlen = 0;
|
||||||
|
|
||||||
|
if (!rlen) {
|
||||||
|
clear_highpage(page);
|
||||||
|
SetPageUptodate(page);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
ret = _readpage(page, true);
|
ret = _readpage(page, true);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
/*SetPageError was done by _readpage. Is it ok?*/
|
/*SetPageError was done by _readpage. Is it ok?*/
|
||||||
unlock_page(page);
|
unlock_page(page);
|
||||||
EXOFS_DBGMSG("__readpage_filler failed\n");
|
EXOFS_DBGMSG("__readpage failed\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
out:
|
out:
|
||||||
|
@ -1030,6 +1058,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
|
||||||
memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data));
|
memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inode->i_mapping->backing_dev_info = sb->s_bdi;
|
||||||
if (S_ISREG(inode->i_mode)) {
|
if (S_ISREG(inode->i_mode)) {
|
||||||
inode->i_op = &exofs_file_inode_operations;
|
inode->i_op = &exofs_file_inode_operations;
|
||||||
inode->i_fop = &exofs_file_operations;
|
inode->i_fop = &exofs_file_operations;
|
||||||
|
@ -1073,6 +1102,7 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi)
|
||||||
}
|
}
|
||||||
return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0;
|
return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Callback function from exofs_new_inode(). The important thing is that we
|
* Callback function from exofs_new_inode(). The important thing is that we
|
||||||
* set the obj_created flag so that other methods know that the object exists on
|
* set the obj_created flag so that other methods know that the object exists on
|
||||||
|
@ -1130,7 +1160,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
|
||||||
|
|
||||||
sbi = sb->s_fs_info;
|
sbi = sb->s_fs_info;
|
||||||
|
|
||||||
sb->s_dirt = 1;
|
inode->i_mapping->backing_dev_info = sb->s_bdi;
|
||||||
inode_init_owner(inode, dir, mode);
|
inode_init_owner(inode, dir, mode);
|
||||||
inode->i_ino = sbi->s_nextid++;
|
inode->i_ino = sbi->s_nextid++;
|
||||||
inode->i_blkbits = EXOFS_BLKSHIFT;
|
inode->i_blkbits = EXOFS_BLKSHIFT;
|
||||||
|
@ -1141,6 +1171,8 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
|
||||||
spin_unlock(&sbi->s_next_gen_lock);
|
spin_unlock(&sbi->s_next_gen_lock);
|
||||||
insert_inode_hash(inode);
|
insert_inode_hash(inode);
|
||||||
|
|
||||||
|
exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */
|
||||||
|
|
||||||
mark_inode_dirty(inode);
|
mark_inode_dirty(inode);
|
||||||
|
|
||||||
ret = exofs_get_io_state(&sbi->layout, &ios);
|
ret = exofs_get_io_state(&sbi->layout, &ios);
|
||||||
|
@ -1271,7 +1303,8 @@ out:
|
||||||
|
|
||||||
int exofs_write_inode(struct inode *inode, struct writeback_control *wbc)
|
int exofs_write_inode(struct inode *inode, struct writeback_control *wbc)
|
||||||
{
|
{
|
||||||
return exofs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
|
/* FIXME: fix fsync and use wbc->sync_mode == WB_SYNC_ALL */
|
||||||
|
return exofs_update_inode(inode, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
192
fs/exofs/super.c
192
fs/exofs/super.c
|
@ -48,6 +48,7 @@
|
||||||
* struct to hold what we get from mount options
|
* struct to hold what we get from mount options
|
||||||
*/
|
*/
|
||||||
struct exofs_mountopt {
|
struct exofs_mountopt {
|
||||||
|
bool is_osdname;
|
||||||
const char *dev_name;
|
const char *dev_name;
|
||||||
uint64_t pid;
|
uint64_t pid;
|
||||||
int timeout;
|
int timeout;
|
||||||
|
@ -56,7 +57,7 @@ struct exofs_mountopt {
|
||||||
/*
|
/*
|
||||||
* exofs-specific mount-time options.
|
* exofs-specific mount-time options.
|
||||||
*/
|
*/
|
||||||
enum { Opt_pid, Opt_to, Opt_mkfs, Opt_format, Opt_err };
|
enum { Opt_name, Opt_pid, Opt_to, Opt_err };
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Our mount-time options. These should ideally be 64-bit unsigned, but the
|
* Our mount-time options. These should ideally be 64-bit unsigned, but the
|
||||||
|
@ -64,6 +65,7 @@ enum { Opt_pid, Opt_to, Opt_mkfs, Opt_format, Opt_err };
|
||||||
* sufficient for most applications now.
|
* sufficient for most applications now.
|
||||||
*/
|
*/
|
||||||
static match_table_t tokens = {
|
static match_table_t tokens = {
|
||||||
|
{Opt_name, "osdname=%s"},
|
||||||
{Opt_pid, "pid=%u"},
|
{Opt_pid, "pid=%u"},
|
||||||
{Opt_to, "to=%u"},
|
{Opt_to, "to=%u"},
|
||||||
{Opt_err, NULL}
|
{Opt_err, NULL}
|
||||||
|
@ -94,6 +96,14 @@ static int parse_options(char *options, struct exofs_mountopt *opts)
|
||||||
|
|
||||||
token = match_token(p, tokens, args);
|
token = match_token(p, tokens, args);
|
||||||
switch (token) {
|
switch (token) {
|
||||||
|
case Opt_name:
|
||||||
|
opts->dev_name = match_strdup(&args[0]);
|
||||||
|
if (unlikely(!opts->dev_name)) {
|
||||||
|
EXOFS_ERR("Error allocating dev_name");
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
opts->is_osdname = true;
|
||||||
|
break;
|
||||||
case Opt_pid:
|
case Opt_pid:
|
||||||
if (0 == match_strlcpy(str, &args[0], sizeof(str)))
|
if (0 == match_strlcpy(str, &args[0], sizeof(str)))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
@ -203,6 +213,101 @@ static void destroy_inodecache(void)
|
||||||
static const struct super_operations exofs_sops;
|
static const struct super_operations exofs_sops;
|
||||||
static const struct export_operations exofs_export_ops;
|
static const struct export_operations exofs_export_ops;
|
||||||
|
|
||||||
|
static const struct osd_attr g_attr_sb_stats = ATTR_DEF(
|
||||||
|
EXOFS_APAGE_SB_DATA,
|
||||||
|
EXOFS_ATTR_SB_STATS,
|
||||||
|
sizeof(struct exofs_sb_stats));
|
||||||
|
|
||||||
|
static int __sbi_read_stats(struct exofs_sb_info *sbi)
|
||||||
|
{
|
||||||
|
struct osd_attr attrs[] = {
|
||||||
|
[0] = g_attr_sb_stats,
|
||||||
|
};
|
||||||
|
struct exofs_io_state *ios;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ret = exofs_get_io_state(&sbi->layout, &ios);
|
||||||
|
if (unlikely(ret)) {
|
||||||
|
EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
ios->cred = sbi->s_cred;
|
||||||
|
|
||||||
|
ios->in_attr = attrs;
|
||||||
|
ios->in_attr_len = ARRAY_SIZE(attrs);
|
||||||
|
|
||||||
|
ret = exofs_sbi_read(ios);
|
||||||
|
if (unlikely(ret)) {
|
||||||
|
EXOFS_ERR("Error reading super_block stats => %d\n", ret);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = extract_attr_from_ios(ios, &attrs[0]);
|
||||||
|
if (ret) {
|
||||||
|
EXOFS_ERR("%s: extract_attr of sb_stats failed\n", __func__);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
if (attrs[0].len) {
|
||||||
|
struct exofs_sb_stats *ess;
|
||||||
|
|
||||||
|
if (unlikely(attrs[0].len != sizeof(*ess))) {
|
||||||
|
EXOFS_ERR("%s: Wrong version of exofs_sb_stats "
|
||||||
|
"size(%d) != expected(%zd)\n",
|
||||||
|
__func__, attrs[0].len, sizeof(*ess));
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
ess = attrs[0].val_ptr;
|
||||||
|
sbi->s_nextid = le64_to_cpu(ess->s_nextid);
|
||||||
|
sbi->s_numfiles = le32_to_cpu(ess->s_numfiles);
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
exofs_put_io_state(ios);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void stats_done(struct exofs_io_state *ios, void *p)
|
||||||
|
{
|
||||||
|
exofs_put_io_state(ios);
|
||||||
|
/* Good thanks nothing to do anymore */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Asynchronously write the stats attribute */
|
||||||
|
int exofs_sbi_write_stats(struct exofs_sb_info *sbi)
|
||||||
|
{
|
||||||
|
struct osd_attr attrs[] = {
|
||||||
|
[0] = g_attr_sb_stats,
|
||||||
|
};
|
||||||
|
struct exofs_io_state *ios;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
ret = exofs_get_io_state(&sbi->layout, &ios);
|
||||||
|
if (unlikely(ret)) {
|
||||||
|
EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
sbi->s_ess.s_nextid = cpu_to_le64(sbi->s_nextid);
|
||||||
|
sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles);
|
||||||
|
attrs[0].val_ptr = &sbi->s_ess;
|
||||||
|
|
||||||
|
ios->cred = sbi->s_cred;
|
||||||
|
ios->done = stats_done;
|
||||||
|
ios->private = sbi;
|
||||||
|
ios->out_attr = attrs;
|
||||||
|
ios->out_attr_len = ARRAY_SIZE(attrs);
|
||||||
|
|
||||||
|
ret = exofs_sbi_write(ios);
|
||||||
|
if (unlikely(ret)) {
|
||||||
|
EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
|
||||||
|
exofs_put_io_state(ios);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Write the superblock to the OSD
|
* Write the superblock to the OSD
|
||||||
*/
|
*/
|
||||||
|
@ -213,18 +318,25 @@ int exofs_sync_fs(struct super_block *sb, int wait)
|
||||||
struct exofs_io_state *ios;
|
struct exofs_io_state *ios;
|
||||||
int ret = -ENOMEM;
|
int ret = -ENOMEM;
|
||||||
|
|
||||||
lock_super(sb);
|
fscb = kmalloc(sizeof(*fscb), GFP_KERNEL);
|
||||||
sbi = sb->s_fs_info;
|
if (unlikely(!fscb))
|
||||||
fscb = &sbi->s_fscb;
|
return -ENOMEM;
|
||||||
|
|
||||||
|
sbi = sb->s_fs_info;
|
||||||
|
|
||||||
|
/* NOTE: We no longer dirty the super_block anywhere in exofs. The
|
||||||
|
* reason we write the fscb here on unmount is so we can stay backwards
|
||||||
|
* compatible with fscb->s_version == 1. (What we are not compatible
|
||||||
|
* with is if a new version FS crashed and then we try to mount an old
|
||||||
|
* version). Otherwise the exofs_fscb is read-only from mkfs time. All
|
||||||
|
* the writeable info is set in exofs_sbi_write_stats() above.
|
||||||
|
*/
|
||||||
ret = exofs_get_io_state(&sbi->layout, &ios);
|
ret = exofs_get_io_state(&sbi->layout, &ios);
|
||||||
if (ret)
|
if (unlikely(ret))
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
/* Note: We only write the changing part of the fscb. .i.e upto the
|
lock_super(sb);
|
||||||
* the fscb->s_dev_table_oid member. There is no read-modify-write
|
|
||||||
* here.
|
|
||||||
*/
|
|
||||||
ios->length = offsetof(struct exofs_fscb, s_dev_table_oid);
|
ios->length = offsetof(struct exofs_fscb, s_dev_table_oid);
|
||||||
memset(fscb, 0, ios->length);
|
memset(fscb, 0, ios->length);
|
||||||
fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
|
fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
|
||||||
|
@ -239,16 +351,17 @@ int exofs_sync_fs(struct super_block *sb, int wait)
|
||||||
ios->cred = sbi->s_cred;
|
ios->cred = sbi->s_cred;
|
||||||
|
|
||||||
ret = exofs_sbi_write(ios);
|
ret = exofs_sbi_write(ios);
|
||||||
if (unlikely(ret)) {
|
if (unlikely(ret))
|
||||||
EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
|
EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
|
||||||
goto out;
|
else
|
||||||
}
|
sb->s_dirt = 0;
|
||||||
sb->s_dirt = 0;
|
|
||||||
|
|
||||||
|
|
||||||
|
unlock_super(sb);
|
||||||
out:
|
out:
|
||||||
EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret);
|
EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret);
|
||||||
exofs_put_io_state(ios);
|
exofs_put_io_state(ios);
|
||||||
unlock_super(sb);
|
kfree(fscb);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -292,13 +405,14 @@ static void exofs_put_super(struct super_block *sb)
|
||||||
int num_pend;
|
int num_pend;
|
||||||
struct exofs_sb_info *sbi = sb->s_fs_info;
|
struct exofs_sb_info *sbi = sb->s_fs_info;
|
||||||
|
|
||||||
if (sb->s_dirt)
|
|
||||||
exofs_write_super(sb);
|
|
||||||
|
|
||||||
/* make sure there are no pending commands */
|
/* make sure there are no pending commands */
|
||||||
for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0;
|
for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0;
|
||||||
num_pend = atomic_read(&sbi->s_curr_pending)) {
|
num_pend = atomic_read(&sbi->s_curr_pending)) {
|
||||||
wait_queue_head_t wq;
|
wait_queue_head_t wq;
|
||||||
|
|
||||||
|
printk(KERN_NOTICE "%s: !!Pending operations in flight. "
|
||||||
|
"This is a BUG. please report to osd-dev@open-osd.org\n",
|
||||||
|
__func__);
|
||||||
init_waitqueue_head(&wq);
|
init_waitqueue_head(&wq);
|
||||||
wait_event_timeout(wq,
|
wait_event_timeout(wq,
|
||||||
(atomic_read(&sbi->s_curr_pending) == 0),
|
(atomic_read(&sbi->s_curr_pending) == 0),
|
||||||
|
@ -390,6 +504,23 @@ static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static unsigned __ra_pages(struct exofs_layout *layout)
|
||||||
|
{
|
||||||
|
const unsigned _MIN_RA = 32; /* min 128K read-ahead */
|
||||||
|
unsigned ra_pages = layout->group_width * layout->stripe_unit /
|
||||||
|
PAGE_SIZE;
|
||||||
|
unsigned max_io_pages = exofs_max_io_pages(layout, ~0);
|
||||||
|
|
||||||
|
ra_pages *= 2; /* two stripes */
|
||||||
|
if (ra_pages < _MIN_RA)
|
||||||
|
ra_pages = roundup(_MIN_RA, ra_pages / 2);
|
||||||
|
|
||||||
|
if (ra_pages > max_io_pages)
|
||||||
|
ra_pages = max_io_pages;
|
||||||
|
|
||||||
|
return ra_pages;
|
||||||
|
}
|
||||||
|
|
||||||
/* @odi is valid only as long as @fscb_dev is valid */
|
/* @odi is valid only as long as @fscb_dev is valid */
|
||||||
static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
|
static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
|
||||||
struct osd_dev_info *odi)
|
struct osd_dev_info *odi)
|
||||||
|
@ -495,7 +626,7 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
|
||||||
}
|
}
|
||||||
|
|
||||||
od = osduld_info_lookup(&odi);
|
od = osduld_info_lookup(&odi);
|
||||||
if (unlikely(IS_ERR(od))) {
|
if (IS_ERR(od)) {
|
||||||
ret = PTR_ERR(od);
|
ret = PTR_ERR(od);
|
||||||
EXOFS_ERR("ERROR: device requested is not found "
|
EXOFS_ERR("ERROR: device requested is not found "
|
||||||
"osd_name-%s =>%d\n", odi.osdname, ret);
|
"osd_name-%s =>%d\n", odi.osdname, ret);
|
||||||
|
@ -558,9 +689,17 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
|
||||||
goto free_bdi;
|
goto free_bdi;
|
||||||
|
|
||||||
/* use mount options to fill superblock */
|
/* use mount options to fill superblock */
|
||||||
od = osduld_path_lookup(opts->dev_name);
|
if (opts->is_osdname) {
|
||||||
|
struct osd_dev_info odi = {.systemid_len = 0};
|
||||||
|
|
||||||
|
odi.osdname_len = strlen(opts->dev_name);
|
||||||
|
odi.osdname = (u8 *)opts->dev_name;
|
||||||
|
od = osduld_info_lookup(&odi);
|
||||||
|
} else {
|
||||||
|
od = osduld_path_lookup(opts->dev_name);
|
||||||
|
}
|
||||||
if (IS_ERR(od)) {
|
if (IS_ERR(od)) {
|
||||||
ret = PTR_ERR(od);
|
ret = -EINVAL;
|
||||||
goto free_sbi;
|
goto free_sbi;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -594,6 +733,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
|
||||||
goto free_sbi;
|
goto free_sbi;
|
||||||
|
|
||||||
sb->s_magic = le16_to_cpu(fscb.s_magic);
|
sb->s_magic = le16_to_cpu(fscb.s_magic);
|
||||||
|
/* NOTE: we read below to be backward compatible with old versions */
|
||||||
sbi->s_nextid = le64_to_cpu(fscb.s_nextid);
|
sbi->s_nextid = le64_to_cpu(fscb.s_nextid);
|
||||||
sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles);
|
sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles);
|
||||||
|
|
||||||
|
@ -604,7 +744,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
|
||||||
ret = -EINVAL;
|
ret = -EINVAL;
|
||||||
goto free_sbi;
|
goto free_sbi;
|
||||||
}
|
}
|
||||||
if (le32_to_cpu(fscb.s_version) != EXOFS_FSCB_VER) {
|
if (le32_to_cpu(fscb.s_version) > EXOFS_FSCB_VER) {
|
||||||
EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n",
|
EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n",
|
||||||
EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version));
|
EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version));
|
||||||
ret = -EINVAL;
|
ret = -EINVAL;
|
||||||
|
@ -622,7 +762,10 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
|
||||||
goto free_sbi;
|
goto free_sbi;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__sbi_read_stats(sbi);
|
||||||
|
|
||||||
/* set up operation vectors */
|
/* set up operation vectors */
|
||||||
|
sbi->bdi.ra_pages = __ra_pages(&sbi->layout);
|
||||||
sb->s_bdi = &sbi->bdi;
|
sb->s_bdi = &sbi->bdi;
|
||||||
sb->s_fs_info = sbi;
|
sb->s_fs_info = sbi;
|
||||||
sb->s_op = &exofs_sops;
|
sb->s_op = &exofs_sops;
|
||||||
|
@ -652,6 +795,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
|
||||||
|
|
||||||
_exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0],
|
_exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0],
|
||||||
sbi->layout.s_pid);
|
sbi->layout.s_pid);
|
||||||
|
if (opts->is_osdname)
|
||||||
|
kfree(opts->dev_name);
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
free_sbi:
|
free_sbi:
|
||||||
|
@ -660,6 +805,8 @@ free_bdi:
|
||||||
EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
|
EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
|
||||||
opts->dev_name, sbi->layout.s_pid, ret);
|
opts->dev_name, sbi->layout.s_pid, ret);
|
||||||
exofs_free_sbi(sbi);
|
exofs_free_sbi(sbi);
|
||||||
|
if (opts->is_osdname)
|
||||||
|
kfree(opts->dev_name);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -677,7 +824,8 @@ static struct dentry *exofs_mount(struct file_system_type *type,
|
||||||
if (ret)
|
if (ret)
|
||||||
return ERR_PTR(ret);
|
return ERR_PTR(ret);
|
||||||
|
|
||||||
opts.dev_name = dev_name;
|
if (!opts.dev_name)
|
||||||
|
opts.dev_name = dev_name;
|
||||||
return mount_nodev(type, flags, &opts, exofs_fill_super);
|
return mount_nodev(type, flags, &opts, exofs_fill_super);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Загрузка…
Ссылка в новой задаче