Orangefs: kernel client part 3

Signed-off-by: Mike Marshall <hubcap@omnibond.com>
This commit is contained in:
Mike Marshall 2015-07-17 10:38:13 -04:00
Родитель 5db11c21a9
Коммит 274dcf55bd
5 изменённых файлов: 2477 добавлений и 0 удалений

473
fs/orangefs/namei.c Normal file
Просмотреть файл

@ -0,0 +1,473 @@
/*
* (C) 2001 Clemson University and The University of Chicago
*
* See COPYING in top-level directory.
*/
/*
* Linux VFS namei operations.
*/
#include "protocol.h"
#include "pvfs2-kernel.h"
/*
 * Create a regular file for a negative dentry.
 *
 * Fills in a PVFS2_VFS_OP_CREATE upcall (parent reference, default
 * attributes for the requested mode, entry name), hands it to
 * service_operation(), and on success builds an inode for the handle in
 * the downcall and instantiates the dentry with it.  The parent
 * directory's mtime/ctime are refreshed afterwards.
 *
 * Returns 0 on success, -ENOMEM if no op could be allocated, otherwise
 * the negative error from service_operation() or pvfs2_new_inode().
 */
static int pvfs2_create(struct inode *dir,
			struct dentry *dentry,
			umode_t mode,
			bool exclusive)
{
	struct pvfs2_inode_s *parent = PVFS2_I(dir);
	struct pvfs2_kernel_op_s *op;
	struct inode *new_inode;
	int err;

	gossip_debug(GOSSIP_NAME_DEBUG, "%s: called\n", __func__);

	op = op_alloc(PVFS2_VFS_OP_CREATE);
	if (!op)
		return -ENOMEM;

	/* describe the object to create */
	op->upcall.req.create.parent_refn = parent->refn;
	fill_default_sys_attrs(op->upcall.req.create.attributes,
			       PVFS_TYPE_METAFILE, mode);
	strncpy(op->upcall.req.create.d_name,
		dentry->d_name.name, PVFS2_NAME_LEN);

	err = service_operation(op, __func__, get_interruptible_flag(dir));

	gossip_debug(GOSSIP_NAME_DEBUG,
		     "Create Got PVFS2 handle %pU on fsid %d (ret=%d)\n",
		     &op->downcall.resp.create.refn.khandle,
		     op->downcall.resp.create.refn.fs_id, err);

	if (err < 0) {
		gossip_debug(GOSSIP_NAME_DEBUG,
			     "%s: failed with error code %d\n",
			     __func__, err);
		goto release;
	}

	new_inode = pvfs2_new_inode(dir->i_sb, dir, S_IFREG | mode, 0,
				    &op->downcall.resp.create.refn);
	if (IS_ERR(new_inode)) {
		gossip_err("*** Failed to allocate pvfs2 file inode\n");
		err = PTR_ERR(new_inode);
		goto release;
	}

	gossip_debug(GOSSIP_NAME_DEBUG,
		     "Assigned file inode new number of %pU\n",
		     get_khandle_from_ino(new_inode));

	/* bind the new inode to the dentry, then release the new-inode lock */
	d_instantiate(dentry, new_inode);
	unlock_new_inode(new_inode);

	gossip_debug(GOSSIP_NAME_DEBUG,
		     "Inode (Regular File) %pU -> %s\n",
		     get_khandle_from_ino(new_inode),
		     dentry->d_name.name);

	/* the directory gained an entry: refresh its timestamps */
	SetMtimeFlag(parent);
	dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
	mark_inode_dirty_sync(dir);
	err = 0;

release:
	op_release(op);
	gossip_debug(GOSSIP_NAME_DEBUG, "%s: returning %d\n", __func__, err);
	return err;
}
/*
* Attempt to resolve an object name (dentry->d_name), parent handle, and
* fsid into a handle for the object.
*/
static struct dentry *pvfs2_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
struct pvfs2_inode_s *parent = PVFS2_I(dir);
struct pvfs2_kernel_op_s *new_op;
struct inode *inode;
struct dentry *res;
int ret = -EINVAL;
/*
* in theory we could skip a lookup here (if the intent is to
* create) in order to avoid a potentially failed lookup, but
* leaving it in can skip a valid lookup and try to create a file
* that already exists (e.g. the vfs already handles checking for
* -EEXIST on O_EXCL opens, which is broken if we skip this lookup
* in the create path)
*/
gossip_debug(GOSSIP_NAME_DEBUG, "%s called on %s\n",
__func__, dentry->d_name.name);
if (dentry->d_name.len > (PVFS2_NAME_LEN - 1))
return ERR_PTR(-ENAMETOOLONG);
new_op = op_alloc(PVFS2_VFS_OP_LOOKUP);
if (!new_op)
return ERR_PTR(-ENOMEM);
new_op->upcall.req.lookup.sym_follow = flags & LOOKUP_FOLLOW;
gossip_debug(GOSSIP_NAME_DEBUG, "%s:%s:%d using parent %pU\n",
__FILE__,
__func__,
__LINE__,
&parent->refn.khandle);
new_op->upcall.req.lookup.parent_refn = parent->refn;
strncpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name,
PVFS2_NAME_LEN);
gossip_debug(GOSSIP_NAME_DEBUG,
"%s: doing lookup on %s under %pU,%d (follow=%s)\n",
__func__,
new_op->upcall.req.lookup.d_name,
&new_op->upcall.req.lookup.parent_refn.khandle,
new_op->upcall.req.lookup.parent_refn.fs_id,
((new_op->upcall.req.lookup.sym_follow ==
PVFS2_LOOKUP_LINK_FOLLOW) ? "yes" : "no"));
ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
gossip_debug(GOSSIP_NAME_DEBUG,
"Lookup Got %pU, fsid %d (ret=%d)\n",
&new_op->downcall.resp.lookup.refn.khandle,
new_op->downcall.resp.lookup.refn.fs_id,
ret);
if (ret < 0) {
if (ret == -ENOENT) {
/*
* if no inode was found, add a negative dentry to
* dcache anyway; if we don't, we don't hold expected
* lookup semantics and we most noticeably break
* during directory renames.
*
* however, if the operation failed or exited, do not
* add the dentry (e.g. in the case that a touch is
* issued on a file that already exists that was
* interrupted during this lookup -- no need to add
* another negative dentry for an existing file)
*/
gossip_debug(GOSSIP_NAME_DEBUG,
"pvfs2_lookup: Adding *negative* dentry "
"%p for %s\n",
dentry,
dentry->d_name.name);
d_add(dentry, NULL);
res = NULL;
goto out;
}
/* must be a non-recoverable error */
res = ERR_PTR(ret);
goto out;
}
inode = pvfs2_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn);
if (IS_ERR(inode)) {
gossip_debug(GOSSIP_NAME_DEBUG,
"error %ld from iget\n", PTR_ERR(inode));
res = ERR_CAST(inode);
goto out;
}
gossip_debug(GOSSIP_NAME_DEBUG,
"%s:%s:%d "
"Found good inode [%lu] with count [%d]\n",
__FILE__,
__func__,
__LINE__,
inode->i_ino,
(int)atomic_read(&inode->i_count));
/* update dentry/inode pair into dcache */
res = d_splice_alias(inode, dentry);
gossip_debug(GOSSIP_NAME_DEBUG,
"Lookup success (inode ct = %d)\n",
(int)atomic_read(&inode->i_count));
out:
op_release(new_op);
return res;
}
/*
 * Remove the entry named by dentry from the directory dir by sending a
 * PVFS2_VFS_OP_REMOVE upcall.  On success the victim's link count is
 * dropped and the parent's mtime/ctime are refreshed.
 *
 * Returns 0 on success; non-zero (-ENOMEM or the result of
 * service_operation()) otherwise.
 */
static int pvfs2_unlink(struct inode *dir, struct dentry *dentry)
{
	struct pvfs2_inode_s *parent = PVFS2_I(dir);
	struct inode *victim = dentry->d_inode;
	struct pvfs2_kernel_op_s *op;
	int err;

	gossip_debug(GOSSIP_NAME_DEBUG,
		     "%s: called on %s\n"
		     " (inode %pU): Parent is %pU | fs_id %d\n",
		     __func__,
		     dentry->d_name.name,
		     get_khandle_from_ino(victim),
		     &parent->refn.khandle,
		     parent->refn.fs_id);

	op = op_alloc(PVFS2_VFS_OP_REMOVE);
	if (!op)
		return -ENOMEM;

	op->upcall.req.remove.parent_refn = parent->refn;
	strncpy(op->upcall.req.remove.d_name, dentry->d_name.name,
		PVFS2_NAME_LEN);

	err = service_operation(op, "pvfs2_unlink",
				get_interruptible_flag(victim));

	/* the op is no longer needed whatever the outcome */
	op_release(op);

	if (err)
		return err;

	drop_nlink(victim);

	/* the directory lost an entry: refresh its timestamps */
	SetMtimeFlag(parent);
	dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
	mark_inode_dirty_sync(dir);

	return 0;
}
/*
 * PVFS2 has no hard links.  This handler exists only so that callers get
 * a meaningful -EOPNOTSUPP; without it the kernel would report a
 * misleading EPERM.
 */
static int pvfs2_link(struct dentry *old_dentry, struct inode *dir,
		      struct dentry *dentry)
{
	return -EOPNOTSUPP;
}
/*
 * PVFS2 has no special files (fifos, device nodes).  This handler exists
 * only so that callers get a meaningful -EOPNOTSUPP; without it the
 * kernel would report a misleading EPERM.
 */
static int pvfs2_mknod(struct inode *dir, struct dentry *dentry,
		       umode_t mode, dev_t rdev)
{
	return -EOPNOTSUPP;
}
/*
 * Create a symbolic link named by dentry under dir, pointing at symname.
 *
 * Fills in a PVFS2_VFS_OP_SYMLINK upcall (parent reference, default
 * attributes, entry name, target), hands it to service_operation(), and
 * on success builds an S_IFLNK inode for the handle in the downcall and
 * instantiates the dentry with it.
 *
 * Returns 0 on success, -EINVAL for a NULL target, -ENAMETOOLONG if the
 * target does not fit (with its terminator) in PVFS2_NAME_LEN bytes,
 * -ENOMEM if no op could be allocated, otherwise the negative error from
 * service_operation() or pvfs2_new_inode().
 */
static int pvfs2_symlink(struct inode *dir,
			 struct dentry *dentry,
			 const char *symname)
{
	struct pvfs2_inode_s *parent = PVFS2_I(dir);
	struct pvfs2_kernel_op_s *new_op;
	struct inode *inode;
	/* octal: rwxr-xr-x (a decimal 755 would be mode 01363) */
	int mode = 0755;
	int ret;

	gossip_debug(GOSSIP_NAME_DEBUG, "%s: called\n", __func__);

	if (!symname)
		return -EINVAL;

	/*
	 * The target is copied with strncpy() bounded by PVFS2_NAME_LEN;
	 * refuse anything that would be truncated or left unterminated
	 * (same bound pvfs2_lookup() applies to entry names).
	 */
	if (strlen(symname) > (PVFS2_NAME_LEN - 1))
		return -ENAMETOOLONG;

	new_op = op_alloc(PVFS2_VFS_OP_SYMLINK);
	if (!new_op)
		return -ENOMEM;

	new_op->upcall.req.sym.parent_refn = parent->refn;

	fill_default_sys_attrs(new_op->upcall.req.sym.attributes,
			       PVFS_TYPE_SYMLINK,
			       mode);

	strncpy(new_op->upcall.req.sym.entry_name,
		dentry->d_name.name,
		PVFS2_NAME_LEN);
	strncpy(new_op->upcall.req.sym.target, symname, PVFS2_NAME_LEN);

	ret = service_operation(new_op, __func__, get_interruptible_flag(dir));

	gossip_debug(GOSSIP_NAME_DEBUG,
		     "Symlink Got PVFS2 handle %pU on fsid %d (ret=%d)\n",
		     &new_op->downcall.resp.sym.refn.khandle,
		     new_op->downcall.resp.sym.refn.fs_id, ret);

	if (ret < 0) {
		gossip_debug(GOSSIP_NAME_DEBUG,
			     "%s: failed with error code %d\n",
			     __func__, ret);
		goto out;
	}

	inode = pvfs2_new_inode(dir->i_sb, dir, S_IFLNK | mode, 0,
				&new_op->downcall.resp.sym.refn);
	if (IS_ERR(inode)) {
		gossip_err
		    ("*** Failed to allocate pvfs2 symlink inode\n");
		ret = PTR_ERR(inode);
		goto out;
	}

	gossip_debug(GOSSIP_NAME_DEBUG,
		     "Assigned symlink inode new number of %pU\n",
		     get_khandle_from_ino(inode));

	/* bind the new inode to the dentry, then release the new-inode lock */
	d_instantiate(dentry, inode);
	unlock_new_inode(inode);

	gossip_debug(GOSSIP_NAME_DEBUG,
		     "Inode (Symlink) %pU -> %s\n",
		     get_khandle_from_ino(inode),
		     dentry->d_name.name);

	/* the directory gained an entry: refresh its timestamps */
	SetMtimeFlag(parent);
	dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
	mark_inode_dirty_sync(dir);
	ret = 0;
out:
	op_release(new_op);
	return ret;
}
/*
 * Create a directory named by dentry under dir.
 *
 * Fills in a PVFS2_VFS_OP_MKDIR upcall, hands it to service_operation(),
 * and on success builds an S_IFDIR inode for the handle in the downcall
 * and instantiates the dentry with it.
 *
 * Returns the (non-negative) result of service_operation() on success,
 * -ENOMEM if no op could be allocated, otherwise the negative error from
 * service_operation() or pvfs2_new_inode().
 */
static int pvfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct pvfs2_inode_s *parent = PVFS2_I(dir);
	struct pvfs2_kernel_op_s *op;
	struct inode *new_dir;
	int err;

	op = op_alloc(PVFS2_VFS_OP_MKDIR);
	if (!op)
		return -ENOMEM;

	/* describe the directory to create */
	op->upcall.req.mkdir.parent_refn = parent->refn;
	fill_default_sys_attrs(op->upcall.req.mkdir.attributes,
			       PVFS_TYPE_DIRECTORY, mode);
	strncpy(op->upcall.req.mkdir.d_name,
		dentry->d_name.name, PVFS2_NAME_LEN);

	err = service_operation(op, __func__, get_interruptible_flag(dir));

	gossip_debug(GOSSIP_NAME_DEBUG,
		     "Mkdir Got PVFS2 handle %pU on fsid %d\n",
		     &op->downcall.resp.mkdir.refn.khandle,
		     op->downcall.resp.mkdir.refn.fs_id);

	if (err < 0) {
		gossip_debug(GOSSIP_NAME_DEBUG,
			     "%s: failed with error code %d\n",
			     __func__, err);
		goto release;
	}

	new_dir = pvfs2_new_inode(dir->i_sb, dir, S_IFDIR | mode, 0,
				  &op->downcall.resp.mkdir.refn);
	if (IS_ERR(new_dir)) {
		gossip_err("*** Failed to allocate pvfs2 dir inode\n");
		err = PTR_ERR(new_dir);
		goto release;
	}

	gossip_debug(GOSSIP_NAME_DEBUG,
		     "Assigned dir inode new number of %pU\n",
		     get_khandle_from_ino(new_dir));

	/* bind the new inode to the dentry, then release the new-inode lock */
	d_instantiate(dentry, new_dir);
	unlock_new_inode(new_dir);

	gossip_debug(GOSSIP_NAME_DEBUG,
		     "Inode (Directory) %pU -> %s\n",
		     get_khandle_from_ino(new_dir),
		     dentry->d_name.name);

	/*
	 * Directory nlink cannot be kept consistent across clients, so it
	 * is deliberately left untouched here (constant 1); only the
	 * parent's timestamps are refreshed.
	 */
	SetMtimeFlag(parent);
	dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
	mark_inode_dirty_sync(dir);

release:
	op_release(op);
	return err;
}
/*
 * Rename old_dentry (in old_dir) to new_dentry (in new_dir) by sending a
 * PVFS2_VFS_OP_RENAME upcall carrying both parent references and both
 * names.
 *
 * Returns the result of service_operation(), or -ENOMEM if no op could
 * be allocated (matching every other handler in this file; this path
 * previously reported -EINVAL).
 */
static int pvfs2_rename(struct inode *old_dir,
			struct dentry *old_dentry,
			struct inode *new_dir,
			struct dentry *new_dentry)
{
	struct pvfs2_kernel_op_s *new_op;
	int ret;

	gossip_debug(GOSSIP_NAME_DEBUG,
		     "pvfs2_rename: called (%s/%s => %s/%s) ct=%d\n",
		     old_dentry->d_parent->d_name.name,
		     old_dentry->d_name.name,
		     new_dentry->d_parent->d_name.name,
		     new_dentry->d_name.name,
		     d_count(new_dentry));

	new_op = op_alloc(PVFS2_VFS_OP_RENAME);
	if (!new_op)
		return -ENOMEM;

	new_op->upcall.req.rename.old_parent_refn = PVFS2_I(old_dir)->refn;
	new_op->upcall.req.rename.new_parent_refn = PVFS2_I(new_dir)->refn;

	strncpy(new_op->upcall.req.rename.d_old_name,
		old_dentry->d_name.name,
		PVFS2_NAME_LEN);
	strncpy(new_op->upcall.req.rename.d_new_name,
		new_dentry->d_name.name,
		PVFS2_NAME_LEN);

	ret = service_operation(new_op,
				"pvfs2_rename",
				get_interruptible_flag(old_dentry->d_inode));

	gossip_debug(GOSSIP_NAME_DEBUG,
		     "pvfs2_rename: got downcall status %d\n",
		     ret);

	/*
	 * If the destination name already had an inode, stamp its ctime.
	 * NOTE(review): this happens whether or not the rename succeeded;
	 * confirm that is intended.
	 */
	if (new_dentry->d_inode)
		new_dentry->d_inode->i_ctime = CURRENT_TIME;

	op_release(new_op);
	return ret;
}
/*
 * VFS inode operations used for PVFS2 directories.  Handlers not defined
 * in this file (ACL, attribute and xattr callbacks) are referenced by
 * name and come from elsewhere.
 */
struct inode_operations pvfs2_dir_inode_operations = {
	/* name resolution */
	.lookup = pvfs2_lookup,

	/* entry creation */
	.create = pvfs2_create,
	.mkdir = pvfs2_mkdir,
	.symlink = pvfs2_symlink,
	.link = pvfs2_link,		/* always -EOPNOTSUPP */
	.mknod = pvfs2_mknod,		/* always -EOPNOTSUPP */

	/* entry removal and rename; rmdir shares the unlink handler */
	.unlink = pvfs2_unlink,
	.rmdir = pvfs2_unlink,
	.rename = pvfs2_rename,

	/* attributes */
	.setattr = pvfs2_setattr,
	.getattr = pvfs2_getattr,

	/* ACLs and extended attributes */
	.get_acl = pvfs2_get_acl,
	.set_acl = pvfs2_set_acl,
	.setxattr = generic_setxattr,
	.getxattr = generic_getxattr,
	.removexattr = generic_removexattr,
	.listxattr = pvfs2_listxattr,
};

970
fs/orangefs/pvfs2-bufmap.c Normal file
Просмотреть файл

@ -0,0 +1,970 @@
/*
* (C) 2001 Clemson University and The University of Chicago
*
* See COPYING in top-level directory.
*/
#include "protocol.h"
#include "pvfs2-kernel.h"
#include "pvfs2-bufmap.h"
/*
 * Wait queue that pvfs_bufmap_initialize() wakes (interruptibly) once a
 * new bufmap has been installed; not static, so it can be waited on from
 * other files.
 */
DECLARE_WAIT_QUEUE_HEAD(pvfs2_bufmap_init_waitq);
/*
 * Bookkeeping for the user buffer region described by a
 * struct PVFS_dev_map_desc: the region is split into desc_count
 * descriptors of desc_size bytes each, and its pages are held in
 * page_array.  The single installed instance is __pvfs2_bufmap.
 */
struct pvfs2_bufmap {
/* reference count; set to 1 by pvfs2_bufmap_alloc() */
atomic_t refcnt;
/* size of one descriptor in bytes (user_desc->size) */
int desc_size;
/* ilog2(desc_size) */
int desc_shift;
/* number of descriptors (user_desc->count) */
int desc_count;
/* size of the whole region in bytes (user_desc->total_size) */
int total_size;
/* total_size / PAGE_SIZE */
int page_count;
/* page_count page pointers, filled in by get_user_pages() */
struct page **page_array;
/* desc_count descriptors, each pointing at its slice of page_array */
struct pvfs_bufmap_desc *desc_array;
/* array to track usage of buffer descriptors (0 = free, 1 = in use) */
int *buffer_index_array;
/* protects buffer_index_array */
spinlock_t buffer_index_lock;
/* array to track usage of buffer descriptors for readdir */
int readdir_index_array[PVFS2_READDIR_DEFAULT_DESC_COUNT];
/* protects readdir_index_array */
spinlock_t readdir_index_lock;
} *__pvfs2_bufmap;
/*
 * Held while __pvfs2_bufmap is installed (pvfs_bufmap_initialize), while
 * a reference is taken on it (pvfs2_bufmap_ref), and while it is cleared
 * on the final unref (pvfs2_bufmap_unref).
 */
static DEFINE_SPINLOCK(pvfs2_bufmap_lock);
/*
 * Release every page recorded in bufmap->page_array (page_count entries).
 * The array itself is not freed here.
 */
static void
pvfs2_bufmap_unmap(struct pvfs2_bufmap *bufmap)
{
	struct page **pagep = bufmap->page_array;
	int remaining = bufmap->page_count;

	for (; remaining > 0; remaining--, pagep++)
		page_cache_release(*pagep);
}
/*
 * Free the bufmap structure together with the three arrays it owns.
 * Pages referenced by page_array are not released here; see
 * pvfs2_bufmap_unmap().
 */
static void
pvfs2_bufmap_free(struct pvfs2_bufmap *bufmap)
{
	/* bookkeeping arrays first ... */
	kfree(bufmap->buffer_index_array);
	kfree(bufmap->desc_array);
	kfree(bufmap->page_array);

	/* ... then the container itself */
	kfree(bufmap);
}
/*
 * Take a reference on the currently installed bufmap.
 *
 * Returns the bufmap with its refcnt incremented, or NULL when none is
 * installed.  The pointer is sampled and the count bumped under
 * pvfs2_bufmap_lock.
 */
struct pvfs2_bufmap *pvfs2_bufmap_ref(void)
{
	struct pvfs2_bufmap *map;

	spin_lock(&pvfs2_bufmap_lock);
	map = __pvfs2_bufmap;
	if (map)
		atomic_inc(&map->refcnt);
	spin_unlock(&pvfs2_bufmap_lock);

	return map;
}
/*
 * Drop one reference on bufmap.  When the count reaches zero the global
 * __pvfs2_bufmap pointer is cleared under pvfs2_bufmap_lock, after which
 * the pages are released and the structure is freed.
 *
 * bufmap must not be NULL.
 */
void pvfs2_bufmap_unref(struct pvfs2_bufmap *bufmap)
{
	/* not the last reference: nothing more to do */
	if (!atomic_dec_and_lock(&bufmap->refcnt, &pvfs2_bufmap_lock))
		return;

	/* last reference gone; the lock is held at this point */
	__pvfs2_bufmap = NULL;
	spin_unlock(&pvfs2_bufmap_lock);

	pvfs2_bufmap_unmap(bufmap);
	pvfs2_bufmap_free(bufmap);
}
/*
 * Report the size in bytes of one buffer descriptor (desc_size) of the
 * installed bufmap.
 *
 * Returns 0 when no bufmap is installed.  In that case there is no
 * reference to drop, so pvfs2_bufmap_unref() (which dereferences its
 * argument) must not be called.
 */
inline int pvfs_bufmap_size_query(void)
{
	struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
	int size;

	if (!bufmap)
		return 0;

	size = bufmap->desc_size;
	pvfs2_bufmap_unref(bufmap);
	return size;
}
/*
 * Report desc_shift (ilog2 of the descriptor size) of the installed
 * bufmap.
 *
 * Returns 0 when no bufmap is installed.  In that case there is no
 * reference to drop, so pvfs2_bufmap_unref() (which dereferences its
 * argument) must not be called.
 */
inline int pvfs_bufmap_shift_query(void)
{
	struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
	int shift;

	if (!bufmap)
		return 0;

	shift = bufmap->desc_shift;
	pvfs2_bufmap_unref(bufmap);
	return shift;
}
/* tasks waiting for a free entry in buffer_index_array sleep here */
static DECLARE_WAIT_QUEUE_HEAD(bufmap_waitq);
/* tasks waiting for a free entry in readdir_index_array sleep here */
static DECLARE_WAIT_QUEUE_HEAD(readdir_waitq);
/*
 * get_bufmap_init
 *
 * Tells whether a bufmap is currently installed, i.e. whether the shared
 * memory system (including the buffer_index_array) is available.
 *
 * returns 1 if __pvfs2_bufmap is set, 0 otherwise; the pointer is read
 * without taking pvfs2_bufmap_lock
 */
int get_bufmap_init(void)
{
	if (__pvfs2_bufmap)
		return 1;

	return 0;
}
/*
 * Allocate a bufmap sized according to user_desc: the structure itself,
 * the slot-usage array, the descriptor array and the page-pointer array.
 * No pages are pinned here.
 *
 * Returns the new bufmap (refcnt 1) or NULL if any allocation failed.
 */
static struct pvfs2_bufmap *
pvfs2_bufmap_alloc(struct PVFS_dev_map_desc *user_desc)
{
	struct pvfs2_bufmap *map = kzalloc(sizeof(*map), GFP_KERNEL);

	if (!map)
		return NULL;

	atomic_set(&map->refcnt, 1);
	spin_lock_init(&map->buffer_index_lock);
	spin_lock_init(&map->readdir_index_lock);

	/* geometry as described by the caller */
	map->total_size = user_desc->total_size;
	map->desc_count = user_desc->count;
	map->desc_size = user_desc->size;
	map->desc_shift = ilog2(map->desc_size);
	map->page_count = map->total_size / PAGE_SIZE;

	map->buffer_index_array =
		kcalloc(map->desc_count, sizeof(int), GFP_KERNEL);
	if (!map->buffer_index_array) {
		gossip_err("pvfs2: could not allocate %d buffer indices\n",
			   map->desc_count);
		goto fail;
	}

	map->desc_array =
		kcalloc(map->desc_count, sizeof(struct pvfs_bufmap_desc),
			GFP_KERNEL);
	if (!map->desc_array) {
		gossip_err("pvfs2: could not allocate %d descriptors\n",
			   map->desc_count);
		goto fail;
	}

	/* storage to track the page mappings */
	map->page_array =
		kcalloc(map->page_count, sizeof(struct page *), GFP_KERNEL);
	if (!map->page_array)
		goto fail;

	return map;

fail:
	/*
	 * map came from kzalloc(), so members not yet allocated are NULL
	 * and kfree(NULL) is a no-op.
	 */
	kfree(map->desc_array);
	kfree(map->buffer_index_array);
	kfree(map);
	return NULL;
}
static int
pvfs2_bufmap_map(struct pvfs2_bufmap *bufmap,
struct PVFS_dev_map_desc *user_desc)
{
int pages_per_desc = bufmap->desc_size / PAGE_SIZE;
int offset = 0, ret, i;
/* map the pages */
down_write(&current->mm->mmap_sem);
ret = get_user_pages(current,
current->mm,
(unsigned long)user_desc->ptr,
bufmap->page_count,
1,
0,
bufmap->page_array,
NULL);
up_write(&current->mm->mmap_sem);
if (ret < 0)
return ret;
if (ret != bufmap->page_count) {
gossip_err("pvfs2 error: asked for %d pages, only got %d.\n",
bufmap->page_count, ret);
for (i = 0; i < ret; i++) {
SetPageError(bufmap->page_array[i]);
page_cache_release(bufmap->page_array[i]);
}
return -ENOMEM;
}
/*
* ideally we want to get kernel space pointers for each page, but
* we can't kmap that many pages at once if highmem is being used.
* so instead, we just kmap/kunmap the page address each time the
* kaddr is needed.
*/
for (i = 0; i < bufmap->page_count; i++)
flush_dcache_page(bufmap->page_array[i]);
/* build a list of available descriptors */
for (offset = 0, i = 0; i < bufmap->desc_count; i++) {
bufmap->desc_array[i].page_array = &bufmap->page_array[offset];
bufmap->desc_array[i].array_count = pages_per_desc;
bufmap->desc_array[i].uaddr =
(user_desc->ptr + (i * pages_per_desc * PAGE_SIZE));
offset += pages_per_desc;
}
return 0;
}
/*
 * pvfs_bufmap_initialize()
 *
 * Validates the region described by user_desc, allocates a bufmap for
 * it, pins its pages and installs it as the global __pvfs2_bufmap.
 *
 * returns 0 on success; -EINVAL if the region is misaligned or
 * inconsistently sized, -ENOMEM if allocation fails, the error from
 * pvfs2_bufmap_map(), or -EALREADY if a bufmap is already installed
 */
int pvfs_bufmap_initialize(struct PVFS_dev_map_desc *user_desc)
{
	struct pvfs2_bufmap *bufmap;
	int err;

	gossip_debug(GOSSIP_BUFMAP_DEBUG,
		     "pvfs_bufmap_initialize: called (ptr (%p) sz (%d) cnt(%d).\n",
		     user_desc->ptr,
		     user_desc->size,
		     user_desc->count);

	/* the region must start on a page boundary ... */
	if (PAGE_ALIGN((unsigned long)user_desc->ptr) !=
	    (unsigned long)user_desc->ptr) {
		gossip_err("pvfs2 error: memory alignment (front). %p\n",
			   user_desc->ptr);
		return -EINVAL;
	}

	/* ... and end on one */
	if (PAGE_ALIGN(((unsigned long)user_desc->ptr + user_desc->total_size))
	    != (unsigned long)(user_desc->ptr + user_desc->total_size)) {
		gossip_err("pvfs2 error: memory alignment (back).(%p + %d)\n",
			   user_desc->ptr,
			   user_desc->total_size);
		return -EINVAL;
	}

	/* total size must be exactly count descriptors of the given size */
	if (user_desc->total_size != (user_desc->size * user_desc->count)) {
		gossip_err("pvfs2 error: user provided an oddly sized buffer: (%d, %d, %d)\n",
			   user_desc->total_size,
			   user_desc->size,
			   user_desc->count);
		return -EINVAL;
	}

	/* each descriptor must cover whole pages */
	if ((user_desc->size % PAGE_SIZE) != 0) {
		gossip_err("pvfs2 error: bufmap size not page size divisible (%d).\n",
			   user_desc->size);
		return -EINVAL;
	}

	bufmap = pvfs2_bufmap_alloc(user_desc);
	if (!bufmap)
		return -ENOMEM;

	err = pvfs2_bufmap_map(bufmap, user_desc);
	if (err)
		goto free_map;

	/* install, unless another bufmap is already in place */
	spin_lock(&pvfs2_bufmap_lock);
	if (__pvfs2_bufmap) {
		spin_unlock(&pvfs2_bufmap_lock);
		gossip_err("pvfs2: error: bufmap already initialized.\n");
		err = -EALREADY;
		goto unmap;
	}
	__pvfs2_bufmap = bufmap;
	spin_unlock(&pvfs2_bufmap_lock);

	/*
	 * Wake anything parked on pvfs2_bufmap_init_waitq.  That happens
	 * when the client-core is restarted: I/O requests from the
	 * in-progress or waiting tables cannot be restarted until the
	 * shared memory system is fully re-initialized, so they wait on
	 * this queue.  Those requests are also on a timer, so they do not
	 * wait forever if the client-core never comes back.
	 */
	wake_up_interruptible(&pvfs2_bufmap_init_waitq);

	gossip_debug(GOSSIP_BUFMAP_DEBUG,
		     "pvfs_bufmap_initialize: exiting normally\n");
	return 0;

unmap:
	pvfs2_bufmap_unmap(bufmap);
free_map:
	pvfs2_bufmap_free(bufmap);
	return err;
}
/*
* pvfs_bufmap_finalize()
*
* shuts down the mapped buffer interface and releases any resources
* associated with it
*
* no return value
*/
void pvfs_bufmap_finalize(void)
{
gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2_bufmap_finalize: called\n");
BUG_ON(!__pvfs2_bufmap);
pvfs2_bufmap_unref(__pvfs2_bufmap);
gossip_debug(GOSSIP_BUFMAP_DEBUG,
"pvfs2_bufmap_finalize: exiting normally\n");
}
/*
 * Describes one pool of slots for wait_for_a_slot()/put_back_slot():
 * the usage array, its length, the lock protecting it and the wait
 * queue used by tasks waiting for a free slot.
 */
struct slot_args {
/* number of entries in slot_array */
int slot_count;
/* per-slot usage flag: 0 = free, 1 = taken */
int *slot_array;
/* lock protecting slot_array */
spinlock_t *slot_lock;
/* queue slept on while no slot is free */
wait_queue_head_t *slot_wq;
};
/*
 * Claim the first free slot of the pool described by slargs, sleeping
 * until one becomes available.
 *
 * On success the slot is marked taken (1), its index is stored in
 * *buffer_index and 0 is returned.  Returns -ETIMEDOUT if
 * schedule_timeout() expired (timeout derived from slot_timeout_secs)
 * and -EINTR if a signal is pending for the current task.
 */
static int wait_for_a_slot(struct slot_args *slargs, int *buffer_index)
{
int ret = -1;
int i = 0;
DECLARE_WAITQUEUE(my_wait, current);
/* queue ourselves as an exclusive waiter for the whole attempt */
add_wait_queue_exclusive(slargs->slot_wq, &my_wait);
while (1) {
/*
 * The task state is set before the scan; presumably so that a
 * wake-up arriving between the scan and schedule_timeout() is
 * not lost.
 */
set_current_state(TASK_INTERRUPTIBLE);
/*
* check for available desc, slot_lock is the appropriate
* index_lock
*/
spin_lock(slargs->slot_lock);
for (i = 0; i < slargs->slot_count; i++)
if (slargs->slot_array[i] == 0) {
slargs->slot_array[i] = 1;
*buffer_index = i;
ret = 0;
break;
}
spin_unlock(slargs->slot_lock);
/* if we acquired a buffer, then break out of while */
if (ret == 0)
break;
if (!signal_pending(current)) {
int timeout =
MSECS_TO_JIFFIES(1000 * slot_timeout_secs);
gossip_debug(GOSSIP_BUFMAP_DEBUG,
"[BUFMAP]: waiting %d "
"seconds for a slot\n",
slot_timeout_secs);
/* a zero return means the whole timeout elapsed */
if (!schedule_timeout(timeout)) {
gossip_debug(GOSSIP_BUFMAP_DEBUG,
"*** wait_for_a_slot timed out\n");
ret = -ETIMEDOUT;
break;
}
gossip_debug(GOSSIP_BUFMAP_DEBUG,
"[BUFMAP]: woken up by a slot becoming available.\n");
/* rescan the pool from the top */
continue;
}
gossip_debug(GOSSIP_BUFMAP_DEBUG, "pvfs2: %s interrupted.\n",
__func__);
ret = -EINTR;
break;
}
/* common exit: restore the running state and leave the queue */
set_current_state(TASK_RUNNING);
remove_wait_queue(slargs->slot_wq, &my_wait);
return ret;
}
/*
 * Mark slot buffer_index of the pool described by slargs as free again
 * and wake a task waiting for a slot.  An index outside
 * [0, slot_count) is ignored: nothing is changed and nobody is woken.
 */
static void put_back_slot(struct slot_args *slargs, int buffer_index)
{
	bool in_range;

	/* slot_lock is the index_lock matching this pool */
	spin_lock(slargs->slot_lock);
	in_range = buffer_index >= 0 && buffer_index < slargs->slot_count;
	if (in_range)
		slargs->slot_array[buffer_index] = 0;
	spin_unlock(slargs->slot_lock);

	/* wake up anyone who may be sleeping on the queue */
	if (in_range)
		wake_up_interruptible(slargs->slot_wq);
}
/*
* pvfs_bufmap_get()
*
* gets a free mapped buffer descriptor, will sleep until one becomes
* available if necessary
*
* returns 0 on success, -errno on failure
*/
int pvfs_bufmap_get(struct pvfs2_bufmap **mapp, int *buffer_index)
{
struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
struct slot_args slargs;
int ret;
if (!bufmap) {
gossip_err("pvfs2: please confirm that pvfs2-client daemon is running.\n");
return -EIO;
}
slargs.slot_count = bufmap->desc_count;
slargs.slot_array = bufmap->buffer_index_array;
slargs.slot_lock = &bufmap->buffer_index_lock;
slargs.slot_wq = &bufmap_waitq;
ret = wait_for_a_slot(&slargs, buffer_index);
if (ret)
pvfs2_bufmap_unref(bufmap);
*mapp = bufmap;
return ret;
}
/*
* pvfs_bufmap_put()
*
* returns a mapped buffer descriptor to the collection
*
* no return value
*/
void pvfs_bufmap_put(struct pvfs2_bufmap *bufmap, int buffer_index)
{
struct slot_args slargs;
slargs.slot_count = bufmap->desc_count;
slargs.slot_array = bufmap->buffer_index_array;
slargs.slot_lock = &bufmap->buffer_index_lock;
slargs.slot_wq = &bufmap_waitq;
put_back_slot(&slargs, buffer_index);
pvfs2_bufmap_unref(bufmap);
}
/*
* readdir_index_get()
*
* gets a free descriptor, will sleep until one becomes
* available if necessary.
* Although the readdir buffers are not mapped into kernel space
* we could do that at a later point of time. Regardless, these
* indices are used by the client-core.
*
* returns 0 on success, -errno on failure
*/
int readdir_index_get(struct pvfs2_bufmap **mapp, int *buffer_index)
{
struct pvfs2_bufmap *bufmap = pvfs2_bufmap_ref();
struct slot_args slargs;
int ret;
if (!bufmap) {
gossip_err("pvfs2: please confirm that pvfs2-client daemon is running.\n");
return -EIO;
}
slargs.slot_count = PVFS2_READDIR_DEFAULT_DESC_COUNT;
slargs.slot_array = bufmap->readdir_index_array;
slargs.slot_lock = &bufmap->readdir_index_lock;
slargs.slot_wq = &readdir_waitq;
ret = wait_for_a_slot(&slargs, buffer_index);
if (ret)
pvfs2_bufmap_unref(bufmap);
*mapp = bufmap;
return ret;
}
void readdir_index_put(struct pvfs2_bufmap *bufmap, int buffer_index)
{
struct slot_args slargs;
slargs.slot_count = PVFS2_READDIR_DEFAULT_DESC_COUNT;
slargs.slot_array = bufmap->readdir_index_array;
slargs.slot_lock = &bufmap->readdir_index_lock;
slargs.slot_wq = &readdir_waitq;
put_back_slot(&slargs, buffer_index);
pvfs2_bufmap_unref(bufmap);
}
/*
 * pvfs_bufmap_copy_iovec_from_user()
 *
 * Copies data from several user space addresses in an iovec
 * into the mapped buffer descriptor selected by buffer_index.
 *
 * The mapped buffer is a series of pages, so the copy is split into
 * pieces that never cross a destination page boundary.  The sum of
 * iov_len over all nr_segs elements of iov must equal size.  The
 * caller's iovec is not modified; a private copy is adjusted instead.
 *
 * returns 0 on success, -ENOMEM if the iovec copy cannot be allocated,
 * -EINVAL if the segment lengths do not add up to size, -EFAULT if
 * copy_from_user() could not copy everything
 *
 * NOTE(review): the destination page index is not checked against the
 * descriptor's array_count here; callers presumably bound size -- confirm.
 */
int pvfs_bufmap_copy_iovec_from_user(struct pvfs2_bufmap *bufmap,
int buffer_index,
const struct iovec *iov,
unsigned long nr_segs,
size_t size)
{
size_t ret = 0;
size_t amt_copied = 0;
size_t cur_copy_size = 0;
unsigned int to_page_offset = 0;
unsigned int to_page_index = 0;
void *to_kaddr = NULL;
void __user *from_addr = NULL;
struct iovec *copied_iovec = NULL;
struct pvfs_bufmap_desc *to;
unsigned int seg;
/* tmp_printer/tmp_int only feed the one-time debug print below */
char *tmp_printer = NULL;
int tmp_int = 0;
gossip_debug(GOSSIP_BUFMAP_DEBUG,
"pvfs_bufmap_copy_iovec_from_user: index %d, "
"size %zd\n",
buffer_index,
size);
to = &bufmap->desc_array[buffer_index];
/*
* copy the passed in iovec so that we can change some of its fields
*/
copied_iovec = kmalloc_array(nr_segs,
sizeof(*copied_iovec),
PVFS2_BUFMAP_GFP_FLAGS);
if (copied_iovec == NULL)
return -ENOMEM;
memcpy(copied_iovec, iov, nr_segs * sizeof(*copied_iovec));
/*
* Go through each segment in the iovec and make sure that
* the summation of iov_len matches the given size.
*/
for (seg = 0, amt_copied = 0; seg < nr_segs; seg++)
amt_copied += copied_iovec[seg].iov_len;
if (amt_copied != size) {
gossip_err(
"pvfs2_bufmap_copy_iovec_from_user: computed total ("
"%zd) is not equal to (%zd)\n",
amt_copied,
size);
kfree(copied_iovec);
return -EINVAL;
}
/* amt_copied was only a scratch sum above; restart the real copy */
to_page_index = 0;
to_page_offset = 0;
amt_copied = 0;
seg = 0;
/*
* Go through each segment in the iovec and copy its
* buffer into the mapped buffer one page at a time though
*/
while (amt_copied < size) {
struct iovec *iv = &copied_iovec[seg];
int inc_to_page_index;
/* segment ends before the current page does: stay on this page */
if (iv->iov_len < (PAGE_SIZE - to_page_offset)) {
cur_copy_size =
PVFS_util_min(iv->iov_len, size - amt_copied);
seg++;
from_addr = iv->iov_base;
inc_to_page_index = 0;
/* segment ends exactly at the page end: next segment, next page */
} else if (iv->iov_len == (PAGE_SIZE - to_page_offset)) {
cur_copy_size =
PVFS_util_min(iv->iov_len, size - amt_copied);
seg++;
from_addr = iv->iov_base;
inc_to_page_index = 1;
/* segment spills past the page: copy what fits, keep the rest */
} else {
cur_copy_size =
PVFS_util_min(PAGE_SIZE - to_page_offset,
size - amt_copied);
from_addr = iv->iov_base;
iv->iov_base += cur_copy_size;
iv->iov_len -= cur_copy_size;
inc_to_page_index = 1;
}
to_kaddr = pvfs2_kmap(to->page_array[to_page_index]);
ret =
copy_from_user(to_kaddr + to_page_offset,
from_addr,
cur_copy_size);
if (!PageReserved(to->page_array[to_page_index]))
SetPageDirty(to->page_array[to_page_index]);
/* first pass only: log the first destination byte */
if (!tmp_printer) {
tmp_printer = (char *)(to_kaddr + to_page_offset);
tmp_int += tmp_printer[0];
gossip_debug(GOSSIP_BUFMAP_DEBUG,
"First character (integer value) in pvfs_bufmap_copy_from_user: %d\n",
tmp_int);
}
pvfs2_kunmap(to->page_array[to_page_index]);
/* ret is checked only after the page has been unmapped */
if (ret) {
gossip_err("Failed to copy data from user space\n");
kfree(copied_iovec);
return -EFAULT;
}
amt_copied += cur_copy_size;
if (inc_to_page_index) {
to_page_offset = 0;
to_page_index++;
} else {
to_page_offset += cur_copy_size;
}
}
kfree(copied_iovec);
return 0;
}
/*
 * pvfs_bufmap_copy_iovec_from_kernel()
 *
 * Copies data from several kernel space addresses in an iovec
 * into the mapped buffer descriptor selected by buffer_index.
 *
 * The mapped buffer is a series of pages, so the copy is split into
 * pieces that never cross a destination page boundary.  The sum of
 * iov_len over all nr_segs elements of iov must equal size.  The
 * caller's iovec is not modified; a private copy is adjusted instead.
 *
 * returns 0 on success, -ENOMEM if the iovec copy cannot be allocated,
 * -EINVAL if the segment lengths do not add up to size
 *
 * NOTE(review): the destination page index is not checked against the
 * descriptor's array_count here; callers presumably bound size -- confirm.
 */
int pvfs_bufmap_copy_iovec_from_kernel(struct pvfs2_bufmap *bufmap,
int buffer_index, const struct iovec *iov,
unsigned long nr_segs, size_t size)
{
size_t amt_copied = 0;
size_t cur_copy_size = 0;
int to_page_index = 0;
void *to_kaddr = NULL;
void *from_kaddr = NULL;
struct iovec *copied_iovec = NULL;
struct pvfs_bufmap_desc *to;
unsigned int seg;
unsigned to_page_offset = 0;
gossip_debug(GOSSIP_BUFMAP_DEBUG,
"pvfs_bufmap_copy_iovec_from_kernel: index %d, "
"size %zd\n",
buffer_index,
size);
to = &bufmap->desc_array[buffer_index];
/*
* copy the passed in iovec so that we can change some of its fields
*/
copied_iovec = kmalloc_array(nr_segs,
sizeof(*copied_iovec),
PVFS2_BUFMAP_GFP_FLAGS);
if (copied_iovec == NULL)
return -ENOMEM;
memcpy(copied_iovec, iov, nr_segs * sizeof(*copied_iovec));
/*
* Go through each segment in the iovec and make sure that
* the summation of iov_len matches the given size.
*/
for (seg = 0, amt_copied = 0; seg < nr_segs; seg++)
amt_copied += copied_iovec[seg].iov_len;
if (amt_copied != size) {
gossip_err("pvfs2_bufmap_copy_iovec_from_kernel: computed total(%zd) is not equal to (%zd)\n",
amt_copied,
size);
kfree(copied_iovec);
return -EINVAL;
}
/* amt_copied was only a scratch sum above; restart the real copy */
to_page_index = 0;
amt_copied = 0;
seg = 0;
to_page_offset = 0;
/*
* Go through each segment in the iovec and copy its
* buffer into the mapped buffer one page at a time though
*/
while (amt_copied < size) {
struct iovec *iv = &copied_iovec[seg];
int inc_to_page_index;
/* segment ends before the current page does: stay on this page */
if (iv->iov_len < (PAGE_SIZE - to_page_offset)) {
cur_copy_size =
PVFS_util_min(iv->iov_len, size - amt_copied);
seg++;
from_kaddr = iv->iov_base;
inc_to_page_index = 0;
/* segment ends exactly at the page end: next segment, next page */
} else if (iv->iov_len == (PAGE_SIZE - to_page_offset)) {
cur_copy_size =
PVFS_util_min(iv->iov_len, size - amt_copied);
seg++;
from_kaddr = iv->iov_base;
inc_to_page_index = 1;
/* segment spills past the page: copy what fits, keep the rest */
} else {
cur_copy_size =
PVFS_util_min(PAGE_SIZE - to_page_offset,
size - amt_copied);
from_kaddr = iv->iov_base;
iv->iov_base += cur_copy_size;
iv->iov_len -= cur_copy_size;
inc_to_page_index = 1;
}
to_kaddr = pvfs2_kmap(to->page_array[to_page_index]);
/* source is a kernel address, so a plain memcpy is used */
memcpy(to_kaddr + to_page_offset, from_kaddr, cur_copy_size);
if (!PageReserved(to->page_array[to_page_index]))
SetPageDirty(to->page_array[to_page_index]);
pvfs2_kunmap(to->page_array[to_page_index]);
amt_copied += cur_copy_size;
if (inc_to_page_index) {
to_page_offset = 0;
to_page_index++;
} else {
to_page_offset += cur_copy_size;
}
}
kfree(copied_iovec);
return 0;
}
/*
 * pvfs_bufmap_copy_to_user_iovec()
 *
 * Copies size bytes from the mapped buffer descriptor selected by
 * buffer_index to several user space addresses in an iovec.
 *
 * The copy is split into pieces that never cross a source page
 * boundary.  The iovec must have room for at least size bytes; it may
 * be larger.  The caller's iovec is not modified; a private copy is
 * adjusted instead.
 *
 * returns 0 on success, -ENOMEM if the iovec copy cannot be allocated,
 * -EINVAL if the iovec is smaller than size, -EFAULT if copy_to_user()
 * could not copy everything
 *
 * NOTE(review): the source page index is not checked against the
 * descriptor's array_count here; callers presumably bound size -- confirm.
 */
int pvfs_bufmap_copy_to_user_iovec(struct pvfs2_bufmap *bufmap,
int buffer_index, const struct iovec *iov,
unsigned long nr_segs, size_t size)
{
size_t ret = 0;
size_t amt_copied = 0;
size_t cur_copy_size = 0;
int from_page_index = 0;
void *from_kaddr = NULL;
void __user *to_addr = NULL;
struct iovec *copied_iovec = NULL;
struct pvfs_bufmap_desc *from;
unsigned int seg;
unsigned from_page_offset = 0;
/* tmp_printer/tmp_int only feed the one-time debug print below */
char *tmp_printer = NULL;
int tmp_int = 0;
gossip_debug(GOSSIP_BUFMAP_DEBUG,
"pvfs_bufmap_copy_to_user_iovec: index %d, size %zd\n",
buffer_index,
size);
from = &bufmap->desc_array[buffer_index];
/*
* copy the passed in iovec so that we can change some of its fields
*/
copied_iovec = kmalloc_array(nr_segs,
sizeof(*copied_iovec),
PVFS2_BUFMAP_GFP_FLAGS);
if (copied_iovec == NULL)
return -ENOMEM;
memcpy(copied_iovec, iov, nr_segs * sizeof(*copied_iovec));
/*
* Go through each segment in the iovec and make sure that
* the summation of iov_len is at least the given size.
*/
for (seg = 0, amt_copied = 0; seg < nr_segs; seg++)
amt_copied += copied_iovec[seg].iov_len;
if (amt_copied < size) {
gossip_err("pvfs2_bufmap_copy_to_user_iovec: computed total (%zd) is less than (%zd)\n",
amt_copied,
size);
kfree(copied_iovec);
return -EINVAL;
}
/* amt_copied was only a scratch sum above; restart the real copy */
from_page_index = 0;
amt_copied = 0;
seg = 0;
from_page_offset = 0;
/*
* Go through each segment in the iovec and copy from the mapper buffer,
* but make sure that we do so one page at a time.
*/
while (amt_copied < size) {
struct iovec *iv = &copied_iovec[seg];
int inc_from_page_index;
/* segment ends before the current page does: stay on this page */
if (iv->iov_len < (PAGE_SIZE - from_page_offset)) {
cur_copy_size =
PVFS_util_min(iv->iov_len, size - amt_copied);
seg++;
to_addr = iv->iov_base;
inc_from_page_index = 0;
/* segment ends exactly at the page end: next segment, next page */
} else if (iv->iov_len == (PAGE_SIZE - from_page_offset)) {
cur_copy_size =
PVFS_util_min(iv->iov_len, size - amt_copied);
seg++;
to_addr = iv->iov_base;
inc_from_page_index = 1;
/* segment extends past the page: fill what the page provides */
} else {
cur_copy_size =
PVFS_util_min(PAGE_SIZE - from_page_offset,
size - amt_copied);
to_addr = iv->iov_base;
iv->iov_base += cur_copy_size;
iv->iov_len -= cur_copy_size;
inc_from_page_index = 1;
}
from_kaddr = pvfs2_kmap(from->page_array[from_page_index]);
/* first pass only: log the first source byte */
if (!tmp_printer) {
tmp_printer = (char *)(from_kaddr + from_page_offset);
tmp_int += tmp_printer[0];
gossip_debug(GOSSIP_BUFMAP_DEBUG,
"First character (integer value) in pvfs_bufmap_copy_to_user_iovec: %d\n",
tmp_int);
}
ret =
copy_to_user(to_addr,
from_kaddr + from_page_offset,
cur_copy_size);
pvfs2_kunmap(from->page_array[from_page_index]);
/* ret is checked only after the page has been unmapped */
if (ret) {
gossip_err("Failed to copy data to user space\n");
kfree(copied_iovec);
return -EFAULT;
}
amt_copied += cur_copy_size;
if (inc_from_page_index) {
from_page_offset = 0;
from_page_index++;
} else {
from_page_offset += cur_copy_size;
}
}
kfree(copied_iovec);
return 0;
}
/*
* pvfs_bufmap_copy_to_kernel_iovec()
*
* copies data to several kernel space addresses in an iovec
* from a mapped buffer
*
* bufmap/buffer_index select the source descriptor (desc_array entry)
* whose page_array is read one page at a time.
* iov/nr_segs describe the destinations, written with memcpy(); a
* private copy of the iovec is made because iov_base/iov_len are
* advanced while copying.
* size is the number of bytes to copy; the segments must total at
* least that many bytes.
*
* returns 0 on success, -ENOMEM if the iovec copy cannot be allocated,
* -EINVAL if the segments are too small for size
*/
int pvfs_bufmap_copy_to_kernel_iovec(struct pvfs2_bufmap *bufmap,
int buffer_index, const struct iovec *iov,
unsigned long nr_segs, size_t size)
{
size_t amt_copied = 0;
size_t cur_copy_size = 0;
int from_page_index = 0;
void *from_kaddr = NULL;
void *to_kaddr = NULL;
struct iovec *copied_iovec = NULL;
struct pvfs_bufmap_desc *from;
unsigned int seg;
unsigned int from_page_offset = 0;
gossip_debug(GOSSIP_BUFMAP_DEBUG,
"pvfs_bufmap_copy_to_kernel_iovec: index %d, size %zd\n",
buffer_index,
size);
from = &bufmap->desc_array[buffer_index];
/*
* copy the passed in iovec so that we can change some of its fields
*/
copied_iovec = kmalloc_array(nr_segs,
sizeof(*copied_iovec),
PVFS2_BUFMAP_GFP_FLAGS);
if (copied_iovec == NULL)
return -ENOMEM;
memcpy(copied_iovec, iov, nr_segs * sizeof(*copied_iovec));
/*
* Go through each segment in the iovec and make sure that
* the summation of iov_len is at least the given size.
*/
for (seg = 0, amt_copied = 0; seg < nr_segs; seg++)
amt_copied += copied_iovec[seg].iov_len;
if (amt_copied < size) {
gossip_err("pvfs2_bufmap_copy_to_kernel_iovec: computed total (%zd) is less than (%zd)\n",
amt_copied,
size);
kfree(copied_iovec);
return -EINVAL;
}
/* amt_copied was borrowed for the length check; restart the cursors */
from_page_index = 0;
amt_copied = 0;
seg = 0;
from_page_offset = 0;
/*
* Go through each segment in the iovec and copy from the mapper buffer,
* but make sure that we do so one page at a time.
*/
while (amt_copied < size) {
struct iovec *iv = &copied_iovec[seg];
int inc_from_page_index;
if (iv->iov_len < (PAGE_SIZE - from_page_offset)) {
/* segment ends inside the current page: finish it, stay on the page */
cur_copy_size =
PVFS_util_min(iv->iov_len, size - amt_copied);
seg++;
to_kaddr = iv->iov_base;
inc_from_page_index = 0;
} else if (iv->iov_len == (PAGE_SIZE - from_page_offset)) {
/* segment ends exactly at the page end: advance both */
cur_copy_size =
PVFS_util_min(iv->iov_len, size - amt_copied);
seg++;
to_kaddr = iv->iov_base;
inc_from_page_index = 1;
} else {
/*
* segment is longer than the rest of the page: copy up to the
* page end and trim the (private) segment for the next pass
*/
cur_copy_size =
PVFS_util_min(PAGE_SIZE - from_page_offset,
size - amt_copied);
to_kaddr = iv->iov_base;
iv->iov_base += cur_copy_size;
iv->iov_len -= cur_copy_size;
inc_from_page_index = 1;
}
from_kaddr = pvfs2_kmap(from->page_array[from_page_index]);
memcpy(to_kaddr, from_kaddr + from_page_offset, cur_copy_size);
pvfs2_kunmap(from->page_array[from_page_index]);
amt_copied += cur_copy_size;
if (inc_from_page_index) {
from_page_offset = 0;
from_page_index++;
} else {
from_page_offset += cur_copy_size;
}
}
kfree(copied_iovec);
return 0;
}

260
fs/orangefs/pvfs2-cache.c Normal file
Просмотреть файл

@ -0,0 +1,260 @@
/*
* (C) 2001 Clemson University and The University of Chicago
*
* See COPYING in top-level directory.
*/
#include "protocol.h"
#include "pvfs2-kernel.h"
/* tags assigned to kernel upcall operations */
static __u64 next_tag_value;
static DEFINE_SPINLOCK(next_tag_value_lock);
/* the pvfs2 memory caches */
/* a cache for pvfs2 upcall/downcall operations */
static struct kmem_cache *op_cache;
/* a cache for device (/dev/pvfs2-req) communication */
static struct kmem_cache *dev_req_cache;
/* a cache for pvfs2_kiocb objects (i.e pvfs2 iocb structures ) */
static struct kmem_cache *pvfs2_kiocb_cache;
/*
 * Create the slab cache used for pvfs2_kernel_op_s objects and seed
 * the upcall tag counter.
 *
 * Returns 0 on success, -ENOMEM if the cache cannot be created.
 */
int op_cache_initialize(void)
{
	op_cache = kmem_cache_create("pvfs2_op_cache",
				     sizeof(struct pvfs2_kernel_op_s),
				     0,
				     PVFS2_CACHE_CREATE_FLAGS,
				     NULL);
	if (op_cache == NULL) {
		gossip_err("Cannot create pvfs2_op_cache\n");
		return -ENOMEM;
	}

	/* tags start at 100; the counter is guarded by its spinlock */
	spin_lock(&next_tag_value_lock);
	next_tag_value = 100;
	spin_unlock(&next_tag_value_lock);

	return 0;
}
/* Destroy the op slab cache. Always returns 0. */
int op_cache_finalize(void)
{
	kmem_cache_destroy(op_cache);

	return 0;
}
/*
 * Map the upcall type of an op to a printable name for debug output.
 *
 * Returns a string literal; "OP_UNKNOWN?" is returned for a NULL op or
 * a type that is not listed here.
 */
char *get_opname_string(struct pvfs2_kernel_op_s *new_op)
{
	if (!new_op)
		return "OP_UNKNOWN?";

	switch (new_op->upcall.type) {
	case PVFS2_VFS_OP_FILE_IO:
		return "OP_FILE_IO";
	case PVFS2_VFS_OP_LOOKUP:
		return "OP_LOOKUP";
	case PVFS2_VFS_OP_CREATE:
		return "OP_CREATE";
	case PVFS2_VFS_OP_GETATTR:
		return "OP_GETATTR";
	case PVFS2_VFS_OP_REMOVE:
		return "OP_REMOVE";
	case PVFS2_VFS_OP_MKDIR:
		return "OP_MKDIR";
	case PVFS2_VFS_OP_READDIR:
		return "OP_READDIR";
	case PVFS2_VFS_OP_READDIRPLUS:
		return "OP_READDIRPLUS";
	case PVFS2_VFS_OP_SETATTR:
		return "OP_SETATTR";
	case PVFS2_VFS_OP_SYMLINK:
		return "OP_SYMLINK";
	case PVFS2_VFS_OP_RENAME:
		return "OP_RENAME";
	case PVFS2_VFS_OP_STATFS:
		return "OP_STATFS";
	case PVFS2_VFS_OP_TRUNCATE:
		return "OP_TRUNCATE";
	case PVFS2_VFS_OP_MMAP_RA_FLUSH:
		return "OP_MMAP_RA_FLUSH";
	case PVFS2_VFS_OP_FS_MOUNT:
		return "OP_FS_MOUNT";
	case PVFS2_VFS_OP_FS_UMOUNT:
		return "OP_FS_UMOUNT";
	case PVFS2_VFS_OP_GETXATTR:
		return "OP_GETXATTR";
	case PVFS2_VFS_OP_SETXATTR:
		return "OP_SETXATTR";
	case PVFS2_VFS_OP_LISTXATTR:
		return "OP_LISTXATTR";
	case PVFS2_VFS_OP_REMOVEXATTR:
		return "OP_REMOVEXATTR";
	case PVFS2_VFS_OP_PARAM:
		return "OP_PARAM";
	case PVFS2_VFS_OP_PERF_COUNT:
		return "OP_PERF_COUNT";
	case PVFS2_VFS_OP_CANCEL:
		return "OP_CANCEL";
	case PVFS2_VFS_OP_FSYNC:
		return "OP_FSYNC";
	case PVFS2_VFS_OP_FSKEY:
		return "OP_FSKEY";
	case PVFS2_VFS_OP_FILE_IOX:
		return "OP_FILE_IOX";
	default:
		return "OP_UNKNOWN?";
	}
}
/*
 * Allocate and initialise an op from op_cache.
 *
 * The op is zeroed, its list head, lock and wait queues are set up, it
 * receives the next tag value, and the upcall is stamped with the given
 * type and the caller's fsuid/fsgid. op_linger is stored in both
 * op_linger and op_linger_tmp.
 *
 * Returns the new op, or NULL if the slab allocation fails.
 */
static struct pvfs2_kernel_op_s *op_alloc_common(__s32 op_linger, __s32 type)
{
	struct pvfs2_kernel_op_s *op;

	op = kmem_cache_alloc(op_cache, PVFS2_CACHE_ALLOC_FLAGS);
	if (op == NULL) {
		gossip_err("op_alloc: kmem_cache_alloc failed!\n");
		return NULL;
	}

	memset(op, 0, sizeof(*op));
	INIT_LIST_HEAD(&op->list);
	spin_lock_init(&op->lock);
	init_waitqueue_head(&op->waitq);
	init_waitqueue_head(&op->io_completion_waitq);
	atomic_set(&op->aio_ref_count, 0);
	pvfs2_op_initialize(op);

	/* hand out the next tag; when the counter wraps to 0 restart at 100 */
	spin_lock(&next_tag_value_lock);
	op->tag = next_tag_value++;
	if (next_tag_value == 0)
		next_tag_value = 100;
	spin_unlock(&next_tag_value_lock);

	op->upcall.type = type;
	op->attempts = 0;
	gossip_debug(GOSSIP_CACHE_DEBUG,
		     "Alloced OP (%p: %llu %s)\n",
		     op,
		     llu(op->tag),
		     get_opname_string(op));

	/* upcall credentials come from the calling task */
	op->upcall.uid = from_kuid(current_user_ns(), current_fsuid());
	op->upcall.gid = from_kgid(current_user_ns(), current_fsgid());

	op->op_linger_tmp = op_linger;
	op->op_linger = op_linger;

	return op;
}
/* Allocate an op of the given upcall type with an op_linger value of 1. */
struct pvfs2_kernel_op_s *op_alloc(__s32 type)
{
	const __s32 linger = 1;

	return op_alloc_common(linger, type);
}
/* Allocate an op of the given upcall type with an op_linger value of 2. */
struct pvfs2_kernel_op_s *op_alloc_trailer(__s32 type)
{
	const __s32 linger = 2;

	return op_alloc_common(linger, type);
}
/*
 * Return an op to op_cache. The op is re-initialised with
 * pvfs2_op_initialize() before it is freed. A NULL op is only logged.
 */
void op_release(struct pvfs2_kernel_op_s *pvfs2_op)
{
	if (!pvfs2_op) {
		gossip_err("NULL pointer in op_release\n");
		return;
	}

	gossip_debug(GOSSIP_CACHE_DEBUG,
		     "Releasing OP (%p: %llu)\n",
		     pvfs2_op,
		     llu(pvfs2_op->tag));
	pvfs2_op_initialize(pvfs2_op);
	kmem_cache_free(op_cache, pvfs2_op);
}
/*
 * Create the slab cache for device request buffers; each object is
 * MAX_ALIGNED_DEV_REQ_DOWNSIZE bytes.
 *
 * Returns 0 on success, -ENOMEM if the cache cannot be created.
 */
int dev_req_cache_initialize(void)
{
	dev_req_cache = kmem_cache_create("pvfs2_devreqcache",
					  MAX_ALIGNED_DEV_REQ_DOWNSIZE,
					  0,
					  PVFS2_CACHE_CREATE_FLAGS,
					  NULL);
	if (dev_req_cache != NULL)
		return 0;

	gossip_err("Cannot create pvfs2_dev_req_cache\n");
	return -ENOMEM;
}
/* Destroy the device request slab cache. Always returns 0. */
int dev_req_cache_finalize(void)
{
	kmem_cache_destroy(dev_req_cache);

	return 0;
}
/*
 * Allocate a zeroed device request buffer from dev_req_cache.
 *
 * Returns the buffer, or NULL (after logging) if the allocation fails.
 */
void *dev_req_alloc(void)
{
	void *buffer;

	buffer = kmem_cache_alloc(dev_req_cache, PVFS2_CACHE_ALLOC_FLAGS);
	if (buffer == NULL) {
		gossip_err("Failed to allocate from dev_req_cache\n");
		return NULL;
	}

	/*
	 * Clear the whole object. The cache is created with an object size
	 * of MAX_ALIGNED_DEV_REQ_DOWNSIZE; sizeof() of that constant would
	 * only cover the size of its integer type.
	 */
	memset(buffer, 0, MAX_ALIGNED_DEV_REQ_DOWNSIZE);

	return buffer;
}
/* Return a buffer to dev_req_cache; a NULL buffer is only logged. */
void dev_req_release(void *buffer)
{
	if (!buffer) {
		gossip_err("NULL pointer passed to dev_req_release\n");
		return;
	}

	kmem_cache_free(dev_req_cache, buffer);
}
/*
 * Create the slab cache for pvfs2_kiocb_s objects.
 *
 * Returns 0 on success, -ENOMEM if the cache cannot be created.
 */
int kiocb_cache_initialize(void)
{
	pvfs2_kiocb_cache = kmem_cache_create("pvfs2_kiocbcache",
					      sizeof(struct pvfs2_kiocb_s),
					      0,
					      PVFS2_CACHE_CREATE_FLAGS,
					      NULL);
	if (pvfs2_kiocb_cache != NULL)
		return 0;

	gossip_err("Cannot create pvfs2_kiocb_cache!\n");
	return -ENOMEM;
}
/* Destroy the pvfs2_kiocb_s slab cache. Always returns 0. */
int kiocb_cache_finalize(void)
{
	kmem_cache_destroy(pvfs2_kiocb_cache);

	return 0;
}
/*
 * Allocate a zeroed pvfs2_kiocb_s from pvfs2_kiocb_cache.
 *
 * Returns the object, or NULL (after logging) if the allocation fails.
 */
struct pvfs2_kiocb_s *kiocb_alloc(void)
{
	struct pvfs2_kiocb_s *kiocb;

	kiocb = kmem_cache_alloc(pvfs2_kiocb_cache, PVFS2_CACHE_ALLOC_FLAGS);
	if (kiocb == NULL) {
		gossip_err("kiocb_alloc: kmem_cache_alloc failed!\n");
		return NULL;
	}

	memset(kiocb, 0, sizeof(*kiocb));
	return kiocb;
}
/* Return a pvfs2_kiocb_s to its cache; a NULL pointer is only logged. */
void kiocb_release(struct pvfs2_kiocb_s *x)
{
	if (!x) {
		gossip_err("kiocb_release: kmem_cache_free NULL pointer!\n");
		return;
	}

	kmem_cache_free(pvfs2_kiocb_cache, x);
}

458
fs/orangefs/pvfs2-debugfs.c Normal file
Просмотреть файл

@ -0,0 +1,458 @@
/*
* What: /sys/kernel/debug/orangefs/debug-help
* Date: June 2015
* Contact: Mike Marshall <hubcap@omnibond.com>
* Description:
* List of client and kernel debug keywords.
*
*
* What: /sys/kernel/debug/orangefs/client-debug
* Date: June 2015
* Contact: Mike Marshall <hubcap@omnibond.com>
* Description:
* Debug setting for "the client", the userspace
* helper for the kernel module.
*
*
* What: /sys/kernel/debug/orangefs/kernel-debug
* Date: June 2015
* Contact: Mike Marshall <hubcap@omnibond.com>
* Description:
* Debug setting for the orangefs kernel module.
*
* Any of the keywords, or comma-separated lists
* of keywords, from debug-help can be catted to
* client-debug or kernel-debug.
*
* "none", "all" and "verbose" are special keywords
* for client-debug. Setting client-debug to "all"
* is kind of like trying to drink water from a
* fire hose, "verbose" triggers most of the same
* output except for the constant flow of output
* from the main wait loop.
*
* "none" and "all" are similar settings for kernel-debug
* no need for a "verbose".
*/
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include "pvfs2-debugfs.h"
#include "protocol.h"
#include "pvfs2-kernel.h"
/*
* Non-zero until pvfs2_debugfs_init() has created the debug directory
* and the help file; while set, the open handlers in this file return
* -ENODEV.
*/
static int orangefs_debug_disabled = 1;
static int orangefs_debug_help_open(struct inode *, struct file *);
/*
* File operations for the debug-help file: opened through seq_file,
* read with the generic seq_file helpers.
*/
const struct file_operations debug_help_fops = {
.open = orangefs_debug_help_open,
.read = seq_read,
.release = seq_release,
.llseek = seq_lseek,
};
static void *help_start(struct seq_file *, loff_t *);
static void *help_next(struct seq_file *, void *, loff_t *);
static void help_stop(struct seq_file *, void *);
static int help_show(struct seq_file *, void *);
/* seq_file iterator over the help text; installed by orangefs_debug_help_open() */
static const struct seq_operations help_debug_ops = {
.start = help_start,
.next = help_next,
.stop = help_stop,
.show = help_show,
};
/*
* Used to protect data in ORANGEFS_KMOD_DEBUG_FILE and
* ORANGEFS_CLIENT_DEBUG_FILE.
*/
/* taken by the open, read and write handlers below around debug-string access */
DEFINE_MUTEX(orangefs_debug_lock);
int orangefs_debug_open(struct inode *, struct file *);
static ssize_t orangefs_debug_read(struct file *,
char __user *,
size_t,
loff_t *);
static ssize_t orangefs_debug_write(struct file *,
const char __user *,
size_t,
loff_t *);
/*
* File operations shared by the kernel-debug and client-debug files
* (both pvfs2_kernel_debug_init() and pvfs2_client_debug_init() pass
* this table to debugfs_create_file()).
*/
static const struct file_operations kernel_debug_fops = {
.open = orangefs_debug_open,
.read = orangefs_debug_read,
.write = orangefs_debug_write,
.llseek = generic_file_llseek,
};
/*
 * Create the "orangefs" debugfs directory and, inside it,
 * ORANGEFS_KMOD_DEBUG_HELP_FILE backed by debug_help_string.
 *
 * On success orangefs_debug_disabled is cleared and 0 is returned.
 * If either object cannot be created, pvfs2_debugfs_cleanup() is
 * called and -ENOMEM is returned.
 */
int pvfs2_debugfs_init(void)
{
	debug_dir = debugfs_create_dir("orangefs", NULL);
	if (debug_dir) {
		help_file_dentry =
			debugfs_create_file(ORANGEFS_KMOD_DEBUG_HELP_FILE,
					    0444,
					    debug_dir,
					    debug_help_string,
					    &debug_help_fops);
		if (help_file_dentry) {
			orangefs_debug_disabled = 0;
			return 0;
		}
	}

	pvfs2_debugfs_cleanup();
	return -ENOMEM;
}
/* Remove the orangefs debugfs directory and everything beneath it. */
void pvfs2_debugfs_cleanup(void)
{
	struct dentry *root = debug_dir;

	debugfs_remove_recursive(root);
}
/*
 * open ORANGEFS_KMOD_DEBUG_HELP_FILE
 *
 * Sets the file up as a seq_file whose private pointer is the inode's
 * i_private (the help string handed to debugfs_create_file()).
 *
 * Returns 0 on success, -ENODEV while debugfs support is disabled, or
 * the error reported by seq_open().
 */
static int orangefs_debug_help_open(struct inode *inode, struct file *file)
{
	int rc = -ENODEV;

	gossip_debug(GOSSIP_DEBUGFS_DEBUG,
		     "orangefs_debug_help_open: start\n");

	if (orangefs_debug_disabled)
		goto out;

	/* report the real seq_open() failure rather than masking it */
	rc = seq_open(file, &help_debug_ops);
	if (rc)
		goto out;

	((struct seq_file *)(file->private_data))->private = inode->i_private;

out:
	gossip_debug(GOSSIP_DEBUGFS_DEBUG,
		     "orangefs_debug_help_open: rc:%d:\n",
		     rc);
	return rc;
}
/*
 * seq_file start callback for the help file.
 *
 * The whole "payload" is a single (long) string, so it is handed out
 * only at position 0; for any later position NULL is returned, which
 * tells seq_file that the sequence is finished.
 */
static void *help_start(struct seq_file *m, loff_t *pos)
{
	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_start: start\n");

	return (*pos == 0) ? m->private : NULL;
}
/*
 * seq_file next callback for the help file.
 *
 * There is only one record, so there is never a next one; the position
 * is still advanced because the seq_file iterator contract expects
 * next() to move *pos even when it returns NULL.
 */
static void *help_next(struct seq_file *m, void *v, loff_t *pos)
{
	(*pos)++;
	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_next: start\n");

	return NULL;
}
/* seq_file stop callback: nothing to release, only traces the call. */
static void help_stop(struct seq_file *m, void *p)
{
	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_stop: start\n");
}
/*
 * seq_file show callback: emit the record handed out by help_start()
 * as a string. Always returns 0.
 */
static int help_show(struct seq_file *m, void *v)
{
	const char *text = v;

	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_show: start\n");
	seq_puts(m, text);

	return 0;
}
/*
 * initialize the kernel-debug file.
 *
 * A PVFS2_MAX_DEBUG_STRING_LEN buffer is filled with
 * kernel_debug_string plus a newline (or "none\n" if that would not
 * fit) and handed to debugfs as the private data of
 * ORANGEFS_KMOD_DEBUG_FILE.
 *
 * Returns 0 on success. On failure -ENOMEM is returned, the buffer is
 * freed and pvfs2_debugfs_cleanup() is called.
 */
int pvfs2_kernel_debug_init(void)
{
	int rc = -ENOMEM;
	struct dentry *ret;
	char *k_buffer = NULL;

	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__);

	k_buffer = kzalloc(PVFS2_MAX_DEBUG_STRING_LEN, GFP_KERNEL);
	if (!k_buffer)
		goto out;

	if (strlen(kernel_debug_string) + 1 < PVFS2_MAX_DEBUG_STRING_LEN) {
		strcpy(k_buffer, kernel_debug_string);
		strcat(k_buffer, "\n");
	} else {
		strcpy(k_buffer, "none\n");
		pr_info("%s: overflow 1!\n", __func__);
	}

	ret = debugfs_create_file(ORANGEFS_KMOD_DEBUG_FILE,
				  0444,
				  debug_dir,
				  k_buffer,
				  &kernel_debug_fops);
	if (!ret) {
		pr_info("%s: failed to create %s.\n",
			__func__,
			ORANGEFS_KMOD_DEBUG_FILE);
		/* no file was created, so nothing else references the buffer */
		kfree(k_buffer);
		goto out;
	}

	rc = 0;

out:
	if (rc)
		pvfs2_debugfs_cleanup();

	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc);
	return rc;
}
/*
 * initialize the client-debug file.
 *
 * A PVFS2_MAX_DEBUG_STRING_LEN buffer is filled with
 * client_debug_string plus a newline (or "none\n" if that would not
 * fit) and handed to debugfs as the private data of
 * ORANGEFS_CLIENT_DEBUG_FILE. The file shares kernel_debug_fops with
 * the kernel-debug file.
 *
 * Returns 0 on success. On failure -ENOMEM is returned, the buffer is
 * freed and pvfs2_debugfs_cleanup() is called.
 */
int pvfs2_client_debug_init(void)
{
	int rc = -ENOMEM;
	char *c_buffer = NULL;

	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__);

	c_buffer = kzalloc(PVFS2_MAX_DEBUG_STRING_LEN, GFP_KERNEL);
	if (!c_buffer)
		goto out;

	if (strlen(client_debug_string) + 1 < PVFS2_MAX_DEBUG_STRING_LEN) {
		strcpy(c_buffer, client_debug_string);
		strcat(c_buffer, "\n");
	} else {
		strcpy(c_buffer, "none\n");
		pr_info("%s: overflow! 2\n", __func__);
	}

	client_debug_dentry = debugfs_create_file(ORANGEFS_CLIENT_DEBUG_FILE,
						  0444,
						  debug_dir,
						  c_buffer,
						  &kernel_debug_fops);
	if (!client_debug_dentry) {
		pr_info("%s: failed to create %s.\n",
			__func__,
			ORANGEFS_CLIENT_DEBUG_FILE);
		/* no file was created, so nothing else references the buffer */
		kfree(c_buffer);
		goto out;
	}

	rc = 0;

out:
	if (rc)
		pvfs2_debugfs_cleanup();

	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc);
	return rc;
}
/*
 * open ORANGEFS_KMOD_DEBUG_FILE or ORANGEFS_CLIENT_DEBUG_FILE.
 *
 * Points file->private_data at the inode's i_private (the debug string
 * buffer) under orangefs_debug_lock.
 *
 * Returns 0 on success, -ENODEV while debugfs support is disabled.
 */
int orangefs_debug_open(struct inode *inode, struct file *file)
{
	int rc;

	gossip_debug(GOSSIP_DEBUGFS_DEBUG,
		     "%s: orangefs_debug_disabled: %d\n",
		     __func__,
		     orangefs_debug_disabled);

	if (orangefs_debug_disabled) {
		rc = -ENODEV;
	} else {
		rc = 0;
		mutex_lock(&orangefs_debug_lock);
		file->private_data = inode->i_private;
		mutex_unlock(&orangefs_debug_lock);
	}

	gossip_debug(GOSSIP_DEBUGFS_DEBUG,
		     "orangefs_debug_open: rc: %d\n",
		     rc);
	return rc;
}
/*
 * Read handler for the kernel-debug and client-debug files.
 *
 * The debug string behind file->private_data is snapshotted into a
 * temporary buffer under orangefs_debug_lock and then copied out with
 * simple_read_from_buffer(), which honours count and *ppos.
 *
 * Returns the value of simple_read_from_buffer(), or -ENOMEM if the
 * temporary buffer cannot be allocated.
 */
static ssize_t orangefs_debug_read(struct file *file,
				   char __user *ubuf,
				   size_t count,
				   loff_t *ppos)
{
	char *buf;
	int sprintf_ret;
	ssize_t read_ret = -ENOMEM;

	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "orangefs_debug_read: start\n");

	buf = kmalloc(PVFS2_MAX_DEBUG_STRING_LEN, GFP_KERNEL);
	if (!buf)
		goto out;

	mutex_lock(&orangefs_debug_lock);
	sprintf_ret = sprintf(buf, "%s", (char *)file->private_data);
	mutex_unlock(&orangefs_debug_lock);

	read_ret = simple_read_from_buffer(ubuf, count, ppos, buf, sprintf_ret);

	kfree(buf);

out:
	/* read_ret is signed and may hold a negative errno: print with %zd */
	gossip_debug(GOSSIP_DEBUGFS_DEBUG,
		     "orangefs_debug_read: ret: %zd\n",
		     read_ret);

	return read_ret;
}
/*
 * Write handler for the kernel-debug and client-debug files.
 *
 * The user's keyword string (minus its last byte, normally the newline
 * added by "echo") is converted to a debug mask and back to a
 * canonical string, which then replaces the contents of the file's
 * i_private buffer. For the client-debug file the new mask is also
 * sent to the client with a PVFS2_VFS_OP_PARAM upcall.
 *
 * Returns the number of bytes the caller asked to write on success
 * (even if the input was truncated), 0 for an empty write, -ENOMEM if
 * the temporary buffer cannot be allocated, or -EFAULT for the other
 * failures detected here.
 */
static ssize_t orangefs_debug_write(struct file *file,
				    const char __user *ubuf,
				    size_t count,
				    loff_t *ppos)
{
	char *buf;
	int rc = -EFAULT;
	size_t silly = 0;
	size_t copy_len;
	char *debug_string;
	struct pvfs2_kernel_op_s *new_op = NULL;
	struct client_debug_mask c_mask = { NULL, 0, 0 };

	gossip_debug(GOSSIP_DEBUGFS_DEBUG,
		     "orangefs_debug_write: %s\n",
		     file->f_path.dentry->d_name.name);

	/* an empty write has no keyword, and "count - 1" below must not wrap */
	if (count == 0)
		return 0;

	/*
	 * Thwart users who try to jamb a ridiculous number
	 * of bytes into the debug file...
	 */
	if (count > PVFS2_MAX_DEBUG_STRING_LEN + 1) {
		silly = count;
		count = PVFS2_MAX_DEBUG_STRING_LEN + 1;
	}

	buf = kmalloc(PVFS2_MAX_DEBUG_STRING_LEN, GFP_KERNEL);
	if (!buf) {
		rc = -ENOMEM;
		goto out;
	}
	memset(buf, 0, PVFS2_MAX_DEBUG_STRING_LEN);

	/*
	 * Drop the trailing byte and never fill the last byte of buf, so
	 * the zeroed buffer always stays NUL-terminated for the string
	 * helpers below.
	 */
	copy_len = count - 1;
	if (copy_len > PVFS2_MAX_DEBUG_STRING_LEN - 1)
		copy_len = PVFS2_MAX_DEBUG_STRING_LEN - 1;

	if (copy_from_user(buf, ubuf, copy_len)) {
		gossip_debug(GOSSIP_DEBUGFS_DEBUG,
			     "%s: copy_from_user failed!\n",
			     __func__);
		goto out;
	}

	/*
	 * Map the keyword string from userspace into a valid debug mask.
	 * The mapping process involves mapping the human-inputted string
	 * into a valid mask, and then rebuilding the string from the
	 * verified valid mask.
	 *
	 * A service operation is required to set a new client-side
	 * debug mask.
	 */
	if (!strcmp(file->f_path.dentry->d_name.name,
		    ORANGEFS_KMOD_DEBUG_FILE)) {
		debug_string_to_mask(buf, &gossip_debug_mask, 0);
		debug_mask_to_string(&gossip_debug_mask, 0);
		debug_string = kernel_debug_string;
		gossip_debug(GOSSIP_DEBUGFS_DEBUG,
			     "New kernel debug string is %s\n",
			     kernel_debug_string);
	} else {
		/* Can't reset client debug mask if client is not running. */
		if (is_daemon_in_service()) {
			pr_info("%s: Client not running :%d:\n",
				__func__,
				is_daemon_in_service());
			goto out;
		}

		debug_string_to_mask(buf, &c_mask, 1);
		debug_mask_to_string(&c_mask, 1);
		debug_string = client_debug_string;

		new_op = op_alloc(PVFS2_VFS_OP_PARAM);
		if (!new_op) {
			pr_info("%s: op_alloc failed!\n", __func__);
			goto out;
		}

		new_op->upcall.req.param.op =
			PVFS2_PARAM_REQUEST_OP_TWO_MASK_VALUES;
		new_op->upcall.req.param.type = PVFS2_PARAM_REQUEST_SET;
		memset(new_op->upcall.req.param.s_value,
		       0,
		       PVFS2_MAX_DEBUG_STRING_LEN);
		sprintf(new_op->upcall.req.param.s_value,
			"%llx %llx\n",
			c_mask.mask1,
			c_mask.mask2);

		/*
		 * service_operation returns 0 on success...
		 * NOTE(review): a failure is only logged; the write still
		 * reports success below - confirm that is intended.
		 */
		rc = service_operation(new_op,
				       "pvfs2_param",
				       PVFS2_OP_INTERRUPTIBLE);
		if (rc)
			gossip_debug(GOSSIP_DEBUGFS_DEBUG,
				     "%s: service_operation failed! rc:%d:\n",
				     __func__,
				     rc);

		op_release(new_op);
	}

	mutex_lock(&orangefs_debug_lock);
	memset(file->f_inode->i_private, 0, PVFS2_MAX_DEBUG_STRING_LEN);
	/* bounded: string + '\n' + NUL may not exceed the private buffer */
	snprintf((char *)file->f_inode->i_private,
		 PVFS2_MAX_DEBUG_STRING_LEN,
		 "%s\n",
		 debug_string);
	mutex_unlock(&orangefs_debug_lock);

	*ppos += count;
	/* report the original length so an oversized write is not retried */
	if (silly)
		rc = silly;
	else
		rc = count;

out:
	gossip_debug(GOSSIP_DEBUGFS_DEBUG,
		     "orangefs_debug_write: rc: %d\n",
		     rc);
	kfree(buf);
	return rc;
}

316
fs/orangefs/pvfs2-mod.c Normal file
Просмотреть файл

@ -0,0 +1,316 @@
/*
* (C) 2001 Clemson University and The University of Chicago
*
* Changes by Acxiom Corporation to add proc file handler for pvfs2 client
* parameters, Copyright Acxiom Corporation, 2005.
*
* See COPYING in top-level directory.
*/
#include "protocol.h"
#include "pvfs2-kernel.h"
#include "pvfs2-debugfs.h"
#include "pvfs2-sysfs.h"
/* PVFS2_VERSION is a ./configure define */
#ifndef PVFS2_VERSION
#define PVFS2_VERSION "Unknown"
#endif
/*
* global variables declared here
*/
/* array of client debug keyword/mask values */
struct client_debug_mask *cdm_array;
int cdm_element_count;
/* printed by pvfs2_init() alongside gossip_debug_mask; starts out as "none" */
char kernel_debug_string[PVFS2_MAX_DEBUG_STRING_LEN] = "none";
char client_debug_string[PVFS2_MAX_DEBUG_STRING_LEN];
char client_debug_array_string[PVFS2_MAX_DEBUG_STRING_LEN];
/* NOTE(review): presumably filled in by orangefs_prepare_debugfs_help_string() - confirm */
char *debug_help_string;
int help_string_initialized;
struct dentry *help_file_dentry;
struct dentry *client_debug_dentry;
struct dentry *debug_dir;
int client_verbose_index;
int client_all_index;
struct pvfs2_stats g_pvfs2_stats;
/* the size of the hash tables for ops in progress */
int hash_table_size = 509;
/* module parameter; copied into gossip_debug_mask by pvfs2_init() */
static ulong module_parm_debug_mask;
__u64 gossip_debug_mask;
struct client_debug_mask client_debug_mask = { NULL, 0, 0 };
/* set by pvfs2_init() when the load-time debug mask is non-zero */
unsigned int kernel_mask_set_mod_init; /* implicitly false */
/* pvfs2_init() clamps negative values of the two timeouts to 0 */
int op_timeout_secs = PVFS2_DEFAULT_OP_TIMEOUT_SECS;
int slot_timeout_secs = PVFS2_DEFAULT_SLOT_TIMEOUT_SECS;
__u32 DEBUG_LINE = 50;
/* module metadata and parameter descriptions */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("PVFS2 Development Team");
MODULE_DESCRIPTION("The Linux Kernel VFS interface to PVFS2");
MODULE_PARM_DESC(module_parm_debug_mask, "debugging level (see pvfs2-debug.h for values)");
MODULE_PARM_DESC(op_timeout_secs, "Operation timeout in seconds");
MODULE_PARM_DESC(slot_timeout_secs, "Slot timeout in seconds");
MODULE_PARM_DESC(hash_table_size,
"size of hash table for operations in progress");
/* registered by pvfs2_init() and unregistered by pvfs2_exit() */
static struct file_system_type pvfs2_fs_type = {
.name = "pvfs2",
.mount = pvfs2_mount,
.kill_sb = pvfs2_kill_sb,
.owner = THIS_MODULE,
};
/* a permission of 0 keeps a parameter out of sysfs */
module_param(hash_table_size, int, 0);
/* 0644, not 0755: a sysfs parameter file must not carry execute bits */
module_param(module_parm_debug_mask, ulong, 0644);
module_param(op_timeout_secs, int, 0);
module_param(slot_timeout_secs, int, 0);
/* synchronizes the request device file; initialized in pvfs2_init() */
struct mutex devreq_mutex;
/*
* Blocks non-priority requests from being queued for servicing. This
* could be used for protecting the request list data structure, but
* for now it's only being used to stall the op addition to the request
* list. Initialized in pvfs2_init().
*/
struct mutex request_mutex;
/*
* hash table for storing operations waiting for matching downcall;
* pvfs2_init() allocates hash_table_size list heads for it
*/
struct list_head *htable_ops_in_progress;
DEFINE_SPINLOCK(htable_ops_in_progress_lock);
/* list for queueing upcall operations */
LIST_HEAD(pvfs2_request_list);
/* used to protect the above pvfs2_request_list */
DEFINE_SPINLOCK(pvfs2_request_list_lock);
/* used for incoming request notification */
DECLARE_WAIT_QUEUE_HEAD(pvfs2_request_list_waitq);
/*
 * Module entry point.
 *
 * Normalises the load-time debug mask, then sets up, in order: the
 * backing dev info, the op / dev-req / inode / kiocb caches, the
 * request device, the two request mutexes, the in-progress op hash
 * table, the fsid key table, the debug-help string, debugfs and sysfs
 * entries, and finally registers the filesystem.
 *
 * Returns 0 on success. On failure everything that was set up before
 * the failing step is torn down in reverse order and the error from
 * that step is returned.
 */
static int __init pvfs2_init(void)
{
	int ret = -1;
	__u32 i = 0;

	/* convert input debug mask to a 64-bit unsigned integer */
	gossip_debug_mask = (unsigned long long) module_parm_debug_mask;

	/*
	 * set the kernel's gossip debug string; invalid mask values will
	 * be ignored.
	 */
	debug_mask_to_string(&gossip_debug_mask, 0);

	/* remove any invalid values from the mask */
	debug_string_to_mask(kernel_debug_string, &gossip_debug_mask, 0);

	/*
	 * if the mask has a non-zero value, then indicate that the mask
	 * was set when the kernel module was loaded. The pvfs2 dev ioctl
	 * command will look at this boolean to determine if the kernel's
	 * debug mask should be overwritten when the client-core is started.
	 */
	if (gossip_debug_mask != 0)
		kernel_mask_set_mod_init = true;

	/* print information message to the system log */
	pr_info("pvfs2: pvfs2_init called with debug mask: :%s: :%llx:\n",
		kernel_debug_string,
		(unsigned long long)gossip_debug_mask);

	ret = bdi_init(&pvfs2_backing_dev_info);
	if (ret)
		return ret;

	if (op_timeout_secs < 0)
		op_timeout_secs = 0;

	if (slot_timeout_secs < 0)
		slot_timeout_secs = 0;

	/* initialize global book keeping data structures */
	ret = op_cache_initialize();
	if (ret < 0)
		goto err;

	ret = dev_req_cache_initialize();
	if (ret < 0)
		goto cleanup_op;

	ret = pvfs2_inode_cache_initialize();
	if (ret < 0)
		goto cleanup_req;

	ret = kiocb_cache_initialize();
	if (ret < 0)
		goto cleanup_inode;

	/* Initialize the pvfsdev subsystem. */
	ret = pvfs2_dev_init();
	if (ret < 0) {
		gossip_err("pvfs2: could not initialize device subsystem %d!\n",
			   ret);
		goto cleanup_kiocb;
	}

	mutex_init(&devreq_mutex);
	mutex_init(&request_mutex);

	htable_ops_in_progress =
	    kcalloc(hash_table_size, sizeof(struct list_head), GFP_KERNEL);
	if (!htable_ops_in_progress) {
		gossip_err("Failed to initialize op hashtable");
		ret = -ENOMEM;
		goto cleanup_device;
	}

	/* initialize a doubly linked list at each hash table index */
	for (i = 0; i < hash_table_size; i++)
		INIT_LIST_HEAD(&htable_ops_in_progress[i]);

	ret = fsid_key_table_initialize();
	if (ret < 0)
		goto cleanup_progress_table;

	/*
	 * Build the contents of /sys/kernel/debug/orangefs/debug-help
	 * from the keywords in the kernel keyword/mask array.
	 *
	 * The keywords in the client keyword/mask array are
	 * unknown at boot time.
	 *
	 * orangefs_prepare_debugfs_help_string will be used again
	 * later to rebuild the debug-help file after the client starts
	 * and passes along the needed info. The argument signifies
	 * which time orangefs_prepare_debugfs_help_string is being
	 * called.
	 *
	 */
	ret = orangefs_prepare_debugfs_help_string(1);
	if (ret)
		/* unwind everything set up so far instead of leaking it */
		goto cleanup_key_table;

	pvfs2_debugfs_init();
	pvfs2_kernel_debug_init();
	orangefs_sysfs_init();

	ret = register_filesystem(&pvfs2_fs_type);
	if (ret == 0) {
		pr_info("pvfs2: module version %s loaded\n", PVFS2_VERSION);
		return 0;
	}

	pvfs2_debugfs_cleanup();
	orangefs_sysfs_exit();

cleanup_key_table:
	fsid_key_table_finalize();

cleanup_progress_table:
	kfree(htable_ops_in_progress);

cleanup_device:
	pvfs2_dev_cleanup();

cleanup_kiocb:
	kiocb_cache_finalize();

cleanup_inode:
	pvfs2_inode_cache_finalize();

cleanup_req:
	dev_req_cache_finalize();

cleanup_op:
	op_cache_finalize();

err:
	bdi_destroy(&pvfs2_backing_dev_info);

	return ret;
}
/*
 * Module exit point.
 *
 * Unregisters the filesystem, removes the debugfs/sysfs entries, the
 * fsid key table and the request device, releases every op still
 * queued on pvfs2_request_list or in the in-progress hash table, and
 * then destroys the caches, the hash table and the backing dev info.
 */
static void __exit pvfs2_exit(void)
{
	int i = 0;
	struct pvfs2_kernel_op_s *cur_op = NULL;

	gossip_debug(GOSSIP_INIT_DEBUG, "pvfs2: pvfs2_exit called\n");

	unregister_filesystem(&pvfs2_fs_type);
	pvfs2_debugfs_cleanup();
	orangefs_sysfs_exit();
	fsid_key_table_finalize();
	pvfs2_dev_cleanup();

	/* clear out all pending upcall op requests */
	spin_lock(&pvfs2_request_list_lock);
	while (!list_empty(&pvfs2_request_list)) {
		cur_op = list_entry(pvfs2_request_list.next,
				    struct pvfs2_kernel_op_s,
				    list);
		list_del(&cur_op->list);
		gossip_debug(GOSSIP_INIT_DEBUG,
			     "Freeing unhandled upcall request type %d\n",
			     cur_op->upcall.type);
		op_release(cur_op);
	}
	spin_unlock(&pvfs2_request_list_lock);

	for (i = 0; i < hash_table_size; i++)
		while (!list_empty(&htable_ops_in_progress[i])) {
			cur_op = list_entry(htable_ops_in_progress[i].next,
					    struct pvfs2_kernel_op_s,
					    list);
			/*
			 * Unlink before freeing: otherwise the bucket keeps
			 * pointing at the freed op and this loop never ends.
			 */
			list_del(&cur_op->list);
			op_release(cur_op);
		}

	kiocb_cache_finalize();
	pvfs2_inode_cache_finalize();
	dev_req_cache_finalize();
	op_cache_finalize();

	kfree(htable_ops_in_progress);

	bdi_destroy(&pvfs2_backing_dev_info);

	pr_info("pvfs2: module version %s unloaded\n", PVFS2_VERSION);
}
/*
 * Walk every bucket of the in-progress op hash table, mark each op as
 * purged under its own lock, and wake any task sleeping on the op's
 * wait queue.
 */
void purge_inprogress_ops(void)
{
	struct pvfs2_kernel_op_s *cur;
	struct pvfs2_kernel_op_s *tmp;
	int bucket;

	for (bucket = 0; bucket < hash_table_size; bucket++) {
		struct list_head *head = &htable_ops_in_progress[bucket];

		list_for_each_entry_safe(cur, tmp, head, list) {
			spin_lock(&cur->lock);
			gossip_debug(GOSSIP_INIT_DEBUG,
				     "pvfs2-client-core: purging in-progress op tag "
				     "%llu %s\n",
				     llu(cur->tag),
				     get_opname_string(cur));
			set_op_state_purged(cur);
			spin_unlock(&cur->lock);

			/* wake the waiter only after the op lock is dropped */
			wake_up_interruptible(&cur->waitq);
		}
	}
}
/* hook pvfs2_init()/pvfs2_exit() up as the module's load and unload handlers */
module_init(pvfs2_init);
module_exit(pvfs2_exit);