2005-04-17 02:20:36 +04:00
|
|
|
/*
|
|
|
|
* fs/libfs.c
|
|
|
|
* Library for filesystem writers.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include <linux/pagemap.h>
|
|
|
|
#include <linux/mount.h>
|
|
|
|
#include <linux/vfs.h>
|
2006-03-23 14:00:36 +03:00
|
|
|
#include <linux/mutex.h>
|
2007-10-22 03:42:05 +04:00
|
|
|
#include <linux/exportfs.h>
|
2006-03-23 14:00:36 +03:00
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
#include <asm/uaccess.h>
|
|
|
|
|
|
|
|
int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
|
|
|
struct kstat *stat)
|
|
|
|
{
|
|
|
|
struct inode *inode = dentry->d_inode;
|
|
|
|
generic_fillattr(inode, stat);
|
|
|
|
stat->blocks = inode->i_mapping->nrpages << (PAGE_CACHE_SHIFT - 9);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2006-06-23 13:02:58 +04:00
|
|
|
int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
|
2005-04-17 02:20:36 +04:00
|
|
|
{
|
2006-06-23 13:02:58 +04:00
|
|
|
buf->f_type = dentry->d_sb->s_magic;
|
2005-04-17 02:20:36 +04:00
|
|
|
buf->f_bsize = PAGE_CACHE_SIZE;
|
|
|
|
buf->f_namelen = NAME_MAX;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * d_delete callback used by simple_lookup().
 *
 * An in-memory filesystem gains nothing from keeping negative dentries
 * around: they only cost memory and lookup time.  Unconditionally
 * returning 1 asks for the dentry to be deleted immediately.
 * @dentry is not inspected.
 */
static int simple_delete_dentry(struct dentry *dentry)
{
	return 1;
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Lookup the data. This is trivial - if the dentry didn't already
|
|
|
|
* exist, we know it is negative. Set d_op to delete negative dentries.
|
|
|
|
*/
|
|
|
|
struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
|
|
|
|
{
|
2009-02-20 09:02:22 +03:00
|
|
|
static const struct dentry_operations simple_dentry_operations = {
|
2005-04-17 02:20:36 +04:00
|
|
|
.d_delete = simple_delete_dentry,
|
|
|
|
};
|
|
|
|
|
|
|
|
if (dentry->d_name.len > NAME_MAX)
|
|
|
|
return ERR_PTR(-ENAMETOOLONG);
|
|
|
|
dentry->d_op = &simple_dentry_operations;
|
|
|
|
d_add(dentry, NULL);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * fsync stub: does nothing and reports success (0).  None of the
 * arguments are examined.
 */
int simple_sync_file(struct file * file, struct dentry *dentry, int datasync)
{
	return 0;
}
|
|
|
|
|
|
|
|
int dcache_dir_open(struct inode *inode, struct file *file)
|
|
|
|
{
|
|
|
|
static struct qstr cursor_name = {.len = 1, .name = "."};
|
|
|
|
|
2006-12-08 13:36:35 +03:00
|
|
|
file->private_data = d_alloc(file->f_path.dentry, &cursor_name);
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
return file->private_data ? 0 : -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
int dcache_dir_close(struct inode *inode, struct file *file)
|
|
|
|
{
|
|
|
|
dput(file->private_data);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Seek within a dcache-backed directory.
 *
 * @origin 0 takes @offset as the new position, @origin 1 takes it relative
 * to the current f_pos; any other origin, or a negative resulting
 * position, fails with -EINVAL.  Positions 0 and 1 are the slots that
 * dcache_readdir() uses for "." and ".."; for a position >= 2 the cursor
 * dentry (file->private_data) is unlinked from the directory's child list
 * and re-inserted after skipping (position - 2) children that are hashed
 * and have an inode, or at the end of the list if there are fewer.
 *
 * The whole operation runs under the directory inode's i_mutex; the list
 * manipulation additionally holds dcache_lock.
 *
 * Returns the new position on success.
 */
loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
{
	struct dentry *dir = file->f_path.dentry;
	struct mutex *dir_lock = &dir->d_inode->i_mutex;

	mutex_lock(dir_lock);

	switch (origin) {
	case 1:
		offset += file->f_pos;
		/* fallthrough */
	case 0:
		if (offset >= 0)
			break;
		/* fallthrough: negative positions are rejected */
	default:
		mutex_unlock(dir_lock);
		return -EINVAL;
	}

	if (offset == file->f_pos)
		goto out;

	file->f_pos = offset;
	if (offset >= 2) {
		struct dentry *cursor = file->private_data;
		struct list_head *head = &dir->d_subdirs;
		struct list_head *pos;
		loff_t remaining = offset - 2;

		spin_lock(&dcache_lock);
		/* Take the cursor out first so the walk never counts it. */
		list_del(&cursor->d_u.d_child);
		for (pos = head->next; remaining && pos != head; pos = pos->next) {
			struct dentry *child;

			child = list_entry(pos, struct dentry, d_u.d_child);
			if (child->d_inode && !d_unhashed(child))
				remaining--;
		}
		list_add_tail(&cursor->d_u.d_child, pos);
		spin_unlock(&dcache_lock);
	}

out:
	mutex_unlock(dir_lock);
	return offset;
}
|
|
|
|
|
|
|
|
/* Relationship between i_mode and the DT_xxx types */
|
|
|
|
static inline unsigned char dt_type(struct inode *inode)
|
|
|
|
{
|
|
|
|
return (inode->i_mode >> 12) & 15;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Directory is locked and all positive dentries in it are safe, since
|
|
|
|
* for ramfs-type trees they can't go away without unlink() or rmdir(),
|
|
|
|
* both impossible due to the lock on directory.
|
|
|
|
*/
|
|
|
|
|
|
|
|
int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
|
|
|
|
{
|
2006-12-08 13:36:35 +03:00
|
|
|
struct dentry *dentry = filp->f_path.dentry;
|
2005-04-17 02:20:36 +04:00
|
|
|
struct dentry *cursor = filp->private_data;
|
[PATCH] shrink dentry struct
Some long time ago, dentry struct was carefully tuned so that on 32 bits
UP, sizeof(struct dentry) was exactly 128, ie a power of 2, and a multiple
of memory cache lines.
Then RCU was added and dentry struct enlarged by two pointers, with nice
results for SMP, but not so good on UP, because breaking the above tuning
(128 + 8 = 136 bytes)
This patch reverts this unwanted side effect, by using an union (d_u),
where d_rcu and d_child are placed so that these two fields can share their
memory needs.
At the time d_free() is called (and d_rcu is really used), d_child is known
to be empty and not touched by the dentry freeing.
Lockless lookups only access d_name, d_parent, d_lock, d_op, d_flags (so
the previous content of d_child is not needed if said dentry was unhashed
but still accessed by a CPU because of RCU constraints)
As dentry cache easily contains millions of entries, a size reduction is
worth the extra complexity of the ugly C union.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Cc: Maneesh Soni <maneesh@in.ibm.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Ian Kent <raven@themaw.net>
Cc: Paul Jackson <pj@sgi.com>
Cc: Al Viro <viro@ftp.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Neil Brown <neilb@cse.unsw.edu.au>
Cc: James Morris <jmorris@namei.org>
Cc: Stephen Smalley <sds@epoch.ncsc.mil>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-01-08 12:03:32 +03:00
|
|
|
struct list_head *p, *q = &cursor->d_u.d_child;
|
2005-04-17 02:20:36 +04:00
|
|
|
ino_t ino;
|
|
|
|
int i = filp->f_pos;
|
|
|
|
|
|
|
|
switch (i) {
|
|
|
|
case 0:
|
|
|
|
ino = dentry->d_inode->i_ino;
|
|
|
|
if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
|
|
|
|
break;
|
|
|
|
filp->f_pos++;
|
|
|
|
i++;
|
|
|
|
/* fallthrough */
|
|
|
|
case 1:
|
|
|
|
ino = parent_ino(dentry);
|
|
|
|
if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
|
|
|
|
break;
|
|
|
|
filp->f_pos++;
|
|
|
|
i++;
|
|
|
|
/* fallthrough */
|
|
|
|
default:
|
|
|
|
spin_lock(&dcache_lock);
|
2006-06-26 11:24:40 +04:00
|
|
|
if (filp->f_pos == 2)
|
|
|
|
list_move(q, &dentry->d_subdirs);
|
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
|
|
|
|
struct dentry *next;
|
[PATCH] shrink dentry struct
Some long time ago, dentry struct was carefully tuned so that on 32 bits
UP, sizeof(struct dentry) was exactly 128, ie a power of 2, and a multiple
of memory cache lines.
Then RCU was added and dentry struct enlarged by two pointers, with nice
results for SMP, but not so good on UP, because breaking the above tuning
(128 + 8 = 136 bytes)
This patch reverts this unwanted side effect, by using an union (d_u),
where d_rcu and d_child are placed so that these two fields can share their
memory needs.
At the time d_free() is called (and d_rcu is really used), d_child is known
to be empty and not touched by the dentry freeing.
Lockless lookups only access d_name, d_parent, d_lock, d_op, d_flags (so
the previous content of d_child is not needed if said dentry was unhashed
but still accessed by a CPU because of RCU constraints)
As dentry cache easily contains millions of entries, a size reduction is
worth the extra complexity of the ugly C union.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Cc: Maneesh Soni <maneesh@in.ibm.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Ian Kent <raven@themaw.net>
Cc: Paul Jackson <pj@sgi.com>
Cc: Al Viro <viro@ftp.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Neil Brown <neilb@cse.unsw.edu.au>
Cc: James Morris <jmorris@namei.org>
Cc: Stephen Smalley <sds@epoch.ncsc.mil>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-01-08 12:03:32 +03:00
|
|
|
next = list_entry(p, struct dentry, d_u.d_child);
|
2005-04-17 02:20:36 +04:00
|
|
|
if (d_unhashed(next) || !next->d_inode)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
spin_unlock(&dcache_lock);
|
2007-05-09 08:44:57 +04:00
|
|
|
if (filldir(dirent, next->d_name.name,
|
|
|
|
next->d_name.len, filp->f_pos,
|
|
|
|
next->d_inode->i_ino,
|
|
|
|
dt_type(next->d_inode)) < 0)
|
2005-04-17 02:20:36 +04:00
|
|
|
return 0;
|
|
|
|
spin_lock(&dcache_lock);
|
|
|
|
/* next is still alive */
|
2006-06-26 11:24:40 +04:00
|
|
|
list_move(q, p);
|
2005-04-17 02:20:36 +04:00
|
|
|
p = q;
|
|
|
|
filp->f_pos++;
|
|
|
|
}
|
|
|
|
spin_unlock(&dcache_lock);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * read() handler for directories: always fails with -EISDIR.  None of the
 * arguments are examined.
 */
ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
{
	return -EISDIR;
}
|
|
|
|
|
2006-03-28 13:56:42 +04:00
|
|
|
const struct file_operations simple_dir_operations = {
|
2005-04-17 02:20:36 +04:00
|
|
|
.open = dcache_dir_open,
|
|
|
|
.release = dcache_dir_close,
|
|
|
|
.llseek = dcache_dir_lseek,
|
|
|
|
.read = generic_read_dir,
|
|
|
|
.readdir = dcache_readdir,
|
2005-06-26 01:55:41 +04:00
|
|
|
.fsync = simple_sync_file,
|
2005-04-17 02:20:36 +04:00
|
|
|
};
|
|
|
|
|
2007-02-12 11:55:39 +03:00
|
|
|
const struct inode_operations simple_dir_inode_operations = {
|
2005-04-17 02:20:36 +04:00
|
|
|
.lookup = simple_lookup,
|
|
|
|
};
|
|
|
|
|
2007-03-05 11:30:28 +03:00
|
|
|
static const struct super_operations simple_super_operations = {
|
|
|
|
.statfs = simple_statfs,
|
|
|
|
};
|
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
/*
|
|
|
|
* Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
|
|
|
|
* will never be mountable)
|
|
|
|
*/
|
[PATCH] VFS: Permit filesystem to override root dentry on mount
Extend the get_sb() filesystem operation to take an extra argument that
permits the VFS to pass in the target vfsmount that defines the mountpoint.
The filesystem is then required to manually set the superblock and root dentry
pointers. For most filesystems, this should be done with simple_set_mnt()
which will set the superblock pointer and then set the root dentry to the
superblock's s_root (as per the old default behaviour).
The get_sb() op now returns an integer as there's now no need to return the
superblock pointer.
This patch permits a superblock to be implicitly shared amongst several mount
points, such as can be done with NFS to avoid potential inode aliasing. In
such a case, simple_set_mnt() would not be called, and instead the mnt_root
and mnt_sb would be set directly.
The patch also makes the following changes:
(*) the get_sb_*() convenience functions in the core kernel now take a vfsmount
pointer argument and return an integer, so most filesystems have to change
very little.
(*) If one of the convenience function is not used, then get_sb() should
normally call simple_set_mnt() to instantiate the vfsmount. This will
always return 0, and so can be tail-called from get_sb().
(*) generic_shutdown_super() now calls shrink_dcache_sb() to clean up the
dcache upon superblock destruction rather than shrink_dcache_anon().
This is required because the superblock may now have multiple trees that
aren't actually bound to s_root, but that still need to be cleaned up. The
currently called functions assume that the whole tree is rooted at s_root,
and that anonymous dentries are not the roots of trees which results in
dentries being left unculled.
However, with the way NFS superblock sharing are currently set to be
implemented, these assumptions are violated: the root of the filesystem is
simply a dummy dentry and inode (the real inode for '/' may well be
inaccessible), and all the vfsmounts are rooted on anonymous[*] dentries
with child trees.
[*] Anonymous until discovered from another tree.
(*) The documentation has been adjusted, including the additional bit of
changing ext2_* into foo_* in the documentation.
[akpm@osdl.org: convert ipath_fs, do other stuff]
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Nathan Scott <nathans@sgi.com>
Cc: Roland Dreier <rolandd@cisco.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-23 13:02:57 +04:00
|
|
|
int get_sb_pseudo(struct file_system_type *fs_type, char *name,
|
2007-02-12 11:55:41 +03:00
|
|
|
const struct super_operations *ops, unsigned long magic,
|
[PATCH] VFS: Permit filesystem to override root dentry on mount
Extend the get_sb() filesystem operation to take an extra argument that
permits the VFS to pass in the target vfsmount that defines the mountpoint.
The filesystem is then required to manually set the superblock and root dentry
pointers. For most filesystems, this should be done with simple_set_mnt()
which will set the superblock pointer and then set the root dentry to the
superblock's s_root (as per the old default behaviour).
The get_sb() op now returns an integer as there's now no need to return the
superblock pointer.
This patch permits a superblock to be implicitly shared amongst several mount
points, such as can be done with NFS to avoid potential inode aliasing. In
such a case, simple_set_mnt() would not be called, and instead the mnt_root
and mnt_sb would be set directly.
The patch also makes the following changes:
(*) the get_sb_*() convenience functions in the core kernel now take a vfsmount
pointer argument and return an integer, so most filesystems have to change
very little.
(*) If one of the convenience function is not used, then get_sb() should
normally call simple_set_mnt() to instantiate the vfsmount. This will
always return 0, and so can be tail-called from get_sb().
(*) generic_shutdown_super() now calls shrink_dcache_sb() to clean up the
dcache upon superblock destruction rather than shrink_dcache_anon().
This is required because the superblock may now have multiple trees that
aren't actually bound to s_root, but that still need to be cleaned up. The
currently called functions assume that the whole tree is rooted at s_root,
and that anonymous dentries are not the roots of trees which results in
dentries being left unculled.
However, with the way NFS superblock sharing are currently set to be
implemented, these assumptions are violated: the root of the filesystem is
simply a dummy dentry and inode (the real inode for '/' may well be
inaccessible), and all the vfsmounts are rooted on anonymous[*] dentries
with child trees.
[*] Anonymous until discovered from another tree.
(*) The documentation has been adjusted, including the additional bit of
changing ext2_* into foo_* in the documentation.
[akpm@osdl.org: convert ipath_fs, do other stuff]
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Nathan Scott <nathans@sgi.com>
Cc: Roland Dreier <rolandd@cisco.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-23 13:02:57 +04:00
|
|
|
struct vfsmount *mnt)
|
2005-04-17 02:20:36 +04:00
|
|
|
{
|
|
|
|
struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
|
|
|
|
struct dentry *dentry;
|
|
|
|
struct inode *root;
|
|
|
|
struct qstr d_name = {.name = name, .len = strlen(name)};
|
|
|
|
|
|
|
|
if (IS_ERR(s))
|
[PATCH] VFS: Permit filesystem to override root dentry on mount
Extend the get_sb() filesystem operation to take an extra argument that
permits the VFS to pass in the target vfsmount that defines the mountpoint.
The filesystem is then required to manually set the superblock and root dentry
pointers. For most filesystems, this should be done with simple_set_mnt()
which will set the superblock pointer and then set the root dentry to the
superblock's s_root (as per the old default behaviour).
The get_sb() op now returns an integer as there's now no need to return the
superblock pointer.
This patch permits a superblock to be implicitly shared amongst several mount
points, such as can be done with NFS to avoid potential inode aliasing. In
such a case, simple_set_mnt() would not be called, and instead the mnt_root
and mnt_sb would be set directly.
The patch also makes the following changes:
(*) the get_sb_*() convenience functions in the core kernel now take a vfsmount
pointer argument and return an integer, so most filesystems have to change
very little.
(*) If one of the convenience function is not used, then get_sb() should
normally call simple_set_mnt() to instantiate the vfsmount. This will
always return 0, and so can be tail-called from get_sb().
(*) generic_shutdown_super() now calls shrink_dcache_sb() to clean up the
dcache upon superblock destruction rather than shrink_dcache_anon().
This is required because the superblock may now have multiple trees that
aren't actually bound to s_root, but that still need to be cleaned up. The
currently called functions assume that the whole tree is rooted at s_root,
and that anonymous dentries are not the roots of trees which results in
dentries being left unculled.
However, with the way NFS superblock sharing are currently set to be
implemented, these assumptions are violated: the root of the filesystem is
simply a dummy dentry and inode (the real inode for '/' may well be
inaccessible), and all the vfsmounts are rooted on anonymous[*] dentries
with child trees.
[*] Anonymous until discovered from another tree.
(*) The documentation has been adjusted, including the additional bit of
changing ext2_* into foo_* in the documentation.
[akpm@osdl.org: convert ipath_fs, do other stuff]
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Nathan Scott <nathans@sgi.com>
Cc: Roland Dreier <rolandd@cisco.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-23 13:02:57 +04:00
|
|
|
return PTR_ERR(s);
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
s->s_flags = MS_NOUSER;
|
|
|
|
s->s_maxbytes = ~0ULL;
|
2008-07-30 09:33:03 +04:00
|
|
|
s->s_blocksize = PAGE_SIZE;
|
|
|
|
s->s_blocksize_bits = PAGE_SHIFT;
|
2005-04-17 02:20:36 +04:00
|
|
|
s->s_magic = magic;
|
2007-03-05 11:30:28 +03:00
|
|
|
s->s_op = ops ? ops : &simple_super_operations;
|
2005-04-17 02:20:36 +04:00
|
|
|
s->s_time_gran = 1;
|
|
|
|
root = new_inode(s);
|
|
|
|
if (!root)
|
|
|
|
goto Enomem;
|
2007-05-08 11:32:31 +04:00
|
|
|
/*
|
|
|
|
* since this is the first inode, make it number 1. New inodes created
|
|
|
|
* after this must take care not to collide with it (by passing
|
|
|
|
* max_reserved of 1 to iunique).
|
|
|
|
*/
|
|
|
|
root->i_ino = 1;
|
2005-04-17 02:20:36 +04:00
|
|
|
root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
|
|
|
|
root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
|
|
|
|
dentry = d_alloc(NULL, &d_name);
|
|
|
|
if (!dentry) {
|
|
|
|
iput(root);
|
|
|
|
goto Enomem;
|
|
|
|
}
|
|
|
|
dentry->d_sb = s;
|
|
|
|
dentry->d_parent = dentry;
|
|
|
|
d_instantiate(dentry, root);
|
|
|
|
s->s_root = dentry;
|
|
|
|
s->s_flags |= MS_ACTIVE;
|
2009-03-04 23:06:34 +03:00
|
|
|
simple_set_mnt(mnt, s);
|
|
|
|
return 0;
|
2005-04-17 02:20:36 +04:00
|
|
|
|
|
|
|
Enomem:
|
|
|
|
up_write(&s->s_umount);
|
|
|
|
deactivate_super(s);
|
[PATCH] VFS: Permit filesystem to override root dentry on mount
Extend the get_sb() filesystem operation to take an extra argument that
permits the VFS to pass in the target vfsmount that defines the mountpoint.
The filesystem is then required to manually set the superblock and root dentry
pointers. For most filesystems, this should be done with simple_set_mnt()
which will set the superblock pointer and then set the root dentry to the
superblock's s_root (as per the old default behaviour).
The get_sb() op now returns an integer as there's now no need to return the
superblock pointer.
This patch permits a superblock to be implicitly shared amongst several mount
points, such as can be done with NFS to avoid potential inode aliasing. In
such a case, simple_set_mnt() would not be called, and instead the mnt_root
and mnt_sb would be set directly.
The patch also makes the following changes:
(*) the get_sb_*() convenience functions in the core kernel now take a vfsmount
pointer argument and return an integer, so most filesystems have to change
very little.
(*) If one of the convenience function is not used, then get_sb() should
normally call simple_set_mnt() to instantiate the vfsmount. This will
always return 0, and so can be tail-called from get_sb().
(*) generic_shutdown_super() now calls shrink_dcache_sb() to clean up the
dcache upon superblock destruction rather than shrink_dcache_anon().
This is required because the superblock may now have multiple trees that
aren't actually bound to s_root, but that still need to be cleaned up. The
currently called functions assume that the whole tree is rooted at s_root,
and that anonymous dentries are not the roots of trees which results in
dentries being left unculled.
However, with the way NFS superblock sharing are currently set to be
implemented, these assumptions are violated: the root of the filesystem is
simply a dummy dentry and inode (the real inode for '/' may well be
inaccessible), and all the vfsmounts are rooted on anonymous[*] dentries
with child trees.
[*] Anonymous until discovered from another tree.
(*) The documentation has been adjusted, including the additional bit of
changing ext2_* into foo_* in the documentation.
[akpm@osdl.org: convert ipath_fs, do other stuff]
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Nathan Scott <nathans@sgi.com>
Cc: Roland Dreier <rolandd@cisco.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-06-23 13:02:57 +04:00
|
|
|
return -ENOMEM;
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
|
|
|
|
{
|
|
|
|
struct inode *inode = old_dentry->d_inode;
|
|
|
|
|
|
|
|
inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
|
2006-10-01 10:29:04 +04:00
|
|
|
inc_nlink(inode);
|
2005-04-17 02:20:36 +04:00
|
|
|
atomic_inc(&inode->i_count);
|
|
|
|
dget(dentry);
|
|
|
|
d_instantiate(dentry, inode);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline int simple_positive(struct dentry *dentry)
|
|
|
|
{
|
|
|
|
return dentry->d_inode && !d_unhashed(dentry);
|
|
|
|
}
|
|
|
|
|
|
|
|
int simple_empty(struct dentry *dentry)
|
|
|
|
{
|
|
|
|
struct dentry *child;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
spin_lock(&dcache_lock);
|
[PATCH] shrink dentry struct
Some long time ago, dentry struct was carefully tuned so that on 32 bits
UP, sizeof(struct dentry) was exactly 128, ie a power of 2, and a multiple
of memory cache lines.
Then RCU was added and dentry struct enlarged by two pointers, with nice
results for SMP, but not so good on UP, because breaking the above tuning
(128 + 8 = 136 bytes)
This patch reverts this unwanted side effect, by using an union (d_u),
where d_rcu and d_child are placed so that these two fields can share their
memory needs.
At the time d_free() is called (and d_rcu is really used), d_child is known
to be empty and not touched by the dentry freeing.
Lockless lookups only access d_name, d_parent, d_lock, d_op, d_flags (so
the previous content of d_child is not needed if said dentry was unhashed
but still accessed by a CPU because of RCU constraints)
As dentry cache easily contains millions of entries, a size reduction is
worth the extra complexity of the ugly C union.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Cc: Maneesh Soni <maneesh@in.ibm.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Ian Kent <raven@themaw.net>
Cc: Paul Jackson <pj@sgi.com>
Cc: Al Viro <viro@ftp.linux.org.uk>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Neil Brown <neilb@cse.unsw.edu.au>
Cc: James Morris <jmorris@namei.org>
Cc: Stephen Smalley <sds@epoch.ncsc.mil>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-01-08 12:03:32 +03:00
|
|
|
list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child)
|
2005-04-17 02:20:36 +04:00
|
|
|
if (simple_positive(child))
|
|
|
|
goto out;
|
|
|
|
ret = 1;
|
|
|
|
out:
|
|
|
|
spin_unlock(&dcache_lock);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
int simple_unlink(struct inode *dir, struct dentry *dentry)
|
|
|
|
{
|
|
|
|
struct inode *inode = dentry->d_inode;
|
|
|
|
|
|
|
|
inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
|
2006-10-01 10:29:03 +04:00
|
|
|
drop_nlink(inode);
|
2005-04-17 02:20:36 +04:00
|
|
|
dput(dentry);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int simple_rmdir(struct inode *dir, struct dentry *dentry)
|
|
|
|
{
|
|
|
|
if (!simple_empty(dentry))
|
|
|
|
return -ENOTEMPTY;
|
|
|
|
|
2006-10-01 10:29:03 +04:00
|
|
|
drop_nlink(dentry->d_inode);
|
2005-04-17 02:20:36 +04:00
|
|
|
simple_unlink(dir, dentry);
|
2006-10-01 10:29:03 +04:00
|
|
|
drop_nlink(dir);
|
2005-04-17 02:20:36 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
|
|
|
|
struct inode *new_dir, struct dentry *new_dentry)
|
|
|
|
{
|
|
|
|
struct inode *inode = old_dentry->d_inode;
|
|
|
|
int they_are_dirs = S_ISDIR(old_dentry->d_inode->i_mode);
|
|
|
|
|
|
|
|
if (!simple_empty(new_dentry))
|
|
|
|
return -ENOTEMPTY;
|
|
|
|
|
|
|
|
if (new_dentry->d_inode) {
|
|
|
|
simple_unlink(new_dir, new_dentry);
|
|
|
|
if (they_are_dirs)
|
2006-10-01 10:29:03 +04:00
|
|
|
drop_nlink(old_dir);
|
2005-04-17 02:20:36 +04:00
|
|
|
} else if (they_are_dirs) {
|
2006-10-01 10:29:03 +04:00
|
|
|
drop_nlink(old_dir);
|
2006-10-01 10:29:04 +04:00
|
|
|
inc_nlink(new_dir);
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime =
|
|
|
|
new_dir->i_mtime = inode->i_ctime = CURRENT_TIME;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * simple_readpage - "read" a page that has no backing store
 * @file: unused
 * @page: locked page to fill
 *
 * Clears the page, flushes the data cache for it, marks it up to date
 * and unlocks it.  Always returns 0.
 */
int simple_readpage(struct file *file, struct page *page)
{
	/* fill: there is nothing to read from, so hand back a cleared page */
	clear_highpage(page);
	flush_dcache_page(page);

	/* publish: contents are now valid, then release the page lock */
	SetPageUptodate(page);
	unlock_page(page);

	return 0;
}
|
|
|
|
|
|
|
|
int simple_prepare_write(struct file *file, struct page *page,
|
|
|
|
unsigned from, unsigned to)
|
|
|
|
{
|
|
|
|
if (!PageUptodate(page)) {
|
2008-02-05 09:28:29 +03:00
|
|
|
if (to - from != PAGE_CACHE_SIZE)
|
|
|
|
zero_user_segments(page,
|
|
|
|
0, from,
|
|
|
|
to, PAGE_CACHE_SIZE);
|
2005-04-17 02:20:36 +04:00
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-10-16 12:25:01 +04:00
|
|
|
int simple_write_begin(struct file *file, struct address_space *mapping,
|
|
|
|
loff_t pos, unsigned len, unsigned flags,
|
|
|
|
struct page **pagep, void **fsdata)
|
|
|
|
{
|
|
|
|
struct page *page;
|
|
|
|
pgoff_t index;
|
|
|
|
unsigned from;
|
|
|
|
|
|
|
|
index = pos >> PAGE_CACHE_SHIFT;
|
|
|
|
from = pos & (PAGE_CACHE_SIZE - 1);
|
|
|
|
|
fs: symlink write_begin allocation context fix
With the write_begin/write_end aops, page_symlink was broken because it
could no longer pass a GFP_NOFS type mask into the point where the
allocations happened. They are done in write_begin, which would always
assume that the filesystem can be entered from reclaim. This bug could
cause filesystem deadlocks.
The funny thing with having a gfp_t mask there is that it doesn't really
allow the caller to arbitrarily tinker with the context in which it can be
called. It couldn't ever be GFP_ATOMIC, for example, because it needs to
take the page lock. The only thing any callers care about is __GFP_FS
anyway, so turn that into a single flag.
Add a new flag for write_begin, AOP_FLAG_NOFS. Filesystems can now act on
this flag in their write_begin function. Change __grab_cache_page to
accept a nofs argument as well, to honour that flag (while we're there,
change the name to grab_cache_page_write_begin which is more instructive
and does away with random leading underscores).
This is really a more flexible way to go in the end anyway -- if a
filesystem happens to want any extra allocations aside from the pagecache
ones in ints write_begin function, it may now use GFP_KERNEL (rather than
GFP_NOFS) for common case allocations (eg. ocfs2_alloc_write_ctxt, for a
random example).
[kosaki.motohiro@jp.fujitsu.com: fix ubifs]
[kosaki.motohiro@jp.fujitsu.com: fix fuse]
Signed-off-by: Nick Piggin <npiggin@suse.de>
Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: <stable@kernel.org> [2.6.28.x]
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
[ Cleaned up the calling convention: just pass in the AOP flags
untouched to the grab_cache_page_write_begin() function. That
just simplifies everybody, and may even allow future expansion of the
logic. - Linus ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-01-04 23:00:53 +03:00
|
|
|
page = grab_cache_page_write_begin(mapping, index, flags);
|
2007-10-16 12:25:01 +04:00
|
|
|
if (!page)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
*pagep = page;
|
|
|
|
|
|
|
|
return simple_prepare_write(file, page, from, from+len);
|
|
|
|
}
|
|
|
|
|
2007-10-17 10:27:16 +04:00
|
|
|
static int simple_commit_write(struct file *file, struct page *page,
|
|
|
|
unsigned from, unsigned to)
|
2005-04-17 02:20:36 +04:00
|
|
|
{
|
|
|
|
struct inode *inode = page->mapping->host;
|
|
|
|
loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
|
|
|
|
|
2007-02-21 00:58:08 +03:00
|
|
|
if (!PageUptodate(page))
|
|
|
|
SetPageUptodate(page);
|
2005-04-17 02:20:36 +04:00
|
|
|
/*
|
|
|
|
* No need to use i_size_read() here, the i_size
|
2006-01-10 02:59:24 +03:00
|
|
|
* cannot change under us because we hold the i_mutex.
|
2005-04-17 02:20:36 +04:00
|
|
|
*/
|
|
|
|
if (pos > inode->i_size)
|
|
|
|
i_size_write(inode, pos);
|
|
|
|
set_page_dirty(page);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-10-16 12:25:01 +04:00
|
|
|
int simple_write_end(struct file *file, struct address_space *mapping,
|
|
|
|
loff_t pos, unsigned len, unsigned copied,
|
|
|
|
struct page *page, void *fsdata)
|
|
|
|
{
|
|
|
|
unsigned from = pos & (PAGE_CACHE_SIZE - 1);
|
|
|
|
|
|
|
|
/* zero the stale part of the page if we did a short copy */
|
|
|
|
if (copied < len) {
|
|
|
|
void *kaddr = kmap_atomic(page, KM_USER0);
|
|
|
|
memset(kaddr + from + copied, 0, len - copied);
|
|
|
|
flush_dcache_page(page);
|
|
|
|
kunmap_atomic(kaddr, KM_USER0);
|
|
|
|
}
|
|
|
|
|
|
|
|
simple_commit_write(file, page, from, from+copied);
|
|
|
|
|
|
|
|
unlock_page(page);
|
|
|
|
page_cache_release(page);
|
|
|
|
|
|
|
|
return copied;
|
|
|
|
}
|
|
|
|
|
2007-05-08 11:32:31 +04:00
|
|
|
/*
|
|
|
|
* the inodes created here are not hashed. If you use iunique to generate
|
|
|
|
* unique inode values later for this filesystem, then you must take care
|
|
|
|
* to pass it an appropriate max_reserved value to avoid collisions.
|
|
|
|
*/
|
2005-04-17 02:20:36 +04:00
|
|
|
int simple_fill_super(struct super_block *s, int magic, struct tree_descr *files)
|
|
|
|
{
|
|
|
|
struct inode *inode;
|
|
|
|
struct dentry *root;
|
|
|
|
struct dentry *dentry;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
s->s_blocksize = PAGE_CACHE_SIZE;
|
|
|
|
s->s_blocksize_bits = PAGE_CACHE_SHIFT;
|
|
|
|
s->s_magic = magic;
|
2007-03-05 11:30:28 +03:00
|
|
|
s->s_op = &simple_super_operations;
|
2005-04-17 02:20:36 +04:00
|
|
|
s->s_time_gran = 1;
|
|
|
|
|
|
|
|
inode = new_inode(s);
|
|
|
|
if (!inode)
|
|
|
|
return -ENOMEM;
|
2007-05-08 11:32:31 +04:00
|
|
|
/*
|
|
|
|
* because the root inode is 1, the files array must not contain an
|
|
|
|
* entry at index 1
|
|
|
|
*/
|
|
|
|
inode->i_ino = 1;
|
2005-04-17 02:20:36 +04:00
|
|
|
inode->i_mode = S_IFDIR | 0755;
|
|
|
|
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
|
|
|
|
inode->i_op = &simple_dir_inode_operations;
|
|
|
|
inode->i_fop = &simple_dir_operations;
|
2006-02-03 14:04:48 +03:00
|
|
|
inode->i_nlink = 2;
|
2005-04-17 02:20:36 +04:00
|
|
|
root = d_alloc_root(inode);
|
|
|
|
if (!root) {
|
|
|
|
iput(inode);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
for (i = 0; !files->name || files->name[0]; i++, files++) {
|
|
|
|
if (!files->name)
|
|
|
|
continue;
|
2007-05-08 11:32:31 +04:00
|
|
|
|
|
|
|
/* warn if it tries to conflict with the root inode */
|
|
|
|
if (unlikely(i == 1))
|
|
|
|
printk(KERN_WARNING "%s: %s passed in a files array"
|
|
|
|
"with an index of 1!\n", __func__,
|
|
|
|
s->s_type->name);
|
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
dentry = d_alloc_name(root, files->name);
|
|
|
|
if (!dentry)
|
|
|
|
goto out;
|
|
|
|
inode = new_inode(s);
|
|
|
|
if (!inode)
|
|
|
|
goto out;
|
|
|
|
inode->i_mode = S_IFREG | files->mode;
|
|
|
|
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
|
|
|
|
inode->i_fop = files->ops;
|
|
|
|
inode->i_ino = i;
|
|
|
|
d_add(dentry, inode);
|
|
|
|
}
|
|
|
|
s->s_root = root;
|
|
|
|
return 0;
|
|
|
|
out:
|
|
|
|
d_genocide(root);
|
|
|
|
dput(root);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
|
|
|
static DEFINE_SPINLOCK(pin_fs_lock);
|
|
|
|
|
2006-06-09 17:34:16 +04:00
|
|
|
int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *count)
|
2005-04-17 02:20:36 +04:00
|
|
|
{
|
|
|
|
struct vfsmount *mnt = NULL;
|
|
|
|
spin_lock(&pin_fs_lock);
|
|
|
|
if (unlikely(!*mount)) {
|
|
|
|
spin_unlock(&pin_fs_lock);
|
2006-06-09 17:34:16 +04:00
|
|
|
mnt = vfs_kern_mount(type, 0, type->name, NULL);
|
2005-04-17 02:20:36 +04:00
|
|
|
if (IS_ERR(mnt))
|
|
|
|
return PTR_ERR(mnt);
|
|
|
|
spin_lock(&pin_fs_lock);
|
|
|
|
if (!*mount)
|
|
|
|
*mount = mnt;
|
|
|
|
}
|
|
|
|
mntget(*mount);
|
|
|
|
++*count;
|
|
|
|
spin_unlock(&pin_fs_lock);
|
|
|
|
mntput(mnt);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void simple_release_fs(struct vfsmount **mount, int *count)
|
|
|
|
{
|
|
|
|
struct vfsmount *mnt;
|
|
|
|
spin_lock(&pin_fs_lock);
|
|
|
|
mnt = *mount;
|
|
|
|
if (!--*count)
|
|
|
|
*mount = NULL;
|
|
|
|
spin_unlock(&pin_fs_lock);
|
|
|
|
mntput(mnt);
|
|
|
|
}
|
|
|
|
|
2008-07-04 20:59:51 +04:00
|
|
|
/**
|
|
|
|
* simple_read_from_buffer - copy data from the buffer to user space
|
|
|
|
* @to: the user space buffer to read to
|
|
|
|
* @count: the maximum number of bytes to read
|
|
|
|
* @ppos: the current position in the buffer
|
|
|
|
* @from: the buffer to read from
|
|
|
|
* @available: the size of the buffer
|
|
|
|
*
|
|
|
|
* The simple_read_from_buffer() function reads up to @count bytes from the
|
|
|
|
* buffer @from at offset @ppos into the user space address starting at @to.
|
|
|
|
*
|
|
|
|
* On success, the number of bytes read is returned and the offset @ppos is
|
|
|
|
* advanced by this number, or negative value is returned on error.
|
|
|
|
**/
|
2005-04-17 02:20:36 +04:00
|
|
|
ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
|
|
|
|
const void *from, size_t available)
|
|
|
|
{
|
|
|
|
loff_t pos = *ppos;
|
|
|
|
if (pos < 0)
|
|
|
|
return -EINVAL;
|
|
|
|
if (pos >= available)
|
|
|
|
return 0;
|
|
|
|
if (count > available - pos)
|
|
|
|
count = available - pos;
|
|
|
|
if (copy_to_user(to, from + pos, count))
|
|
|
|
return -EFAULT;
|
|
|
|
*ppos = pos + count;
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
2008-07-04 20:59:51 +04:00
|
|
|
/**
|
|
|
|
* memory_read_from_buffer - copy data from the buffer
|
|
|
|
* @to: the kernel space buffer to read to
|
|
|
|
* @count: the maximum number of bytes to read
|
|
|
|
* @ppos: the current position in the buffer
|
|
|
|
* @from: the buffer to read from
|
|
|
|
* @available: the size of the buffer
|
|
|
|
*
|
|
|
|
* The memory_read_from_buffer() function reads up to @count bytes from the
|
|
|
|
* buffer @from at offset @ppos into the kernel space address starting at @to.
|
|
|
|
*
|
|
|
|
* On success, the number of bytes read is returned and the offset @ppos is
|
|
|
|
* advanced by this number, or negative value is returned on error.
|
|
|
|
**/
|
2008-06-06 09:46:21 +04:00
|
|
|
ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
|
|
|
|
const void *from, size_t available)
|
|
|
|
{
|
|
|
|
loff_t pos = *ppos;
|
|
|
|
|
|
|
|
if (pos < 0)
|
|
|
|
return -EINVAL;
|
|
|
|
if (pos >= available)
|
|
|
|
return 0;
|
|
|
|
if (count > available - pos)
|
|
|
|
count = available - pos;
|
|
|
|
memcpy(to, from + pos, count);
|
|
|
|
*ppos = pos + count;
|
|
|
|
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
/*
|
|
|
|
* Transaction based IO.
|
|
|
|
* The file expects a single write which triggers the transaction, and then
|
|
|
|
* possibly a read which collects the result - which is stored in a
|
|
|
|
* file-local buffer.
|
|
|
|
*/
|
2009-03-25 18:48:35 +03:00
|
|
|
|
|
|
|
void simple_transaction_set(struct file *file, size_t n)
|
|
|
|
{
|
|
|
|
struct simple_transaction_argresp *ar = file->private_data;
|
|
|
|
|
|
|
|
BUG_ON(n > SIMPLE_TRANSACTION_LIMIT);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The barrier ensures that ar->size will really remain zero until
|
|
|
|
* ar->data is ready for reading.
|
|
|
|
*/
|
|
|
|
smp_mb();
|
|
|
|
ar->size = n;
|
|
|
|
}
|
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
/*
 * simple_transaction_get - set up the transaction buffer for a write
 * @file: file being written; its private_data receives the buffer
 * @buf:  user space request data
 * @size: number of request bytes
 *
 * Allocates a zeroed page, attaches it to @file and copies the request
 * into it.  Returns a pointer to the copied request data, or
 * ERR_PTR(-EFBIG) if @size is larger than SIMPLE_TRANSACTION_LIMIT - 1,
 * ERR_PTR(-ENOMEM) if no page is available, ERR_PTR(-EBUSY) if @file
 * already has a buffer attached, or ERR_PTR(-EFAULT) if the copy from
 * user space fails.
 */
char *simple_transaction_get(struct file *file, const char __user *buf, size_t size)
{
	static DEFINE_SPINLOCK(simple_transaction_lock);
	struct simple_transaction_argresp *argresp;
	int busy;

	if (size > SIMPLE_TRANSACTION_LIMIT - 1)
		return ERR_PTR(-EFBIG);

	argresp = (struct simple_transaction_argresp *)get_zeroed_page(GFP_KERNEL);
	if (!argresp)
		return ERR_PTR(-ENOMEM);

	/* only one write allowed per open */
	spin_lock(&simple_transaction_lock);
	busy = file->private_data != NULL;
	if (!busy)
		file->private_data = argresp;
	spin_unlock(&simple_transaction_lock);

	if (busy) {
		free_page((unsigned long)argresp);
		return ERR_PTR(-EBUSY);
	}

	/* on failure the page stays attached to @file */
	if (copy_from_user(argresp->data, buf, size))
		return ERR_PTR(-EFAULT);

	return argresp->data;
}
|
|
|
|
|
|
|
|
/*
 * simple_transaction_read - read back the response of a transaction
 * @file: file whose private_data holds the transaction buffer, if any
 * @buf:  user space destination
 * @size: maximum number of bytes to read
 * @pos:  in/out: read position within the response
 *
 * Returns 0 when no buffer is attached to @file, otherwise the result
 * of simple_read_from_buffer() over the stored response.
 */
ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
{
	struct simple_transaction_argresp *resp = file->private_data;

	if (resp)
		return simple_read_from_buffer(buf, size, pos,
					       resp->data, resp->size);

	return 0;
}
|
|
|
|
|
|
|
|
int simple_transaction_release(struct inode *inode, struct file *file)
|
|
|
|
{
|
|
|
|
free_page((unsigned long)file->private_data);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2005-05-18 16:40:59 +04:00
|
|
|
/* Simple attribute files */
|
|
|
|
|
|
|
|
struct simple_attr {
|
2008-02-08 15:20:26 +03:00
|
|
|
int (*get)(void *, u64 *);
|
|
|
|
int (*set)(void *, u64);
|
2005-05-18 16:40:59 +04:00
|
|
|
char get_buf[24]; /* enough to store a u64 and "\n\0" */
|
|
|
|
char set_buf[24];
|
|
|
|
void *data;
|
|
|
|
const char *fmt; /* format for read operation */
|
2006-03-23 14:00:36 +03:00
|
|
|
struct mutex mutex; /* protects access to these buffers */
|
2005-05-18 16:40:59 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
/* simple_attr_open is called by an actual attribute open file operation
|
|
|
|
* to set the attribute specific access operations. */
|
|
|
|
int simple_attr_open(struct inode *inode, struct file *file,
|
2008-02-08 15:20:26 +03:00
|
|
|
int (*get)(void *, u64 *), int (*set)(void *, u64),
|
2005-05-18 16:40:59 +04:00
|
|
|
const char *fmt)
|
|
|
|
{
|
|
|
|
struct simple_attr *attr;
|
|
|
|
|
|
|
|
attr = kmalloc(sizeof(*attr), GFP_KERNEL);
|
|
|
|
if (!attr)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
attr->get = get;
|
|
|
|
attr->set = set;
|
2006-09-27 12:50:46 +04:00
|
|
|
attr->data = inode->i_private;
|
2005-05-18 16:40:59 +04:00
|
|
|
attr->fmt = fmt;
|
2006-03-23 14:00:36 +03:00
|
|
|
mutex_init(&attr->mutex);
|
2005-05-18 16:40:59 +04:00
|
|
|
|
|
|
|
file->private_data = attr;
|
|
|
|
|
|
|
|
return nonseekable_open(inode, file);
|
|
|
|
}
|
|
|
|
|
2008-02-08 15:20:28 +03:00
|
|
|
int simple_attr_release(struct inode *inode, struct file *file)
|
2005-05-18 16:40:59 +04:00
|
|
|
{
|
|
|
|
kfree(file->private_data);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* read from the buffer that is filled with the get function */
|
|
|
|
ssize_t simple_attr_read(struct file *file, char __user *buf,
|
|
|
|
size_t len, loff_t *ppos)
|
|
|
|
{
|
|
|
|
struct simple_attr *attr;
|
|
|
|
size_t size;
|
|
|
|
ssize_t ret;
|
|
|
|
|
|
|
|
attr = file->private_data;
|
|
|
|
|
|
|
|
if (!attr->get)
|
|
|
|
return -EACCES;
|
|
|
|
|
2008-02-08 15:20:27 +03:00
|
|
|
ret = mutex_lock_interruptible(&attr->mutex);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2008-02-08 15:20:26 +03:00
|
|
|
if (*ppos) { /* continued read */
|
2005-05-18 16:40:59 +04:00
|
|
|
size = strlen(attr->get_buf);
|
2008-02-08 15:20:26 +03:00
|
|
|
} else { /* first read */
|
|
|
|
u64 val;
|
|
|
|
ret = attr->get(attr->data, &val);
|
|
|
|
if (ret)
|
|
|
|
goto out;
|
|
|
|
|
2005-05-18 16:40:59 +04:00
|
|
|
size = scnprintf(attr->get_buf, sizeof(attr->get_buf),
|
2008-02-08 15:20:26 +03:00
|
|
|
attr->fmt, (unsigned long long)val);
|
|
|
|
}
|
2005-05-18 16:40:59 +04:00
|
|
|
|
|
|
|
ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size);
|
2008-02-08 15:20:26 +03:00
|
|
|
out:
|
2006-03-23 14:00:36 +03:00
|
|
|
mutex_unlock(&attr->mutex);
|
2005-05-18 16:40:59 +04:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* interpret the buffer as a number to call the set function with */
|
|
|
|
ssize_t simple_attr_write(struct file *file, const char __user *buf,
|
|
|
|
size_t len, loff_t *ppos)
|
|
|
|
{
|
|
|
|
struct simple_attr *attr;
|
|
|
|
u64 val;
|
|
|
|
size_t size;
|
|
|
|
ssize_t ret;
|
|
|
|
|
|
|
|
attr = file->private_data;
|
|
|
|
if (!attr->set)
|
|
|
|
return -EACCES;
|
|
|
|
|
2008-02-08 15:20:27 +03:00
|
|
|
ret = mutex_lock_interruptible(&attr->mutex);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
2005-05-18 16:40:59 +04:00
|
|
|
ret = -EFAULT;
|
|
|
|
size = min(sizeof(attr->set_buf) - 1, len);
|
|
|
|
if (copy_from_user(attr->set_buf, buf, size))
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
ret = len; /* claim we got the whole input */
|
|
|
|
attr->set_buf[size] = '\0';
|
|
|
|
val = simple_strtol(attr->set_buf, NULL, 0);
|
|
|
|
attr->set(attr->data, val);
|
|
|
|
out:
|
2006-03-23 14:00:36 +03:00
|
|
|
mutex_unlock(&attr->mutex);
|
2005-05-18 16:40:59 +04:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2007-10-22 03:42:05 +04:00
|
|
|
/**
|
|
|
|
* generic_fh_to_dentry - generic helper for the fh_to_dentry export operation
|
|
|
|
* @sb: filesystem to do the file handle conversion on
|
|
|
|
* @fid: file handle to convert
|
|
|
|
* @fh_len: length of the file handle in bytes
|
|
|
|
* @fh_type: type of file handle
|
|
|
|
* @get_inode: filesystem callback to retrieve inode
|
|
|
|
*
|
|
|
|
* This function decodes @fid as long as it has one of the well-known
|
|
|
|
* Linux filehandle types and calls @get_inode on it to retrieve the
|
|
|
|
* inode for the object specified in the file handle.
|
|
|
|
*/
|
|
|
|
struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid,
|
|
|
|
int fh_len, int fh_type, struct inode *(*get_inode)
|
|
|
|
(struct super_block *sb, u64 ino, u32 gen))
|
|
|
|
{
|
|
|
|
struct inode *inode = NULL;
|
|
|
|
|
|
|
|
if (fh_len < 2)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
switch (fh_type) {
|
|
|
|
case FILEID_INO32_GEN:
|
|
|
|
case FILEID_INO32_GEN_PARENT:
|
|
|
|
inode = get_inode(sb, fid->i32.ino, fid->i32.gen);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2008-08-11 17:48:57 +04:00
|
|
|
return d_obtain_alias(inode);
|
2007-10-22 03:42:05 +04:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(generic_fh_to_dentry);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* generic_fh_to_dentry - generic helper for the fh_to_parent export operation
|
|
|
|
* @sb: filesystem to do the file handle conversion on
|
|
|
|
* @fid: file handle to convert
|
|
|
|
* @fh_len: length of the file handle in bytes
|
|
|
|
* @fh_type: type of file handle
|
|
|
|
* @get_inode: filesystem callback to retrieve inode
|
|
|
|
*
|
|
|
|
* This function decodes @fid as long as it has one of the well-known
|
|
|
|
* Linux filehandle types and calls @get_inode on it to retrieve the
|
|
|
|
* inode for the _parent_ object specified in the file handle if it
|
|
|
|
* is specified in the file handle, or NULL otherwise.
|
|
|
|
*/
|
|
|
|
struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid,
|
|
|
|
int fh_len, int fh_type, struct inode *(*get_inode)
|
|
|
|
(struct super_block *sb, u64 ino, u32 gen))
|
|
|
|
{
|
|
|
|
struct inode *inode = NULL;
|
|
|
|
|
|
|
|
if (fh_len <= 2)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
switch (fh_type) {
|
|
|
|
case FILEID_INO32_GEN_PARENT:
|
|
|
|
inode = get_inode(sb, fid->i32.parent_ino,
|
|
|
|
(fh_len > 3 ? fid->i32.parent_gen : 0));
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2008-08-11 17:48:57 +04:00
|
|
|
return d_obtain_alias(inode);
|
2007-10-22 03:42:05 +04:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(generic_fh_to_parent);
|
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
/* directory file operations backed by the dcache */
EXPORT_SYMBOL(dcache_dir_close);
EXPORT_SYMBOL(dcache_dir_lseek);
EXPORT_SYMBOL(dcache_dir_open);
EXPORT_SYMBOL(dcache_readdir);
EXPORT_SYMBOL(generic_read_dir);

/* superblock, mount and dentry helpers */
EXPORT_SYMBOL(get_sb_pseudo);
EXPORT_SYMBOL(d_alloc_name);
EXPORT_SYMBOL(simple_fill_super);
EXPORT_SYMBOL(simple_pin_fs);
EXPORT_SYMBOL(simple_release_fs);
EXPORT_SYMBOL(simple_statfs);

/* inode and directory operations */
EXPORT_SYMBOL(simple_dir_inode_operations);
EXPORT_SYMBOL(simple_dir_operations);
EXPORT_SYMBOL(simple_empty);
EXPORT_SYMBOL(simple_getattr);
EXPORT_SYMBOL(simple_link);
EXPORT_SYMBOL(simple_lookup);
EXPORT_SYMBOL(simple_rename);
EXPORT_SYMBOL(simple_rmdir);
EXPORT_SYMBOL(simple_unlink);

/* page cache and file data helpers */
EXPORT_SYMBOL(simple_write_begin);
EXPORT_SYMBOL(simple_write_end);
EXPORT_UNUSED_SYMBOL(simple_prepare_write);
EXPORT_SYMBOL(simple_readpage);
EXPORT_SYMBOL(simple_sync_file);
EXPORT_SYMBOL(simple_read_from_buffer);
EXPORT_SYMBOL(memory_read_from_buffer);

/* transaction based IO */
EXPORT_SYMBOL(simple_transaction_set);
EXPORT_SYMBOL(simple_transaction_get);
EXPORT_SYMBOL(simple_transaction_read);
EXPORT_SYMBOL(simple_transaction_release);

/* simple attribute files */
EXPORT_SYMBOL_GPL(simple_attr_open);
EXPORT_SYMBOL_GPL(simple_attr_release);
EXPORT_SYMBOL_GPL(simple_attr_read);
EXPORT_SYMBOL_GPL(simple_attr_write);
|