take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now.
It's not mountable (not even registered, so it's not in /proc/filesystems,
etc.). Files on it *are* bindable - we explicitly permit that in do_loopback().
This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well.
get_proc_ns() is a macro now (it's simply returning ->i_private; would
have been an inline, if not for header ordering headache).
proc_ns_inode() is an ex-parrot. The interface used in procfs is
ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).
Dentries and inodes are never hashed; a non-counting reference to dentry
is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path()
if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details
of that mechanism.
As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt;
it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets
from ns_get_path().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-11-01 17:57:28 +03:00
|
|
|
#include <linux/mount.h>
|
|
|
|
#include <linux/file.h>
|
|
|
|
#include <linux/fs.h>
|
|
|
|
#include <linux/proc_ns.h>
|
|
|
|
#include <linux/magic.h>
|
|
|
|
#include <linux/ktime.h>
|
2015-05-24 20:49:04 +03:00
|
|
|
#include <linux/seq_file.h>
|
2016-09-06 10:47:14 +03:00
|
|
|
#include <linux/user_namespace.h>
|
|
|
|
#include <linux/nsfs.h>
|
2017-01-25 04:04:15 +03:00
|
|
|
#include <linux/uaccess.h>
|
take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now.
It's not mountable (not even registered, so it's not in /proc/filesystems,
etc.). Files on it *are* bindable - we explicitly permit that in do_loopback().
This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well.
get_proc_ns() is a macro now (it's simply returning ->i_private; would
have been an inline, if not for header ordering headache).
proc_ns_inode() is an ex-parrot. The interface used in procfs is
ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).
Dentries and inodes are never hashed; a non-counting reference to dentry
is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path()
if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details
of that mechanism.
As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt;
it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets
from ns_get_path().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-11-01 17:57:28 +03:00
|
|
|
|
|
|
|
/* Kernel-internal mount of nsfs; assigned once by nsfs_init() from kern_mount(). */
static struct vfsmount *nsfs_mnt;
|
|
|
|
|
2016-09-06 10:47:14 +03:00
|
|
|
/*
 * Forward declaration: the handler is defined further down in this file but
 * is referenced by the ns_file_operations table that follows.
 */
static long ns_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg);
|
take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now.
It's not mountable (not even registered, so it's not in /proc/filesystems,
etc.). Files on it *are* bindable - we explicitly permit that in do_loopback().
This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well.
get_proc_ns() is a macro now (it's simply returning ->i_private; would
have been an inline, if not for header ordering headache).
proc_ns_inode() is an ex-parrot. The interface used in procfs is
ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).
Dentries and inodes are never hashed; a non-counting reference to dentry
is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path()
if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details
of that mechanism.
As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt;
it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets
from ns_get_path().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-11-01 17:57:28 +03:00
|
|
|
static const struct file_operations ns_file_operations = {
|
|
|
|
.llseek = no_llseek,
|
2016-09-06 10:47:14 +03:00
|
|
|
.unlocked_ioctl = ns_ioctl,
|
take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now.
It's not mountable (not even registered, so it's not in /proc/filesystems,
etc.). Files on it *are* bindable - we explicitly permit that in do_loopback().
This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well.
get_proc_ns() is a macro now (it's simply returning ->i_private; would
have been an inline, if not for header ordering headache).
proc_ns_inode() is an ex-parrot. The interface used in procfs is
ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).
Dentries and inodes are never hashed; a non-counting reference to dentry
is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path()
if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details
of that mechanism.
As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt;
it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets
from ns_get_path().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-11-01 17:57:28 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
|
|
|
|
{
|
2015-03-18 01:26:12 +03:00
|
|
|
struct inode *inode = d_inode(dentry);
|
take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now.
It's not mountable (not even registered, so it's not in /proc/filesystems,
etc.). Files on it *are* bindable - we explicitly permit that in do_loopback().
This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well.
get_proc_ns() is a macro now (it's simply returning ->i_private; would
have been an inline, if not for header ordering headache).
proc_ns_inode() is an ex-parrot. The interface used in procfs is
ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).
Dentries and inodes are never hashed; a non-counting reference to dentry
is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path()
if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details
of that mechanism.
As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt;
it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets
from ns_get_path().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-11-01 17:57:28 +03:00
|
|
|
const struct proc_ns_operations *ns_ops = dentry->d_fsdata;
|
|
|
|
|
|
|
|
return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
|
|
|
|
ns_ops->name, inode->i_ino);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void ns_prune_dentry(struct dentry *dentry)
|
|
|
|
{
|
2015-03-18 01:26:12 +03:00
|
|
|
struct inode *inode = d_inode(dentry);
|
take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now.
It's not mountable (not even registered, so it's not in /proc/filesystems,
etc.). Files on it *are* bindable - we explicitly permit that in do_loopback().
This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well.
get_proc_ns() is a macro now (it's simply returning ->i_private; would
have been an inline, if not for header ordering headache).
proc_ns_inode() is an ex-parrot. The interface used in procfs is
ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).
Dentries and inodes are never hashed; a non-counting reference to dentry
is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path()
if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details
of that mechanism.
As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt;
it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets
from ns_get_path().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-11-01 17:57:28 +03:00
|
|
|
if (inode) {
|
|
|
|
struct ns_common *ns = inode->i_private;
|
|
|
|
atomic_long_set(&ns->stashed, 0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
const struct dentry_operations ns_dentry_operations =
|
|
|
|
{
|
|
|
|
.d_prune = ns_prune_dentry,
|
|
|
|
.d_delete = always_delete_dentry,
|
|
|
|
.d_dname = ns_dname,
|
|
|
|
};
|
|
|
|
|
|
|
|
static void nsfs_evict(struct inode *inode)
|
|
|
|
{
|
|
|
|
struct ns_common *ns = inode->i_private;
|
|
|
|
clear_inode(inode);
|
|
|
|
ns->ops->put(ns);
|
|
|
|
}
|
|
|
|
|
2016-09-06 10:47:14 +03:00
|
|
|
static void *__ns_get_path(struct path *path, struct ns_common *ns)
|
take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now.
It's not mountable (not even registered, so it's not in /proc/filesystems,
etc.). Files on it *are* bindable - we explicitly permit that in do_loopback().
This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well.
get_proc_ns() is a macro now (it's simply returning ->i_private; would
have been an inline, if not for header ordering headache).
proc_ns_inode() is an ex-parrot. The interface used in procfs is
ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).
Dentries and inodes are never hashed; a non-counting reference to dentry
is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path()
if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details
of that mechanism.
As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt;
it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets
from ns_get_path().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-11-01 17:57:28 +03:00
|
|
|
{
|
2016-09-23 03:39:20 +03:00
|
|
|
struct vfsmount *mnt = nsfs_mnt;
|
take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now.
It's not mountable (not even registered, so it's not in /proc/filesystems,
etc.). Files on it *are* bindable - we explicitly permit that in do_loopback().
This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well.
get_proc_ns() is a macro now (it's simply returning ->i_private; would
have been an inline, if not for header ordering headache).
proc_ns_inode() is an ex-parrot. The interface used in procfs is
ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).
Dentries and inodes are never hashed; a non-counting reference to dentry
is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path()
if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details
of that mechanism.
As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt;
it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets
from ns_get_path().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-11-01 17:57:28 +03:00
|
|
|
struct dentry *dentry;
|
|
|
|
struct inode *inode;
|
|
|
|
unsigned long d;
|
|
|
|
|
|
|
|
rcu_read_lock();
|
|
|
|
d = atomic_long_read(&ns->stashed);
|
|
|
|
if (!d)
|
|
|
|
goto slow;
|
|
|
|
dentry = (struct dentry *)d;
|
|
|
|
if (!lockref_get_not_dead(&dentry->d_lockref))
|
|
|
|
goto slow;
|
|
|
|
rcu_read_unlock();
|
2016-09-06 10:47:14 +03:00
|
|
|
ns->ops->put(ns);
|
take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now.
It's not mountable (not even registered, so it's not in /proc/filesystems,
etc.). Files on it *are* bindable - we explicitly permit that in do_loopback().
This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well.
get_proc_ns() is a macro now (it's simply returning ->i_private; would
have been an inline, if not for header ordering headache).
proc_ns_inode() is an ex-parrot. The interface used in procfs is
ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).
Dentries and inodes are never hashed; a non-counting reference to dentry
is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path()
if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details
of that mechanism.
As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt;
it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets
from ns_get_path().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-11-01 17:57:28 +03:00
|
|
|
got_it:
|
2016-09-23 03:39:20 +03:00
|
|
|
path->mnt = mntget(mnt);
|
take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now.
It's not mountable (not even registered, so it's not in /proc/filesystems,
etc.). Files on it *are* bindable - we explicitly permit that in do_loopback().
This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well.
get_proc_ns() is a macro now (it's simply returning ->i_private; would
have been an inline, if not for header ordering headache).
proc_ns_inode() is an ex-parrot. The interface used in procfs is
ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).
Dentries and inodes are never hashed; a non-counting reference to dentry
is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path()
if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details
of that mechanism.
As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt;
it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets
from ns_get_path().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-11-01 17:57:28 +03:00
|
|
|
path->dentry = dentry;
|
|
|
|
return NULL;
|
|
|
|
slow:
|
|
|
|
rcu_read_unlock();
|
|
|
|
inode = new_inode_pseudo(mnt->mnt_sb);
|
|
|
|
if (!inode) {
|
2016-09-06 10:47:14 +03:00
|
|
|
ns->ops->put(ns);
|
take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now.
It's not mountable (not even registered, so it's not in /proc/filesystems,
etc.). Files on it *are* bindable - we explicitly permit that in do_loopback().
This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well.
get_proc_ns() is a macro now (it's simply returning ->i_private; would
have been an inline, if not for header ordering headache).
proc_ns_inode() is an ex-parrot. The interface used in procfs is
ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).
Dentries and inodes are never hashed; a non-counting reference to dentry
is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path()
if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details
of that mechanism.
As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt;
it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets
from ns_get_path().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-11-01 17:57:28 +03:00
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
}
|
|
|
|
inode->i_ino = ns->inum;
|
2016-09-14 17:48:04 +03:00
|
|
|
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
|
take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now.
It's not mountable (not even registered, so it's not in /proc/filesystems,
etc.). Files on it *are* bindable - we explicitly permit that in do_loopback().
This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well.
get_proc_ns() is a macro now (it's simply returning ->i_private; would
have been an inline, if not for header ordering headache).
proc_ns_inode() is an ex-parrot. The interface used in procfs is
ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).
Dentries and inodes are never hashed; a non-counting reference to dentry
is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path()
if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details
of that mechanism.
As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt;
it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets
from ns_get_path().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-11-01 17:57:28 +03:00
|
|
|
inode->i_flags |= S_IMMUTABLE;
|
|
|
|
inode->i_mode = S_IFREG | S_IRUGO;
|
|
|
|
inode->i_fop = &ns_file_operations;
|
|
|
|
inode->i_private = ns;
|
|
|
|
|
2017-07-04 19:25:22 +03:00
|
|
|
dentry = d_alloc_pseudo(mnt->mnt_sb, &empty_name);
|
take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now.
It's not mountable (not even registered, so it's not in /proc/filesystems,
etc.). Files on it *are* bindable - we explicitly permit that in do_loopback().
This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well.
get_proc_ns() is a macro now (it's simply returning ->i_private; would
have been an inline, if not for header ordering headache).
proc_ns_inode() is an ex-parrot. The interface used in procfs is
ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).
Dentries and inodes are never hashed; a non-counting reference to dentry
is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path()
if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details
of that mechanism.
As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt;
it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets
from ns_get_path().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-11-01 17:57:28 +03:00
|
|
|
if (!dentry) {
|
|
|
|
iput(inode);
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
}
|
|
|
|
d_instantiate(dentry, inode);
|
2017-04-20 01:11:00 +03:00
|
|
|
dentry->d_flags |= DCACHE_RCUACCESS;
|
2016-09-06 10:47:14 +03:00
|
|
|
dentry->d_fsdata = (void *)ns->ops;
|
take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now.
It's not mountable (not even registered, so it's not in /proc/filesystems,
etc.). Files on it *are* bindable - we explicitly permit that in do_loopback().
This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well.
get_proc_ns() is a macro now (it's simply returning ->i_private; would
have been an inline, if not for header ordering headache).
proc_ns_inode() is an ex-parrot. The interface used in procfs is
ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).
Dentries and inodes are never hashed; a non-counting reference to dentry
is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path()
if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details
of that mechanism.
As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt;
it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets
from ns_get_path().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-11-01 17:57:28 +03:00
|
|
|
d = atomic_long_cmpxchg(&ns->stashed, 0, (unsigned long)dentry);
|
|
|
|
if (d) {
|
|
|
|
d_delete(dentry); /* make sure ->d_prune() does nothing */
|
|
|
|
dput(dentry);
|
|
|
|
cpu_relax();
|
2016-09-06 10:47:14 +03:00
|
|
|
return ERR_PTR(-EAGAIN);
|
take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now.
It's not mountable (not even registered, so it's not in /proc/filesystems,
etc.). Files on it *are* bindable - we explicitly permit that in do_loopback().
This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well.
get_proc_ns() is a macro now (it's simply returning ->i_private; would
have been an inline, if not for header ordering headache).
proc_ns_inode() is an ex-parrot. The interface used in procfs is
ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).
Dentries and inodes are never hashed; a non-counting reference to dentry
is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path()
if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details
of that mechanism.
As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt;
it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets
from ns_get_path().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-11-01 17:57:28 +03:00
|
|
|
}
|
|
|
|
goto got_it;
|
|
|
|
}
|
|
|
|
|
2016-09-06 10:47:14 +03:00
|
|
|
void *ns_get_path(struct path *path, struct task_struct *task,
|
|
|
|
const struct proc_ns_operations *ns_ops)
|
|
|
|
{
|
|
|
|
struct ns_common *ns;
|
|
|
|
void *ret;
|
|
|
|
|
|
|
|
again:
|
|
|
|
ns = ns_ops->get(task);
|
|
|
|
if (!ns)
|
|
|
|
return ERR_PTR(-ENOENT);
|
|
|
|
|
|
|
|
ret = __ns_get_path(path, ns);
|
|
|
|
if (IS_ERR(ret) && PTR_ERR(ret) == -EAGAIN)
|
|
|
|
goto again;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2016-10-25 04:29:13 +03:00
|
|
|
int open_related_ns(struct ns_common *ns,
|
2016-09-06 10:47:14 +03:00
|
|
|
struct ns_common *(*get_ns)(struct ns_common *ns))
|
|
|
|
{
|
|
|
|
struct path path = {};
|
|
|
|
struct file *f;
|
|
|
|
void *err;
|
|
|
|
int fd;
|
|
|
|
|
|
|
|
fd = get_unused_fd_flags(O_CLOEXEC);
|
|
|
|
if (fd < 0)
|
|
|
|
return fd;
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
struct ns_common *relative;
|
|
|
|
|
|
|
|
relative = get_ns(ns);
|
|
|
|
if (IS_ERR(relative)) {
|
|
|
|
put_unused_fd(fd);
|
|
|
|
return PTR_ERR(relative);
|
|
|
|
}
|
|
|
|
|
|
|
|
err = __ns_get_path(&path, relative);
|
|
|
|
if (IS_ERR(err) && PTR_ERR(err) == -EAGAIN)
|
|
|
|
continue;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (IS_ERR(err)) {
|
|
|
|
put_unused_fd(fd);
|
|
|
|
return PTR_ERR(err);
|
|
|
|
}
|
|
|
|
|
|
|
|
f = dentry_open(&path, O_RDONLY, current_cred());
|
|
|
|
path_put(&path);
|
|
|
|
if (IS_ERR(f)) {
|
|
|
|
put_unused_fd(fd);
|
|
|
|
fd = PTR_ERR(f);
|
|
|
|
} else
|
|
|
|
fd_install(fd, f);
|
|
|
|
|
|
|
|
return fd;
|
|
|
|
}
|
|
|
|
|
|
|
|
static long ns_ioctl(struct file *filp, unsigned int ioctl,
|
|
|
|
unsigned long arg)
|
|
|
|
{
|
2017-01-25 04:04:15 +03:00
|
|
|
struct user_namespace *user_ns;
|
2016-09-06 10:47:14 +03:00
|
|
|
struct ns_common *ns = get_proc_ns(file_inode(filp));
|
2017-01-25 04:04:15 +03:00
|
|
|
uid_t __user *argp;
|
|
|
|
uid_t uid;
|
2016-09-06 10:47:14 +03:00
|
|
|
|
|
|
|
switch (ioctl) {
|
|
|
|
case NS_GET_USERNS:
|
|
|
|
return open_related_ns(ns, ns_get_owner);
|
2016-09-06 10:47:15 +03:00
|
|
|
case NS_GET_PARENT:
|
|
|
|
if (!ns->ops->get_parent)
|
|
|
|
return -EINVAL;
|
|
|
|
return open_related_ns(ns, ns->ops->get_parent);
|
nsfs: Add an ioctl() to return the namespace type
Linux 4.9 added two ioctl() operations that can be used to discover:
* the parental relationships for hierarchical namespaces (user and PID)
[NS_GET_PARENT]
* the user namespaces that owns a specified non-user-namespace
[NS_GET_USERNS]
For no good reason that I can glean, NS_GET_USERNS was made synonymous
with NS_GET_PARENT for user namespaces. It might have been better if
NS_GET_USERNS had returned an error if the supplied file descriptor
referred to a user namespace, since it suggests that the caller may be
confused. More particularly, if it had generated an error, then I wouldn't
need the new ioctl() operation proposed here. (On the other hand, what
I propose here may be more generally useful.)
I would like to write code that discovers namespace relationships for
the purpose of understanding the namespace setup on a running system.
In particular, given a file descriptor (or pathname) for a namespace,
N, I'd like to obtain the corresponding user namespace. Namespace N
might be a user namespace (in which case my code would just use N) or
a non-user namespace (in which case my code will use NS_GET_USERNS to
get the user namespace associated with N). The problem is that there
is no way to tell the difference by looking at the file descriptor
(and if I try to use NS_GET_USERNS on an N that is a user namespace, I
get the parent user namespace of N, which is not what I want).
This patch therefore adds a new ioctl(), NS_GET_NSTYPE, which, given
a file descriptor that refers to a user namespace, returns the
namespace type (one of the CLONE_NEW* constants).
Signed-off-by: Michael Kerrisk <mtk-manpages@gmail.com>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
2017-01-25 04:03:36 +03:00
|
|
|
case NS_GET_NSTYPE:
|
|
|
|
return ns->ops->type;
|
2017-01-25 04:04:15 +03:00
|
|
|
case NS_GET_OWNER_UID:
|
|
|
|
if (ns->ops->type != CLONE_NEWUSER)
|
|
|
|
return -EINVAL;
|
|
|
|
user_ns = container_of(ns, struct user_namespace, ns);
|
|
|
|
argp = (uid_t __user *) arg;
|
|
|
|
uid = from_kuid_munged(current_user_ns(), user_ns->owner);
|
|
|
|
return put_user(uid, argp);
|
2016-09-06 10:47:14 +03:00
|
|
|
default:
|
|
|
|
return -ENOTTY;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now.
It's not mountable (not even registered, so it's not in /proc/filesystems,
etc.). Files on it *are* bindable - we explicitly permit that in do_loopback().
This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well.
get_proc_ns() is a macro now (it's simply returning ->i_private; would
have been an inline, if not for header ordering headache).
proc_ns_inode() is an ex-parrot. The interface used in procfs is
ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).
Dentries and inodes are never hashed; a non-counting reference to dentry
is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path()
if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details
of that mechanism.
As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt;
it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets
from ns_get_path().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-11-01 17:57:28 +03:00
|
|
|
/*
 * Format the name of @task's namespace of the kind described by @ns_ops
 * into @buf (at most @size bytes) as "<name>:[<inum>]".
 *
 * <name> is ns_ops->real_ns_name when that is set and ns_ops->name
 * otherwise.  The namespace reference obtained from ->get() is dropped
 * before returning.
 *
 * Returns the snprintf() result, or -ENOENT if ->get() yields no namespace.
 */
int ns_get_name(char *buf, size_t size, struct task_struct *task,
			const struct proc_ns_operations *ns_ops)
{
	struct ns_common *ns;
	const char *name;
	int res;

	ns = ns_ops->get(task);
	if (!ns)
		return -ENOENT;

	name = ns_ops->real_ns_name ? ns_ops->real_ns_name : ns_ops->name;
	res = snprintf(buf, size, "%s:[%u]", name, ns->inum);
	ns_ops->put(ns);

	return res;
}
|
|
|
|
|
|
|
|
struct file *proc_ns_fget(int fd)
|
|
|
|
{
|
|
|
|
struct file *file;
|
|
|
|
|
|
|
|
file = fget(fd);
|
|
|
|
if (!file)
|
|
|
|
return ERR_PTR(-EBADF);
|
|
|
|
|
|
|
|
if (file->f_op != &ns_file_operations)
|
|
|
|
goto out_invalid;
|
|
|
|
|
|
|
|
return file;
|
|
|
|
|
|
|
|
out_invalid:
|
|
|
|
fput(file);
|
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
}
|
|
|
|
|
2015-05-24 20:49:04 +03:00
|
|
|
static int nsfs_show_path(struct seq_file *seq, struct dentry *dentry)
|
|
|
|
{
|
|
|
|
struct inode *inode = d_inode(dentry);
|
|
|
|
const struct proc_ns_operations *ns_ops = dentry->d_fsdata;
|
|
|
|
|
2015-09-11 23:07:48 +03:00
|
|
|
seq_printf(seq, "%s:[%lu]", ns_ops->name, inode->i_ino);
|
|
|
|
return 0;
|
2015-05-24 20:49:04 +03:00
|
|
|
}
|
|
|
|
|
take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now.
It's not mountable (not even registered, so it's not in /proc/filesystems,
etc.). Files on it *are* bindable - we explicitly permit that in do_loopback().
This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well.
get_proc_ns() is a macro now (it's simply returning ->i_private; would
have been an inline, if not for header ordering headache).
proc_ns_inode() is an ex-parrot. The interface used in procfs is
ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).
Dentries and inodes are never hashed; a non-counting reference to dentry
is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path()
if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details
of that mechanism.
As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt;
it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets
from ns_get_path().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-11-01 17:57:28 +03:00
|
|
|
static const struct super_operations nsfs_ops = {
|
|
|
|
.statfs = simple_statfs,
|
|
|
|
.evict_inode = nsfs_evict,
|
2015-05-24 20:49:04 +03:00
|
|
|
.show_path = nsfs_show_path,
|
take the targets of /proc/*/ns/* symlinks to separate fs
New pseudo-filesystem: nsfs. Targets of /proc/*/ns/* live there now.
It's not mountable (not even registered, so it's not in /proc/filesystems,
etc.). Files on it *are* bindable - we explicitly permit that in do_loopback().
This stuff lives in fs/nsfs.c now; proc_ns_fget() moved there as well.
get_proc_ns() is a macro now (it's simply returning ->i_private; would
have been an inline, if not for header ordering headache).
proc_ns_inode() is an ex-parrot. The interface used in procfs is
ns_get_path(path, task, ops) and ns_get_name(buf, size, task, ops).
Dentries and inodes are never hashed; a non-counting reference to dentry
is stashed in ns_common (removed by ->d_prune()) and reused by ns_get_path()
if present. See ns_get_path()/ns_prune_dentry/nsfs_evict() for details
of that mechanism.
As the result, proc_ns_follow_link() has stopped poking in nd->path.mnt;
it does nd_jump_link() on a consistent <vfsmount,dentry> pair it gets
from ns_get_path().
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2014-11-01 17:57:28 +03:00
|
|
|
};
|
|
|
|
static struct dentry *nsfs_mount(struct file_system_type *fs_type,
|
|
|
|
int flags, const char *dev_name, void *data)
|
|
|
|
{
|
|
|
|
return mount_pseudo(fs_type, "nsfs:", &nsfs_ops,
|
|
|
|
&ns_dentry_operations, NSFS_MAGIC);
|
|
|
|
}
|
|
|
|
static struct file_system_type nsfs = {
|
|
|
|
.name = "nsfs",
|
|
|
|
.mount = nsfs_mount,
|
|
|
|
.kill_sb = kill_anon_super,
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Boot-time setup: create the internal nsfs mount and remember it in
 * nsfs_mnt.  Failure to mount is fatal (panic).  MS_NOUSER is then
 * cleared on the new superblock.
 */
void __init nsfs_init(void)
{
	struct vfsmount *mnt = kern_mount(&nsfs);

	nsfs_mnt = mnt;
	if (IS_ERR(mnt))
		panic("can't set nsfs up\n");

	mnt->mnt_sb->s_flags &= ~MS_NOUSER;
}
|