cgroup->name handling became quite complicated over time, involving a
dedicated struct cgroup_name for RCU protection.  Now that cgroup is
on kernfs, we can drop all of it and simply use kernfs_name/path() and
friends.  Replace cgroup->name and all related code with kernfs
name/path constructs.

* Reimplement cgroup_name() and cgroup_path() as thin wrappers on top
  of kernfs counterparts, which involves semantic changes.
  pr_cont_cgroup_name() and pr_cont_cgroup_path() are added.

* cgroup->name handling dropped from cgroup_rename().

* All users of cgroup_name/path() updated to the new semantics.  Users
  which were formatting the string just to printk them are converted
  to use pr_cont_cgroup_name/path() instead, which simplifies things
  quite a bit.  As cgroup_name() no longer requires RCU read lock
  around it, RCU lockings which were protecting only cgroup_name() are
  removed.

v2: Comment above oom_info_lock updated as suggested by Michal.

v3: dummy_top doesn't have a kn associated and
    pr_cont_cgroup_name/path() ended up calling the matching kernfs
    functions with NULL kn leading to oops.  Test for NULL kn and
    print "/" if so.  This issue was reported by Fengguang Wu.

v4: Rebased on top of 0ab02ca8f8 ("cgroup: protect modifications to
    cgroup_idr with cgroup_mutex").

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Li Zefan <lizefan@huawei.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
This commit is contained in:
Tejun Heo 2014-02-12 09:29:50 -05:00
Родитель 6f30558f37
Коммит e61734c55c
7 изменённых файлов: 110 добавлений и 210 удалений

Просмотреть файл

@ -241,12 +241,16 @@ static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
*/
static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
{
	char *p;

	/* cgroup_path() hands back a pointer to the path, or NULL on failure */
	p = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
	if (!p) {
		/*
		 * Use strlcpy() rather than strncpy(): strncpy() leaves @buf
		 * without a terminating NUL when @buflen is not larger than
		 * the placeholder string.
		 */
		strlcpy(buf, "<unavailable>", buflen);
		return -ENAMETOOLONG;
	}

	/*
	 * The returned pointer may lie inside @buf rather than at its start;
	 * move everything from there to the end of the buffer to the front so
	 * that callers can keep using @buf directly.
	 */
	memmove(buf, p, buf + buflen - p);
	return 0;
}
/**

Просмотреть файл

@ -112,6 +112,7 @@ char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen)
spin_unlock_irqrestore(&kernfs_rename_lock, flags);
return p;
}
EXPORT_SYMBOL_GPL(kernfs_path);
/**
* pr_cont_kernfs_name - pr_cont name of a kernfs_node

Просмотреть файл

@ -138,11 +138,6 @@ enum {
CGRP_SANE_BEHAVIOR,
};
/*
 * Storage for a cgroup's name.  Instances are referenced through the
 * __rcu pointer cgroup->name.
 */
struct cgroup_name {
/* passed to kfree_rcu() when a name is released */
struct rcu_head rcu_head;
/* NUL-terminated name string, stored inline after the header */
char name[];
};
struct cgroup {
unsigned long flags; /* "unsigned long" so bitops work */
@ -179,19 +174,6 @@ struct cgroup {
*/
u64 serial_nr;
/*
* This is a copy of dentry->d_name, and it's needed because
* we can't use dentry->d_name in cgroup_path().
*
* You must acquire rcu_read_lock() to access cgrp->name, and
* the only place that can change it is rename(), which is
* protected by parent dir's i_mutex.
*
* Normally you should use cgroup_name() wrapper rather than
* access it directly.
*/
struct cgroup_name __rcu *name;
/* Private pointers for each registered subsystem */
struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT];
@ -479,12 +461,6 @@ static inline bool cgroup_sane_behavior(const struct cgroup *cgrp)
return cgrp->root->flags & CGRP_ROOT_SANE_BEHAVIOR;
}
/* Caller should hold rcu_read_lock() */
static inline const char *cgroup_name(const struct cgroup *cgrp)
{
return rcu_dereference(cgrp->name)->name;
}
/* returns ino associated with a cgroup, 0 indicates unmounted root */
static inline ino_t cgroup_ino(struct cgroup *cgrp)
{
@ -503,14 +479,47 @@ static inline struct cftype *seq_cft(struct seq_file *seq)
struct cgroup_subsys_state *seq_css(struct seq_file *seq);
/*
* Name / path handling functions. All are thin wrappers around the kernfs
* counterparts and can be called under any context.
*/
/*
 * Copy the name of @cgrp into @buf, which is @buflen bytes long.
 *
 * Returns whatever kernfs_name() returns.  A cgroup without a kernfs
 * node is reported as "/" and the strlcpy() result is returned instead
 * (NOTE(review): presumably the same length convention as kernfs_name()
 * -- confirm).
 */
static inline int cgroup_name(struct cgroup *cgrp, char *buf, size_t buflen)
{
	/* dummy_top doesn't have a kn associated; don't hand NULL to kernfs */
	if (!cgrp->kn)
		return strlcpy(buf, "/", buflen);

	return kernfs_name(cgrp->kn, buf, buflen);
}
/*
 * Build the path of @cgrp using @buf (@buflen bytes) as storage.
 *
 * Returns the pointer produced by kernfs_path(); callers must use the
 * returned pointer rather than @buf and must check it for NULL.  A cgroup
 * without a kernfs node yields "/" at the start of @buf, or NULL if even
 * that does not fit.
 */
static inline char * __must_check cgroup_path(struct cgroup *cgrp, char *buf,
					      size_t buflen)
{
	/* dummy_top doesn't have a kn associated; don't hand NULL to kernfs */
	if (!cgrp->kn)
		return strlcpy(buf, "/", buflen) < buflen ? buf : NULL;

	return kernfs_path(cgrp->kn, buf, buflen);
}
/* Continue the current printk line with the name of @cgrp. */
static inline void pr_cont_cgroup_name(struct cgroup *cgrp)
{
	/* dummy_top has no kernfs node to ask, so print "/" for it */
	if (!cgrp->kn) {
		pr_cont("/");
		return;
	}

	pr_cont_kernfs_name(cgrp->kn);
}
/* Continue the current printk line with the path of @cgrp. */
static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
{
	/* dummy_top has no kernfs node to ask, so print "/" for it */
	if (!cgrp->kn) {
		pr_cont("/");
		return;
	}

	pr_cont_kernfs_path(cgrp->kn);
}
char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_rm_cftypes(struct cftype *cfts);
bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
int cgroup_task_count(const struct cgroup *cgrp);
/*

Просмотреть файл

@ -145,8 +145,6 @@ static int cgroup_root_count;
/* hierarchy ID allocation and mapping, protected by cgroup_mutex */
static DEFINE_IDR(cgroup_hierarchy_idr);
static struct cgroup_name root_cgroup_name = { .name = "/" };
/*
* Assign a monotonically increasing serial number to cgroups. It
* guarantees cgroups with bigger numbers are newer than those with smaller
@ -888,17 +886,6 @@ static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask);
static struct kernfs_syscall_ops cgroup_kf_syscall_ops;
static const struct file_operations proc_cgroupstats_operations;
static struct cgroup_name *cgroup_alloc_name(const char *name_str)
{
struct cgroup_name *name;
name = kmalloc(sizeof(*name) + strlen(name_str) + 1, GFP_KERNEL);
if (!name)
return NULL;
strcpy(name->name, name_str);
return name;
}
static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
char *buf)
{
@ -958,8 +945,6 @@ static void cgroup_free_fn(struct work_struct *work)
cgroup_pidlist_destroy_all(cgrp);
kernfs_put(cgrp->kn);
kfree(rcu_dereference_raw(cgrp->name));
kfree(cgrp);
}
@ -1377,7 +1362,6 @@ static void init_cgroup_root(struct cgroupfs_root *root)
INIT_LIST_HEAD(&root->root_list);
root->number_of_cgroups = 1;
cgrp->root = root;
RCU_INIT_POINTER(cgrp->name, &root_cgroup_name);
init_cgroup_housekeeping(cgrp);
idr_init(&root->cgroup_idr);
}
@ -1597,57 +1581,6 @@ static struct file_system_type cgroup_fs_type = {
static struct kobject *cgroup_kobj;
/**
* cgroup_path - generate the path of a cgroup
* @cgrp: the cgroup in question
* @buf: the buffer to write the path into
* @buflen: the length of the buffer
*
* Writes path of cgroup into buf. Returns 0 on success, -errno on error.
*
* We can't generate cgroup path using dentry->d_name, as accessing
* dentry->name must be protected by irq-unsafe dentry->d_lock or parent
* inode's i_mutex, while on the other hand cgroup_path() can be called
* with some irq-safe spinlocks held.
*/
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
{
int ret = -ENAMETOOLONG;
char *start;
if (!cgrp->parent) {
if (strlcpy(buf, "/", buflen) >= buflen)
return -ENAMETOOLONG;
return 0;
}
start = buf + buflen - 1;
*start = '\0';
rcu_read_lock();
do {
const char *name = cgroup_name(cgrp);
int len;
len = strlen(name);
if ((start -= len) < buf)
goto out;
memcpy(start, name, len);
if (--start < buf)
goto out;
*start = '/';
cgrp = cgrp->parent;
} while (cgrp->parent);
ret = 0;
memmove(buf, start, buf + buflen - start);
out:
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(cgroup_path);
/**
* task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
* @task: target task
@ -1659,16 +1592,14 @@ EXPORT_SYMBOL_GPL(cgroup_path);
* function grabs cgroup_mutex and shouldn't be used inside locks used by
* cgroup controller callbacks.
*
* Returns 0 on success, fails with -%ENAMETOOLONG if @buflen is too short.
* Return value is the same as kernfs_path().
*/
int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
{
struct cgroupfs_root *root;
struct cgroup *cgrp;
int hierarchy_id = 1, ret = 0;
if (buflen < 2)
return -ENAMETOOLONG;
int hierarchy_id = 1;
char *path = NULL;
mutex_lock(&cgroup_mutex);
@ -1676,14 +1607,15 @@ int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
if (root) {
cgrp = task_cgroup_from_root(task, root);
ret = cgroup_path(cgrp, buf, buflen);
path = cgroup_path(cgrp, buf, buflen);
} else {
/* if no hierarchy exists, everyone is in "/" */
memcpy(buf, "/", 2);
if (strlcpy(buf, "/", buflen) < buflen)
path = buf;
}
mutex_unlock(&cgroup_mutex);
return ret;
return path;
}
EXPORT_SYMBOL_GPL(task_cgroup_path);
@ -2211,7 +2143,6 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
const char *new_name_str)
{
struct cgroup *cgrp = kn->priv;
struct cgroup_name *name, *old_name;
int ret;
if (kernfs_type(kn) != KERNFS_DIR)
@ -2226,25 +2157,13 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
if (cgroup_sane_behavior(cgrp))
return -EPERM;
name = cgroup_alloc_name(new_name_str);
if (!name)
return -ENOMEM;
mutex_lock(&cgroup_tree_mutex);
mutex_lock(&cgroup_mutex);
ret = kernfs_rename(kn, new_parent, new_name_str);
if (!ret) {
old_name = rcu_dereference_protected(cgrp->name, true);
rcu_assign_pointer(cgrp->name, name);
} else {
old_name = name;
}
mutex_unlock(&cgroup_mutex);
mutex_unlock(&cgroup_tree_mutex);
kfree_rcu(old_name, rcu_head);
return ret;
}
@ -3719,14 +3638,13 @@ err_free:
/**
* cgroup_create - create a cgroup
* @parent: cgroup that will be parent of the new cgroup
* @name_str: name of the new cgroup
* @name: name of the new cgroup
* @mode: mode to set on new cgroup
*/
static long cgroup_create(struct cgroup *parent, const char *name_str,
static long cgroup_create(struct cgroup *parent, const char *name,
umode_t mode)
{
struct cgroup *cgrp;
struct cgroup_name *name;
struct cgroupfs_root *root = parent->root;
int ssid, err;
struct cgroup_subsys *ss;
@ -3737,13 +3655,6 @@ static long cgroup_create(struct cgroup *parent, const char *name_str,
if (!cgrp)
return -ENOMEM;
name = cgroup_alloc_name(name_str);
if (!name) {
err = -ENOMEM;
goto err_free_cgrp;
}
rcu_assign_pointer(cgrp->name, name);
mutex_lock(&cgroup_tree_mutex);
/*
@ -3781,7 +3692,7 @@ static long cgroup_create(struct cgroup *parent, const char *name_str,
set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
/* create the directory */
kn = kernfs_create_dir(parent->kn, name->name, mode, cgrp);
kn = kernfs_create_dir(parent->kn, name, mode, cgrp);
if (IS_ERR(kn)) {
err = PTR_ERR(kn);
goto err_free_id;
@ -3839,8 +3750,6 @@ err_unlock:
mutex_unlock(&cgroup_mutex);
err_unlock_tree:
mutex_unlock(&cgroup_tree_mutex);
kfree(rcu_dereference_raw(cgrp->name));
err_free_cgrp:
kfree(cgrp);
return err;
@ -4304,12 +4213,12 @@ int proc_cgroup_show(struct seq_file *m, void *v)
{
struct pid *pid;
struct task_struct *tsk;
char *buf;
char *buf, *path;
int retval;
struct cgroupfs_root *root;
retval = -ENOMEM;
buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
buf = kmalloc(PATH_MAX, GFP_KERNEL);
if (!buf)
goto out;
@ -4337,10 +4246,12 @@ int proc_cgroup_show(struct seq_file *m, void *v)
root->name);
seq_putc(m, ':');
cgrp = task_cgroup_from_root(tsk, root);
retval = cgroup_path(cgrp, buf, PAGE_SIZE);
if (retval < 0)
path = cgroup_path(cgrp, buf, PATH_MAX);
if (!path) {
retval = -ENAMETOOLONG;
goto out_unlock;
seq_puts(m, buf);
}
seq_puts(m, path);
seq_putc(m, '\n');
}
@ -4588,16 +4499,17 @@ static void cgroup_release_agent(struct work_struct *work)
while (!list_empty(&release_list)) {
char *argv[3], *envp[3];
int i;
char *pathbuf = NULL, *agentbuf = NULL;
char *pathbuf = NULL, *agentbuf = NULL, *path;
struct cgroup *cgrp = list_entry(release_list.next,
struct cgroup,
release_list);
list_del_init(&cgrp->release_list);
raw_spin_unlock(&release_list_lock);
pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
if (!pathbuf)
goto continue_free;
if (cgroup_path(cgrp, pathbuf, PAGE_SIZE) < 0)
path = cgroup_path(cgrp, pathbuf, PATH_MAX);
if (!path)
goto continue_free;
agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
if (!agentbuf)
@ -4605,7 +4517,7 @@ static void cgroup_release_agent(struct work_struct *work)
i = 0;
argv[i++] = agentbuf;
argv[i++] = pathbuf;
argv[i++] = path;
argv[i] = NULL;
i = 0;
@ -4755,6 +4667,11 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
{
struct cgrp_cset_link *link;
struct css_set *cset;
char *name_buf;
name_buf = kmalloc(NAME_MAX + 1, GFP_KERNEL);
if (!name_buf)
return -ENOMEM;
read_lock(&css_set_lock);
rcu_read_lock();
@ -4763,14 +4680,17 @@ static int current_css_set_cg_links_read(struct seq_file *seq, void *v)
struct cgroup *c = link->cgrp;
const char *name = "?";
if (c != cgroup_dummy_top)
name = cgroup_name(c);
if (c != cgroup_dummy_top) {
cgroup_name(c, name_buf, NAME_MAX + 1);
name = name_buf;
}
seq_printf(seq, "Root %d group %s\n",
c->root->hierarchy_id, name);
}
rcu_read_unlock();
read_unlock(&css_set_lock);
kfree(name_buf);
return 0;
}

Просмотреть файл

@ -2088,10 +2088,9 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
parent = parent_cs(parent);
if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) {
rcu_read_lock();
printk(KERN_ERR "cpuset: failed to transfer tasks out of empty cpuset %s\n",
cgroup_name(cs->css.cgroup));
rcu_read_unlock();
printk(KERN_ERR "cpuset: failed to transfer tasks out of empty cpuset ");
pr_cont_cgroup_name(cs->css.cgroup);
pr_cont("\n");
}
}
@ -2619,19 +2618,17 @@ void cpuset_print_task_mems_allowed(struct task_struct *tsk)
/* Statically allocated to prevent using excess stack. */
static char cpuset_nodelist[CPUSET_NODELIST_LEN];
static DEFINE_SPINLOCK(cpuset_buffer_lock);
struct cgroup *cgrp = task_cs(tsk)->css.cgroup;
rcu_read_lock();
spin_lock(&cpuset_buffer_lock);
nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN,
tsk->mems_allowed);
printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n",
tsk->comm, cgroup_name(cgrp), cpuset_nodelist);
printk(KERN_INFO "%s cpuset=", tsk->comm);
pr_cont_cgroup_name(cgrp);
pr_cont(" mems_allowed=%s\n", cpuset_nodelist);
spin_unlock(&cpuset_buffer_lock);
rcu_read_unlock();
}
/*
@ -2681,12 +2678,12 @@ int proc_cpuset_show(struct seq_file *m, void *unused_v)
{
struct pid *pid;
struct task_struct *tsk;
char *buf;
char *buf, *p;
struct cgroup_subsys_state *css;
int retval;
retval = -ENOMEM;
buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
buf = kmalloc(PATH_MAX, GFP_KERNEL);
if (!buf)
goto out;
@ -2696,14 +2693,16 @@ int proc_cpuset_show(struct seq_file *m, void *unused_v)
if (!tsk)
goto out_free;
retval = -ENAMETOOLONG;
rcu_read_lock();
css = task_css(tsk, cpuset_cgrp_id);
retval = cgroup_path(css->cgroup, buf, PAGE_SIZE);
p = cgroup_path(css->cgroup, buf, PATH_MAX);
rcu_read_unlock();
if (retval < 0)
if (!p)
goto out_put_task;
seq_puts(m, buf);
seq_puts(m, p);
seq_putc(m, '\n');
retval = 0;
out_put_task:
put_task_struct(tsk);
out_free:

Просмотреть файл

@ -111,8 +111,7 @@ static char *task_group_path(struct task_group *tg)
if (autogroup_path(tg, group_path, PATH_MAX))
return group_path;
cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
return group_path;
return cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
}
#endif

Просмотреть файл

@ -1683,15 +1683,8 @@ static void move_unlock_mem_cgroup(struct mem_cgroup *memcg,
*/
void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
{
/*
* protects memcg_name and makes sure that parallel ooms do not
* interleave
*/
/* oom_info_lock ensures that parallel ooms do not interleave */
static DEFINE_SPINLOCK(oom_info_lock);
struct cgroup *task_cgrp;
struct cgroup *mem_cgrp;
static char memcg_name[PATH_MAX];
int ret;
struct mem_cgroup *iter;
unsigned int i;
@ -1701,36 +1694,14 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
spin_lock(&oom_info_lock);
rcu_read_lock();
mem_cgrp = memcg->css.cgroup;
task_cgrp = task_cgroup(p, memory_cgrp_id);
pr_info("Task in ");
pr_cont_cgroup_path(task_cgroup(p, memory_cgrp_id));
pr_info(" killed as a result of limit of ");
pr_cont_cgroup_path(memcg->css.cgroup);
pr_info("\n");
ret = cgroup_path(task_cgrp, memcg_name, PATH_MAX);
if (ret < 0) {
/*
* Unfortunately, we are unable to convert to a useful name
* But we'll still print out the usage information
*/
rcu_read_unlock();
goto done;
}
rcu_read_unlock();
pr_info("Task in %s killed", memcg_name);
rcu_read_lock();
ret = cgroup_path(mem_cgrp, memcg_name, PATH_MAX);
if (ret < 0) {
rcu_read_unlock();
goto done;
}
rcu_read_unlock();
/*
* Continues from above, so we don't need an KERN_ level
*/
pr_cont(" as a result of limit of %s\n", memcg_name);
done:
pr_info("memory: usage %llukB, limit %llukB, failcnt %llu\n",
res_counter_read_u64(&memcg->res, RES_USAGE) >> 10,
res_counter_read_u64(&memcg->res, RES_LIMIT) >> 10,
@ -1745,13 +1716,8 @@ done:
res_counter_read_u64(&memcg->kmem, RES_FAILCNT));
for_each_mem_cgroup_tree(iter, memcg) {
pr_info("Memory cgroup stats");
rcu_read_lock();
ret = cgroup_path(iter->css.cgroup, memcg_name, PATH_MAX);
if (!ret)
pr_cont(" for %s", memcg_name);
rcu_read_unlock();
pr_info("Memory cgroup stats for ");
pr_cont_cgroup_path(iter->css.cgroup);
pr_cont(":");
for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
@ -3401,7 +3367,7 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
struct kmem_cache *s)
{
struct kmem_cache *new = NULL;
static char *tmp_name = NULL;
static char *tmp_path = NULL, *tmp_name = NULL;
static DEFINE_MUTEX(mutex); /* protects tmp_name */
BUG_ON(!memcg_can_account_kmem(memcg));
@ -3413,18 +3379,20 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
* This static temporary buffer is used to prevent from
* pointless shortliving allocation.
*/
if (!tmp_name) {
tmp_name = kmalloc(PATH_MAX, GFP_KERNEL);
if (!tmp_path || !tmp_name) {
if (!tmp_path)
tmp_path = kmalloc(PATH_MAX, GFP_KERNEL);
if (!tmp_name)
tmp_name = kmalloc(NAME_MAX + 1, GFP_KERNEL);
if (!tmp_path || !tmp_name)
goto out;
}
rcu_read_lock();
snprintf(tmp_name, PATH_MAX, "%s(%d:%s)", s->name,
memcg_cache_id(memcg), cgroup_name(memcg->css.cgroup));
rcu_read_unlock();
cgroup_name(memcg->css.cgroup, tmp_name, NAME_MAX + 1);
snprintf(tmp_path, PATH_MAX, "%s(%d:%s)", s->name,
memcg_cache_id(memcg), tmp_name);
new = kmem_cache_create_memcg(memcg, tmp_name, s->object_size, s->align,
new = kmem_cache_create_memcg(memcg, tmp_path, s->object_size, s->align,
(s->flags & ~SLAB_PANIC), s->ctor, s);
if (new)
new->allocflags |= __GFP_KMEMCG;