Merge branch 'for-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo: - cgroup.kill is added which implements atomic killing of the whole subtree. Down the line, this should be able to replace the multiple userland implementations of "keep killing till empty". - PSI can now be turned off at boot time to avoid overhead for configurations which don't care about PSI. * 'for-5.14' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: cgroup: make per-cgroup pressure stall tracking configurable cgroup: Fix kernel-doc cgroup: inline cgroup_task_freeze() tests/cgroup: test cgroup.kill tests/cgroup: move cg_wait_for(), cg_prepare_for_wait() tests/cgroup: use cgroup.kill in cg_killall() docs/cgroup: add entry for cgroup.kill cgroup: introduce cgroup.kill
This commit is contained in:
Коммит
3dbdb38e28
|
@ -953,6 +953,21 @@ All cgroup core files are prefixed with "cgroup."
|
|||
it's possible to delete a frozen (and empty) cgroup, as well as
|
||||
create new sub-cgroups.
|
||||
|
||||
cgroup.kill
|
||||
A write-only single value file which exists in non-root cgroups.
|
||||
The only allowed value is "1".
|
||||
|
||||
Writing "1" to the file causes the cgroup and all descendant cgroups to
|
||||
be killed. This means that all processes located in the affected cgroup
|
||||
tree will be killed via SIGKILL.
|
||||
|
||||
Killing a cgroup tree will deal with concurrent forks appropriately and
|
||||
is protected against migrations.
|
||||
|
||||
In a threaded cgroup, writing this file fails with EOPNOTSUPP as
|
||||
killing cgroups is a process directed operation, i.e. it affects
|
||||
the whole thread-group.
|
||||
|
||||
Controllers
|
||||
===========
|
||||
|
||||
|
|
|
@ -497,16 +497,21 @@
|
|||
ccw_timeout_log [S390]
|
||||
See Documentation/s390/common_io.rst for details.
|
||||
|
||||
cgroup_disable= [KNL] Disable a particular controller
|
||||
Format: {name of the controller(s) to disable}
|
||||
cgroup_disable= [KNL] Disable a particular controller or optional feature
|
||||
Format: {name of the controller(s) or feature(s) to disable}
|
||||
The effects of cgroup_disable=foo are:
|
||||
- foo isn't auto-mounted if you mount all cgroups in
|
||||
a single hierarchy
|
||||
- foo isn't visible as an individually mountable
|
||||
subsystem
|
||||
- if foo is an optional feature then the feature is
|
||||
disabled and corresponding cgroup files are not
|
||||
created
|
||||
{Currently only the "memory" controller deals with this and
|
||||
cuts the overhead; the others just disable the usage. So
|
||||
only cgroup_disable=memory is actually worthwhile}
|
||||
Specifying "pressure" disables the per-cgroup pressure
|
||||
stall information accounting feature.
|
||||
|
||||
cgroup_no_v1= [KNL] Disable cgroup controllers and named hierarchies in v1
|
||||
Format: { { controller | "all" | "named" }
|
||||
|
|
|
@ -71,6 +71,9 @@ enum {
|
|||
|
||||
/* Cgroup is frozen. */
|
||||
CGRP_FROZEN,
|
||||
|
||||
/* Control group has to be killed. */
|
||||
CGRP_KILL,
|
||||
};
|
||||
|
||||
/* cgroup_root->flags */
|
||||
|
@ -110,6 +113,7 @@ enum {
|
|||
CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */
|
||||
CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */
|
||||
CFTYPE_DEBUG = (1 << 5), /* create when cgroup_debug */
|
||||
CFTYPE_PRESSURE = (1 << 6), /* only if pressure feature is enabled */
|
||||
|
||||
/* internal flags, do not use outside cgroup core proper */
|
||||
__CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */
|
||||
|
|
|
@ -676,6 +676,8 @@ static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
|
|||
return &cgrp->psi;
|
||||
}
|
||||
|
||||
bool cgroup_psi_enabled(void);
|
||||
|
||||
static inline void cgroup_init_kthreadd(void)
|
||||
{
|
||||
/*
|
||||
|
@ -735,6 +737,11 @@ static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/* Fallback variant of cgroup_psi_enabled(): always reports false. */
static inline bool cgroup_psi_enabled(void)
{
	return false;
}
|
||||
|
||||
static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
|
||||
struct cgroup *ancestor)
|
||||
{
|
||||
|
@ -906,20 +913,6 @@ void cgroup_freeze(struct cgroup *cgrp, bool freeze);
|
|||
void cgroup_freezer_migrate_task(struct task_struct *task, struct cgroup *src,
|
||||
struct cgroup *dst);
|
||||
|
||||
static inline bool cgroup_task_freeze(struct task_struct *task)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
if (task->flags & PF_KTHREAD)
|
||||
return false;
|
||||
|
||||
rcu_read_lock();
|
||||
ret = test_bit(CGRP_FREEZE, &task_dfl_cgroup(task)->flags);
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline bool cgroup_task_frozen(struct task_struct *task)
|
||||
{
|
||||
return task->frozen;
|
||||
|
@ -929,10 +922,6 @@ static inline bool cgroup_task_frozen(struct task_struct *task)
|
|||
|
||||
static inline void cgroup_enter_frozen(void) { }
|
||||
static inline void cgroup_leave_frozen(bool always_leave) { }
|
||||
static inline bool cgroup_task_freeze(struct task_struct *task)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline bool cgroup_task_frozen(struct task_struct *task)
|
||||
{
|
||||
return false;
|
||||
|
|
|
@ -209,6 +209,22 @@ struct cgroup_namespace init_cgroup_ns = {
|
|||
static struct file_system_type cgroup2_fs_type;
|
||||
static struct cftype cgroup_base_files[];
|
||||
|
||||
/*
 * Optional cgroup features.  Each enumerator doubles as the bit index
 * used in cgroup_feature_disable_mask.
 */
enum cgroup_opt_features {
#ifdef CONFIG_PSI
	OPT_FEATURE_PRESSURE,
#endif
	OPT_FEATURE_COUNT
};

/* Name of each optional feature, indexed by its enumerator. */
static const char *cgroup_opt_feature_names[OPT_FEATURE_COUNT] = {
#ifdef CONFIG_PSI
	[OPT_FEATURE_PRESSURE] = "pressure",
#endif
};
|
||||
|
||||
static u16 cgroup_feature_disable_mask __read_mostly;
|
||||
|
||||
static int cgroup_apply_control(struct cgroup *cgrp);
|
||||
static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
|
||||
static void css_task_iter_skip(struct css_task_iter *it,
|
||||
|
@ -2390,7 +2406,7 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
|
|||
}
|
||||
|
||||
/**
|
||||
* cgroup_taskset_migrate - migrate a taskset
|
||||
* cgroup_migrate_execute - migrate a taskset
|
||||
* @mgctx: migration context
|
||||
*
|
||||
* Migrate tasks in @mgctx as setup by migration preparation functions.
|
||||
|
@ -3632,6 +3648,18 @@ static void cgroup_pressure_release(struct kernfs_open_file *of)
|
|||
{
|
||||
psi_trigger_replace(&of->priv, NULL);
|
||||
}
|
||||
|
||||
bool cgroup_psi_enabled(void)
|
||||
{
|
||||
return (cgroup_feature_disable_mask & (1 << OPT_FEATURE_PRESSURE)) == 0;
|
||||
}
|
||||
|
||||
#else /* CONFIG_PSI */
|
||||
/* Variant built when CONFIG_PSI is off: pressure tracking is never enabled. */
bool cgroup_psi_enabled(void)
{
	return false;
}
|
||||
|
||||
#endif /* CONFIG_PSI */
|
||||
|
||||
static int cgroup_freeze_show(struct seq_file *seq, void *v)
|
||||
|
@ -3668,6 +3696,80 @@ static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
|
|||
return nbytes;
|
||||
}
|
||||
|
||||
static void __cgroup_kill(struct cgroup *cgrp)
|
||||
{
|
||||
struct css_task_iter it;
|
||||
struct task_struct *task;
|
||||
|
||||
lockdep_assert_held(&cgroup_mutex);
|
||||
|
||||
spin_lock_irq(&css_set_lock);
|
||||
set_bit(CGRP_KILL, &cgrp->flags);
|
||||
spin_unlock_irq(&css_set_lock);
|
||||
|
||||
css_task_iter_start(&cgrp->self, CSS_TASK_ITER_PROCS | CSS_TASK_ITER_THREADED, &it);
|
||||
while ((task = css_task_iter_next(&it))) {
|
||||
/* Ignore kernel threads here. */
|
||||
if (task->flags & PF_KTHREAD)
|
||||
continue;
|
||||
|
||||
/* Skip tasks that are already dying. */
|
||||
if (__fatal_signal_pending(task))
|
||||
continue;
|
||||
|
||||
send_sig(SIGKILL, task, 0);
|
||||
}
|
||||
css_task_iter_end(&it);
|
||||
|
||||
spin_lock_irq(&css_set_lock);
|
||||
clear_bit(CGRP_KILL, &cgrp->flags);
|
||||
spin_unlock_irq(&css_set_lock);
|
||||
}
|
||||
|
||||
static void cgroup_kill(struct cgroup *cgrp)
|
||||
{
|
||||
struct cgroup_subsys_state *css;
|
||||
struct cgroup *dsct;
|
||||
|
||||
lockdep_assert_held(&cgroup_mutex);
|
||||
|
||||
cgroup_for_each_live_descendant_pre(dsct, css, cgrp)
|
||||
__cgroup_kill(dsct);
|
||||
}
|
||||
|
||||
static ssize_t cgroup_kill_write(struct kernfs_open_file *of, char *buf,
|
||||
size_t nbytes, loff_t off)
|
||||
{
|
||||
ssize_t ret = 0;
|
||||
int kill;
|
||||
struct cgroup *cgrp;
|
||||
|
||||
ret = kstrtoint(strstrip(buf), 0, &kill);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (kill != 1)
|
||||
return -ERANGE;
|
||||
|
||||
cgrp = cgroup_kn_lock_live(of->kn, false);
|
||||
if (!cgrp)
|
||||
return -ENOENT;
|
||||
|
||||
/*
|
||||
* Killing is a process directed operation, i.e. the whole thread-group
|
||||
* is taken down so act like we do for cgroup.procs and only make this
|
||||
* writable in non-threaded cgroups.
|
||||
*/
|
||||
if (cgroup_is_threaded(cgrp))
|
||||
ret = -EOPNOTSUPP;
|
||||
else
|
||||
cgroup_kill(cgrp);
|
||||
|
||||
cgroup_kn_unlock(of->kn);
|
||||
|
||||
return ret ?: nbytes;
|
||||
}
|
||||
|
||||
static int cgroup_file_open(struct kernfs_open_file *of)
|
||||
{
|
||||
struct cftype *cft = of_cft(of);
|
||||
|
@ -3882,6 +3984,8 @@ static int cgroup_addrm_files(struct cgroup_subsys_state *css,
|
|||
restart:
|
||||
for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) {
|
||||
/* does cft->flags tell us to skip this file on @cgrp? */
|
||||
if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled())
|
||||
continue;
|
||||
if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
|
||||
continue;
|
||||
if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp))
|
||||
|
@ -3959,6 +4063,9 @@ static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
|
|||
|
||||
WARN_ON(cft->ss || cft->kf_ops);
|
||||
|
||||
if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled())
|
||||
continue;
|
||||
|
||||
if (cft->seq_start)
|
||||
kf_ops = &cgroup_kf_ops;
|
||||
else
|
||||
|
@ -4860,6 +4967,11 @@ static struct cftype cgroup_base_files[] = {
|
|||
.seq_show = cgroup_freeze_show,
|
||||
.write = cgroup_freeze_write,
|
||||
},
|
||||
{
|
||||
.name = "cgroup.kill",
|
||||
.flags = CFTYPE_NOT_ON_ROOT,
|
||||
.write = cgroup_kill_write,
|
||||
},
|
||||
{
|
||||
.name = "cpu.stat",
|
||||
.seq_show = cpu_stat_show,
|
||||
|
@ -4867,6 +4979,7 @@ static struct cftype cgroup_base_files[] = {
|
|||
#ifdef CONFIG_PSI
|
||||
{
|
||||
.name = "io.pressure",
|
||||
.flags = CFTYPE_PRESSURE,
|
||||
.seq_show = cgroup_io_pressure_show,
|
||||
.write = cgroup_io_pressure_write,
|
||||
.poll = cgroup_pressure_poll,
|
||||
|
@ -4874,6 +4987,7 @@ static struct cftype cgroup_base_files[] = {
|
|||
},
|
||||
{
|
||||
.name = "memory.pressure",
|
||||
.flags = CFTYPE_PRESSURE,
|
||||
.seq_show = cgroup_memory_pressure_show,
|
||||
.write = cgroup_memory_pressure_write,
|
||||
.poll = cgroup_pressure_poll,
|
||||
|
@ -4881,6 +4995,7 @@ static struct cftype cgroup_base_files[] = {
|
|||
},
|
||||
{
|
||||
.name = "cpu.pressure",
|
||||
.flags = CFTYPE_PRESSURE,
|
||||
.seq_show = cgroup_cpu_pressure_show,
|
||||
.write = cgroup_cpu_pressure_write,
|
||||
.poll = cgroup_pressure_poll,
|
||||
|
@ -6080,6 +6195,8 @@ void cgroup_post_fork(struct task_struct *child,
|
|||
struct kernel_clone_args *kargs)
|
||||
__releases(&cgroup_threadgroup_rwsem) __releases(&cgroup_mutex)
|
||||
{
|
||||
unsigned long cgrp_flags = 0;
|
||||
bool kill = false;
|
||||
struct cgroup_subsys *ss;
|
||||
struct css_set *cset;
|
||||
int i;
|
||||
|
@ -6091,6 +6208,11 @@ void cgroup_post_fork(struct task_struct *child,
|
|||
|
||||
/* init tasks are special, only link regular threads */
|
||||
if (likely(child->pid)) {
|
||||
if (kargs->cgrp)
|
||||
cgrp_flags = kargs->cgrp->flags;
|
||||
else
|
||||
cgrp_flags = cset->dfl_cgrp->flags;
|
||||
|
||||
WARN_ON_ONCE(!list_empty(&child->cg_list));
|
||||
cset->nr_tasks++;
|
||||
css_set_move_task(child, NULL, cset, false);
|
||||
|
@ -6099,23 +6221,32 @@ void cgroup_post_fork(struct task_struct *child,
|
|||
cset = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the cgroup has to be frozen, the new task has too. Let's set
|
||||
* the JOBCTL_TRAP_FREEZE jobctl bit to get the task into the
|
||||
* frozen state.
|
||||
*/
|
||||
if (unlikely(cgroup_task_freeze(child))) {
|
||||
spin_lock(&child->sighand->siglock);
|
||||
WARN_ON_ONCE(child->frozen);
|
||||
child->jobctl |= JOBCTL_TRAP_FREEZE;
|
||||
spin_unlock(&child->sighand->siglock);
|
||||
if (!(child->flags & PF_KTHREAD)) {
|
||||
if (unlikely(test_bit(CGRP_FREEZE, &cgrp_flags))) {
|
||||
/*
|
||||
* If the cgroup has to be frozen, the new task has
|
||||
* too. Let's set the JOBCTL_TRAP_FREEZE jobctl bit to
|
||||
* get the task into the frozen state.
|
||||
*/
|
||||
spin_lock(&child->sighand->siglock);
|
||||
WARN_ON_ONCE(child->frozen);
|
||||
child->jobctl |= JOBCTL_TRAP_FREEZE;
|
||||
spin_unlock(&child->sighand->siglock);
|
||||
|
||||
/*
|
||||
* Calling cgroup_update_frozen() isn't required here,
|
||||
* because it will be called anyway a bit later from
|
||||
* do_freezer_trap(). So we avoid cgroup's transient
|
||||
* switch from the frozen state and back.
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
* Calling cgroup_update_frozen() isn't required here,
|
||||
* because it will be called anyway a bit later from
|
||||
* do_freezer_trap(). So we avoid cgroup's transient switch
|
||||
* from the frozen state and back.
|
||||
* If the cgroup is to be killed notice it now and take the
|
||||
* child down right after we finished preparing it for
|
||||
* userspace.
|
||||
*/
|
||||
kill = test_bit(CGRP_KILL, &cgrp_flags);
|
||||
}
|
||||
|
||||
spin_unlock_irq(&css_set_lock);
|
||||
|
@ -6138,6 +6269,10 @@ void cgroup_post_fork(struct task_struct *child,
|
|||
put_css_set(rcset);
|
||||
}
|
||||
|
||||
/* Cgroup has to be killed so take down child immediately. */
|
||||
if (unlikely(kill))
|
||||
do_send_sig_info(SIGKILL, SEND_SIG_NOINFO, child, PIDTYPE_TGID);
|
||||
|
||||
cgroup_css_set_put_fork(kargs);
|
||||
}
|
||||
|
||||
|
@ -6163,7 +6298,8 @@ void cgroup_exit(struct task_struct *tsk)
|
|||
cset->nr_tasks--;
|
||||
|
||||
WARN_ON_ONCE(cgroup_task_frozen(tsk));
|
||||
if (unlikely(cgroup_task_freeze(tsk)))
|
||||
if (unlikely(!(tsk->flags & PF_KTHREAD) &&
|
||||
test_bit(CGRP_FREEZE, &task_dfl_cgroup(tsk)->flags)))
|
||||
cgroup_update_frozen(task_dfl_cgroup(tsk));
|
||||
|
||||
spin_unlock_irq(&css_set_lock);
|
||||
|
@ -6214,6 +6350,15 @@ static int __init cgroup_disable(char *str)
|
|||
pr_info("Disabling %s control group subsystem\n",
|
||||
ss->name);
|
||||
}
|
||||
|
||||
for (i = 0; i < OPT_FEATURE_COUNT; i++) {
|
||||
if (strcmp(token, cgroup_opt_feature_names[i]))
|
||||
continue;
|
||||
cgroup_feature_disable_mask |= 1 << i;
|
||||
pr_info("Disabling %s control group feature\n",
|
||||
cgroup_opt_feature_names[i]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
@ -6512,6 +6657,9 @@ static ssize_t show_delegatable_files(struct cftype *files, char *buf,
|
|||
if (!(cft->flags & CFTYPE_NS_DELEGATABLE))
|
||||
continue;
|
||||
|
||||
if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled())
|
||||
continue;
|
||||
|
||||
if (prefix)
|
||||
ret += snprintf(buf + ret, size - ret, "%s.", prefix);
|
||||
|
||||
|
|
|
@ -220,7 +220,7 @@ void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp)
|
|||
}
|
||||
|
||||
/**
|
||||
* cgroup_rstat_flush_begin - flush stats in @cgrp's subtree and hold
|
||||
* cgroup_rstat_flush_hold - flush stats in @cgrp's subtree and hold
|
||||
* @cgrp: target cgroup
|
||||
*
|
||||
* Flush stats in @cgrp's subtree and prevent further flushes. Must be
|
||||
|
|
|
@ -148,6 +148,7 @@
|
|||
static int psi_bug __read_mostly;
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(psi_disabled);
|
||||
DEFINE_STATIC_KEY_TRUE(psi_cgroups_enabled);
|
||||
|
||||
#ifdef CONFIG_PSI_DEFAULT_DISABLED
|
||||
static bool psi_enable;
|
||||
|
@ -215,6 +216,9 @@ void __init psi_init(void)
|
|||
return;
|
||||
}
|
||||
|
||||
if (!cgroup_psi_enabled())
|
||||
static_branch_disable(&psi_cgroups_enabled);
|
||||
|
||||
psi_period = jiffies_to_nsecs(PSI_FREQ);
|
||||
group_init(&psi_system);
|
||||
}
|
||||
|
@ -748,23 +752,23 @@ static void psi_group_change(struct psi_group *group, int cpu,
|
|||
|
||||
static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
|
||||
{
|
||||
if (*iter == &psi_system)
|
||||
return NULL;
|
||||
|
||||
#ifdef CONFIG_CGROUPS
|
||||
struct cgroup *cgroup = NULL;
|
||||
if (static_branch_likely(&psi_cgroups_enabled)) {
|
||||
struct cgroup *cgroup = NULL;
|
||||
|
||||
if (!*iter)
|
||||
cgroup = task->cgroups->dfl_cgrp;
|
||||
else if (*iter == &psi_system)
|
||||
return NULL;
|
||||
else
|
||||
cgroup = cgroup_parent(*iter);
|
||||
if (!*iter)
|
||||
cgroup = task->cgroups->dfl_cgrp;
|
||||
else
|
||||
cgroup = cgroup_parent(*iter);
|
||||
|
||||
if (cgroup && cgroup_parent(cgroup)) {
|
||||
*iter = cgroup;
|
||||
return cgroup_psi(cgroup);
|
||||
if (cgroup && cgroup_parent(cgroup)) {
|
||||
*iter = cgroup;
|
||||
return cgroup_psi(cgroup);
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (*iter)
|
||||
return NULL;
|
||||
#endif
|
||||
*iter = &psi_system;
|
||||
return &psi_system;
|
||||
|
|
|
@ -2,4 +2,5 @@
|
|||
test_memcontrol
|
||||
test_core
|
||||
test_freezer
|
||||
test_kmem
|
||||
test_kmem
|
||||
test_kill
|
||||
|
|
|
@ -9,6 +9,7 @@ TEST_GEN_PROGS = test_memcontrol
|
|||
TEST_GEN_PROGS += test_kmem
|
||||
TEST_GEN_PROGS += test_core
|
||||
TEST_GEN_PROGS += test_freezer
|
||||
TEST_GEN_PROGS += test_kill
|
||||
|
||||
include ../lib.mk
|
||||
|
||||
|
@ -16,3 +17,4 @@ $(OUTPUT)/test_memcontrol: cgroup_util.c ../clone3/clone3_selftests.h
|
|||
$(OUTPUT)/test_kmem: cgroup_util.c ../clone3/clone3_selftests.h
|
||||
$(OUTPUT)/test_core: cgroup_util.c ../clone3/clone3_selftests.h
|
||||
$(OUTPUT)/test_freezer: cgroup_util.c ../clone3/clone3_selftests.h
|
||||
$(OUTPUT)/test_kill: cgroup_util.c ../clone3/clone3_selftests.h ../pidfd/pidfd.h
|
||||
|
|
|
@ -5,10 +5,12 @@
|
|||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <linux/limits.h>
|
||||
#include <poll.h>
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/inotify.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
|
@ -252,6 +254,10 @@ int cg_killall(const char *cgroup)
|
|||
char buf[PAGE_SIZE];
|
||||
char *ptr = buf;
|
||||
|
||||
/* If cgroup.kill exists use it. */
|
||||
if (!cg_write(cgroup, "cgroup.kill", "1"))
|
||||
return 0;
|
||||
|
||||
if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
|
||||
return -1;
|
||||
|
||||
|
@ -576,3 +582,48 @@ int clone_into_cgroup_run_wait(const char *cgroup)
|
|||
(void)clone_reap(pid, WEXITED);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Create an inotify descriptor that watches @cgroup's cgroup.events file
 * for modifications.
 *
 * Returns the inotify descriptor on success, or -1 when either the
 * inotify instance or the watch could not be set up.
 */
int cg_prepare_for_wait(const char *cgroup)
{
	int inotify_fd = inotify_init1(0);

	if (inotify_fd == -1)
		return -1;

	/*
	 * NOTE(review): ownership of the string returned by cg_control() is
	 * not visible here - confirm whether it has to be freed.
	 */
	if (inotify_add_watch(inotify_fd,
			      cg_control(cgroup, "cgroup.events"),
			      IN_MODIFY) == -1) {
		close(inotify_fd);
		return -1;
	}

	return inotify_fd;
}
|
||||
|
||||
/*
 * Wait for an event on @fd.
 *
 * Polls @fd for readability, restarting the wait when poll() is
 * interrupted by a signal.
 *
 * Returns 0 once @fd is readable.  Returns -1 when poll() fails, when no
 * event arrives within 10 seconds (errno is set to ETIMEDOUT), or when
 * poll() reports a condition on @fd that does not include POLLIN.
 */
int cg_wait_for(int fd)
{
	enum { WAIT_TIMEOUT_MS = 10000 };
	struct pollfd fds = {
		.fd = fd,
		.events = POLLIN,
	};
	int ret;

	for (;;) {
		ret = poll(&fds, 1, WAIT_TIMEOUT_MS);

		if (ret == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}

		/* Timed out: report it instead of silently waiting again. */
		if (ret == 0) {
			errno = ETIMEDOUT;
			return -1;
		}

		if (fds.revents & POLLIN)
			return 0;

		/*
		 * Only POLLERR/POLLHUP/POLLNVAL are set.  Polling again would
		 * return immediately with the same result, so give up.
		 */
		return -1;
	}
}
|
||||
|
|
|
@ -54,3 +54,5 @@ extern pid_t clone_into_cgroup(int cgroup_fd);
|
|||
extern int clone_reap(pid_t pid, int options);
|
||||
extern int clone_into_cgroup_run_wait(const char *cgroup);
|
||||
extern int dirfd_open_opath(const char *dir);
|
||||
extern int cg_prepare_for_wait(const char *cgroup);
|
||||
extern int cg_wait_for(int fd);
|
||||
|
|
|
@ -7,9 +7,7 @@
|
|||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <poll.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/inotify.h>
|
||||
#include <string.h>
|
||||
#include <sys/wait.h>
|
||||
|
||||
|
@ -54,61 +52,6 @@ static int cg_freeze_nowait(const char *cgroup, bool freeze)
|
|||
return cg_write(cgroup, "cgroup.freeze", freeze ? "1" : "0");
|
||||
}
|
||||
|
||||
/*
|
||||
* Prepare for waiting on cgroup.events file.
|
||||
*/
|
||||
static int cg_prepare_for_wait(const char *cgroup)
|
||||
{
|
||||
int fd, ret = -1;
|
||||
|
||||
fd = inotify_init1(0);
|
||||
if (fd == -1) {
|
||||
debug("Error: inotify_init1() failed\n");
|
||||
return fd;
|
||||
}
|
||||
|
||||
ret = inotify_add_watch(fd, cg_control(cgroup, "cgroup.events"),
|
||||
IN_MODIFY);
|
||||
if (ret == -1) {
|
||||
debug("Error: inotify_add_watch() failed\n");
|
||||
close(fd);
|
||||
fd = -1;
|
||||
}
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for an event. If there are no events for 10 seconds,
|
||||
* treat this as an error.
|
||||
*/
|
||||
static int cg_wait_for(int fd)
|
||||
{
|
||||
int ret = -1;
|
||||
struct pollfd fds = {
|
||||
.fd = fd,
|
||||
.events = POLLIN,
|
||||
};
|
||||
|
||||
while (true) {
|
||||
ret = poll(&fds, 1, 10000);
|
||||
|
||||
if (ret == -1) {
|
||||
if (errno == EINTR)
|
||||
continue;
|
||||
debug("Error: poll() failed\n");
|
||||
break;
|
||||
}
|
||||
|
||||
if (ret > 0 && fds.revents & POLLIN) {
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Attach a task to the given cgroup and wait for a cgroup frozen event.
|
||||
* All transient events (e.g. populated) are ignored.
|
||||
|
|
|
@ -0,0 +1,297 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#include <errno.h>
|
||||
#include <linux/limits.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "../kselftest.h"
|
||||
#include "../pidfd/pidfd.h"
|
||||
#include "cgroup_util.h"
|
||||
|
||||
/*
 * Write "1" to the cgroup.kill file of the given cgroup and wait for a
 * modification event on its cgroup.events file.
 *
 * Returns 0 on success, a negative watch-setup result when the inotify
 * watch cannot be created, or the non-zero result of the failed write or
 * wait otherwise.
 */
static int cg_kill_wait(const char *cgroup)
{
	int err;
	int watch_fd = cg_prepare_for_wait(cgroup);

	if (watch_fd < 0)
		return watch_fd;

	err = cg_write(cgroup, "cgroup.kill", "1");
	if (!err)
		err = cg_wait_for(watch_fd);

	close(watch_fd);
	return err;
}
|
||||
|
||||
/*
 * Test payload: remember the parent pid at entry and sleep in 1 ms steps
 * for as long as getppid() keeps returning it, i.e. until the process is
 * re-parented.
 */
static int child_fn(const char *cgroup, void *arg)
{
	const int original_parent = getppid();

	for (;;) {
		if (getppid() != original_parent)
			break;
		usleep(1000);
	}

	return getppid() == original_parent;
}
|
||||
|
||||
static int test_cgkill_simple(const char *root)
|
||||
{
|
||||
pid_t pids[100];
|
||||
int ret = KSFT_FAIL;
|
||||
char *cgroup = NULL;
|
||||
int i;
|
||||
|
||||
cgroup = cg_name(root, "cg_test_simple");
|
||||
if (!cgroup)
|
||||
goto cleanup;
|
||||
|
||||
if (cg_create(cgroup))
|
||||
goto cleanup;
|
||||
|
||||
for (i = 0; i < 100; i++)
|
||||
pids[i] = cg_run_nowait(cgroup, child_fn, NULL);
|
||||
|
||||
if (cg_wait_for_proc_count(cgroup, 100))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_read_strcmp(cgroup, "cgroup.events", "populated 1\n"))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_kill_wait(cgroup))
|
||||
goto cleanup;
|
||||
|
||||
ret = KSFT_PASS;
|
||||
|
||||
cleanup:
|
||||
for (i = 0; i < 100; i++)
|
||||
wait_for_pid(pids[i]);
|
||||
|
||||
if (ret == KSFT_PASS &&
|
||||
cg_read_strcmp(cgroup, "cgroup.events", "populated 0\n"))
|
||||
ret = KSFT_FAIL;
|
||||
|
||||
if (cgroup)
|
||||
cg_destroy(cgroup);
|
||||
free(cgroup);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* The test creates the following hierarchy:
|
||||
* A
|
||||
* / / \ \
|
||||
* B E I K
|
||||
* /\ |
|
||||
* C D F
|
||||
* |
|
||||
* G
|
||||
* |
|
||||
* H
|
||||
*
|
||||
* with a process in C, H and 3 processes in K.
|
||||
* Then it tries to kill the whole tree.
|
||||
*/
|
||||
static int test_cgkill_tree(const char *root)
|
||||
{
|
||||
pid_t pids[5];
|
||||
char *cgroup[10] = {0};
|
||||
int ret = KSFT_FAIL;
|
||||
int i;
|
||||
|
||||
cgroup[0] = cg_name(root, "cg_test_tree_A");
|
||||
if (!cgroup[0])
|
||||
goto cleanup;
|
||||
|
||||
cgroup[1] = cg_name(cgroup[0], "B");
|
||||
if (!cgroup[1])
|
||||
goto cleanup;
|
||||
|
||||
cgroup[2] = cg_name(cgroup[1], "C");
|
||||
if (!cgroup[2])
|
||||
goto cleanup;
|
||||
|
||||
cgroup[3] = cg_name(cgroup[1], "D");
|
||||
if (!cgroup[3])
|
||||
goto cleanup;
|
||||
|
||||
cgroup[4] = cg_name(cgroup[0], "E");
|
||||
if (!cgroup[4])
|
||||
goto cleanup;
|
||||
|
||||
cgroup[5] = cg_name(cgroup[4], "F");
|
||||
if (!cgroup[5])
|
||||
goto cleanup;
|
||||
|
||||
cgroup[6] = cg_name(cgroup[5], "G");
|
||||
if (!cgroup[6])
|
||||
goto cleanup;
|
||||
|
||||
cgroup[7] = cg_name(cgroup[6], "H");
|
||||
if (!cgroup[7])
|
||||
goto cleanup;
|
||||
|
||||
cgroup[8] = cg_name(cgroup[0], "I");
|
||||
if (!cgroup[8])
|
||||
goto cleanup;
|
||||
|
||||
cgroup[9] = cg_name(cgroup[0], "K");
|
||||
if (!cgroup[9])
|
||||
goto cleanup;
|
||||
|
||||
for (i = 0; i < 10; i++)
|
||||
if (cg_create(cgroup[i]))
|
||||
goto cleanup;
|
||||
|
||||
pids[0] = cg_run_nowait(cgroup[2], child_fn, NULL);
|
||||
pids[1] = cg_run_nowait(cgroup[7], child_fn, NULL);
|
||||
pids[2] = cg_run_nowait(cgroup[9], child_fn, NULL);
|
||||
pids[3] = cg_run_nowait(cgroup[9], child_fn, NULL);
|
||||
pids[4] = cg_run_nowait(cgroup[9], child_fn, NULL);
|
||||
|
||||
/*
|
||||
* Wait until all child processes will enter
|
||||
* corresponding cgroups.
|
||||
*/
|
||||
|
||||
if (cg_wait_for_proc_count(cgroup[2], 1) ||
|
||||
cg_wait_for_proc_count(cgroup[7], 1) ||
|
||||
cg_wait_for_proc_count(cgroup[9], 3))
|
||||
goto cleanup;
|
||||
|
||||
/*
|
||||
* Kill A and check that we get an empty notification.
|
||||
*/
|
||||
if (cg_kill_wait(cgroup[0]))
|
||||
goto cleanup;
|
||||
|
||||
ret = KSFT_PASS;
|
||||
|
||||
cleanup:
|
||||
for (i = 0; i < 5; i++)
|
||||
wait_for_pid(pids[i]);
|
||||
|
||||
if (ret == KSFT_PASS &&
|
||||
cg_read_strcmp(cgroup[0], "cgroup.events", "populated 0\n"))
|
||||
ret = KSFT_FAIL;
|
||||
|
||||
for (i = 9; i >= 0 && cgroup[i]; i--) {
|
||||
cg_destroy(cgroup[i]);
|
||||
free(cgroup[i]);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Test payload: call fork() twice in a row (results deliberately ignored),
 * then, in every resulting process, sleep in 1 ms steps until getppid()
 * changes.
 */
static int forkbomb_fn(const char *cgroup, void *arg)
{
	int initial_parent;
	int round;

	for (round = 0; round < 2; round++)
		fork();

	initial_parent = getppid();

	for (;;) {
		if (getppid() != initial_parent)
			break;
		usleep(1000);
	}

	return getppid() == initial_parent;
}
|
||||
|
||||
/*
|
||||
* The test runs a fork bomb in a cgroup and tries to kill it.
|
||||
*/
|
||||
static int test_cgkill_forkbomb(const char *root)
|
||||
{
|
||||
int ret = KSFT_FAIL;
|
||||
char *cgroup = NULL;
|
||||
pid_t pid = -ESRCH;
|
||||
|
||||
cgroup = cg_name(root, "cg_forkbomb_test");
|
||||
if (!cgroup)
|
||||
goto cleanup;
|
||||
|
||||
if (cg_create(cgroup))
|
||||
goto cleanup;
|
||||
|
||||
pid = cg_run_nowait(cgroup, forkbomb_fn, NULL);
|
||||
if (pid < 0)
|
||||
goto cleanup;
|
||||
|
||||
usleep(100000);
|
||||
|
||||
if (cg_kill_wait(cgroup))
|
||||
goto cleanup;
|
||||
|
||||
if (cg_wait_for_proc_count(cgroup, 0))
|
||||
goto cleanup;
|
||||
|
||||
ret = KSFT_PASS;
|
||||
|
||||
cleanup:
|
||||
if (pid > 0)
|
||||
wait_for_pid(pid);
|
||||
|
||||
if (ret == KSFT_PASS &&
|
||||
cg_read_strcmp(cgroup, "cgroup.events", "populated 0\n"))
|
||||
ret = KSFT_FAIL;
|
||||
|
||||
if (cgroup)
|
||||
cg_destroy(cgroup);
|
||||
free(cgroup);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define T(x) { x, #x }
|
||||
struct cgkill_test {
|
||||
int (*fn)(const char *root);
|
||||
const char *name;
|
||||
} tests[] = {
|
||||
T(test_cgkill_simple),
|
||||
T(test_cgkill_tree),
|
||||
T(test_cgkill_forkbomb),
|
||||
};
|
||||
#undef T
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
char root[PATH_MAX];
|
||||
int i, ret = EXIT_SUCCESS;
|
||||
|
||||
if (cg_find_unified_root(root, sizeof(root)))
|
||||
ksft_exit_skip("cgroup v2 isn't mounted\n");
|
||||
for (i = 0; i < ARRAY_SIZE(tests); i++) {
|
||||
switch (tests[i].fn(root)) {
|
||||
case KSFT_PASS:
|
||||
ksft_test_result_pass("%s\n", tests[i].name);
|
||||
break;
|
||||
case KSFT_SKIP:
|
||||
ksft_test_result_skip("%s\n", tests[i].name);
|
||||
break;
|
||||
default:
|
||||
ret = EXIT_FAILURE;
|
||||
ksft_test_result_fail("%s\n", tests[i].name);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
Загрузка…
Ссылка в новой задаче