Merge feature/memory-reclaim/5.15 into v5.15
* commit 'fed46d1f99d22a5a9efd06da0bf5baf6a04045d8':
  - selftests: cgroup: add a selftest for memory.reclaim
  - selftests: cgroup: fix unsigned comparison with less than zero
  - selftests: cgroup: fix alloc_anon_noexit() instantly freeing memory
  - selftests: cgroup: return -errno from cg_read()/cg_write() on failure
  - memcg: introduce per-memcg reclaim interface
This commit is contained in:
Коммит
e704bade90
|
@ -1200,6 +1200,27 @@ PAGE_SIZE multiple when read back.
|
|||
high limit is used and monitored properly, this limit's
|
||||
utility is limited to providing the final safety net.
|
||||
|
||||
memory.reclaim
|
||||
A write-only nested-keyed file which exists for all cgroups.
|
||||
|
||||
This is a simple interface to trigger memory reclaim in the
|
||||
target cgroup.
|
||||
|
||||
This file accepts a single key, the number of bytes to reclaim.
|
||||
No nested keys are currently supported.
|
||||
|
||||
Example::
|
||||
|
||||
echo "1G" > memory.reclaim
|
||||
|
||||
The interface can later be extended with nested keys to
|
||||
configure the reclaim behavior. For example, specify the
|
||||
type of memory to reclaim from (anon, file, ..).
|
||||
|
||||
Please note that the kernel can over or under reclaim from
|
||||
the target cgroup. If fewer bytes are reclaimed than the
|
||||
specified amount, -EAGAIN is returned.
|
||||
|
||||
memory.oom.group
|
||||
A read-write single value file which exists on non-root
|
||||
cgroups. The default value is "0".
|
||||
|
|
|
@ -6483,6 +6483,46 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
|
|||
return nbytes;
|
||||
}
|
||||
|
||||
static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
|
||||
size_t nbytes, loff_t off)
|
||||
{
|
||||
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
|
||||
unsigned int nr_retries = MAX_RECLAIM_RETRIES;
|
||||
unsigned long nr_to_reclaim, nr_reclaimed = 0;
|
||||
int err;
|
||||
|
||||
buf = strstrip(buf);
|
||||
err = page_counter_memparse(buf, "", &nr_to_reclaim);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
while (nr_reclaimed < nr_to_reclaim) {
|
||||
unsigned long reclaimed;
|
||||
|
||||
if (signal_pending(current))
|
||||
return -EINTR;
|
||||
|
||||
/*
|
||||
* This is the final attempt, drain percpu lru caches in the
|
||||
* hope of introducing more evictable pages for
|
||||
* try_to_free_mem_cgroup_pages().
|
||||
*/
|
||||
if (!nr_retries)
|
||||
lru_add_drain_all();
|
||||
|
||||
reclaimed = try_to_free_mem_cgroup_pages(memcg,
|
||||
nr_to_reclaim - nr_reclaimed,
|
||||
GFP_KERNEL, true);
|
||||
|
||||
if (!reclaimed && !nr_retries--)
|
||||
return -EAGAIN;
|
||||
|
||||
nr_reclaimed += reclaimed;
|
||||
}
|
||||
|
||||
return nbytes;
|
||||
}
|
||||
|
||||
static struct cftype memory_files[] = {
|
||||
{
|
||||
.name = "current",
|
||||
|
@ -6541,6 +6581,11 @@ static struct cftype memory_files[] = {
|
|||
.seq_show = memory_oom_group_show,
|
||||
.write = memory_oom_group_write,
|
||||
},
|
||||
{
|
||||
.name = "reclaim",
|
||||
.flags = CFTYPE_NS_DELEGATABLE,
|
||||
.write = memory_reclaim,
|
||||
},
|
||||
{ } /* terminate */
|
||||
};
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include "cgroup_util.h"
|
||||
#include "../clone3/clone3_selftests.h"
|
||||
|
||||
/* Returns read len on success, or -errno on failure. */
|
||||
static ssize_t read_text(const char *path, char *buf, size_t max_len)
|
||||
{
|
||||
ssize_t len;
|
||||
|
@ -26,35 +27,29 @@ static ssize_t read_text(const char *path, char *buf, size_t max_len)
|
|||
|
||||
fd = open(path, O_RDONLY);
|
||||
if (fd < 0)
|
||||
return fd;
|
||||
return -errno;
|
||||
|
||||
len = read(fd, buf, max_len - 1);
|
||||
if (len < 0)
|
||||
goto out;
|
||||
|
||||
buf[len] = 0;
|
||||
out:
|
||||
if (len >= 0)
|
||||
buf[len] = 0;
|
||||
|
||||
close(fd);
|
||||
return len;
|
||||
return len < 0 ? -errno : len;
|
||||
}
|
||||
|
||||
/*
 * Write @len bytes from @buf to the file at @path, which is opened
 * write-only in append mode.
 *
 * Returns the number of bytes written on success, or -errno on failure
 * (from either open() or write()).
 */
static ssize_t write_text(const char *path, char *buf, ssize_t len)
{
	ssize_t written;
	int saved_errno;
	int fd;

	fd = open(path, O_WRONLY | O_APPEND);
	if (fd < 0)
		return -errno;

	written = write(fd, buf, len);
	/* Capture errno before close(), which may overwrite it. */
	saved_errno = errno;
	close(fd);

	return written < 0 ? -saved_errno : written;
}
|
||||
|
||||
char *cg_name(const char *root, const char *name)
|
||||
|
@ -87,16 +82,16 @@ char *cg_control(const char *cgroup, const char *control)
|
|||
return ret;
|
||||
}
|
||||
|
||||
/* Returns 0 on success, or -errno on failure. */
|
||||
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
ssize_t ret;
|
||||
|
||||
snprintf(path, sizeof(path), "%s/%s", cgroup, control);
|
||||
|
||||
if (read_text(path, buf, len) >= 0)
|
||||
return 0;
|
||||
|
||||
return -1;
|
||||
ret = read_text(path, buf, len);
|
||||
return ret >= 0 ? 0 : ret;
|
||||
}
|
||||
|
||||
int cg_read_strcmp(const char *cgroup, const char *control,
|
||||
|
@ -177,17 +172,15 @@ long cg_read_lc(const char *cgroup, const char *control)
|
|||
return cnt;
|
||||
}
|
||||
|
||||
/* Returns 0 on success, or -errno on failure. */
|
||||
int cg_write(const char *cgroup, const char *control, char *buf)
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
ssize_t len = strlen(buf);
|
||||
ssize_t len = strlen(buf), ret;
|
||||
|
||||
snprintf(path, sizeof(path), "%s/%s", cgroup, control);
|
||||
|
||||
if (write_text(path, buf, len) == len)
|
||||
return 0;
|
||||
|
||||
return -1;
|
||||
ret = write_text(path, buf, len);
|
||||
return ret == len ? 0 : ret;
|
||||
}
|
||||
|
||||
int cg_find_unified_root(char *root, size_t len)
|
||||
|
@ -538,6 +531,7 @@ int set_oom_adj_score(int pid, int score)
|
|||
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
ssize_t ret;
|
||||
|
||||
if (!pid)
|
||||
snprintf(path, sizeof(path), "/proc/%s/%s",
|
||||
|
@ -545,7 +539,8 @@ ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t
|
|||
else
|
||||
snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
|
||||
|
||||
return read_text(path, buf, size);
|
||||
ret = read_text(path, buf, size);
|
||||
return ret < 0 ? -1 : ret;
|
||||
}
|
||||
|
||||
int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
|
||||
|
|
|
@ -210,13 +210,17 @@ static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
|
|||
static int alloc_anon_noexit(const char *cgroup, void *arg)
|
||||
{
|
||||
int ppid = getppid();
|
||||
size_t size = (unsigned long)arg;
|
||||
char *buf, *ptr;
|
||||
|
||||
if (alloc_anon(cgroup, arg))
|
||||
return -1;
|
||||
buf = malloc(size);
|
||||
for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
|
||||
*ptr = 0;
|
||||
|
||||
while (getppid() == ppid)
|
||||
sleep(1);
|
||||
|
||||
free(buf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -679,6 +683,111 @@ cleanup:
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* This test checks that memory.reclaim reclaims the given
|
||||
* amount of memory (from both anon and file, if possible).
|
||||
*/
|
||||
static int test_memcg_reclaim(const char *root)
|
||||
{
|
||||
int ret = KSFT_FAIL, fd, retries;
|
||||
char *memcg;
|
||||
long current, expected_usage, to_reclaim;
|
||||
char buf[64];
|
||||
|
||||
memcg = cg_name(root, "memcg_test");
|
||||
if (!memcg)
|
||||
goto cleanup;
|
||||
|
||||
if (cg_create(memcg))
|
||||
goto cleanup;
|
||||
|
||||
current = cg_read_long(memcg, "memory.current");
|
||||
if (current != 0)
|
||||
goto cleanup;
|
||||
|
||||
fd = get_temp_fd();
|
||||
if (fd < 0)
|
||||
goto cleanup;
|
||||
|
||||
cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);
|
||||
|
||||
/*
|
||||
* If swap is enabled, try to reclaim from both anon and file, else try
|
||||
* to reclaim from file only.
|
||||
*/
|
||||
if (is_swap_enabled()) {
|
||||
cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
|
||||
expected_usage = MB(100);
|
||||
} else
|
||||
expected_usage = MB(50);
|
||||
|
||||
/*
|
||||
* Wait until current usage reaches the expected usage (or we run out of
|
||||
* retries).
|
||||
*/
|
||||
retries = 5;
|
||||
while (!values_close(cg_read_long(memcg, "memory.current"),
|
||||
expected_usage, 10)) {
|
||||
if (retries--) {
|
||||
sleep(1);
|
||||
continue;
|
||||
} else {
|
||||
fprintf(stderr,
|
||||
"failed to allocate %ld for memcg reclaim test\n",
|
||||
expected_usage);
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Reclaim until current reaches 30M, this makes sure we hit both anon
|
||||
* and file if swap is enabled.
|
||||
*/
|
||||
retries = 5;
|
||||
while (true) {
|
||||
int err;
|
||||
|
||||
current = cg_read_long(memcg, "memory.current");
|
||||
to_reclaim = current - MB(30);
|
||||
|
||||
/*
|
||||
* We only keep looping if we get EAGAIN, which means we could
|
||||
* not reclaim the full amount.
|
||||
*/
|
||||
if (to_reclaim <= 0)
|
||||
goto cleanup;
|
||||
|
||||
|
||||
snprintf(buf, sizeof(buf), "%ld", to_reclaim);
|
||||
err = cg_write(memcg, "memory.reclaim", buf);
|
||||
if (!err) {
|
||||
/*
|
||||
* If writing succeeds, then the written amount should have been
|
||||
* fully reclaimed (and maybe more).
|
||||
*/
|
||||
current = cg_read_long(memcg, "memory.current");
|
||||
if (!values_close(current, MB(30), 3) && current > MB(30))
|
||||
goto cleanup;
|
||||
break;
|
||||
}
|
||||
|
||||
/* The kernel could not reclaim the full amount, try again. */
|
||||
if (err == -EAGAIN && retries--)
|
||||
continue;
|
||||
|
||||
/* We got an unexpected error or ran out of retries. */
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
ret = KSFT_PASS;
|
||||
cleanup:
|
||||
cg_destroy(memcg);
|
||||
free(memcg);
|
||||
close(fd);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
|
||||
{
|
||||
long mem_max = (long)arg;
|
||||
|
@ -1181,6 +1290,7 @@ struct memcg_test {
|
|||
T(test_memcg_low),
|
||||
T(test_memcg_high),
|
||||
T(test_memcg_max),
|
||||
T(test_memcg_reclaim),
|
||||
T(test_memcg_oom_events),
|
||||
T(test_memcg_swap_max),
|
||||
T(test_memcg_sock),
|
||||
|
|
Загрузка…
Ссылка в новой задаче