Merge feature/memory-reclaim/5.15 into v5.15

* commit 'fed46d1f99d22a5a9efd06da0bf5baf6a04045d8':
  selftests: cgroup: add a selftest for memory.reclaim
  selftests: cgroup: fix unsigned comparison with less than zero
  selftests: cgroup: fix alloc_anon_noexit() instantly freeing memory
  selftests: cgroup: return -errno from cg_read()/cg_write() on failure
  memcg: introduce per-memcg reclaim interface
This commit is contained in:
Mitchell Levy 2024-10-10 15:55:57 -07:00
Родитель 36b75628d4 fed46d1f99
Коммит e704bade90
4 изменённых файлов: 198 добавлений и 27 удалений

Просмотреть файл

@ -1200,6 +1200,27 @@ PAGE_SIZE multiple when read back.
high limit is used and monitored properly, this limit's
utility is limited to providing the final safety net.
memory.reclaim
A write-only nested-keyed file which exists for all cgroups.
This is a simple interface to trigger memory reclaim in the
target cgroup.
This file accepts a single key, the number of bytes to reclaim.
No nested keys are currently supported.
Example::
echo "1G" > memory.reclaim
The interface can be later extended with nested keys to
configure the reclaim behavior. For example, specify the
type of memory to reclaim from (anon, file, ..).
Please note that the kernel can over or under reclaim from
the target cgroup. If less bytes are reclaimed than the
specified amount, -EAGAIN is returned.
memory.oom.group
A read-write single value file which exists on non-root
cgroups. The default value is "0".

Просмотреть файл

@ -6483,6 +6483,46 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
return nbytes;
}
static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
unsigned int nr_retries = MAX_RECLAIM_RETRIES;
unsigned long nr_to_reclaim, nr_reclaimed = 0;
int err;
buf = strstrip(buf);
err = page_counter_memparse(buf, "", &nr_to_reclaim);
if (err)
return err;
while (nr_reclaimed < nr_to_reclaim) {
unsigned long reclaimed;
if (signal_pending(current))
return -EINTR;
/*
* This is the final attempt, drain percpu lru caches in the
* hope of introducing more evictable pages for
* try_to_free_mem_cgroup_pages().
*/
if (!nr_retries)
lru_add_drain_all();
reclaimed = try_to_free_mem_cgroup_pages(memcg,
nr_to_reclaim - nr_reclaimed,
GFP_KERNEL, true);
if (!reclaimed && !nr_retries--)
return -EAGAIN;
nr_reclaimed += reclaimed;
}
return nbytes;
}
static struct cftype memory_files[] = {
{
.name = "current",
@ -6541,6 +6581,11 @@ static struct cftype memory_files[] = {
.seq_show = memory_oom_group_show,
.write = memory_oom_group_write,
},
{
.name = "reclaim",
.flags = CFTYPE_NS_DELEGATABLE,
.write = memory_reclaim,
},
{ } /* terminate */
};

Просмотреть файл

@ -19,6 +19,7 @@
#include "cgroup_util.h"
#include "../clone3/clone3_selftests.h"
/* Returns read len on success, or -errno on failure. */
static ssize_t read_text(const char *path, char *buf, size_t max_len)
{
ssize_t len;
@ -26,35 +27,29 @@ static ssize_t read_text(const char *path, char *buf, size_t max_len)
fd = open(path, O_RDONLY);
if (fd < 0)
return fd;
return -errno;
len = read(fd, buf, max_len - 1);
if (len < 0)
goto out;
buf[len] = 0;
out:
if (len >= 0)
buf[len] = 0;
close(fd);
return len;
return len < 0 ? -errno : len;
}
/* Returns written len on success, or -errno on failure. */
static ssize_t write_text(const char *path, char *buf, ssize_t len)
{
int fd;
fd = open(path, O_WRONLY | O_APPEND);
if (fd < 0)
return fd;
return -errno;
len = write(fd, buf, len);
if (len < 0) {
close(fd);
return len;
}
close(fd);
return len;
return len < 0 ? -errno : len;
}
char *cg_name(const char *root, const char *name)
@ -87,16 +82,16 @@ char *cg_control(const char *cgroup, const char *control)
return ret;
}
/* Returns 0 on success, or -errno on failure. */
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
{
char path[PATH_MAX];
ssize_t ret;
snprintf(path, sizeof(path), "%s/%s", cgroup, control);
if (read_text(path, buf, len) >= 0)
return 0;
return -1;
ret = read_text(path, buf, len);
return ret >= 0 ? 0 : ret;
}
int cg_read_strcmp(const char *cgroup, const char *control,
@ -177,17 +172,15 @@ long cg_read_lc(const char *cgroup, const char *control)
return cnt;
}
/* Returns 0 on success, or -errno on failure. */
int cg_write(const char *cgroup, const char *control, char *buf)
{
char path[PATH_MAX];
ssize_t len = strlen(buf);
ssize_t len = strlen(buf), ret;
snprintf(path, sizeof(path), "%s/%s", cgroup, control);
if (write_text(path, buf, len) == len)
return 0;
return -1;
ret = write_text(path, buf, len);
return ret == len ? 0 : ret;
}
int cg_find_unified_root(char *root, size_t len)
@ -538,6 +531,7 @@ int set_oom_adj_score(int pid, int score)
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
char path[PATH_MAX];
ssize_t ret;
if (!pid)
snprintf(path, sizeof(path), "/proc/%s/%s",
@ -545,7 +539,8 @@ ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t
else
snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
return read_text(path, buf, size);
ret = read_text(path, buf, size);
return ret < 0 ? -1 : ret;
}
int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)

Просмотреть файл

@ -210,13 +210,17 @@ static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
static int alloc_anon_noexit(const char *cgroup, void *arg)
{
int ppid = getppid();
size_t size = (unsigned long)arg;
char *buf, *ptr;
if (alloc_anon(cgroup, arg))
return -1;
buf = malloc(size);
for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
*ptr = 0;
while (getppid() == ppid)
sleep(1);
free(buf);
return 0;
}
@ -679,6 +683,111 @@ cleanup:
return ret;
}
/*
* This test checks that memory.reclaim reclaims the given
* amount of memory (from both anon and file, if possible).
*/
static int test_memcg_reclaim(const char *root)
{
int ret = KSFT_FAIL, fd, retries;
char *memcg;
long current, expected_usage, to_reclaim;
char buf[64];
memcg = cg_name(root, "memcg_test");
if (!memcg)
goto cleanup;
if (cg_create(memcg))
goto cleanup;
current = cg_read_long(memcg, "memory.current");
if (current != 0)
goto cleanup;
fd = get_temp_fd();
if (fd < 0)
goto cleanup;
cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);
/*
* If swap is enabled, try to reclaim from both anon and file, else try
* to reclaim from file only.
*/
if (is_swap_enabled()) {
cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
expected_usage = MB(100);
} else
expected_usage = MB(50);
/*
* Wait until current usage reaches the expected usage (or we run out of
* retries).
*/
retries = 5;
while (!values_close(cg_read_long(memcg, "memory.current"),
expected_usage, 10)) {
if (retries--) {
sleep(1);
continue;
} else {
fprintf(stderr,
"failed to allocate %ld for memcg reclaim test\n",
expected_usage);
goto cleanup;
}
}
/*
* Reclaim until current reaches 30M, this makes sure we hit both anon
* and file if swap is enabled.
*/
retries = 5;
while (true) {
int err;
current = cg_read_long(memcg, "memory.current");
to_reclaim = current - MB(30);
/*
* We only keep looping if we get EAGAIN, which means we could
* not reclaim the full amount.
*/
if (to_reclaim <= 0)
goto cleanup;
snprintf(buf, sizeof(buf), "%ld", to_reclaim);
err = cg_write(memcg, "memory.reclaim", buf);
if (!err) {
/*
* If writing succeeds, then the written amount should have been
* fully reclaimed (and maybe more).
*/
current = cg_read_long(memcg, "memory.current");
if (!values_close(current, MB(30), 3) && current > MB(30))
goto cleanup;
break;
}
/* The kernel could not reclaim the full amount, try again. */
if (err == -EAGAIN && retries--)
continue;
/* We got an unexpected error or ran out of retries. */
goto cleanup;
}
ret = KSFT_PASS;
cleanup:
cg_destroy(memcg);
free(memcg);
close(fd);
return ret;
}
static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
{
long mem_max = (long)arg;
@ -1181,6 +1290,7 @@ struct memcg_test {
T(test_memcg_low),
T(test_memcg_high),
T(test_memcg_max),
T(test_memcg_reclaim),
T(test_memcg_oom_events),
T(test_memcg_swap_max),
T(test_memcg_sock),