Merge branch 'Make struct bpf_cpumask RCU safe'

David Vernet says:

====================

The struct bpf_cpumask type is currently not RCU safe. It uses the
bpf_mem_cache_{alloc,free}() APIs to allocate and release cpumasks, and
those allocations may be reused before an RCU grace period has elapsed.
We want to enable the following pattern in BPF programs:

private(MASK) static struct bpf_cpumask __kptr *global;

int BPF_PROG(prog, ...)
{
	struct bpf_cpumask *cpumask;

	bpf_rcu_read_lock();
	cpumask = global;
	if (!cpumask) {
		bpf_rcu_read_unlock();
		return -1;
	}
	bpf_cpumask_setall(cpumask);
	...
	bpf_rcu_read_unlock();
}

In other words, we want to be able to pass a kptr to KF_RCU bpf_cpumask
kfuncs without first acquiring and then releasing a reference with
bpf_cpumask_kptr_get(). This patchset enables that by making the struct
bpf_cpumask type RCU safe, and by removing the bpf_cpumask_kptr_get()
function.
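
At a high level, the fix is to defer the actual free of a cpumask to an
RCU callback, so the underlying memory cannot be reused until a grace
period has elapsed. A minimal sketch of that shape, condensed from the
kernel/bpf/cpumask.c hunk below:

static void cpumask_free_cb(struct rcu_head *head)
{
	struct bpf_cpumask *cpumask;

	/* Runs only after a full RCU grace period, so no RCU reader
	 * can still be holding a pointer to this cpumask.
	 */
	cpumask = container_of(head, struct bpf_cpumask, rcu);
	migrate_disable();
	bpf_mem_cache_free(&bpf_cpumask_ma, cpumask);
	migrate_enable();
}

/* ...and in bpf_cpumask_release(): */
if (refcount_dec_and_test(&cpumask->usage))
	call_rcu(&cpumask->rcu, cpumask_free_cb);

The free still happens under migrate_disable()/migrate_enable() because
bpf_mem_cache_free() releases memory into per-CPU caches.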
---
v1: https://lore.kernel.org/all/20230316014122.678082-2-void@manifault.com/

Changelog:
----------
v1 -> v2:
- Add kernel-doc comment for new @rcu field in struct bpf_cpumask.
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Merged by: Alexei Starovoitov <ast@kernel.org>
Date: 2023-03-16 12:28:30 -07:00
Parents: 6cb9430be1 fec2c6d14f
Commit: deb9fd64d1
7 files changed, 105 insertions(+), 88 deletions(-)

--- a/Documentation/bpf/cpumasks.rst
+++ b/Documentation/bpf/cpumasks.rst

@@ -117,12 +117,7 @@ For example:
 As mentioned and illustrated above, these ``struct bpf_cpumask *`` objects can
 also be stored in a map and used as kptrs. If a ``struct bpf_cpumask *`` is in
 a map, the reference can be removed from the map with bpf_kptr_xchg(), or
-opportunistically acquired with bpf_cpumask_kptr_get():
-
-.. kernel-doc:: kernel/bpf/cpumask.c
-   :identifiers: bpf_cpumask_kptr_get
-
-Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map:
+opportunistically acquired using RCU:
 
 .. code-block:: c
 
@@ -144,7 +139,7 @@ Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map:
 	/**
 	 * A simple example tracepoint program showing how a
 	 * struct bpf_cpumask * kptr that is stored in a map can
-	 * be acquired using the bpf_cpumask_kptr_get() kfunc.
+	 * be passed to kfuncs using RCU protection.
 	 */
 	SEC("tp_btf/cgroup_mkdir")
 	int BPF_PROG(cgrp_ancestor_example, struct cgroup *cgrp, const char *path)
@@ -158,26 +153,21 @@ Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map:
 		if (!v)
 			return -ENOENT;
 
+		bpf_rcu_read_lock();
 		/* Acquire a reference to the bpf_cpumask * kptr that's already stored in the map. */
-		kptr = bpf_cpumask_kptr_get(&v->cpumask);
-		if (!kptr)
+		kptr = v->cpumask;
+		if (!kptr) {
 			/* If no bpf_cpumask was present in the map, it's because
 			 * we're racing with another CPU that removed it with
 			 * bpf_kptr_xchg() between the bpf_map_lookup_elem()
-			 * above, and our call to bpf_cpumask_kptr_get().
-			 * bpf_cpumask_kptr_get() internally safely handles this
-			 * race, and will return NULL if the cpumask is no longer
-			 * present in the map by the time we invoke the kfunc.
+			 * above, and our load of the pointer from the map.
			 */
+			bpf_rcu_read_unlock();
 			return -EBUSY;
+		}
 
-		/* Free the reference we just took above. Note that the
-		 * original struct bpf_cpumask * kptr is still in the map. It will
-		 * be freed either at a later time if another context deletes
-		 * it from the map, or automatically by the BPF subsystem if
-		 * it's still present when the map is destroyed.
-		 */
-		bpf_cpumask_release(kptr);
+		bpf_cpumask_setall(kptr);
+		bpf_rcu_read_unlock();
 
 		return 0;
 	}

--- a/kernel/bpf/cpumask.c
+++ b/kernel/bpf/cpumask.c

@@ -9,6 +9,7 @@
 /**
  * struct bpf_cpumask - refcounted BPF cpumask wrapper structure
  * @cpumask: The actual cpumask embedded in the struct.
+ * @rcu: The RCU head used to free the cpumask with RCU safety.
  * @usage: Object reference counter. When the refcount goes to 0, the
  *	   memory is released back to the BPF allocator, which provides
  *	   RCU safety.
@@ -24,6 +25,7 @@
  */
 struct bpf_cpumask {
 	cpumask_t cpumask;
+	struct rcu_head rcu;
 	refcount_t usage;
 };
 
@@ -80,32 +82,14 @@ __bpf_kfunc struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask)
 	return cpumask;
 }
 
-/**
- * bpf_cpumask_kptr_get() - Attempt to acquire a reference to a BPF cpumask
- *			    stored in a map.
- * @cpumaskp: A pointer to a BPF cpumask map value.
- *
- * Attempts to acquire a reference to a BPF cpumask stored in a map value. The
- * cpumask returned by this function must either be embedded in a map as a
- * kptr, or freed with bpf_cpumask_release(). This function may return NULL if
- * no BPF cpumask was found in the specified map value.
- */
-__bpf_kfunc struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumaskp)
+static void cpumask_free_cb(struct rcu_head *head)
 {
 	struct bpf_cpumask *cpumask;
 
-	/* The BPF memory allocator frees memory backing its caches in an RCU
-	 * callback. Thus, we can safely use RCU to ensure that the cpumask is
-	 * safe to read.
-	 */
-	rcu_read_lock();
-	cpumask = READ_ONCE(*cpumaskp);
-	if (cpumask && !refcount_inc_not_zero(&cpumask->usage))
-		cpumask = NULL;
-	rcu_read_unlock();
-
-	return cpumask;
+	cpumask = container_of(head, struct bpf_cpumask, rcu);
+	migrate_disable();
+	bpf_mem_cache_free(&bpf_cpumask_ma, cpumask);
+	migrate_enable();
 }
 
 /**
@@ -121,11 +105,8 @@ __bpf_kfunc void bpf_cpumask_release(struct bpf_cpumask *cpumask)
 	if (!cpumask)
 		return;
 
-	if (refcount_dec_and_test(&cpumask->usage)) {
-		migrate_disable();
-		bpf_mem_cache_free(&bpf_cpumask_ma, cpumask);
-		migrate_enable();
-	}
+	if (refcount_dec_and_test(&cpumask->usage))
+		call_rcu(&cpumask->rcu, cpumask_free_cb);
 }
 
 /**
@@ -426,7 +407,6 @@ BTF_SET8_START(cpumask_kfunc_btf_ids)
 BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE | KF_TRUSTED_ARGS)
 BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
-BTF_ID_FLAGS(func, bpf_cpumask_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_cpumask_first, KF_RCU)
 BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_RCU)
 BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_RCU)

--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c

@@ -4599,6 +4599,7 @@ static bool in_rcu_cs(struct bpf_verifier_env *env)
 BTF_SET_START(rcu_protected_types)
 BTF_ID(struct, prog_test_ref_kfunc)
 BTF_ID(struct, cgroup)
+BTF_ID(struct, bpf_cpumask)
 BTF_SET_END(rcu_protected_types)
 
 static bool rcu_protected_object(const struct btf *btf, u32 btf_id)

--- a/tools/testing/selftests/bpf/prog_tests/cpumask.c
+++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c

@@ -16,7 +16,7 @@ static const char * const cpumask_success_testcases[] = {
 	"test_copy_any_anyand",
 	"test_insert_leave",
 	"test_insert_remove_release",
-	"test_insert_kptr_get_release",
+	"test_global_mask_rcu",
 };
 
 static void verify_success(const char *prog_name)

--- a/tools/testing/selftests/bpf/progs/cpumask_common.h
+++ b/tools/testing/selftests/bpf/progs/cpumask_common.h

@@ -9,6 +9,9 @@
 
 int err;
 
+#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
+private(MASK) static struct bpf_cpumask __kptr * global_mask;
+
 struct __cpumask_map_value {
 	struct bpf_cpumask __kptr * cpumask;
 };
@@ -23,7 +26,6 @@ struct array_map {
 struct bpf_cpumask *bpf_cpumask_create(void) __ksym;
 void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym;
 struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym;
-struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumask) __ksym;
 u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym;
 u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym;
 void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
@@ -51,6 +53,9 @@ void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym
 u32 bpf_cpumask_any(const struct cpumask *src) __ksym;
 u32 bpf_cpumask_any_and(const struct cpumask *src1, const struct cpumask *src2) __ksym;
 
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
 static inline const struct cpumask *cast(struct bpf_cpumask *cpumask)
 {
 	return (const struct cpumask *)cpumask;

--- a/tools/testing/selftests/bpf/progs/cpumask_failure.c
+++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c

@@ -94,30 +94,6 @@ int BPF_PROG(test_insert_remove_no_release, struct task_struct *task, u64 clone_
 	return 0;
 }
 
-SEC("tp_btf/task_newtask")
-__failure __msg("Unreleased reference")
-int BPF_PROG(test_kptr_get_no_release, struct task_struct *task, u64 clone_flags)
-{
-	struct bpf_cpumask *cpumask;
-	struct __cpumask_map_value *v;
-
-	cpumask = create_cpumask();
-	if (!cpumask)
-		return 0;
-
-	if (cpumask_map_insert(cpumask))
-		return 0;
-
-	v = cpumask_map_value_lookup();
-	if (!v)
-		return 0;
-
-	cpumask = bpf_cpumask_kptr_get(&v->cpumask);
-
-	/* cpumask is never released. */
-	return 0;
-}
-
 SEC("tp_btf/task_newtask")
 __failure __msg("NULL pointer passed to trusted arg0")
 int BPF_PROG(test_cpumask_null, struct task_struct *task, u64 clone_flags)
@@ -127,3 +103,65 @@ int BPF_PROG(test_cpumask_null, struct task_struct *task, u64 clone_flags)
 
 	return 0;
 }
+
+SEC("tp_btf/task_newtask")
+__failure __msg("R2 must be a rcu pointer")
+int BPF_PROG(test_global_mask_out_of_rcu, struct task_struct *task, u64 clone_flags)
+{
+	struct bpf_cpumask *local, *prev;
+
+	local = create_cpumask();
+	if (!local)
+		return 0;
+
+	prev = bpf_kptr_xchg(&global_mask, local);
+	if (prev) {
+		bpf_cpumask_release(prev);
+		err = 3;
+		return 0;
+	}
+
+	bpf_rcu_read_lock();
+	local = global_mask;
+	if (!local) {
+		err = 4;
+		bpf_rcu_read_unlock();
+		return 0;
+	}
+	bpf_rcu_read_unlock();
+
+	/* RCU region is exited before calling KF_RCU kfunc. */
+	bpf_cpumask_test_cpu(0, (const struct cpumask *)local);
+
+	return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("NULL pointer passed to trusted arg1")
+int BPF_PROG(test_global_mask_no_null_check, struct task_struct *task, u64 clone_flags)
+{
+	struct bpf_cpumask *local, *prev;
+
+	local = create_cpumask();
+	if (!local)
+		return 0;
+
+	prev = bpf_kptr_xchg(&global_mask, local);
+	if (prev) {
+		bpf_cpumask_release(prev);
+		err = 3;
+		return 0;
+	}
+
+	bpf_rcu_read_lock();
+	local = global_mask;
+
+	/* No NULL check is performed on global cpumask kptr. */
+	bpf_cpumask_test_cpu(0, (const struct cpumask *)local);
+	bpf_rcu_read_unlock();
+
+	return 0;
+}

--- a/tools/testing/selftests/bpf/progs/cpumask_success.c
+++ b/tools/testing/selftests/bpf/progs/cpumask_success.c

@@ -395,31 +395,34 @@ int BPF_PROG(test_insert_remove_release, struct task_struct *task, u64 clone_fla
 }
 
 SEC("tp_btf/task_newtask")
-int BPF_PROG(test_insert_kptr_get_release, struct task_struct *task, u64 clone_flags)
+int BPF_PROG(test_global_mask_rcu, struct task_struct *task, u64 clone_flags)
 {
-	struct bpf_cpumask *cpumask;
-	struct __cpumask_map_value *v;
+	struct bpf_cpumask *local, *prev;
 
-	cpumask = create_cpumask();
-	if (!cpumask)
+	if (!is_test_task())
 		return 0;
 
-	if (cpumask_map_insert(cpumask)) {
+	local = create_cpumask();
+	if (!local)
+		return 0;
+
+	prev = bpf_kptr_xchg(&global_mask, local);
+	if (prev) {
+		bpf_cpumask_release(prev);
 		err = 3;
 		return 0;
 	}
 
-	v = cpumask_map_value_lookup();
-	if (!v) {
+	bpf_rcu_read_lock();
+	local = global_mask;
+	if (!local) {
 		err = 4;
+		bpf_rcu_read_unlock();
 		return 0;
 	}
 
-	cpumask = bpf_cpumask_kptr_get(&v->cpumask);
-	if (cpumask)
-		bpf_cpumask_release(cpumask);
-	else
-		err = 5;
+	bpf_cpumask_test_cpu(0, (const struct cpumask *)local);
+	bpf_rcu_read_unlock();
 
 	return 0;
 }
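
For completeness, here is a hypothetical minimal userspace runner for
the new test, sketched against standard libbpf skeleton conventions.
The skeleton name and the shared err global come from this series, but
the runner itself is illustrative: the actual harness lives in
prog_tests/cpumask.c and additionally arms the PID filter consumed by
is_test_task(), which is omitted here.

#include <sys/wait.h>
#include <unistd.h>
#include "cpumask_success.skel.h"

int main(void)
{
	struct cpumask_success *skel;
	int ret = 1;

	skel = cpumask_success__open_and_load();
	if (!skel)
		return 1;

	if (cpumask_success__attach(skel))
		goto out;

	/* fork() fires the task_newtask tracepoint that the tp_btf
	 * programs above attach to.
	 */
	if (fork() == 0)
		_exit(0);
	wait(NULL);

	/* err is the shared global from cpumask_common.h; it remains 0
	 * if test_global_mask_rcu observed no unexpected state.
	 */
	ret = skel->bss->err;
out:
	cpumask_success__destroy(skel);
	return ret;
}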