bpf: implement numbers iterator
Implement the first open-coded iterator type over a range of integers. It's public API consists of: - bpf_iter_num_new() constructor, which accepts [start, end) range (that is, start is inclusive, end is exclusive). - bpf_iter_num_next() which will keep returning read-only pointer to int until the range is exhausted, at which point NULL will be returned. If bpf_iter_num_next() is kept calling after this, NULL will be persistently returned. - bpf_iter_num_destroy() destructor, which needs to be called at some point to clean up iterator state. BPF verifier enforces that iterator destructor is called at some point before BPF program exits. Note that `start = end = X` is a valid combination to setup an empty iterator. bpf_iter_num_new() will return 0 (success) for any such combination. If bpf_iter_num_new() detects invalid combination of input arguments, it returns error, resets iterator state to, effectively, empty iterator, so any subsequent call to bpf_iter_num_next() will keep returning NULL. BPF verifier has no knowledge that returned integers are in the [start, end) value range, as both `start` and `end` are not statically known and enforced: they are runtime values. While the implementation is pretty trivial, some care needs to be taken to avoid overflows and underflows. Subsequent selftests will validate correctness of [start, end) semantics, especially around extremes (INT_MIN and INT_MAX). Similarly to bpf_loop(), we enforce that no more than BPF_MAX_LOOPS can be specified. bpf_iter_num_{new,next,destroy}() is a logical evolution from bounded BPF loops and bpf_loop() helper and is the basis for implementing ergonomic BPF loops with no statically known or verified bounds. Subsequent patches implement bpf_for() macro, demonstrating how this can be wrapped into something that works and feels like a normal for() loop in C language. Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Link: https://lore.kernel.org/r/20230308184121.1165081-5-andrii@kernel.org Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Родитель
06accc8779
Коммит
6018e1f407
|
@ -1617,8 +1617,12 @@ struct bpf_array {
|
|||
#define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */
|
||||
#define MAX_TAIL_CALL_CNT 33
|
||||
|
||||
/* Maximum number of loops for bpf_loop */
|
||||
#define BPF_MAX_LOOPS BIT(23)
|
||||
/* Maximum number of loops for bpf_loop and bpf_iter_num.
|
||||
* It's enum to expose it (and thus make it discoverable) through BTF.
|
||||
*/
|
||||
enum {
|
||||
BPF_MAX_LOOPS = 8 * 1024 * 1024,
|
||||
};
|
||||
|
||||
#define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \
|
||||
BPF_F_RDONLY_PROG | \
|
||||
|
|
|
@ -7112,4 +7112,12 @@ enum {
|
|||
BPF_F_TIMER_ABS = (1ULL << 0),
|
||||
};
|
||||
|
||||
/* BPF numbers iterator state */
|
||||
struct bpf_iter_num {
|
||||
/* opaque iterator state; having __u64 here allows to preserve correct
|
||||
* alignment requirements in vmlinux.h, generated from BTF
|
||||
*/
|
||||
__u64 __opaque[1];
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
#endif /* _UAPI__LINUX_BPF_H__ */
|
||||
|
|
|
@ -776,3 +776,73 @@ const struct bpf_func_proto bpf_loop_proto = {
|
|||
.arg3_type = ARG_PTR_TO_STACK_OR_NULL,
|
||||
.arg4_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
struct bpf_iter_num_kern {
|
||||
int cur; /* current value, inclusive */
|
||||
int end; /* final value, exclusive */
|
||||
} __aligned(8);
|
||||
|
||||
__diag_push();
|
||||
__diag_ignore_all("-Wmissing-prototypes",
|
||||
"Global functions as their definitions will be in vmlinux BTF");
|
||||
|
||||
__bpf_kfunc int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end)
|
||||
{
|
||||
struct bpf_iter_num_kern *s = (void *)it;
|
||||
|
||||
BUILD_BUG_ON(sizeof(struct bpf_iter_num_kern) != sizeof(struct bpf_iter_num));
|
||||
BUILD_BUG_ON(__alignof__(struct bpf_iter_num_kern) != __alignof__(struct bpf_iter_num));
|
||||
|
||||
BTF_TYPE_EMIT(struct btf_iter_num);
|
||||
|
||||
/* start == end is legit, it's an empty range and we'll just get NULL
|
||||
* on first (and any subsequent) bpf_iter_num_next() call
|
||||
*/
|
||||
if (start > end) {
|
||||
s->cur = s->end = 0;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* avoid overflows, e.g., if start == INT_MIN and end == INT_MAX */
|
||||
if ((s64)end - (s64)start > BPF_MAX_LOOPS) {
|
||||
s->cur = s->end = 0;
|
||||
return -E2BIG;
|
||||
}
|
||||
|
||||
/* user will call bpf_iter_num_next() first,
|
||||
* which will set s->cur to exactly start value;
|
||||
* underflow shouldn't matter
|
||||
*/
|
||||
s->cur = start - 1;
|
||||
s->end = end;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
__bpf_kfunc int *bpf_iter_num_next(struct bpf_iter_num* it)
|
||||
{
|
||||
struct bpf_iter_num_kern *s = (void *)it;
|
||||
|
||||
/* check failed initialization or if we are done (same behavior);
|
||||
* need to be careful about overflow, so convert to s64 for checks,
|
||||
* e.g., if s->cur == s->end == INT_MAX, we can't just do
|
||||
* s->cur + 1 >= s->end
|
||||
*/
|
||||
if ((s64)(s->cur + 1) >= s->end) {
|
||||
s->cur = s->end = 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
s->cur++;
|
||||
|
||||
return &s->cur;
|
||||
}
|
||||
|
||||
__bpf_kfunc void bpf_iter_num_destroy(struct bpf_iter_num *it)
|
||||
{
|
||||
struct bpf_iter_num_kern *s = (void *)it;
|
||||
|
||||
s->cur = s->end = 0;
|
||||
}
|
||||
|
||||
__diag_pop();
|
||||
|
|
|
@ -2411,6 +2411,9 @@ BTF_ID_FLAGS(func, bpf_rcu_read_lock)
|
|||
BTF_ID_FLAGS(func, bpf_rcu_read_unlock)
|
||||
BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW)
|
||||
BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY)
|
||||
BTF_SET8_END(common_btf_ids)
|
||||
|
||||
static const struct btf_kfunc_id_set common_kfunc_set = {
|
||||
|
|
|
@ -7112,4 +7112,12 @@ enum {
|
|||
BPF_F_TIMER_ABS = (1ULL << 0),
|
||||
};
|
||||
|
||||
/* BPF numbers iterator state */
|
||||
struct bpf_iter_num {
|
||||
/* opaque iterator state; having __u64 here allows to preserve correct
|
||||
* alignment requirements in vmlinux.h, generated from BTF
|
||||
*/
|
||||
__u64 __opaque[1];
|
||||
} __attribute__((aligned(8)));
|
||||
|
||||
#endif /* _UAPI__LINUX_BPF_H__ */
|
||||
|
|
Загрузка…
Ссылка в новой задаче