Merge branch 'Add bpf_loop helper'
Joanne Koong says:

====================

This patchset adds a new helper, bpf_loop.

One of the complexities of using for loops in bpf programs is that the
verifier needs to ensure that the loop terminates under every possible
execution of its logic. As such, there is a limit on how many iterations
the loop can do. The bpf_loop helper moves the loop logic into the
kernel and can thereby guarantee that the loop will always terminate.
This removes much of the complexity the verifier otherwise needs to
check, as well as the constraint on the number of iterations that can be
run. From the test results, we see that using bpf_loop in place of the
traditional for loop led to a decrease in verification time and number
of bpf instructions by ~99%. The benchmark results show that as the
number of iterations increases, the overhead per iteration decreases.

The high-level overview of the patches:
Patch 1 - kernel-side + API changes for adding bpf_loop
Patch 2 - tests
Patch 3 - use bpf_loop in strobemeta + pyperf600 and measure verifier performance
Patch 4 - benchmark for throughput + latency of bpf_loop call

v3 -> v4:
~ Address nits: use usleep for triggering bpf programs, fix copyright style

v2 -> v3:
~ Rerun benchmarks on physical machine, update results
~ Propagate original error codes in the verifier

v1 -> v2:
~ Change helper name to bpf_loop (instead of bpf_for_each)
~ Set max nr_loops (~8 million loops) for bpf_loop call
~ Split tests + strobemeta/pyperf600 changes into two patches
~ Add new ops_report_final helper for outputting throughput and latency

====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
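As a quick orientation before the diffs, here is an illustrative sketch (not part of the patchset; the program and callback names are invented, and it assumes a libbpf recent enough to declare bpf_loop) of what the new helper looks like from a BPF program, following the callback signature documented in the patch below:

	// SPDX-License-Identifier: GPL-2.0
	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>

	char _license[] SEC("license") = "GPL";

	/* long (*callback_fn)(u32 index, void *ctx) is the signature the
	 * verifier checks; the callback must be a static function and the
	 * ctx pointer must point to the stack (or be NULL).
	 */
	static int sum_callback(__u32 index, void *data)
	{
		int *sum = data;

		*sum += index;	/* index runs 0 .. nr_loops - 1 */
		return 0;	/* 0 continues the loop, 1 stops early */
	}

	SEC("fentry/__x64_sys_getpgid")
	int sum_prog(void *ctx)
	{
		int sum = 0;

		/* the loop runs in the kernel; the verifier checks the
		 * callback body once, regardless of the iteration count
		 */
		bpf_loop(1000, sum_callback, &sum, 0);
		return 0;
	}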
This commit is contained in: b98057ef73
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
@@ -2164,6 +2164,7 @@ extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
 extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
 extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
 extern const struct bpf_func_proto bpf_find_vma_proto;
+extern const struct bpf_func_proto bpf_loop_proto;
 
 const struct bpf_func_proto *tracing_prog_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
@@ -4957,6 +4957,30 @@ union bpf_attr {
  *		**-ENOENT** if *task->mm* is NULL, or no vma contains *addr*.
  *		**-EBUSY** if failed to try lock mmap_lock.
  *		**-EINVAL** for invalid **flags**.
+ *
+ * long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags)
+ *	Description
+ *		For **nr_loops**, call **callback_fn** function
+ *		with **callback_ctx** as the context parameter.
+ *		The **callback_fn** should be a static function and
+ *		the **callback_ctx** should be a pointer to the stack.
+ *		The **flags** is used to control certain aspects of the helper.
+ *		Currently, the **flags** must be 0. Currently, nr_loops is
+ *		limited to 1 << 23 (~8 million) loops.
+ *
+ *		long (\*callback_fn)(u32 index, void \*ctx);
+ *
+ *		where **index** is the current index in the loop. The index
+ *		is zero-indexed.
+ *
+ *		If **callback_fn** returns 0, the helper will continue to the next
+ *		loop. If return value is 1, the helper will skip the rest of
+ *		the loops and return. Other return values are not used now,
+ *		and will be rejected by the verifier.
+ *
+ *	Return
+ *		The number of loops performed, **-EINVAL** for invalid **flags**,
+ *		**-E2BIG** if **nr_loops** exceeds the maximum number of loops.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5140,6 +5164,7 @@ union bpf_attr {
 	FN(skc_to_unix_sock),		\
 	FN(kallsyms_lookup_name),	\
 	FN(find_vma),			\
+	FN(loop),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
@@ -714,3 +714,38 @@ const struct bpf_func_proto bpf_for_each_map_elem_proto = {
 	.arg3_type	= ARG_PTR_TO_STACK_OR_NULL,
 	.arg4_type	= ARG_ANYTHING,
 };
+
+/* maximum number of loops */
+#define MAX_LOOPS	BIT(23)
+
+BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx,
+	   u64, flags)
+{
+	bpf_callback_t callback = (bpf_callback_t)callback_fn;
+	u64 ret;
+	u32 i;
+
+	if (flags)
+		return -EINVAL;
+	if (nr_loops > MAX_LOOPS)
+		return -E2BIG;
+
+	for (i = 0; i < nr_loops; i++) {
+		ret = callback((u64)i, (u64)(long)callback_ctx, 0, 0, 0);
+		/* return value: 0 - continue, 1 - stop and return */
+		if (ret)
+			return i + 1;
+	}
+
+	return i;
+}
+
+const struct bpf_func_proto bpf_loop_proto = {
+	.func		= bpf_loop,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_ANYTHING,
+	.arg2_type	= ARG_PTR_TO_FUNC,
+	.arg3_type	= ARG_PTR_TO_STACK_OR_NULL,
+	.arg4_type	= ARG_ANYTHING,
+};
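A worked example of the return-value convention implemented above: if the callback never returns 1, the helper returns nr_loops (the final value of i); if the callback first returns 1 at index i, the helper returns i + 1, i.e. the number of times the callback actually ran. So with nr_loops = 400 and a callback that returns 1 at index 50, bpf_loop returns 51, which is exactly what the selftest below asserts via stop_index + 1.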
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
@@ -1378,6 +1378,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_ringbuf_query_proto;
 	case BPF_FUNC_for_each_map_elem:
 		return &bpf_for_each_map_elem_proto;
+	case BPF_FUNC_loop:
+		return &bpf_loop_proto;
 	default:
 		break;
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
@@ -6085,6 +6085,27 @@ static int set_map_elem_callback_state(struct bpf_verifier_env *env,
 	return 0;
 }
 
+static int set_loop_callback_state(struct bpf_verifier_env *env,
+				   struct bpf_func_state *caller,
+				   struct bpf_func_state *callee,
+				   int insn_idx)
+{
+	/* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
+	 *	    u64 flags);
+	 * callback_fn(u32 index, void *callback_ctx);
+	 */
+	callee->regs[BPF_REG_1].type = SCALAR_VALUE;
+	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
+
+	/* unused */
+	__mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
+	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+
+	callee->in_callback_fn = true;
+	return 0;
+}
+
 static int set_timer_callback_state(struct bpf_verifier_env *env,
 				    struct bpf_func_state *caller,
 				    struct bpf_func_state *callee,
@@ -6458,13 +6479,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 			return err;
 	}
 
-	if (func_id == BPF_FUNC_tail_call) {
-		err = check_reference_leak(env);
-		if (err) {
-			verbose(env, "tail_call would lead to reference leak\n");
-			return err;
-		}
-	} else if (is_release_function(func_id)) {
+	if (is_release_function(func_id)) {
 		err = release_reference(env, meta.ref_obj_id);
 		if (err) {
 			verbose(env, "func %s#%d reference has not been acquired before\n",
@@ -6475,41 +6490,46 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 
 	regs = cur_regs(env);
 
-	/* check that flags argument in get_local_storage(map, flags) is 0,
-	 * this is required because get_local_storage() can't return an error.
-	 */
-	if (func_id == BPF_FUNC_get_local_storage &&
-	    !register_is_null(&regs[BPF_REG_2])) {
-		verbose(env, "get_local_storage() doesn't support non-zero flags\n");
-		return -EINVAL;
-	}
-
-	if (func_id == BPF_FUNC_for_each_map_elem) {
+	switch (func_id) {
+	case BPF_FUNC_tail_call:
+		err = check_reference_leak(env);
+		if (err) {
+			verbose(env, "tail_call would lead to reference leak\n");
+			return err;
+		}
+		break;
+	case BPF_FUNC_get_local_storage:
+		/* check that flags argument in get_local_storage(map, flags) is 0,
+		 * this is required because get_local_storage() can't return an error.
+		 */
+		if (!register_is_null(&regs[BPF_REG_2])) {
+			verbose(env, "get_local_storage() doesn't support non-zero flags\n");
+			return -EINVAL;
+		}
+		break;
+	case BPF_FUNC_for_each_map_elem:
 		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
 					set_map_elem_callback_state);
-		if (err < 0)
-			return -EINVAL;
-	}
-
-	if (func_id == BPF_FUNC_timer_set_callback) {
+		break;
+	case BPF_FUNC_timer_set_callback:
 		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
 					set_timer_callback_state);
-		if (err < 0)
-			return -EINVAL;
-	}
-
-	if (func_id == BPF_FUNC_find_vma) {
+		break;
+	case BPF_FUNC_find_vma:
 		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
 					set_find_vma_callback_state);
-		if (err < 0)
-			return -EINVAL;
-	}
-
-	if (func_id == BPF_FUNC_snprintf) {
-		err = check_bpf_snprintf_call(env, regs);
-		if (err < 0)
-			return err;
-	}
+		break;
+	case BPF_FUNC_snprintf:
+		err = check_bpf_snprintf_call(env, regs);
+		break;
+	case BPF_FUNC_loop:
+		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
+					set_loop_callback_state);
+		break;
+	}
+
+	if (err)
+		return err;
 
 	/* reset caller saved regs */
 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
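The register setup in set_loop_callback_state above is what pins down the callback's contract for the verifier: R1 (the loop index) is a plain scalar with no tracked value, R2 is copied from the caller's third argument (callback_ctx), and R3-R5 are marked uninitialized, so a callback that touches anything beyond its two declared parameters fails verification.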
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
@@ -4957,6 +4957,30 @@ union bpf_attr {
 *		**-ENOENT** if *task->mm* is NULL, or no vma contains *addr*.
 *		**-EBUSY** if failed to try lock mmap_lock.
 *		**-EINVAL** for invalid **flags**.
+ *
+ * long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags)
+ *	Description
+ *		For **nr_loops**, call **callback_fn** function
+ *		with **callback_ctx** as the context parameter.
+ *		The **callback_fn** should be a static function and
+ *		the **callback_ctx** should be a pointer to the stack.
+ *		The **flags** is used to control certain aspects of the helper.
+ *		Currently, the **flags** must be 0. Currently, nr_loops is
+ *		limited to 1 << 23 (~8 million) loops.
+ *
+ *		long (\*callback_fn)(u32 index, void \*ctx);
+ *
+ *		where **index** is the current index in the loop. The index
+ *		is zero-indexed.
+ *
+ *		If **callback_fn** returns 0, the helper will continue to the next
+ *		loop. If return value is 1, the helper will skip the rest of
+ *		the loops and return. Other return values are not used now,
+ *		and will be rejected by the verifier.
+ *
+ *	Return
+ *		The number of loops performed, **-EINVAL** for invalid **flags**,
+ *		**-E2BIG** if **nr_loops** exceeds the maximum number of loops.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5140,6 +5164,7 @@ union bpf_attr {
 	FN(skc_to_unix_sock),		\
 	FN(kallsyms_lookup_name),	\
 	FN(find_vma),			\
+	FN(loop),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
@@ -531,6 +531,7 @@ $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h
 $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \
 			    $(OUTPUT)/perfbuf_bench.skel.h
 $(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h
+$(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h
 $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
 $(OUTPUT)/bench: LDLIBS += -lm
 $(OUTPUT)/bench: $(OUTPUT)/bench.o \
@@ -540,7 +541,8 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
 		 $(OUTPUT)/bench_rename.o \
 		 $(OUTPUT)/bench_trigger.o \
 		 $(OUTPUT)/bench_ringbufs.o \
-		 $(OUTPUT)/bench_bloom_filter_map.o
+		 $(OUTPUT)/bench_bloom_filter_map.o \
+		 $(OUTPUT)/bench_bpf_loop.o
 	$(call msg,BINARY,,$@)
 	$(Q)$(CC) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
 
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
@@ -134,6 +134,39 @@ void hits_drops_report_final(struct bench_res res[], int res_cnt)
 	       total_ops_mean, total_ops_stddev);
 }
 
+void ops_report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+	double hits_per_sec, hits_per_prod;
+
+	hits_per_sec = res->hits / 1000000.0 / (delta_ns / 1000000000.0);
+	hits_per_prod = hits_per_sec / env.producer_cnt;
+
+	printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0);
+
+	printf("hits %8.3lfM/s (%7.3lfM/prod)\n", hits_per_sec, hits_per_prod);
+}
+
+void ops_report_final(struct bench_res res[], int res_cnt)
+{
+	double hits_mean = 0.0, hits_stddev = 0.0;
+	int i;
+
+	for (i = 0; i < res_cnt; i++)
+		hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
+
+	if (res_cnt > 1) {
+		for (i = 0; i < res_cnt; i++)
+			hits_stddev += (hits_mean - res[i].hits / 1000000.0) *
+				       (hits_mean - res[i].hits / 1000000.0) /
+				       (res_cnt - 1.0);
+
+		hits_stddev = sqrt(hits_stddev);
+	}
+	printf("Summary: throughput %8.3lf \u00B1 %5.3lf M ops/s (%7.3lfM ops/prod), ",
+	       hits_mean, hits_stddev, hits_mean / env.producer_cnt);
+	printf("latency %8.3lf ns/op\n", 1000.0 / hits_mean * env.producer_cnt);
+}
+
 const char *argp_program_version = "benchmark";
 const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
 const char argp_program_doc[] =
@@ -171,10 +204,12 @@ static const struct argp_option opts[] = {
 
 extern struct argp bench_ringbufs_argp;
 extern struct argp bench_bloom_map_argp;
+extern struct argp bench_bpf_loop_argp;
 
 static const struct argp_child bench_parsers[] = {
 	{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
 	{ &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 },
+	{ &bench_bpf_loop_argp, 0, "bpf_loop helper benchmark", 0 },
 	{},
 };
 
@@ -373,6 +408,7 @@ extern const struct bench bench_bloom_update;
 extern const struct bench bench_bloom_false_positive;
 extern const struct bench bench_hashmap_without_bloom;
 extern const struct bench bench_hashmap_with_bloom;
+extern const struct bench bench_bpf_loop;
 
 static const struct bench *benchs[] = {
 	&bench_count_global,
@@ -404,6 +440,7 @@ static const struct bench *benchs[] = {
 	&bench_bloom_false_positive,
 	&bench_hashmap_without_bloom,
 	&bench_hashmap_with_bloom,
+	&bench_bpf_loop,
 };
 
 static void setup_benchmark()
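The latency printed by ops_report_final is derived from throughput rather than measured directly: hits_mean is total millions of operations per second across all producers, so 1000.0 / hits_mean is the average ns per operation overall, and multiplying by env.producer_cnt yields the per-producer latency. With invented numbers for illustration, 4 producers at a combined 50 M ops/s would print 1000 / 50 * 4 = 80 ns/op.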
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
@@ -59,6 +59,8 @@ void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns);
 void hits_drops_report_final(struct bench_res res[], int res_cnt);
 void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns);
 void false_hits_report_final(struct bench_res res[], int res_cnt);
+void ops_report_progress(int iter, struct bench_res *res, long delta_ns);
+void ops_report_final(struct bench_res res[], int res_cnt);
 
 static inline __u64 get_time_ns() {
 	struct timespec t;
diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <argp.h>
+#include "bench.h"
+#include "bpf_loop_bench.skel.h"
+
+/* BPF triggering benchmarks */
+static struct ctx {
+	struct bpf_loop_bench *skel;
+} ctx;
+
+static struct {
+	__u32 nr_loops;
+} args = {
+	.nr_loops = 10,
+};
+
+enum {
+	ARG_NR_LOOPS = 4000,
+};
+
+static const struct argp_option opts[] = {
+	{ "nr_loops", ARG_NR_LOOPS, "nr_loops", 0,
+		"Set number of loops for the bpf_loop helper"},
+	{},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	switch (key) {
+	case ARG_NR_LOOPS:
+		args.nr_loops = strtol(arg, NULL, 10);
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+
+	return 0;
+}
+
+/* exported into benchmark runner */
+const struct argp bench_bpf_loop_argp = {
+	.options = opts,
+	.parser = parse_arg,
+};
+
+static void validate(void)
+{
+	if (env.consumer_cnt != 1) {
+		fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+		exit(1);
+	}
+}
+
+static void *producer(void *input)
+{
+	while (true)
+		/* trigger the bpf program */
+		syscall(__NR_getpgid);
+
+	return NULL;
+}
+
+static void *consumer(void *input)
+{
+	return NULL;
+}
+
+static void measure(struct bench_res *res)
+{
+	res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+}
+
+static void setup(void)
+{
+	struct bpf_link *link;
+
+	setup_libbpf();
+
+	ctx.skel = bpf_loop_bench__open_and_load();
+	if (!ctx.skel) {
+		fprintf(stderr, "failed to open skeleton\n");
+		exit(1);
+	}
+
+	link = bpf_program__attach(ctx.skel->progs.benchmark);
+	if (!link) {
+		fprintf(stderr, "failed to attach program!\n");
+		exit(1);
+	}
+
+	ctx.skel->bss->nr_loops = args.nr_loops;
+}
+
+const struct bench bench_bpf_loop = {
+	.name = "bpf-loop",
+	.validate = validate,
+	.setup = setup,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = ops_report_progress,
+	.report_final = ops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh b/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+for t in 1 4 8 12 16; do
+for i in 10 100 500 1000 5000 10000 50000 100000 500000 1000000; do
+subtitle "nr_loops: $i, nr_threads: $t"
+	summarize_ops "bpf_loop: " \
+	    "$($RUN_BENCH -p $t --nr_loops $i bpf-loop)"
+	printf "\n"
+done
+done
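A single configuration can also be run outside this sweep by invoking the runner directly, e.g. ./bench -p 4 --nr_loops 1000 bpf-loop (a hypothetical invocation following the $RUN_BENCH usage above; the wrapper's exact default flags come from run_common.sh).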
diff --git a/tools/testing/selftests/bpf/benchs/run_common.sh b/tools/testing/selftests/bpf/benchs/run_common.sh
@@ -33,6 +33,14 @@ function percentage()
 	echo "$*" | sed -E "s/.*Percentage\s=\s+([0-9]+\.[0-9]+).*/\1/"
 }
 
+function ops()
+{
+	echo -n "throughput: "
+	echo -n "$*" | sed -E "s/.*throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/"
+	echo -n -e ", latency: "
+	echo "$*" | sed -E "s/.*latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/"
+}
+
 function total()
 {
 	echo "$*" | sed -E "s/.*total operations\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
@@ -52,6 +60,13 @@ function summarize_percentage()
 	printf "%-20s %s%%\n" "$bench" "$(percentage $summary)"
 }
 
+function summarize_ops()
+{
+	bench="$1"
+	summary=$(echo $2 | tail -n1)
+	printf "%-20s %s\n" "$bench" "$(ops $summary)"
+}
+
 function summarize_total()
 {
 	bench="$1"
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_loop.c b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "bpf_loop.skel.h"
+
+static void check_nr_loops(struct bpf_loop *skel)
+{
+	struct bpf_link *link;
+
+	link = bpf_program__attach(skel->progs.test_prog);
+	if (!ASSERT_OK_PTR(link, "link"))
+		return;
+
+	/* test 0 loops */
+	skel->bss->nr_loops = 0;
+
+	usleep(1);
+
+	ASSERT_EQ(skel->bss->nr_loops_returned, skel->bss->nr_loops,
+		  "0 loops");
+
+	/* test 500 loops */
+	skel->bss->nr_loops = 500;
+
+	usleep(1);
+
+	ASSERT_EQ(skel->bss->nr_loops_returned, skel->bss->nr_loops,
+		  "500 loops");
+	ASSERT_EQ(skel->bss->g_output, (500 * 499) / 2, "g_output");
+
+	/* test exceeding the max limit */
+	skel->bss->nr_loops = -1;
+
+	usleep(1);
+
+	ASSERT_EQ(skel->bss->err, -E2BIG, "over max limit");
+
+	bpf_link__destroy(link);
+}
+
+static void check_callback_fn_stop(struct bpf_loop *skel)
+{
+	struct bpf_link *link;
+
+	link = bpf_program__attach(skel->progs.test_prog);
+	if (!ASSERT_OK_PTR(link, "link"))
+		return;
+
+	/* testing that loop is stopped when callback_fn returns 1 */
+	skel->bss->nr_loops = 400;
+	skel->data->stop_index = 50;
+
+	usleep(1);
+
+	ASSERT_EQ(skel->bss->nr_loops_returned, skel->data->stop_index + 1,
+		  "nr_loops_returned");
+	ASSERT_EQ(skel->bss->g_output, (50 * 49) / 2,
+		  "g_output");
+
+	bpf_link__destroy(link);
+}
+
+static void check_null_callback_ctx(struct bpf_loop *skel)
+{
+	struct bpf_link *link;
+
+	/* check that user is able to pass in a null callback_ctx */
+	link = bpf_program__attach(skel->progs.prog_null_ctx);
+	if (!ASSERT_OK_PTR(link, "link"))
+		return;
+
+	skel->bss->nr_loops = 10;
+
+	usleep(1);
+
+	ASSERT_EQ(skel->bss->nr_loops_returned, skel->bss->nr_loops,
+		  "nr_loops_returned");
+
+	bpf_link__destroy(link);
+}
+
+static void check_invalid_flags(struct bpf_loop *skel)
+{
+	struct bpf_link *link;
+
+	/* check that passing in non-zero flags returns -EINVAL */
+	link = bpf_program__attach(skel->progs.prog_invalid_flags);
+	if (!ASSERT_OK_PTR(link, "link"))
+		return;
+
+	usleep(1);
+
+	ASSERT_EQ(skel->bss->err, -EINVAL, "err");
+
+	bpf_link__destroy(link);
+}
+
+static void check_nested_calls(struct bpf_loop *skel)
+{
+	__u32 nr_loops = 100, nested_callback_nr_loops = 4;
+	struct bpf_link *link;
+
+	/* check that nested calls are supported */
+	link = bpf_program__attach(skel->progs.prog_nested_calls);
+	if (!ASSERT_OK_PTR(link, "link"))
+		return;
+
+	skel->bss->nr_loops = nr_loops;
+	skel->bss->nested_callback_nr_loops = nested_callback_nr_loops;
+
+	usleep(1);
+
+	ASSERT_EQ(skel->bss->nr_loops_returned, nr_loops * nested_callback_nr_loops
+		  * nested_callback_nr_loops, "nr_loops_returned");
+	ASSERT_EQ(skel->bss->g_output, (4 * 3) / 2 * nested_callback_nr_loops
+		  * nr_loops, "g_output");
+
+	bpf_link__destroy(link);
+}
+
+void test_bpf_loop(void)
+{
+	struct bpf_loop *skel;
+
+	skel = bpf_loop__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "bpf_loop__open_and_load"))
+		return;
+
+	skel->bss->pid = getpid();
+
+	if (test__start_subtest("check_nr_loops"))
+		check_nr_loops(skel);
+	if (test__start_subtest("check_callback_fn_stop"))
+		check_callback_fn_stop(skel);
+	if (test__start_subtest("check_null_callback_ctx"))
+		check_null_callback_ctx(skel);
+	if (test__start_subtest("check_invalid_flags"))
+		check_invalid_flags(skel);
+	if (test__start_subtest("check_nested_calls"))
+		check_nested_calls(skel);
+
+	bpf_loop__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -115,6 +115,12 @@ void test_verif_scale_pyperf600()
 	scale_test("pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
 }
 
+void test_verif_scale_pyperf600_bpf_loop(void)
+{
+	/* use the bpf_loop helper */
+	scale_test("pyperf600_bpf_loop.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
 void test_verif_scale_pyperf600_nounroll()
 {
 	/* no unroll at all.
@@ -165,6 +171,12 @@ void test_verif_scale_strobemeta()
 	scale_test("strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
 }
 
+void test_verif_scale_strobemeta_bpf_loop(void)
+{
+	/* use the bpf_loop helper */
+	scale_test("strobemeta_bpf_loop.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
 void test_verif_scale_strobemeta_nounroll1()
 {
 	/* no unroll, tiny loops */
diff --git a/tools/testing/selftests/bpf/progs/bpf_loop.c b/tools/testing/selftests/bpf/progs/bpf_loop.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct callback_ctx {
+	int output;
+};
+
+/* These should be set by the user program */
+u32 nested_callback_nr_loops;
+u32 stop_index = -1;
+u32 nr_loops;
+int pid;
+
+/* Making these global variables so that the userspace program
+ * can verify the output through the skeleton
+ */
+int nr_loops_returned;
+int g_output;
+int err;
+
+static int callback(__u32 index, void *data)
+{
+	struct callback_ctx *ctx = data;
+
+	if (index >= stop_index)
+		return 1;
+
+	ctx->output += index;
+
+	return 0;
+}
+
+static int empty_callback(__u32 index, void *data)
+{
+	return 0;
+}
+
+static int nested_callback2(__u32 index, void *data)
+{
+	nr_loops_returned += bpf_loop(nested_callback_nr_loops, callback, data, 0);
+
+	return 0;
+}
+
+static int nested_callback1(__u32 index, void *data)
+{
+	bpf_loop(nested_callback_nr_loops, nested_callback2, data, 0);
+	return 0;
+}
+
+SEC("fentry/__x64_sys_nanosleep")
+int test_prog(void *ctx)
+{
+	struct callback_ctx data = {};
+
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 0;
+
+	nr_loops_returned = bpf_loop(nr_loops, callback, &data, 0);
+
+	if (nr_loops_returned < 0)
+		err = nr_loops_returned;
+	else
+		g_output = data.output;
+
+	return 0;
+}
+
+SEC("fentry/__x64_sys_nanosleep")
+int prog_null_ctx(void *ctx)
+{
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 0;
+
+	nr_loops_returned = bpf_loop(nr_loops, empty_callback, NULL, 0);
+
+	return 0;
+}
+
+SEC("fentry/__x64_sys_nanosleep")
+int prog_invalid_flags(void *ctx)
+{
+	struct callback_ctx data = {};
+
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 0;
+
+	err = bpf_loop(nr_loops, callback, &data, 1);
+
+	return 0;
+}
+
+SEC("fentry/__x64_sys_nanosleep")
+int prog_nested_calls(void *ctx)
+{
+	struct callback_ctx data = {};
+
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 0;
+
+	nr_loops_returned = 0;
+	bpf_loop(nr_loops, nested_callback1, &data, 0);
+
+	g_output = data.output;
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+u32 nr_loops;
+long hits;
+
+static int empty_callback(__u32 index, void *data)
+{
+	return 0;
+}
+
+SEC("fentry/__x64_sys_getpgid")
+int benchmark(void *ctx)
+{
+	for (int i = 0; i < 1000; i++) {
+		bpf_loop(nr_loops, empty_callback, NULL, 0);
+
+		__sync_add_and_fetch(&hits, nr_loops);
+	}
+	return 0;
+}
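Note what the counter above measures: each triggering getpgid syscall runs bpf_loop 1000 times and adds nr_loops to hits per call, so hits counts empty_callback invocations rather than program runs; the reported throughput is therefore per-iteration, and the derived latency approximates the overhead of a single bpf_loop iteration.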
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -159,6 +159,59 @@ struct {
 	__uint(value_size, sizeof(long long) * 127);
 } stackmap SEC(".maps");
 
+#ifdef USE_BPF_LOOP
+struct process_frame_ctx {
+	int cur_cpu;
+	int32_t *symbol_counter;
+	void *frame_ptr;
+	FrameData *frame;
+	PidData *pidData;
+	Symbol *sym;
+	Event *event;
+	bool done;
+};
+
+#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))
+
+static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx)
+{
+	int zero = 0;
+	void *frame_ptr = ctx->frame_ptr;
+	PidData *pidData = ctx->pidData;
+	FrameData *frame = ctx->frame;
+	int32_t *symbol_counter = ctx->symbol_counter;
+	int cur_cpu = ctx->cur_cpu;
+	Event *event = ctx->event;
+	Symbol *sym = ctx->sym;
+
+	if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) {
+		int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
+		int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
+
+		if (!symbol_id) {
+			bpf_map_update_elem(&symbolmap, sym, &zero, 0);
+			symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
+			if (!symbol_id) {
+				ctx->done = true;
+				return 1;
+			}
+		}
+		if (*symbol_id == new_symbol_id)
+			(*symbol_counter)++;
+
+		barrier_var(i);
+		if (i >= STACK_MAX_LEN)
+			return 1;
+
+		event->stack[i] = *symbol_id;
+
+		event->stack_len = i + 1;
+		frame_ptr = frame->f_back;
+	}
+	return 0;
+}
+#endif /* USE_BPF_LOOP */
+
 #ifdef GLOBAL_FUNC
 __noinline
 #elif defined(SUBPROGS)
@@ -228,11 +281,26 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
 		int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
 		if (symbol_counter == NULL)
 			return 0;
+#ifdef USE_BPF_LOOP
+	struct process_frame_ctx ctx = {
+		.cur_cpu = cur_cpu,
+		.symbol_counter = symbol_counter,
+		.frame_ptr = frame_ptr,
+		.frame = &frame,
+		.pidData = pidData,
+		.sym = &sym,
+		.event = event,
+	};
+
+	bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0);
+	if (ctx.done)
+		return 0;
+#else
 #ifdef NO_UNROLL
 #pragma clang loop unroll(disable)
 #else
 #pragma clang loop unroll(full)
-#endif
+#endif /* NO_UNROLL */
 		/* Unwind python stack */
 		for (int i = 0; i < STACK_MAX_LEN; ++i) {
 			if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
@@ -251,6 +319,7 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
 				frame_ptr = frame.f_back;
 			}
 		}
+#endif /* USE_BPF_LOOP */
 		event->stack_complete = frame_ptr == NULL;
 	} else {
 		event->stack_complete = 1;
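The conversion above illustrates the general recipe for moving an unrolled loop into bpf_loop: hoist every local the loop body reads or writes into a context struct, pass its address as callback_ctx, return 1 from the callback to break, and use a flag field (done here) to tell the caller the loop exited on an error path rather than by completing.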
diff --git a/tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c b/tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#define STACK_MAX_LEN 600
+#define USE_BPF_LOOP
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -445,6 +445,48 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
 	return payload;
 }
 
+#ifdef USE_BPF_LOOP
+enum read_type {
+	READ_INT_VAR,
+	READ_MAP_VAR,
+	READ_STR_VAR,
+};
+
+struct read_var_ctx {
+	struct strobemeta_payload *data;
+	void *tls_base;
+	struct strobemeta_cfg *cfg;
+	void *payload;
+	/* value gets mutated */
+	struct strobe_value_generic *value;
+	enum read_type type;
+};
+
+static int read_var_callback(__u32 index, struct read_var_ctx *ctx)
+{
+	switch (ctx->type) {
+	case READ_INT_VAR:
+		if (index >= STROBE_MAX_INTS)
+			return 1;
+		read_int_var(ctx->cfg, index, ctx->tls_base, ctx->value, ctx->data);
+		break;
+	case READ_MAP_VAR:
+		if (index >= STROBE_MAX_MAPS)
+			return 1;
+		ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base,
+					    ctx->value, ctx->data, ctx->payload);
+		break;
+	case READ_STR_VAR:
+		if (index >= STROBE_MAX_STRS)
+			return 1;
+		ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base,
+					     ctx->value, ctx->data, ctx->payload);
+		break;
+	}
+	return 0;
+}
+#endif /* USE_BPF_LOOP */
+
 /*
  * read_strobe_meta returns NULL, if no metadata was read; otherwise returns
  * pointer to *right after* payload ends
@@ -475,11 +517,36 @@ static void *read_strobe_meta(struct task_struct *task,
 	 */
 	tls_base = (void *)task;
 
+#ifdef USE_BPF_LOOP
+	struct read_var_ctx ctx = {
+		.cfg = cfg,
+		.tls_base = tls_base,
+		.value = &value,
+		.data = data,
+		.payload = payload,
+	};
+	int err;
+
+	ctx.type = READ_INT_VAR;
+	err = bpf_loop(STROBE_MAX_INTS, read_var_callback, &ctx, 0);
+	if (err != STROBE_MAX_INTS)
+		return NULL;
+
+	ctx.type = READ_STR_VAR;
+	err = bpf_loop(STROBE_MAX_STRS, read_var_callback, &ctx, 0);
+	if (err != STROBE_MAX_STRS)
+		return NULL;
+
+	ctx.type = READ_MAP_VAR;
+	err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0);
+	if (err != STROBE_MAX_MAPS)
+		return NULL;
+#else
 #ifdef NO_UNROLL
 #pragma clang loop unroll(disable)
 #else
 #pragma unroll
-#endif
+#endif /* NO_UNROLL */
 	for (int i = 0; i < STROBE_MAX_INTS; ++i) {
 		read_int_var(cfg, i, tls_base, &value, data);
 	}
@@ -487,7 +554,7 @@ static void *read_strobe_meta(struct task_struct *task,
 #pragma clang loop unroll(disable)
 #else
 #pragma unroll
-#endif
+#endif /* NO_UNROLL */
 	for (int i = 0; i < STROBE_MAX_STRS; ++i) {
 		payload += read_str_var(cfg, i, tls_base, &value, data, payload);
 	}
@@ -495,10 +562,12 @@ static void *read_strobe_meta(struct task_struct *task,
 #pragma clang loop unroll(disable)
 #else
 #pragma unroll
-#endif
+#endif /* NO_UNROLL */
 	for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
 		payload = read_map_var(cfg, i, tls_base, &value, data, payload);
 	}
+#endif /* USE_BPF_LOOP */
 
 	/*
 	 * return pointer right after end of payload, so it's possible to
 	 * calculate exact amount of useful data that needs to be sent
diff --git a/tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c b/tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2021 Facebook */
+
+#define STROBE_MAX_INTS 2
+#define STROBE_MAX_STRS 25
+#define STROBE_MAX_MAPS 100
+#define STROBE_MAX_MAP_ENTRIES 20
+#define USE_BPF_LOOP
+#include "strobemeta.h"