selftests/bpf: Add uprobe triggering overhead benchmarks
Add benchmarks to measure the overhead of uprobes and uretprobes. Also have a
baseline (no uprobe attached) benchmark.

On my dev machine, the baseline benchmark can trigger 130M user_target()
invocations per second. When a uprobe is attached, this falls to just 700K.
With a uretprobe, we get down to 520K:

  $ sudo ./bench trig-uprobe-base -a
  Summary: hits 131.289 ± 2.872M/s

  # UPROBE
  $ sudo ./bench -a trig-uprobe-without-nop
  Summary: hits 0.729 ± 0.007M/s

  $ sudo ./bench -a trig-uprobe-with-nop
  Summary: hits 1.798 ± 0.017M/s

  # URETPROBE
  $ sudo ./bench -a trig-uretprobe-without-nop
  Summary: hits 0.508 ± 0.012M/s

  $ sudo ./bench -a trig-uretprobe-with-nop
  Summary: hits 0.883 ± 0.008M/s

So there is almost a 2.5x performance difference between probing a nop vs a
non-nop instruction for an entry uprobe, and a 1.7x difference for a uretprobe.

This means that the overhead of probing a non-nop instruction is around 1.4
microseconds for a uprobe and 2 microseconds for a uretprobe. For the nop
variants, uprobe and uretprobe overhead is down to 0.556 and 1.13
microseconds, respectively.

For comparison, just doing a very low-overhead syscall (with no BPF programs
attached anywhere) gives:

  $ sudo ./bench trig-base -a
  Summary: hits 4.830 ± 0.036M/s

So uprobes are about 2.67x slower than a pure context switch.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20211116013041.4072571-1-andrii@kernel.org
This commit is contained in:
Родитель
ebf7f6f0a6
Коммит
d41bc48bfa
|
@ -533,7 +533,9 @@ $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \
|
|||
$(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h
|
||||
$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
|
||||
$(OUTPUT)/bench: LDLIBS += -lm
|
||||
$(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
|
||||
$(OUTPUT)/bench: $(OUTPUT)/bench.o \
|
||||
$(OUTPUT)/testing_helpers.o \
|
||||
$(OUTPUT)/trace_helpers.o \
|
||||
$(OUTPUT)/bench_count.o \
|
||||
$(OUTPUT)/bench_rename.o \
|
||||
$(OUTPUT)/bench_trigger.o \
|
||||
|
|
|
@ -359,6 +359,11 @@ extern const struct bench bench_trig_kprobe;
|
|||
extern const struct bench bench_trig_fentry;
|
||||
extern const struct bench bench_trig_fentry_sleep;
|
||||
extern const struct bench bench_trig_fmodret;
|
||||
extern const struct bench bench_trig_uprobe_base;
|
||||
extern const struct bench bench_trig_uprobe_with_nop;
|
||||
extern const struct bench bench_trig_uretprobe_with_nop;
|
||||
extern const struct bench bench_trig_uprobe_without_nop;
|
||||
extern const struct bench bench_trig_uretprobe_without_nop;
|
||||
extern const struct bench bench_rb_libbpf;
|
||||
extern const struct bench bench_rb_custom;
|
||||
extern const struct bench bench_pb_libbpf;
|
||||
|
@ -385,6 +390,11 @@ static const struct bench *benchs[] = {
|
|||
&bench_trig_fentry,
|
||||
&bench_trig_fentry_sleep,
|
||||
&bench_trig_fmodret,
|
||||
&bench_trig_uprobe_base,
|
||||
&bench_trig_uprobe_with_nop,
|
||||
&bench_trig_uretprobe_with_nop,
|
||||
&bench_trig_uprobe_without_nop,
|
||||
&bench_trig_uretprobe_without_nop,
|
||||
&bench_rb_libbpf,
|
||||
&bench_rb_custom,
|
||||
&bench_pb_libbpf,
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
/* Copyright (c) 2020 Facebook */
|
||||
#include "bench.h"
|
||||
#include "trigger_bench.skel.h"
|
||||
#include "trace_helpers.h"
|
||||
|
||||
/* BPF triggering benchmarks */
|
||||
static struct trigger_ctx {
|
||||
|
@ -107,6 +108,101 @@ static void *trigger_consumer(void *input)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/* make sure call is not inlined and not avoided by compiler, so __weak and
|
||||
* inline asm volatile in the body of the function
|
||||
*
|
||||
* There is a performance difference between uprobing at nop location vs other
|
||||
* instructions. So use two different targets, one of which starts with nop
|
||||
* and another doesn't.
|
||||
*
|
||||
* GCC doesn't generate stack setup preamble for these functions due to them
|
||||
* having no input arguments and doing nothing in the body.
|
||||
*/
|
||||
/* Probe target whose body consists of a single explicit nop instruction.
 * The volatile inline asm keeps the body from being optimized away.
 */
__weak void uprobe_target_with_nop(void)
{
	__asm__ __volatile__("nop");
}
|
||||
|
||||
/* Probe target with an empty body: the volatile inline asm emits no
 * instruction of its own but still keeps the function from being elided.
 */
__weak void uprobe_target_without_nop(void)
{
	__asm__ __volatile__("");
}
|
||||
|
||||
static void *uprobe_base_producer(void *input)
|
||||
{
|
||||
while (true) {
|
||||
uprobe_target_with_nop();
|
||||
atomic_inc(&base_hits.value);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Producer thread body: call uprobe_target_with_nop() in an endless loop.
 *
 * @input: unused
 */
static void *uprobe_producer_with_nop(void *input)
{
	for (;;)
		uprobe_target_with_nop();

	/* never reached; present to match the thread-function signature */
	return NULL;
}
|
||||
|
||||
/* Producer thread body: call uprobe_target_without_nop() in an endless loop.
 *
 * @input: unused
 */
static void *uprobe_producer_without_nop(void *input)
{
	for (;;)
		uprobe_target_without_nop();

	/* never reached; present to match the thread-function signature */
	return NULL;
}
|
||||
|
||||
static void usetup(bool use_retprobe, bool use_nop)
|
||||
{
|
||||
size_t uprobe_offset;
|
||||
ssize_t base_addr;
|
||||
struct bpf_link *link;
|
||||
|
||||
setup_libbpf();
|
||||
|
||||
ctx.skel = trigger_bench__open_and_load();
|
||||
if (!ctx.skel) {
|
||||
fprintf(stderr, "failed to open skeleton\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
base_addr = get_base_addr();
|
||||
if (use_nop)
|
||||
uprobe_offset = get_uprobe_offset(&uprobe_target_with_nop, base_addr);
|
||||
else
|
||||
uprobe_offset = get_uprobe_offset(&uprobe_target_without_nop, base_addr);
|
||||
|
||||
link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
|
||||
use_retprobe,
|
||||
-1 /* all PIDs */,
|
||||
"/proc/self/exe",
|
||||
uprobe_offset);
|
||||
if (!link) {
|
||||
fprintf(stderr, "failed to attach uprobe!\n");
|
||||
exit(1);
|
||||
}
|
||||
ctx.skel->links.bench_trigger_uprobe = link;
|
||||
}
|
||||
|
||||
/* Setup callback: entry uprobe on the nop target. */
static void uprobe_setup_with_nop(void)
{
	usetup(false, true);
}
|
||||
|
||||
/* Setup callback: uretprobe on the nop target. */
static void uretprobe_setup_with_nop(void)
{
	usetup(true, true);
}
|
||||
|
||||
/* Setup callback: entry uprobe on the non-nop target. */
static void uprobe_setup_without_nop(void)
{
	usetup(false, false);
}
|
||||
|
||||
/* Setup callback: uretprobe on the non-nop target. */
static void uretprobe_setup_without_nop(void)
{
	usetup(true, false);
}
|
||||
|
||||
const struct bench bench_trig_base = {
|
||||
.name = "trig-base",
|
||||
.validate = trigger_validate,
|
||||
|
@ -182,3 +278,53 @@ const struct bench bench_trig_fmodret = {
|
|||
.report_progress = hits_drops_report_progress,
|
||||
.report_final = hits_drops_report_final,
|
||||
};
|
||||
|
||||
const struct bench bench_trig_uprobe_base = {
|
||||
.name = "trig-uprobe-base",
|
||||
.setup = NULL, /* no uprobe/uretprobe is attached */
|
||||
.producer_thread = uprobe_base_producer,
|
||||
.consumer_thread = trigger_consumer,
|
||||
.measure = trigger_base_measure,
|
||||
.report_progress = hits_drops_report_progress,
|
||||
.report_final = hits_drops_report_final,
|
||||
};
|
||||
|
||||
const struct bench bench_trig_uprobe_with_nop = {
|
||||
.name = "trig-uprobe-with-nop",
|
||||
.setup = uprobe_setup_with_nop,
|
||||
.producer_thread = uprobe_producer_with_nop,
|
||||
.consumer_thread = trigger_consumer,
|
||||
.measure = trigger_measure,
|
||||
.report_progress = hits_drops_report_progress,
|
||||
.report_final = hits_drops_report_final,
|
||||
};
|
||||
|
||||
const struct bench bench_trig_uretprobe_with_nop = {
|
||||
.name = "trig-uretprobe-with-nop",
|
||||
.setup = uretprobe_setup_with_nop,
|
||||
.producer_thread = uprobe_producer_with_nop,
|
||||
.consumer_thread = trigger_consumer,
|
||||
.measure = trigger_measure,
|
||||
.report_progress = hits_drops_report_progress,
|
||||
.report_final = hits_drops_report_final,
|
||||
};
|
||||
|
||||
const struct bench bench_trig_uprobe_without_nop = {
|
||||
.name = "trig-uprobe-without-nop",
|
||||
.setup = uprobe_setup_without_nop,
|
||||
.producer_thread = uprobe_producer_without_nop,
|
||||
.consumer_thread = trigger_consumer,
|
||||
.measure = trigger_measure,
|
||||
.report_progress = hits_drops_report_progress,
|
||||
.report_final = hits_drops_report_final,
|
||||
};
|
||||
|
||||
const struct bench bench_trig_uretprobe_without_nop = {
|
||||
.name = "trig-uretprobe-without-nop",
|
||||
.setup = uretprobe_setup_without_nop,
|
||||
.producer_thread = uprobe_producer_without_nop,
|
||||
.consumer_thread = trigger_consumer,
|
||||
.measure = trigger_measure,
|
||||
.report_progress = hits_drops_report_progress,
|
||||
.report_final = hits_drops_report_final,
|
||||
};
|
||||
|
|
|
@ -52,3 +52,10 @@ int bench_trigger_fmodret(void *ctx)
|
|||
__sync_add_and_fetch(&hits, 1);
|
||||
return -22;
|
||||
}
|
||||
|
||||
/* BPF program for the uprobe/uretprobe benchmarks: atomically adds one to
 * the hits counter each time it runs and returns 0.
 */
SEC("uprobe/self/uprobe_target")
int bench_trigger_uprobe(void *ctx)
{
	__sync_add_and_fetch(&hits, 1);
	return 0;
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче