diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 7f0e225bf630..e82b7039fc66 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -299,7 +299,8 @@ struct bpf_prog_aux { u32 max_pkt_offset; u32 stack_depth; u32 id; - u32 func_cnt; + u32 func_cnt; /* used by non-func prog as the number of func progs */ + u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ bool offload_requested; struct bpf_prog **func; void *jit_data; /* JIT specific data. arch dependent */ @@ -317,7 +318,8 @@ struct bpf_prog_aux { #endif struct bpf_prog_offload *offload; struct btf *btf; - u32 type_id; /* type id for this prog/func */ + struct bpf_func_info *func_info; + u32 func_info_cnt; union { struct work_struct work; struct rcu_head rcu; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 204382f46fd8..11f5df1092d9 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -204,7 +204,6 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) struct bpf_subprog_info { u32 start; /* insn idx of function entry point */ u16 stack_depth; /* max. stack depth used by this function */ - u32 type_id; /* btf type_id for this subprog */ }; /* single container for all structs diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 23e2031a43d4..597afdbc1ab9 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2268,6 +2268,19 @@ union bpf_attr { * * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags) + * Description + * Will remove *pop* bytes from a *msg* starting at byte *start*. + * This may result in **ENOMEM** errors under certain situations if + * an allocation and copy are required due to a full ring buffer. + * However, the helper will try to avoid doing the allocation + * if possible. Other errors can occur if input parameters are + * invalid either due to *start* byte not being valid part of msg + * payload and/or *pop* value being to large. + * + * Return + * 0 on success, or a negative erro in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2360,7 +2373,8 @@ union bpf_attr { FN(map_push_elem), \ FN(map_pop_elem), \ FN(map_peek_elem), \ - FN(msg_push_data), + FN(msg_push_data), \ + FN(msg_pop_data), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 539e8575689f..f93ed667546f 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -411,7 +411,8 @@ static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) /* prog->aux->name will be ignored if full btf name is available */ if (prog->aux->btf) { - type = btf_type_by_id(prog->aux->btf, prog->aux->type_id); + type = btf_type_by_id(prog->aux->btf, + prog->aux->func_info[prog->aux->func_idx].type_id); func_name = btf_name_by_offset(prog->aux->btf, type->name_off); snprintf(sym, (size_t)(end - sym), "_%s", func_name); return; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 998377808102..85cbeec06e50 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1214,6 +1214,7 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) bpf_prog_free_id(prog, do_idr_lock); bpf_prog_kallsyms_del_all(prog); btf_put(prog->aux->btf); + kvfree(prog->aux->func_info); call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); } @@ -2219,46 +2220,28 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, } if (prog->aux->btf) { + u32 krec_size = sizeof(struct bpf_func_info); u32 ucnt, urec_size; info.btf_id = btf_id(prog->aux->btf); ucnt = info.func_info_cnt; - info.func_info_cnt = prog->aux->func_cnt ? : 1; + info.func_info_cnt = prog->aux->func_info_cnt; urec_size = info.func_info_rec_size; - info.func_info_rec_size = sizeof(struct bpf_func_info); + info.func_info_rec_size = krec_size; if (ucnt) { /* expect passed-in urec_size is what the kernel expects */ if (urec_size != info.func_info_rec_size) return -EINVAL; if (bpf_dump_raw_ok()) { - struct bpf_func_info kern_finfo; char __user *user_finfo; - u32 i, insn_offset; user_finfo = u64_to_user_ptr(info.func_info); - if (prog->aux->func_cnt) { - ucnt = min_t(u32, info.func_info_cnt, ucnt); - insn_offset = 0; - for (i = 0; i < ucnt; i++) { - kern_finfo.insn_offset = insn_offset; - kern_finfo.type_id = prog->aux->func[i]->aux->type_id; - if (copy_to_user(user_finfo, &kern_finfo, - sizeof(kern_finfo))) - return -EFAULT; - - /* func[i]->len holds the prog len */ - insn_offset += prog->aux->func[i]->len; - user_finfo += urec_size; - } - } else { - kern_finfo.insn_offset = 0; - kern_finfo.type_id = prog->aux->type_id; - if (copy_to_user(user_finfo, &kern_finfo, - sizeof(kern_finfo))) - return -EFAULT; - } + ucnt = min_t(u32, info.func_info_cnt, ucnt); + if (copy_to_user(user_finfo, prog->aux->func_info, + krec_size * ucnt)) + return -EFAULT; } else { info.func_info_cnt = 0; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4ce049cd30a3..9584438fa2cc 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4650,7 +4650,7 @@ static int check_btf_func(struct bpf_prog *prog, struct bpf_verifier_env *env, { u32 i, nfuncs, urec_size, min_size, prev_offset; u32 krec_size = sizeof(struct bpf_func_info); - struct bpf_func_info krecord = {}; + struct bpf_func_info *krecord = NULL; const struct btf_type *type; void __user *urecord; struct btf *btf; @@ -4682,6 +4682,12 @@ static int check_btf_func(struct bpf_prog *prog, struct bpf_verifier_env *env, urecord = u64_to_user_ptr(attr->func_info); min_size = min_t(u32, krec_size, urec_size); + krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN); + if (!krecord) { + ret = -ENOMEM; + goto free_btf; + } + for (i = 0; i < nfuncs; i++) { ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size); if (ret) { @@ -4696,59 +4702,69 @@ static int check_btf_func(struct bpf_prog *prog, struct bpf_verifier_env *env, goto free_btf; } - if (copy_from_user(&krecord, urecord, min_size)) { + if (copy_from_user(&krecord[i], urecord, min_size)) { ret = -EFAULT; goto free_btf; } /* check insn_offset */ if (i == 0) { - if (krecord.insn_offset) { + if (krecord[i].insn_offset) { verbose(env, "nonzero insn_offset %u for the first func info record", - krecord.insn_offset); + krecord[i].insn_offset); ret = -EINVAL; goto free_btf; } - } else if (krecord.insn_offset <= prev_offset) { + } else if (krecord[i].insn_offset <= prev_offset) { verbose(env, "same or smaller insn offset (%u) than previous func info record (%u)", - krecord.insn_offset, prev_offset); + krecord[i].insn_offset, prev_offset); ret = -EINVAL; goto free_btf; } - if (env->subprog_info[i].start != krecord.insn_offset) { + if (env->subprog_info[i].start != krecord[i].insn_offset) { verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n"); ret = -EINVAL; goto free_btf; } /* check type_id */ - type = btf_type_by_id(btf, krecord.type_id); + type = btf_type_by_id(btf, krecord[i].type_id); if (!type || BTF_INFO_KIND(type->info) != BTF_KIND_FUNC) { verbose(env, "invalid type id %d in func info", - krecord.type_id); + krecord[i].type_id); ret = -EINVAL; goto free_btf; } - if (i == 0) - prog->aux->type_id = krecord.type_id; - env->subprog_info[i].type_id = krecord.type_id; - - prev_offset = krecord.insn_offset; + prev_offset = krecord[i].insn_offset; urecord += urec_size; } prog->aux->btf = btf; + prog->aux->func_info = krecord; + prog->aux->func_info_cnt = nfuncs; return 0; free_btf: btf_put(btf); + kvfree(krecord); return ret; } +static void adjust_btf_func(struct bpf_verifier_env *env) +{ + int i; + + if (!env->prog->aux->func_info) + return; + + for (i = 0; i < env->subprog_cnt; i++) + env->prog->aux->func_info[i].insn_offset = env->subprog_info[i].start; +} + /* check %cur's range satisfies %old's */ static bool range_within(struct bpf_reg_state *old, struct bpf_reg_state *cur) @@ -6043,15 +6059,17 @@ static int jit_subprogs(struct bpf_verifier_env *env) if (bpf_prog_calc_tag(func[i])) goto out_free; func[i]->is_func = 1; + func[i]->aux->func_idx = i; + /* the btf and func_info will be freed only at prog->aux */ + func[i]->aux->btf = prog->aux->btf; + func[i]->aux->func_info = prog->aux->func_info; + /* Use bpf_prog_F_tag to indicate functions in stack traces. * Long term would need debug info to populate names */ func[i]->aux->name[0] = 'F'; func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; func[i]->jit_requested = 1; - /* the btf will be freed only at prog->aux */ - func[i]->aux->btf = prog->aux->btf; - func[i]->aux->type_id = env->subprog_info[i].type_id; func[i] = bpf_int_jit_compile(func[i]); if (!func[i]->jited) { err = -ENOTSUPP; @@ -6572,6 +6590,9 @@ skip_full_check: convert_pseudo_ld_imm64(env); } + if (ret == 0) + adjust_btf_func(env); + err_release_maps: if (!env->prog->aux->used_maps) /* if we didn't copy map pointers into bpf_prog_info, release diff --git a/net/core/filter.c b/net/core/filter.c index aa274679965d..bd0df75dc7b6 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -463,7 +463,8 @@ static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp) bool ldx_off_ok = offset <= S16_MAX; *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H); - *insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset); + if (offset) + *insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset); *insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP, size, 2 + endian + (!ldx_off_ok * 2)); if (ldx_off_ok) { @@ -2424,6 +2425,174 @@ static const struct bpf_func_proto bpf_msg_push_data_proto = { .arg4_type = ARG_ANYTHING, }; +static void sk_msg_shift_left(struct sk_msg *msg, int i) +{ + int prev; + + do { + prev = i; + sk_msg_iter_var_next(i); + msg->sg.data[prev] = msg->sg.data[i]; + } while (i != msg->sg.end); + + sk_msg_iter_prev(msg, end); +} + +static void sk_msg_shift_right(struct sk_msg *msg, int i) +{ + struct scatterlist tmp, sge; + + sk_msg_iter_next(msg, end); + sge = sk_msg_elem_cpy(msg, i); + sk_msg_iter_var_next(i); + tmp = sk_msg_elem_cpy(msg, i); + + while (i != msg->sg.end) { + msg->sg.data[i] = sge; + sk_msg_iter_var_next(i); + sge = tmp; + tmp = sk_msg_elem_cpy(msg, i); + } +} + +BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, + u32, len, u64, flags) +{ + u32 i = 0, l, space, offset = 0; + u64 last = start + len; + int pop; + + if (unlikely(flags)) + return -EINVAL; + + /* First find the starting scatterlist element */ + i = msg->sg.start; + do { + l = sk_msg_elem(msg, i)->length; + + if (start < offset + l) + break; + offset += l; + sk_msg_iter_var_next(i); + } while (i != msg->sg.end); + + /* Bounds checks: start and pop must be inside message */ + if (start >= offset + l || last >= msg->sg.size) + return -EINVAL; + + space = MAX_MSG_FRAGS - sk_msg_elem_used(msg); + + pop = len; + /* --------------| offset + * -| start |-------- len -------| + * + * |----- a ----|-------- pop -------|----- b ----| + * |______________________________________________| length + * + * + * a: region at front of scatter element to save + * b: region at back of scatter element to save when length > A + pop + * pop: region to pop from element, same as input 'pop' here will be + * decremented below per iteration. + * + * Two top-level cases to handle when start != offset, first B is non + * zero and second B is zero corresponding to when a pop includes more + * than one element. + * + * Then if B is non-zero AND there is no space allocate space and + * compact A, B regions into page. If there is space shift ring to + * the rigth free'ing the next element in ring to place B, leaving + * A untouched except to reduce length. + */ + if (start != offset) { + struct scatterlist *nsge, *sge = sk_msg_elem(msg, i); + int a = start; + int b = sge->length - pop - a; + + sk_msg_iter_var_next(i); + + if (pop < sge->length - a) { + if (space) { + sge->length = a; + sk_msg_shift_right(msg, i); + nsge = sk_msg_elem(msg, i); + get_page(sg_page(sge)); + sg_set_page(nsge, + sg_page(sge), + b, sge->offset + pop + a); + } else { + struct page *page, *orig; + u8 *to, *from; + + page = alloc_pages(__GFP_NOWARN | + __GFP_COMP | GFP_ATOMIC, + get_order(a + b)); + if (unlikely(!page)) + return -ENOMEM; + + sge->length = a; + orig = sg_page(sge); + from = sg_virt(sge); + to = page_address(page); + memcpy(to, from, a); + memcpy(to + a, from + a + pop, b); + sg_set_page(sge, page, a + b, 0); + put_page(orig); + } + pop = 0; + } else if (pop >= sge->length - a) { + sge->length = a; + pop -= (sge->length - a); + } + } + + /* From above the current layout _must_ be as follows, + * + * -| offset + * -| start + * + * |---- pop ---|---------------- b ------------| + * |____________________________________________| length + * + * Offset and start of the current msg elem are equal because in the + * previous case we handled offset != start and either consumed the + * entire element and advanced to the next element OR pop == 0. + * + * Two cases to handle here are first pop is less than the length + * leaving some remainder b above. Simply adjust the element's layout + * in this case. Or pop >= length of the element so that b = 0. In this + * case advance to next element decrementing pop. + */ + while (pop) { + struct scatterlist *sge = sk_msg_elem(msg, i); + + if (pop < sge->length) { + sge->length -= pop; + sge->offset += pop; + pop = 0; + } else { + pop -= sge->length; + sk_msg_shift_left(msg, i); + } + sk_msg_iter_var_next(i); + } + + sk_mem_uncharge(msg->sk, len - pop); + msg->sg.size -= (len - pop); + sk_msg_compute_data_pointers(msg); + return 0; +} + +static const struct bpf_func_proto bpf_msg_pop_data_proto = { + .func = bpf_msg_pop_data, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, +}; + BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) { return task_get_classid(skb); @@ -5097,6 +5266,7 @@ bool bpf_helper_changes_pkt_data(void *func) func == bpf_xdp_adjust_meta || func == bpf_msg_pull_data || func == bpf_msg_push_data || + func == bpf_msg_pop_data || func == bpf_xdp_adjust_tail || #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF) func == bpf_lwt_seg6_store_bytes || @@ -5393,6 +5563,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_msg_pull_data_proto; case BPF_FUNC_msg_push_data: return &bpf_msg_push_data_proto; + case BPF_FUNC_msg_pop_data: + return &bpf_msg_pop_data_proto; default: return bpf_base_func_proto(func_id); } diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 3b45fe530f91..a47c1cdf90fc 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -289,12 +289,23 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, { bool cork = false, enospc = msg->sg.start == msg->sg.end; struct sock *sk_redir; - u32 tosend; + u32 tosend, delta = 0; int ret; more_data: - if (psock->eval == __SK_NONE) + if (psock->eval == __SK_NONE) { + /* Track delta in msg size to add/subtract it on SK_DROP from + * returned to user copied size. This ensures user doesn't + * get a positive return code with msg_cut_data and SK_DROP + * verdict. + */ + delta = msg->sg.size; psock->eval = sk_psock_msg_verdict(sk, psock, msg); + if (msg->sg.size < delta) + delta -= msg->sg.size; + else + delta = 0; + } if (msg->cork_bytes && msg->cork_bytes > msg->sg.size && !enospc) { @@ -350,7 +361,7 @@ more_data: default: sk_msg_free_partial(sk, msg, tosend); sk_msg_apply_bytes(psock, tosend); - *copied -= tosend; + *copied -= (tosend + delta); return -EACCES; } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 7b1af8b59cd2..d4ecc66464e6 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -687,6 +687,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, struct sock *sk_redir; struct tls_rec *rec; int err = 0, send; + u32 delta = 0; bool enospc; psock = sk_psock_get(sk); @@ -694,8 +695,14 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, return tls_push_record(sk, flags, record_type); more_data: enospc = sk_msg_full(msg); - if (psock->eval == __SK_NONE) + if (psock->eval == __SK_NONE) { + delta = msg->sg.size; psock->eval = sk_psock_msg_verdict(sk, psock, msg); + if (delta < msg->sg.size) + delta -= msg->sg.size; + else + delta = 0; + } if (msg->cork_bytes && msg->cork_bytes > msg->sg.size && !enospc && !full_record) { err = -ENOSPC; @@ -743,7 +750,7 @@ more_data: msg->apply_bytes -= send; if (msg->sg.size == 0) tls_free_open_rec(sk); - *copied -= send; + *copied -= (send + delta); err = -EACCES; } diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index cbd3080e72c7..3850f8d65703 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -713,7 +713,7 @@ static int do_dump(int argc, char **argv) prev_key = NULL; - err = btf_get_from_id(info.btf_id, &btf); + err = btf__get_from_id(info.btf_id, &btf); if (err) { p_err("failed to get btf"); goto exit_free; @@ -857,7 +857,7 @@ static int do_lookup(int argc, char **argv) } /* here means bpf_map_lookup_elem() succeeded */ - err = btf_get_from_id(info.btf_id, &btf); + err = btf__get_from_id(info.btf_id, &btf); if (err) { p_err("failed to get btf"); goto exit_free; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index cb18429818ec..4e04699a2350 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -621,7 +621,7 @@ static int do_dump(int argc, char **argv) goto err_free; } - if (info.btf_id && btf_get_from_id(info.btf_id, &btf)) { + if (info.btf_id && btf__get_from_id(info.btf_id, &btf)) { p_err("failed to get btf"); goto err_free; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 23e2031a43d4..597afdbc1ab9 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2268,6 +2268,19 @@ union bpf_attr { * * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 pop, u64 flags) + * Description + * Will remove *pop* bytes from a *msg* starting at byte *start*. + * This may result in **ENOMEM** errors under certain situations if + * an allocation and copy are required due to a full ring buffer. + * However, the helper will try to avoid doing the allocation + * if possible. Other errors can occur if input parameters are + * invalid either due to *start* byte not being valid part of msg + * payload and/or *pop* value being to large. + * + * Return + * 0 on success, or a negative erro in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2360,7 +2373,8 @@ union bpf_attr { FN(map_push_elem), \ FN(map_pop_elem), \ FN(map_peek_elem), \ - FN(msg_push_data), + FN(msg_push_data), \ + FN(msg_pop_data), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 1b4a683a00fc..34d9c3619c96 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -145,6 +145,12 @@ include $(srctree)/tools/build/Makefile.include BPF_IN := $(OUTPUT)libbpf-in.o LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) +VERSION_SCRIPT := libbpf.map + +GLOBAL_SYM_COUNT = $(shell readelf -s $(BPF_IN) | \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {s++} END{print s}') +VERSIONED_SYM_COUNT = $(shell readelf -s $(OUTPUT)libbpf.so | \ + grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) CMD_TARGETS = $(LIB_FILE) @@ -158,7 +164,7 @@ TARGETS = $(CMD_TARGETS) all: fixdep all_cmd -all_cmd: $(CMD_TARGETS) +all_cmd: $(CMD_TARGETS) check $(BPF_IN): force elfdep bpfdep @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ @@ -176,7 +182,8 @@ $(BPF_IN): force elfdep bpfdep $(Q)$(MAKE) $(build)=libbpf $(OUTPUT)libbpf.so: $(BPF_IN) - $(QUIET_LINK)$(CC) --shared $^ -o $@ + $(QUIET_LINK)$(CC) --shared -Wl,--version-script=$(VERSION_SCRIPT) \ + $^ -o $@ $(OUTPUT)libbpf.a: $(BPF_IN) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ @@ -184,6 +191,18 @@ $(OUTPUT)libbpf.a: $(BPF_IN) $(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a $(QUIET_LINK)$(CXX) $^ -lelf -o $@ +check: check_abi + +check_abi: $(OUTPUT)libbpf.so + @if [ "$(GLOBAL_SYM_COUNT)" != "$(VERSIONED_SYM_COUNT)" ]; then \ + echo "Warning: Num of global symbols in $(BPF_IN)" \ + "($(GLOBAL_SYM_COUNT)) does NOT match with num of" \ + "versioned symbols in $^ ($(VERSIONED_SYM_COUNT))." \ + "Please make sure all LIBBPF_API symbols are" \ + "versioned in $(VERSION_SCRIPT)." >&2; \ + exit 1; \ + fi + define do_install if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ diff --git a/tools/lib/bpf/README.rst b/tools/lib/bpf/README.rst new file mode 100644 index 000000000000..2ced9e061c4b --- /dev/null +++ b/tools/lib/bpf/README.rst @@ -0,0 +1,139 @@ +.. SPDX-License-Identifier: GPL-2.0 + +libbpf API naming convention +============================ + +libbpf API provides access to a few logically separated groups of +functions and types. Every group has its own naming convention +described here. It's recommended to follow these conventions whenever a +new function or type is added to keep libbpf API clean and consistent. + +All types and functions provided by libbpf API should have one of the +following prefixes: ``bpf_``, ``btf_``, ``libbpf_``. + +System call wrappers +-------------------- + +System call wrappers are simple wrappers for commands supported by +sys_bpf system call. These wrappers should go to ``bpf.h`` header file +and map one-on-one to corresponding commands. + +For example ``bpf_map_lookup_elem`` wraps ``BPF_MAP_LOOKUP_ELEM`` +command of sys_bpf, ``bpf_prog_attach`` wraps ``BPF_PROG_ATTACH``, etc. + +Objects +------- + +Another class of types and functions provided by libbpf API is "objects" +and functions to work with them. Objects are high-level abstractions +such as BPF program or BPF map. They're represented by corresponding +structures such as ``struct bpf_object``, ``struct bpf_program``, +``struct bpf_map``, etc. + +Structures are forward declared and access to their fields should be +provided via corresponding getters and setters rather than directly. + +These objects are associated with corresponding parts of ELF object that +contains compiled BPF programs. + +For example ``struct bpf_object`` represents ELF object itself created +from an ELF file or from a buffer, ``struct bpf_program`` represents a +program in ELF object and ``struct bpf_map`` is a map. + +Functions that work with an object have names built from object name, +double underscore and part that describes function purpose. + +For example ``bpf_object__open`` consists of the name of corresponding +object, ``bpf_object``, double underscore and ``open`` that defines the +purpose of the function to open ELF file and create ``bpf_object`` from +it. + +Another example: ``bpf_program__load`` is named for corresponding +object, ``bpf_program``, that is separated from other part of the name +by double underscore. + +All objects and corresponding functions other than BTF related should go +to ``libbpf.h``. BTF types and functions should go to ``btf.h``. + +Auxiliary functions +------------------- + +Auxiliary functions and types that don't fit well in any of categories +described above should have ``libbpf_`` prefix, e.g. +``libbpf_get_error`` or ``libbpf_prog_type_by_name``. + +libbpf ABI +========== + +libbpf can be both linked statically or used as DSO. To avoid possible +conflicts with other libraries an application is linked with, all +non-static libbpf symbols should have one of the prefixes mentioned in +API documentation above. See API naming convention to choose the right +name for a new symbol. + +Symbol visibility +----------------- + +libbpf follow the model when all global symbols have visibility "hidden" +by default and to make a symbol visible it has to be explicitly +attributed with ``LIBBPF_API`` macro. For example: + +.. code-block:: c + + LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); + +This prevents from accidentally exporting a symbol, that is not supposed +to be a part of ABI what, in turn, improves both libbpf developer- and +user-experiences. + +ABI versionning +--------------- + +To make future ABI extensions possible libbpf ABI is versioned. +Versioning is implemented by ``libbpf.map`` version script that is +passed to linker. + +Version name is ``LIBBPF_`` prefix + three-component numeric version, +starting from ``0.0.1``. + +Every time ABI is being changed, e.g. because a new symbol is added or +semantic of existing symbol is changed, ABI version should be bumped. + +For example, if current state of ``libbpf.map`` is: + +.. code-block:: + LIBBPF_0.0.1 { + global: + bpf_func_a; + bpf_func_b; + local: + \*; + }; + +, and a new symbol ``bpf_func_c`` is being introduced, then +``libbpf.map`` should be changed like this: + +.. code-block:: + LIBBPF_0.0.1 { + global: + bpf_func_a; + bpf_func_b; + local: + \*; + }; + LIBBPF_0.0.2 { + global: + bpf_func_c; + } LIBBPF_0.0.1; + +, where new version ``LIBBPF_0.0.2`` depends on the previous +``LIBBPF_0.0.1``. + +Format of version script and ways to handle ABI changes, including +incompatible ones, described in details in [1]. + +Links +===== + +[1] https://www.akkadia.org/drepper/dsohowto.pdf + (Chapter 3. Maintaining APIs and ABIs). diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 13ddc4bd24ee..c2d641f3e16e 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -415,7 +415,7 @@ const char *btf__name_by_offset(const struct btf *btf, __u32 offset) return NULL; } -int btf_get_from_id(__u32 id, struct btf **btf) +int btf__get_from_id(__u32 id, struct btf **btf) { struct bpf_btf_info btf_info = { 0 }; __u32 len = sizeof(btf_info); @@ -466,7 +466,7 @@ int btf_get_from_id(__u32 id, struct btf **btf) goto exit_free; } - *btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL); + *btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size, NULL); if (IS_ERR(*btf)) { err = PTR_ERR(*btf); *btf = NULL; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 701ad2b6c41f..5336b2f37293 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -73,7 +73,7 @@ LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__fd(const struct btf *btf); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); -LIBBPF_API int btf_get_from_id(__u32 id, struct btf **btf); +LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log); void btf_ext__free(struct btf_ext *btf_ext); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ed4212a4c5f9..59b748ebd15f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -9,7 +9,9 @@ * Copyright (C) 2017 Nicira, Inc. */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include #include #include diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map new file mode 100644 index 000000000000..4fb29f6d7a80 --- /dev/null +++ b/tools/lib/bpf/libbpf.map @@ -0,0 +1,121 @@ +LIBBPF_0.0.1 { + global: + bpf_btf_get_fd_by_id; + bpf_create_map; + bpf_create_map_in_map; + bpf_create_map_in_map_node; + bpf_create_map_name; + bpf_create_map_node; + bpf_create_map_xattr; + bpf_load_btf; + bpf_load_program; + bpf_load_program_xattr; + bpf_map__btf_key_type_id; + bpf_map__btf_value_type_id; + bpf_map__def; + bpf_map__fd; + bpf_map__is_offload_neutral; + bpf_map__name; + bpf_map__next; + bpf_map__pin; + bpf_map__prev; + bpf_map__priv; + bpf_map__reuse_fd; + bpf_map__set_ifindex; + bpf_map__set_inner_map_fd; + bpf_map__set_priv; + bpf_map__unpin; + bpf_map_delete_elem; + bpf_map_get_fd_by_id; + bpf_map_get_next_id; + bpf_map_get_next_key; + bpf_map_lookup_and_delete_elem; + bpf_map_lookup_elem; + bpf_map_update_elem; + bpf_obj_get; + bpf_obj_get_info_by_fd; + bpf_obj_pin; + bpf_object__btf_fd; + bpf_object__close; + bpf_object__find_map_by_name; + bpf_object__find_map_by_offset; + bpf_object__find_program_by_title; + bpf_object__kversion; + bpf_object__load; + bpf_object__name; + bpf_object__next; + bpf_object__open; + bpf_object__open_buffer; + bpf_object__open_xattr; + bpf_object__pin; + bpf_object__pin_maps; + bpf_object__pin_programs; + bpf_object__priv; + bpf_object__set_priv; + bpf_object__unload; + bpf_object__unpin_maps; + bpf_object__unpin_programs; + bpf_perf_event_read_simple; + bpf_prog_attach; + bpf_prog_detach; + bpf_prog_detach2; + bpf_prog_get_fd_by_id; + bpf_prog_get_next_id; + bpf_prog_load; + bpf_prog_load_xattr; + bpf_prog_query; + bpf_prog_test_run; + bpf_program__fd; + bpf_program__is_kprobe; + bpf_program__is_perf_event; + bpf_program__is_raw_tracepoint; + bpf_program__is_sched_act; + bpf_program__is_sched_cls; + bpf_program__is_socket_filter; + bpf_program__is_tracepoint; + bpf_program__is_xdp; + bpf_program__load; + bpf_program__next; + bpf_program__nth_fd; + bpf_program__pin; + bpf_program__pin_instance; + bpf_program__prev; + bpf_program__priv; + bpf_program__set_expected_attach_type; + bpf_program__set_ifindex; + bpf_program__set_kprobe; + bpf_program__set_perf_event; + bpf_program__set_prep; + bpf_program__set_priv; + bpf_program__set_raw_tracepoint; + bpf_program__set_sched_act; + bpf_program__set_sched_cls; + bpf_program__set_socket_filter; + bpf_program__set_tracepoint; + bpf_program__set_type; + bpf_program__set_xdp; + bpf_program__title; + bpf_program__unload; + bpf_program__unpin; + bpf_program__unpin_instance; + bpf_raw_tracepoint_open; + bpf_set_link_xdp_fd; + bpf_task_fd_query; + bpf_verify_program; + btf__fd; + btf__find_by_name; + btf__free; + btf__get_from_id; + btf__name_by_offset; + btf__new; + btf__resolve_size; + btf__resolve_type; + btf__type_by_id; + libbpf_attach_type_by_name; + libbpf_get_error; + libbpf_prog_type_by_name; + libbpf_set_print; + libbpf_strerror; + local: + *; +}; diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c index d83b17f8435c..4343e40588c6 100644 --- a/tools/lib/bpf/libbpf_errno.c +++ b/tools/lib/bpf/libbpf_errno.c @@ -7,6 +7,7 @@ * Copyright (C) 2017 Nicira, Inc. */ +#undef _GNU_SOURCE #include #include diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index 686e57ce40f4..7b69519a09b1 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -113,6 +113,8 @@ static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) = (void *) BPF_FUNC_msg_pull_data; static int (*bpf_msg_push_data)(void *ctx, int start, int end, int flags) = (void *) BPF_FUNC_msg_push_data; +static int (*bpf_msg_pop_data)(void *ctx, int start, int cut, int flags) = + (void *) BPF_FUNC_msg_pop_data; static int (*bpf_bind)(void *ctx, void *addr, int addr_len) = (void *) BPF_FUNC_bind; static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) = diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 7f90d3645af8..37f947ec44ed 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -22,3 +22,4 @@ CONFIG_NET_CLS_FLOWER=m CONFIG_LWTUNNEL=y CONFIG_BPF_STREAM_PARSER=y CONFIG_XDP_SOCKETS=y +CONFIG_FTRACE_SYSCALLS=y diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index bcbda7037840..bae7308b7ec5 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -29,7 +29,6 @@ static uint32_t pass_cnt; static uint32_t error_cnt; static uint32_t skip_cnt; -static bool jit_enabled; #define CHECK(condition, format...) ({ \ int __ret = !!(condition); \ @@ -65,24 +64,6 @@ static int __base_pr(const char *format, ...) return err; } -static bool is_jit_enabled(void) -{ - const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable"; - bool enabled = false; - int sysctl_fd; - - sysctl_fd = open(jit_sysctl, 0, O_RDONLY); - if (sysctl_fd != -1) { - char tmpc; - - if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1) - enabled = (tmpc != '0'); - close(sysctl_fd); - } - - return enabled; -} - #define BTF_INFO_ENC(kind, root, vlen) \ ((!!(root) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) @@ -2547,8 +2528,8 @@ static int do_test_file(unsigned int test_num) test->btf_kv_notfound)) goto done; - if (!jit_enabled || !has_btf_ext) - goto skip_jit; + if (!has_btf_ext) + goto skip; /* get necessary program info */ info_len = sizeof(struct bpf_prog_info); @@ -2604,7 +2585,7 @@ static int do_test_file(unsigned int test_num) goto done; } - err = btf_get_from_id(info.btf_id, &btf); + err = btf__get_from_id(info.btf_id, &btf); if (CHECK(err, "cannot get btf from kernel, err: %d", err)) goto done; @@ -2636,7 +2617,7 @@ static int do_test_file(unsigned int test_num) finfo = (void *)finfo + rec_size; } -skip_jit: +skip: fprintf(stderr, "OK"); done: @@ -3270,12 +3251,6 @@ static int do_test_func_type(int test_num) err = -1; goto done; } - if (!jit_enabled) { - skip_cnt++; - fprintf(stderr, "SKIPPED, please enable sysctl bpf_jit_enable\n"); - err = 0; - goto done; - } /* get necessary lens */ info_len = sizeof(struct bpf_prog_info); @@ -3452,8 +3427,6 @@ int main(int argc, char **argv) if (args.always_log) libbpf_set_print(__base_pr, __base_pr, __base_pr); - jit_enabled = is_jit_enabled(); - if (args.raw_test) err |= test_raw(); diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index c1e688f61061..1c57abbe0945 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -524,7 +524,7 @@ static void test_bpf_obj_id(void) load_time < now - 60 || load_time > now + 60 || prog_infos[i].created_by_uid != my_uid || prog_infos[i].nr_map_ids != 1 || - *(int *)prog_infos[i].map_ids != map_infos[i].id || + *(int *)(long)prog_infos[i].map_ids != map_infos[i].id || strcmp((char *)prog_infos[i].name, expected_prog_name), "get-prog-info(fd)", "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n", @@ -539,7 +539,7 @@ static void test_bpf_obj_id(void) load_time, now, prog_infos[i].created_by_uid, my_uid, prog_infos[i].nr_map_ids, 1, - *(int *)prog_infos[i].map_ids, map_infos[i].id, + *(int *)(long)prog_infos[i].map_ids, map_infos[i].id, prog_infos[i].name, expected_prog_name)) goto done; } @@ -585,7 +585,7 @@ static void test_bpf_obj_id(void) bzero(&prog_info, sizeof(prog_info)); info_len = sizeof(prog_info); - saved_map_id = *(int *)(prog_infos[i].map_ids); + saved_map_id = *(int *)((long)prog_infos[i].map_ids); prog_info.map_ids = prog_infos[i].map_ids; prog_info.nr_map_ids = 2; err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len); @@ -593,12 +593,12 @@ static void test_bpf_obj_id(void) prog_infos[i].xlated_prog_insns = 0; CHECK(err || info_len != sizeof(struct bpf_prog_info) || memcmp(&prog_info, &prog_infos[i], info_len) || - *(int *)prog_info.map_ids != saved_map_id, + *(int *)(long)prog_info.map_ids != saved_map_id, "get-prog-info(next_id->fd)", "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n", err, errno, info_len, sizeof(struct bpf_prog_info), memcmp(&prog_info, &prog_infos[i], info_len), - *(int *)prog_info.map_ids, saved_map_id); + *(int *)(long)prog_info.map_ids, saved_map_id); close(prog_fd); } CHECK(nr_id_found != nr_iters, diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 622ade0a0957..e85a771f607b 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -79,6 +79,8 @@ int txmsg_start; int txmsg_end; int txmsg_start_push; int txmsg_end_push; +int txmsg_start_pop; +int txmsg_pop; int txmsg_ingress; int txmsg_skb; int ktls; @@ -104,6 +106,8 @@ static const struct option long_options[] = { {"txmsg_end", required_argument, NULL, 'e'}, {"txmsg_start_push", required_argument, NULL, 'p'}, {"txmsg_end_push", required_argument, NULL, 'q'}, + {"txmsg_start_pop", required_argument, NULL, 'w'}, + {"txmsg_pop", required_argument, NULL, 'x'}, {"txmsg_ingress", no_argument, &txmsg_ingress, 1 }, {"txmsg_skb", no_argument, &txmsg_skb, 1 }, {"ktls", no_argument, &ktls, 1 }, @@ -473,13 +477,27 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, clock_gettime(CLOCK_MONOTONIC, &s->end); } else { int slct, recvp = 0, recv, max_fd = fd; + float total_bytes, txmsg_pop_total; int fd_flags = O_NONBLOCK; struct timeval timeout; - float total_bytes; fd_set w; fcntl(fd, fd_flags); + /* Account for pop bytes noting each iteration of apply will + * call msg_pop_data helper so we need to account for this + * by calculating the number of apply iterations. Note user + * of the tool can create cases where no data is sent by + * manipulating pop/push/pull/etc. For example txmsg_apply 1 + * with txmsg_pop 1 will try to apply 1B at a time but each + * iteration will then pop 1B so no data will ever be sent. + * This is really only useful for testing edge cases in code + * paths. + */ total_bytes = (float)iov_count * (float)iov_length * (float)cnt; + txmsg_pop_total = txmsg_pop; + if (txmsg_apply) + txmsg_pop_total *= (total_bytes / txmsg_apply); + total_bytes -= txmsg_pop_total; err = clock_gettime(CLOCK_MONOTONIC, &s->start); if (err < 0) perror("recv start time: "); @@ -488,7 +506,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, timeout.tv_sec = 0; timeout.tv_usec = 300000; } else { - timeout.tv_sec = 1; + timeout.tv_sec = 3; timeout.tv_usec = 0; } @@ -503,7 +521,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, goto out_errno; } else if (!slct) { if (opt->verbose) - fprintf(stderr, "unexpected timeout\n"); + fprintf(stderr, "unexpected timeout: recved %zu/%f pop_total %f\n", s->bytes_recvd, total_bytes, txmsg_pop_total); errno = -EIO; clock_gettime(CLOCK_MONOTONIC, &s->end); goto out_errno; @@ -619,7 +637,7 @@ static int sendmsg_test(struct sockmap_options *opt) iov_count = 1; err = msg_loop(rx_fd, iov_count, iov_buf, cnt, &s, false, opt); - if (err && opt->verbose) + if (opt->verbose) fprintf(stderr, "msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n", iov_count, iov_buf, cnt, err); @@ -931,6 +949,39 @@ run: } } + if (txmsg_start_pop) { + i = 4; + err = bpf_map_update_elem(map_fd[5], + &i, &txmsg_start_pop, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem %i@%i (txmsg_start_pop): %d (%s)\n", + txmsg_start_pop, i, err, strerror(errno)); + goto out; + } + } else { + i = 4; + bpf_map_update_elem(map_fd[5], + &i, &txmsg_start_pop, BPF_ANY); + } + + if (txmsg_pop) { + i = 5; + err = bpf_map_update_elem(map_fd[5], + &i, &txmsg_pop, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem %i@%i (txmsg_pop): %d (%s)\n", + txmsg_pop, i, err, strerror(errno)); + goto out; + } + } else { + i = 5; + bpf_map_update_elem(map_fd[5], + &i, &txmsg_pop, BPF_ANY); + + } + if (txmsg_ingress) { int in = BPF_F_INGRESS; @@ -1082,6 +1133,11 @@ static void test_options(char *options) snprintf(tstr, OPTSTRING, "end %d,", txmsg_end); strncat(options, tstr, OPTSTRING); } + if (txmsg_start_pop) { + snprintf(tstr, OPTSTRING, "pop (%d,%d),", + txmsg_start_pop, txmsg_start_pop + txmsg_pop); + strncat(options, tstr, OPTSTRING); + } if (txmsg_ingress) strncat(options, "ingress,", OPTSTRING); if (txmsg_skb) @@ -1264,6 +1320,7 @@ static int test_mixed(int cgrp) txmsg_apply = txmsg_cork = 0; txmsg_start = txmsg_end = 0; txmsg_start_push = txmsg_end_push = 0; + txmsg_start_pop = txmsg_pop = 0; /* Test small and large iov_count values with pass/redir/apply/cork */ txmsg_pass = 1; @@ -1383,6 +1440,19 @@ static int test_start_end(int cgrp) txmsg_end = 2; txmsg_start_push = 1; txmsg_end_push = 2; + txmsg_start_pop = 1; + txmsg_pop = 1; + err = test_txmsg(cgrp); + if (err) + goto out; + + /* Cut a byte of pushed data but leave reamining in place */ + txmsg_start = 1; + txmsg_end = 2; + txmsg_start_push = 1; + txmsg_end_push = 3; + txmsg_start_pop = 1; + txmsg_pop = 1; err = test_txmsg(cgrp); if (err) goto out; @@ -1393,6 +1463,9 @@ static int test_start_end(int cgrp) opt.iov_length = 100; txmsg_cork = 1600; + txmsg_start_pop = 0; + txmsg_pop = 0; + for (i = 99; i <= 1600; i += 500) { txmsg_start = 0; txmsg_end = i; @@ -1403,6 +1476,17 @@ static int test_start_end(int cgrp) goto out; } + /* Test pop data in middle of cork */ + for (i = 99; i <= 1600; i += 500) { + txmsg_start_pop = 10; + txmsg_pop = i; + err = test_exec(cgrp, &opt); + if (err) + goto out; + } + txmsg_start_pop = 0; + txmsg_pop = 0; + /* Test start/end with cork but pull data in middle */ for (i = 199; i <= 1600; i += 500) { txmsg_start = 100; @@ -1423,6 +1507,15 @@ static int test_start_end(int cgrp) if (err) goto out; + /* Test pop with cork pulling last sg entry */ + txmsg_start_pop = 1500; + txmsg_pop = 1600; + err = test_exec(cgrp, &opt); + if (err) + goto out; + txmsg_start_pop = 0; + txmsg_pop = 0; + /* Test start/end pull of single byte in last page */ txmsg_start = 1111; txmsg_end = 1112; @@ -1432,6 +1525,13 @@ static int test_start_end(int cgrp) if (err) goto out; + /* Test pop of single byte in last page */ + txmsg_start_pop = 1111; + txmsg_pop = 1112; + err = test_exec(cgrp, &opt); + if (err) + goto out; + /* Test start/end with end < start */ txmsg_start = 1111; txmsg_end = 0; @@ -1456,7 +1556,20 @@ static int test_start_end(int cgrp) txmsg_start_push = 1601; txmsg_end_push = 1600; err = test_exec(cgrp, &opt); + if (err) + goto out; + /* Test pop with start > data */ + txmsg_start_pop = 1601; + txmsg_pop = 1; + err = test_exec(cgrp, &opt); + if (err) + goto out; + + /* Test pop with pop range > data */ + txmsg_start_pop = 1599; + txmsg_pop = 10; + err = test_exec(cgrp, &opt); out: txmsg_start = 0; txmsg_end = 0; @@ -1641,6 +1754,12 @@ int main(int argc, char **argv) case 'q': txmsg_end_push = atoi(optarg); break; + case 'w': + txmsg_start_pop = atoi(optarg); + break; + case 'x': + txmsg_pop = atoi(optarg); + break; case 'a': txmsg_apply = atoi(optarg); break; diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h index 14b8bbac004f..e7639f66a941 100644 --- a/tools/testing/selftests/bpf/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/test_sockmap_kern.h @@ -74,7 +74,7 @@ struct bpf_map_def SEC("maps") sock_bytes = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), - .max_entries = 4 + .max_entries = 6 }; struct bpf_map_def SEC("maps") sock_redir_flags = { @@ -181,8 +181,8 @@ int bpf_sockmap(struct bpf_sock_ops *skops) SEC("sk_msg1") int bpf_prog4(struct sk_msg_md *msg) { - int *bytes, zero = 0, one = 1, two = 2, three = 3; - int *start, *end, *start_push, *end_push; + int *bytes, zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; + int *start, *end, *start_push, *end_push, *start_pop, *pop; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); if (bytes) @@ -198,15 +198,19 @@ int bpf_prog4(struct sk_msg_md *msg) end_push = bpf_map_lookup_elem(&sock_bytes, &three); if (start_push && end_push) bpf_msg_push_data(msg, *start_push, *end_push, 0); + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) + bpf_msg_pop_data(msg, *start_pop, *pop, 0); return SK_PASS; } SEC("sk_msg2") int bpf_prog5(struct sk_msg_md *msg) { - int zero = 0, one = 1, two = 2, three = 3; - int *start, *end, *start_push, *end_push; - int *bytes, len1, len2 = 0, len3; + int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; + int *start, *end, *start_push, *end_push, *start_pop, *pop; + int *bytes, len1, len2 = 0, len3, len4; int err1 = -1, err2 = -1; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); @@ -247,6 +251,20 @@ int bpf_prog5(struct sk_msg_md *msg) bpf_printk("sk_msg2: length push_update %i->%i\n", len2 ? len2 : len1, len3); } + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) { + int err; + + bpf_printk("sk_msg2: pop(%i@%i)\n", + start_pop, pop); + err = bpf_msg_pop_data(msg, *start_pop, *pop, 0); + if (err) + bpf_printk("sk_msg2: pop_data err %i\n", err); + len4 = (__u64)msg->data_end - (__u64)msg->data; + bpf_printk("sk_msg2: length pop_data %i->%i\n", + len1 ? len1 : 0, len4); + } bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n", len1, err1, err2); @@ -256,8 +274,8 @@ int bpf_prog5(struct sk_msg_md *msg) SEC("sk_msg3") int bpf_prog6(struct sk_msg_md *msg) { - int *bytes, *start, *end, *start_push, *end_push, *f; - int zero = 0, one = 1, two = 2, three = 3, key = 0; + int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0; + int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f; __u64 flags = 0; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); @@ -277,6 +295,11 @@ int bpf_prog6(struct sk_msg_md *msg) if (start_push && end_push) bpf_msg_push_data(msg, *start_push, *end_push, 0); + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) + bpf_msg_pop_data(msg, *start_pop, *pop, 0); + f = bpf_map_lookup_elem(&sock_redir_flags, &zero); if (f && *f) { key = 2; @@ -292,8 +315,9 @@ int bpf_prog6(struct sk_msg_md *msg) SEC("sk_msg4") int bpf_prog7(struct sk_msg_md *msg) { - int zero = 0, one = 1, two = 2, three = 3, len1, len2 = 0, len3; - int *bytes, *start, *end, *start_push, *end_push, *f; + int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f; + int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; + int len1, len2 = 0, len3, len4; int err1 = 0, err2 = 0, key = 0; __u64 flags = 0; @@ -335,6 +359,22 @@ int bpf_prog7(struct sk_msg_md *msg) len2 ? len2 : len1, len3); } + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) { + int err; + + bpf_printk("sk_msg4: pop(%i@%i)\n", + start_pop, pop); + err = bpf_msg_pop_data(msg, *start_pop, *pop, 0); + if (err) + bpf_printk("sk_msg4: pop_data err %i\n", err); + len4 = (__u64)msg->data_end - (__u64)msg->data; + bpf_printk("sk_msg4: length pop_data %i->%i\n", + len1 ? len1 : 0, len4); + } + + f = bpf_map_lookup_elem(&sock_redir_flags, &zero); if (f && *f) { key = 2; @@ -389,8 +429,8 @@ int bpf_prog9(struct sk_msg_md *msg) SEC("sk_msg7") int bpf_prog10(struct sk_msg_md *msg) { - int *bytes, *start, *end, *start_push, *end_push; - int zero = 0, one = 1, two = 2, three = 3; + int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop; + int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); if (bytes) @@ -406,7 +446,11 @@ int bpf_prog10(struct sk_msg_md *msg) end_push = bpf_map_lookup_elem(&sock_bytes, &three); if (start_push && end_push) bpf_msg_push_data(msg, *start_push, *end_push, 0); - + start_pop = bpf_map_lookup_elem(&sock_bytes, &four); + pop = bpf_map_lookup_elem(&sock_bytes, &five); + if (start_pop && pop) + bpf_msg_pop_data(msg, *start_pop, *pop, 0); + bpf_printk("return sk drop\n"); return SK_DROP; }