/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2018 Facebook */

#include <uapi/linux/btf.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/bpf_perf_event.h>
#include <uapi/linux/types.h>
#include <linux/seq_file.h>
#include <linux/compiler.h>
#include <linux/ctype.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/sort.h>
#include <linux/bpf_verifier.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/skmsg.h>
#include <linux/perf_event.h>
#include <linux/bsearch.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
#include <net/sock.h>
#include "../tools/lib/bpf/relo_core.h"

/* BTF (BPF Type Format) is the metadata format which describes
 * the data types of BPF programs/maps. Hence, it basically focuses
 * on the C programming language, which modern BPF is primarily
 * using.
 *
 * ELF Section:
 * ~~~~~~~~~~~
 * The BTF data is stored under the ".BTF" ELF section.
 *
 * struct btf_type:
 * ~~~~~~~~~~~~~~~
 * Each 'struct btf_type' object describes a C data type.
 * Depending on the type it is describing, a 'struct btf_type'
 * object may be followed by more data. E.g., to describe an
 * array, 'struct btf_type' is followed by 'struct btf_array'.
 *
 * 'struct btf_type' and any extra data following it are
 * 4-byte aligned.
 *
 * Type section:
 * ~~~~~~~~~~~~~
 * The BTF type section contains a list of 'struct btf_type' objects.
 * Each one describes a C type. Recall from the above section
 * that a 'struct btf_type' object could be immediately followed by extra
 * data in order to describe some particular C types.
 *
 * type_id:
 * ~~~~~~~
 * Each btf_type object is identified by a type_id. The type_id
 * is implied by the location of the btf_type object in
 * the BTF type section. The first one has type_id 1, the second
 * one has type_id 2, and so on. Hence, an earlier btf_type has
 * a smaller type_id.
 *
 * A btf_type object may refer to another btf_type object by its
 * type_id (i.e. the "type" in "struct btf_type").
 *
 * NOTE that we cannot assume any reference order.
 * A btf_type object can refer to an earlier btf_type object
 * but it can also refer to a later btf_type object.
 *
 * For example, to describe "const void *", a btf_type
 * object describing "const" may refer to another btf_type
 * object describing "void *". This type reference is done
 * by specifying the type_id:
 *
 * [1] CONST (anon) type_id=2
 * [2] PTR (anon) type_id=0
 *
 * The above is the btf_verifier debug log:
 *   - Each line starting with "[?]" is a btf_type object
 *   - [?] is the type_id of the btf_type object.
 *   - CONST/PTR is the BTF_KIND_XXX
 *   - "(anon)" is the name of the type. It just
 *     happens that CONST and PTR have no name.
 *   - type_id=XXX is the 'u32 type' in btf_type
 *
 * NOTE: "void" has type_id 0
 *
 * String section:
 * ~~~~~~~~~~~~~~
 * The BTF string section contains the names used by the type section.
 * Each string is referred to by an "offset" from the beginning of the
 * string section.
 *
 * Each string is '\0' terminated.
 *
 * The first character in the string section must be '\0',
 * which is used to mean 'anonymous'. Some btf_types may not
 * have a name.
 */
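
/* Illustration (a made-up string section, not taken from any real BTF
 * blob): the bytes "\0int\0pkt\0" yield name_off 0 -> "" (anonymous),
 * name_off 1 -> "int" and name_off 5 -> "pkt".
 */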

/* BTF verification:
 *
 * To verify BTF data, two passes are needed.
 *
 * Pass #1
 * ~~~~~~~
 * The first pass is to collect all btf_type objects into
 * an array: "btf->types".
 *
 * Depending on the C type that a btf_type is describing,
 * a btf_type may be followed by extra data. We don't know
 * how many btf_types there are, and more importantly, we
 * don't know where each btf_type is located in the type section.
 *
 * Without knowing the location of each type_id, most verifications
 * cannot be done. e.g. an earlier btf_type may refer to a later
 * btf_type (recall the "const void *" above), so we cannot
 * check this type reference in the first pass.
 *
 * The first pass still does some verification (e.g.
 * checking that a name is a valid offset into the string section).
 *
 * Pass #2
 * ~~~~~~~
 * The main focus is to resolve a btf_type that is referring
 * to another type.
 *
 * We have to ensure the referring type:
 * 1) does exist in the BTF (i.e. in btf->types[])
 * 2) does not cause a loop:
 *	struct A {
 *		struct B b;
 *	};
 *
 *	struct B {
 *		struct A a;
 *	};
 *
 * btf_type_needs_resolve() decides if a btf_type needs
 * to be resolved.
 *
 * The needs_resolve type implements the "resolve()" ops which
 * essentially does a DFS and detects back edges.
 *
 * During resolve (or DFS), different C types have different
 * "RESOLVED" conditions.
 *
 * When resolving a BTF_KIND_STRUCT, we need to resolve all its
 * members because a member is always referring to another
 * type. A struct's member can be treated as "RESOLVED" if
 * it is referring to a BTF_KIND_PTR. Otherwise, the
 * following valid C struct would be rejected:
 *
 *	struct A {
 *		int m;
 *		struct A *a;
 *	};
 *
 * When resolving a BTF_KIND_PTR, it needs to keep resolving if
 * it is referring to another BTF_KIND_PTR. Otherwise, we cannot
 * detect a pointer loop, e.g.:
 * BTF_KIND_CONST -> BTF_KIND_PTR -> BTF_KIND_CONST -> BTF_KIND_PTR +
 *                        ^                                         |
 *                        +-----------------------------------------+
 *
 */

#define BITS_PER_U128 (sizeof(u64) * BITS_PER_BYTE * 2)
#define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1)
#define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK)
#define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3)
#define BITS_ROUNDUP_BYTES(bits) \
	(BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))
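
/* Worked example: for a 12-bit bitfield, BITS_ROUNDDOWN_BYTES(12) == 1,
 * BITS_PER_BYTE_MASKED(12) == 4, and hence BITS_ROUNDUP_BYTES(12) == 2:
 * twelve bits span one full byte plus four spill-over bits.
 */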

#define BTF_INFO_MASK 0x9f00ffff
#define BTF_INT_MASK 0x0fffffff
#define BTF_TYPE_ID_VALID(type_id) ((type_id) <= BTF_MAX_TYPE)
#define BTF_STR_OFFSET_VALID(name_off) ((name_off) <= BTF_MAX_NAME_OFFSET)

/* 16MB for 64k structs (each with 16 members) and
 * a few MB of space for the string section.
 * The hard limit is S32_MAX.
 */
#define BTF_MAX_SIZE (16 * 1024 * 1024)

#define for_each_member_from(i, from, struct_type, member)		\
	for (i = from, member = btf_type_member(struct_type) + from;	\
	     i < btf_type_vlen(struct_type);				\
	     i++, member++)

#define for_each_vsi_from(i, from, struct_type, member)				\
	for (i = from, member = btf_type_var_secinfo(struct_type) + from;	\
	     i < btf_type_vlen(struct_type);					\
	     i++, member++)
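
/* Usage sketch (hypothetical, not a kernel snippet): resuming a struct
 * walk from a resolve_vertex v:
 *
 *	const struct btf_member *member;
 *	u16 i;
 *
 *	for_each_member_from(i, v->next_member, v->t, member)
 *		... examine member->type ...
 *
 * visits only the members from v->next_member onwards.
 */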

DEFINE_IDR(btf_idr);
DEFINE_SPINLOCK(btf_idr_lock);

enum btf_kfunc_hook {
	BTF_KFUNC_HOOK_XDP,
	BTF_KFUNC_HOOK_TC,
	BTF_KFUNC_HOOK_STRUCT_OPS,
	BTF_KFUNC_HOOK_MAX,
};

enum {
	BTF_KFUNC_SET_MAX_CNT = 32,
};

struct btf_kfunc_set_tab {
	struct btf_id_set *sets[BTF_KFUNC_HOOK_MAX][BTF_KFUNC_TYPE_MAX];
};

struct btf {
	void *data;
	struct btf_type **types;
	u32 *resolved_ids;
	u32 *resolved_sizes;
	const char *strings;
	void *nohdr_data;
	struct btf_header hdr;
	u32 nr_types; /* includes VOID for base BTF */
	u32 types_size;
	u32 data_size;
	refcount_t refcnt;
	u32 id;
	struct rcu_head rcu;
	struct btf_kfunc_set_tab *kfunc_set_tab;

	/* split BTF support */
	struct btf *base_btf;
	u32 start_id; /* first type ID in this BTF (0 for base BTF) */
	u32 start_str_off; /* first string offset (0 for base BTF) */
	char name[MODULE_NAME_LEN];
	bool kernel_btf;
};

enum verifier_phase {
	CHECK_META,
	CHECK_TYPE,
};

struct resolve_vertex {
	const struct btf_type *t;
	u32 type_id;
	u16 next_member;
};

enum visit_state {
	NOT_VISITED,
	VISITED,
	RESOLVED,
};
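
/* Inferred from the DFS description above: a type moves
 * NOT_VISITED -> VISITED when it is pushed on the resolve stack, and
 * VISITED -> RESOLVED once everything it refers to has been settled;
 * reaching a VISITED type again indicates a back edge, i.e. a loop.
 */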

enum resolve_mode {
	RESOLVE_TBD,			/* To Be Determined */
	RESOLVE_PTR,			/* Resolving for Pointer */
	RESOLVE_STRUCT_OR_ARRAY,	/* Resolving for struct/union
					 * or array
					 */
};

#define MAX_RESOLVE_DEPTH 32

struct btf_sec_info {
	u32 off;
	u32 len;
};

struct btf_verifier_env {
	struct btf *btf;
	u8 *visit_states;
	struct resolve_vertex stack[MAX_RESOLVE_DEPTH];
	struct bpf_verifier_log log;
	u32 log_type_id;
	u32 top_stack;
	enum verifier_phase phase;
	enum resolve_mode resolve_mode;
};

static const char * const btf_kind_str[NR_BTF_KINDS] = {
	[BTF_KIND_UNKN]		= "UNKNOWN",
	[BTF_KIND_INT]		= "INT",
	[BTF_KIND_PTR]		= "PTR",
	[BTF_KIND_ARRAY]	= "ARRAY",
	[BTF_KIND_STRUCT]	= "STRUCT",
	[BTF_KIND_UNION]	= "UNION",
	[BTF_KIND_ENUM]		= "ENUM",
	[BTF_KIND_FWD]		= "FWD",
	[BTF_KIND_TYPEDEF]	= "TYPEDEF",
	[BTF_KIND_VOLATILE]	= "VOLATILE",
	[BTF_KIND_CONST]	= "CONST",
	[BTF_KIND_RESTRICT]	= "RESTRICT",
	[BTF_KIND_FUNC]		= "FUNC",
	[BTF_KIND_FUNC_PROTO]	= "FUNC_PROTO",
	[BTF_KIND_VAR]		= "VAR",
	[BTF_KIND_DATASEC]	= "DATASEC",
	[BTF_KIND_FLOAT]	= "FLOAT",
	[BTF_KIND_DECL_TAG]	= "DECL_TAG",
	[BTF_KIND_TYPE_TAG]	= "TYPE_TAG",
};

const char *btf_type_str(const struct btf_type *t)
{
	return btf_kind_str[BTF_INFO_KIND(t->info)];
}

/* Chunk size we use in safe copy of data to be shown. */
#define BTF_SHOW_OBJ_SAFE_SIZE 32

/*
 * This is the maximum size of a base type value (equivalent to a
 * 128-bit int); if we are at the end of our safe buffer and have
 * less than 16 bytes of space we can't be assured of being able
 * to copy the next type safely, so in such cases we will initiate
 * a new copy.
 */
#define BTF_SHOW_OBJ_BASE_TYPE_SIZE 16

/* Type name size */
#define BTF_SHOW_NAME_SIZE 80

/*
 * Common data to all BTF show operations. Private show functions can add
 * their own data to a structure containing a struct btf_show and consult it
 * in the show callback. See btf_type_show() below.
 *
 * One challenge with showing nested data is we want to skip 0-valued
 * data, but in order to figure out whether a nested object is all zeros
 * we need to walk through it. As a result, we need to make two passes
 * when handling structs, unions and arrays; the first pass simply looks
 * for nonzero data, while the second actually does the display. The first
 * pass is signalled by show->state.depth_check being set, and if we
 * encounter a non-zero value we set show->state.depth_to_show to
 * the depth at which we encountered it. When we have completed the
 * first pass, we will know if anything needs to be displayed if
 * depth_to_show > depth. See btf_[struct,array]_show() for the
 * implementation of this.
 *
 * Another problem is we want to ensure the data for display is safe to
 * access. To support this, the anonymous "struct {} obj" tracks the data
 * object and our safe copy of it. We copy portions of the data needed
 * to the object "copy" buffer, but because its size is limited to
 * BTF_SHOW_OBJ_SAFE_SIZE bytes, multiple copies may be required as we
 * traverse larger objects for display.
 *
 * The various data type show functions all start with a call to
 * btf_show_start_type() which returns a pointer to the safe copy
 * of the data needed (or if BTF_SHOW_UNSAFE is specified, to the
 * raw data itself). btf_show_obj_safe() is responsible for
 * using copy_from_kernel_nofault() to update the safe data if necessary
 * as we traverse the object's data. skbuff-like semantics are
 * used:
 *
 * - obj.head points to the start of the toplevel object for display
 * - obj.size is the size of the toplevel object
 * - obj.data points to the current point in the original data at
 *   which our safe data starts. obj.data will advance as we copy
 *   portions of the data.
 *
 * In most cases a single copy will suffice, but larger data structures
 * such as "struct task_struct" will require many copies. The logic in
 * btf_show_obj_safe() determines if a new
 * copy_from_kernel_nofault() is needed.
 */

struct btf_show {
	u64 flags;
	void *target;	/* target of show operation (seq file, buffer) */
	void (*showfn)(struct btf_show *show, const char *fmt, va_list args);
	const struct btf *btf;
	/* below are used during iteration */
	struct {
		u8 depth;
		u8 depth_to_show;
		u8 depth_check;
		u8 array_member:1,
		   array_terminated:1;
		u16 array_encoding;
		u32 type_id;
		int status;			/* non-zero for error */
		const struct btf_type *type;
		const struct btf_member *member;
		char name[BTF_SHOW_NAME_SIZE];	/* space for member name/type */
	} state;
	struct {
		u32 size;
		void *head;
		void *data;
		u8 safe[BTF_SHOW_OBJ_SAFE_SIZE];
	} obj;
};

struct btf_kind_operations {
	s32 (*check_meta)(struct btf_verifier_env *env,
			  const struct btf_type *t,
			  u32 meta_left);
	int (*resolve)(struct btf_verifier_env *env,
		       const struct resolve_vertex *v);
	int (*check_member)(struct btf_verifier_env *env,
			    const struct btf_type *struct_type,
			    const struct btf_member *member,
			    const struct btf_type *member_type);
	int (*check_kflag_member)(struct btf_verifier_env *env,
				  const struct btf_type *struct_type,
				  const struct btf_member *member,
				  const struct btf_type *member_type);
	void (*log_details)(struct btf_verifier_env *env,
			    const struct btf_type *t);
	void (*show)(const struct btf *btf, const struct btf_type *t,
		     u32 type_id, void *data, u8 bits_offsets,
		     struct btf_show *show);
};

static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS];
static struct btf_type btf_void;

static int btf_resolve(struct btf_verifier_env *env,
		       const struct btf_type *t, u32 type_id);

static bool btf_type_is_modifier(const struct btf_type *t)
{
	/* Some of them are not strictly C modifiers,
	 * but they are grouped into the same bucket
	 * as far as BTF is concerned:
	 * a type (t) that refers to another
	 * type through t->type AND whose size cannot
	 * be determined without following t->type.
	 *
	 * ptr does not fall into this bucket
	 * because its size is always sizeof(void *).
	 */
	switch (BTF_INFO_KIND(t->info)) {
	case BTF_KIND_TYPEDEF:
	case BTF_KIND_VOLATILE:
	case BTF_KIND_CONST:
	case BTF_KIND_RESTRICT:
	case BTF_KIND_TYPE_TAG:
		return true;
	}

	return false;
}

bool btf_type_is_void(const struct btf_type *t)
{
	return t == &btf_void;
}

static bool btf_type_is_fwd(const struct btf_type *t)
{
	return BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
}

static bool btf_type_nosize(const struct btf_type *t)
{
	return btf_type_is_void(t) || btf_type_is_fwd(t) ||
	       btf_type_is_func(t) || btf_type_is_func_proto(t);
}

static bool btf_type_nosize_or_null(const struct btf_type *t)
{
	return !t || btf_type_nosize(t);
}

static bool __btf_type_is_struct(const struct btf_type *t)
{
	return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT;
}

static bool btf_type_is_array(const struct btf_type *t)
{
	return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY;
}

static bool btf_type_is_datasec(const struct btf_type *t)
{
	return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
}

static bool btf_type_is_decl_tag(const struct btf_type *t)
{
	return BTF_INFO_KIND(t->info) == BTF_KIND_DECL_TAG;
}

static bool btf_type_is_decl_tag_target(const struct btf_type *t)
{
	return btf_type_is_func(t) || btf_type_is_struct(t) ||
	       btf_type_is_var(t) || btf_type_is_typedef(t);
}

u32 btf_nr_types(const struct btf *btf)
{
	u32 total = 0;

	while (btf) {
		total += btf->nr_types;
		btf = btf->base_btf;
	}

	return total;
}

s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind)
{
	const struct btf_type *t;
	const char *tname;
	u32 i, total;

	total = btf_nr_types(btf);
	for (i = 1; i < total; i++) {
		t = btf_type_by_id(btf, i);
		if (BTF_INFO_KIND(t->info) != kind)
			continue;

		tname = btf_name_by_offset(btf, t->name_off);
		if (!strcmp(tname, name))
			return i;
	}

	return -ENOENT;
}
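
/* Usage sketch (hypothetical caller, illustrative only):
 *
 *	s32 id = btf_find_by_name_kind(btf, "task_struct", BTF_KIND_STRUCT);
 *
 * id is the matching type_id on success and -ENOENT if no STRUCT named
 * "task_struct" exists. The scan is linear over all type ids, so a
 * caller may want to cache the result.
 */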

const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,
					       u32 id, u32 *res_id)
{
	const struct btf_type *t = btf_type_by_id(btf, id);

	while (btf_type_is_modifier(t)) {
		id = t->type;
		t = btf_type_by_id(btf, t->type);
	}

	if (res_id)
		*res_id = id;

	return t;
}
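
/* e.g. for the chain CONST -> VOLATILE -> INT describing a
 * "const volatile int", btf_type_skip_modifiers() returns the INT type
 * and, when res_id is non-NULL, stores the INT's type_id in *res_id.
 */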

const struct btf_type *btf_type_resolve_ptr(const struct btf *btf,
					    u32 id, u32 *res_id)
{
	const struct btf_type *t;

	t = btf_type_skip_modifiers(btf, id, NULL);
	if (!btf_type_is_ptr(t))
		return NULL;

	return btf_type_skip_modifiers(btf, t->type, res_id);
}

const struct btf_type *btf_type_resolve_func_ptr(const struct btf *btf,
						 u32 id, u32 *res_id)
{
	const struct btf_type *ptype;

	ptype = btf_type_resolve_ptr(btf, id, res_id);
	if (ptype && btf_type_is_func_proto(ptype))
		return ptype;

	return NULL;
}

/* Types that act only as a source, not as a sink or intermediate
 * type when resolving.
 */
static bool btf_type_is_resolve_source_only(const struct btf_type *t)
{
	return btf_type_is_var(t) ||
	       btf_type_is_decl_tag(t) ||
	       btf_type_is_datasec(t);
}

/* What types need to be resolved?
 *
 * btf_type_is_modifier() is an obvious one.
 *
 * btf_type_is_struct() because its member refers to
 * another type (through member->type).
 *
 * btf_type_is_var() because the variable refers to
 * another type. btf_type_is_datasec() holds multiple
 * btf_type_is_var() types that need resolving.
 *
 * btf_type_is_array() because its element (array->type)
 * refers to another type. An array can be thought of as a
 * special case of struct where the same member type is
 * repeated array->nelems times.
 */
static bool btf_type_needs_resolve(const struct btf_type *t)
{
	return btf_type_is_modifier(t) ||
	       btf_type_is_ptr(t) ||
	       btf_type_is_struct(t) ||
	       btf_type_is_array(t) ||
	       btf_type_is_var(t) ||
	       btf_type_is_decl_tag(t) ||
	       btf_type_is_datasec(t);
}

/* t->size can be used */
static bool btf_type_has_size(const struct btf_type *t)
{
	switch (BTF_INFO_KIND(t->info)) {
	case BTF_KIND_INT:
	case BTF_KIND_STRUCT:
	case BTF_KIND_UNION:
	case BTF_KIND_ENUM:
	case BTF_KIND_DATASEC:
	case BTF_KIND_FLOAT:
		return true;
	}

	return false;
}

static const char *btf_int_encoding_str(u8 encoding)
{
	if (encoding == 0)
		return "(none)";
	else if (encoding == BTF_INT_SIGNED)
		return "SIGNED";
	else if (encoding == BTF_INT_CHAR)
		return "CHAR";
	else if (encoding == BTF_INT_BOOL)
		return "BOOL";
	else
		return "UNKN";
}

static u32 btf_type_int(const struct btf_type *t)
{
	return *(u32 *)(t + 1);
}
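
/* Per the BTF UAPI, a BTF_KIND_INT's btf_type is followed by a single
 * u32 packing the encoding, bit offset and bit size; btf_type_int()
 * returns that raw u32 for BTF_INT_ENCODING()/BTF_INT_OFFSET()/
 * BTF_INT_BITS() to decode.
 */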

static const struct btf_array *btf_type_array(const struct btf_type *t)
{
	return (const struct btf_array *)(t + 1);
}

static const struct btf_enum *btf_type_enum(const struct btf_type *t)
{
	return (const struct btf_enum *)(t + 1);
}

static const struct btf_var *btf_type_var(const struct btf_type *t)
{
	return (const struct btf_var *)(t + 1);
}

static const struct btf_decl_tag *btf_type_decl_tag(const struct btf_type *t)
{
	return (const struct btf_decl_tag *)(t + 1);
}

static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
{
	return kind_ops[BTF_INFO_KIND(t->info)];
}

static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
{
	if (!BTF_STR_OFFSET_VALID(offset))
		return false;

	while (offset < btf->start_str_off)
		btf = btf->base_btf;

	offset -= btf->start_str_off;
	return offset < btf->hdr.str_len;
}

static bool __btf_name_char_ok(char c, bool first, bool dot_ok)
{
	if ((first ? !isalpha(c) :
		     !isalnum(c)) &&
	    c != '_' &&
	    ((c == '.' && !dot_ok) ||
	      c != '.'))
		return false;
	return true;
}

static const char *btf_str_by_offset(const struct btf *btf, u32 offset)
{
	while (offset < btf->start_str_off)
		btf = btf->base_btf;

	offset -= btf->start_str_off;
	if (offset < btf->hdr.str_len)
		return &btf->strings[offset];

	return NULL;
}

static bool __btf_name_valid(const struct btf *btf, u32 offset, bool dot_ok)
{
	/* offset must be valid */
	const char *src = btf_str_by_offset(btf, offset);
	const char *src_limit;

	if (!__btf_name_char_ok(*src, true, dot_ok))
		return false;

	/* set a limit on identifier length */
	src_limit = src + KSYM_NAME_LEN;
	src++;
	while (*src && src < src_limit) {
		if (!__btf_name_char_ok(*src, false, dot_ok))
			return false;
		src++;
	}

	return !*src;
}

/* Only C-style identifiers are permitted. This can be relaxed if
 * necessary.
 */
static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
{
	return __btf_name_valid(btf, offset, false);
}

static bool btf_name_valid_section(const struct btf *btf, u32 offset)
{
	return __btf_name_valid(btf, offset, true);
}
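
/* e.g. "task_struct" and "_x1" pass btf_name_valid_identifier(), while
 * dotted names such as ".bss" or ".data..percpu" are accepted only by
 * btf_name_valid_section() (dot_ok == true).
 */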

static const char *__btf_name_by_offset(const struct btf *btf, u32 offset)
{
	const char *name;

	if (!offset)
		return "(anon)";

	name = btf_str_by_offset(btf, offset);
	return name ?: "(invalid-name-offset)";
}

const char *btf_name_by_offset(const struct btf *btf, u32 offset)
{
	return btf_str_by_offset(btf, offset);
}

const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
{
	while (type_id < btf->start_id)
		btf = btf->base_btf;

	type_id -= btf->start_id;
	if (type_id >= btf->nr_types)
		return NULL;
	return btf->types[type_id];
}
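
/* Illustrative numbers: with a base (vmlinux) BTF of nr_types == 1000
 * (type ids 0..999, id 0 being VOID) and a module's split BTF whose
 * start_id == 1000, looking up type_id 1003 stays in the module btf
 * and returns its types[3], while type_id 42 walks down to the base.
 */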

/*
 * A regular int is not a bit field and it must be either
 * u8/u16/u32/u64 or __int128.
 */
static bool btf_type_int_is_regular(const struct btf_type *t)
{
	u8 nr_bits, nr_bytes;
	u32 int_data;

	int_data = btf_type_int(t);
	nr_bits = BTF_INT_BITS(int_data);
	nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);
	if (BITS_PER_BYTE_MASKED(nr_bits) ||
	    BTF_INT_OFFSET(int_data) ||
	    (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) &&
	     nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64) &&
	     nr_bytes != (2 * sizeof(u64)))) {
		return false;
	}

	return true;
}
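
/* e.g. a plain "int" (nr_bits == 32, BTF_INT_OFFSET() == 0,
 * nr_bytes == 4) is regular; a 12-bit bitfield trips the
 * BITS_PER_BYTE_MASKED() check and is not.
 */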

/*
 * Check that a given struct member is a regular int with the expected
 * offset and size.
 */
|
2018-12-16 09:13:52 +03:00
|
|
|
bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
|
|
|
|
const struct btf_member *m,
|
|
|
|
u32 expected_offset, u32 expected_size)
|
bpf: add bpffs pretty print for cgroup local storage maps
Implement bpffs pretty printing for cgroup local storage maps
(both shared and per-cpu).
Output example (captured for tools/testing/selftests/bpf/netcnt_prog.c):
Shared:
$ cat /sys/fs/bpf/map_2
# WARNING!! The output is for debug purpose only
# WARNING!! The output format will change
{4294968594,1}: {9999,1039896}
Per-cpu:
$ cat /sys/fs/bpf/map_1
# WARNING!! The output is for debug purpose only
# WARNING!! The output format will change
{4294968594,1}: {
cpu0: {0,0,0,0,0}
cpu1: {0,0,0,0,0}
cpu2: {1,104,0,0,0}
cpu3: {0,0,0,0,0}
}
Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-12-11 02:43:01 +03:00
|
|
|
{
|
2018-12-16 09:13:52 +03:00
|
|
|
const struct btf_type *t;
|
|
|
|
u32 id, int_data;
|
|
|
|
u8 nr_bits;
|
bpf: add bpffs pretty print for cgroup local storage maps
Implement bpffs pretty printing for cgroup local storage maps
(both shared and per-cpu).
Output example (captured for tools/testing/selftests/bpf/netcnt_prog.c):
Shared:
$ cat /sys/fs/bpf/map_2
# WARNING!! The output is for debug purpose only
# WARNING!! The output format will change
{4294968594,1}: {9999,1039896}
Per-cpu:
$ cat /sys/fs/bpf/map_1
# WARNING!! The output is for debug purpose only
# WARNING!! The output format will change
{4294968594,1}: {
cpu0: {0,0,0,0,0}
cpu1: {0,0,0,0,0}
cpu2: {1,104,0,0,0}
cpu3: {0,0,0,0,0}
}
Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-12-11 02:43:01 +03:00
|
|
|
|
2018-12-16 09:13:52 +03:00
|
|
|
id = m->type;
|
|
|
|
t = btf_type_id_size(btf, &id, NULL);
|
|
|
|
if (!t || !btf_type_is_int(t))
|
bpf: add bpffs pretty print for cgroup local storage maps
Implement bpffs pretty printing for cgroup local storage maps
(both shared and per-cpu).
Output example (captured for tools/testing/selftests/bpf/netcnt_prog.c):
Shared:
$ cat /sys/fs/bpf/map_2
# WARNING!! The output is for debug purpose only
# WARNING!! The output format will change
{4294968594,1}: {9999,1039896}
Per-cpu:
$ cat /sys/fs/bpf/map_1
# WARNING!! The output is for debug purpose only
# WARNING!! The output format will change
{4294968594,1}: {
cpu0: {0,0,0,0,0}
cpu1: {0,0,0,0,0}
cpu2: {1,104,0,0,0}
cpu3: {0,0,0,0,0}
}
Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-12-11 02:43:01 +03:00
|
|
|
return false;
|
|
|
|
|
|
|
|
int_data = btf_type_int(t);
|
|
|
|
nr_bits = BTF_INT_BITS(int_data);
|
2018-12-16 09:13:52 +03:00
|
|
|
if (btf_type_kflag(s)) {
|
|
|
|
u32 bitfield_size = BTF_MEMBER_BITFIELD_SIZE(m->offset);
|
|
|
|
u32 bit_offset = BTF_MEMBER_BIT_OFFSET(m->offset);
|
|
|
|
|
|
|
|
/* if kflag is set, the int should be a regular int and the
|
|
|
|
* bit offset should be at a byte boundary.
|
|
|
|
*/
|
|
|
|
return !bitfield_size &&
|
|
|
|
BITS_ROUNDUP_BYTES(bit_offset) == expected_offset &&
|
|
|
|
BITS_ROUNDUP_BYTES(nr_bits) == expected_size;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (BTF_INT_OFFSET(int_data) ||
|
|
|
|
BITS_PER_BYTE_MASKED(m->offset) ||
|
|
|
|
BITS_ROUNDUP_BYTES(m->offset) != expected_offset ||
|
|
|
|
BITS_PER_BYTE_MASKED(nr_bits) ||
|
|
|
|
BITS_ROUNDUP_BYTES(nr_bits) != expected_size)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
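For orientation, a minimal sketch of how this helper can be used, modeled on the cgroup local storage key check implied by the pretty-print commit above (the wrapper name is hypothetical, not from this file): each key member must be a plain, non-bitfield integer at a fixed byte offset and size.
static int check_storage_key(const struct btf *btf,
			     const struct btf_type *key_type)
{
	/* struct bpf_cgroup_storage_key { __u64 cgroup_inode_id; __u32 attach_type; }; */
	const struct btf_member *m = btf_type_member(key_type);

	/* member 0: a regular 8-byte int at byte offset 0 */
	if (!btf_member_is_reg_int(btf, key_type, m, 0, sizeof(__u64)))
		return -EINVAL;

	/* member 1: a regular 4-byte int at byte offset 8 */
	if (!btf_member_is_reg_int(btf, key_type, m + 1, sizeof(__u64), sizeof(__u32)))
		return -EINVAL;

	return 0;
}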
|
|
|
|
|
2020-09-28 14:31:04 +03:00
|
|
|
/* Similar to btf_type_skip_modifiers() but does not skip typedefs. */
|
|
|
|
static const struct btf_type *btf_type_skip_qualifiers(const struct btf *btf,
|
|
|
|
u32 id)
|
|
|
|
{
|
|
|
|
const struct btf_type *t = btf_type_by_id(btf, id);
|
|
|
|
|
|
|
|
while (btf_type_is_modifier(t) &&
|
|
|
|
BTF_INFO_KIND(t->info) != BTF_KIND_TYPEDEF) {
|
|
|
|
t = btf_type_by_id(btf, t->type);
|
|
|
|
}
|
|
|
|
|
|
|
|
return t;
|
|
|
|
}
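A quick contrast with btf_type_skip_modifiers(), as a sketch (the type id is hypothetical): for "typedef const int cint_t;" the chain is TYPEDEF -> CONST -> INT.
/* btf_type_skip_modifiers() would resolve cint_t_id all the way to "int";
 * btf_type_skip_qualifiers() stops at the typedef so its name survives:
 */
const struct btf_type *t = btf_type_skip_qualifiers(btf, cint_t_id);

/* here BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF, name "cint_t" */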
|
|
|
|
|
|
|
|
#define BTF_SHOW_MAX_ITER 10
|
|
|
|
|
|
|
|
#define BTF_KIND_BIT(kind) (1ULL << kind)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Populate show->state.name with type name information.
|
|
|
|
* Format of type name is
|
|
|
|
*
|
|
|
|
* [.member_name = ] (type_name)
|
|
|
|
*/
|
|
|
|
static const char *btf_show_name(struct btf_show *show)
|
|
|
|
{
|
|
|
|
/* BTF_MAX_ITER array suffixes "[]" */
|
|
|
|
const char *array_suffixes = "[][][][][][][][][][]";
|
|
|
|
const char *array_suffix = &array_suffixes[strlen(array_suffixes)];
|
|
|
|
/* BTF_MAX_ITER pointer suffixes "*" */
|
|
|
|
const char *ptr_suffixes = "**********";
|
|
|
|
const char *ptr_suffix = &ptr_suffixes[strlen(ptr_suffixes)];
|
|
|
|
const char *name = NULL, *prefix = "", *parens = "";
|
|
|
|
const struct btf_member *m = show->state.member;
|
2021-12-08 01:47:18 +03:00
|
|
|
const struct btf_type *t;
|
2020-09-28 14:31:04 +03:00
|
|
|
const struct btf_array *array;
|
|
|
|
u32 id = show->state.type_id;
|
|
|
|
const char *member = NULL;
|
|
|
|
bool show_member = false;
|
|
|
|
u64 kinds = 0;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
show->state.name[0] = '\0';
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Don't show type name if we're showing an array member;
|
|
|
|
* in that case we show the array type, so we don't need to repeat
|
|
|
|
* ourselves for each member.
|
|
|
|
*/
|
|
|
|
if (show->state.array_member)
|
|
|
|
return "";
|
|
|
|
|
|
|
|
/* Retrieve member name, if any. */
|
|
|
|
if (m) {
|
|
|
|
member = btf_name_by_offset(show->btf, m->name_off);
|
|
|
|
show_member = strlen(member) > 0;
|
|
|
|
id = m->type;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Start with type_id, as we have resolved the struct btf_type *
|
|
|
|
* via btf_modifier_show() past the parent typedef to the child
|
|
|
|
* struct, int, etc. it is defined as. In such cases, the type_id
|
|
|
|
* still represents the starting type while the struct btf_type *
|
|
|
|
* in our show->state points at the resolved type of the typedef.
|
|
|
|
*/
|
|
|
|
t = btf_type_by_id(show->btf, id);
|
|
|
|
if (!t)
|
|
|
|
return "";
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The goal here is to build up the right number of pointer and
|
|
|
|
* array suffixes while ensuring the type name for a typedef
|
|
|
|
* is represented. Along the way we accumulate a list of
|
|
|
|
* BTF kinds we have encountered, since these will inform later
|
|
|
|
* display; for example, pointer types will not require an
|
|
|
|
* opening "{" for struct, we will just display the pointer value.
|
|
|
|
*
|
|
|
|
* We also want to accumulate the right number of pointer or array
|
|
|
|
* indices in the format string while iterating until we get to
|
|
|
|
* the typedef/pointee/array member target type.
|
|
|
|
*
|
|
|
|
* We start by pointing at the end of pointer and array suffix
|
|
|
|
* strings; as we accumulate pointers and arrays we move the pointer
|
|
|
|
* or array string backwards so it will show the expected number of
|
|
|
|
* '*' or '[]' for the type. BTF_SHOW_MAX_ITER levels of nesting of pointers
|
|
|
|
* and/or arrays and typedefs are supported as a precaution.
|
|
|
|
*
|
|
|
|
* We also want to get the typedef name while proceeding to resolve
|
|
|
|
* the type it points to so that we can add parentheses if it is a
|
|
|
|
* "typedef struct" etc.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < BTF_SHOW_MAX_ITER; i++) {
|
|
|
|
|
|
|
|
switch (BTF_INFO_KIND(t->info)) {
|
|
|
|
case BTF_KIND_TYPEDEF:
|
|
|
|
if (!name)
|
|
|
|
name = btf_name_by_offset(show->btf,
|
|
|
|
t->name_off);
|
|
|
|
kinds |= BTF_KIND_BIT(BTF_KIND_TYPEDEF);
|
|
|
|
id = t->type;
|
|
|
|
break;
|
|
|
|
case BTF_KIND_ARRAY:
|
|
|
|
kinds |= BTF_KIND_BIT(BTF_KIND_ARRAY);
|
|
|
|
parens = "[";
|
|
|
|
if (!t)
|
|
|
|
return "";
|
|
|
|
array = btf_type_array(t);
|
|
|
|
if (array_suffix > array_suffixes)
|
|
|
|
array_suffix -= 2;
|
|
|
|
id = array->type;
|
|
|
|
break;
|
|
|
|
case BTF_KIND_PTR:
|
|
|
|
kinds |= BTF_KIND_BIT(BTF_KIND_PTR);
|
|
|
|
if (ptr_suffix > ptr_suffixes)
|
|
|
|
ptr_suffix -= 1;
|
|
|
|
id = t->type;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
id = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (!id)
|
|
|
|
break;
|
|
|
|
t = btf_type_skip_qualifiers(show->btf, id);
|
|
|
|
}
|
|
|
|
/* We may not be able to represent this type; bail to be safe */
|
|
|
|
if (i == BTF_SHOW_MAX_ITER)
|
|
|
|
return "";
|
|
|
|
|
|
|
|
if (!name)
|
|
|
|
name = btf_name_by_offset(show->btf, t->name_off);
|
|
|
|
|
|
|
|
switch (BTF_INFO_KIND(t->info)) {
|
|
|
|
case BTF_KIND_STRUCT:
|
|
|
|
case BTF_KIND_UNION:
|
|
|
|
prefix = BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT ?
|
|
|
|
"struct" : "union";
|
|
|
|
/* if it's an array of struct/union, parens is already set */
|
|
|
|
if (!(kinds & (BTF_KIND_BIT(BTF_KIND_ARRAY))))
|
|
|
|
parens = "{";
|
|
|
|
break;
|
|
|
|
case BTF_KIND_ENUM:
|
|
|
|
prefix = "enum";
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* pointer does not require parens */
|
|
|
|
if (kinds & BTF_KIND_BIT(BTF_KIND_PTR))
|
|
|
|
parens = "";
|
|
|
|
/* typedef does not require struct/union/enum prefix */
|
|
|
|
if (kinds & BTF_KIND_BIT(BTF_KIND_TYPEDEF))
|
|
|
|
prefix = "";
|
|
|
|
|
|
|
|
if (!name)
|
|
|
|
name = "";
|
|
|
|
|
|
|
|
/* Even if we don't want type name info, we want parentheses etc */
|
|
|
|
if (show->flags & BTF_SHOW_NONAME)
|
|
|
|
snprintf(show->state.name, sizeof(show->state.name), "%s",
|
|
|
|
parens);
|
|
|
|
else
|
|
|
|
snprintf(show->state.name, sizeof(show->state.name),
|
|
|
|
"%s%s%s(%s%s%s%s%s%s)%s",
|
|
|
|
/* first 3 strings comprise ".member = " */
|
|
|
|
show_member ? "." : "",
|
|
|
|
show_member ? member : "",
|
|
|
|
show_member ? " = " : "",
|
|
|
|
/* ...next is our prefix (struct, enum, etc) */
|
|
|
|
prefix,
|
|
|
|
strlen(prefix) > 0 && strlen(name) > 0 ? " " : "",
|
|
|
|
/* ...this is the type name itself */
|
|
|
|
name,
|
|
|
|
/* ...suffixed by the appropriate '*', '[]' suffixes */
|
|
|
|
strlen(ptr_suffix) > 0 ? " " : "", ptr_suffix,
|
|
|
|
array_suffix, parens);
|
|
|
|
|
|
|
|
return show->state.name;
|
|
|
|
}
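To make the "[.member_name = ] (type_name)" format concrete, some plausible names this function would build (illustrative sketches, not captured output):
.pid = (pid_t)			/* typedef member: no struct/enum prefix  */
.tasks = (struct list_head){	/* struct member: '{' opens the aggregate */
.comm = (char[])[		/* array member: '[' opens the array      */
.mm = (struct mm_struct *)	/* pointer member: no parens needed       */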
|
|
|
|
|
|
|
|
static const char *__btf_show_indent(struct btf_show *show)
|
|
|
|
{
|
|
|
|
const char *indents = " ";
|
|
|
|
const char *indent = &indents[strlen(indents)];
|
|
|
|
|
|
|
|
if ((indent - show->state.depth) >= indents)
|
|
|
|
return indent - show->state.depth;
|
|
|
|
return indents;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char *btf_show_indent(struct btf_show *show)
|
|
|
|
{
|
|
|
|
return show->flags & BTF_SHOW_COMPACT ? "" : __btf_show_indent(show);
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char *btf_show_newline(struct btf_show *show)
|
|
|
|
{
|
|
|
|
return show->flags & BTF_SHOW_COMPACT ? "" : "\n";
|
|
|
|
}
|
|
|
|
|
|
|
|
static const char *btf_show_delim(struct btf_show *show)
|
|
|
|
{
|
|
|
|
if (show->state.depth == 0)
|
|
|
|
return "";
|
|
|
|
|
|
|
|
if ((show->flags & BTF_SHOW_COMPACT) && show->state.type &&
|
|
|
|
BTF_INFO_KIND(show->state.type->info) == BTF_KIND_UNION)
|
|
|
|
return "|";
|
|
|
|
|
|
|
|
return ",";
|
|
|
|
}
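Illustratively (a sketch, not captured output), in BTF_SHOW_COMPACT mode the members of a union are therefore separated by '|' instead of ',':
(struct s){.a = (int)1,.b = (int)2}	/* struct members: ',' */
(union u){.a = (int)1|.b = (int)2}	/* union members:  '|' */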
|
|
|
|
|
|
|
|
__printf(2, 3) static void btf_show(struct btf_show *show, const char *fmt, ...)
|
|
|
|
{
|
|
|
|
va_list args;
|
|
|
|
|
|
|
|
if (!show->state.depth_check) {
|
|
|
|
va_start(args, fmt);
|
|
|
|
show->showfn(show, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Macros are used here as btf_show_type_value[s]() prepends and appends
|
|
|
|
* format specifiers to the format specifier passed in; these do the work of
|
|
|
|
* adding indentation, delimiters etc while the caller simply has to specify
|
|
|
|
* the type value(s) in the format specifier + value(s).
|
|
|
|
*/
|
|
|
|
#define btf_show_type_value(show, fmt, value) \
|
|
|
|
do { \
|
|
|
|
if ((value) != 0 || (show->flags & BTF_SHOW_ZERO) || \
|
|
|
|
show->state.depth == 0) { \
|
|
|
|
btf_show(show, "%s%s" fmt "%s%s", \
|
|
|
|
btf_show_indent(show), \
|
|
|
|
btf_show_name(show), \
|
|
|
|
value, btf_show_delim(show), \
|
|
|
|
btf_show_newline(show)); \
|
|
|
|
if (show->state.depth > show->state.depth_to_show) \
|
|
|
|
show->state.depth_to_show = show->state.depth; \
|
|
|
|
} \
|
|
|
|
} while (0)
|
|
|
|
|
|
|
|
#define btf_show_type_values(show, fmt, ...) \
|
|
|
|
do { \
|
|
|
|
btf_show(show, "%s%s" fmt "%s%s", btf_show_indent(show), \
|
|
|
|
btf_show_name(show), \
|
|
|
|
__VA_ARGS__, btf_show_delim(show), \
|
|
|
|
btf_show_newline(show)); \
|
|
|
|
if (show->state.depth > show->state.depth_to_show) \
|
|
|
|
show->state.depth_to_show = show->state.depth; \
|
|
|
|
} while (0)
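A minimal sketch of a caller (the function name is hypothetical): a scalar show callback only supplies the conversion specifier and the value, while the macro contributes the indentation, the "[.member = ] (type)" name, the delimiter and the newline.
static void show_u32(struct btf_show *show, void *data)
{
	u32 v = *(u32 *)data;

	/* expands to btf_show(show, "%s%s" "%u" "%s%s",
	 *                     indent, name, v, delim, newline)
	 */
	btf_show_type_value(show, "%u", v);
}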
|
|
|
|
|
|
|
|
/* How much is left to copy to safe buffer after @data? */
|
|
|
|
static int btf_show_obj_size_left(struct btf_show *show, void *data)
|
|
|
|
{
|
|
|
|
return show->obj.head + show->obj.size - data;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Is object pointed to by @data of @size already copied to our safe buffer? */
|
|
|
|
static bool btf_show_obj_is_safe(struct btf_show *show, void *data, int size)
|
|
|
|
{
|
|
|
|
return data >= show->obj.data &&
|
|
|
|
(data + size) < (show->obj.data + BTF_SHOW_OBJ_SAFE_SIZE);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If object pointed to by @data of @size falls within our safe buffer, return
|
|
|
|
* the equivalent pointer to the same safe data. Assumes
|
|
|
|
* copy_from_kernel_nofault() has already happened and our safe buffer is
|
|
|
|
* populated.
|
|
|
|
*/
|
|
|
|
static void *__btf_show_obj_safe(struct btf_show *show, void *data, int size)
|
|
|
|
{
|
|
|
|
if (btf_show_obj_is_safe(show, data, size))
|
|
|
|
return show->obj.safe + (data - show->obj.data);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Return a safe-to-access version of data pointed to by @data.
|
|
|
|
* We do this by copying the relevant amount of information
|
|
|
|
* to the struct btf_show obj.safe buffer using copy_from_kernel_nofault().
|
|
|
|
*
|
|
|
|
* If BTF_SHOW_UNSAFE is specified, just return data as-is; no
|
|
|
|
* safe copy is needed.
|
|
|
|
*
|
|
|
|
* Otherwise we need to determine if we have the required amount
|
|
|
|
* of data (determined by the @data pointer and the size of the
|
|
|
|
* largest base type we can encounter (represented by
|
|
|
|
* BTF_SHOW_OBJ_BASE_TYPE_SIZE)). Having that much data ensures
|
|
|
|
* that we will be able to print some of the current object,
|
|
|
|
* and if more is needed a copy will be triggered.
|
|
|
|
* Some objects such as structs will not fit into the buffer;
|
|
|
|
* in such cases additional copies when we iterate over their
|
|
|
|
* members may be needed.
|
|
|
|
*
|
|
|
|
* btf_show_obj_safe() is used to return a safe buffer for
|
|
|
|
* btf_show_start_type(); this ensures that as we recurse into
|
|
|
|
* nested types we always have safe data for the given type.
|
|
|
|
* This approach is somewhat wasteful; it's possible for example
|
|
|
|
* that when iterating over a large union we'll end up copying the
|
|
|
|
* same data repeatedly, but the goal is safety not performance.
|
|
|
|
* We use stack data as opposed to per-CPU buffers because the
|
|
|
|
* iteration over a type can take some time, and preemption handling
|
|
|
|
* would greatly complicate use of the safe buffer.
|
|
|
|
*/
|
|
|
|
static void *btf_show_obj_safe(struct btf_show *show,
|
|
|
|
const struct btf_type *t,
|
|
|
|
void *data)
|
|
|
|
{
|
|
|
|
const struct btf_type *rt;
|
|
|
|
int size_left, size;
|
|
|
|
void *safe = NULL;
|
|
|
|
|
|
|
|
if (show->flags & BTF_SHOW_UNSAFE)
|
|
|
|
return data;
|
|
|
|
|
|
|
|
rt = btf_resolve_size(show->btf, t, &size);
|
|
|
|
if (IS_ERR(rt)) {
|
|
|
|
show->state.status = PTR_ERR(rt);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Is this a toplevel object? If so, set total object size and
|
|
|
|
* initialize pointers. Otherwise check if we still fall within
|
|
|
|
* our safe object data.
|
|
|
|
*/
|
|
|
|
if (show->state.depth == 0) {
|
|
|
|
show->obj.size = size;
|
|
|
|
show->obj.head = data;
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* If the size of the current object is > our remaining
|
|
|
|
* safe buffer we _may_ need to do a new copy. However
|
|
|
|
* consider the case of a nested struct; its size pushes
|
|
|
|
* us over the safe buffer limit, but showing any individual
|
|
|
|
* struct members does not. In such cases, we don't need
|
|
|
|
* to initiate a fresh copy yet; however we definitely need
|
|
|
|
* at least BTF_SHOW_OBJ_BASE_TYPE_SIZE bytes left
|
|
|
|
* in our buffer, regardless of the current object size.
|
|
|
|
* The logic here is that as we resolve types we will
|
|
|
|
* hit a base type at some point, and we need to be sure
|
|
|
|
* the next chunk of data is safely available to display
|
|
|
|
* that type info safely. We cannot rely on the size of
|
|
|
|
* the current object here because it may be much larger
|
|
|
|
* than our current buffer (e.g. task_struct is 8k).
|
|
|
|
* All we want to do here is ensure that we can print the
|
|
|
|
* next basic type, which we can if either
|
|
|
|
* - the current type size is within the safe buffer; or
|
|
|
|
* - at least BTF_SHOW_OBJ_BASE_TYPE_SIZE bytes are left in
|
|
|
|
* the safe buffer.
|
|
|
|
*/
|
|
|
|
safe = __btf_show_obj_safe(show, data,
|
|
|
|
min(size,
|
|
|
|
BTF_SHOW_OBJ_BASE_TYPE_SIZE));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We need a new copy to our safe object, either because we haven't
|
2021-05-25 05:56:59 +03:00
|
|
|
* yet copied and are initializing safe data, or because the data
|
2020-09-28 14:31:04 +03:00
|
|
|
* we want falls outside the boundaries of the safe object.
|
|
|
|
*/
|
|
|
|
if (!safe) {
|
|
|
|
size_left = btf_show_obj_size_left(show, data);
|
|
|
|
if (size_left > BTF_SHOW_OBJ_SAFE_SIZE)
|
|
|
|
size_left = BTF_SHOW_OBJ_SAFE_SIZE;
|
|
|
|
show->state.status = copy_from_kernel_nofault(show->obj.safe,
|
|
|
|
data, size_left);
|
|
|
|
if (!show->state.status) {
|
|
|
|
show->obj.data = data;
|
|
|
|
safe = show->obj.safe;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return safe;
|
|
|
|
}
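Condensed, the fallback copy path above amounts to the following restatement (eliding the toplevel/depth bookkeeping):
/* Clamp to what remains of the toplevel object, then snapshot it.
 * copy_from_kernel_nofault() returns 0 on success and a negative
 * error instead of faulting on an unreadable kernel address.
 */
void *safe = NULL;
int n = btf_show_obj_size_left(show, data);

if (n > BTF_SHOW_OBJ_SAFE_SIZE)
	n = BTF_SHOW_OBJ_SAFE_SIZE;
if (!copy_from_kernel_nofault(show->obj.safe, data, n))
	safe = show->obj.safe;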
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set the type we are starting to show and return a safe data pointer
|
|
|
|
* to be used for showing the associated data.
|
|
|
|
*/
|
|
|
|
static void *btf_show_start_type(struct btf_show *show,
|
|
|
|
const struct btf_type *t,
|
|
|
|
u32 type_id, void *data)
|
|
|
|
{
|
|
|
|
show->state.type = t;
|
|
|
|
show->state.type_id = type_id;
|
|
|
|
show->state.name[0] = '\0';
|
|
|
|
|
|
|
|
return btf_show_obj_safe(show, t, data);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void btf_show_end_type(struct btf_show *show)
|
|
|
|
{
|
|
|
|
show->state.type = NULL;
|
|
|
|
show->state.type_id = 0;
|
|
|
|
show->state.name[0] = '\0';
|
|
|
|
}
|
|
|
|
|
|
|
|
static void *btf_show_start_aggr_type(struct btf_show *show,
|
|
|
|
const struct btf_type *t,
|
|
|
|
u32 type_id, void *data)
|
|
|
|
{
|
|
|
|
void *safe_data = btf_show_start_type(show, t, type_id, data);
|
|
|
|
|
|
|
|
if (!safe_data)
|
|
|
|
return safe_data;
|
|
|
|
|
|
|
|
btf_show(show, "%s%s%s", btf_show_indent(show),
|
|
|
|
btf_show_name(show),
|
|
|
|
btf_show_newline(show));
|
|
|
|
show->state.depth++;
|
|
|
|
return safe_data;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void btf_show_end_aggr_type(struct btf_show *show,
|
|
|
|
const char *suffix)
|
|
|
|
{
|
|
|
|
show->state.depth--;
|
|
|
|
btf_show(show, "%s%s%s%s", btf_show_indent(show), suffix,
|
|
|
|
btf_show_delim(show), btf_show_newline(show));
|
|
|
|
btf_show_end_type(show);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void btf_show_start_member(struct btf_show *show,
|
|
|
|
const struct btf_member *m)
|
|
|
|
{
|
|
|
|
show->state.member = m;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void btf_show_start_array_member(struct btf_show *show)
|
|
|
|
{
|
|
|
|
show->state.array_member = 1;
|
|
|
|
btf_show_start_member(show, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void btf_show_end_member(struct btf_show *show)
|
|
|
|
{
|
|
|
|
show->state.member = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void btf_show_end_array_member(struct btf_show *show)
|
|
|
|
{
|
|
|
|
show->state.array_member = 0;
|
|
|
|
btf_show_end_member(show);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void *btf_show_start_array_type(struct btf_show *show,
|
|
|
|
const struct btf_type *t,
|
|
|
|
u32 type_id,
|
|
|
|
u16 array_encoding,
|
|
|
|
void *data)
|
|
|
|
{
|
|
|
|
show->state.array_encoding = array_encoding;
|
|
|
|
show->state.array_terminated = 0;
|
|
|
|
return btf_show_start_aggr_type(show, t, type_id, data);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void btf_show_end_array_type(struct btf_show *show)
|
|
|
|
{
|
|
|
|
show->state.array_encoding = 0;
|
|
|
|
show->state.array_terminated = 0;
|
|
|
|
btf_show_end_aggr_type(show, "]");
|
|
|
|
}
|
|
|
|
|
|
|
|
static void *btf_show_start_struct_type(struct btf_show *show,
|
|
|
|
const struct btf_type *t,
|
|
|
|
u32 type_id,
|
|
|
|
void *data)
|
|
|
|
{
|
|
|
|
return btf_show_start_aggr_type(show, t, type_id, data);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void btf_show_end_struct_type(struct btf_show *show)
|
|
|
|
{
|
|
|
|
btf_show_end_aggr_type(show, "}");
|
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:57 +03:00
|
|
|
__printf(2, 3) static void __btf_verifier_log(struct bpf_verifier_log *log,
|
|
|
|
const char *fmt, ...)
|
|
|
|
{
|
|
|
|
va_list args;
|
|
|
|
|
|
|
|
va_start(args, fmt);
|
|
|
|
bpf_verifier_vlog(log, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
}
|
|
|
|
|
|
|
|
__printf(2, 3) static void btf_verifier_log(struct btf_verifier_env *env,
|
|
|
|
const char *fmt, ...)
|
|
|
|
{
|
|
|
|
struct bpf_verifier_log *log = &env->log;
|
|
|
|
va_list args;
|
|
|
|
|
|
|
|
if (!bpf_verifier_log_needed(log))
|
|
|
|
return;
|
|
|
|
|
|
|
|
va_start(args, fmt);
|
|
|
|
bpf_verifier_vlog(log, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
}
|
|
|
|
|
|
|
|
__printf(4, 5) static void __btf_verifier_log_type(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t,
|
|
|
|
bool log_details,
|
|
|
|
const char *fmt, ...)
|
|
|
|
{
|
|
|
|
struct bpf_verifier_log *log = &env->log;
|
|
|
|
u8 kind = BTF_INFO_KIND(t->info);
|
|
|
|
struct btf *btf = env->btf;
|
|
|
|
va_list args;
|
|
|
|
|
|
|
|
if (!bpf_verifier_log_needed(log))
|
|
|
|
return;
|
|
|
|
|
2019-10-16 06:24:57 +03:00
|
|
|
/* btf verifier prints all types it is processing via
|
|
|
|
* btf_verifier_log_type(..., fmt = NULL).
|
|
|
|
* Skip those prints for in-kernel BTF verification.
|
|
|
|
*/
|
|
|
|
if (log->level == BPF_LOG_KERNEL && !fmt)
|
|
|
|
return;
|
|
|
|
|
2018-04-19 01:55:57 +03:00
|
|
|
__btf_verifier_log(log, "[%u] %s %s%s",
|
|
|
|
env->log_type_id,
|
|
|
|
btf_kind_str[kind],
|
2018-12-13 21:41:46 +03:00
|
|
|
__btf_name_by_offset(btf, t->name_off),
|
2018-04-19 01:55:57 +03:00
|
|
|
log_details ? " " : "");
|
|
|
|
|
|
|
|
if (log_details)
|
|
|
|
btf_type_ops(t)->log_details(env, t);
|
|
|
|
|
|
|
|
if (fmt && *fmt) {
|
|
|
|
__btf_verifier_log(log, " ");
|
|
|
|
va_start(args, fmt);
|
|
|
|
bpf_verifier_vlog(log, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
}
|
|
|
|
|
|
|
|
__btf_verifier_log(log, "\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
#define btf_verifier_log_type(env, t, ...) \
|
|
|
|
__btf_verifier_log_type((env), (t), true, __VA_ARGS__)
|
|
|
|
#define btf_verifier_log_basic(env, t, ...) \
|
|
|
|
__btf_verifier_log_type((env), (t), false, __VA_ARGS__)
|
|
|
|
|
|
|
|
__printf(4, 5)
|
|
|
|
static void btf_verifier_log_member(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *struct_type,
|
|
|
|
const struct btf_member *member,
|
|
|
|
const char *fmt, ...)
|
|
|
|
{
|
|
|
|
struct bpf_verifier_log *log = &env->log;
|
|
|
|
struct btf *btf = env->btf;
|
|
|
|
va_list args;
|
|
|
|
|
|
|
|
if (!bpf_verifier_log_needed(log))
|
|
|
|
return;
|
|
|
|
|
2019-10-16 06:24:57 +03:00
|
|
|
if (log->level == BPF_LOG_KERNEL && !fmt)
|
|
|
|
return;
|
2018-04-19 01:55:58 +03:00
|
|
|
/* The CHECK_META phase already did a btf dump.
|
|
|
|
*
|
|
|
|
* If member is logged again, it must hit an error in
|
|
|
|
* parsing this member. It is useful to print out which
|
|
|
|
* struct this member belongs to.
|
|
|
|
*/
|
|
|
|
if (env->phase != CHECK_META)
|
|
|
|
btf_verifier_log_type(env, struct_type, NULL);
|
|
|
|
|
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole works around the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256, which is enough for today, as the maximum
bitfield a compiler supports is 128 bits (where the int128 type is supported).
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is a union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-16 09:13:51 +03:00
|
|
|
if (btf_type_kflag(struct_type))
|
|
|
|
__btf_verifier_log(log,
|
|
|
|
"\t%s type_id=%u bitfield_size=%u bits_offset=%u",
|
|
|
|
__btf_name_by_offset(btf, member->name_off),
|
|
|
|
member->type,
|
|
|
|
BTF_MEMBER_BITFIELD_SIZE(member->offset),
|
|
|
|
BTF_MEMBER_BIT_OFFSET(member->offset));
|
|
|
|
else
|
|
|
|
__btf_verifier_log(log, "\t%s type_id=%u bits_offset=%u",
|
|
|
|
__btf_name_by_offset(btf, member->name_off),
|
|
|
|
member->type, member->offset);
|
2018-04-19 01:55:57 +03:00
|
|
|
|
|
|
|
if (fmt && *fmt) {
|
|
|
|
__btf_verifier_log(log, " ");
|
|
|
|
va_start(args, fmt);
|
|
|
|
bpf_verifier_vlog(log, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
}
|
|
|
|
|
|
|
|
__btf_verifier_log(log, "\n");
|
|
|
|
}
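A small sketch of the kind_flag member-offset packing described in the commit message above, using the uapi accessor macros:
/* With the struct's kind_flag set, btf_member->offset packs two fields:
 *   bits  0-23: bit offset
 *   bits 24-31: bitfield size
 * e.g. member "b" from the commit message: a 4-bit bitfield at bit 160.
 */
u32 offset = (4U << 24) | 160;

/* BTF_MEMBER_BITFIELD_SIZE(offset) == 4   */
/* BTF_MEMBER_BIT_OFFSET(offset)    == 160 */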
|
|
|
|
|
2019-04-10 00:20:09 +03:00
|
|
|
__printf(4, 5)
|
|
|
|
static void btf_verifier_log_vsi(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *datasec_type,
|
|
|
|
const struct btf_var_secinfo *vsi,
|
|
|
|
const char *fmt, ...)
|
|
|
|
{
|
|
|
|
struct bpf_verifier_log *log = &env->log;
|
|
|
|
va_list args;
|
|
|
|
|
|
|
|
if (!bpf_verifier_log_needed(log))
|
|
|
|
return;
|
2019-10-16 06:24:57 +03:00
|
|
|
if (log->level == BPF_LOG_KERNEL && !fmt)
|
|
|
|
return;
|
2019-04-10 00:20:09 +03:00
|
|
|
if (env->phase != CHECK_META)
|
|
|
|
btf_verifier_log_type(env, datasec_type, NULL);
|
|
|
|
|
|
|
|
__btf_verifier_log(log, "\t type_id=%u offset=%u size=%u",
|
|
|
|
vsi->type, vsi->offset, vsi->size);
|
|
|
|
if (fmt && *fmt) {
|
|
|
|
__btf_verifier_log(log, " ");
|
|
|
|
va_start(args, fmt);
|
|
|
|
bpf_verifier_vlog(log, fmt, args);
|
|
|
|
va_end(args);
|
|
|
|
}
|
|
|
|
|
|
|
|
__btf_verifier_log(log, "\n");
|
|
|
|
}
|
|
|
|
|
2018-05-23 00:57:18 +03:00
|
|
|
static void btf_verifier_log_hdr(struct btf_verifier_env *env,
|
|
|
|
u32 btf_data_size)
|
2018-04-19 01:55:57 +03:00
|
|
|
{
|
|
|
|
struct bpf_verifier_log *log = &env->log;
|
|
|
|
const struct btf *btf = env->btf;
|
|
|
|
const struct btf_header *hdr;
|
|
|
|
|
|
|
|
if (!bpf_verifier_log_needed(log))
|
|
|
|
return;
|
|
|
|
|
2019-10-16 06:24:57 +03:00
|
|
|
if (log->level == BPF_LOG_KERNEL)
|
|
|
|
return;
|
2018-05-23 00:57:18 +03:00
|
|
|
hdr = &btf->hdr;
|
2018-04-19 01:55:57 +03:00
|
|
|
__btf_verifier_log(log, "magic: 0x%x\n", hdr->magic);
|
|
|
|
__btf_verifier_log(log, "version: %u\n", hdr->version);
|
|
|
|
__btf_verifier_log(log, "flags: 0x%x\n", hdr->flags);
|
2018-05-23 00:57:18 +03:00
|
|
|
__btf_verifier_log(log, "hdr_len: %u\n", hdr->hdr_len);
|
2018-04-19 01:55:57 +03:00
|
|
|
__btf_verifier_log(log, "type_off: %u\n", hdr->type_off);
|
2018-05-23 00:57:18 +03:00
|
|
|
__btf_verifier_log(log, "type_len: %u\n", hdr->type_len);
|
2018-04-19 01:55:57 +03:00
|
|
|
__btf_verifier_log(log, "str_off: %u\n", hdr->str_off);
|
|
|
|
__btf_verifier_log(log, "str_len: %u\n", hdr->str_len);
|
2018-05-23 00:57:18 +03:00
|
|
|
__btf_verifier_log(log, "btf_total_size: %u\n", btf_data_size);
|
2018-04-19 01:55:57 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static int btf_add_type(struct btf_verifier_env *env, struct btf_type *t)
|
|
|
|
{
|
|
|
|
struct btf *btf = env->btf;
|
|
|
|
|
2020-11-10 04:19:28 +03:00
|
|
|
if (btf->types_size == btf->nr_types) {
|
2018-04-19 01:55:57 +03:00
|
|
|
/* Expand 'types' array */
|
|
|
|
|
|
|
|
struct btf_type **new_types;
|
|
|
|
u32 expand_by, new_size;
|
|
|
|
|
2020-11-10 04:19:28 +03:00
|
|
|
if (btf->start_id + btf->types_size == BTF_MAX_TYPE) {
|
2018-04-19 01:55:57 +03:00
|
|
|
btf_verifier_log(env, "Exceeded max num of types");
|
|
|
|
return -E2BIG;
|
|
|
|
}
|
|
|
|
|
|
|
|
expand_by = max_t(u32, btf->types_size >> 2, 16);
|
2018-05-23 00:57:20 +03:00
|
|
|
new_size = min_t(u32, BTF_MAX_TYPE,
|
2018-04-19 01:55:57 +03:00
|
|
|
btf->types_size + expand_by);
|
|
|
|
|
treewide: kvzalloc() -> kvcalloc()
The kvzalloc() function has a 2-factor argument form, kvcalloc(). This
patch replaces cases of:
kvzalloc(a * b, gfp)
with:
kvcalloc(a * b, gfp)
as well as handling cases of:
kvzalloc(a * b * c, gfp)
with:
kvzalloc(array3_size(a, b, c), gfp)
as it's slightly less ugly than:
kvcalloc(array_size(a, b), c, gfp)
This does, however, attempt to ignore constant size factors like:
kvzalloc(4 * 1024, gfp)
though any constants defined via macros get caught up in the conversion.
Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.
The Coccinelle script used for this was:
// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@
(
kvzalloc(
- (sizeof(TYPE)) * E
+ sizeof(TYPE) * E
, ...)
|
kvzalloc(
- (sizeof(THING)) * E
+ sizeof(THING) * E
, ...)
)
// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@
(
kvzalloc(
- sizeof(u8) * (COUNT)
+ COUNT
, ...)
|
kvzalloc(
- sizeof(__u8) * (COUNT)
+ COUNT
, ...)
|
kvzalloc(
- sizeof(char) * (COUNT)
+ COUNT
, ...)
|
kvzalloc(
- sizeof(unsigned char) * (COUNT)
+ COUNT
, ...)
|
kvzalloc(
- sizeof(u8) * COUNT
+ COUNT
, ...)
|
kvzalloc(
- sizeof(__u8) * COUNT
+ COUNT
, ...)
|
kvzalloc(
- sizeof(char) * COUNT
+ COUNT
, ...)
|
kvzalloc(
- sizeof(unsigned char) * COUNT
+ COUNT
, ...)
)
// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@
(
- kvzalloc
+ kvcalloc
(
- sizeof(TYPE) * (COUNT_ID)
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kvzalloc
+ kvcalloc
(
- sizeof(TYPE) * COUNT_ID
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kvzalloc
+ kvcalloc
(
- sizeof(TYPE) * (COUNT_CONST)
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kvzalloc
+ kvcalloc
(
- sizeof(TYPE) * COUNT_CONST
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kvzalloc
+ kvcalloc
(
- sizeof(THING) * (COUNT_ID)
+ COUNT_ID, sizeof(THING)
, ...)
|
- kvzalloc
+ kvcalloc
(
- sizeof(THING) * COUNT_ID
+ COUNT_ID, sizeof(THING)
, ...)
|
- kvzalloc
+ kvcalloc
(
- sizeof(THING) * (COUNT_CONST)
+ COUNT_CONST, sizeof(THING)
, ...)
|
- kvzalloc
+ kvcalloc
(
- sizeof(THING) * COUNT_CONST
+ COUNT_CONST, sizeof(THING)
, ...)
)
// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@
- kvzalloc
+ kvcalloc
(
- SIZE * COUNT
+ COUNT, SIZE
, ...)
// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@
(
kvzalloc(
- sizeof(TYPE) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kvzalloc(
- sizeof(TYPE) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kvzalloc(
- sizeof(TYPE) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kvzalloc(
- sizeof(TYPE) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kvzalloc(
- sizeof(THING) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kvzalloc(
- sizeof(THING) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kvzalloc(
- sizeof(THING) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kvzalloc(
- sizeof(THING) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
)
// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@
(
kvzalloc(
- sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kvzalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kvzalloc(
- sizeof(THING1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kvzalloc(
- sizeof(THING1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kvzalloc(
- sizeof(TYPE1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
|
kvzalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
)
// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@
(
kvzalloc(
- (COUNT) * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kvzalloc(
- COUNT * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kvzalloc(
- COUNT * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kvzalloc(
- (COUNT) * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kvzalloc(
- COUNT * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kvzalloc(
- (COUNT) * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kvzalloc(
- (COUNT) * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kvzalloc(
- COUNT * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
)
// Any remaining multi-factor products, first at least 3-factor products,
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@
(
kvzalloc(C1 * C2 * C3, ...)
|
kvzalloc(
- (E1) * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
|
kvzalloc(
- (E1) * (E2) * E3
+ array3_size(E1, E2, E3)
, ...)
|
kvzalloc(
- (E1) * (E2) * (E3)
+ array3_size(E1, E2, E3)
, ...)
|
kvzalloc(
- E1 * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
)
// And then all remaining 2 factors products when they're not all constants,
// keeping sizeof() as the second factor argument.
@@
expression THING, E1, E2;
type TYPE;
constant C1, C2, C3;
@@
(
kvzalloc(sizeof(THING) * C2, ...)
|
kvzalloc(sizeof(TYPE) * C2, ...)
|
kvzalloc(C1 * C2 * C3, ...)
|
kvzalloc(C1 * C2, ...)
|
- kvzalloc
+ kvcalloc
(
- sizeof(TYPE) * (E2)
+ E2, sizeof(TYPE)
, ...)
|
- kvzalloc
+ kvcalloc
(
- sizeof(TYPE) * E2
+ E2, sizeof(TYPE)
, ...)
|
- kvzalloc
+ kvcalloc
(
- sizeof(THING) * (E2)
+ E2, sizeof(THING)
, ...)
|
- kvzalloc
+ kvcalloc
(
- sizeof(THING) * E2
+ E2, sizeof(THING)
, ...)
|
- kvzalloc
+ kvcalloc
(
- (E1) * E2
+ E1, E2
, ...)
|
- kvzalloc
+ kvcalloc
(
- (E1) * (E2)
+ E1, E2
, ...)
|
- kvzalloc
+ kvcalloc
(
- E1 * E2
+ E1, E2
, ...)
)
Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-13 00:04:48 +03:00
|
|
|
new_types = kvcalloc(new_size, sizeof(*new_types),
|
2018-04-19 01:55:57 +03:00
|
|
|
GFP_KERNEL | __GFP_NOWARN);
|
|
|
|
if (!new_types)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2020-11-10 04:19:28 +03:00
|
|
|
if (btf->nr_types == 0) {
|
|
|
|
if (!btf->base_btf) {
|
|
|
|
/* lazily init VOID type */
|
|
|
|
new_types[0] = &btf_void;
|
|
|
|
btf->nr_types++;
|
|
|
|
}
|
|
|
|
} else {
|
2018-04-19 01:55:57 +03:00
|
|
|
memcpy(new_types, btf->types,
|
2020-11-10 04:19:28 +03:00
|
|
|
sizeof(*btf->types) * btf->nr_types);
|
|
|
|
}
|
2018-04-19 01:55:57 +03:00
|
|
|
|
|
|
|
kvfree(btf->types);
|
|
|
|
btf->types = new_types;
|
|
|
|
btf->types_size = new_size;
|
|
|
|
}
|
|
|
|
|
2020-11-10 04:19:28 +03:00
|
|
|
btf->types[btf->nr_types++] = t;
|
2018-04-19 01:55:57 +03:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
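For this file, the treewide conversion quoted above reduces to the 2-factor case; the shape of the change for the allocation in btf_add_type() (illustrative before/after):
/* before: open-coded multiplication, unchecked for overflow */
new_types = kvzalloc(new_size * sizeof(*new_types),
		     GFP_KERNEL | __GFP_NOWARN);

/* after: kvcalloc() checks new_size * sizeof(*new_types) for overflow */
new_types = kvcalloc(new_size, sizeof(*new_types),
		     GFP_KERNEL | __GFP_NOWARN);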
|
|
|
|
|
2018-05-05 00:49:51 +03:00
|
|
|
static int btf_alloc_id(struct btf *btf)
|
|
|
|
{
|
|
|
|
int id;
|
|
|
|
|
|
|
|
idr_preload(GFP_KERNEL);
|
|
|
|
spin_lock_bh(&btf_idr_lock);
|
|
|
|
id = idr_alloc_cyclic(&btf_idr, btf, 1, INT_MAX, GFP_ATOMIC);
|
|
|
|
if (id > 0)
|
|
|
|
btf->id = id;
|
|
|
|
spin_unlock_bh(&btf_idr_lock);
|
|
|
|
idr_preload_end();
|
|
|
|
|
|
|
|
if (WARN_ON_ONCE(!id))
|
|
|
|
return -ENOSPC;
|
|
|
|
|
|
|
|
return id > 0 ? 0 : id;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void btf_free_id(struct btf *btf)
|
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* In map-in-map, calling map_delete_elem() on the outer
|
|
|
|
* map will call bpf_map_put on the inner map.
|
|
|
|
* It will then eventually call btf_free_id()
|
|
|
|
* on the inner map. Some of the map_delete_elem()
|
|
|
|
* implementation may have irq disabled, so
|
|
|
|
* we need to use the _irqsave() version instead
|
|
|
|
* of the _bh() version.
|
|
|
|
*/
|
|
|
|
spin_lock_irqsave(&btf_idr_lock, flags);
|
|
|
|
idr_remove(&btf_idr, btf->id);
|
|
|
|
spin_unlock_irqrestore(&btf_idr_lock, flags);
|
|
|
|
}
|
|
|
|
|
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set for a given hook and type in the vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do a single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still initializing. A BTF fd can then be passed to the verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or wait for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 19:39:45 +03:00
|
|
|
static void btf_free_kfunc_set_tab(struct btf *btf)
|
|
|
|
{
|
|
|
|
struct btf_kfunc_set_tab *tab = btf->kfunc_set_tab;
|
|
|
|
int hook, type;
|
|
|
|
|
|
|
|
if (!tab)
|
|
|
|
return;
|
|
|
|
/* For module BTF, we directly assign the sets being registered, so
|
|
|
|
* there is nothing to free except kfunc_set_tab.
|
|
|
|
*/
|
|
|
|
if (btf_is_module(btf))
|
|
|
|
goto free_tab;
|
|
|
|
for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) {
|
|
|
|
for (type = 0; type < ARRAY_SIZE(tab->sets[0]); type++)
|
|
|
|
kfree(tab->sets[hook][type]);
|
|
|
|
}
|
|
|
|
free_tab:
|
|
|
|
kfree(tab);
|
|
|
|
btf->kfunc_set_tab = NULL;
|
|
|
|
}
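As a sketch of the registration side that feeds this table, modeled on the commit message above (the kfunc name is hypothetical and the exact struct fields and hook constant are assumptions, not taken from this file): a module or built-in subsystem declares a BTF ID set and registers it from an initcall.
BTF_SET_START(demo_kfunc_ids)
BTF_ID(func, bpf_demo_kfunc)	/* hypothetical kfunc */
BTF_SET_END(demo_kfunc_ids)

static const struct btf_kfunc_id_set demo_kfunc_set = {
	.owner	   = THIS_MODULE,
	.check_set = &demo_kfunc_ids,	/* the "check" type described above */
};

static int __init demo_init(void)
{
	/* hook: one of the program types, e.g. XDP or TC/SCHED_CLS */
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS,
					 &demo_kfunc_set);
}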
|
|
|
|
|
2018-04-19 01:55:57 +03:00
|
|
|
static void btf_free(struct btf *btf)
|
|
|
|
{
|
|
|
|
btf_free_kfunc_set_tab(btf);
|
2018-04-19 01:55:57 +03:00
|
|
|
kvfree(btf->types);
|
2018-04-19 01:55:58 +03:00
|
|
|
kvfree(btf->resolved_sizes);
|
|
|
|
kvfree(btf->resolved_ids);
|
2018-04-19 01:55:57 +03:00
|
|
|
kvfree(btf->data);
|
|
|
|
kfree(btf);
|
|
|
|
}
|
|
|
|
|
2018-05-05 00:49:51 +03:00
|
|
|
static void btf_free_rcu(struct rcu_head *rcu)
|
2018-04-19 01:56:01 +03:00
|
|
|
{
|
2018-05-05 00:49:51 +03:00
|
|
|
struct btf *btf = container_of(rcu, struct btf, rcu);
|
|
|
|
|
|
|
|
btf_free(btf);
|
2018-04-19 01:56:01 +03:00
|
|
|
}
|
|
|
|
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption throughout the BPF verifier of vmlinux BTF. Instead,
wherever BTF type IDs are involved, also track the instance of struct btf that
goes along with the type ID. This allows to gradually add support for kernel
module BTFs and using/tracking module types across BPF helper calls and
registers.
This patch also renames btf_id() function to btf_obj_id() to minimize naming
clash with using btf_id to denote BTF *type* ID, rather than BTF *object*'s ID.
Also, although btf_vmlinux can't be destructed and thus doesn't need
refcounting, module BTFs need that, so apply BTF refcounting universally when
BPF program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler clean up code.
Now that BTF type ID is not enough to uniquely identify a BTF type, extend BPF
trampoline key to include BTF object ID. To differentiate that from target
program BPF ID, set the 31st bit of the type ID. BTF type IDs (at least currently) are
not allowed to take full 32 bits, so there is no danger of confusing that bit
with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-03 23:46:29 +03:00
|
|
|
void btf_get(struct btf *btf)
|
|
|
|
{
|
|
|
|
refcount_inc(&btf->refcnt);
|
|
|
|
}
|
|
|
|
|
2018-04-19 01:56:01 +03:00
|
|
|
void btf_put(struct btf *btf)
|
|
|
|
{
|
2018-05-05 00:49:51 +03:00
|
|
|
if (btf && refcount_dec_and_test(&btf->refcnt)) {
|
|
|
|
btf_free_id(btf);
|
|
|
|
call_rcu(&btf->rcu, btf_free_rcu);
|
|
|
|
}
|
2018-04-19 01:56:01 +03:00
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:58 +03:00
|
|
|
static int env_resolve_init(struct btf_verifier_env *env)
|
|
|
|
{
|
|
|
|
struct btf *btf = env->btf;
|
|
|
|
u32 nr_types = btf->nr_types;
|
|
|
|
u32 *resolved_sizes = NULL;
|
|
|
|
u32 *resolved_ids = NULL;
|
|
|
|
u8 *visit_states = NULL;
|
|
|
|
|
2020-11-10 04:19:28 +03:00
|
|
|
resolved_sizes = kvcalloc(nr_types, sizeof(*resolved_sizes),
|
2018-04-19 01:55:58 +03:00
|
|
|
GFP_KERNEL | __GFP_NOWARN);
|
|
|
|
if (!resolved_sizes)
|
|
|
|
goto nomem;
|
|
|
|
|
2020-11-10 04:19:28 +03:00
|
|
|
resolved_ids = kvcalloc(nr_types, sizeof(*resolved_ids),
|
2018-04-19 01:55:58 +03:00
|
|
|
GFP_KERNEL | __GFP_NOWARN);
|
|
|
|
if (!resolved_ids)
|
|
|
|
goto nomem;
|
|
|
|
|
2020-11-10 04:19:28 +03:00
|
|
|
visit_states = kvcalloc(nr_types, sizeof(*visit_states),
|
2018-04-19 01:55:58 +03:00
|
|
|
GFP_KERNEL | __GFP_NOWARN);
|
|
|
|
if (!visit_states)
|
|
|
|
goto nomem;
|
|
|
|
|
|
|
|
btf->resolved_sizes = resolved_sizes;
|
|
|
|
btf->resolved_ids = resolved_ids;
|
|
|
|
env->visit_states = visit_states;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
nomem:
|
|
|
|
kvfree(resolved_sizes);
|
|
|
|
kvfree(resolved_ids);
|
|
|
|
kvfree(visit_states);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:57 +03:00
|
|
|
static void btf_verifier_env_free(struct btf_verifier_env *env)
|
|
|
|
{
|
2018-04-19 01:55:58 +03:00
|
|
|
kvfree(env->visit_states);
|
2018-04-19 01:55:57 +03:00
|
|
|
kfree(env);
|
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:58 +03:00
|
|
|
static bool env_type_is_resolve_sink(const struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *next_type)
|
|
|
|
{
|
|
|
|
switch (env->resolve_mode) {
|
|
|
|
case RESOLVE_TBD:
|
|
|
|
/* int, enum or void is a sink */
|
|
|
|
return !btf_type_needs_resolve(next_type);
|
|
|
|
case RESOLVE_PTR:
|
2018-11-20 02:29:08 +03:00
|
|
|
/* int, enum, void, struct, array, func or func_proto is a sink
|
|
|
|
* for ptr
|
|
|
|
*/
|
2018-04-19 01:55:58 +03:00
|
|
|
return !btf_type_is_modifier(next_type) &&
|
|
|
|
!btf_type_is_ptr(next_type);
|
|
|
|
case RESOLVE_STRUCT_OR_ARRAY:
|
2018-11-20 02:29:08 +03:00
|
|
|
/* int, enum, void, ptr, func or func_proto is a sink
|
|
|
|
* for struct and array
|
|
|
|
*/
|
2018-04-19 01:55:58 +03:00
|
|
|
return !btf_type_is_modifier(next_type) &&
|
|
|
|
!btf_type_is_array(next_type) &&
|
|
|
|
!btf_type_is_struct(next_type);
|
|
|
|
default:
|
2018-05-26 00:33:19 +03:00
|
|
|
BUG();
|
2018-04-19 01:55:58 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool env_type_is_resolved(const struct btf_verifier_env *env,
|
|
|
|
u32 type_id)
|
|
|
|
{
|
2020-11-10 04:19:28 +03:00
|
|
|
/* base BTF types should be resolved by now */
|
|
|
|
if (type_id < env->btf->start_id)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
return env->visit_states[type_id - env->btf->start_id] == RESOLVED;
|
2018-04-19 01:55:58 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static int env_stack_push(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t, u32 type_id)
|
|
|
|
{
|
2020-11-10 04:19:28 +03:00
|
|
|
const struct btf *btf = env->btf;
|
2018-04-19 01:55:58 +03:00
|
|
|
struct resolve_vertex *v;
|
|
|
|
|
|
|
|
if (env->top_stack == MAX_RESOLVE_DEPTH)
|
|
|
|
return -E2BIG;
|
|
|
|
|
2020-11-10 04:19:28 +03:00
|
|
|
if (type_id < btf->start_id
|
|
|
|
|| env->visit_states[type_id - btf->start_id] != NOT_VISITED)
|
2018-04-19 01:55:58 +03:00
|
|
|
return -EEXIST;
|
|
|
|
|
2020-11-10 04:19:28 +03:00
|
|
|
env->visit_states[type_id - btf->start_id] = VISITED;
|
2018-04-19 01:55:58 +03:00
|
|
|
|
|
|
|
v = &env->stack[env->top_stack++];
|
|
|
|
v->t = t;
|
|
|
|
v->type_id = type_id;
|
|
|
|
v->next_member = 0;
|
|
|
|
|
|
|
|
if (env->resolve_mode == RESOLVE_TBD) {
|
|
|
|
if (btf_type_is_ptr(t))
|
|
|
|
env->resolve_mode = RESOLVE_PTR;
|
|
|
|
else if (btf_type_is_struct(t) || btf_type_is_array(t))
|
|
|
|
env->resolve_mode = RESOLVE_STRUCT_OR_ARRAY;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void env_stack_set_next_member(struct btf_verifier_env *env,
|
|
|
|
u16 next_member)
|
|
|
|
{
|
|
|
|
env->stack[env->top_stack - 1].next_member = next_member;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void env_stack_pop_resolved(struct btf_verifier_env *env,
|
|
|
|
u32 resolved_type_id,
|
|
|
|
u32 resolved_size)
|
|
|
|
{
|
|
|
|
u32 type_id = env->stack[--(env->top_stack)].type_id;
|
|
|
|
struct btf *btf = env->btf;
|
|
|
|
|
2020-11-10 04:19:28 +03:00
|
|
|
type_id -= btf->start_id; /* adjust to local type id */
|
2018-04-19 01:55:58 +03:00
|
|
|
btf->resolved_sizes[type_id] = resolved_size;
|
|
|
|
btf->resolved_ids[type_id] = resolved_type_id;
|
|
|
|
env->visit_states[type_id] = RESOLVED;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct resolve_vertex *env_stack_peak(struct btf_verifier_env *env)
|
|
|
|
{
|
|
|
|
return env->top_stack ? &env->stack[env->top_stack - 1] : NULL;
|
|
|
|
}
|
|
|
|
|
2019-11-07 21:09:03 +03:00
|
|
|
/* Resolve the size of a passed-in "type"
|
|
|
|
*
|
|
|
|
* type: is an array (e.g. u32 array[x][y])
|
|
|
|
* return type: type "u32[x][y]", i.e. BTF_KIND_ARRAY,
|
|
|
|
* *type_size: (x * y * sizeof(u32)). Hence, *type_size always
|
|
|
|
* corresponds to the return type.
|
|
|
|
* *elem_type: u32
|
2020-08-25 22:21:14 +03:00
|
|
|
* *elem_id: id of u32
|
2019-11-07 21:09:03 +03:00
|
|
|
* *total_nelems: (x * y). Hence, individual elem size is
|
|
|
|
* (*type_size / *total_nelems)
|
2020-08-25 22:21:15 +03:00
|
|
|
* *type_id: id of type if it's changed within the function, 0 if not
|
2019-11-07 21:09:03 +03:00
|
|
|
*
|
|
|
|
* type: is not an array (e.g. const struct X)
|
|
|
|
* return type: type "struct X"
|
|
|
|
* *type_size: sizeof(struct X)
|
|
|
|
* *elem_type: same as return type ("struct X")
|
2020-08-25 22:21:14 +03:00
|
|
|
* *elem_id: 0
|
2019-11-07 21:09:03 +03:00
|
|
|
* *total_nelems: 1
|
2020-08-25 22:21:15 +03:00
|
|
|
* *type_id: id of type if it's changed within the function, 0 if not
|
2019-11-07 21:09:03 +03:00
|
|
|
*/
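A concrete instance of the array case documented above (a hypothetical caller; the input variable name is illustrative): for a BTF type describing "u32 arr[2][3]":
const struct btf_type *elem_type, *ret;
u32 type_size, elem_id, total_nelems, type_id;

ret = __btf_resolve_size(btf, arr_type, &type_size, &elem_type,
			 &elem_id, &total_nelems, &type_id);

/* on success:
 *   ret          -> the BTF_KIND_ARRAY type "u32[2][3]"
 *   type_size    == 2 * 3 * sizeof(u32) == 24
 *   total_nelems == 6, so each element is 24 / 6 == 4 bytes
 *   elem_type    -> the "u32" type, elem_id its type id
 */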
|
2020-08-25 22:21:13 +03:00
|
|
|
static const struct btf_type *
|
|
|
|
__btf_resolve_size(const struct btf *btf, const struct btf_type *type,
|
|
|
|
u32 *type_size, const struct btf_type **elem_type,
|
2020-08-25 22:21:15 +03:00
|
|
|
u32 *elem_id, u32 *total_nelems, u32 *type_id)
|
2019-11-07 21:09:03 +03:00
|
|
|
{
|
|
|
|
const struct btf_type *array_type = NULL;
|
2020-08-25 22:21:14 +03:00
|
|
|
const struct btf_array *array = NULL;
|
2020-08-25 22:21:15 +03:00
|
|
|
u32 i, size, nelems = 1, id = 0;
|
2019-11-07 21:09:03 +03:00
|
|
|
|
|
|
|
for (i = 0; i < MAX_RESOLVE_DEPTH; i++) {
|
|
|
|
switch (BTF_INFO_KIND(type->info)) {
|
|
|
|
/* type->size can be used */
|
|
|
|
case BTF_KIND_INT:
|
|
|
|
case BTF_KIND_STRUCT:
|
|
|
|
case BTF_KIND_UNION:
|
|
|
|
case BTF_KIND_ENUM:
|
2021-02-26 23:22:52 +03:00
|
|
|
case BTF_KIND_FLOAT:
|
2019-11-07 21:09:03 +03:00
|
|
|
size = type->size;
|
|
|
|
goto resolved;
|
|
|
|
|
|
|
|
case BTF_KIND_PTR:
|
|
|
|
size = sizeof(void *);
|
|
|
|
goto resolved;
|
|
|
|
|
|
|
|
/* Modifiers */
|
|
|
|
case BTF_KIND_TYPEDEF:
|
|
|
|
case BTF_KIND_VOLATILE:
|
|
|
|
case BTF_KIND_CONST:
|
|
|
|
case BTF_KIND_RESTRICT:
|
2021-11-12 04:26:09 +03:00
|
|
|
case BTF_KIND_TYPE_TAG:
|
2020-08-25 22:21:15 +03:00
|
|
|
id = type->type;
|
2019-11-07 21:09:03 +03:00
|
|
|
type = btf_type_by_id(btf, type->type);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case BTF_KIND_ARRAY:
|
|
|
|
if (!array_type)
|
|
|
|
array_type = type;
|
|
|
|
array = btf_type_array(type);
|
|
|
|
if (nelems && array->nelems > U32_MAX / nelems)
|
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
nelems *= array->nelems;
|
|
|
|
type = btf_type_by_id(btf, array->type);
|
|
|
|
break;
|
|
|
|
|
|
|
|
/* type without size */
|
|
|
|
default:
|
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
|
|
|
|
resolved:
|
|
|
|
if (nelems && size > U32_MAX / nelems)
|
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
|
|
|
|
*type_size = nelems * size;
|
2020-01-09 03:35:05 +03:00
|
|
|
if (total_nelems)
|
|
|
|
*total_nelems = nelems;
|
|
|
|
if (elem_type)
|
|
|
|
*elem_type = type;
|
2020-08-25 22:21:14 +03:00
|
|
|
if (elem_id)
|
|
|
|
*elem_id = array ? array->type : 0;
|
2020-08-25 22:21:15 +03:00
|
|
|
if (type_id && id)
|
|
|
|
*type_id = id;
|
2019-11-07 21:09:03 +03:00
|
|
|
|
|
|
|
return array_type ? : type;
|
|
|
|
}
|
|
|
|
|
2020-08-25 22:21:13 +03:00
|
|
|
const struct btf_type *
|
|
|
|
btf_resolve_size(const struct btf *btf, const struct btf_type *type,
|
|
|
|
u32 *type_size)
|
|
|
|
{
|
2020-08-25 22:21:15 +03:00
|
|
|
return __btf_resolve_size(btf, type, type_size, NULL, NULL, NULL, NULL);
|
2020-08-25 22:21:13 +03:00
|
|
|
}
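/* Example (illustrative, not from the original source): for a BTF
 * type describing "u32 arr[2][3]", a caller can do:
 *
 *	u32 size;
 *	const struct btf_type *t;
 *
 *	t = btf_resolve_size(btf, arr_type, &size);
 *	if (IS_ERR(t))
 *		return PTR_ERR(t);
 *
 * On success t points at the outermost BTF_KIND_ARRAY and
 * size == 2 * 3 * sizeof(u32) == 24.
 */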
|
|
|
|
|
2020-11-10 04:19:28 +03:00
|
|
|
static u32 btf_resolved_type_id(const struct btf *btf, u32 type_id)
|
|
|
|
{
|
|
|
|
while (type_id < btf->start_id)
|
|
|
|
btf = btf->base_btf;
|
|
|
|
|
|
|
|
return btf->resolved_ids[type_id - btf->start_id];
|
|
|
|
}
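/* Worked example (assuming the base vmlinux BTF uses start_id 1):
 * with split BTF, a module's btf may have start_id == N while ids
 * 1..N-1 live in the base BTF.  A lookup of resolved id 123 from the
 * module btf therefore walks down to the base BTF and reads
 * base->resolved_ids[123 - 1] there.
 */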
|
|
|
|
|
2018-04-19 01:55:58 +03:00
|
|
|
/* The input param "type_id" must point to a needs_resolve type */
|
|
|
|
static const struct btf_type *btf_type_id_resolve(const struct btf *btf,
|
|
|
|
u32 *type_id)
|
|
|
|
{
|
2020-11-10 04:19:28 +03:00
|
|
|
*type_id = btf_resolved_type_id(btf, *type_id);
|
2018-04-19 01:55:58 +03:00
|
|
|
return btf_type_by_id(btf, *type_id);
|
|
|
|
}
|
|
|
|
|
2020-11-10 04:19:28 +03:00
|
|
|
static u32 btf_resolved_type_size(const struct btf *btf, u32 type_id)
|
|
|
|
{
|
|
|
|
while (type_id < btf->start_id)
|
|
|
|
btf = btf->base_btf;
|
|
|
|
|
|
|
|
return btf->resolved_sizes[type_id - btf->start_id];
|
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:58 +03:00
|
|
|
const struct btf_type *btf_type_id_size(const struct btf *btf,
|
|
|
|
u32 *type_id, u32 *ret_size)
|
|
|
|
{
|
|
|
|
const struct btf_type *size_type;
|
|
|
|
u32 size_type_id = *type_id;
|
|
|
|
u32 size = 0;
|
|
|
|
|
|
|
|
size_type = btf_type_by_id(btf, size_type_id);
|
2018-11-20 02:29:06 +03:00
|
|
|
if (btf_type_nosize_or_null(size_type))
|
2018-04-19 01:55:58 +03:00
|
|
|
return NULL;
|
|
|
|
|
|
|
|
if (btf_type_has_size(size_type)) {
|
|
|
|
size = size_type->size;
|
|
|
|
} else if (btf_type_is_array(size_type)) {
|
2020-11-10 04:19:28 +03:00
|
|
|
size = btf_resolved_type_size(btf, size_type_id);
|
2018-04-19 01:55:58 +03:00
|
|
|
} else if (btf_type_is_ptr(size_type)) {
|
|
|
|
size = sizeof(void *);
|
|
|
|
} else {
|
2019-04-10 00:20:09 +03:00
|
|
|
if (WARN_ON_ONCE(!btf_type_is_modifier(size_type) &&
|
|
|
|
!btf_type_is_var(size_type)))
|
2018-04-19 01:55:58 +03:00
|
|
|
return NULL;
|
|
|
|
|
2020-11-10 04:19:28 +03:00
|
|
|
size_type_id = btf_resolved_type_id(btf, size_type_id);
|
2018-04-19 01:55:58 +03:00
|
|
|
size_type = btf_type_by_id(btf, size_type_id);
|
2018-11-20 02:29:06 +03:00
|
|
|
if (btf_type_nosize_or_null(size_type))
|
2018-04-19 01:55:58 +03:00
|
|
|
return NULL;
|
2019-07-12 20:25:55 +03:00
|
|
|
else if (btf_type_has_size(size_type))
|
|
|
|
size = size_type->size;
|
|
|
|
else if (btf_type_is_array(size_type))
|
2020-11-10 04:19:28 +03:00
|
|
|
size = btf_resolved_type_size(btf, size_type_id);
|
2019-07-12 20:25:55 +03:00
|
|
|
else if (btf_type_is_ptr(size_type))
|
|
|
|
size = sizeof(void *);
|
|
|
|
else
|
|
|
|
return NULL;
|
2018-04-19 01:55:58 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
*type_id = size_type_id;
|
|
|
|
if (ret_size)
|
|
|
|
*ret_size = size;
|
|
|
|
|
|
|
|
return size_type;
|
|
|
|
}
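/* Example (illustrative): given
 *
 *	typedef int array_t[16];
 *
 * i.e. the chain TYPEDEF -> ARRAY, the TYPEDEF itself carries no
 * size.  The modifier branch above therefore follows the typedef's
 * *resolved id* and takes the size from the underlying ARRAY
 * (16 * sizeof(int) == 64) instead of trusting a resolved_size
 * cached on the modifier, which may legitimately still be zero.
 */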
|
|
|
|
|
2018-04-19 01:55:59 +03:00
|
|
|
static int btf_df_check_member(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *struct_type,
|
|
|
|
const struct btf_member *member,
|
|
|
|
const struct btf_type *member_type)
|
|
|
|
{
|
|
|
|
btf_verifier_log_basic(env, struct_type,
|
|
|
|
"Unsupported check_member");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-12-16 09:13:51 +03:00
|
|
|
static int btf_df_check_kflag_member(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *struct_type,
|
|
|
|
const struct btf_member *member,
|
|
|
|
const struct btf_type *member_type)
|
|
|
|
{
|
|
|
|
btf_verifier_log_basic(env, struct_type,
|
|
|
|
"Unsupported check_kflag_member");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2021-02-26 23:22:52 +03:00
|
|
|
/* Used for ptr, array, struct/union and float type members.
|
2018-12-16 09:13:51 +03:00
|
|
|
* int, enum and modifier types have their specific callback functions.
|
|
|
|
*/
|
|
|
|
static int btf_generic_check_kflag_member(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *struct_type,
|
|
|
|
const struct btf_member *member,
|
|
|
|
const struct btf_type *member_type)
|
|
|
|
{
|
|
|
|
if (BTF_MEMBER_BITFIELD_SIZE(member->offset)) {
|
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"Invalid member bitfield_size");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* bitfield size is 0, so member->offset represents bit offset only.
|
|
|
|
* It is safe to call the non-kflag check_member variants.
|
|
|
|
*/
|
|
|
|
return btf_type_ops(member_type)->check_member(env, struct_type,
|
|
|
|
member,
|
|
|
|
member_type);
|
|
|
|
}
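/* Illustrative note: with kind_flag set, member->offset packs two
 * fields: bits 0-23 hold the bit offset and bits 24-31 the bitfield
 * size.  For example, offset == (4 << 24) | 160 describes a 4-bit
 * bitfield starting at bit 160; BTF_MEMBER_BITFIELD_SIZE() and
 * BTF_MEMBER_BIT_OFFSET() extract the two halves.
 */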
|
|
|
|
|
2018-04-19 01:55:58 +03:00
|
|
|
static int btf_df_resolve(struct btf_verifier_env *env,
|
|
|
|
const struct resolve_vertex *v)
|
|
|
|
{
|
|
|
|
btf_verifier_log_basic(env, v->t, "Unsupported resolve");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2020-09-28 14:31:04 +03:00
|
|
|
static void btf_df_show(const struct btf *btf, const struct btf_type *t,
|
|
|
|
u32 type_id, void *data, u8 bits_offsets,
|
|
|
|
struct btf_show *show)
|
2018-04-19 01:56:00 +03:00
|
|
|
{
|
2020-09-28 14:31:04 +03:00
|
|
|
btf_show(show, "<unsupported kind:%u>", BTF_INFO_KIND(t->info));
|
2018-04-19 01:56:00 +03:00
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:59 +03:00
|
|
|
static int btf_int_check_member(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *struct_type,
|
|
|
|
const struct btf_member *member,
|
|
|
|
const struct btf_type *member_type)
|
|
|
|
{
|
|
|
|
u32 int_data = btf_type_int(member_type);
|
|
|
|
u32 struct_bits_off = member->offset;
|
|
|
|
u32 struct_size = struct_type->size;
|
|
|
|
u32 nr_copy_bits;
|
|
|
|
u32 bytes_offset;
|
|
|
|
|
|
|
|
if (U32_MAX - struct_bits_off < BTF_INT_OFFSET(int_data)) {
|
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"bits_offset exceeds U32_MAX");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct_bits_off += BTF_INT_OFFSET(int_data);
|
|
|
|
bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
|
|
|
|
nr_copy_bits = BTF_INT_BITS(int_data) +
|
|
|
|
BITS_PER_BYTE_MASKED(struct_bits_off);
|
|
|
|
|
2019-01-16 04:07:47 +03:00
|
|
|
if (nr_copy_bits > BITS_PER_U128) {
|
2018-04-19 01:55:59 +03:00
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
2019-01-16 04:07:47 +03:00
|
|
|
"nr_copy_bits exceeds 128");
|
2018-04-19 01:55:59 +03:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (struct_size < bytes_offset ||
|
|
|
|
struct_size - bytes_offset < BITS_ROUNDUP_BYTES(nr_copy_bits)) {
|
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"Member exceeds struct_size");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-12-16 09:13:51 +03:00
|
|
|
static int btf_int_check_kflag_member(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *struct_type,
|
|
|
|
const struct btf_member *member,
|
|
|
|
const struct btf_type *member_type)
|
|
|
|
{
|
|
|
|
u32 struct_bits_off, nr_bits, nr_int_data_bits, bytes_offset;
|
|
|
|
u32 int_data = btf_type_int(member_type);
|
|
|
|
u32 struct_size = struct_type->size;
|
|
|
|
u32 nr_copy_bits;
|
|
|
|
|
|
|
|
/* a regular int type is required for the kflag int member */
|
|
|
|
if (!btf_type_int_is_regular(member_type)) {
|
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"Invalid member base type");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check sanity of bitfield size */
|
|
|
|
nr_bits = BTF_MEMBER_BITFIELD_SIZE(member->offset);
|
|
|
|
struct_bits_off = BTF_MEMBER_BIT_OFFSET(member->offset);
|
|
|
|
nr_int_data_bits = BTF_INT_BITS(int_data);
|
|
|
|
if (!nr_bits) {
|
|
|
|
/* Not a bitfield member, member offset must be at byte
|
|
|
|
* boundary.
|
|
|
|
*/
|
|
|
|
if (BITS_PER_BYTE_MASKED(struct_bits_off)) {
|
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"Invalid member offset");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
nr_bits = nr_int_data_bits;
|
|
|
|
} else if (nr_bits > nr_int_data_bits) {
|
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"Invalid member bitfield_size");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
|
|
|
|
nr_copy_bits = nr_bits + BITS_PER_BYTE_MASKED(struct_bits_off);
|
2019-01-16 04:07:47 +03:00
|
|
|
if (nr_copy_bits > BITS_PER_U128) {
|
2018-12-16 09:13:51 +03:00
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
2019-01-16 04:07:47 +03:00
|
|
|
"nr_copy_bits exceeds 128");
|
2018-12-16 09:13:51 +03:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (struct_size < bytes_offset ||
|
|
|
|
struct_size - bytes_offset < BITS_ROUNDUP_BYTES(nr_copy_bits)) {
|
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"Member exceeds struct_size");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
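/* Worked example (illustrative): for a 4-bit int bitfield at
 * bits_offset 164 in a 24-byte struct:
 *
 *	bytes_offset = 164 / 8 = 20
 *	nr_copy_bits = 4 + (164 & 7) = 8
 *
 * BITS_ROUNDUP_BYTES(8) == 1, and 24 - 20 >= 1, so the member fits
 * within the struct and the check succeeds.
 */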
|
|
|
|
|
2018-04-19 01:55:57 +03:00
|
|
|
static s32 btf_int_check_meta(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t,
|
|
|
|
u32 meta_left)
|
|
|
|
{
|
|
|
|
u32 int_data, nr_bits, meta_needed = sizeof(int_data);
|
|
|
|
u16 encoding;
|
|
|
|
|
|
|
|
if (meta_left < meta_needed) {
|
|
|
|
btf_verifier_log_basic(env, t,
|
|
|
|
"meta_left:%u meta_needed:%u",
|
|
|
|
meta_left, meta_needed);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (btf_type_vlen(t)) {
|
|
|
|
btf_verifier_log_type(env, t, "vlen != 0");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-12-16 09:13:51 +03:00
|
|
|
if (btf_type_kflag(t)) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:57 +03:00
|
|
|
int_data = btf_type_int(t);
|
2018-05-23 00:57:20 +03:00
|
|
|
if (int_data & ~BTF_INT_MASK) {
|
|
|
|
btf_verifier_log_basic(env, t, "Invalid int_data:%x",
|
|
|
|
int_data);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:57 +03:00
|
|
|
nr_bits = BTF_INT_BITS(int_data) + BTF_INT_OFFSET(int_data);
|
|
|
|
|
2019-01-16 04:07:47 +03:00
|
|
|
if (nr_bits > BITS_PER_U128) {
|
2018-04-19 01:55:57 +03:00
|
|
|
btf_verifier_log_type(env, t, "nr_bits exceeds %zu",
|
2019-01-16 04:07:47 +03:00
|
|
|
BITS_PER_U128);
|
2018-04-19 01:55:57 +03:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (BITS_ROUNDUP_BYTES(nr_bits) > t->size) {
|
|
|
|
btf_verifier_log_type(env, t, "nr_bits exceeds type_size");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-05-23 00:57:20 +03:00
|
|
|
/*
|
|
|
|
* Only one of the encoding bits is allowed and it
|
|
|
|
* should be sufficient for the pretty print purpose (i.e. decoding).
|
|
|
|
* Multiple bits can be allowed later if it is found
|
|
|
|
* to be insufficient.
|
|
|
|
*/
|
2018-04-19 01:55:57 +03:00
|
|
|
encoding = BTF_INT_ENCODING(int_data);
|
|
|
|
if (encoding &&
|
|
|
|
encoding != BTF_INT_SIGNED &&
|
|
|
|
encoding != BTF_INT_CHAR &&
|
2018-05-23 00:57:20 +03:00
|
|
|
encoding != BTF_INT_BOOL) {
|
2018-04-19 01:55:57 +03:00
|
|
|
btf_verifier_log_type(env, t, "Unsupported encoding");
|
|
|
|
return -ENOTSUPP;
|
|
|
|
}
|
|
|
|
|
|
|
|
btf_verifier_log_type(env, t, NULL);
|
|
|
|
|
|
|
|
return meta_needed;
|
|
|
|
}
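/* Illustrative example: int_data packs encoding (bits 24-27),
 * bit offset (bits 16-23) and nr_bits (bits 0-7).  So, given
 * t->size == 4, int_data == 0x01000020 describes a plain 32-bit
 * signed "int": encoding BTF_INT_SIGNED, offset 0, nr_bits 32,
 * which passes all of the checks above.
 */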
|
|
|
|
|
|
|
|
static void btf_int_log(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t)
|
|
|
|
{
|
|
|
|
int int_data = btf_type_int(t);
|
|
|
|
|
|
|
|
btf_verifier_log(env,
|
|
|
|
"size=%u bits_offset=%u nr_bits=%u encoding=%s",
|
|
|
|
t->size, BTF_INT_OFFSET(int_data),
|
|
|
|
BTF_INT_BITS(int_data),
|
|
|
|
btf_int_encoding_str(BTF_INT_ENCODING(int_data)));
|
|
|
|
}
|
|
|
|
|
2020-09-28 14:31:04 +03:00
|
|
|
static void btf_int128_print(struct btf_show *show, void *data)
|
2019-01-16 04:07:47 +03:00
|
|
|
{
|
|
|
|
/* data points to a __int128 number.
|
|
|
|
* Suppose
|
|
|
|
* int128_num = *(__int128 *)data;
|
|
|
|
* The formulas below show what upper_num and lower_num represent:
|
|
|
|
* upper_num = int128_num >> 64;
|
|
|
|
* lower_num = int128_num & 0xffffffffFFFFFFFFULL;
|
|
|
|
*/
|
|
|
|
u64 upper_num, lower_num;
|
|
|
|
|
|
|
|
#ifdef __BIG_ENDIAN_BITFIELD
|
|
|
|
upper_num = *(u64 *)data;
|
|
|
|
lower_num = *(u64 *)(data + 8);
|
|
|
|
#else
|
|
|
|
upper_num = *(u64 *)(data + 8);
|
|
|
|
lower_num = *(u64 *)data;
|
|
|
|
#endif
|
|
|
|
if (upper_num == 0)
|
2020-09-28 14:31:04 +03:00
|
|
|
btf_show_type_value(show, "0x%llx", lower_num);
|
2019-01-16 04:07:47 +03:00
|
|
|
else
|
2020-09-28 14:31:04 +03:00
|
|
|
btf_show_type_values(show, "0x%llx%016llx", upper_num,
|
|
|
|
lower_num);
|
bpf: btf: support 128 bit integer type
Currently, btf only supports up to 64-bit integer.
On the other hand, 128bit support for gcc and clang
has existed for a long time. For example, both gcc 4.8
and llvm 3.7 supports types "__int128" and
"unsigned __int128" for virtually all 64bit architectures
including bpf.
The requirement for __int128 support comes from two areas:
. bpf program may use __int128. For example, some bcc tools
(https://github.com/iovisor/bcc/tree/master/tools),
mostly tcp v6 related, tcpstates.py, tcpaccept.py, etc.,
are using __int128 to represent the ipv6 addresses.
. linux itself is using __int128 types. Hence supporting
__int128 type in BTF is required for vmlinux BTF,
which will be used by "compile once and run everywhere"
and other projects.
For 128bit integer, instead of base-10, hex numbers are pretty
printed out as large decimal number is hard to decipher, e.g.,
for ipv6 addresses.
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2019-01-16 04:07:47 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
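/* Example (illustrative, not from the original source): for a
 * little-endian __int128 value 0x00000000000000010000000000000002,
 * the split above yields upper_num = 0x1 and lower_num = 0x2,
 * printed as "0x10000000000000002".  A value that fits in 64 bits,
 * e.g. 0x2, prints without leading zeroes as "0x2".
 */
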
static void btf_int128_shift(u64 *print_num, u16 left_shift_bits,
			     u16 right_shift_bits)
{
	u64 upper_num, lower_num;

#ifdef __BIG_ENDIAN_BITFIELD
	upper_num = print_num[0];
	lower_num = print_num[1];
#else
	upper_num = print_num[1];
	lower_num = print_num[0];
#endif

	/* shake out un-needed bits by shift/or operations */
	if (left_shift_bits >= 64) {
		upper_num = lower_num << (left_shift_bits - 64);
		lower_num = 0;
	} else {
		upper_num = (upper_num << left_shift_bits) |
			    (lower_num >> (64 - left_shift_bits));
		lower_num = lower_num << left_shift_bits;
	}

	if (right_shift_bits >= 64) {
		lower_num = upper_num >> (right_shift_bits - 64);
		upper_num = 0;
	} else {
		lower_num = (lower_num >> right_shift_bits) |
			    (upper_num << (64 - right_shift_bits));
		upper_num = upper_num >> right_shift_bits;
	}

#ifdef __BIG_ENDIAN_BITFIELD
	print_num[0] = upper_num;
	print_num[1] = lower_num;
#else
	print_num[0] = lower_num;
	print_num[1] = upper_num;
#endif
}

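/* Worked example (illustrative): to isolate a 4-bit bitfield that
 * starts at bit offset 4 on a little-endian host, the caller passes
 * left_shift_bits = 128 - (4 + 4) = 120 and right_shift_bits =
 * 128 - 4 = 124.  The left shift pushes the field to the top of the
 * 128-bit value, the right shift brings it back down, and print_num
 * then holds just the 4 field bits, zero-extended.
 */
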
static void btf_bitfield_show(void *data, u8 bits_offset,
			      u8 nr_bits, struct btf_show *show)
{
	u16 left_shift_bits, right_shift_bits;
	u8 nr_copy_bytes;
	u8 nr_copy_bits;
	u64 print_num[2] = {};

	nr_copy_bits = nr_bits + bits_offset;
	nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits);

	memcpy(print_num, data, nr_copy_bytes);

#ifdef __BIG_ENDIAN_BITFIELD
	left_shift_bits = bits_offset;
#else
	left_shift_bits = BITS_PER_U128 - nr_copy_bits;
#endif
	right_shift_bits = BITS_PER_U128 - nr_bits;

	btf_int128_shift(print_num, left_shift_bits, right_shift_bits);
	btf_int128_print(show, print_num);
}

static void btf_int_bits_show(const struct btf *btf,
			      const struct btf_type *t,
			      void *data, u8 bits_offset,
			      struct btf_show *show)
{
	u32 int_data = btf_type_int(t);
	u8 nr_bits = BTF_INT_BITS(int_data);
	u8 total_bits_offset;

	/*
	 * bits_offset is at most 7.
	 * BTF_INT_OFFSET() cannot exceed 128 bits.
	 */
	total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data);
	data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
	bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset);
	btf_bitfield_show(data, bits_offset, nr_bits, show);
}

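/* Example (illustrative): with bits_offset = 5 and
 * BTF_INT_OFFSET(int_data) = 8, total_bits_offset = 13, so data
 * advances by 13 / 8 = 1 byte and the residual in-byte offset
 * becomes 13 % 8 = 5 before btf_bitfield_show() is called.
 */
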
static void btf_int_show(const struct btf *btf, const struct btf_type *t,
			 u32 type_id, void *data, u8 bits_offset,
			 struct btf_show *show)
{
	u32 int_data = btf_type_int(t);
	u8 encoding = BTF_INT_ENCODING(int_data);
	bool sign = encoding & BTF_INT_SIGNED;
	u8 nr_bits = BTF_INT_BITS(int_data);
	void *safe_data;

	safe_data = btf_show_start_type(show, t, type_id, data);
	if (!safe_data)
		return;

	if (bits_offset || BTF_INT_OFFSET(int_data) ||
	    BITS_PER_BYTE_MASKED(nr_bits)) {
		btf_int_bits_show(btf, t, safe_data, bits_offset, show);
		goto out;
	}

	switch (nr_bits) {
	case 128:
		btf_int128_print(show, safe_data);
		break;
	case 64:
		if (sign)
			btf_show_type_value(show, "%lld", *(s64 *)safe_data);
		else
			btf_show_type_value(show, "%llu", *(u64 *)safe_data);
		break;
	case 32:
		if (sign)
			btf_show_type_value(show, "%d", *(s32 *)safe_data);
		else
			btf_show_type_value(show, "%u", *(u32 *)safe_data);
		break;
	case 16:
		if (sign)
			btf_show_type_value(show, "%d", *(s16 *)safe_data);
		else
			btf_show_type_value(show, "%u", *(u16 *)safe_data);
		break;
	case 8:
		if (show->state.array_encoding == BTF_INT_CHAR) {
			/* check for null terminator */
			if (show->state.array_terminated)
				break;
			if (*(char *)data == '\0') {
				show->state.array_terminated = 1;
				break;
			}
			if (isprint(*(char *)data)) {
				btf_show_type_value(show, "'%c'",
						    *(char *)safe_data);
				break;
			}
		}
		if (sign)
			btf_show_type_value(show, "%d", *(s8 *)safe_data);
		else
			btf_show_type_value(show, "%u", *(u8 *)safe_data);
		break;
	default:
		btf_int_bits_show(btf, t, safe_data, bits_offset, show);
		break;
	}
out:
	btf_show_end_type(show);
}

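/* For example (illustrative): a 32-bit signed int holding -1 is
 * emitted as "-1" via the "%d" case above, while an 8-bit CHAR
 * element of an array prints as "'a'" until a NUL terminator is
 * seen, after which the remainder of the array is suppressed.
 */
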
static const struct btf_kind_operations int_ops = {
	.check_meta = btf_int_check_meta,
	.resolve = btf_df_resolve,
	.check_member = btf_int_check_member,
	.check_kflag_member = btf_int_check_kflag_member,
	.log_details = btf_int_log,
	.show = btf_int_show,
};

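/* Dispatch sketch (simplified; the real lookup is btf_type_ops() in
 * this file): the verifier selects the per-kind operations table via
 * BTF_INFO_KIND(t->info) and then calls through it, e.g.
 *
 *	const struct btf_kind_operations *ops = btf_type_ops(t);
 *	s32 meta_needed = ops->check_meta(env, t, meta_left);
 *
 * so INT metadata checking, logging and showing all route through
 * int_ops above.
 */
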
static int btf_modifier_check_member(struct btf_verifier_env *env,
				     const struct btf_type *struct_type,
				     const struct btf_member *member,
				     const struct btf_type *member_type)
{
	const struct btf_type *resolved_type;
	u32 resolved_type_id = member->type;
	struct btf_member resolved_member;
	struct btf *btf = env->btf;

	resolved_type = btf_type_id_size(btf, &resolved_type_id, NULL);
	if (!resolved_type) {
		btf_verifier_log_member(env, struct_type, member,
					"Invalid member");
		return -EINVAL;
	}

	resolved_member = *member;
	resolved_member.type = resolved_type_id;

	return btf_type_ops(resolved_type)->check_member(env, struct_type,
							 &resolved_member,
							 resolved_type);
}

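/* Example (illustrative): for a member declared as "const int x",
 * btf_type_id_size() above skips the CONST modifier and returns the
 * underlying INT, so the member is re-checked through int_ops'
 * check_member() as if it had been declared "int x".
 */
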
static int btf_modifier_check_kflag_member(struct btf_verifier_env *env,
					   const struct btf_type *struct_type,
					   const struct btf_member *member,
					   const struct btf_type *member_type)
{
	const struct btf_type *resolved_type;
	u32 resolved_type_id = member->type;
	struct btf_member resolved_member;
	struct btf *btf = env->btf;

	resolved_type = btf_type_id_size(btf, &resolved_type_id, NULL);
	if (!resolved_type) {
		btf_verifier_log_member(env, struct_type, member,
					"Invalid member");
		return -EINVAL;
	}

	resolved_member = *member;
	resolved_member.type = resolved_type_id;

	return btf_type_ops(resolved_type)->check_kflag_member(env, struct_type,
							       &resolved_member,
							       resolved_type);
}

static int btf_ptr_check_member(struct btf_verifier_env *env,
				const struct btf_type *struct_type,
				const struct btf_member *member,
				const struct btf_type *member_type)
{
	u32 struct_size, struct_bits_off, bytes_offset;

	struct_size = struct_type->size;
	struct_bits_off = member->offset;
	bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);

	if (BITS_PER_BYTE_MASKED(struct_bits_off)) {
		btf_verifier_log_member(env, struct_type, member,
					"Member is not byte aligned");
		return -EINVAL;
	}

	if (struct_size - bytes_offset < sizeof(void *)) {
		btf_verifier_log_member(env, struct_type, member,
					"Member exceeds struct_size");
		return -EINVAL;
	}

	return 0;
}

static int btf_ref_type_check_meta(struct btf_verifier_env *env,
				   const struct btf_type *t,
				   u32 meta_left)
{
	const char *value;

	if (btf_type_vlen(t)) {
		btf_verifier_log_type(env, t, "vlen != 0");
		return -EINVAL;
	}

	if (btf_type_kflag(t)) {
		btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
		return -EINVAL;
	}

	if (!BTF_TYPE_ID_VALID(t->type)) {
		btf_verifier_log_type(env, t, "Invalid type_id");
		return -EINVAL;
	}

	/* typedef/type_tag types must have a valid name, while the other
	 * ref types (volatile, const, restrict) must have a null name.
	 */
	if (BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF) {
		if (!t->name_off ||
		    !btf_name_valid_identifier(env->btf, t->name_off)) {
			btf_verifier_log_type(env, t, "Invalid name");
			return -EINVAL;
		}
	} else if (BTF_INFO_KIND(t->info) == BTF_KIND_TYPE_TAG) {
		value = btf_name_by_offset(env->btf, t->name_off);
		if (!value || !value[0]) {
			btf_verifier_log_type(env, t, "Invalid name");
			return -EINVAL;
		}
	} else {
		if (t->name_off) {
			btf_verifier_log_type(env, t, "Invalid name");
			return -EINVAL;
		}
	}

	btf_verifier_log_type(env, t, NULL);

	return 0;
}

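/* Examples (illustrative): "typedef int u32_t" must carry the valid
 * identifier "u32_t" in name_off; a TYPE_TAG produced by e.g.
 * __attribute__((btf_type_tag("user"))) must carry the non-empty
 * string "user"; a bare CONST/VOLATILE/RESTRICT node is rejected if
 * it names anything at all.
 */
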
static int btf_modifier_resolve(struct btf_verifier_env *env,
				const struct resolve_vertex *v)
{
	const struct btf_type *t = v->t;
	const struct btf_type *next_type;
	u32 next_type_id = t->type;
	struct btf *btf = env->btf;

	next_type = btf_type_by_id(btf, next_type_id);
	if (!next_type || btf_type_is_resolve_source_only(next_type)) {
		btf_verifier_log_type(env, v->t, "Invalid type_id");
		return -EINVAL;
	}

	if (!env_type_is_resolve_sink(env, next_type) &&
	    !env_type_is_resolved(env, next_type_id))
		return env_stack_push(env, next_type, next_type_id);

	/* Figure out the resolved next_type_id with size.
	 * It will be stored in the current modifier's
	 * resolved_ids and resolved_sizes so that it can
	 * save us a few type-followings when we use it later (e.g. in
	 * pretty printing).
	 */
	if (!btf_type_id_size(btf, &next_type_id, NULL)) {
		if (env_type_is_resolved(env, next_type_id))
			next_type = btf_type_id_resolve(btf, &next_type_id);

		/* "typedef void new_void", "const void"...etc */
		if (!btf_type_is_void(next_type) &&
		    !btf_type_is_fwd(next_type) &&
		    !btf_type_is_func_proto(next_type)) {
			btf_verifier_log_type(env, v->t, "Invalid type_id");
			return -EINVAL;
		}
	}

	env_stack_pop_resolved(env, next_type_id, 0);

	return 0;
}

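/* Note on resolution order: with a chain like PTR -> TYPEDEF ->
 * ARRAY, the ARRAY can be a sink in pointer context, so the
 * TYPEDEF's size is re-derived via btf_type_id_size() on the
 * resolved type id above rather than trusting a resolved_size that
 * may have been recorded as zero during an earlier pointer-context
 * pass.
 */
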
static int btf_var_resolve(struct btf_verifier_env *env,
			   const struct resolve_vertex *v)
{
	const struct btf_type *next_type;
	const struct btf_type *t = v->t;
	u32 next_type_id = t->type;
	struct btf *btf = env->btf;

	next_type = btf_type_by_id(btf, next_type_id);
	if (!next_type || btf_type_is_resolve_source_only(next_type)) {
		btf_verifier_log_type(env, v->t, "Invalid type_id");
		return -EINVAL;
	}

	if (!env_type_is_resolve_sink(env, next_type) &&
	    !env_type_is_resolved(env, next_type_id))
		return env_stack_push(env, next_type, next_type_id);

	if (btf_type_is_modifier(next_type)) {
		const struct btf_type *resolved_type;
		u32 resolved_type_id;

		resolved_type_id = next_type_id;
		resolved_type = btf_type_id_resolve(btf, &resolved_type_id);

		if (btf_type_is_ptr(resolved_type) &&
		    !env_type_is_resolve_sink(env, resolved_type) &&
		    !env_type_is_resolved(env, resolved_type_id))
			return env_stack_push(env, resolved_type,
					      resolved_type_id);
	}

	/* We must resolve to something concrete at this point; forward
	 * types or similar that would resolve to a size of zero are not
	 * allowed.
	 */
	if (!btf_type_id_size(btf, &next_type_id, NULL)) {
		btf_verifier_log_type(env, v->t, "Invalid type_id");
		return -EINVAL;
	}

	env_stack_pop_resolved(env, next_type_id, 0);

	return 0;
}

static int btf_ptr_resolve(struct btf_verifier_env *env,
			   const struct resolve_vertex *v)
{
	const struct btf_type *next_type;
	const struct btf_type *t = v->t;
	u32 next_type_id = t->type;
	struct btf *btf = env->btf;

	next_type = btf_type_by_id(btf, next_type_id);
	if (!next_type || btf_type_is_resolve_source_only(next_type)) {
		btf_verifier_log_type(env, v->t, "Invalid type_id");
		return -EINVAL;
	}

	if (!env_type_is_resolve_sink(env, next_type) &&
	    !env_type_is_resolved(env, next_type_id))
		return env_stack_push(env, next_type, next_type_id);

	/* If the modifier was RESOLVED during RESOLVE_STRUCT_OR_ARRAY,
	 * the modifier may have stopped resolving when it was resolved
	 * to a ptr (last-resolved-ptr).
	 *
	 * We now need to continue from the last-resolved-ptr to
	 * ensure the last-resolved-ptr will not refer back to
	 * the current ptr (t).
	 */
	if (btf_type_is_modifier(next_type)) {
		const struct btf_type *resolved_type;
		u32 resolved_type_id;

		resolved_type_id = next_type_id;
		resolved_type = btf_type_id_resolve(btf, &resolved_type_id);

		if (btf_type_is_ptr(resolved_type) &&
		    !env_type_is_resolve_sink(env, resolved_type) &&
		    !env_type_is_resolved(env, resolved_type_id))
			return env_stack_push(env, resolved_type,
					      resolved_type_id);
	}

	if (!btf_type_id_size(btf, &next_type_id, NULL)) {
		if (env_type_is_resolved(env, next_type_id))
			next_type = btf_type_id_resolve(btf, &next_type_id);

		if (!btf_type_is_void(next_type) &&
		    !btf_type_is_fwd(next_type) &&
		    !btf_type_is_func_proto(next_type)) {
			btf_verifier_log_type(env, v->t, "Invalid type_id");
			return -EINVAL;
		}
	}

	env_stack_pop_resolved(env, next_type_id, 0);

	return 0;
}

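/* Example (illustrative): for "struct s { struct s *next; };" the
 * pointer's target chain leads back toward the type being resolved;
 * continuing from the last-resolved-ptr above is what keeps such
 * self-referential pointers from recursing indefinitely.
 */
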
2020-09-28 14:31:04 +03:00
|
|
|
static void btf_modifier_show(const struct btf *btf,
|
|
|
|
const struct btf_type *t,
|
|
|
|
u32 type_id, void *data,
|
|
|
|
u8 bits_offset, struct btf_show *show)
|
2018-04-19 01:56:00 +03:00
|
|
|
{
|
bpf: Introduce BPF_MAP_TYPE_STRUCT_OPS
The patch introduces BPF_MAP_TYPE_STRUCT_OPS. The map value
is a kernel struct with its func ptr implemented in bpf prog.
This new map is the interface to register/unregister/introspect
a bpf implemented kernel struct.
The kernel struct is actually embedded inside another new struct
(or called the "value" struct in the code). For example,
"struct tcp_congestion_ops" is embbeded in:
struct bpf_struct_ops_tcp_congestion_ops {
refcount_t refcnt;
enum bpf_struct_ops_state state;
struct tcp_congestion_ops data; /* <-- kernel subsystem struct here */
}
The map value is "struct bpf_struct_ops_tcp_congestion_ops".
The "bpftool map dump" will then be able to show the
state ("inuse"/"tobefree") and the number of subsystem's refcnt (e.g.
number of tcp_sock in the tcp_congestion_ops case). This "value" struct
is created automatically by a macro. Having a separate "value" struct
will also make extending "struct bpf_struct_ops_XYZ" easier (e.g. adding
"void (*init)(void)" to "struct bpf_struct_ops_XYZ" to do some
initialization works before registering the struct_ops to the kernel
subsystem). The libbpf will take care of finding and populating the
"struct bpf_struct_ops_XYZ" from "struct XYZ".
Register a struct_ops to a kernel subsystem:
1. Load all needed BPF_PROG_TYPE_STRUCT_OPS prog(s)
2. Create a BPF_MAP_TYPE_STRUCT_OPS with attr->btf_vmlinux_value_type_id
set to the btf id "struct bpf_struct_ops_tcp_congestion_ops" of the
running kernel.
Instead of reusing the attr->btf_value_type_id,
btf_vmlinux_value_type_id s added such that attr->btf_fd can still be
used as the "user" btf which could store other useful sysadmin/debug
info that may be introduced in the furture,
e.g. creation-date/compiler-details/map-creator...etc.
3. Create a "struct bpf_struct_ops_tcp_congestion_ops" object as described
in the running kernel btf. Populate the value of this object.
The function ptr should be populated with the prog fds.
4. Call BPF_MAP_UPDATE with the object created in (3) as
the map value. The key is always "0".
During BPF_MAP_UPDATE, the code that saves the kernel-func-ptr's
args as an array of u64 is generated. BPF_MAP_UPDATE also allows
the specific struct_ops to do some final checks in "st_ops->init_member()"
(e.g. ensure all mandatory func ptrs are implemented).
If everything looks good, it will register this kernel struct
to the kernel subsystem. The map will not allow further update
from this point.
Unregister a struct_ops from the kernel subsystem:
BPF_MAP_DELETE with key "0".
Introspect a struct_ops:
BPF_MAP_LOOKUP_ELEM with key "0". The map value returned will
have the prog _id_ populated as the func ptr.
The map value state (enum bpf_struct_ops_state) will transit from:
INIT (map created) =>
INUSE (map updated, i.e. reg) =>
TOBEFREE (map value deleted, i.e. unreg)
The kernel subsystem needs to call bpf_struct_ops_get() and
bpf_struct_ops_put() to manage the "refcnt" in the
"struct bpf_struct_ops_XYZ". This patch uses a separate refcnt
for the purose of tracking the subsystem usage. Another approach
is to reuse the map->refcnt and then "show" (i.e. during map_lookup)
the subsystem's usage by doing map->refcnt - map->usercnt to filter out
the map-fd/pinned-map usage. However, that will also tie down the
future semantics of map->refcnt and map->usercnt.
The very first subsystem's refcnt (during reg()) holds one
count to map->refcnt. When the very last subsystem's refcnt
is gone, it will also release the map->refcnt. All bpf_prog will be
freed when the map->refcnt reaches 0 (i.e. during map_free()).
Here is how the bpftool map command will look like:
[root@arch-fb-vm1 bpf]# bpftool map show
6: struct_ops name dctcp flags 0x0
key 4B value 256B max_entries 1 memlock 4096B
btf_id 6
[root@arch-fb-vm1 bpf]# bpftool map dump id 6
[{
"value": {
"refcnt": {
"refs": {
"counter": 1
}
},
"state": 1,
"data": {
"list": {
"next": 0,
"prev": 0
},
"key": 0,
"flags": 2,
"init": 24,
"release": 0,
"ssthresh": 25,
"cong_avoid": 30,
"set_state": 27,
"cwnd_event": 28,
"in_ack_event": 26,
"undo_cwnd": 29,
"pkts_acked": 0,
"min_tso_segs": 0,
"sndbuf_expand": 0,
"cong_control": 0,
"get_info": 0,
"name": [98,112,102,95,100,99,116,99,112,0,0,0,0,0,0,0
],
"owner": 0
}
}
}
]
Misc Notes:
* bpf_struct_ops_map_sys_lookup_elem() is added for syscall lookup.
It does an inplace update on "*value" instead returning a pointer
to syscall.c. Otherwise, it needs a separate copy of "zero" value
for the BPF_STRUCT_OPS_STATE_INIT to avoid races.
* The bpf_struct_ops_map_delete_elem() is also called without
preempt_disable() from map_delete_elem(). This is because
the "->unreg()" may require a sleepable context, e.g.
"tcp_unregister_congestion_control()".
* "const" is added to some of the existing "struct btf_func_model *"
function args to avoid a compiler warning caused by this patch.
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200109003505.3855919-1-kafai@fb.com
2020-01-09 03:35:05 +03:00
|
|
|
if (btf->resolved_ids)
|
|
|
|
t = btf_type_id_resolve(btf, &type_id);
|
|
|
|
else
|
|
|
|
t = btf_type_skip_modifiers(btf, type_id, NULL);
|
2018-04-19 01:56:00 +03:00
|
|
|
|
2020-09-28 14:31:04 +03:00
|
|
|
btf_type_ops(t)->show(btf, t, type_id, data, bits_offset, show);
|
2018-04-19 01:56:00 +03:00
|
|
|
}
|
|
|
|
|
2020-09-28 14:31:04 +03:00
|
|
|
static void btf_var_show(const struct btf *btf, const struct btf_type *t,
|
|
|
|
u32 type_id, void *data, u8 bits_offset,
|
|
|
|
struct btf_show *show)
|
2019-04-10 00:20:09 +03:00
|
|
|
{
|
|
|
|
t = btf_type_id_resolve(btf, &type_id);
|
|
|
|
|
2020-09-28 14:31:04 +03:00
|
|
|
btf_type_ops(t)->show(btf, t, type_id, data, bits_offset, show);
|
2019-04-10 00:20:09 +03:00
|
|
|
}
|
|
|
|
|
2020-09-28 14:31:04 +03:00
|
|
|
static void btf_ptr_show(const struct btf *btf, const struct btf_type *t,
|
|
|
|
u32 type_id, void *data, u8 bits_offset,
|
|
|
|
struct btf_show *show)
|
2018-04-19 01:56:00 +03:00
|
|
|
{
|
2020-09-28 14:31:04 +03:00
|
|
|
void *safe_data;
|
|
|
|
|
|
|
|
safe_data = btf_show_start_type(show, t, type_id, data);
|
|
|
|
if (!safe_data)
|
|
|
|
return;
|
|
|
|
|
|
|
|
/* It is a hashed value unless BTF_SHOW_PTR_RAW is specified */
|
|
|
|
if (show->flags & BTF_SHOW_PTR_RAW)
|
|
|
|
btf_show_type_value(show, "0x%px", *(void **)safe_data);
|
|
|
|
else
|
|
|
|
btf_show_type_value(show, "0x%p", *(void **)safe_data);
|
|
|
|
btf_show_end_type(show);
|
2018-04-19 01:56:00 +03:00
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:57 +03:00
|
|
|
static void btf_ref_type_log(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t)
|
|
|
|
{
|
|
|
|
btf_verifier_log(env, "type_id=%u", t->type);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct btf_kind_operations modifier_ops = {
|
|
|
|
.check_meta = btf_ref_type_check_meta,
|
2018-04-19 01:55:58 +03:00
|
|
|
.resolve = btf_modifier_resolve,
|
2018-04-19 01:55:59 +03:00
|
|
|
.check_member = btf_modifier_check_member,
|
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixes two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
The current btf encoding of bitfields follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode a bit size,
pahole works around the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
There are two issues in the above:
. by changing the enum type to int, we lose the original
type information, which is not ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield sizes differ.
To fix this issue, this patch implements a backward-compatible
change to the BTF struct type encoding:
. bit 31 of struct_type->info, previously reserved,
is now used to indicate whether the bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset is encoded as:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bitfield, the maximum bit offset
is reduced to (2^24 - 1) instead of UINT_MAX. The maximum
bitfield size is 255, which is enough for today since the largest
bitfield a compiler supports is 128 bits, where the int128 type is available.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
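Decoding under the new scheme is then a matter of checking kind_flag
and splitting btf_member->offset with the uapi macros introduced for
this encoding (a minimal sketch; names per include/uapi/linux/btf.h):

	#include <uapi/linux/btf.h>

	/* Split a btf_member offset according to the struct's kind_flag. */
	static void member_bits(const struct btf_type *struct_type,
				const struct btf_member *member,
				__u32 *bit_offset, __u32 *bitfield_size)
	{
		if (BTF_INFO_KFLAG(struct_type->info)) {
			*bitfield_size = BTF_MEMBER_BITFIELD_SIZE(member->offset);
			*bit_offset = BTF_MEMBER_BIT_OFFSET(member->offset);
		} else {
			*bitfield_size = 0;	/* size comes from the member type */
			*bit_offset = member->offset;
		}
	}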
Issue #2 and solution:
======================
The current forward type in BTF does not specify whether the original
type is a struct or a union. This does not work for type pretty printing
and BTF-to-header-file conversion, where struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is a union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
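A consumer distinguishing the two checks the same bit, e.g. (a sketch
mirroring what btf_fwd_type_log() does later in this file):

	/* For a FWD type: kind_flag set means union, clear means struct. */
	static const char *fwd_kind(const struct btf_type *t)
	{
		return BTF_INFO_KFLAG(t->info) ? "union" : "struct";
	}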
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented the func/func_proto kinds
and .BTF.ext. So to print a function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-16 09:13:51 +03:00
|
|
|
.check_kflag_member = btf_modifier_check_kflag_member,
|
2018-04-19 01:55:57 +03:00
|
|
|
.log_details = btf_ref_type_log,
|
2020-09-28 14:31:04 +03:00
|
|
|
.show = btf_modifier_show,
|
2018-04-19 01:55:57 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
static struct btf_kind_operations ptr_ops = {
|
|
|
|
.check_meta = btf_ref_type_check_meta,
|
2018-04-19 01:55:58 +03:00
|
|
|
.resolve = btf_ptr_resolve,
|
2018-04-19 01:55:59 +03:00
|
|
|
.check_member = btf_ptr_check_member,
|
bpf: btf: fix struct/union/fwd types with kind_flag
2018-12-16 09:13:51 +03:00
|
|
|
.check_kflag_member = btf_generic_check_kflag_member,
|
2018-04-19 01:55:57 +03:00
|
|
|
.log_details = btf_ref_type_log,
|
2020-09-28 14:31:04 +03:00
|
|
|
.show = btf_ptr_show,
|
2018-04-19 01:55:57 +03:00
|
|
|
};
|
|
|
|
|
2018-06-02 19:06:51 +03:00
|
|
|
static s32 btf_fwd_check_meta(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t,
|
|
|
|
u32 meta_left)
|
|
|
|
{
|
|
|
|
if (btf_type_vlen(t)) {
|
|
|
|
btf_verifier_log_type(env, t, "vlen != 0");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (t->type) {
|
|
|
|
btf_verifier_log_type(env, t, "type != 0");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-11-28 00:23:28 +03:00
|
|
|
/* fwd type must have a valid name */
|
|
|
|
if (!t->name_off ||
|
|
|
|
!btf_name_valid_identifier(env->btf, t->name_off)) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid name");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-06-02 19:06:51 +03:00
|
|
|
btf_verifier_log_type(env, t, NULL);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-12-19 00:43:58 +03:00
|
|
|
static void btf_fwd_type_log(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t)
|
|
|
|
{
|
|
|
|
btf_verifier_log(env, "%s", btf_type_kflag(t) ? "union" : "struct");
|
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:57 +03:00
|
|
|
static struct btf_kind_operations fwd_ops = {
|
2018-06-02 19:06:51 +03:00
|
|
|
.check_meta = btf_fwd_check_meta,
|
2018-04-19 01:55:58 +03:00
|
|
|
.resolve = btf_df_resolve,
|
2018-04-19 01:55:59 +03:00
|
|
|
.check_member = btf_df_check_member,
|
bpf: btf: fix struct/union/fwd types with kind_flag
2018-12-16 09:13:51 +03:00
|
|
|
.check_kflag_member = btf_df_check_kflag_member,
|
2018-12-19 00:43:58 +03:00
|
|
|
.log_details = btf_fwd_type_log,
|
2020-09-28 14:31:04 +03:00
|
|
|
.show = btf_df_show,
|
2018-04-19 01:55:57 +03:00
|
|
|
};
|
|
|
|
|
2018-04-19 01:55:59 +03:00
|
|
|
static int btf_array_check_member(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *struct_type,
|
|
|
|
const struct btf_member *member,
|
|
|
|
const struct btf_type *member_type)
|
|
|
|
{
|
|
|
|
u32 struct_bits_off = member->offset;
|
|
|
|
u32 struct_size, bytes_offset;
|
|
|
|
u32 array_type_id, array_size;
|
|
|
|
struct btf *btf = env->btf;
|
|
|
|
|
|
|
|
if (BITS_PER_BYTE_MASKED(struct_bits_off)) {
|
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"Member is not byte aligned");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
array_type_id = member->type;
|
|
|
|
btf_type_id_size(btf, &array_type_id, &array_size);
|
|
|
|
struct_size = struct_type->size;
|
|
|
|
bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
|
|
|
|
if (struct_size - bytes_offset < array_size) {
|
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"Member exceeds struct_size");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:57 +03:00
|
|
|
static s32 btf_array_check_meta(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t,
|
|
|
|
u32 meta_left)
|
|
|
|
{
|
|
|
|
const struct btf_array *array = btf_type_array(t);
|
|
|
|
u32 meta_needed = sizeof(*array);
|
|
|
|
|
|
|
|
if (meta_left < meta_needed) {
|
|
|
|
btf_verifier_log_basic(env, t,
|
|
|
|
"meta_left:%u meta_needed:%u",
|
|
|
|
meta_left, meta_needed);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-11-28 00:23:28 +03:00
|
|
|
/* array type should not have a name */
|
|
|
|
if (t->name_off) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid name");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:57 +03:00
|
|
|
if (btf_type_vlen(t)) {
|
|
|
|
btf_verifier_log_type(env, t, "vlen != 0");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
bpf: btf: fix struct/union/fwd types with kind_flag
2018-12-16 09:13:51 +03:00
|
|
|
if (btf_type_kflag(t)) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-06-02 19:06:50 +03:00
|
|
|
if (t->size) {
|
|
|
|
btf_verifier_log_type(env, t, "size != 0");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-05-23 00:57:19 +03:00
|
|
|
/* Array elem type and index type cannot be in type void,
|
|
|
|
* so !array->type and !array->index_type are not allowed.
|
2018-04-19 01:55:57 +03:00
|
|
|
*/
|
2018-05-23 00:57:20 +03:00
|
|
|
if (!array->type || !BTF_TYPE_ID_VALID(array->type)) {
|
2018-05-23 00:57:19 +03:00
|
|
|
btf_verifier_log_type(env, t, "Invalid elem");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-05-23 00:57:20 +03:00
|
|
|
if (!array->index_type || !BTF_TYPE_ID_VALID(array->index_type)) {
|
2018-05-23 00:57:19 +03:00
|
|
|
btf_verifier_log_type(env, t, "Invalid index");
|
2018-04-19 01:55:57 +03:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
btf_verifier_log_type(env, t, NULL);
|
|
|
|
|
|
|
|
return meta_needed;
|
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:58 +03:00
|
|
|
static int btf_array_resolve(struct btf_verifier_env *env,
|
|
|
|
const struct resolve_vertex *v)
|
|
|
|
{
|
|
|
|
const struct btf_array *array = btf_type_array(v->t);
|
2018-05-23 00:57:19 +03:00
|
|
|
const struct btf_type *elem_type, *index_type;
|
|
|
|
u32 elem_type_id, index_type_id;
|
2018-04-19 01:55:58 +03:00
|
|
|
struct btf *btf = env->btf;
|
|
|
|
u32 elem_size;
|
|
|
|
|
2018-05-23 00:57:19 +03:00
|
|
|
/* Check array->index_type */
|
|
|
|
index_type_id = array->index_type;
|
|
|
|
index_type = btf_type_by_id(btf, index_type_id);
|
2019-06-19 22:01:05 +03:00
|
|
|
if (btf_type_nosize_or_null(index_type) ||
|
|
|
|
btf_type_is_resolve_source_only(index_type)) {
|
2018-05-23 00:57:19 +03:00
|
|
|
btf_verifier_log_type(env, v->t, "Invalid index");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!env_type_is_resolve_sink(env, index_type) &&
|
|
|
|
!env_type_is_resolved(env, index_type_id))
|
|
|
|
return env_stack_push(env, index_type, index_type_id);
|
|
|
|
|
|
|
|
index_type = btf_type_id_size(btf, &index_type_id, NULL);
|
|
|
|
if (!index_type || !btf_type_is_int(index_type) ||
|
|
|
|
!btf_type_int_is_regular(index_type)) {
|
|
|
|
btf_verifier_log_type(env, v->t, "Invalid index");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Check array->type */
|
|
|
|
elem_type_id = array->type;
|
2018-04-19 01:55:58 +03:00
|
|
|
elem_type = btf_type_by_id(btf, elem_type_id);
|
2019-06-19 22:01:05 +03:00
|
|
|
if (btf_type_nosize_or_null(elem_type) ||
|
|
|
|
btf_type_is_resolve_source_only(elem_type)) {
|
2018-04-19 01:55:58 +03:00
|
|
|
btf_verifier_log_type(env, v->t,
|
|
|
|
"Invalid elem");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!env_type_is_resolve_sink(env, elem_type) &&
|
|
|
|
!env_type_is_resolved(env, elem_type_id))
|
|
|
|
return env_stack_push(env, elem_type, elem_type_id);
|
|
|
|
|
|
|
|
elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size);
|
|
|
|
if (!elem_type) {
|
|
|
|
btf_verifier_log_type(env, v->t, "Invalid elem");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-05-23 00:57:19 +03:00
|
|
|
if (btf_type_is_int(elem_type) && !btf_type_int_is_regular(elem_type)) {
|
|
|
|
btf_verifier_log_type(env, v->t, "Invalid array of int");
|
|
|
|
return -EINVAL;
|
2018-04-19 01:55:58 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if (array->nelems && elem_size > U32_MAX / array->nelems) {
|
|
|
|
btf_verifier_log_type(env, v->t,
|
|
|
|
"Array size overflows U32_MAX");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
env_stack_pop_resolved(env, elem_type_id, elem_size * array->nelems);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:57 +03:00
|
|
|
static void btf_array_log(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t)
|
|
|
|
{
|
|
|
|
const struct btf_array *array = btf_type_array(t);
|
|
|
|
|
|
|
|
btf_verifier_log(env, "type_id=%u index_type_id=%u nr_elems=%u",
|
|
|
|
array->type, array->index_type, array->nelems);
|
|
|
|
}
|
|
|
|
|
2020-09-28 14:31:04 +03:00
|
|
|
static void __btf_array_show(const struct btf *btf, const struct btf_type *t,
|
|
|
|
u32 type_id, void *data, u8 bits_offset,
|
|
|
|
struct btf_show *show)
|
2018-04-19 01:56:00 +03:00
|
|
|
{
|
|
|
|
const struct btf_array *array = btf_type_array(t);
|
|
|
|
const struct btf_kind_operations *elem_ops;
|
|
|
|
const struct btf_type *elem_type;
|
2020-09-28 14:31:04 +03:00
|
|
|
u32 i, elem_size = 0, elem_type_id;
|
|
|
|
u16 encoding = 0;
|
2018-04-19 01:56:00 +03:00
|
|
|
|
|
|
|
elem_type_id = array->type;
|
2020-09-28 14:31:04 +03:00
|
|
|
elem_type = btf_type_skip_modifiers(btf, elem_type_id, NULL);
|
|
|
|
if (elem_type && btf_type_has_size(elem_type))
|
|
|
|
elem_size = elem_type->size;
|
|
|
|
|
|
|
|
if (elem_type && btf_type_is_int(elem_type)) {
|
|
|
|
u32 int_type = btf_type_int(elem_type);
|
|
|
|
|
|
|
|
encoding = BTF_INT_ENCODING(int_type);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* BTF_INT_CHAR encoding never seems to be set for
|
|
|
|
* char arrays, so if size is 1 and element is
|
|
|
|
* printable as a char, we'll do that.
|
|
|
|
*/
|
|
|
|
if (elem_size == 1)
|
|
|
|
encoding = BTF_INT_CHAR;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!btf_show_start_array_type(show, t, type_id, encoding, data))
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (!elem_type)
|
|
|
|
goto out;
|
2018-04-19 01:56:00 +03:00
|
|
|
elem_ops = btf_type_ops(elem_type);
|
2020-09-28 14:31:04 +03:00
|
|
|
|
2018-04-19 01:56:00 +03:00
|
|
|
for (i = 0; i < array->nelems; i++) {
|
|
|
|
|
2020-09-28 14:31:04 +03:00
|
|
|
btf_show_start_array_member(show);
|
|
|
|
|
|
|
|
elem_ops->show(btf, elem_type, elem_type_id, data,
|
|
|
|
bits_offset, show);
|
2018-04-19 01:56:00 +03:00
|
|
|
data += elem_size;
|
2020-09-28 14:31:04 +03:00
|
|
|
|
|
|
|
btf_show_end_array_member(show);
|
|
|
|
|
|
|
|
if (show->state.array_terminated)
|
|
|
|
break;
|
2018-04-19 01:56:00 +03:00
|
|
|
}
|
2020-09-28 14:31:04 +03:00
|
|
|
out:
|
|
|
|
btf_show_end_array_type(show);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void btf_array_show(const struct btf *btf, const struct btf_type *t,
|
|
|
|
u32 type_id, void *data, u8 bits_offset,
|
|
|
|
struct btf_show *show)
|
|
|
|
{
|
|
|
|
const struct btf_member *m = show->state.member;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* First check if any members would be shown (are non-zero).
|
|
|
|
* See comments above "struct btf_show" definition for more
|
|
|
|
* details on how this works at a high-level.
|
|
|
|
*/
|
|
|
|
if (show->state.depth > 0 && !(show->flags & BTF_SHOW_ZERO)) {
|
|
|
|
if (!show->state.depth_check) {
|
|
|
|
show->state.depth_check = show->state.depth + 1;
|
|
|
|
show->state.depth_to_show = 0;
|
|
|
|
}
|
|
|
|
__btf_array_show(btf, t, type_id, data, bits_offset, show);
|
|
|
|
show->state.member = m;
|
|
|
|
|
|
|
|
if (show->state.depth_check != show->state.depth + 1)
|
|
|
|
return;
|
|
|
|
show->state.depth_check = 0;
|
|
|
|
|
|
|
|
if (show->state.depth_to_show <= show->state.depth)
|
|
|
|
return;
|
|
|
|
/*
|
|
|
|
* Reaching here indicates we have recursed and found
|
|
|
|
* non-zero array member(s).
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
__btf_array_show(btf, t, type_id, data, bits_offset, show);
|
2018-04-19 01:56:00 +03:00
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:57 +03:00
|
|
|
static struct btf_kind_operations array_ops = {
|
|
|
|
.check_meta = btf_array_check_meta,
|
2018-04-19 01:55:58 +03:00
|
|
|
.resolve = btf_array_resolve,
|
2018-04-19 01:55:59 +03:00
|
|
|
.check_member = btf_array_check_member,
|
bpf: btf: fix struct/union/fwd types with kind_flag
2018-12-16 09:13:51 +03:00
|
|
|
.check_kflag_member = btf_generic_check_kflag_member,
|
2018-04-19 01:55:57 +03:00
|
|
|
.log_details = btf_array_log,
|
2020-09-28 14:31:04 +03:00
|
|
|
.show = btf_array_show,
|
2018-04-19 01:55:57 +03:00
|
|
|
};
|
|
|
|
|
2018-04-19 01:55:59 +03:00
|
|
|
static int btf_struct_check_member(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *struct_type,
|
|
|
|
const struct btf_member *member,
|
|
|
|
const struct btf_type *member_type)
|
|
|
|
{
|
|
|
|
u32 struct_bits_off = member->offset;
|
|
|
|
u32 struct_size, bytes_offset;
|
|
|
|
|
|
|
|
if (BITS_PER_BYTE_MASKED(struct_bits_off)) {
|
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"Member is not byte aligned");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct_size = struct_type->size;
|
|
|
|
bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
|
|
|
|
if (struct_size - bytes_offset < member_type->size) {
|
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"Member exceeds struct_size");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:57 +03:00
|
|
|
static s32 btf_struct_check_meta(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t,
|
|
|
|
u32 meta_left)
|
|
|
|
{
|
|
|
|
bool is_union = BTF_INFO_KIND(t->info) == BTF_KIND_UNION;
|
|
|
|
const struct btf_member *member;
|
2018-07-21 03:38:37 +03:00
|
|
|
u32 meta_needed, last_offset;
|
2018-04-19 01:55:57 +03:00
|
|
|
struct btf *btf = env->btf;
|
|
|
|
u32 struct_size = t->size;
|
bpf: btf: fix struct/union/fwd types with kind_flag
2018-12-16 09:13:51 +03:00
|
|
|
u32 offset;
|
2018-04-19 01:55:57 +03:00
|
|
|
u16 i;
|
|
|
|
|
|
|
|
meta_needed = btf_type_vlen(t) * sizeof(*member);
|
|
|
|
if (meta_left < meta_needed) {
|
|
|
|
btf_verifier_log_basic(env, t,
|
|
|
|
"meta_left:%u meta_needed:%u",
|
|
|
|
meta_left, meta_needed);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-11-28 00:23:28 +03:00
|
|
|
/* struct type either no name or a valid one */
|
|
|
|
if (t->name_off &&
|
|
|
|
!btf_name_valid_identifier(env->btf, t->name_off)) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid name");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:57 +03:00
|
|
|
btf_verifier_log_type(env, t, NULL);
|
|
|
|
|
2018-07-21 03:38:37 +03:00
|
|
|
last_offset = 0;
|
2018-04-19 01:55:57 +03:00
|
|
|
for_each_member(i, t, member) {
|
2018-04-21 19:48:23 +03:00
|
|
|
if (!btf_name_offset_valid(btf, member->name_off)) {
|
2018-04-19 01:55:57 +03:00
|
|
|
btf_verifier_log_member(env, t, member,
|
|
|
|
"Invalid member name_offset:%u",
|
2018-04-21 19:48:23 +03:00
|
|
|
member->name_off);
|
2018-04-19 01:55:57 +03:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-11-28 00:23:28 +03:00
|
|
|
/* struct member either no name or a valid one */
|
|
|
|
if (member->name_off &&
|
|
|
|
!btf_name_valid_identifier(btf, member->name_off)) {
|
|
|
|
btf_verifier_log_member(env, t, member, "Invalid name");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2018-04-19 01:55:57 +03:00
|
|
|
/* A member cannot be in type void */
|
2018-05-23 00:57:20 +03:00
|
|
|
if (!member->type || !BTF_TYPE_ID_VALID(member->type)) {
|
2018-04-19 01:55:57 +03:00
|
|
|
btf_verifier_log_member(env, t, member,
|
|
|
|
"Invalid type_id");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2021-12-01 21:10:25 +03:00
|
|
|
offset = __btf_member_bit_offset(t, member);
|
bpf: btf: fix struct/union/fwd types with kind_flag
2018-12-16 09:13:51 +03:00
|
|
|
if (is_union && offset) {
|
2018-04-19 01:55:57 +03:00
|
|
|
btf_verifier_log_member(env, t, member,
|
|
|
|
"Invalid member bits_offset");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-07-21 03:38:37 +03:00
|
|
|
/*
|
|
|
|
* ">" instead of ">=" because the last member could be
|
|
|
|
* "char a[0];"
|
|
|
|
*/
|
bpf: btf: fix struct/union/fwd types with kind_flag
2018-12-16 09:13:51 +03:00
|
|
|
if (last_offset > offset) {
|
2018-07-21 03:38:37 +03:00
|
|
|
btf_verifier_log_member(env, t, member,
|
|
|
|
"Invalid member bits_offset");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole workarounds the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole has not implemented the func/func_proto kinds
or .BTF.ext, so to print function signatures with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-16 09:13:51 +03:00
|
|
|
if (BITS_ROUNDUP_BYTES(offset) > struct_size) {
|
2018-04-19 01:55:57 +03:00
|
|
|
btf_verifier_log_member(env, t, member,
|
2018-11-26 02:32:51 +03:00
|
|
|
"Member bits_offset exceeds its struct size");
|
2018-04-19 01:55:57 +03:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
btf_verifier_log_member(env, t, member, NULL);
|
2018-12-16 09:13:51 +03:00
|
|
|
last_offset = offset;
|
2018-04-19 01:55:57 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return meta_needed;
|
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:58 +03:00
|
|
|
static int btf_struct_resolve(struct btf_verifier_env *env,
|
|
|
|
const struct resolve_vertex *v)
|
|
|
|
{
|
|
|
|
const struct btf_member *member;
|
2018-04-19 01:55:59 +03:00
|
|
|
int err;
|
2018-04-19 01:55:58 +03:00
|
|
|
u16 i;
|
|
|
|
|
|
|
|
/* Before continuing to resolve the next_member,
|
|
|
|
* ensure the last member is indeed resolved to a
|
|
|
|
* type with size info.
|
|
|
|
*/
|
|
|
|
if (v->next_member) {
|
2018-04-19 01:55:59 +03:00
|
|
|
const struct btf_type *last_member_type;
|
2018-04-19 01:55:58 +03:00
|
|
|
const struct btf_member *last_member;
|
|
|
|
u16 last_member_type_id;
|
|
|
|
|
|
|
|
last_member = btf_type_member(v->t) + v->next_member - 1;
|
|
|
|
last_member_type_id = last_member->type;
|
|
|
|
if (WARN_ON_ONCE(!env_type_is_resolved(env,
|
|
|
|
last_member_type_id)))
|
|
|
|
return -EINVAL;
|
2018-04-19 01:55:59 +03:00
|
|
|
|
|
|
|
last_member_type = btf_type_by_id(env->btf,
|
|
|
|
last_member_type_id);
|
2018-12-16 09:13:51 +03:00
|
|
|
if (btf_type_kflag(v->t))
|
|
|
|
err = btf_type_ops(last_member_type)->check_kflag_member(env, v->t,
|
|
|
|
last_member,
|
|
|
|
last_member_type);
|
|
|
|
else
|
|
|
|
err = btf_type_ops(last_member_type)->check_member(env, v->t,
|
|
|
|
last_member,
|
|
|
|
last_member_type);
|
2018-04-19 01:55:59 +03:00
|
|
|
if (err)
|
|
|
|
return err;
|
2018-04-19 01:55:58 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
for_each_member_from(i, v->next_member, v->t, member) {
|
|
|
|
u32 member_type_id = member->type;
|
|
|
|
const struct btf_type *member_type = btf_type_by_id(env->btf,
|
|
|
|
member_type_id);
|
|
|
|
|
2019-06-19 22:01:05 +03:00
|
|
|
if (btf_type_nosize_or_null(member_type) ||
|
|
|
|
btf_type_is_resolve_source_only(member_type)) {
|
2018-04-19 01:55:58 +03:00
|
|
|
btf_verifier_log_member(env, v->t, member,
|
|
|
|
"Invalid member");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!env_type_is_resolve_sink(env, member_type) &&
|
|
|
|
!env_type_is_resolved(env, member_type_id)) {
|
|
|
|
env_stack_set_next_member(env, i + 1);
|
|
|
|
return env_stack_push(env, member_type, member_type_id);
|
|
|
|
}
|
2018-04-19 01:55:59 +03:00
|
|
|
|
2018-12-16 09:13:51 +03:00
|
|
|
if (btf_type_kflag(v->t))
|
|
|
|
err = btf_type_ops(member_type)->check_kflag_member(env, v->t,
|
|
|
|
member,
|
|
|
|
member_type);
|
|
|
|
else
|
|
|
|
err = btf_type_ops(member_type)->check_member(env, v->t,
|
|
|
|
member,
|
|
|
|
member_type);
|
2018-04-19 01:55:59 +03:00
|
|
|
if (err)
|
|
|
|
return err;
|
2018-04-19 01:55:58 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
env_stack_pop_resolved(env, 0, 0);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:57 +03:00
|
|
|
static void btf_struct_log(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t)
|
|
|
|
{
|
|
|
|
btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
|
|
|
|
}
|
|
|
|
|
2021-07-15 03:54:10 +03:00
|
|
|
static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t,
|
|
|
|
const char *name, int sz, int align)
|
2019-02-01 02:40:04 +03:00
|
|
|
{
|
|
|
|
const struct btf_member *member;
|
|
|
|
u32 i, off = -ENOENT;
|
|
|
|
|
|
|
|
for_each_member(i, t, member) {
|
|
|
|
const struct btf_type *member_type = btf_type_by_id(btf,
|
|
|
|
member->type);
|
|
|
|
if (!__btf_type_is_struct(member_type))
|
|
|
|
continue;
|
2021-07-15 03:54:10 +03:00
|
|
|
if (member_type->size != sz)
|
2019-02-01 02:40:04 +03:00
|
|
|
continue;
|
2021-07-15 03:54:10 +03:00
|
|
|
if (strcmp(__btf_name_by_offset(btf, member_type->name_off), name))
|
2019-02-01 02:40:04 +03:00
|
|
|
continue;
|
|
|
|
if (off != -ENOENT)
|
2021-07-15 03:54:10 +03:00
|
|
|
/* only one such field is allowed */
|
2019-02-01 02:40:04 +03:00
|
|
|
return -E2BIG;
|
2021-12-01 21:10:25 +03:00
|
|
|
off = __btf_member_bit_offset(t, member);
|
2019-02-01 02:40:04 +03:00
|
|
|
if (off % 8)
|
|
|
|
/* valid C code cannot generate such BTF */
|
|
|
|
return -EINVAL;
|
|
|
|
off /= 8;
|
2021-07-15 03:54:10 +03:00
|
|
|
if (off % align)
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
return off;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
|
|
|
|
const char *name, int sz, int align)
|
|
|
|
{
|
|
|
|
const struct btf_var_secinfo *vsi;
|
|
|
|
u32 i, off = -ENOENT;
|
|
|
|
|
|
|
|
for_each_vsi(i, t, vsi) {
|
|
|
|
const struct btf_type *var = btf_type_by_id(btf, vsi->type);
|
|
|
|
const struct btf_type *var_type = btf_type_by_id(btf, var->type);
|
|
|
|
|
|
|
|
if (!__btf_type_is_struct(var_type))
|
|
|
|
continue;
|
|
|
|
if (var_type->size != sz)
|
|
|
|
continue;
|
|
|
|
if (vsi->size != sz)
|
|
|
|
continue;
|
|
|
|
if (strcmp(__btf_name_by_offset(btf, var_type->name_off), name))
|
|
|
|
continue;
|
|
|
|
if (off != -ENOENT)
|
|
|
|
/* only one such field is allowed */
|
|
|
|
return -E2BIG;
|
|
|
|
off = vsi->offset;
|
|
|
|
if (off % align)
|
2019-02-01 02:40:04 +03:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
return off;
|
|
|
|
}
|
|
|
|
|
2021-07-15 03:54:10 +03:00
|
|
|
static int btf_find_field(const struct btf *btf, const struct btf_type *t,
|
|
|
|
const char *name, int sz, int align)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (__btf_type_is_struct(t))
|
|
|
|
return btf_find_struct_field(btf, t, name, sz, align);
|
|
|
|
else if (btf_type_is_datasec(t))
|
|
|
|
return btf_find_datasec_var(btf, t, name, sz, align);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* find 'struct bpf_spin_lock' in map value.
|
|
|
|
* return >= 0 offset if found
|
|
|
|
* and < 0 in case of error
|
|
|
|
*/
|
|
|
|
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t)
|
|
|
|
{
|
|
|
|
return btf_find_field(btf, t, "bpf_spin_lock",
|
|
|
|
sizeof(struct bpf_spin_lock),
|
|
|
|
__alignof__(struct bpf_spin_lock));
|
|
|
|
}
|
|
|
|
|
|
|
|
int btf_find_timer(const struct btf *btf, const struct btf_type *t)
|
|
|
|
{
|
|
|
|
return btf_find_field(btf, t, "bpf_timer",
|
|
|
|
sizeof(struct bpf_timer),
|
|
|
|
__alignof__(struct bpf_timer));
|
|
|
|
}
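For illustration, given a map value laid out like the hypothetical
struct below, btf_find_spin_lock() walks the members, matches the one
whose type is a struct named "bpf_spin_lock" with the expected size
and alignment, and returns its byte offset (8 here on a typical
64-bit build, assuming default alignment and no packing):

struct map_value {
	long counter;			/* bytes 0-7 */
	struct bpf_spin_lock lock;	/* byte offset 8 is returned */
	long payload;			/* after padding, bytes 16-23 */
};

Only one such field is allowed: a second match makes
btf_find_struct_field() return -E2BIG, and a match at a misaligned
or non-byte-aligned offset returns -EINVAL.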
|
|
|
|
|
2020-09-28 14:31:04 +03:00
|
|
|
static void __btf_struct_show(const struct btf *btf, const struct btf_type *t,
|
|
|
|
u32 type_id, void *data, u8 bits_offset,
|
|
|
|
struct btf_show *show)
|
2018-04-19 01:56:00 +03:00
|
|
|
{
|
|
|
|
const struct btf_member *member;
|
2020-09-28 14:31:04 +03:00
|
|
|
void *safe_data;
|
2018-04-19 01:56:00 +03:00
|
|
|
u32 i;
|
|
|
|
|
2020-09-28 14:31:04 +03:00
|
|
|
safe_data = btf_show_start_struct_type(show, t, type_id, data);
|
|
|
|
if (!safe_data)
|
|
|
|
return;
|
|
|
|
|
2018-04-19 01:56:00 +03:00
|
|
|
for_each_member(i, t, member) {
|
|
|
|
const struct btf_type *member_type = btf_type_by_id(btf,
|
|
|
|
member->type);
|
|
|
|
const struct btf_kind_operations *ops;
|
2018-12-16 09:13:51 +03:00
|
|
|
u32 member_offset, bitfield_size;
|
|
|
|
u32 bytes_offset;
|
|
|
|
u8 bits8_offset;
|
2018-04-19 01:56:00 +03:00
|
|
|
|
2020-09-28 14:31:04 +03:00
|
|
|
btf_show_start_member(show, member);
|
2018-04-19 01:56:00 +03:00
|
|
|
|
2021-12-01 21:10:25 +03:00
|
|
|
member_offset = __btf_member_bit_offset(t, member);
|
|
|
|
bitfield_size = __btf_member_bitfield_size(t, member);
|
2019-01-10 22:14:00 +03:00
|
|
|
bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset);
|
|
|
|
bits8_offset = BITS_PER_BYTE_MASKED(member_offset);
|
2018-12-16 09:13:51 +03:00
|
|
|
if (bitfield_size) {
|
2020-09-28 14:31:04 +03:00
|
|
|
safe_data = btf_show_start_type(show, member_type,
|
|
|
|
member->type,
|
|
|
|
data + bytes_offset);
|
|
|
|
if (safe_data)
|
|
|
|
btf_bitfield_show(safe_data,
|
|
|
|
bits8_offset,
|
|
|
|
bitfield_size, show);
|
|
|
|
btf_show_end_type(show);
|
2018-12-16 09:13:51 +03:00
|
|
|
} else {
|
|
|
|
ops = btf_type_ops(member_type);
|
2020-09-28 14:31:04 +03:00
|
|
|
ops->show(btf, member_type, member->type,
|
|
|
|
data + bytes_offset, bits8_offset, show);
|
2018-12-16 09:13:51 +03:00
|
|
|
}
|
2020-09-28 14:31:04 +03:00
|
|
|
|
|
|
|
btf_show_end_member(show);
|
2018-04-19 01:56:00 +03:00
|
|
|
}
|
2020-09-28 14:31:04 +03:00
|
|
|
|
|
|
|
btf_show_end_struct_type(show);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void btf_struct_show(const struct btf *btf, const struct btf_type *t,
|
|
|
|
u32 type_id, void *data, u8 bits_offset,
|
|
|
|
struct btf_show *show)
|
|
|
|
{
|
|
|
|
const struct btf_member *m = show->state.member;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* First check if any members would be shown (are non-zero).
|
|
|
|
* See comments above "struct btf_show" definition for more
|
|
|
|
* details on how this works at a high level.
|
|
|
|
*/
|
|
|
|
if (show->state.depth > 0 && !(show->flags & BTF_SHOW_ZERO)) {
|
|
|
|
if (!show->state.depth_check) {
|
|
|
|
show->state.depth_check = show->state.depth + 1;
|
|
|
|
show->state.depth_to_show = 0;
|
|
|
|
}
|
|
|
|
__btf_struct_show(btf, t, type_id, data, bits_offset, show);
|
|
|
|
/* Restore saved member data here */
|
|
|
|
show->state.member = m;
|
|
|
|
if (show->state.depth_check != show->state.depth + 1)
|
|
|
|
return;
|
|
|
|
show->state.depth_check = 0;
|
|
|
|
|
|
|
|
if (show->state.depth_to_show <= show->state.depth)
|
|
|
|
return;
|
|
|
|
/*
|
|
|
|
* Reaching here indicates we have recursed and found
|
|
|
|
* non-zero child values.
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
|
|
|
|
__btf_struct_show(btf, t, type_id, data, bits_offset, show);
|
2018-04-19 01:56:00 +03:00
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:57 +03:00
|
|
|
static struct btf_kind_operations struct_ops = {
|
|
|
|
.check_meta = btf_struct_check_meta,
|
2018-04-19 01:55:58 +03:00
|
|
|
.resolve = btf_struct_resolve,
|
2018-04-19 01:55:59 +03:00
|
|
|
.check_member = btf_struct_check_member,
|
2018-12-16 09:13:51 +03:00
|
|
|
.check_kflag_member = btf_generic_check_kflag_member,
|
2018-04-19 01:55:57 +03:00
|
|
|
.log_details = btf_struct_log,
|
2020-09-28 14:31:04 +03:00
|
|
|
.show = btf_struct_show,
|
2018-04-19 01:55:57 +03:00
|
|
|
};
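For context, every BTF kind provides one of these operation tables,
and dispatch is by the kind encoded in the type's info word, roughly
(a simplified sketch; in this file the lookup is btf_type_ops(), which
indexes the kind_ops[] array by BTF_INFO_KIND(t->info)):

	const struct btf_kind_operations *ops = btf_type_ops(t);

	ops->show(btf, t, type_id, data, bits_offset, show);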
|
|
|
|
|
2018-04-19 01:55:59 +03:00
|
|
|
static int btf_enum_check_member(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *struct_type,
|
|
|
|
const struct btf_member *member,
|
|
|
|
const struct btf_type *member_type)
|
|
|
|
{
|
|
|
|
u32 struct_bits_off = member->offset;
|
|
|
|
u32 struct_size, bytes_offset;
|
|
|
|
|
|
|
|
if (BITS_PER_BYTE_MASKED(struct_bits_off)) {
|
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"Member is not byte aligned");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct_size = struct_type->size;
|
|
|
|
bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off);
|
2020-03-10 10:32:29 +03:00
|
|
|
if (struct_size - bytes_offset < member_type->size) {
|
2018-04-19 01:55:59 +03:00
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"Member exceeds struct_size");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-12-16 09:13:51 +03:00
|
|
|
static int btf_enum_check_kflag_member(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *struct_type,
|
|
|
|
const struct btf_member *member,
|
|
|
|
const struct btf_type *member_type)
|
|
|
|
{
|
|
|
|
u32 struct_bits_off, nr_bits, bytes_end, struct_size;
|
|
|
|
u32 int_bitsize = sizeof(int) * BITS_PER_BYTE;
|
|
|
|
|
|
|
|
struct_bits_off = BTF_MEMBER_BIT_OFFSET(member->offset);
|
|
|
|
nr_bits = BTF_MEMBER_BITFIELD_SIZE(member->offset);
|
|
|
|
if (!nr_bits) {
|
|
|
|
if (BITS_PER_BYTE_MASKED(struct_bits_off)) {
|
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"Member is not byte aligned");
|
2019-09-25 12:38:35 +03:00
|
|
|
return -EINVAL;
|
2018-12-16 09:13:51 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
nr_bits = int_bitsize;
|
|
|
|
} else if (nr_bits > int_bitsize) {
|
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"Invalid member bitfield_size");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct_size = struct_type->size;
|
|
|
|
bytes_end = BITS_ROUNDUP_BYTES(struct_bits_off + nr_bits);
|
|
|
|
if (struct_size < bytes_end) {
|
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"Member exceeds struct_size");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:57 +03:00
|
|
|
static s32 btf_enum_check_meta(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t,
|
|
|
|
u32 meta_left)
|
|
|
|
{
|
|
|
|
const struct btf_enum *enums = btf_type_enum(t);
|
|
|
|
struct btf *btf = env->btf;
|
|
|
|
u16 i, nr_enums;
|
|
|
|
u32 meta_needed;
|
|
|
|
|
|
|
|
nr_enums = btf_type_vlen(t);
|
|
|
|
meta_needed = nr_enums * sizeof(*enums);
|
|
|
|
|
|
|
|
if (meta_left < meta_needed) {
|
|
|
|
btf_verifier_log_basic(env, t,
|
|
|
|
"meta_left:%u meta_needed:%u",
|
|
|
|
meta_left, meta_needed);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-12-16 09:13:51 +03:00
|
|
|
if (btf_type_kflag(t)) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2019-09-17 20:45:37 +03:00
|
|
|
if (t->size > 8 || !is_power_of_2(t->size)) {
|
|
|
|
btf_verifier_log_type(env, t, "Unexpected size");
|
2018-04-19 01:55:57 +03:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-11-28 00:23:28 +03:00
|
|
|
/* An enum type must have either no name or a valid one */
|
|
|
|
if (t->name_off &&
|
|
|
|
!btf_name_valid_identifier(env->btf, t->name_off)) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid name");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:57 +03:00
|
|
|
btf_verifier_log_type(env, t, NULL);
|
|
|
|
|
|
|
|
for (i = 0; i < nr_enums; i++) {
|
2018-04-21 19:48:23 +03:00
|
|
|
if (!btf_name_offset_valid(btf, enums[i].name_off)) {
|
2018-04-19 01:55:57 +03:00
|
|
|
btf_verifier_log(env, "\tInvalid name_offset:%u",
|
2018-04-21 19:48:23 +03:00
|
|
|
enums[i].name_off);
|
2018-04-19 01:55:57 +03:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-11-28 00:23:28 +03:00
|
|
|
/* enum member must have a valid name */
|
|
|
|
if (!enums[i].name_off ||
|
|
|
|
!btf_name_valid_identifier(btf, enums[i].name_off)) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid name");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2019-10-16 06:24:57 +03:00
|
|
|
if (env->log.level == BPF_LOG_KERNEL)
|
|
|
|
continue;
|
2018-04-19 01:55:57 +03:00
|
|
|
btf_verifier_log(env, "\t%s val=%d\n",
|
2018-12-13 21:41:46 +03:00
|
|
|
__btf_name_by_offset(btf, enums[i].name_off),
|
2018-04-19 01:55:57 +03:00
|
|
|
enums[i].val);
|
|
|
|
}
|
|
|
|
|
|
|
|
return meta_needed;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void btf_enum_log(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t)
|
|
|
|
{
|
|
|
|
btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
|
|
|
|
}
|
|
|
|
|
2020-09-28 14:31:04 +03:00
|
|
|
static void btf_enum_show(const struct btf *btf, const struct btf_type *t,
|
|
|
|
u32 type_id, void *data, u8 bits_offset,
|
|
|
|
struct btf_show *show)
|
2018-04-19 01:56:00 +03:00
|
|
|
{
|
|
|
|
const struct btf_enum *enums = btf_type_enum(t);
|
|
|
|
u32 i, nr_enums = btf_type_vlen(t);
|
2020-09-28 14:31:04 +03:00
|
|
|
void *safe_data;
|
|
|
|
int v;
|
|
|
|
|
|
|
|
safe_data = btf_show_start_type(show, t, type_id, data);
|
|
|
|
if (!safe_data)
|
|
|
|
return;
|
|
|
|
|
|
|
|
v = *(int *)safe_data;
|
2018-04-19 01:56:00 +03:00
|
|
|
|
|
|
|
for (i = 0; i < nr_enums; i++) {
|
2020-09-28 14:31:04 +03:00
|
|
|
if (v != enums[i].val)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
btf_show_type_value(show, "%s",
|
|
|
|
__btf_name_by_offset(btf,
|
|
|
|
enums[i].name_off));
|
|
|
|
|
|
|
|
btf_show_end_type(show);
|
|
|
|
return;
|
2018-04-19 01:56:00 +03:00
|
|
|
}
|
|
|
|
|
2020-09-28 14:31:04 +03:00
|
|
|
btf_show_type_value(show, "%d", v);
|
|
|
|
btf_show_end_type(show);
|
2018-04-19 01:56:00 +03:00
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:57 +03:00
|
|
|
static struct btf_kind_operations enum_ops = {
|
|
|
|
.check_meta = btf_enum_check_meta,
|
2018-04-19 01:55:58 +03:00
|
|
|
.resolve = btf_df_resolve,
|
2018-04-19 01:55:59 +03:00
|
|
|
.check_member = btf_enum_check_member,
|
2018-12-16 09:13:51 +03:00
|
|
|
.check_kflag_member = btf_enum_check_kflag_member,
|
2018-04-19 01:55:57 +03:00
|
|
|
.log_details = btf_enum_log,
|
2020-09-28 14:31:04 +03:00
|
|
|
.show = btf_enum_show,
|
2018-04-19 01:55:57 +03:00
|
|
|
};
|
|
|
|
|
2018-11-20 02:29:08 +03:00
|
|
|
static s32 btf_func_proto_check_meta(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t,
|
|
|
|
u32 meta_left)
|
|
|
|
{
|
|
|
|
u32 meta_needed = btf_type_vlen(t) * sizeof(struct btf_param);
|
|
|
|
|
|
|
|
if (meta_left < meta_needed) {
|
|
|
|
btf_verifier_log_basic(env, t,
|
|
|
|
"meta_left:%u meta_needed:%u",
|
|
|
|
meta_left, meta_needed);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (t->name_off) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid name");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-12-16 09:13:51 +03:00
|
|
|
if (btf_type_kflag(t)) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-11-20 02:29:08 +03:00
|
|
|
btf_verifier_log_type(env, t, NULL);
|
|
|
|
|
|
|
|
return meta_needed;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void btf_func_proto_log(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t)
|
|
|
|
{
|
|
|
|
const struct btf_param *args = (const struct btf_param *)(t + 1);
|
|
|
|
u16 nr_args = btf_type_vlen(t), i;
|
|
|
|
|
|
|
|
btf_verifier_log(env, "return=%u args=(", t->type);
|
|
|
|
if (!nr_args) {
|
|
|
|
btf_verifier_log(env, "void");
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (nr_args == 1 && !args[0].type) {
|
|
|
|
/* Only one vararg */
|
|
|
|
btf_verifier_log(env, "vararg");
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
btf_verifier_log(env, "%u %s", args[0].type,
|
2018-12-13 21:41:46 +03:00
|
|
|
__btf_name_by_offset(env->btf,
|
|
|
|
args[0].name_off));
|
2018-11-20 02:29:08 +03:00
|
|
|
for (i = 1; i < nr_args - 1; i++)
|
|
|
|
btf_verifier_log(env, ", %u %s", args[i].type,
|
2018-12-13 21:41:46 +03:00
|
|
|
__btf_name_by_offset(env->btf,
|
|
|
|
args[i].name_off));
|
2018-11-20 02:29:08 +03:00
|
|
|
|
|
|
|
if (nr_args > 1) {
|
|
|
|
const struct btf_param *last_arg = &args[nr_args - 1];
|
|
|
|
|
|
|
|
if (last_arg->type)
|
|
|
|
btf_verifier_log(env, ", %u %s", last_arg->type,
|
2018-12-13 21:41:46 +03:00
|
|
|
__btf_name_by_offset(env->btf,
|
|
|
|
last_arg->name_off));
|
2018-11-20 02:29:08 +03:00
|
|
|
else
|
|
|
|
btf_verifier_log(env, ", vararg");
|
|
|
|
}
|
|
|
|
|
|
|
|
done:
|
|
|
|
btf_verifier_log(env, ")");
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct btf_kind_operations func_proto_ops = {
|
|
|
|
.check_meta = btf_func_proto_check_meta,
|
|
|
|
.resolve = btf_df_resolve,
|
|
|
|
/*
|
|
|
|
* BTF_KIND_FUNC_PROTO cannot be directly referred to by
|
|
|
|
* a struct's member.
|
|
|
|
*
|
2021-05-25 05:56:59 +03:00
|
|
|
* It should be a function pointer instead.
|
2018-11-20 02:29:08 +03:00
|
|
|
* (i.e. struct's member -> BTF_KIND_PTR -> BTF_KIND_FUNC_PROTO)
|
|
|
|
*
|
|
|
|
* Hence, there is no btf_func_check_member().
|
|
|
|
*/
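/* Hypothetical illustration (not from the original source) of the
 * valid shape, a function pointer member:
 *
 *	struct s {
 *		int (*cb)(int);		member -> PTR -> FUNC_PROTO
 *	};
 */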
|
|
|
|
.check_member = btf_df_check_member,
|
2018-12-16 09:13:51 +03:00
|
|
|
.check_kflag_member = btf_df_check_kflag_member,
|
2018-11-20 02:29:08 +03:00
|
|
|
.log_details = btf_func_proto_log,
|
2020-09-28 14:31:04 +03:00
|
|
|
.show = btf_df_show,
|
2018-11-20 02:29:08 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
static s32 btf_func_check_meta(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t,
|
|
|
|
u32 meta_left)
|
|
|
|
{
|
|
|
|
if (!t->name_off ||
|
|
|
|
!btf_name_valid_identifier(env->btf, t->name_off)) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid name");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2020-01-10 09:41:20 +03:00
|
|
|
if (btf_type_vlen(t) > BTF_FUNC_GLOBAL) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid func linkage");
|
2018-11-20 02:29:08 +03:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-12-16 09:13:51 +03:00
|
|
|
if (btf_type_kflag(t)) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-11-20 02:29:08 +03:00
|
|
|
btf_verifier_log_type(env, t, NULL);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct btf_kind_operations func_ops = {
|
|
|
|
.check_meta = btf_func_check_meta,
|
|
|
|
.resolve = btf_df_resolve,
|
|
|
|
.check_member = btf_df_check_member,
|
2018-12-16 09:13:51 +03:00
|
|
|
.check_kflag_member = btf_df_check_kflag_member,
|
2018-11-20 02:29:08 +03:00
|
|
|
.log_details = btf_ref_type_log,
|
2020-09-28 14:31:04 +03:00
|
|
|
.show = btf_df_show,
|
2018-11-20 02:29:08 +03:00
|
|
|
};
|
|
|
|
|
2019-04-10 00:20:09 +03:00
|
|
|
static s32 btf_var_check_meta(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t,
|
|
|
|
u32 meta_left)
|
|
|
|
{
|
|
|
|
const struct btf_var *var;
|
|
|
|
u32 meta_needed = sizeof(*var);
|
|
|
|
|
|
|
|
if (meta_left < meta_needed) {
|
|
|
|
btf_verifier_log_basic(env, t,
|
|
|
|
"meta_left:%u meta_needed:%u",
|
|
|
|
meta_left, meta_needed);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (btf_type_vlen(t)) {
|
|
|
|
btf_verifier_log_type(env, t, "vlen != 0");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (btf_type_kflag(t)) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!t->name_off ||
|
|
|
|
!__btf_name_valid(env->btf, t->name_off, true)) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid name");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* A var cannot be of type void */
|
|
|
|
if (!t->type || !BTF_TYPE_ID_VALID(t->type)) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid type_id");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
var = btf_type_var(t);
|
|
|
|
if (var->linkage != BTF_VAR_STATIC &&
|
|
|
|
var->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
|
|
|
|
btf_verifier_log_type(env, t, "Linkage not supported");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
btf_verifier_log_type(env, t, NULL);
|
|
|
|
|
|
|
|
return meta_needed;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void btf_var_log(struct btf_verifier_env *env, const struct btf_type *t)
|
|
|
|
{
|
|
|
|
const struct btf_var *var = btf_type_var(t);
|
|
|
|
|
|
|
|
btf_verifier_log(env, "type_id=%u linkage=%u", t->type, var->linkage);
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct btf_kind_operations var_ops = {
|
|
|
|
.check_meta = btf_var_check_meta,
|
|
|
|
.resolve = btf_var_resolve,
|
|
|
|
.check_member = btf_df_check_member,
|
|
|
|
.check_kflag_member = btf_df_check_kflag_member,
|
|
|
|
.log_details = btf_var_log,
|
2020-09-28 14:31:04 +03:00
|
|
|
.show = btf_var_show,
|
2019-04-10 00:20:09 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
static s32 btf_datasec_check_meta(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t,
|
|
|
|
u32 meta_left)
|
|
|
|
{
|
|
|
|
const struct btf_var_secinfo *vsi;
|
|
|
|
u64 last_vsi_end_off = 0, sum = 0;
|
|
|
|
u32 i, meta_needed;
|
|
|
|
|
|
|
|
meta_needed = btf_type_vlen(t) * sizeof(*vsi);
|
|
|
|
if (meta_left < meta_needed) {
|
|
|
|
btf_verifier_log_basic(env, t,
|
|
|
|
"meta_left:%u meta_needed:%u",
|
|
|
|
meta_left, meta_needed);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!t->size) {
|
|
|
|
btf_verifier_log_type(env, t, "size == 0");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (btf_type_kflag(t)) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!t->name_off ||
|
|
|
|
!btf_name_valid_section(env->btf, t->name_off)) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid name");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
btf_verifier_log_type(env, t, NULL);
|
|
|
|
|
|
|
|
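/* Each btf_var_secinfo must reference a valid type id, lie fully
 * inside the section, and appear in ascending, non-overlapping
 * offset order; the vsi sizes must also sum to at most t->size.
 */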
for_each_vsi(i, t, vsi) {
|
|
|
|
/* A var cannot be of type void */
|
|
|
|
if (!vsi->type || !BTF_TYPE_ID_VALID(vsi->type)) {
|
|
|
|
btf_verifier_log_vsi(env, t, vsi,
|
|
|
|
"Invalid type_id");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (vsi->offset < last_vsi_end_off || vsi->offset >= t->size) {
|
|
|
|
btf_verifier_log_vsi(env, t, vsi,
|
|
|
|
"Invalid offset");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!vsi->size || vsi->size > t->size) {
|
|
|
|
btf_verifier_log_vsi(env, t, vsi,
|
|
|
|
"Invalid size");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
last_vsi_end_off = vsi->offset + vsi->size;
|
|
|
|
if (last_vsi_end_off > t->size) {
|
|
|
|
btf_verifier_log_vsi(env, t, vsi,
|
|
|
|
"Invalid offset+size");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
btf_verifier_log_vsi(env, t, vsi, NULL);
|
|
|
|
sum += vsi->size;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (t->size < sum) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid btf_info size");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return meta_needed;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int btf_datasec_resolve(struct btf_verifier_env *env,
|
|
|
|
const struct resolve_vertex *v)
|
|
|
|
{
|
|
|
|
const struct btf_var_secinfo *vsi;
|
|
|
|
struct btf *btf = env->btf;
|
|
|
|
u16 i;
|
|
|
|
|
|
|
|
for_each_vsi_from(i, v->next_member, v->t, vsi) {
|
|
|
|
u32 var_type_id = vsi->type, type_id, type_size = 0;
|
|
|
|
const struct btf_type *var_type = btf_type_by_id(env->btf,
|
|
|
|
var_type_id);
|
|
|
|
if (!var_type || !btf_type_is_var(var_type)) {
|
|
|
|
btf_verifier_log_vsi(env, v->t, vsi,
|
|
|
|
"Not a VAR kind member");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!env_type_is_resolve_sink(env, var_type) &&
|
|
|
|
!env_type_is_resolved(env, var_type_id)) {
|
|
|
|
env_stack_set_next_member(env, i + 1);
|
|
|
|
return env_stack_push(env, var_type, var_type_id);
|
|
|
|
}
|
|
|
|
|
|
|
|
type_id = var_type->type;
|
|
|
|
if (!btf_type_id_size(btf, &type_id, &type_size)) {
|
|
|
|
btf_verifier_log_vsi(env, v->t, vsi, "Invalid type");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (vsi->size < type_size) {
|
|
|
|
btf_verifier_log_vsi(env, v->t, vsi, "Invalid size");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
env_stack_pop_resolved(env, 0, 0);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void btf_datasec_log(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t)
|
|
|
|
{
|
|
|
|
btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t));
|
|
|
|
}
|
|
|
|
|
2020-09-28 14:31:04 +03:00
|
|
|
static void btf_datasec_show(const struct btf *btf,
|
|
|
|
const struct btf_type *t, u32 type_id,
|
|
|
|
void *data, u8 bits_offset,
|
|
|
|
struct btf_show *show)
|
2019-04-10 00:20:09 +03:00
|
|
|
{
|
|
|
|
const struct btf_var_secinfo *vsi;
|
|
|
|
const struct btf_type *var;
|
|
|
|
u32 i;
|
|
|
|
|
2020-09-28 14:31:04 +03:00
|
|
|
if (!btf_show_start_type(show, t, type_id, data))
|
|
|
|
return;
|
|
|
|
|
|
|
|
btf_show_type_value(show, "section (\"%s\") = {",
|
|
|
|
__btf_name_by_offset(btf, t->name_off));
|
2019-04-10 00:20:09 +03:00
|
|
|
for_each_vsi(i, t, vsi) {
|
|
|
|
var = btf_type_by_id(btf, vsi->type);
|
|
|
|
if (i)
|
2020-09-28 14:31:04 +03:00
|
|
|
btf_show(show, ",");
|
|
|
|
btf_type_ops(var)->show(btf, var, vsi->type,
|
|
|
|
data + vsi->offset, bits_offset, show);
|
2019-04-10 00:20:09 +03:00
|
|
|
}
|
2020-09-28 14:31:04 +03:00
|
|
|
btf_show_end_type(show);
|
2019-04-10 00:20:09 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static const struct btf_kind_operations datasec_ops = {
|
|
|
|
.check_meta = btf_datasec_check_meta,
|
|
|
|
.resolve = btf_datasec_resolve,
|
|
|
|
.check_member = btf_df_check_member,
|
|
|
|
.check_kflag_member = btf_df_check_kflag_member,
|
|
|
|
.log_details = btf_datasec_log,
|
2020-09-28 14:31:04 +03:00
|
|
|
.show = btf_datasec_show,
|
2019-04-10 00:20:09 +03:00
|
|
|
};
|
|
|
|
|
2021-02-26 23:22:52 +03:00
|
|
|
static s32 btf_float_check_meta(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t,
|
|
|
|
u32 meta_left)
|
|
|
|
{
|
|
|
|
if (btf_type_vlen(t)) {
|
|
|
|
btf_verifier_log_type(env, t, "vlen != 0");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (btf_type_kflag(t)) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
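/* Illustrative assumption of what the allowed sizes correspond to in
 * practice (not enforced here): 2 = _Float16, 4 = float, 8 = double,
 * 12 = 80-bit long double as stored on 32-bit x86, 16 = 128-bit
 * long double / __float128.
 */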
if (t->size != 2 && t->size != 4 && t->size != 8 && t->size != 12 &&
|
|
|
|
t->size != 16) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid type_size");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
btf_verifier_log_type(env, t, NULL);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int btf_float_check_member(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *struct_type,
|
|
|
|
const struct btf_member *member,
|
|
|
|
const struct btf_type *member_type)
|
|
|
|
{
|
|
|
|
u64 start_offset_bytes;
|
|
|
|
u64 end_offset_bytes;
|
|
|
|
u64 misalign_bits;
|
|
|
|
u64 align_bytes;
|
|
|
|
u64 align_bits;
|
|
|
|
|
|
|
|
/* Different architectures have different alignment requirements, so
|
|
|
|
* here we check only for the reasonable minimum. This way we ensure
|
|
|
|
* that types, after CO-RE relocation, can pass the kernel BTF verifier.
|
|
|
|
*/
|
|
|
|
align_bytes = min_t(u64, sizeof(void *), member_type->size);
|
|
|
|
align_bits = align_bytes * BITS_PER_BYTE;
|
|
|
|
div64_u64_rem(member->offset, align_bits, &misalign_bits);
|
|
|
|
if (misalign_bits) {
|
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"Member is not properly aligned");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
start_offset_bytes = member->offset / BITS_PER_BYTE;
|
|
|
|
end_offset_bytes = start_offset_bytes + member_type->size;
|
|
|
|
if (end_offset_bytes > struct_type->size) {
|
|
|
|
btf_verifier_log_member(env, struct_type, member,
|
|
|
|
"Member exceeds struct_size");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void btf_float_log(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t)
|
|
|
|
{
|
|
|
|
btf_verifier_log(env, "size=%u", t->size);
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct btf_kind_operations float_ops = {
|
|
|
|
.check_meta = btf_float_check_meta,
|
|
|
|
.resolve = btf_df_resolve,
|
|
|
|
.check_member = btf_float_check_member,
|
|
|
|
.check_kflag_member = btf_generic_check_kflag_member,
|
|
|
|
.log_details = btf_float_log,
|
|
|
|
.show = btf_df_show,
|
|
|
|
};
|
|
|
|
|
2021-10-12 19:48:38 +03:00
|
|
|
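/* Illustrative source-level construct being validated (assumed clang
 * btf_decl_tag usage, not code from this file):
 *
 *	struct s {
 *		int a __attribute__((btf_decl_tag("tag_a")));
 *	};
 *
 * This encodes a DECL_TAG whose value is "tag_a", whose type points
 * at struct s, and whose component_idx is 0 (the index of member 'a');
 * component_idx is -1 when the tag applies to the type, variable or
 * function itself.
 */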
static s32 btf_decl_tag_check_meta(struct btf_verifier_env *env,
|
2021-09-15 01:30:15 +03:00
|
|
|
const struct btf_type *t,
|
|
|
|
u32 meta_left)
|
|
|
|
{
|
2021-10-12 19:48:38 +03:00
|
|
|
const struct btf_decl_tag *tag;
|
2021-09-15 01:30:15 +03:00
|
|
|
u32 meta_needed = sizeof(*tag);
|
|
|
|
s32 component_idx;
|
|
|
|
const char *value;
|
|
|
|
|
|
|
|
if (meta_left < meta_needed) {
|
|
|
|
btf_verifier_log_basic(env, t,
|
|
|
|
"meta_left:%u meta_needed:%u",
|
|
|
|
meta_left, meta_needed);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
value = btf_name_by_offset(env->btf, t->name_off);
|
|
|
|
if (!value || !value[0]) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid value");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (btf_type_vlen(t)) {
|
|
|
|
btf_verifier_log_type(env, t, "vlen != 0");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (btf_type_kflag(t)) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2021-10-12 19:48:38 +03:00
|
|
|
component_idx = btf_type_decl_tag(t)->component_idx;
|
2021-09-15 01:30:15 +03:00
|
|
|
if (component_idx < -1) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid component_idx");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
btf_verifier_log_type(env, t, NULL);
|
|
|
|
|
|
|
|
return meta_needed;
|
|
|
|
}
|
|
|
|
|
2021-10-12 19:48:38 +03:00
|
|
|
static int btf_decl_tag_resolve(struct btf_verifier_env *env,
|
2021-09-15 01:30:15 +03:00
|
|
|
const struct resolve_vertex *v)
|
|
|
|
{
|
|
|
|
const struct btf_type *next_type;
|
|
|
|
const struct btf_type *t = v->t;
|
|
|
|
u32 next_type_id = t->type;
|
|
|
|
struct btf *btf = env->btf;
|
|
|
|
s32 component_idx;
|
|
|
|
u32 vlen;
|
|
|
|
|
|
|
|
next_type = btf_type_by_id(btf, next_type_id);
|
2021-10-12 19:48:38 +03:00
|
|
|
if (!next_type || !btf_type_is_decl_tag_target(next_type)) {
|
2021-09-15 01:30:15 +03:00
|
|
|
btf_verifier_log_type(env, v->t, "Invalid type_id");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!env_type_is_resolve_sink(env, next_type) &&
|
|
|
|
!env_type_is_resolved(env, next_type_id))
|
|
|
|
return env_stack_push(env, next_type, next_type_id);
|
|
|
|
|
2021-10-12 19:48:38 +03:00
|
|
|
component_idx = btf_type_decl_tag(t)->component_idx;
|
2021-09-15 01:30:15 +03:00
|
|
|
if (component_idx != -1) {
|
2021-10-21 22:56:28 +03:00
|
|
|
if (btf_type_is_var(next_type) || btf_type_is_typedef(next_type)) {
|
2021-09-15 01:30:15 +03:00
|
|
|
btf_verifier_log_type(env, v->t, "Invalid component_idx");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (btf_type_is_struct(next_type)) {
|
|
|
|
vlen = btf_type_vlen(next_type);
|
|
|
|
} else {
|
|
|
|
/* next_type should be a function */
|
|
|
|
next_type = btf_type_by_id(btf, next_type->type);
|
|
|
|
vlen = btf_type_vlen(next_type);
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((u32)component_idx >= vlen) {
|
|
|
|
btf_verifier_log_type(env, v->t, "Invalid component_idx");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
env_stack_pop_resolved(env, next_type_id, 0);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-10-12 19:48:38 +03:00
|
|
|
static void btf_decl_tag_log(struct btf_verifier_env *env, const struct btf_type *t)
|
2021-09-15 01:30:15 +03:00
|
|
|
{
|
|
|
|
btf_verifier_log(env, "type=%u component_idx=%d", t->type,
|
2021-10-12 19:48:38 +03:00
|
|
|
btf_type_decl_tag(t)->component_idx);
|
2021-09-15 01:30:15 +03:00
|
|
|
}
|
|
|
|
|
2021-10-12 19:48:38 +03:00
|
|
|
static const struct btf_kind_operations decl_tag_ops = {
|
|
|
|
.check_meta = btf_decl_tag_check_meta,
|
|
|
|
.resolve = btf_decl_tag_resolve,
|
2021-09-15 01:30:15 +03:00
|
|
|
.check_member = btf_df_check_member,
|
|
|
|
.check_kflag_member = btf_df_check_kflag_member,
|
2021-10-12 19:48:38 +03:00
|
|
|
.log_details = btf_decl_tag_log,
|
2021-09-15 01:30:15 +03:00
|
|
|
.show = btf_df_show,
|
|
|
|
};
|
|
|
|
|
2018-11-20 02:29:08 +03:00
|
|
|
static int btf_func_proto_check(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t)
|
|
|
|
{
|
|
|
|
const struct btf_type *ret_type;
|
|
|
|
const struct btf_param *args;
|
|
|
|
const struct btf *btf;
|
|
|
|
u16 nr_args, i;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
btf = env->btf;
|
|
|
|
args = (const struct btf_param *)(t + 1);
|
|
|
|
nr_args = btf_type_vlen(t);
|
|
|
|
|
|
|
|
/* Check func return type which could be "void" (t->type == 0) */
|
|
|
|
if (t->type) {
|
|
|
|
u32 ret_type_id = t->type;
|
|
|
|
|
|
|
|
ret_type = btf_type_by_id(btf, ret_type_id);
|
|
|
|
if (!ret_type) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid return type");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (btf_type_needs_resolve(ret_type) &&
|
|
|
|
!env_type_is_resolved(env, ret_type_id)) {
|
|
|
|
err = btf_resolve(env, ret_type, ret_type_id);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Ensure the return type is a type that has a size */
|
|
|
|
if (!btf_type_id_size(btf, &ret_type_id, NULL)) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid return type");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!nr_args)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* Last func arg type_id could be 0 if it is a vararg */
|
|
|
|
if (!args[nr_args - 1].type) {
|
|
|
|
if (args[nr_args - 1].name_off) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid arg#%u",
|
|
|
|
nr_args);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
nr_args--;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = 0;
|
|
|
|
for (i = 0; i < nr_args; i++) {
|
|
|
|
const struct btf_type *arg_type;
|
|
|
|
u32 arg_type_id;
|
|
|
|
|
|
|
|
arg_type_id = args[i].type;
|
|
|
|
arg_type = btf_type_by_id(btf, arg_type_id);
|
|
|
|
if (!arg_type) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1);
|
|
|
|
err = -EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (args[i].name_off &&
|
|
|
|
(!btf_name_offset_valid(btf, args[i].name_off) ||
|
|
|
|
!btf_name_valid_identifier(btf, args[i].name_off))) {
|
|
|
|
btf_verifier_log_type(env, t,
|
|
|
|
"Invalid arg#%u", i + 1);
|
|
|
|
err = -EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (btf_type_needs_resolve(arg_type) &&
|
|
|
|
!env_type_is_resolved(env, arg_type_id)) {
|
|
|
|
err = btf_resolve(env, arg_type, arg_type_id);
|
|
|
|
if (err)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!btf_type_id_size(btf, &arg_type_id, NULL)) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1);
|
|
|
|
err = -EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int btf_func_check(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t)
|
|
|
|
{
|
|
|
|
const struct btf_type *proto_type;
|
|
|
|
const struct btf_param *args;
|
|
|
|
const struct btf *btf;
|
|
|
|
u16 nr_args, i;
|
|
|
|
|
|
|
|
btf = env->btf;
|
|
|
|
proto_type = btf_type_by_id(btf, t->type);
|
|
|
|
|
|
|
|
if (!proto_type || !btf_type_is_func_proto(proto_type)) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid type_id");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
args = (const struct btf_param *)(proto_type + 1);
|
|
|
|
nr_args = btf_type_vlen(proto_type);
|
|
|
|
for (i = 0; i < nr_args; i++) {
|
|
|
|
if (!args[i].name_off && args[i].type) {
|
|
|
|
btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:57 +03:00
|
|
|
static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = {
|
|
|
|
[BTF_KIND_INT] = &int_ops,
|
|
|
|
[BTF_KIND_PTR] = &ptr_ops,
|
|
|
|
[BTF_KIND_ARRAY] = &array_ops,
|
|
|
|
[BTF_KIND_STRUCT] = &struct_ops,
|
|
|
|
[BTF_KIND_UNION] = &struct_ops,
|
|
|
|
[BTF_KIND_ENUM] = &enum_ops,
|
|
|
|
[BTF_KIND_FWD] = &fwd_ops,
|
|
|
|
[BTF_KIND_TYPEDEF] = &modifier_ops,
|
|
|
|
[BTF_KIND_VOLATILE] = &modifier_ops,
|
|
|
|
[BTF_KIND_CONST] = &modifier_ops,
|
|
|
|
[BTF_KIND_RESTRICT] = &modifier_ops,
|
2018-11-20 02:29:08 +03:00
|
|
|
[BTF_KIND_FUNC] = &func_ops,
|
|
|
|
[BTF_KIND_FUNC_PROTO] = &func_proto_ops,
|
2019-04-10 00:20:09 +03:00
|
|
|
[BTF_KIND_VAR] = &var_ops,
|
|
|
|
[BTF_KIND_DATASEC] = &datasec_ops,
|
2021-02-26 23:22:52 +03:00
|
|
|
[BTF_KIND_FLOAT] = &float_ops,
|
2021-10-12 19:48:38 +03:00
|
|
|
[BTF_KIND_DECL_TAG] = &decl_tag_ops,
|
2021-11-12 04:26:09 +03:00
|
|
|
[BTF_KIND_TYPE_TAG] = &modifier_ops,
|
2018-04-19 01:55:57 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
static s32 btf_check_meta(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t,
|
|
|
|
u32 meta_left)
|
|
|
|
{
|
|
|
|
u32 saved_meta_left = meta_left;
|
|
|
|
s32 var_meta_size;
|
|
|
|
|
|
|
|
if (meta_left < sizeof(*t)) {
|
|
|
|
btf_verifier_log(env, "[%u] meta_left:%u meta_needed:%zu",
|
|
|
|
env->log_type_id, meta_left, sizeof(*t));
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
meta_left -= sizeof(*t);
|
|
|
|
|
2018-05-23 00:57:20 +03:00
|
|
|
if (t->info & ~BTF_INFO_MASK) {
|
|
|
|
btf_verifier_log(env, "[%u] Invalid btf_info:%x",
|
|
|
|
env->log_type_id, t->info);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:57 +03:00
|
|
|
if (BTF_INFO_KIND(t->info) > BTF_KIND_MAX ||
|
|
|
|
BTF_INFO_KIND(t->info) == BTF_KIND_UNKN) {
|
|
|
|
btf_verifier_log(env, "[%u] Invalid kind:%u",
|
|
|
|
env->log_type_id, BTF_INFO_KIND(t->info));
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-04-21 19:48:23 +03:00
|
|
|
if (!btf_name_offset_valid(env->btf, t->name_off)) {
|
2018-04-19 01:55:57 +03:00
|
|
|
btf_verifier_log(env, "[%u] Invalid name_offset:%u",
|
2018-04-21 19:48:23 +03:00
|
|
|
env->log_type_id, t->name_off);
|
2018-04-19 01:55:57 +03:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
var_meta_size = btf_type_ops(t)->check_meta(env, t, meta_left);
|
|
|
|
if (var_meta_size < 0)
|
|
|
|
return var_meta_size;
|
|
|
|
|
|
|
|
meta_left -= var_meta_size;
|
|
|
|
|
|
|
|
return saved_meta_left - meta_left;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int btf_check_all_metas(struct btf_verifier_env *env)
|
|
|
|
{
|
|
|
|
struct btf *btf = env->btf;
|
|
|
|
struct btf_header *hdr;
|
|
|
|
void *cur, *end;
|
|
|
|
|
2018-05-23 00:57:18 +03:00
|
|
|
hdr = &btf->hdr;
|
2018-04-19 01:55:57 +03:00
|
|
|
cur = btf->nohdr_data + hdr->type_off;
|
2018-09-12 20:29:11 +03:00
|
|
|
end = cur + hdr->type_len;
|
2018-04-19 01:55:57 +03:00
|
|
|
|
2020-11-10 04:19:28 +03:00
|
|
|
env->log_type_id = btf->base_btf ? btf->start_id : 1;
|
2018-04-19 01:55:57 +03:00
|
|
|
while (cur < end) {
|
|
|
|
struct btf_type *t = cur;
|
|
|
|
s32 meta_size;
|
|
|
|
|
|
|
|
meta_size = btf_check_meta(env, t, end - cur);
|
|
|
|
if (meta_size < 0)
|
|
|
|
return meta_size;
|
|
|
|
|
|
|
|
btf_add_type(env, t);
|
|
|
|
cur += meta_size;
|
|
|
|
env->log_type_id++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-04-19 01:55:58 +03:00
|
|
|
static bool btf_resolve_valid(struct btf_verifier_env *env,
|
|
|
|
const struct btf_type *t,
|
|
|
|
u32 type_id)
|
|
|
|
{
|
|
|
|
struct btf *btf = env->btf;
|
|
|
|
|
|
|
|
if (!env_type_is_resolved(env, type_id))
|
|
|
|
return false;
|
|
|
|
|
2019-04-10 00:20:09 +03:00
|
|
|
if (btf_type_is_struct(t) || btf_type_is_datasec(t))
|
2020-11-10 04:19:28 +03:00
|
|
|
return !btf_resolved_type_id(btf, type_id) &&
|
|
|
|
!btf_resolved_type_size(btf, type_id);
|
2018-04-19 01:55:58 +03:00
|
|
|
|
2021-10-12 19:48:38 +03:00
|
|
|
if (btf_type_is_decl_tag(t))
|
2021-09-15 01:30:15 +03:00
|
|
|
return btf_resolved_type_id(btf, type_id) &&
|
|
|
|
!btf_resolved_type_size(btf, type_id);
|
|
|
|
|
2019-04-10 00:20:09 +03:00
|
|
|
if (btf_type_is_modifier(t) || btf_type_is_ptr(t) ||
|
|
|
|
btf_type_is_var(t)) {
|
2018-04-19 01:55:58 +03:00
|
|
|
t = btf_type_id_resolve(btf, &type_id);
|
2019-04-10 00:20:09 +03:00
|
|
|
return t &&
|
|
|
|
!btf_type_is_modifier(t) &&
|
|
|
|
!btf_type_is_var(t) &&
|
|
|
|
!btf_type_is_datasec(t);
|
2018-04-19 01:55:58 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if (btf_type_is_array(t)) {
|
|
|
|
const struct btf_array *array = btf_type_array(t);
|
|
|
|
const struct btf_type *elem_type;
|
|
|
|
u32 elem_type_id = array->type;
|
|
|
|
u32 elem_size;
|
|
|
|
|
|
|
|
elem_type = btf_type_id_size(btf, &elem_type_id, &elem_size);
|
|
|
|
return elem_type && !btf_type_is_modifier(elem_type) &&
|
|
|
|
(array->nelems * elem_size ==
|
2020-11-10 04:19:28 +03:00
|
|
|
btf_resolved_type_size(btf, type_id));
|
2018-04-19 01:55:58 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2018-11-20 02:29:08 +03:00
|
|
|
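/* Resolve a type iteratively with an explicit stack rather than
 * recursion: -E2BIG means the reference chain exceeded
 * MAX_RESOLVE_DEPTH, and -EEXIST means a type was pushed twice,
 * i.e. a reference loop was detected.
 */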
static int btf_resolve(struct btf_verifier_env *env,
		       const struct btf_type *t, u32 type_id)
{
	u32 save_log_type_id = env->log_type_id;
	const struct resolve_vertex *v;
	int err = 0;

	env->resolve_mode = RESOLVE_TBD;
	env_stack_push(env, t, type_id);
	while (!err && (v = env_stack_peak(env))) {
		env->log_type_id = v->type_id;
		err = btf_type_ops(v->t)->resolve(env, v);
	}

	env->log_type_id = type_id;
	if (err == -E2BIG) {
		btf_verifier_log_type(env, t,
				      "Exceeded max resolving depth:%u",
				      MAX_RESOLVE_DEPTH);
	} else if (err == -EEXIST) {
		btf_verifier_log_type(env, t, "Loop detected");
	}

	/* Final sanity check */
	if (!err && !btf_resolve_valid(env, t, type_id)) {
		btf_verifier_log_type(env, t, "Invalid resolve state");
		err = -EINVAL;
	}

	env->log_type_id = save_log_type_id;
	return err;
}

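/* Pass 2: resolve every type that needs it (sizes, modifier and
 * pointer chains) and run the extra FUNC/FUNC_PROTO checks.  For
 * split BTF (base_btf set), only the types added on top of the base
 * are visited.
 */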
static int btf_check_all_types(struct btf_verifier_env *env)
{
	struct btf *btf = env->btf;
	const struct btf_type *t;
	u32 type_id, i;
	int err;

	err = env_resolve_init(env);
	if (err)
		return err;

	env->phase++;
	for (i = btf->base_btf ? 0 : 1; i < btf->nr_types; i++) {
		type_id = btf->start_id + i;
		t = btf_type_by_id(btf, type_id);

		env->log_type_id = type_id;
		if (btf_type_needs_resolve(t) &&
		    !env_type_is_resolved(env, type_id)) {
			err = btf_resolve(env, t, type_id);
			if (err)
				return err;
		}

		if (btf_type_is_func_proto(t)) {
			err = btf_func_proto_check(env, t);
			if (err)
				return err;
		}

		if (btf_type_is_func(t)) {
			err = btf_func_check(env, t);
			if (err)
				return err;
		}
	}

	return 0;
}

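/* Parse the type section: alignment and emptiness checks first,
 * then the two passes above (metadata, then resolution).
 */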
static int btf_parse_type_sec(struct btf_verifier_env *env)
{
	const struct btf_header *hdr = &env->btf->hdr;
	int err;

	/* Type section must align to 4 bytes */
	if (hdr->type_off & (sizeof(u32) - 1)) {
		btf_verifier_log(env, "Unaligned type_off");
		return -EINVAL;
	}

	if (!env->btf->base_btf && !hdr->type_len) {
		btf_verifier_log(env, "No type found");
		return -EINVAL;
	}

	err = btf_check_all_metas(env);
	if (err)
		return err;

	return btf_check_all_types(env);
}

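/* Validate the string section: it must be the last section in the
 * blob, hold at most BTF_MAX_NAME_OFFSET + 1 bytes, and be NUL
 * terminated, with offset 0 being the empty name for non-split BTF.
 * Split BTF may legitimately carry no strings of its own.
 */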
static int btf_parse_str_sec(struct btf_verifier_env *env)
{
	const struct btf_header *hdr;
	struct btf *btf = env->btf;
	const char *start, *end;

	hdr = &btf->hdr;
	start = btf->nohdr_data + hdr->str_off;
	end = start + hdr->str_len;

	if (end != btf->data + btf->data_size) {
		btf_verifier_log(env, "String section is not at the end");
		return -EINVAL;
	}

	btf->strings = start;

	if (btf->base_btf && !hdr->str_len)
		return 0;
	if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || end[-1]) {
		btf_verifier_log(env, "Invalid string section");
		return -EINVAL;
	}
	if (!btf->base_btf && start[0]) {
		btf_verifier_log(env, "Invalid string section");
		return -EINVAL;
	}

	return 0;
}

static const size_t btf_sec_info_offset[] = {
	offsetof(struct btf_header, type_off),
	offsetof(struct btf_header, str_off),
};

static int btf_sec_info_cmp(const void *a, const void *b)
{
	const struct btf_sec_info *x = a;
	const struct btf_sec_info *y = b;

	return (int)(x->off - y->off) ? : (int)(x->len - y->len);
}

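/* The type and string sections must exactly tile the area after the
 * header: in offset order with no gap, no overlap and no trailing
 * bytes.  Illustratively, a 124-byte blob with hdr_len 24 leaves
 * expected_total = 100, so type [0, 80) plus strings [80, 100) is
 * accepted, while type [0, 80) plus strings [84, 100) leaves a
 * 4-byte gap and is rejected.
 */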
static int btf_check_sec_info(struct btf_verifier_env *env,
			      u32 btf_data_size)
{
	struct btf_sec_info secs[ARRAY_SIZE(btf_sec_info_offset)];
	u32 total, expected_total, i;
	const struct btf_header *hdr;
	const struct btf *btf;

	btf = env->btf;
	hdr = &btf->hdr;

	/* Populate the secs from hdr */
	for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++)
		secs[i] = *(struct btf_sec_info *)((void *)hdr +
						   btf_sec_info_offset[i]);

	sort(secs, ARRAY_SIZE(btf_sec_info_offset),
	     sizeof(struct btf_sec_info), btf_sec_info_cmp, NULL);

	/* Check for gaps and overlap among sections */
	total = 0;
	expected_total = btf_data_size - hdr->hdr_len;
	for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++) {
		if (expected_total < secs[i].off) {
			btf_verifier_log(env, "Invalid section offset");
			return -EINVAL;
		}
		if (total < secs[i].off) {
			/* gap */
			btf_verifier_log(env, "Unsupported section found");
			return -EINVAL;
		}
		if (total > secs[i].off) {
			btf_verifier_log(env, "Section overlap found");
			return -EINVAL;
		}
		if (expected_total - total < secs[i].len) {
			btf_verifier_log(env,
					 "Total section length too long");
			return -EINVAL;
		}
		total += secs[i].len;
	}

	/* There is data other than hdr and known sections */
	if (expected_total != total) {
		btf_verifier_log(env, "Unsupported section found");
		return -EINVAL;
	}

	return 0;
}

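/* Parse and validate the header at the start of btf->data.  The
 * caller copies the entire user blob into btf->data *before* this
 * runs, so a user racing to rewrite the header after validation
 * cannot make the validated copy in btf->hdr diverge from the data
 * parsed afterwards.
 */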
static int btf_parse_hdr(struct btf_verifier_env *env)
{
	u32 hdr_len, hdr_copy, btf_data_size;
	const struct btf_header *hdr;
	struct btf *btf;
	int err;

	btf = env->btf;
	btf_data_size = btf->data_size;

	if (btf_data_size <
	    offsetof(struct btf_header, hdr_len) + sizeof(hdr->hdr_len)) {
		btf_verifier_log(env, "hdr_len not found");
		return -EINVAL;
	}

	hdr = btf->data;
	hdr_len = hdr->hdr_len;
	if (btf_data_size < hdr_len) {
		btf_verifier_log(env, "btf_header not found");
		return -EINVAL;
	}

	/* Ensure the unsupported header fields are zero */
	if (hdr_len > sizeof(btf->hdr)) {
		u8 *expected_zero = btf->data + sizeof(btf->hdr);
		u8 *end = btf->data + hdr_len;

		for (; expected_zero < end; expected_zero++) {
			if (*expected_zero) {
				btf_verifier_log(env, "Unsupported btf_header");
				return -E2BIG;
			}
		}
	}

	hdr_copy = min_t(u32, hdr_len, sizeof(btf->hdr));
	memcpy(&btf->hdr, btf->data, hdr_copy);

	hdr = &btf->hdr;

	btf_verifier_log_hdr(env, btf_data_size);

	if (hdr->magic != BTF_MAGIC) {
		btf_verifier_log(env, "Invalid magic");
		return -EINVAL;
	}

	if (hdr->version != BTF_VERSION) {
		btf_verifier_log(env, "Unsupported version");
		return -ENOTSUPP;
	}

	if (hdr->flags) {
		btf_verifier_log(env, "Unsupported flags");
		return -ENOTSUPP;
	}

	if (!btf->base_btf && btf_data_size == hdr->hdr_len) {
		btf_verifier_log(env, "No data");
		return -EINVAL;
	}

	err = btf_check_sec_info(env, btf_data_size);
	if (err)
		return err;

	return 0;
}

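/* Entry point for user-supplied BTF (the BPF_BTF_LOAD command):
 * copy the whole blob into the kernel first, then parse the header,
 * string section and type section in that order.  On success the
 * temporary verifier env is freed and the btf is returned with a
 * refcount of 1.
 */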
static struct btf *btf_parse(bpfptr_t btf_data, u32 btf_data_size,
			     u32 log_level, char __user *log_ubuf, u32 log_size)
{
	struct btf_verifier_env *env = NULL;
	struct bpf_verifier_log *log;
	struct btf *btf = NULL;
	u8 *data;
	int err;

	if (btf_data_size > BTF_MAX_SIZE)
		return ERR_PTR(-E2BIG);

	env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
	if (!env)
		return ERR_PTR(-ENOMEM);

	log = &env->log;
	if (log_level || log_ubuf || log_size) {
		/* user requested verbose verifier output
		 * and supplied buffer to store the verification trace
		 */
		log->level = log_level;
		log->ubuf = log_ubuf;
		log->len_total = log_size;

		/* log attributes have to be sane */
		if (!bpf_verifier_log_attr_valid(log)) {
			err = -EINVAL;
			goto errout;
		}
	}

	btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN);
	if (!btf) {
		err = -ENOMEM;
		goto errout;
	}
	env->btf = btf;

	data = kvmalloc(btf_data_size, GFP_KERNEL | __GFP_NOWARN);
	if (!data) {
		err = -ENOMEM;
		goto errout;
	}

	btf->data = data;
	btf->data_size = btf_data_size;

	if (copy_from_bpfptr(data, btf_data, btf_data_size)) {
		err = -EFAULT;
		goto errout;
	}

	err = btf_parse_hdr(env);
	if (err)
		goto errout;

	btf->nohdr_data = btf->data + btf->hdr.hdr_len;

	err = btf_parse_str_sec(env);
	if (err)
		goto errout;

	err = btf_parse_type_sec(env);
	if (err)
		goto errout;

	if (log->level && bpf_verifier_log_full(log)) {
		err = -ENOSPC;
		goto errout;
	}

	btf_verifier_env_free(env);
	refcount_set(&btf->refcnt, 1);
	return btf;

errout:
	btf_verifier_env_free(env);
	if (btf)
		btf_free(btf);
	return ERR_PTR(err);
}

extern char __weak __start_BTF[];
extern char __weak __stop_BTF[];
extern struct btf *btf_vmlinux;

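/* Compile-time table pairing each program type's user-visible ctx
 * struct with its kernel counterpart, built by expanding
 * BPF_PROG_TYPE() from bpf_types.h twice: once as prog/kern member
 * pairs inside bpf_ctx_convert (whose BTF encodes the pairing) and
 * once as an enum used to index those pairs.  E.g. socket filters
 * pair 'struct __sk_buff' with the kernel's 'struct sk_buff'.
 * BPF_MAP_TYPE()/BPF_LINK_TYPE() are stubbed out here because only
 * program types participate.
 */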
#define BPF_MAP_TYPE(_id, _ops)
#define BPF_LINK_TYPE(_id, _name)
static union {
	struct bpf_ctx_convert {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
	prog_ctx_type _id##_prog; \
	kern_ctx_type _id##_kern;
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
	} *__t;
	/* 't' is written once under lock. Read many times. */
	const struct btf_type *t;
} bpf_ctx_convert;
enum {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
	__ctx_convert##_id,
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
	__ctx_convert_unused, /* to avoid empty enum in extreme .config */
};
static u8 bpf_ctx_convert_map[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
	[_id] = __ctx_convert##_id,
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
	0, /* avoid empty array */
};
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE

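/* Find the (prog_ctx_type, kern_ctx_type) member pair in
 * bpf_ctx_convert for @prog_type, given the BTF of a program's ctx
 * argument.  Matching is by struct name only, so a program may
 * declare its own (even empty) 'struct __sk_buff'.
 */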
static const struct btf_member *
btf_get_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf,
		      const struct btf_type *t, enum bpf_prog_type prog_type,
		      int arg)
{
	const struct btf_type *conv_struct;
	const struct btf_type *ctx_struct;
	const struct btf_member *ctx_type;
	const char *tname, *ctx_tname;

	conv_struct = bpf_ctx_convert.t;
	if (!conv_struct) {
		bpf_log(log, "btf_vmlinux is malformed\n");
		return NULL;
	}
	t = btf_type_by_id(btf, t->type);
	while (btf_type_is_modifier(t))
		t = btf_type_by_id(btf, t->type);
	if (!btf_type_is_struct(t)) {
		/* Only pointer to struct is supported for now.
		 * That means that BPF_PROG_TYPE_TRACEPOINT with BTF
		 * is not supported yet.
		 * BPF_PROG_TYPE_RAW_TRACEPOINT is fine.
		 */
		return NULL;
	}
	tname = btf_name_by_offset(btf, t->name_off);
	if (!tname) {
		bpf_log(log, "arg#%d struct doesn't have a name\n", arg);
		return NULL;
	}
	/* prog_type is valid bpf program type. No need for bounds check. */
	ctx_type = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2;
	/* ctx_struct is a pointer to prog_ctx_type in vmlinux.
	 * Like 'struct __sk_buff'
	 */
	ctx_struct = btf_type_by_id(btf_vmlinux, ctx_type->type);
	if (!ctx_struct)
		/* should not happen */
		return NULL;
	ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_struct->name_off);
	if (!ctx_tname) {
		/* should not happen */
		bpf_log(log, "Please fix kernel include/linux/bpf_types.h\n");
		return NULL;
	}
	/* only compare that prog's ctx type name is the same as
	 * kernel expects. No need to compare field by field.
	 * It's ok for bpf prog to do:
	 * struct __sk_buff {};
	 * int socket_filter_bpf_prog(struct __sk_buff *skb)
	 * { // no fields of skb are ever used }
	 */
	if (strcmp(ctx_tname, tname))
		return NULL;
	return ctx_type;
}

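/* Per-map-type ops table, used while parsing vmlinux BTF to look up
 * the BTF id of each map type's kernel struct (ops->map_btf_name,
 * e.g. "bpf_array" or "bpf_htab") and cache it in *ops->map_btf_id.
 * This enables map_ptr field access (e.g. reading max_entries) via
 * btf_struct_access().
 */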
static const struct bpf_map_ops * const btf_vmlinux_map_ops[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
#define BPF_LINK_TYPE(_id, _name)
#define BPF_MAP_TYPE(_id, _ops) \
	[_id] = &_ops,
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_LINK_TYPE
#undef BPF_MAP_TYPE
};

static int btf_vmlinux_map_ids_init(const struct btf *btf,
				    struct bpf_verifier_log *log)
{
	const struct bpf_map_ops *ops;
	int i, btf_id;

	for (i = 0; i < ARRAY_SIZE(btf_vmlinux_map_ops); ++i) {
		ops = btf_vmlinux_map_ops[i];
		if (!ops || (!ops->map_btf_name && !ops->map_btf_id))
			continue;
		if (!ops->map_btf_name || !ops->map_btf_id) {
			bpf_log(log, "map type %d is misconfigured\n", i);
			return -EINVAL;
		}
		btf_id = btf_find_by_name_kind(btf, ops->map_btf_name,
					       BTF_KIND_STRUCT);
		if (btf_id < 0)
			return btf_id;
		*ops->map_btf_id = btf_id;
	}

	return 0;
}

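/* Map a program-side ctx struct to the BTF id of its kernel
 * counterpart.  The kern_ctx_type member sits immediately after
 * prog_ctx_type in bpf_ctx_convert, hence the "+ 1".
 */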
static int btf_translate_to_vmlinux(struct bpf_verifier_log *log,
				    struct btf *btf,
				    const struct btf_type *t,
				    enum bpf_prog_type prog_type,
				    int arg)
{
	const struct btf_member *prog_ctx_type, *kern_ctx_type;

	prog_ctx_type = btf_get_prog_ctx_type(log, btf, t, prog_type, arg);
	if (!prog_ctx_type)
		return -ENOENT;

	kern_ctx_type = prog_ctx_type + 1;
	return kern_ctx_type->type;
}

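/* Filled in at build time by resolve_btfids with the vmlinux BTF
 * type id of 'struct bpf_ctx_convert'.
 */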
BTF_ID_LIST(bpf_ctx_convert_btf_id)
BTF_ID(struct, bpf_ctx_convert)

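/* Parse the kernel's own BTF, placed between __start_BTF and
 * __stop_BTF by the build.  The data is trusted and already in
 * kernel memory, so it is used in place (no copy) and only the
 * metadata pass runs here; errors go to the kernel log
 * (BPF_LOG_KERNEL).
 */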
struct btf *btf_parse_vmlinux(void)
{
	struct btf_verifier_env *env = NULL;
	struct bpf_verifier_log *log;
	struct btf *btf = NULL;
	int err;

	env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
	if (!env)
		return ERR_PTR(-ENOMEM);

	log = &env->log;
	log->level = BPF_LOG_KERNEL;

	btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN);
	if (!btf) {
		err = -ENOMEM;
		goto errout;
	}
	env->btf = btf;

	btf->data = __start_BTF;
	btf->data_size = __stop_BTF - __start_BTF;
	btf->kernel_btf = true;
	snprintf(btf->name, sizeof(btf->name), "vmlinux");

	err = btf_parse_hdr(env);
	if (err)
		goto errout;

	btf->nohdr_data = btf->data + btf->hdr.hdr_len;

	err = btf_parse_str_sec(env);
	if (err)
		goto errout;

	err = btf_check_all_metas(env);
	if (err)
		goto errout;

	/* btf_parse_vmlinux() runs under bpf_verifier_lock */
	bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]);

	/* find bpf map structs for map_ptr access checking */
	err = btf_vmlinux_map_ids_init(btf, log);
	if (err < 0)
		goto errout;

	bpf_struct_ops_init(btf, log);

	refcount_set(&btf->refcnt, 1);

	err = btf_alloc_id(btf);
	if (err)
		goto errout;

	btf_verifier_env_free(env);
	return btf;

errout:
	btf_verifier_env_free(env);
	if (btf) {
		kvfree(btf->types);
		kfree(btf);
	}
	return ERR_PTR(err);
}

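/* Module BTF is split BTF: it carries only the types and strings a
 * module adds, with type ids and string offsets continuing where
 * the base (vmlinux) BTF ends, hence the start_id/start_str_off
 * setup below.
 */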
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES

static struct btf *btf_parse_module(const char *module_name, const void *data, unsigned int data_size)
{
	struct btf_verifier_env *env = NULL;
	struct bpf_verifier_log *log;
	struct btf *btf = NULL, *base_btf;
	int err;

	base_btf = bpf_get_btf_vmlinux();
	if (IS_ERR(base_btf))
		return base_btf;
	if (!base_btf)
		return ERR_PTR(-EINVAL);

	env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
	if (!env)
		return ERR_PTR(-ENOMEM);

	log = &env->log;
	log->level = BPF_LOG_KERNEL;

	btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN);
	if (!btf) {
		err = -ENOMEM;
		goto errout;
	}
	env->btf = btf;

	btf->base_btf = base_btf;
	btf->start_id = base_btf->nr_types;
	btf->start_str_off = base_btf->hdr.str_len;
	btf->kernel_btf = true;
	snprintf(btf->name, sizeof(btf->name), "%s", module_name);

	btf->data = kvmalloc(data_size, GFP_KERNEL | __GFP_NOWARN);
	if (!btf->data) {
		err = -ENOMEM;
		goto errout;
	}
	memcpy(btf->data, data, data_size);
	btf->data_size = data_size;

	err = btf_parse_hdr(env);
	if (err)
		goto errout;

	btf->nohdr_data = btf->data + btf->hdr.hdr_len;

	err = btf_parse_str_sec(env);
	if (err)
		goto errout;

	err = btf_check_all_metas(env);
	if (err)
		goto errout;

	btf_verifier_env_free(env);
	refcount_set(&btf->refcnt, 1);
	return btf;

errout:
	btf_verifier_env_free(env);
	if (btf) {
		kvfree(btf->data);
		kvfree(btf->types);
		kfree(btf);
	}
	return ERR_PTR(err);
}

#endif /* CONFIG_DEBUG_INFO_BTF_MODULES */

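/* BTF of the attach target: the destination program's BTF when this
 * program attaches to another BPF program, otherwise the attach_btf
 * (vmlinux or module BTF) recorded at load time.
 */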
struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
{
	struct bpf_prog *tgt_prog = prog->aux->dst_prog;

	if (tgt_prog)
		return tgt_prog->aux->btf;
	else
		return prog->aux->attach_btf;
}

static bool is_int_ptr(struct btf *btf, const struct btf_type *t)
{
	/* t comes in already as a pointer */
	t = btf_type_by_id(btf, t->type);

	/* allow const */
	if (BTF_INFO_KIND(t->info) == BTF_KIND_CONST)
		t = btf_type_by_id(btf, t->type);

	return btf_type_is_int(t);
}

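/* Verifier callback deciding whether a program may access @size
 * bytes at @off within its BTF-typed context.  The ctx is treated
 * as an array of 8-byte argument slots, so (illustratively) a load
 * at off=16 refers to the argument at index 2.  For pointer args,
 * the pointee's BTF is reported back via info->btf/info->btf_id.
 */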
2019-10-16 06:25:00 +03:00
|
|
|
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
|
|
|
|
const struct bpf_prog *prog,
|
|
|
|
struct bpf_insn_access_aux *info)
|
|
|
|
{
|
2019-10-25 03:18:11 +03:00
|
|
|
const struct btf_type *t = prog->aux->attach_func_proto;
|
2020-09-29 15:45:50 +03:00
|
|
|
struct bpf_prog *tgt_prog = prog->aux->dst_prog;
|
2019-11-14 21:57:17 +03:00
|
|
|
struct btf *btf = bpf_prog_get_target_btf(prog);
|
2019-10-25 03:18:11 +03:00
|
|
|
const char *tname = prog->aux->attach_func_name;
|
2019-10-16 06:25:00 +03:00
|
|
|
struct bpf_verifier_log *log = info->log;
|
|
|
|
const struct btf_param *args;
|
bpf: reject program if a __user tagged memory accessed in kernel way
BPF verifier supports direct memory access for BPF_PROG_TYPE_TRACING type
of bpf programs, e.g., a->b. If "a" is a pointer
pointing to kernel memory, bpf verifier will allow user to write
code in C like a->b and the verifier will translate it to a kernel
load properly. If "a" is a pointer to user memory, it is expected
that bpf developer should be bpf_probe_read_user() helper to
get the value a->b. Without utilizing BTF __user tagging information,
current verifier will assume that a->b is a kernel memory access
and this may generate incorrect result.
Now BTF contains __user information, it can check whether the
pointer points to a user memory or not. If it is, the verifier
can reject the program and force users to use bpf_probe_read_user()
helper explicitly.
In the future, we can easily extend btf_add_space for other
address space tagging, for example, rcu/percpu etc.
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154606.654961-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-27 18:46:06 +03:00
|
|
|
const char *tag_value;
|
2019-10-16 06:25:00 +03:00
|
|
|
u32 nr_args, arg;
|
2020-05-13 21:02:21 +03:00
|
|
|
int i, ret;
|
2019-10-16 06:25:00 +03:00
|
|
|
|
|
|
|
if (off % 8) {
|
2019-10-25 03:18:11 +03:00
|
|
|
bpf_log(log, "func '%s' offset %d is not multiple of 8\n",
|
2019-10-16 06:25:00 +03:00
|
|
|
tname, off);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
arg = off / 8;
|
|
|
|
args = (const struct btf_param *)(t + 1);
|
2021-02-25 23:26:29 +03:00
|
|
|
/* if (t == NULL) Fall back to default BPF prog with
|
|
|
|
* MAX_BPF_FUNC_REG_ARGS u64 arguments.
|
|
|
|
*/
|
|
|
|
nr_args = t ? btf_type_vlen(t) : MAX_BPF_FUNC_REG_ARGS;
|
2019-10-25 03:18:11 +03:00
|
|
|
if (prog->aux->attach_btf_trace) {
|
|
|
|
/* skip first 'void *__data' argument in btf_trace_##name typedef */
|
|
|
|
args++;
|
|
|
|
nr_args--;
|
|
|
|
}
|
2019-11-14 21:57:04 +03:00
|
|
|
|
2020-03-30 17:42:46 +03:00
|
|
|
if (arg > nr_args) {
|
|
|
|
bpf_log(log, "func '%s' doesn't have %d-th argument\n",
|
|
|
|
tname, arg + 1);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2020-03-04 22:18:50 +03:00
|
|
|
if (arg == nr_args) {
|
2020-03-30 17:42:46 +03:00
|
|
|
switch (prog->expected_attach_type) {
|
|
|
|
case BPF_LSM_MAC:
|
|
|
|
case BPF_TRACE_FEXIT:
|
2020-03-29 03:43:52 +03:00
|
|
|
/* When LSM programs are attached to void LSM hooks
|
|
|
|
* they use FEXIT trampolines and when attached to
|
|
|
|
* int LSM hooks, they use MODIFY_RETURN trampolines.
|
|
|
|
*
|
|
|
|
* While the LSM programs are BPF_MODIFY_RETURN-like
|
|
|
|
* the check:
|
|
|
|
*
|
|
|
|
* if (ret_type != 'int')
|
|
|
|
* return -EINVAL;
|
|
|
|
*
|
|
|
|
* is _not_ done here. This is still safe as LSM hooks
|
|
|
|
* have only void and int return types.
|
|
|
|
*/
|
2020-03-04 22:18:50 +03:00
|
|
|
if (!t)
|
|
|
|
return true;
|
|
|
|
t = btf_type_by_id(btf, t->type);
|
2020-03-30 17:42:46 +03:00
|
|
|
break;
|
|
|
|
case BPF_MODIFY_RETURN:
|
2020-03-04 22:18:50 +03:00
|
|
|
/* For now the BPF_MODIFY_RETURN can only be attached to
|
|
|
|
* functions that return an int.
|
|
|
|
*/
|
|
|
|
if (!t)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
t = btf_type_skip_modifiers(btf, t->type, NULL);
|
2020-06-25 01:20:39 +03:00
|
|
|
if (!btf_type_is_small_int(t)) {
|
2020-03-04 22:18:50 +03:00
|
|
|
bpf_log(log,
|
|
|
|
"ret type %s not allowed for fmod_ret\n",
|
|
|
|
btf_kind_str[BTF_INFO_KIND(t->info)]);
|
|
|
|
return false;
|
|
|
|
}
|
2020-03-30 17:42:46 +03:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
bpf_log(log, "func '%s' doesn't have %d-th argument\n",
|
|
|
|
tname, arg + 1);
|
|
|
|
return false;
|
2020-03-04 22:18:50 +03:00
|
|
|
}
|
2019-11-14 21:57:04 +03:00
|
|
|
} else {
|
2019-11-14 21:57:17 +03:00
|
|
|
if (!t)
|
2021-02-25 23:26:29 +03:00
|
|
|
/* Default prog with MAX_BPF_FUNC_REG_ARGS args */
|
2019-11-14 21:57:17 +03:00
|
|
|
return true;
|
|
|
|
t = btf_type_by_id(btf, args[arg].type);
|
2019-10-16 06:25:00 +03:00
|
|
|
}
|
2020-03-30 17:42:46 +03:00
|
|
|
|
2019-10-16 06:25:00 +03:00
|
|
|
/* skip modifiers */
|
|
|
|
while (btf_type_is_modifier(t))
|
2019-11-14 21:57:17 +03:00
|
|
|
t = btf_type_by_id(btf, t->type);
|
2020-06-25 01:20:39 +03:00
|
|
|
if (btf_type_is_small_int(t) || btf_type_is_enum(t))
|
2019-10-16 06:25:00 +03:00
|
|
|
/* accessing a scalar */
|
|
|
|
return true;
|
|
|
|
if (!btf_type_is_ptr(t)) {
|
|
|
|
bpf_log(log,
|
2019-10-25 03:18:11 +03:00
|
|
|
"func '%s' arg%d '%s' has type %s. Only pointer access is allowed\n",
|
2019-10-16 06:25:00 +03:00
|
|
|
tname, arg,
|
2019-11-14 21:57:17 +03:00
|
|
|
__btf_name_by_offset(btf, t->name_off),
|
2019-10-16 06:25:00 +03:00
|
|
|
btf_kind_str[BTF_INFO_KIND(t->info)]);
|
|
|
|
return false;
|
|
|
|
}
|
2020-07-23 21:41:11 +03:00
|
|
|
|
|
|
|
/* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */
|
|
|
|
for (i = 0; i < prog->aux->ctx_arg_info_size; i++) {
|
|
|
|
const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
|
2021-12-17 03:31:47 +03:00
|
|
|
u32 type, flag;
|
2020-07-23 21:41:11 +03:00
|
|
|
|
2021-12-17 03:31:47 +03:00
|
|
|
type = base_type(ctx_arg_info->reg_type);
|
|
|
|
flag = type_flag(ctx_arg_info->reg_type);
|
2021-12-17 03:31:48 +03:00
|
|
|
if (ctx_arg_info->offset == off && type == PTR_TO_BUF &&
|
2021-12-17 03:31:47 +03:00
|
|
|
(flag & PTR_MAYBE_NULL)) {
|
2020-07-23 21:41:11 +03:00
|
|
|
info->reg_type = ctx_arg_info->reg_type;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-10-16 06:25:00 +03:00
|
|
|
if (t->type == 0)
|
|
|
|
/* This is a pointer to void.
|
|
|
|
* It is the same as scalar from the verifier safety pov.
|
|
|
|
* No further pointer walking is allowed.
|
|
|
|
*/
|
|
|
|
return true;
|
|
|
|
|
2021-12-08 22:32:41 +03:00
|
|
|
if (is_int_ptr(btf, t))
|
2020-01-23 19:15:06 +03:00
|
|
|
return true;
|
|
|
|
|
2019-10-16 06:25:00 +03:00
|
|
|
/* this is a pointer to another type */
|
2020-05-13 21:02:21 +03:00
|
|
|
for (i = 0; i < prog->aux->ctx_arg_info_size; i++) {
|
|
|
|
const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
|
|
|
|
|
|
|
|
if (ctx_arg_info->offset == off) {
|
bpf: Emit better log message if bpf_iter ctx arg btf_id == 0
To avoid kernel build failure due to some missing .BTF-ids referenced
functions/types, the patch ([1]) tries to fill btf_id 0 for
these types.
In bpf verifier, for percpu variable and helper returning btf_id cases,
verifier already emitted proper warning with something like
verbose(env, "Helper has invalid btf_id in R%d\n", regno);
verbose(env, "invalid return type %d of func %s#%d\n",
fn->ret_type, func_id_name(func_id), func_id);
But this is not the case for bpf_iter context arguments.
I hacked resolve_btfids to encode btf_id 0 for struct task_struct.
With `./test_progs -n 7/5`, I got,
0: (79) r2 = *(u64 *)(r1 +0)
func 'bpf_iter_task' arg0 has btf_id 29739 type STRUCT 'bpf_iter_meta'
; struct seq_file *seq = ctx->meta->seq;
1: (79) r6 = *(u64 *)(r2 +0)
; struct task_struct *task = ctx->task;
2: (79) r7 = *(u64 *)(r1 +8)
; if (task == (void *)0) {
3: (55) if r7 != 0x0 goto pc+11
...
; BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
26: (61) r1 = *(u32 *)(r7 +1372)
Type '(anon)' is not a struct
Basically, verifier will return btf_id 0 for task_struct.
Later on, when the code tries to access task->tgid, the
verifier correctly complains the type is '(anon)' and it is
not a struct. Users still need to backtrace to find out
what is going on.
Let us catch the invalid btf_id 0 earlier
and provide better message indicating btf_id is wrong.
The new error message looks like below:
R1 type=ctx expected=fp
; struct seq_file *seq = ctx->meta->seq;
0: (79) r2 = *(u64 *)(r1 +0)
func 'bpf_iter_task' arg0 has btf_id 29739 type STRUCT 'bpf_iter_meta'
; struct seq_file *seq = ctx->meta->seq;
1: (79) r6 = *(u64 *)(r2 +0)
; struct task_struct *task = ctx->task;
2: (79) r7 = *(u64 *)(r1 +8)
invalid btf_id for context argument offset 8
invalid bpf_context access off=8 size=8
[1] https://lore.kernel.org/bpf/20210727132532.2473636-1-hengqi.chen@gmail.com/
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210728183025.1461750-1-yhs@fb.com
2021-07-28 21:30:25 +03:00
|
|
|
if (!ctx_arg_info->btf_id) {
|
|
|
|
bpf_log(log,"invalid btf_id for context argument offset %u\n", off);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2020-05-13 21:02:21 +03:00
|
|
|
info->reg_type = ctx_arg_info->reg_type;
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption thoughout BPF verifier of vmlinux BTF. Instead,
wherever BTF type IDs are involved, also track the instance of struct btf that
goes along with the type ID. This allows to gradually add support for kernel
module BTFs and using/tracking module types across BPF helper calls and
registers.
This patch also renames btf_id() function to btf_obj_id() to minimize naming
clash with using btf_id to denote BTF *type* ID, rather than BTF *object*'s ID.
Also, altough btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs need that, so apply BTF refcounting universally when
BPF program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler clean up code.
Now that BTF type ID is not enough to uniquely identify a BTF type, extend BPF
trampoline key to include BTF object ID. To differentiate that from target
program BPF ID, set 31st bit of type ID. BTF type IDs (at least currently) are
not allowed to take full 32 bits, so there is no danger of confusing that bit
with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-03 23:46:29 +03:00
|
|
|
info->btf = btf_vmlinux;
|
2020-07-20 19:34:03 +03:00
|
|
|
info->btf_id = ctx_arg_info->btf_id;
|
|
|
|
return true;
|
2020-05-13 21:02:21 +03:00
|
|
|
}
|
|
|
|
}
|
2019-10-16 06:25:00 +03:00
|
|
|
|
2020-07-20 19:34:03 +03:00
|
|
|
info->reg_type = PTR_TO_BTF_ID;
|
2019-11-14 21:57:17 +03:00
|
|
|
if (tgt_prog) {
|
2020-09-29 15:45:52 +03:00
|
|
|
enum bpf_prog_type tgt_type;
|
|
|
|
|
|
|
|
if (tgt_prog->type == BPF_PROG_TYPE_EXT)
|
|
|
|
tgt_type = tgt_prog->aux->saved_dst_prog_type;
|
|
|
|
else
|
|
|
|
tgt_type = tgt_prog->type;
|
|
|
|
|
|
|
|
ret = btf_translate_to_vmlinux(log, btf, t, tgt_type, arg);
|
2019-11-14 21:57:17 +03:00
|
|
|
if (ret > 0) {
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption thoughout BPF verifier of vmlinux BTF. Instead,
wherever BTF type IDs are involved, also track the instance of struct btf that
goes along with the type ID. This allows to gradually add support for kernel
module BTFs and using/tracking module types across BPF helper calls and
registers.
This patch also renames btf_id() function to btf_obj_id() to minimize naming
clash with using btf_id to denote BTF *type* ID, rather than BTF *object*'s ID.
Also, altough btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs need that, so apply BTF refcounting universally when
BPF program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler clean up code.
Now that BTF type ID is not enough to uniquely identify a BTF type, extend BPF
trampoline key to include BTF object ID. To differentiate that from target
program BPF ID, set 31st bit of type ID. BTF type IDs (at least currently) are
not allowed to take full 32 bits, so there is no danger of confusing that bit
with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-03 23:46:29 +03:00
|
|
|
info->btf = btf_vmlinux;
|
2019-11-14 21:57:17 +03:00
|
|
|
info->btf_id = ret;
|
|
|
|
return true;
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
2020-01-09 03:34:56 +03:00
|
|
|
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
2020-12-03 23:46:29 +03:00
|
|
|
info->btf = btf;
|
2020-01-09 03:34:56 +03:00
|
|
|
info->btf_id = t->type;
|
2019-11-14 21:57:17 +03:00
|
|
|
t = btf_type_by_id(btf, t->type);
|
bpf: reject program if a __user tagged memory accessed in kernel way
The BPF verifier supports direct memory access for the BPF_PROG_TYPE_TRACING type
of bpf programs, e.g., a->b. If "a" is a pointer
pointing to kernel memory, the verifier will allow the user to write
C code like a->b and will translate it to a proper kernel
load. If "a" is a pointer to user memory, the bpf developer
is expected to use the bpf_probe_read_user() helper to
get the value of a->b. Without utilizing BTF __user tagging information,
the current verifier will assume that a->b is a kernel memory access,
and this may generate an incorrect result.
Now that BTF contains __user information, the verifier can check whether the
pointer points to user memory or not. If it does, the verifier
can reject the program and force users to use the bpf_probe_read_user()
helper explicitly.
In the future, we can easily extend btf_add_space for other
address space tagging, for example rcu/percpu, etc.
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154606.654961-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-27 18:46:06 +03:00
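A minimal BPF-program-side sketch of the rule this message describes, assuming a hypothetical struct whose member sits behind a "user"-tagged pointer (all names here are illustrative):

#include <linux/types.h>
#include <bpf/bpf_helpers.h>	/* bpf_probe_read_user() */

struct foo {
	int *uptr;	/* would carry the BTF "user" type tag in vmlinux BTF */
};

static __always_inline int read_user_field(struct foo *a)
{
	int b = 0;

	/* A direct *a->uptr load is now rejected by the verifier because
	 * uptr points into user memory; the explicit helper is the
	 * sanctioned way to read it.
	 */
	bpf_probe_read_user(&b, sizeof(b), a->uptr);
	return b;
}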
|
|
|
|
|
|
|
if (btf_type_is_type_tag(t)) {
|
|
|
|
tag_value = __btf_name_by_offset(btf, t->name_off);
|
|
|
|
if (strcmp(tag_value, "user") == 0)
|
|
|
|
info->reg_type |= MEM_USER;
|
|
|
|
}
|
|
|
|
|
2019-10-16 06:25:00 +03:00
|
|
|
/* skip modifiers */
|
2020-01-09 03:34:56 +03:00
|
|
|
while (btf_type_is_modifier(t)) {
|
|
|
|
info->btf_id = t->type;
|
2019-11-14 21:57:17 +03:00
|
|
|
t = btf_type_by_id(btf, t->type);
|
2020-01-09 03:34:56 +03:00
|
|
|
}
|
2019-10-16 06:25:00 +03:00
|
|
|
if (!btf_type_is_struct(t)) {
|
|
|
|
bpf_log(log,
|
2019-10-25 03:18:11 +03:00
|
|
|
"func '%s' arg%d type %s is not a struct\n",
|
2019-10-16 06:25:00 +03:00
|
|
|
tname, arg, btf_kind_str[BTF_INFO_KIND(t->info)]);
|
|
|
|
return false;
|
|
|
|
}
|
2019-10-25 03:18:11 +03:00
|
|
|
bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n",
|
2019-10-16 06:25:00 +03:00
|
|
|
tname, arg, info->btf_id, btf_kind_str[BTF_INFO_KIND(t->info)],
|
2019-11-14 21:57:17 +03:00
|
|
|
__btf_name_by_offset(btf, t->name_off));
|
2019-10-16 06:25:00 +03:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-08-25 22:21:17 +03:00
|
|
|
enum bpf_struct_walk_result {
|
|
|
|
/* < 0 error */
|
|
|
|
WALK_SCALAR = 0,
|
|
|
|
WALK_PTR,
|
|
|
|
WALK_STRUCT,
|
|
|
|
};
|
|
|
|
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
2020-12-03 23:46:29 +03:00
|
|
|
static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
|
2020-08-25 22:21:17 +03:00
|
|
|
const struct btf_type *t, int off, int size,
|
bpf: reject program if a __user tagged memory accessed in kernel way
2022-01-27 18:46:06 +03:00
|
|
|
u32 *next_btf_id, enum bpf_type_flag *flag)
|
2019-10-16 06:25:00 +03:00
|
|
|
{
|
2019-11-07 21:09:03 +03:00
|
|
|
u32 i, moff, mtrue_end, msize = 0, total_nelems = 0;
|
|
|
|
const struct btf_type *mtype, *elem_type = NULL;
|
2019-10-16 06:25:00 +03:00
|
|
|
const struct btf_member *member;
|
bpf: reject program if a __user tagged memory accessed in kernel way
2022-01-27 18:46:06 +03:00
|
|
|
const char *tname, *mname, *tag_value;
|
2020-08-25 22:21:17 +03:00
|
|
|
u32 vlen, elem_id, mid;
|
2019-10-16 06:25:00 +03:00
|
|
|
|
|
|
|
again:
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
2020-12-03 23:46:29 +03:00
|
|
|
tname = __btf_name_by_offset(btf, t->name_off);
|
2019-10-16 06:25:00 +03:00
|
|
|
if (!btf_type_is_struct(t)) {
|
2020-01-09 03:34:56 +03:00
|
|
|
bpf_log(log, "Type '%s' is not a struct\n", tname);
|
2019-10-16 06:25:00 +03:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2020-05-09 20:59:16 +03:00
|
|
|
vlen = btf_type_vlen(t);
|
2020-01-09 03:35:01 +03:00
|
|
|
if (off + size > t->size) {
|
2020-05-09 20:59:16 +03:00
|
|
|
/* If the last element is a variable size array, we may
|
|
|
|
* need to relax the rule.
|
|
|
|
*/
|
|
|
|
struct btf_array *array_elem;
|
|
|
|
|
|
|
|
if (vlen == 0)
|
|
|
|
goto error;
|
|
|
|
|
|
|
|
member = btf_type_member(t) + vlen - 1;
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
2020-12-03 23:46:29 +03:00
|
|
|
mtype = btf_type_skip_modifiers(btf, member->type,
|
2020-05-09 20:59:16 +03:00
|
|
|
NULL);
|
|
|
|
if (!btf_type_is_array(mtype))
|
|
|
|
goto error;
|
|
|
|
|
|
|
|
array_elem = (struct btf_array *)(mtype + 1);
|
|
|
|
if (array_elem->nelems != 0)
|
|
|
|
goto error;
|
|
|
|
|
2021-12-01 21:10:25 +03:00
|
|
|
moff = __btf_member_bit_offset(t, member) / 8;
|
2020-05-09 20:59:16 +03:00
|
|
|
if (off < moff)
|
|
|
|
goto error;
|
|
|
|
|
|
|
|
/* Only allow structure for now, can be relaxed for
|
|
|
|
* other types later.
|
|
|
|
*/
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
2020-12-03 23:46:29 +03:00
|
|
|
t = btf_type_skip_modifiers(btf, array_elem->type,
|
2020-08-25 22:21:16 +03:00
|
|
|
NULL);
|
|
|
|
if (!btf_type_is_struct(t))
|
2020-05-09 20:59:16 +03:00
|
|
|
goto error;
|
|
|
|
|
2020-08-25 22:21:16 +03:00
|
|
|
off = (off - moff) % t->size;
|
|
|
|
goto again;
|
2020-05-09 20:59:16 +03:00
|
|
|
|
|
|
|
error:
|
2020-01-09 03:35:01 +03:00
|
|
|
bpf_log(log, "access beyond struct %s at off %u size %u\n",
|
|
|
|
tname, off, size);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
2019-10-16 06:25:00 +03:00
|
|
|
|
2020-01-09 03:35:01 +03:00
|
|
|
for_each_member(i, t, member) {
|
2019-11-07 21:09:03 +03:00
|
|
|
/* offset of the field in bytes */
|
2021-12-01 21:10:25 +03:00
|
|
|
moff = __btf_member_bit_offset(t, member) / 8;
|
2019-11-07 21:09:03 +03:00
|
|
|
if (off + size <= moff)
|
2019-10-16 06:25:00 +03:00
|
|
|
/* won't find anything, field is already too far */
|
|
|
|
break;
|
2020-01-09 03:35:01 +03:00
|
|
|
|
2021-12-01 21:10:25 +03:00
|
|
|
if (__btf_member_bitfield_size(t, member)) {
|
|
|
|
u32 end_bit = __btf_member_bit_offset(t, member) +
|
|
|
|
__btf_member_bitfield_size(t, member);
|
2020-01-09 03:35:01 +03:00
|
|
|
|
|
|
|
/* off <= moff instead of off == moff because clang
|
|
|
|
* does not generate a BTF member for anonymous
|
|
|
|
* bitfield like the ":16" here:
|
|
|
|
* struct {
|
|
|
|
* int :16;
|
|
|
|
* int x:8;
|
|
|
|
* };
|
|
|
|
*/
|
|
|
|
if (off <= moff &&
|
|
|
|
BITS_ROUNDUP_BYTES(end_bit) <= off + size)
|
2020-08-25 22:21:17 +03:00
|
|
|
return WALK_SCALAR;
|
2020-01-09 03:35:01 +03:00
|
|
|
|
|
|
|
/* off may be accessing a following member
|
|
|
|
*
|
|
|
|
* or
|
|
|
|
*
|
|
|
|
* Doing partial access at either end of this
|
|
|
|
* bitfield. Continue in this case as well to
|
|
|
|
* treat it as not accessing this bitfield
|
|
|
|
* and eventually error out as field not
|
|
|
|
* found to keep it simple.
|
|
|
|
* It could be relaxed if there was a legit
|
|
|
|
* partial access case later.
|
|
|
|
*/
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2019-11-07 21:09:03 +03:00
|
|
|
/* In case "off" is pointing to a hole in the struct */
|
|
|
|
if (off < moff)
|
2020-01-09 03:35:01 +03:00
|
|
|
break;
|
2019-10-16 06:25:00 +03:00
|
|
|
|
|
|
|
/* type of the field */
|
2020-08-25 22:21:17 +03:00
|
|
|
mid = member->type;
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
2020-12-03 23:46:29 +03:00
|
|
|
mtype = btf_type_by_id(btf, member->type);
|
|
|
|
mname = __btf_name_by_offset(btf, member->name_off);
|
2019-10-16 06:25:00 +03:00
|
|
|
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
2020-12-03 23:46:29 +03:00
|
|
|
mtype = __btf_resolve_size(btf, mtype, &msize,
|
2020-08-25 22:21:17 +03:00
|
|
|
&elem_type, &elem_id, &total_nelems,
|
|
|
|
&mid);
|
2019-11-07 21:09:03 +03:00
|
|
|
if (IS_ERR(mtype)) {
|
2019-10-16 06:25:00 +03:00
|
|
|
bpf_log(log, "field %s doesn't have size\n", mname);
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
2019-11-07 21:09:03 +03:00
|
|
|
|
|
|
|
mtrue_end = moff + msize;
|
|
|
|
if (off >= mtrue_end)
|
2019-10-16 06:25:00 +03:00
|
|
|
/* no overlap with member, keep iterating */
|
|
|
|
continue;
|
2019-11-07 21:09:03 +03:00
|
|
|
|
|
|
|
if (btf_type_is_array(mtype)) {
|
|
|
|
u32 elem_idx;
|
|
|
|
|
2020-08-25 22:21:13 +03:00
|
|
|
/* __btf_resolve_size() above helps to
|
2019-11-07 21:09:03 +03:00
|
|
|
* linearize a multi-dimensional array.
|
|
|
|
*
|
|
|
|
* The logic here treats an array
|
|
|
|
* in a struct the following way:
|
|
|
|
*
|
|
|
|
* struct outer {
|
|
|
|
* struct inner array[2][2];
|
|
|
|
* };
|
|
|
|
*
|
|
|
|
* looks like:
|
|
|
|
*
|
|
|
|
* struct outer {
|
|
|
|
* struct inner array_elem0;
|
|
|
|
* struct inner array_elem1;
|
|
|
|
* struct inner array_elem2;
|
|
|
|
* struct inner array_elem3;
|
|
|
|
* };
|
|
|
|
*
|
|
|
|
* When accessing outer->array[1][0], it moves
|
|
|
|
* moff to "array_elem2", sets mtype to
|
|
|
|
* "struct inner", and msize also becomes
|
|
|
|
* sizeof(struct inner). Then most of the
|
|
|
|
* remaining logic will fall through without
|
|
|
|
* caring whether the current member is an array or
|
|
|
|
* not.
|
|
|
|
*
|
|
|
|
* Unlike mtype/msize/moff, mtrue_end does not
|
|
|
|
* change. The naming difference ("_true") indicates
|
|
|
|
* that it does not always correspond to
|
|
|
|
* the current mtype/msize/moff.
|
|
|
|
* It is the true end of the current
|
|
|
|
* member (i.e. array in this case). That
|
|
|
|
* will allow an int array to be accessed like
|
|
|
|
* a scratch space,
|
|
|
|
* i.e. allow access beyond the size of
|
|
|
|
* the array's element as long as it is
|
|
|
|
* within the mtrue_end boundary.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* skip empty array */
|
|
|
|
if (moff == mtrue_end)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
msize /= total_nelems;
|
|
|
|
elem_idx = (off - moff) / msize;
|
|
|
|
moff += elem_idx * msize;
|
|
|
|
mtype = elem_type;
|
2020-08-25 22:21:17 +03:00
|
|
|
mid = elem_id;
|
2019-11-07 21:09:03 +03:00
|
|
|
}
|
|
|
|
|
2019-10-16 06:25:00 +03:00
|
|
|
/* the 'off' we're looking for is either equal to start
|
|
|
|
* of this field or inside of this struct
|
|
|
|
*/
|
|
|
|
if (btf_type_is_struct(mtype)) {
|
|
|
|
/* our field must be inside that union or struct */
|
|
|
|
t = mtype;
|
|
|
|
|
2020-08-25 22:21:17 +03:00
|
|
|
/* return if the offset matches the member offset */
|
|
|
|
if (off == moff) {
|
|
|
|
*next_btf_id = mid;
|
|
|
|
return WALK_STRUCT;
|
|
|
|
}
|
|
|
|
|
2019-10-16 06:25:00 +03:00
|
|
|
/* adjust offset we're looking for */
|
2019-11-07 21:09:03 +03:00
|
|
|
off -= moff;
|
2019-10-16 06:25:00 +03:00
|
|
|
goto again;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (btf_type_is_ptr(mtype)) {
|
bpf: reject program if a __user tagged memory accessed in kernel way
2022-01-27 18:46:06 +03:00
|
|
|
const struct btf_type *stype, *t;
|
|
|
|
enum bpf_type_flag tmp_flag = 0;
|
2020-02-01 03:03:14 +03:00
|
|
|
u32 id;
|
2019-10-16 06:25:00 +03:00
|
|
|
|
2019-11-07 21:09:03 +03:00
|
|
|
if (msize != size || off != moff) {
|
|
|
|
bpf_log(log,
|
|
|
|
"cannot access ptr member %s with moff %u in struct %s with off %u size %u\n",
|
|
|
|
mname, moff, tname, off, size);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
bpf: reject program if a __user tagged memory accessed in kernel way
2022-01-27 18:46:06 +03:00
|
|
|
|
|
|
|
/* check __user tag */
|
|
|
|
t = btf_type_by_id(btf, mtype->type);
|
|
|
|
if (btf_type_is_type_tag(t)) {
|
|
|
|
tag_value = __btf_name_by_offset(btf, t->name_off);
|
|
|
|
if (strcmp(tag_value, "user") == 0)
|
|
|
|
tmp_flag = MEM_USER;
|
|
|
|
}
|
|
|
|
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
2020-12-03 23:46:29 +03:00
|
|
|
stype = btf_type_skip_modifiers(btf, mtype->type, &id);
|
2019-10-16 06:25:00 +03:00
|
|
|
if (btf_type_is_struct(stype)) {
|
2020-02-01 03:03:14 +03:00
|
|
|
*next_btf_id = id;
|
bpf: reject program if a __user tagged memory accessed in kernel way
2022-01-27 18:46:06 +03:00
|
|
|
*flag = tmp_flag;
|
2020-08-25 22:21:17 +03:00
|
|
|
return WALK_PTR;
|
2019-10-16 06:25:00 +03:00
|
|
|
}
|
|
|
|
}
|
2019-11-07 21:09:03 +03:00
|
|
|
|
|
|
|
/* Allow more flexible access within an int as long as
|
|
|
|
* it is within mtrue_end.
|
|
|
|
* Since mtrue_end could be the end of an array,
|
|
|
|
* that also allows using an array of int as a scratch
|
|
|
|
* space, e.g. skb->cb[] (see the sketch after this function).
|
|
|
|
*/
|
|
|
|
if (off + size > mtrue_end) {
|
|
|
|
bpf_log(log,
|
|
|
|
"access beyond the end of member %s (mend:%u) in struct %s with off %u size %u\n",
|
|
|
|
mname, mtrue_end, tname, off, size);
|
|
|
|
return -EACCES;
|
|
|
|
}
|
|
|
|
|
2020-08-25 22:21:17 +03:00
|
|
|
return WALK_SCALAR;
|
2019-10-16 06:25:00 +03:00
|
|
|
}
|
|
|
|
bpf_log(log, "struct %s doesn't have field at offset %d\n", tname, off);
|
|
|
|
return -EINVAL;
|
|
|
|
}
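A short sketch of the skb->cb[] scratch-space rule referenced in the comment above, as it would look from a tracing program; this assumes vmlinux.h for the type definitions and is illustrative only:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* skb here is PTR_TO_BTF_ID, so the dereference below is validated by
 * btf_struct_walk(). cb is char[48]: the 8-byte load exceeds the element
 * size (1) but stays within mtrue_end, so the access is accepted and
 * classified as WALK_SCALAR.
 */
static __always_inline __u64 read_cb_qword(struct sk_buff *skb)
{
	return *(__u64 *)skb->cb;
}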
|
|
|
|
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
2020-12-03 23:46:29 +03:00
|
|
|
int btf_struct_access(struct bpf_verifier_log *log, const struct btf *btf,
|
2020-08-25 22:21:17 +03:00
|
|
|
const struct btf_type *t, int off, int size,
|
|
|
|
enum bpf_access_type atype __maybe_unused,
|
bpf: reject program if a __user tagged memory accessed in kernel way
2022-01-27 18:46:06 +03:00
|
|
|
u32 *next_btf_id, enum bpf_type_flag *flag)
|
2020-08-25 22:21:17 +03:00
|
|
|
{
|
bpf: reject program if a __user tagged memory accessed in kernel way
2022-01-27 18:46:06 +03:00
|
|
|
enum bpf_type_flag tmp_flag = 0;
|
2020-08-25 22:21:17 +03:00
|
|
|
int err;
|
|
|
|
u32 id;
|
|
|
|
|
|
|
|
do {
|
bpf: reject program if a __user tagged memory accessed in kernel way
2022-01-27 18:46:06 +03:00
|
|
|
err = btf_struct_walk(log, btf, t, off, size, &id, &tmp_flag);
|
2020-08-25 22:21:17 +03:00
|
|
|
|
|
|
|
switch (err) {
|
|
|
|
case WALK_PTR:
|
|
|
|
/* If we found the pointer or scalar on t+off,
|
|
|
|
* we're done.
|
|
|
|
*/
|
|
|
|
*next_btf_id = id;
|
bpf: reject program if a __user tagged memory accessed in kernel way
2022-01-27 18:46:06 +03:00
|
|
|
*flag = tmp_flag;
|
2020-08-25 22:21:17 +03:00
|
|
|
return PTR_TO_BTF_ID;
|
|
|
|
case WALK_SCALAR:
|
|
|
|
return SCALAR_VALUE;
|
|
|
|
case WALK_STRUCT:
|
|
|
|
/* We found a nested struct, so continue the search
|
|
|
|
* by diving into it. At this point the offset is
|
|
|
|
* aligned with the new type, so set it to 0.
|
|
|
|
*/
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
2020-12-03 23:46:29 +03:00
|
|
|
t = btf_type_by_id(btf, id);
|
2020-08-25 22:21:17 +03:00
|
|
|
off = 0;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
/* It's either an error or an unknown return value;
|
|
|
|
* scream and leave.
|
|
|
|
*/
|
|
|
|
if (WARN_ONCE(err > 0, "unknown btf_struct_walk return value"))
|
|
|
|
return -EINVAL;
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
} while (t);
|
|
|
|
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
2020-12-03 23:46:29 +03:00
|
|
|
/* Check that two BTF types, each specified as a BTF object + id, are exactly
|
|
|
|
* the same. Trivial ID check is not enough due to module BTFs, because we can
|
|
|
|
* end up with two different module BTFs, but IDs point to the common type in
|
|
|
|
* vmlinux BTF.
|
|
|
|
*/
|
|
|
|
static bool btf_types_are_same(const struct btf *btf1, u32 id1,
|
|
|
|
const struct btf *btf2, u32 id2)
|
|
|
|
{
|
|
|
|
if (id1 != id2)
|
|
|
|
return false;
|
|
|
|
if (btf1 == btf2)
|
|
|
|
return true;
|
|
|
|
return btf_type_by_id(btf1, id1) == btf_type_by_id(btf2, id2);
|
|
|
|
}
|
|
|
|
|
2020-08-25 22:21:18 +03:00
|
|
|
bool btf_struct_ids_match(struct bpf_verifier_log *log,
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
2020-12-03 23:46:29 +03:00
|
|
|
const struct btf *btf, u32 id, int off,
|
|
|
|
const struct btf *need_btf, u32 need_type_id)
|
2020-08-25 22:21:18 +03:00
|
|
|
{
|
|
|
|
const struct btf_type *type;
|
bpf: reject program if a __user tagged memory accessed in kernel way
2022-01-27 18:46:06 +03:00
|
|
|
enum bpf_type_flag flag;
|
2020-08-25 22:21:18 +03:00
|
|
|
int err;
|
|
|
|
|
|
|
|
/* Are we already done? */
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
2020-12-03 23:46:29 +03:00
|
|
|
if (off == 0 && btf_types_are_same(btf, id, need_btf, need_type_id))
|
2020-08-25 22:21:18 +03:00
|
|
|
return true;
|
|
|
|
|
|
|
|
again:
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
2020-12-03 23:46:29 +03:00
|
|
|
type = btf_type_by_id(btf, id);
|
2020-08-25 22:21:18 +03:00
|
|
|
if (!type)
|
|
|
|
return false;
|
bpf: reject program if a __user tagged memory accessed in kernel way
2022-01-27 18:46:06 +03:00
|
|
|
err = btf_struct_walk(log, btf, type, off, 1, &id, &flag);
|
2020-08-25 22:21:18 +03:00
|
|
|
if (err != WALK_STRUCT)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* We found a nested struct object. If it matches
|
|
|
|
* the requested ID, we're done. Otherwise let's
|
|
|
|
* continue the search with offset 0 in the new
|
|
|
|
* type.
|
|
|
|
*/
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
2020-12-03 23:46:29 +03:00
|
|
|
if (!btf_types_are_same(btf, id, need_btf, need_type_id)) {
|
2020-08-25 22:21:18 +03:00
|
|
|
off = 0;
|
|
|
|
goto again;
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2019-11-14 21:57:04 +03:00
|
|
|
static int __get_type_size(struct btf *btf, u32 btf_id,
|
|
|
|
const struct btf_type **bad_type)
|
|
|
|
{
|
|
|
|
const struct btf_type *t;
|
|
|
|
|
|
|
|
if (!btf_id)
|
|
|
|
/* void */
|
|
|
|
return 0;
|
|
|
|
t = btf_type_by_id(btf, btf_id);
|
|
|
|
while (t && btf_type_is_modifier(t))
|
|
|
|
t = btf_type_by_id(btf, t->type);
|
2019-11-27 02:01:06 +03:00
|
|
|
if (!t) {
|
2020-11-10 04:19:28 +03:00
|
|
|
*bad_type = btf_type_by_id(btf, 0);
|
2019-11-14 21:57:04 +03:00
|
|
|
return -EINVAL;
|
2019-11-27 02:01:06 +03:00
|
|
|
}
|
2019-11-14 21:57:04 +03:00
|
|
|
if (btf_type_is_ptr(t))
|
|
|
|
/* kernel size of pointer. Not BPF's size of pointer */
|
|
|
|
return sizeof(void *);
|
|
|
|
if (btf_type_is_int(t) || btf_type_is_enum(t))
|
|
|
|
return t->size;
|
|
|
|
*bad_type = t;
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
int btf_distill_func_proto(struct bpf_verifier_log *log,
|
|
|
|
struct btf *btf,
|
|
|
|
const struct btf_type *func,
|
|
|
|
const char *tname,
|
|
|
|
struct btf_func_model *m)
|
|
|
|
{
|
|
|
|
const struct btf_param *args;
|
|
|
|
const struct btf_type *t;
|
|
|
|
u32 i, nargs;
|
|
|
|
int ret;
|
|
|
|
|
2019-11-14 21:57:17 +03:00
|
|
|
if (!func) {
|
|
|
|
/* BTF function prototype doesn't match the verifier types.
|
2021-02-25 23:26:29 +03:00
|
|
|
* Fall back to MAX_BPF_FUNC_REG_ARGS u64 args.
|
2019-11-14 21:57:17 +03:00
|
|
|
*/
|
2021-02-25 23:26:29 +03:00
|
|
|
for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++)
|
2019-11-14 21:57:17 +03:00
|
|
|
m->arg_size[i] = 8;
|
|
|
|
m->ret_size = 8;
|
2021-02-25 23:26:29 +03:00
|
|
|
m->nr_args = MAX_BPF_FUNC_REG_ARGS;
|
2019-11-14 21:57:17 +03:00
|
|
|
return 0;
|
|
|
|
}
|
2019-11-14 21:57:04 +03:00
|
|
|
args = (const struct btf_param *)(func + 1);
|
|
|
|
nargs = btf_type_vlen(func);
|
|
|
|
if (nargs >= MAX_BPF_FUNC_ARGS) {
|
|
|
|
bpf_log(log,
|
|
|
|
"The function %s has %d arguments. Too many.\n",
|
|
|
|
tname, nargs);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
ret = __get_type_size(btf, func->type, &t);
|
|
|
|
if (ret < 0) {
|
|
|
|
bpf_log(log,
|
|
|
|
"The function %s return type %s is unsupported.\n",
|
|
|
|
tname, btf_kind_str[BTF_INFO_KIND(t->info)]);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
m->ret_size = ret;
|
|
|
|
|
|
|
|
for (i = 0; i < nargs; i++) {
|
2021-05-05 16:25:29 +03:00
|
|
|
if (i == nargs - 1 && args[i].type == 0) {
|
|
|
|
bpf_log(log,
|
|
|
|
"The function %s with variable args is unsupported.\n",
|
|
|
|
tname);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2019-11-14 21:57:04 +03:00
|
|
|
ret = __get_type_size(btf, args[i].type, &t);
|
|
|
|
if (ret < 0) {
|
|
|
|
bpf_log(log,
|
|
|
|
"The function %s arg%d type %s is unsupported.\n",
|
|
|
|
tname, i, btf_kind_str[BTF_INFO_KIND(t->info)]);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2021-05-05 16:25:29 +03:00
|
|
|
if (ret == 0) {
|
|
|
|
bpf_log(log,
|
|
|
|
"The function %s has malformed void argument.\n",
|
|
|
|
tname);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2019-11-14 21:57:04 +03:00
|
|
|
m->arg_size[i] = ret;
|
|
|
|
}
|
|
|
|
m->nr_args = nargs;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-01-21 03:53:46 +03:00
|
|
|
/* Compare BTFs of two functions assuming only scalars and pointers to context.
|
|
|
|
* t1 points to BTF_KIND_FUNC in btf1
|
|
|
|
* t2 points to BTF_KIND_FUNC in btf2
|
|
|
|
* Returns:
|
|
|
|
* EINVAL - function prototype mismatch
|
|
|
|
* EFAULT - verifier bug
|
|
|
|
* 0 - 99% match. The last 1% is validated by the verifier.
|
|
|
|
*/
|
2020-02-10 04:14:41 +03:00
|
|
|
static int btf_check_func_type_match(struct bpf_verifier_log *log,
|
|
|
|
struct btf *btf1, const struct btf_type *t1,
|
|
|
|
struct btf *btf2, const struct btf_type *t2)
|
2020-01-21 03:53:46 +03:00
|
|
|
{
|
|
|
|
const struct btf_param *args1, *args2;
|
|
|
|
const char *fn1, *fn2, *s1, *s2;
|
|
|
|
u32 nargs1, nargs2, i;
|
|
|
|
|
|
|
|
fn1 = btf_name_by_offset(btf1, t1->name_off);
|
|
|
|
fn2 = btf_name_by_offset(btf2, t2->name_off);
|
|
|
|
|
|
|
|
if (btf_func_linkage(t1) != BTF_FUNC_GLOBAL) {
|
|
|
|
bpf_log(log, "%s() is not a global function\n", fn1);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
if (btf_func_linkage(t2) != BTF_FUNC_GLOBAL) {
|
|
|
|
bpf_log(log, "%s() is not a global function\n", fn2);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
t1 = btf_type_by_id(btf1, t1->type);
|
|
|
|
if (!t1 || !btf_type_is_func_proto(t1))
|
|
|
|
return -EFAULT;
|
|
|
|
t2 = btf_type_by_id(btf2, t2->type);
|
|
|
|
if (!t2 || !btf_type_is_func_proto(t2))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
args1 = (const struct btf_param *)(t1 + 1);
|
|
|
|
nargs1 = btf_type_vlen(t1);
|
|
|
|
args2 = (const struct btf_param *)(t2 + 1);
|
|
|
|
nargs2 = btf_type_vlen(t2);
|
|
|
|
|
|
|
|
if (nargs1 != nargs2) {
|
|
|
|
bpf_log(log, "%s() has %d args while %s() has %d args\n",
|
|
|
|
fn1, nargs1, fn2, nargs2);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
t1 = btf_type_skip_modifiers(btf1, t1->type, NULL);
|
|
|
|
t2 = btf_type_skip_modifiers(btf2, t2->type, NULL);
|
|
|
|
if (t1->info != t2->info) {
|
|
|
|
bpf_log(log,
|
|
|
|
"Return type %s of %s() doesn't match type %s of %s()\n",
|
|
|
|
btf_type_str(t1), fn1,
|
|
|
|
btf_type_str(t2), fn2);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < nargs1; i++) {
|
|
|
|
t1 = btf_type_skip_modifiers(btf1, args1[i].type, NULL);
|
|
|
|
t2 = btf_type_skip_modifiers(btf2, args2[i].type, NULL);
|
|
|
|
|
|
|
|
if (t1->info != t2->info) {
|
|
|
|
bpf_log(log, "arg%d in %s() is %s while %s() has %s\n",
|
|
|
|
i, fn1, btf_type_str(t1),
|
|
|
|
fn2, btf_type_str(t2));
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
if (btf_type_has_size(t1) && t1->size != t2->size) {
|
|
|
|
bpf_log(log,
|
|
|
|
"arg%d in %s() has size %d while %s() has %d\n",
|
|
|
|
i, fn1, t1->size,
|
|
|
|
fn2, t2->size);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* global functions are validated with scalars and pointers
|
|
|
|
* to context only. And only global functions can be replaced.
|
|
|
|
* Hence type check only those types.
|
|
|
|
*/
|
|
|
|
if (btf_type_is_int(t1) || btf_type_is_enum(t1))
|
|
|
|
continue;
|
|
|
|
if (!btf_type_is_ptr(t1)) {
|
|
|
|
bpf_log(log,
|
|
|
|
"arg%d in %s() has unrecognized type\n",
|
|
|
|
i, fn1);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
t1 = btf_type_skip_modifiers(btf1, t1->type, NULL);
|
|
|
|
t2 = btf_type_skip_modifiers(btf2, t2->type, NULL);
|
|
|
|
if (!btf_type_is_struct(t1)) {
|
|
|
|
bpf_log(log,
|
|
|
|
"arg%d in %s() is not a pointer to context\n",
|
|
|
|
i, fn1);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
if (!btf_type_is_struct(t2)) {
|
|
|
|
bpf_log(log,
|
|
|
|
"arg%d in %s() is not a pointer to context\n",
|
|
|
|
i, fn2);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
/* This is an optional check to make program writing easier.
|
|
|
|
* Compare names of structs and report an error to the user.
|
|
|
|
* btf_prepare_func_args() already checked that t2 struct
|
|
|
|
* is a context type. btf_prepare_func_args() will check
|
|
|
|
* later that t1 struct is a context type as well.
|
|
|
|
*/
|
|
|
|
s1 = btf_name_by_offset(btf1, t1->name_off);
|
|
|
|
s2 = btf_name_by_offset(btf2, t2->name_off);
|
|
|
|
if (strcmp(s1, s2)) {
|
|
|
|
bpf_log(log,
|
|
|
|
"arg%d %s(struct %s *) doesn't match %s(struct %s *)\n",
|
|
|
|
i, fn1, s1, fn2, s2);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Compare BTFs of given program with BTF of target program */
|
2020-09-26 00:25:01 +03:00
|
|
|
int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog,
|
2020-01-21 03:53:46 +03:00
|
|
|
struct btf *btf2, const struct btf_type *t2)
|
|
|
|
{
|
|
|
|
struct btf *btf1 = prog->aux->btf;
|
|
|
|
const struct btf_type *t1;
|
|
|
|
u32 btf_id = 0;
|
|
|
|
|
|
|
|
if (!prog->aux->func_info) {
|
2020-09-26 00:25:01 +03:00
|
|
|
bpf_log(log, "Program extension requires BTF\n");
|
2020-01-21 03:53:46 +03:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
btf_id = prog->aux->func_info[0].type_id;
|
|
|
|
if (!btf_id)
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
t1 = btf_type_by_id(btf1, btf_id);
|
|
|
|
if (!t1 || !btf_type_is_func(t1))
|
|
|
|
return -EFAULT;
|
|
|
|
|
2020-09-26 00:25:01 +03:00
|
|
|
return btf_check_func_type_match(log, btf1, t1, btf2, t2);
|
2020-01-21 03:53:46 +03:00
|
|
|
}
|
|
|
|
|
bpf: Support bpf program calling kernel function
This patch adds support to BPF verifier to allow bpf program calling
kernel function directly.
The use case included in this set is to allow bpf-tcp-cc to directly
call some tcp-cc helper functions (e.g. "tcp_cong_avoid_ai()"). Those
functions have already been used by some kernel tcp-cc implementations.
This set will also allow the bpf-tcp-cc program to directly call the
kernel tcp-cc implementation, For example, a bpf_dctcp may only want to
implement its own dctcp_cwnd_event() and reuse other dctcp_*() directly
from the kernel tcp_dctcp.c instead of reimplementing (or
copy-and-pasting) them.
The tcp-cc kernel functions mentioned above will be white listed
for the struct_ops bpf-tcp-cc programs to use in a later patch.
The white listed functions are not bounded to a fixed ABI contract.
Those functions have already been used by the existing kernel tcp-cc.
If any of them has changed, both in-tree and out-of-tree kernel tcp-cc
implementations have to be changed. The same goes for the struct_ops
bpf-tcp-cc programs which have to be adjusted accordingly.
This patch is to make the required changes in the bpf verifier.
First change is in btf.c, it adds a case in "btf_check_func_arg_match()".
When the passed in "btf->kernel_btf == true", it means matching the
verifier regs' states with a kernel function. This will handle the
PTR_TO_BTF_ID reg. It also maps PTR_TO_SOCK_COMMON, PTR_TO_SOCKET,
and PTR_TO_TCP_SOCK to its kernel's btf_id.
In the later libbpf patch, the insn calling a kernel function will
look like:
insn->code == (BPF_JMP | BPF_CALL)
insn->src_reg == BPF_PSEUDO_KFUNC_CALL /* <- new in this patch */
insn->imm == func_btf_id /* btf_id of the running kernel */
[ For the future calling function-in-kernel-module support, an array
of module btf_fds can be passed at the load time and insn->off
can be used to index into this array. ]
At the early stage of verifier, the verifier will collect all kernel
function calls into "struct bpf_kfunc_desc". Those
descriptors are stored in "prog->aux->kfunc_tab" and will
be available to the JIT. Since this "add" operation is similar
to the current "add_subprog()" and looking for the same insn->code,
they are done together in the new "add_subprog_and_kfunc()".
In the "do_check()" stage, the new "check_kfunc_call()" is added
to verify the kernel function call instruction:
1. Ensure the kernel function can be used by a particular BPF_PROG_TYPE.
A new bpf_verifier_ops "check_kfunc_call" is added to do that.
The bpf-tcp-cc struct_ops program will implement this function in
a later patch.
2. Call "btf_check_kfunc_args_match()" to ensure the regs can be
used as the args of a kernel function.
3. Mark the regs' type, subreg_def, and zext_dst.
At the later do_misc_fixups() stage, the new fixup_kfunc_call()
will replace the insn->imm with the function address (relative
to __bpf_call_base). If needed, the jit can find the btf_func_model
by calling the new bpf_jit_find_kfunc_model(prog, insn).
With the imm set to the function address, "bpftool prog dump xlated"
will be able to display the kernel function calls the same way as
it displays other bpf helper calls.
A gpl_compatible program is required to call kernel functions.
This feature currently requires JIT.
The verifier selftests are adjusted because of the changes in
the verbose log in add_subprog_and_kfunc().
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015142.1544736-1-kafai@fb.com
2021-03-25 04:51:42 +03:00
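As a hedged illustration of the program-side view this commit enables (a minimal sketch in BPF C, assuming tcp_cong_avoid_ai() is whitelisted for struct_ops bpf-tcp-cc; none of this lives in btf.c): the kernel function is declared with libbpf's __ksym attribute and called like any helper, and libbpf emits the BPF_PSEUDO_KFUNC_CALL instruction described above.

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;

SEC("struct_ops/my_cong_avoid")
void BPF_PROG(my_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
{
	struct tcp_sock *tp = (struct tcp_sock *)sk;

	/* Compiles to BPF_JMP|BPF_CALL with src_reg == BPF_PSEUDO_KFUNC_CALL
	 * and imm == btf_id of tcp_cong_avoid_ai in the running kernel.
	 */
	tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
}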
|
|
|
static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
|
|
|
|
#ifdef CONFIG_NET
|
|
|
|
[PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
|
|
|
|
[PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
|
|
|
|
[PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
|
|
|
|
#endif
|
|
|
|
};
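A short sketch of what this table buys (kfunc name hypothetical): a register the verifier tracks as PTR_TO_TCP_SOCK can be passed where a kernel function expects a struct tcp_sock *, because the argument check below resolves the register through reg2btf_ids to the matching vmlinux BTF id.

/* Hypothetical kernel-side prototype; a PTR_TO_TCP_SOCK argument
 * register satisfies "tp" via the reg2btf_ids mapping above.
 */
void bpf_example_tcp_kfunc(struct tcp_sock *tp);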
|
|
|
|
|
bpf: Extend kfunc with PTR_TO_CTX, PTR_TO_MEM argument support
Allow passing PTR_TO_CTX, if the kfunc expects a matching struct type,
and punt to the PTR_TO_MEM block if reg->type does not fall in one of
PTR_TO_BTF_ID or PTR_TO_SOCK* types. This will be used by future commits
to get access to XDP and TC PTR_TO_CTX, and pass various data (flags,
l4proto, netns_id, etc.) encoded in opts struct passed as pointer to
kfunc.
For PTR_TO_MEM support, arguments are currently limited to pointer to
scalar, or pointer to struct composed of scalars. This is done so that
unsafe scenarios (like passing PTR_TO_MEM where PTR_TO_BTF_ID of
in-kernel valid structure is expected, which may have pointers) are
avoided. Since the argument checking happens based on the argument register
type, it is not easy to ascertain what the expected type is. In the
future, support for PTR_TO_MEM for kfunc can be extended to serve other
use cases. The struct type whose pointer is passed in may have a maximum
nesting depth of 4, all recursively composed of scalars or struct with
scalars.
Future commits will add negative tests that check whether these
restrictions imposed for kfunc arguments are duly rejected by BPF
verifier or not.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211217015031.1278167-4-memxor@gmail.com
2021-12-17 04:50:24 +03:00
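To make the PTR_TO_MEM restriction concrete, a sketch of the kind of opts struct this permits (hypothetical type, not from this file): only scalars, recursively, with at most 4 levels of struct nesting.

/* Qualifies for kfunc PTR_TO_MEM passing: scalar members only. */
struct example_kfunc_opts {
	__s32 netns_id;
	__u32 error;
	__u8  l4proto;
	__u8  reserved[3];
};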
|
|
|
/* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
|
|
|
|
static bool __btf_type_is_scalar_struct(struct bpf_verifier_log *log,
|
|
|
|
const struct btf *btf,
|
|
|
|
const struct btf_type *t, int rec)
|
|
|
|
{
|
|
|
|
const struct btf_type *member_type;
|
|
|
|
const struct btf_member *member;
|
|
|
|
u32 i;
|
|
|
|
|
|
|
|
if (!btf_type_is_struct(t))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
for_each_member(i, t, member) {
|
|
|
|
const struct btf_array *array;
|
|
|
|
|
|
|
|
member_type = btf_type_skip_modifiers(btf, member->type, NULL);
|
|
|
|
if (btf_type_is_struct(member_type)) {
|
|
|
|
if (rec >= 3) {
|
|
|
|
bpf_log(log, "max struct nesting depth exceeded\n");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
if (!__btf_type_is_scalar_struct(log, btf, member_type, rec + 1))
|
|
|
|
return false;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (btf_type_is_array(member_type)) {
|
|
|
|
array = btf_type_array(member_type);
|
|
|
|
if (!array->nelems)
|
|
|
|
return false;
|
|
|
|
member_type = btf_type_skip_modifiers(btf, array->type, NULL);
|
|
|
|
if (!btf_type_is_scalar(member_type))
|
|
|
|
return false;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!btf_type_is_scalar(member_type))
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
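Two hypothetical types illustrating the check above: the first passes (scalars, a scalar array, and a nested struct of scalars within the depth limit), the second fails because of its pointer member.

struct example_scalar_ok {
	__u32 a;
	__u64 arr[4];		/* nelems > 0, scalar element type: fine */
	struct { __u16 x; } inner;
};

struct example_scalar_bad {
	__u32 a;
	void *p;		/* pointer member: not a scalar, rejected */
};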
|
|
|
|
|
bpf: Introduce mem, size argument pair support for kfunc
BPF helpers can associate two adjacent arguments together to pass memory
of certain size, using ARG_PTR_TO_MEM and ARG_CONST_SIZE arguments.
Since we don't use bpf_func_proto for kfunc, we need to leverage BTF to
implement similar support.
The ARG_CONST_SIZE processing for helpers is refactored into a common
check_mem_size_reg helper that is shared with kfunc as well. kfunc
ptr_to_mem support follows logic similar to global functions, where
verification is done as if the pointer is not null, even when it may be
null.
This leads to a simple-to-follow rule for writing kfunc: always check
the argument pointer for NULL, except when it is PTR_TO_CTX. The
PTR_TO_CTX case is in turn only safe when the helper expecting a pointer to
program ctx is not exposed to other programs where the same struct is not
the ctx type. In that case, the type check will fall through to other cases
and would permit passing other types of pointers, possibly NULL at
runtime.
Currently, we require the size argument to be suffixed with "__sz" in
the parameter name. This information is then recorded in kernel BTF and
verified during function argument checking. In the future we can use BTF
tagging instead, and modify the kernel function definitions. This will
be a purely kernel-side change.
This allows us to have some form of backwards compatibility for
structures that are passed in to the kernel function with their size,
and allow variable length structures to be passed in if they are
accompanied by a size parameter.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 19:39:47 +03:00
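A sketch of the naming convention (hypothetical kfunc): the "__sz" suffix on the second parameter tells the verifier to treat the two arguments as a mem, size pair and range-check the access.

/* The verifier pairs "data" with "data__sz" purely by the name suffix. */
void bpf_example_copy(void *data, __u32 data__sz);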
|
|
|
static bool is_kfunc_arg_mem_size(const struct btf *btf,
|
|
|
|
const struct btf_param *arg,
|
|
|
|
const struct bpf_reg_state *reg)
|
|
|
|
{
|
|
|
|
int len, sfx_len = sizeof("__sz") - 1;
|
|
|
|
const struct btf_type *t;
|
|
|
|
const char *param_name;
|
|
|
|
|
|
|
|
t = btf_type_skip_modifiers(btf, arg->type, NULL);
|
|
|
|
if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
/* In the future, this can be ported to use BTF tagging */
|
|
|
|
param_name = btf_name_by_offset(btf, arg->name_off);
|
|
|
|
if (str_is_empty(param_name))
|
|
|
|
return false;
|
|
|
|
len = strlen(param_name);
|
|
|
|
if (len < sfx_len)
|
|
|
|
return false;
|
|
|
|
param_name += len - sfx_len;
|
|
|
|
if (strncmp(param_name, "__sz", sfx_len))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2021-03-25 04:51:36 +03:00
|
|
|
static int btf_check_func_arg_match(struct bpf_verifier_env *env,
|
|
|
|
const struct btf *btf, u32 func_id,
|
|
|
|
struct bpf_reg_state *regs,
|
|
|
|
bool ptr_to_mem_ok)
|
2019-11-14 21:57:16 +03:00
|
|
|
{
|
|
|
|
struct bpf_verifier_log *log = &env->log;
|
bpf: Add reference tracking support to kfunc
This patch adds verifier support for PTR_TO_BTF_ID return type of kfunc
to be a reference, by reusing acquire_reference_state/release_reference
support for existing in-kernel bpf helpers.
We make use of the three kfunc types:
- BTF_KFUNC_TYPE_ACQUIRE
Return true if kfunc_btf_id is an acquire kfunc. This will
acquire_reference_state for the returned PTR_TO_BTF_ID (this is the
only allow return value). Note that acquire kfunc must always return a
PTR_TO_BTF_ID{_OR_NULL}, otherwise the program is rejected.
- BTF_KFUNC_TYPE_RELEASE
Return true if kfunc_btf_id is a release kfunc. This will release the
reference to the passed in PTR_TO_BTF_ID which has a reference state
(from earlier acquire kfunc).
The btf_check_func_arg_match returns the regno (of argument register,
hence > 0) if the kfunc is a release kfunc, and a proper referenced
PTR_TO_BTF_ID is being passed to it.
This is similar to how helper call check uses bpf_call_arg_meta to
store the ref_obj_id that is later used to release the reference.
Similar to in-kernel helper, we only allow passing one referenced
PTR_TO_BTF_ID as an argument. It can also be passed in to normal
kfunc, but in case of release kfunc there must always be one
PTR_TO_BTF_ID argument that is referenced.
- BTF_KFUNC_TYPE_RET_NULL
For kfunc returning PTR_TO_BTF_ID, tells if it can be NULL, hence
force caller to mark the pointer not null (using check) before
accessing it. Note that taking into account the case fixed by commit
93c230e3f5bd ("bpf: Enforce id generation for all may-be-null register type")
we assign a non-zero id for mark_ptr_or_null_reg logic. Later, if more
return types are supported by kfunc, which have a _OR_NULL variant, it
might be better to move this id generation under a common
reg_type_may_be_null check, similar to the case in the commit.
Referenced PTR_TO_BTF_ID is currently only limited to kfunc, but can be
extended in the future to other BPF helpers as well. For now, we can
rely on the btf_struct_ids_match check to ensure we get the pointer to
the expected struct type. In the future, care needs to be taken to avoid
ambiguity for reference PTR_TO_BTF_ID passed to release function, in
case multiple candidates can release same BTF ID.
e.g. there might be two release kfuncs (or kfunc and helper):
foo(struct abc *p);
bar(struct abc *p);
... such that both release a PTR_TO_BTF_ID with btf_id of struct abc. In
this case we would need to track the acquire function corresponding to
the release function to avoid type confusion, and store this information
in the register state so that an incorrect program can be rejected. This
is not a problem right now, hence it is left as an exercise for the
future patch introducing such a case in the kernel.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-6-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 19:39:48 +03:00
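A hedged, program-side sketch of the acquire/release contract just described (kfunc names hypothetical; the lookup is assumed registered as ACQUIRE plus RET_NULL and the release as RELEASE):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

extern struct nf_conn *bpf_example_ct_lookup(struct xdp_md *ctx) __ksym;
extern void bpf_example_ct_release(struct nf_conn *ct) __ksym;

SEC("xdp")
int use_ct(struct xdp_md *ctx)
{
	struct nf_conn *ct = bpf_example_ct_lookup(ctx);

	if (!ct)			/* RET_NULL: must NULL-check before use */
		return XDP_PASS;
	/* ... read fields of ct ... */
	bpf_example_ct_release(ct);	/* RELEASE: drop the acquired reference */
	return XDP_PASS;
}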
|
|
|
u32 i, nargs, ref_id, ref_obj_id = 0;
|
2021-12-17 04:50:24 +03:00
|
|
|
bool is_kfunc = btf_is_kernel(btf);
|
2021-03-25 04:51:36 +03:00
|
|
|
const char *func_name, *ref_tname;
|
2021-02-12 23:56:41 +03:00
|
|
|
const struct btf_type *t, *ref_t;
|
2021-03-25 04:51:36 +03:00
|
|
|
const struct btf_param *args;
|
2022-01-14 19:39:48 +03:00
|
|
|
int ref_regno = 0;
|
|
|
|
bool rel = false;
|
2019-11-14 21:57:16 +03:00
|
|
|
|
2021-03-25 04:51:36 +03:00
|
|
|
t = btf_type_by_id(btf, func_id);
|
2019-11-14 21:57:16 +03:00
|
|
|
if (!t || !btf_type_is_func(t)) {
|
2020-01-10 09:41:20 +03:00
|
|
|
/* These checks were already done by the verifier while loading
|
2021-03-25 04:51:42 +03:00
|
|
|
* struct bpf_func_info or in add_kfunc_call().
|
2020-01-10 09:41:20 +03:00
|
|
|
*/
|
2021-03-25 04:51:36 +03:00
|
|
|
bpf_log(log, "BTF of func_id %u doesn't point to KIND_FUNC\n",
|
|
|
|
func_id);
|
2020-01-10 09:41:20 +03:00
|
|
|
return -EFAULT;
|
2019-11-14 21:57:16 +03:00
|
|
|
}
|
2021-03-25 04:51:36 +03:00
|
|
|
func_name = btf_name_by_offset(btf, t->name_off);
|
2019-11-14 21:57:16 +03:00
|
|
|
|
|
|
|
t = btf_type_by_id(btf, t->type);
|
|
|
|
if (!t || !btf_type_is_func_proto(t)) {
|
2021-03-25 04:51:36 +03:00
|
|
|
bpf_log(log, "Invalid BTF of func %s\n", func_name);
|
2020-01-10 09:41:20 +03:00
|
|
|
return -EFAULT;
|
2019-11-14 21:57:16 +03:00
|
|
|
}
|
|
|
|
args = (const struct btf_param *)(t + 1);
|
|
|
|
nargs = btf_type_vlen(t);
|
2021-02-25 23:26:29 +03:00
|
|
|
if (nargs > MAX_BPF_FUNC_REG_ARGS) {
|
2021-03-25 04:51:36 +03:00
|
|
|
bpf_log(log, "Function %s has %d > %d args\n", func_name, nargs,
|
2021-02-25 23:26:29 +03:00
|
|
|
MAX_BPF_FUNC_REG_ARGS);
|
2021-03-25 04:51:36 +03:00
|
|
|
return -EINVAL;
|
2019-11-14 21:57:16 +03:00
|
|
|
}
|
2021-02-12 23:56:41 +03:00
|
|
|
|
2019-11-14 21:57:16 +03:00
|
|
|
/* check that BTF function arguments match actual types that the
|
|
|
|
* verifier sees.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < nargs; i++) {
|
2021-03-25 04:51:36 +03:00
|
|
|
u32 regno = i + 1;
|
|
|
|
struct bpf_reg_state *reg = &regs[regno];
|
2021-02-12 23:56:39 +03:00
|
|
|
|
2021-03-25 04:51:36 +03:00
|
|
|
t = btf_type_skip_modifiers(btf, args[i].type, NULL);
|
|
|
|
if (btf_type_is_scalar(t)) {
|
2021-02-12 23:56:39 +03:00
|
|
|
if (reg->type == SCALAR_VALUE)
|
2019-11-14 21:57:16 +03:00
|
|
|
continue;
|
2021-03-25 04:51:36 +03:00
|
|
|
bpf_log(log, "R%d is not a scalar\n", regno);
|
|
|
|
return -EINVAL;
|
2019-11-14 21:57:16 +03:00
|
|
|
}
|
2021-03-25 04:51:36 +03:00
|
|
|
|
|
|
|
if (!btf_type_is_ptr(t)) {
|
|
|
|
bpf_log(log, "Unrecognized arg#%d type %s\n",
|
|
|
|
i, btf_type_str(t));
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2021-03-25 04:51:42 +03:00
|
|
|
ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
|
2021-03-25 04:51:36 +03:00
|
|
|
ref_tname = btf_name_by_offset(btf, ref_t->name_off);
|
2021-12-17 04:50:24 +03:00
|
|
|
if (btf_get_prog_ctx_type(log, btf, t,
|
|
|
|
env->prog->type, i)) {
|
|
|
|
/* If function expects ctx type in BTF check that caller
|
|
|
|
* is passing PTR_TO_CTX.
|
|
|
|
*/
|
|
|
|
if (reg->type != PTR_TO_CTX) {
|
|
|
|
bpf_log(log,
|
|
|
|
"arg#%d expected pointer to ctx, but got %s\n",
|
|
|
|
i, btf_type_str(t));
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2022-01-10 17:05:49 +03:00
|
|
|
if (check_ptr_off_reg(env, reg, regno))
|
2021-12-17 04:50:24 +03:00
|
|
|
return -EINVAL;
|
|
|
|
} else if (is_kfunc && (reg->type == PTR_TO_BTF_ID || reg2btf_ids[reg->type])) {
|
2021-03-25 04:51:42 +03:00
|
|
|
const struct btf_type *reg_ref_t;
|
|
|
|
const struct btf *reg_btf;
|
|
|
|
const char *reg_ref_tname;
|
|
|
|
u32 reg_ref_id;
|
|
|
|
|
|
|
|
if (!btf_type_is_struct(ref_t)) {
|
|
|
|
bpf_log(log, "kernel function %s args#%d pointer type %s %s is not supported\n",
|
|
|
|
func_name, i, btf_type_str(ref_t),
|
|
|
|
ref_tname);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (reg->type == PTR_TO_BTF_ID) {
|
|
|
|
reg_btf = reg->btf;
|
|
|
|
reg_ref_id = reg->btf_id;
|
2022-01-14 19:39:48 +03:00
|
|
|
/* Ensure only one argument is referenced PTR_TO_BTF_ID */
|
|
|
|
if (reg->ref_obj_id) {
|
|
|
|
if (ref_obj_id) {
|
|
|
|
bpf_log(log, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
|
|
|
|
regno, reg->ref_obj_id, ref_obj_id);
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
ref_regno = regno;
|
|
|
|
ref_obj_id = reg->ref_obj_id;
|
|
|
|
}
|
2021-12-17 04:50:24 +03:00
|
|
|
} else {
|
2021-03-25 04:51:42 +03:00
|
|
|
reg_btf = btf_vmlinux;
|
|
|
|
reg_ref_id = *reg2btf_ids[reg->type];
|
|
|
|
}
|
|
|
|
|
|
|
|
reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id,
|
|
|
|
&reg_ref_id);
|
|
|
|
reg_ref_tname = btf_name_by_offset(reg_btf,
|
|
|
|
reg_ref_t->name_off);
|
|
|
|
if (!btf_struct_ids_match(log, reg_btf, reg_ref_id,
|
|
|
|
reg->off, btf, ref_id)) {
|
|
|
|
bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
|
|
|
|
func_name, i,
|
|
|
|
btf_type_str(ref_t), ref_tname,
|
|
|
|
regno, btf_type_str(reg_ref_t),
|
|
|
|
reg_ref_tname);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2021-03-25 04:51:36 +03:00
|
|
|
} else if (ptr_to_mem_ok) {
|
|
|
|
const struct btf_type *resolve_ret;
|
|
|
|
u32 type_size;
|
2021-02-12 23:56:41 +03:00
|
|
|
|
2021-12-17 04:50:24 +03:00
|
|
|
if (is_kfunc) {
|
2022-01-14 19:39:47 +03:00
|
|
|
bool arg_mem_size = i + 1 < nargs && is_kfunc_arg_mem_size(btf, &args[i + 1], &regs[regno + 1]);
|
|
|
|
|
2021-12-17 04:50:24 +03:00
|
|
|
/* Permit pointer to mem, but only when argument
|
|
|
|
* type is pointer to scalar, or struct composed
|
|
|
|
* (recursively) of scalars.
|
2022-01-14 19:39:47 +03:00
|
|
|
* When arg_mem_size is true, the pointer can be
|
|
|
|
* void *.
|
2021-12-17 04:50:24 +03:00
|
|
|
*/
|
|
|
|
if (!btf_type_is_scalar(ref_t) &&
|
2022-01-14 19:39:47 +03:00
|
|
|
!__btf_type_is_scalar_struct(log, btf, ref_t, 0) &&
|
|
|
|
(arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
|
2021-12-17 04:50:24 +03:00
|
|
|
bpf_log(log,
|
2022-01-14 19:39:47 +03:00
|
|
|
"arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
|
|
|
|
i, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
|
2021-12-17 04:50:24 +03:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
bpf: Introduce mem, size argument pair support for kfunc
BPF helpers can associate two adjacent arguments together to pass memory
of certain size, using ARG_PTR_TO_MEM and ARG_CONST_SIZE arguments.
Since we don't use bpf_func_proto for kfunc, we need to leverage BTF to
implement similar support.
The ARG_CONST_SIZE processing for helpers is refactored into a common
check_mem_size_reg helper that is shared with kfunc as well. kfunc
ptr_to_mem support follows logic similar to global functions, where
verification is done as if pointer is not null, even when it may be
null.
This leads to a simple-to-follow rule for writing kfuncs: always check
the argument pointer for NULL, except when it is PTR_TO_CTX. Also, the
PTR_TO_CTX case is only safe when the helper expecting a pointer to
program ctx is not exposed to other programs where the same struct is not
the ctx type. In that case, the type check will fall through to other cases
and would permit passing other types of pointers, possibly NULL at
runtime.
Currently, we require the size argument to be suffixed with "__sz" in
the parameter name. This information is then recorded in kernel BTF and
verified during function argument checking. In the future we can use BTF
tagging instead, and modify the kernel function definitions. This will
be a purely kernel-side change.
This allows us to have some form of backwards compatibility for
structures that are passed in to the kernel function with their size,
and allow variable length structures to be passed in if they are
accompanied by a size parameter.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 19:39:47 +03:00
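As a sketch of the "__sz" convention (the kfunc below is hypothetical),
the verifier pairs a pointer argument with the size argument that
follows it purely from the parameter name recorded in BTF:
/* "data" and "data__sz" form a mem, size pair: check_kfunc_mem_size_reg
 * verifies that the register holding data__sz bounds the memory pointed
 * to by data, mirroring ARG_PTR_TO_MEM + ARG_CONST_SIZE for helpers.
 */
int bpf_my_kfunc(void *data, __u32 data__sz);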
|
|
|
|
|
|
|
/* Check for mem, len pair */
|
|
|
|
if (arg_mem_size) {
|
|
|
|
if (check_kfunc_mem_size_reg(env, ®s[regno + 1], regno + 1)) {
|
|
|
|
bpf_log(log, "arg#%d arg#%d memory, len pair leads to invalid memory access\n",
|
|
|
|
i, i + 1);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
i++;
|
|
|
|
continue;
|
|
|
|
}
|
bpf: Extend kfunc with PTR_TO_CTX, PTR_TO_MEM argument support
Allow passing PTR_TO_CTX, if the kfunc expects a matching struct type,
and punt to PTR_TO_MEM block if reg->type does not fall in one of
PTR_TO_BTF_ID or PTR_TO_SOCK* types. This will be used by future commits
to get access to XDP and TC PTR_TO_CTX, and pass various data (flags,
l4proto, netns_id, etc.) encoded in opts struct passed as pointer to
kfunc.
For PTR_TO_MEM support, arguments are currently limited to pointer to
scalar, or pointer to struct composed of scalars. This is done so that
unsafe scenarios (like passing PTR_TO_MEM where PTR_TO_BTF_ID of
in-kernel valid structure is expected, which may have pointers) are
avoided. Since the argument checking happens based on the argument register
type, it is not easy to ascertain what the expected type is. In the
future, support for PTR_TO_MEM for kfunc can be extended to serve other
usecases. The struct type whose pointer is passed in may have maximum
nesting depth of 4, all recursively composed of scalars or struct with
scalars.
Future commits will add negative tests that check whether these
restrictions imposed for kfunc arguments are duly enforced by the BPF
verifier.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211217015031.1278167-4-memxor@gmail.com
2021-12-17 04:50:24 +03:00
|
|
|
}
|
|
|
|
|
2021-03-25 04:51:36 +03:00
|
|
|
resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
|
|
|
|
if (IS_ERR(resolve_ret)) {
|
2021-02-12 23:56:41 +03:00
|
|
|
bpf_log(log,
|
2021-03-25 04:51:36 +03:00
|
|
|
"arg#%d reference type('%s %s') size cannot be determined: %ld\n",
|
|
|
|
i, btf_type_str(ref_t), ref_tname,
|
|
|
|
PTR_ERR(resolve_ret));
|
|
|
|
return -EINVAL;
|
2021-02-12 23:56:41 +03:00
|
|
|
}
|
|
|
|
|
2021-03-25 04:51:36 +03:00
|
|
|
if (check_mem_reg(env, reg, regno, type_size))
|
|
|
|
return -EINVAL;
|
|
|
|
} else {
|
bpf: Extend kfunc with PTR_TO_CTX, PTR_TO_MEM argument support
Allow passing PTR_TO_CTX, if the kfunc expects a matching struct type,
and punt to PTR_TO_MEM block if reg->type does not fall in one of
PTR_TO_BTF_ID or PTR_TO_SOCK* types. This will be used by future commits
to get access to XDP and TC PTR_TO_CTX, and pass various data (flags,
l4proto, netns_id, etc.) encoded in opts struct passed as pointer to
kfunc.
For PTR_TO_MEM support, arguments are currently limited to pointer to
scalar, or pointer to struct composed of scalars. This is done so that
unsafe scenarios (like passing PTR_TO_MEM where PTR_TO_BTF_ID of
in-kernel valid structure is expected, which may have pointers) are
avoided. Since the argument checking happens based on the argument register
type, it is not easy to ascertain what the expected type is. In the
future, support for PTR_TO_MEM for kfunc can be extended to serve other
usecases. The struct type whose pointer is passed in may have maximum
nesting depth of 4, all recursively composed of scalars or struct with
scalars.
Future commits will add negative tests that check whether these
restrictions imposed for kfunc arguments are duly enforced by the BPF
verifier.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211217015031.1278167-4-memxor@gmail.com
2021-12-17 04:50:24 +03:00
|
|
|
bpf_log(log, "reg type unsupported for arg#%d %sfunction %s#%d\n", i,
|
|
|
|
is_kfunc ? "kernel " : "", func_name, func_id);
|
2021-03-25 04:51:36 +03:00
|
|
|
return -EINVAL;
|
2019-11-14 21:57:16 +03:00
|
|
|
}
|
|
|
|
}
|
2021-03-25 04:51:36 +03:00
|
|
|
|
bpf: Add reference tracking support to kfunc
This patch adds verifier support for PTR_TO_BTF_ID return type of kfunc
to be a reference, by reusing acquire_reference_state/release_reference
support for existing in-kernel bpf helpers.
We make use of the three kfunc types:
- BTF_KFUNC_TYPE_ACQUIRE
Return true if kfunc_btf_id is an acquire kfunc. This will
acquire_reference_state for the returned PTR_TO_BTF_ID (this is the
only allowed return value). Note that an acquire kfunc must always return a
PTR_TO_BTF_ID{_OR_NULL}, otherwise the program is rejected.
- BTF_KFUNC_TYPE_RELEASE
Return true if kfunc_btf_id is a release kfunc. This will release the
reference to the passed in PTR_TO_BTF_ID which has a reference state
(from earlier acquire kfunc).
The btf_check_func_arg_match returns the regno (of argument register,
hence > 0) if the kfunc is a release kfunc, and a proper referenced
PTR_TO_BTF_ID is being passed to it.
This is similar to how helper call check uses bpf_call_arg_meta to
store the ref_obj_id that is later used to release the reference.
Similar to in-kernel helper, we only allow passing one referenced
PTR_TO_BTF_ID as an argument. It can also be passed in to normal
kfunc, but in case of release kfunc there must always be one
PTR_TO_BTF_ID argument that is referenced.
- BTF_KFUNC_TYPE_RET_NULL
For a kfunc returning PTR_TO_BTF_ID, tells if it can be NULL, hence
forcing the caller to mark the pointer not null (using a check) before
accessing it. Note that taking into account the case fixed by commit
93c230e3f5bd ("bpf: Enforce id generation for all may-be-null register type")
we assign a non-zero id for mark_ptr_or_null_reg logic. Later, if more
return types are supported by kfunc, which have a _OR_NULL variant, it
might be better to move this id generation under a common
reg_type_may_be_null check, similar to the case in the commit.
Referenced PTR_TO_BTF_ID is currently only limited to kfunc, but can be
extended in the future to other BPF helpers as well. For now, we can
rely on the btf_struct_ids_match check to ensure we get the pointer to
the expected struct type. In the future, care needs to be taken to avoid
ambiguity for reference PTR_TO_BTF_ID passed to release function, in
case multiple candidates can release the same BTF ID.
e.g. there might be two release kfuncs (or kfunc and helper):
foo(struct abc *p);
bar(struct abc *p);
... such that both release a PTR_TO_BTF_ID with btf_id of struct abc. In
this case we would need to track the acquire function corresponding to
the release function to avoid type confusion, and store this information
in the register state so that an incorrect program can be rejected. This
is not a problem right now, hence it is left as an exercise for the
future patch introducing such a case in the kernel.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-6-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 19:39:48 +03:00
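From the BPF program's point of view, the acquire/release contract looks
like the following sketch (the kfuncs and struct are hypothetical):
extern struct my_obj *bpf_my_obj_acquire(struct __sk_buff *skb) __ksym;
extern void bpf_my_obj_release(struct my_obj *obj) __ksym;

SEC("tc")
int prog(struct __sk_buff *skb)
{
	struct my_obj *obj;

	obj = bpf_my_obj_acquire(skb);	/* ACQUIRE + RET_NULL kfunc */
	if (!obj)			/* RET_NULL forces this NULL check */
		return 0;
	bpf_my_obj_release(obj);	/* RELEASE kfunc drops the reference */
	return 0;
}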
|
|
|
/* Either both are set, or neither */
|
|
|
|
WARN_ON_ONCE((ref_obj_id && !ref_regno) || (!ref_obj_id && ref_regno));
|
|
|
|
if (is_kfunc) {
|
|
|
|
rel = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog),
|
|
|
|
BTF_KFUNC_TYPE_RELEASE, func_id);
|
|
|
|
/* We already made sure ref_obj_id is set only for one argument */
|
|
|
|
if (rel && !ref_obj_id) {
|
|
|
|
bpf_log(log, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
|
|
|
|
func_name);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
/* Allow (!rel && ref_obj_id), so that passing such referenced PTR_TO_BTF_ID to
|
|
|
|
* other kfuncs works
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
/* returns argument register number > 0 in case of reference release kfunc */
|
|
|
|
return rel ? ref_regno : 0;
|
2021-03-25 04:51:36 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Compare BTF of a function with given bpf_reg_state.
|
|
|
|
* Returns:
|
|
|
|
* EFAULT - there is a verifier bug. Abort verification.
|
|
|
|
* EINVAL - there is a type mismatch or BTF is not available.
|
|
|
|
* 0 - BTF matches with what bpf_reg_state expects.
|
|
|
|
* Only PTR_TO_CTX and SCALAR_VALUE states are recognized.
|
|
|
|
*/
|
|
|
|
int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
|
|
|
|
struct bpf_reg_state *regs)
|
|
|
|
{
|
|
|
|
struct bpf_prog *prog = env->prog;
|
|
|
|
struct btf *btf = prog->aux->btf;
|
|
|
|
bool is_global;
|
|
|
|
u32 btf_id;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (!prog->aux->func_info)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
btf_id = prog->aux->func_info[subprog].type_id;
|
|
|
|
if (!btf_id)
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
if (prog->aux->func_info_aux[subprog].unreliable)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
is_global = prog->aux->func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
|
|
|
|
err = btf_check_func_arg_match(env, btf, btf_id, regs, is_global);
|
|
|
|
|
2020-01-10 09:41:20 +03:00
|
|
|
/* Compiler optimizations can remove arguments from static functions
|
|
|
|
* or a mismatched type can be passed into a global function.
|
|
|
|
* In such cases mark the function as unreliable from BTF point of view.
|
|
|
|
*/
|
2021-03-25 04:51:36 +03:00
|
|
|
if (err)
|
|
|
|
prog->aux->func_info_aux[subprog].unreliable = true;
|
|
|
|
return err;
|
2020-01-10 09:41:20 +03:00
|
|
|
}
|
|
|
|
|
bpf: Support bpf program calling kernel function
This patch adds support to BPF verifier to allow bpf program calling
kernel function directly.
The use case included in this set is to allow bpf-tcp-cc to directly
call some tcp-cc helper functions (e.g. "tcp_cong_avoid_ai()"). Those
functions have already been used by some kernel tcp-cc implementations.
This set will also allow the bpf-tcp-cc program to directly call the
kernel tcp-cc implementation. For example, a bpf_dctcp may only want to
implement its own dctcp_cwnd_event() and reuse other dctcp_*() directly
from the kernel tcp_dctcp.c instead of reimplementing (or
copy-and-pasting) them.
The tcp-cc kernel functions mentioned above will be whitelisted
for the struct_ops bpf-tcp-cc programs to use in a later patch.
The whitelisted functions are not bound to a fixed ABI contract.
Those functions have already been used by the existing kernel tcp-cc.
If any of them has changed, both in-tree and out-of-tree kernel tcp-cc
implementations have to be changed. The same goes for the struct_ops
bpf-tcp-cc programs which have to be adjusted accordingly.
This patch is to make the required changes in the bpf verifier.
First change is in btf.c, it adds a case in "btf_check_func_arg_match()".
When the passed in "btf->kernel_btf == true", it means matching the
verifier regs' states with a kernel function. This will handle the
PTR_TO_BTF_ID reg. It also maps PTR_TO_SOCK_COMMON, PTR_TO_SOCKET,
and PTR_TO_TCP_SOCK to their kernel btf_ids.
In the later libbpf patch, the insn calling a kernel function will
look like:
insn->code == (BPF_JMP | BPF_CALL)
insn->src_reg == BPF_PSEUDO_KFUNC_CALL /* <- new in this patch */
insn->imm == func_btf_id /* btf_id of the running kernel */
[ For the future calling function-in-kernel-module support, an array
of module btf_fds can be passed at the load time and insn->off
can be used to index into this array. ]
At the early stage of verifier, the verifier will collect all kernel
function calls into "struct bpf_kfunc_desc". Those
descriptors are stored in "prog->aux->kfunc_tab" and will
be available to the JIT. Since this "add" operation is similar
to the current "add_subprog()" and looking for the same insn->code,
they are done together in the new "add_subprog_and_kfunc()".
In the "do_check()" stage, the new "check_kfunc_call()" is added
to verify the kernel function call instruction:
1. Ensure the kernel function can be used by a particular BPF_PROG_TYPE.
A new bpf_verifier_ops "check_kfunc_call" is added to do that.
The bpf-tcp-cc struct_ops program will implement this function in
a later patch.
2. Call "btf_check_kfunc_args_match()" to ensure the regs can be
used as the args of a kernel function.
3. Mark the regs' type, subreg_def, and zext_dst.
At the later do_misc_fixups() stage, the new fixup_kfunc_call()
will replace the insn->imm with the function address (relative
to __bpf_call_base). If needed, the jit can find the btf_func_model
by calling the new bpf_jit_find_kfunc_model(prog, insn).
With the imm set to the function address, "bpftool prog dump xlated"
will be able to display the kernel function calls the same way as
it displays other bpf helper calls.
A gpl_compatible program is required to call kernel functions.
This feature currently requires JIT.
The verifier selftests are adjusted because of the changes in
the verbose log in add_subprog_and_kfunc().
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015142.1544736-1-kafai@fb.com
2021-03-25 04:51:42 +03:00
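On the BPF C side the call then looks like an ordinary extern call; a
sketch (the wrapper function is hypothetical) using the __ksym
convention from the accompanying libbpf patches:
extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;

static void my_cong_avoid(struct tcp_sock *tp, __u32 acked)
{
	/* compiled to a BPF_PSEUDO_KFUNC_CALL insn whose imm is the
	 * BTF id of tcp_cong_avoid_ai in the running kernel
	 */
	tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
}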
|
|
|
int btf_check_kfunc_arg_match(struct bpf_verifier_env *env,
|
|
|
|
const struct btf *btf, u32 func_id,
|
|
|
|
struct bpf_reg_state *regs)
|
|
|
|
{
|
bpf: Extend kfunc with PTR_TO_CTX, PTR_TO_MEM argument support
Allow passing PTR_TO_CTX, if the kfunc expects a matching struct type,
and punt to PTR_TO_MEM block if reg->type does not fall in one of
PTR_TO_BTF_ID or PTR_TO_SOCK* types. This will be used by future commits
to get access to XDP and TC PTR_TO_CTX, and pass various data (flags,
l4proto, netns_id, etc.) encoded in opts struct passed as pointer to
kfunc.
For PTR_TO_MEM support, arguments are currently limited to pointer to
scalar, or pointer to struct composed of scalars. This is done so that
unsafe scenarios (like passing PTR_TO_MEM where PTR_TO_BTF_ID of
in-kernel valid structure is expected, which may have pointers) are
avoided. Since the argument checking happens based on the argument register
type, it is not easy to ascertain what the expected type is. In the
future, support for PTR_TO_MEM for kfunc can be extended to serve other
usecases. The struct type whose pointer is passed in may have maximum
nesting depth of 4, all recursively composed of scalars or struct with
scalars.
Future commits will add negative tests that check whether these
restrictions imposed for kfunc arguments are duly enforced by the BPF
verifier.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211217015031.1278167-4-memxor@gmail.com
2021-12-17 04:50:24 +03:00
|
|
|
return btf_check_func_arg_match(env, btf, func_id, regs, true);
|
bpf: Support bpf program calling kernel function
This patch adds support to BPF verifier to allow bpf program calling
kernel function directly.
The use case included in this set is to allow bpf-tcp-cc to directly
call some tcp-cc helper functions (e.g. "tcp_cong_avoid_ai()"). Those
functions have already been used by some kernel tcp-cc implementations.
This set will also allow the bpf-tcp-cc program to directly call the
kernel tcp-cc implementation. For example, a bpf_dctcp may only want to
implement its own dctcp_cwnd_event() and reuse other dctcp_*() directly
from the kernel tcp_dctcp.c instead of reimplementing (or
copy-and-pasting) them.
The tcp-cc kernel functions mentioned above will be whitelisted
for the struct_ops bpf-tcp-cc programs to use in a later patch.
The whitelisted functions are not bound to a fixed ABI contract.
Those functions have already been used by the existing kernel tcp-cc.
If any of them has changed, both in-tree and out-of-tree kernel tcp-cc
implementations have to be changed. The same goes for the struct_ops
bpf-tcp-cc programs which have to be adjusted accordingly.
This patch is to make the required changes in the bpf verifier.
First change is in btf.c, it adds a case in "btf_check_func_arg_match()".
When the passed in "btf->kernel_btf == true", it means matching the
verifier regs' states with a kernel function. This will handle the
PTR_TO_BTF_ID reg. It also maps PTR_TO_SOCK_COMMON, PTR_TO_SOCKET,
and PTR_TO_TCP_SOCK to their kernel btf_ids.
In the later libbpf patch, the insn calling a kernel function will
look like:
insn->code == (BPF_JMP | BPF_CALL)
insn->src_reg == BPF_PSEUDO_KFUNC_CALL /* <- new in this patch */
insn->imm == func_btf_id /* btf_id of the running kernel */
[ For the future calling function-in-kernel-module support, an array
of module btf_fds can be passed at the load time and insn->off
can be used to index into this array. ]
At the early stage of verifier, the verifier will collect all kernel
function calls into "struct bpf_kfunc_desc". Those
descriptors are stored in "prog->aux->kfunc_tab" and will
be available to the JIT. Since this "add" operation is similar
to the current "add_subprog()" and looking for the same insn->code,
they are done together in the new "add_subprog_and_kfunc()".
In the "do_check()" stage, the new "check_kfunc_call()" is added
to verify the kernel function call instruction:
1. Ensure the kernel function can be used by a particular BPF_PROG_TYPE.
A new bpf_verifier_ops "check_kfunc_call" is added to do that.
The bpf-tcp-cc struct_ops program will implement this function in
a later patch.
2. Call "btf_check_kfunc_args_match()" to ensure the regs can be
used as the args of a kernel function.
3. Mark the regs' type, subreg_def, and zext_dst.
At the later do_misc_fixups() stage, the new fixup_kfunc_call()
will replace the insn->imm with the function address (relative
to __bpf_call_base). If needed, the jit can find the btf_func_model
by calling the new bpf_jit_find_kfunc_model(prog, insn).
With the imm set to the function address, "bpftool prog dump xlated"
will be able to display the kernel function calls the same way as
it displays other bpf helper calls.
A gpl_compatible program is required to call kernel functions.
This feature currently requires JIT.
The verifier selftests are adjusted because of the changes in
the verbose log in add_subprog_and_kfunc().
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015142.1544736-1-kafai@fb.com
2021-03-25 04:51:42 +03:00
|
|
|
}
|
|
|
|
|
2020-01-10 09:41:20 +03:00
|
|
|
/* Convert BTF of a function into bpf_reg_state if possible
|
|
|
|
* Returns:
|
|
|
|
* EFAULT - there is a verifier bug. Abort verification.
|
|
|
|
* EINVAL - cannot convert BTF.
|
|
|
|
* 0 - Successfully converted BTF into bpf_reg_state
|
|
|
|
* (either PTR_TO_CTX or SCALAR_VALUE).
|
|
|
|
*/
|
|
|
|
int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
|
2021-02-12 23:56:39 +03:00
|
|
|
struct bpf_reg_state *regs)
|
2020-01-10 09:41:20 +03:00
|
|
|
{
|
|
|
|
struct bpf_verifier_log *log = &env->log;
|
|
|
|
struct bpf_prog *prog = env->prog;
|
2020-01-21 03:53:46 +03:00
|
|
|
enum bpf_prog_type prog_type = prog->type;
|
2020-01-10 09:41:20 +03:00
|
|
|
struct btf *btf = prog->aux->btf;
|
|
|
|
const struct btf_param *args;
|
2021-02-12 23:56:41 +03:00
|
|
|
const struct btf_type *t, *ref_t;
|
2020-01-10 09:41:20 +03:00
|
|
|
u32 i, nargs, btf_id;
|
|
|
|
const char *tname;
|
|
|
|
|
|
|
|
if (!prog->aux->func_info ||
|
|
|
|
prog->aux->func_info_aux[subprog].linkage != BTF_FUNC_GLOBAL) {
|
|
|
|
bpf_log(log, "Verifier bug\n");
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
|
|
|
|
btf_id = prog->aux->func_info[subprog].type_id;
|
|
|
|
if (!btf_id) {
|
|
|
|
bpf_log(log, "Global functions need valid BTF\n");
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
|
|
|
|
t = btf_type_by_id(btf, btf_id);
|
|
|
|
if (!t || !btf_type_is_func(t)) {
|
|
|
|
/* These checks were already done by the verifier while loading
|
|
|
|
* struct bpf_func_info
|
|
|
|
*/
|
|
|
|
bpf_log(log, "BTF of func#%d doesn't point to KIND_FUNC\n",
|
|
|
|
subprog);
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
tname = btf_name_by_offset(btf, t->name_off);
|
|
|
|
|
|
|
|
if (log->level & BPF_LOG_LEVEL)
|
|
|
|
bpf_log(log, "Validating %s() func#%d...\n",
|
|
|
|
tname, subprog);
|
|
|
|
|
|
|
|
if (prog->aux->func_info_aux[subprog].unreliable) {
|
|
|
|
bpf_log(log, "Verifier bug in function %s()\n", tname);
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
2020-01-21 03:53:46 +03:00
|
|
|
if (prog_type == BPF_PROG_TYPE_EXT)
|
2020-09-29 15:45:50 +03:00
|
|
|
prog_type = prog->aux->dst_prog->type;
|
2020-01-10 09:41:20 +03:00
|
|
|
|
|
|
|
t = btf_type_by_id(btf, t->type);
|
|
|
|
if (!t || !btf_type_is_func_proto(t)) {
|
|
|
|
bpf_log(log, "Invalid type of function %s()\n", tname);
|
|
|
|
return -EFAULT;
|
|
|
|
}
|
|
|
|
args = (const struct btf_param *)(t + 1);
|
|
|
|
nargs = btf_type_vlen(t);
|
2021-02-25 23:26:29 +03:00
|
|
|
if (nargs > MAX_BPF_FUNC_REG_ARGS) {
|
|
|
|
bpf_log(log, "Global function %s() with %d > %d args. Buggy compiler.\n",
|
|
|
|
tname, nargs, MAX_BPF_FUNC_REG_ARGS);
|
2020-01-10 09:41:20 +03:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
/* check that function returns int */
|
|
|
|
t = btf_type_by_id(btf, t->type);
|
|
|
|
while (btf_type_is_modifier(t))
|
|
|
|
t = btf_type_by_id(btf, t->type);
|
|
|
|
if (!btf_type_is_int(t) && !btf_type_is_enum(t)) {
|
|
|
|
bpf_log(log,
|
|
|
|
"Global function %s() doesn't return scalar. Only those are supported.\n",
|
|
|
|
tname);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
/* Convert BTF function arguments into verifier types.
|
|
|
|
* Only PTR_TO_CTX and SCALAR are supported atm.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < nargs; i++) {
|
2021-02-12 23:56:39 +03:00
|
|
|
struct bpf_reg_state *reg = ®s[i + 1];
|
|
|
|
|
2020-01-10 09:41:20 +03:00
|
|
|
t = btf_type_by_id(btf, args[i].type);
|
|
|
|
while (btf_type_is_modifier(t))
|
|
|
|
t = btf_type_by_id(btf, t->type);
|
|
|
|
if (btf_type_is_int(t) || btf_type_is_enum(t)) {
|
2021-02-12 23:56:39 +03:00
|
|
|
reg->type = SCALAR_VALUE;
|
2020-01-10 09:41:20 +03:00
|
|
|
continue;
|
|
|
|
}
|
2021-02-12 23:56:41 +03:00
|
|
|
if (btf_type_is_ptr(t)) {
|
|
|
|
if (btf_get_prog_ctx_type(log, btf, t, prog_type, i)) {
|
|
|
|
reg->type = PTR_TO_CTX;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
t = btf_type_skip_modifiers(btf, t->type, NULL);
|
|
|
|
|
|
|
|
ref_t = btf_resolve_size(btf, t, ®->mem_size);
|
|
|
|
if (IS_ERR(ref_t)) {
|
|
|
|
bpf_log(log,
|
|
|
|
"arg#%d reference type('%s %s') size cannot be determined: %ld\n",
|
|
|
|
i, btf_type_str(t), btf_name_by_offset(btf, t->name_off),
|
|
|
|
PTR_ERR(ref_t));
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2021-12-17 03:31:49 +03:00
|
|
|
reg->type = PTR_TO_MEM | PTR_MAYBE_NULL;
|
2021-02-12 23:56:41 +03:00
|
|
|
reg->id = ++env->id_gen;
|
|
|
|
|
2020-01-10 09:41:20 +03:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
bpf_log(log, "Arg#%d type %s in %s() is not supported yet.\n",
|
|
|
|
i, btf_kind_str[BTF_INFO_KIND(t->info)], tname);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2019-11-14 21:57:16 +03:00
|
|
|
return 0;
|
|
|
|
}
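For illustration, a global subprog accepted by the conversion above
might look like this sketch (the function and struct are hypothetical):
the ctx argument becomes PTR_TO_CTX, the int becomes SCALAR_VALUE, and
the struct pointer becomes PTR_TO_MEM | PTR_MAYBE_NULL, so the body must
NULL-check it.
struct my_data {
	int val;
};

int my_global_func(struct xdp_md *ctx, struct my_data *d, int n)
{
	if (!d)			/* PTR_MAYBE_NULL: check before deref */
		return 0;
	return d->val + n;	/* access bounded by reg->mem_size */
}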
|
|
|
|
|
2020-09-28 14:31:04 +03:00
|
|
|
static void btf_type_show(const struct btf *btf, u32 type_id, void *obj,
|
|
|
|
struct btf_show *show)
|
|
|
|
{
|
|
|
|
const struct btf_type *t = btf_type_by_id(btf, type_id);
|
|
|
|
|
|
|
|
show->btf = btf;
|
|
|
|
memset(&show->state, 0, sizeof(show->state));
|
|
|
|
memset(&show->obj, 0, sizeof(show->obj));
|
|
|
|
|
|
|
|
btf_type_ops(t)->show(btf, t, type_id, obj, 0, show);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void btf_seq_show(struct btf_show *show, const char *fmt,
|
|
|
|
va_list args)
|
|
|
|
{
|
|
|
|
seq_vprintf((struct seq_file *)show->target, fmt, args);
|
|
|
|
}
|
|
|
|
|
2020-09-28 14:31:09 +03:00
|
|
|
int btf_type_seq_show_flags(const struct btf *btf, u32 type_id,
|
|
|
|
void *obj, struct seq_file *m, u64 flags)
|
2020-09-28 14:31:04 +03:00
|
|
|
{
|
|
|
|
struct btf_show sseq;
|
|
|
|
|
|
|
|
sseq.target = m;
|
|
|
|
sseq.showfn = btf_seq_show;
|
|
|
|
sseq.flags = flags;
|
|
|
|
|
|
|
|
btf_type_show(btf, type_id, obj, &sseq);
|
|
|
|
|
|
|
|
return sseq.state.status;
|
|
|
|
}
|
|
|
|
|
2018-04-19 01:56:00 +03:00
|
|
|
void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
|
|
|
|
struct seq_file *m)
|
|
|
|
{
|
2020-09-28 14:31:04 +03:00
|
|
|
(void) btf_type_seq_show_flags(btf, type_id, obj, m,
|
|
|
|
BTF_SHOW_NONAME | BTF_SHOW_COMPACT |
|
|
|
|
BTF_SHOW_ZERO | BTF_SHOW_UNSAFE);
|
|
|
|
}
|
|
|
|
|
|
|
|
struct btf_show_snprintf {
|
|
|
|
struct btf_show show;
|
|
|
|
int len_left; /* space left in string */
|
|
|
|
int len; /* length we would have written */
|
|
|
|
};
|
|
|
|
|
|
|
|
static void btf_snprintf_show(struct btf_show *show, const char *fmt,
|
|
|
|
va_list args)
|
|
|
|
{
|
|
|
|
struct btf_show_snprintf *ssnprintf = (struct btf_show_snprintf *)show;
|
|
|
|
int len;
|
|
|
|
|
|
|
|
len = vsnprintf(show->target, ssnprintf->len_left, fmt, args);
|
|
|
|
|
|
|
|
if (len < 0) {
|
|
|
|
ssnprintf->len_left = 0;
|
|
|
|
ssnprintf->len = len;
|
|
|
|
} else if (len > ssnprintf->len_left) {
|
|
|
|
/* no space, drive on to get length we would have written */
|
|
|
|
ssnprintf->len_left = 0;
|
|
|
|
ssnprintf->len += len;
|
|
|
|
} else {
|
|
|
|
ssnprintf->len_left -= len;
|
|
|
|
ssnprintf->len += len;
|
|
|
|
show->target += len;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int btf_type_snprintf_show(const struct btf *btf, u32 type_id, void *obj,
|
|
|
|
char *buf, int len, u64 flags)
|
|
|
|
{
|
|
|
|
struct btf_show_snprintf ssnprintf;
|
|
|
|
|
|
|
|
ssnprintf.show.target = buf;
|
|
|
|
ssnprintf.show.flags = flags;
|
|
|
|
ssnprintf.show.showfn = btf_snprintf_show;
|
|
|
|
ssnprintf.len_left = len;
|
|
|
|
ssnprintf.len = 0;
|
|
|
|
|
|
|
|
btf_type_show(btf, type_id, obj, (struct btf_show *)&ssnprintf);
|
|
|
|
|
|
|
|
/* If we encountered an error, return it. */
|
|
|
|
if (ssnprintf.show.state.status)
|
|
|
|
return ssnprintf.show.state.status;
|
2018-04-19 01:56:00 +03:00
|
|
|
|
2020-09-28 14:31:04 +03:00
|
|
|
/* Otherwise return length we would have written */
|
|
|
|
return ssnprintf.len;
|
2018-04-19 01:56:00 +03:00
|
|
|
}
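A caller-side usage sketch (btf, type_id and obj assumed valid): the
return convention mirrors snprintf(), except that a negative show status
is passed through as-is.
char buf[256];
int ret;

ret = btf_type_snprintf_show(btf, type_id, obj, buf, sizeof(buf),
			     BTF_SHOW_COMPACT);
if (ret < 0)
	return ret;		/* the show machinery reported an error */
if (ret >= sizeof(buf))
	return -ENOSPC;		/* truncated: ret + 1 bytes are needed */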
|
2018-04-19 01:56:01 +03:00
|
|
|
|
2019-08-20 16:53:46 +03:00
|
|
|
#ifdef CONFIG_PROC_FS
|
|
|
|
static void bpf_btf_show_fdinfo(struct seq_file *m, struct file *filp)
|
|
|
|
{
|
|
|
|
const struct btf *btf = filp->private_data;
|
|
|
|
|
|
|
|
seq_printf(m, "btf_id:\t%u\n", btf->id);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2018-04-19 01:56:01 +03:00
|
|
|
static int btf_release(struct inode *inode, struct file *filp)
|
|
|
|
{
|
|
|
|
btf_put(filp->private_data);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-04-19 01:56:02 +03:00
|
|
|
const struct file_operations btf_fops = {
|
2019-08-20 16:53:46 +03:00
|
|
|
#ifdef CONFIG_PROC_FS
|
|
|
|
.show_fdinfo = bpf_btf_show_fdinfo,
|
|
|
|
#endif
|
2018-04-19 01:56:01 +03:00
|
|
|
.release = btf_release,
|
|
|
|
};
|
|
|
|
|
2018-05-05 00:49:51 +03:00
|
|
|
static int __btf_new_fd(struct btf *btf)
|
|
|
|
{
|
|
|
|
return anon_inode_getfd("btf", &btf_fops, btf, O_RDONLY | O_CLOEXEC);
|
|
|
|
}
|
|
|
|
|
2021-05-14 03:36:08 +03:00
|
|
|
int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr)
|
2018-04-19 01:56:01 +03:00
|
|
|
{
|
|
|
|
struct btf *btf;
|
2018-05-05 00:49:51 +03:00
|
|
|
int ret;
|
2018-04-19 01:56:01 +03:00
|
|
|
|
2021-05-14 03:36:08 +03:00
|
|
|
btf = btf_parse(make_bpfptr(attr->btf, uattr.is_kernel),
|
2018-04-19 01:56:01 +03:00
|
|
|
attr->btf_size, attr->btf_log_level,
|
|
|
|
u64_to_user_ptr(attr->btf_log_buf),
|
|
|
|
attr->btf_log_size);
|
|
|
|
if (IS_ERR(btf))
|
|
|
|
return PTR_ERR(btf);
|
|
|
|
|
2018-05-05 00:49:51 +03:00
|
|
|
ret = btf_alloc_id(btf);
|
|
|
|
if (ret) {
|
|
|
|
btf_free(btf);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The BTF ID is published to the userspace.
|
|
|
|
* All BTF free must go through call_rcu() from
|
|
|
|
* now on (i.e. free by calling btf_put()).
|
|
|
|
*/
|
|
|
|
|
|
|
|
ret = __btf_new_fd(btf);
|
|
|
|
if (ret < 0)
|
2018-04-19 01:56:01 +03:00
|
|
|
btf_put(btf);
|
|
|
|
|
2018-05-05 00:49:51 +03:00
|
|
|
return ret;
|
2018-04-19 01:56:01 +03:00
|
|
|
}
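From userspace, this path is reached via the BPF_BTF_LOAD command; a
minimal sketch (btf_data and btf_data_size are assumed to hold a valid
raw .BTF blob):
union bpf_attr attr = {};
int btf_fd;

attr.btf      = (__u64)(unsigned long)btf_data;
attr.btf_size = btf_data_size;

btf_fd = syscall(__NR_bpf, BPF_BTF_LOAD, &attr, sizeof(attr));
/* on success, btf_fd is the anon-inode fd created by __btf_new_fd() */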
|
|
|
|
|
|
|
|
struct btf *btf_get_by_fd(int fd)
|
|
|
|
{
|
|
|
|
struct btf *btf;
|
|
|
|
struct fd f;
|
|
|
|
|
|
|
|
f = fdget(fd);
|
|
|
|
|
|
|
|
if (!f.file)
|
|
|
|
return ERR_PTR(-EBADF);
|
|
|
|
|
|
|
|
if (f.file->f_op != &btf_fops) {
|
|
|
|
fdput(f);
|
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
}
|
|
|
|
|
|
|
|
btf = f.file->private_data;
|
2018-05-05 00:49:51 +03:00
|
|
|
refcount_inc(&btf->refcnt);
|
2018-04-19 01:56:01 +03:00
|
|
|
fdput(f);
|
|
|
|
|
|
|
|
return btf;
|
|
|
|
}
|
2018-04-19 01:56:02 +03:00
|
|
|
|
|
|
|
int btf_get_info_by_fd(const struct btf *btf,
|
|
|
|
const union bpf_attr *attr,
|
|
|
|
union bpf_attr __user *uattr)
|
|
|
|
{
|
2018-05-05 00:49:52 +03:00
|
|
|
struct bpf_btf_info __user *uinfo;
|
2020-03-20 19:22:58 +03:00
|
|
|
struct bpf_btf_info info;
|
2018-05-05 00:49:52 +03:00
|
|
|
u32 info_copy, btf_copy;
|
|
|
|
void __user *ubtf;
|
2020-11-10 04:19:29 +03:00
|
|
|
char __user *uname;
|
|
|
|
u32 uinfo_len, uname_len, name_len;
|
|
|
|
int ret = 0;
|
2018-04-19 01:56:02 +03:00
|
|
|
|
2018-05-05 00:49:52 +03:00
|
|
|
uinfo = u64_to_user_ptr(attr->info.info);
|
|
|
|
uinfo_len = attr->info.info_len;
|
|
|
|
|
|
|
|
info_copy = min_t(u32, uinfo_len, sizeof(info));
|
2020-03-20 19:22:58 +03:00
|
|
|
memset(&info, 0, sizeof(info));
|
2018-05-05 00:49:52 +03:00
|
|
|
if (copy_from_user(&info, uinfo, info_copy))
|
|
|
|
return -EFAULT;
|
|
|
|
|
|
|
|
info.id = btf->id;
|
|
|
|
ubtf = u64_to_user_ptr(info.btf);
|
|
|
|
btf_copy = min_t(u32, btf->data_size, info.btf_size);
|
|
|
|
if (copy_to_user(ubtf, btf->data, btf_copy))
|
|
|
|
return -EFAULT;
|
|
|
|
info.btf_size = btf->data_size;
|
|
|
|
|
2020-11-10 04:19:29 +03:00
|
|
|
info.kernel_btf = btf->kernel_btf;
|
|
|
|
|
|
|
|
uname = u64_to_user_ptr(info.name);
|
|
|
|
uname_len = info.name_len;
|
|
|
|
if (!uname ^ !uname_len)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
name_len = strlen(btf->name);
|
|
|
|
info.name_len = name_len;
|
|
|
|
|
|
|
|
if (uname) {
|
|
|
|
if (uname_len >= name_len + 1) {
|
|
|
|
if (copy_to_user(uname, btf->name, name_len + 1))
|
|
|
|
return -EFAULT;
|
|
|
|
} else {
|
|
|
|
char zero = '\0';
|
|
|
|
|
|
|
|
if (copy_to_user(uname, btf->name, uname_len - 1))
|
|
|
|
return -EFAULT;
|
|
|
|
if (put_user(zero, uname + uname_len - 1))
|
|
|
|
return -EFAULT;
|
|
|
|
/* let user-space know about too short buffer */
|
|
|
|
ret = -ENOSPC;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-05-05 00:49:52 +03:00
|
|
|
if (copy_to_user(uinfo, &info, info_copy) ||
|
|
|
|
put_user(info_copy, &uattr->info.info_len))
|
2018-04-19 01:56:02 +03:00
|
|
|
return -EFAULT;
|
|
|
|
|
2020-11-10 04:19:29 +03:00
|
|
|
return ret;
|
2018-04-19 01:56:02 +03:00
|
|
|
}
|
2018-05-05 00:49:51 +03:00
|
|
|
|
|
|
|
int btf_get_fd_by_id(u32 id)
|
|
|
|
{
|
|
|
|
struct btf *btf;
|
|
|
|
int fd;
|
|
|
|
|
|
|
|
rcu_read_lock();
|
|
|
|
btf = idr_find(&btf_idr, id);
|
|
|
|
if (!btf || !refcount_inc_not_zero(&btf->refcnt))
|
|
|
|
btf = ERR_PTR(-ENOENT);
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
if (IS_ERR(btf))
|
|
|
|
return PTR_ERR(btf);
|
|
|
|
|
|
|
|
fd = __btf_new_fd(btf);
|
|
|
|
if (fd < 0)
|
|
|
|
btf_put(btf);
|
|
|
|
|
|
|
|
return fd;
|
|
|
|
}
|
|
|
|
|
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption throughout the BPF verifier of vmlinux BTF. Instead,
wherever BTF type IDs are involved, also track the instance of struct btf that
goes along with the type ID. This allows gradually adding support for kernel
module BTFs and using/tracking module types across BPF helper calls and
registers.
This patch also renames btf_id() function to btf_obj_id() to minimize naming
clash with using btf_id to denote BTF *type* ID, rather than BTF *object*'s ID.
Also, although btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs need that, so apply BTF refcounting universally when
BPF program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler clean up code.
Now that BTF type ID is not enough to uniquely identify a BTF type, extend BPF
trampoline key to include BTF object ID. To differentiate that from target
program BPF ID, set 31st bit of type ID. BTF type IDs (at least currently) are
not allowed to take full 32 bits, so there is no danger of confusing that bit
with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-03 23:46:29 +03:00
|
|
|
u32 btf_obj_id(const struct btf *btf)
|
2018-05-05 00:49:51 +03:00
|
|
|
{
|
|
|
|
return btf->id;
|
|
|
|
}
|
2020-08-25 22:21:19 +03:00
|
|
|
|
2020-12-03 23:46:30 +03:00
|
|
|
bool btf_is_kernel(const struct btf *btf)
|
|
|
|
{
|
|
|
|
return btf->kernel_btf;
|
|
|
|
}
|
|
|
|
|
2021-01-12 10:55:18 +03:00
|
|
|
bool btf_is_module(const struct btf *btf)
|
|
|
|
{
|
|
|
|
return btf->kernel_btf && strcmp(btf->name, "vmlinux") != 0;
|
|
|
|
}
|
|
|
|
|
2020-08-25 22:21:19 +03:00
|
|
|
static int btf_id_cmp_func(const void *a, const void *b)
|
|
|
|
{
|
|
|
|
const int *pa = a, *pb = b;
|
|
|
|
|
|
|
|
return *pa - *pb;
|
|
|
|
}
|
|
|
|
|
2020-09-21 15:12:17 +03:00
|
|
|
bool btf_id_set_contains(const struct btf_id_set *set, u32 id)
|
2020-08-25 22:21:19 +03:00
|
|
|
{
|
|
|
|
return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
|
|
|
|
}
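Sets passed to btf_id_set_contains() are built at compile time with the
BTF_SET_* macros from <linux/btf_ids.h> and sorted by resolve_btfids,
which is what makes the bsearch() above valid; a sketch (my_func_set is
hypothetical):
BTF_SET_START(my_func_set)
BTF_ID(func, tcp_cong_avoid_ai)
BTF_SET_END(my_func_set)

/* later: if (btf_id_set_contains(&my_func_set, func_id)) ... */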
|
2020-11-10 04:19:31 +03:00
|
|
|
|
bpf: Fix UAF due to race between btf_try_get_module and load_module
While working on code to populate kfunc BTF ID sets for module BTF from
its initcall, I noticed that by the time the initcall is invoked, the
module BTF can already be seen by userspace (and the BPF verifier). The
existing btf_try_get_module calls try_module_get which only fails if
mod->state == MODULE_STATE_GOING, i.e. it can increment module reference
when module initcall is happening in parallel.
Currently, BTF parsing happens from MODULE_STATE_COMING notifier
callback. At this point, the module initcalls have not been invoked.
The notifier callback parses and prepares the module BTF, allocates an
ID, which publishes it to userspace, and then adds it to the btf_modules
list allowing the kernel to invoke btf_try_get_module for the BTF.
However, at this point, the module has not been fully initialized (i.e.
its initcalls have not finished). The code in module.c can still fail
and free the module, without caring for other users. However, nothing
stops btf_try_get_module from succeeding between the state transition
from MODULE_STATE_COMING to MODULE_STATE_LIVE.
This leads to a use-after-free issue when BPF program loads
successfully in the state transition, load_module's do_init_module call
fails and frees the module, and BPF program fd on close calls module_put
for the freed module. A future patch has a test case to verify we don't
regress in this area in the future.
There are multiple points after prepare_coming_module (in load_module)
where failure can occur and module loading can return error. We
illustrate and test for the race using the last point where it can
practically occur (in module __init function).
An illustration of the race:
CPU 0 CPU 1
load_module
notifier_call(MODULE_STATE_COMING)
btf_parse_module
btf_alloc_id // Published to userspace
list_add(&btf_mod->list, btf_modules)
mod->init(...)
... ^
bpf_check |
check_pseudo_btf_id |
btf_try_get_module |
returns true | ...
... | module __init in progress
return prog_fd | ...
... V
if (ret < 0)
free_module(mod)
...
close(prog_fd)
...
bpf_prog_free_deferred
module_put(used_btf.mod) // use-after-free
We fix this issue by setting a flag BTF_MODULE_F_LIVE, from the notifier
callback when MODULE_STATE_LIVE state is reached for the module, so that
we return NULL from btf_try_get_module for modules that are not fully
formed. Since try_module_get already checks that module is not in
MODULE_STATE_GOING state, and that is the only transition a live module
can make before being removed from btf_modules list, this is enough to
close the race and prevent the bug.
A later selftest patch crafts the race condition artificially to verify
that it has been fixed, and that the verifier fails to load the program
(with ENXIO).
Lastly, a couple of comments:
1. Even if this race didn't exist, it seems more appropriate to only
access resources (ksyms and kfuncs) of a fully formed module which
has been initialized completely.
2. This patch was born out of need for synchronization against module
initcall for the next patch, so it is needed for correctness even
without the aforementioned race condition. The BTF resources
initialized by module initcall are set up once and then only looked
up, so just waiting until the initcall has finished ensures correct
behavior.
Fixes: 541c3bad8dc5 ("bpf: Support BPF ksym variables in kernel modules")
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-2-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 19:39:44 +03:00
|
|
|
enum {
|
|
|
|
BTF_MODULE_F_LIVE = (1 << 0),
|
|
|
|
};
|
|
|
|
|
2020-11-10 04:19:31 +03:00
|
|
|
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
|
|
|
|
struct btf_module {
|
|
|
|
struct list_head list;
|
|
|
|
struct module *module;
|
|
|
|
struct btf *btf;
|
|
|
|
struct bin_attribute *sysfs_attr;
|
bpf: Fix UAF due to race between btf_try_get_module and load_module
While working on code to populate kfunc BTF ID sets for module BTF from
its initcall, I noticed that by the time the initcall is invoked, the
module BTF can already be seen by userspace (and the BPF verifier). The
existing btf_try_get_module calls try_module_get which only fails if
mod->state == MODULE_STATE_GOING, i.e. it can increment module reference
when module initcall is happening in parallel.
Currently, BTF parsing happens from MODULE_STATE_COMING notifier
callback. At this point, the module initcalls have not been invoked.
The notifier callback parses and prepares the module BTF, allocates an
ID, which publishes it to userspace, and then adds it to the btf_modules
list allowing the kernel to invoke btf_try_get_module for the BTF.
However, at this point, the module has not been fully initialized (i.e.
its initcalls have not finished). The code in module.c can still fail
and free the module, without caring for other users. However, nothing
stops btf_try_get_module from succeeding between the state transition
from MODULE_STATE_COMING to MODULE_STATE_LIVE.
This leads to a use-after-free issue when BPF program loads
successfully in the state transition, load_module's do_init_module call
fails and frees the module, and BPF program fd on close calls module_put
for the freed module. A future patch has a test case to verify we don't
regress in this area in the future.
There are multiple points after prepare_coming_module (in load_module)
where failure can occur and module loading can return error. We
illustrate and test for the race using the last point where it can
practically occur (in module __init function).
An illustration of the race:
CPU 0 CPU 1
load_module
notifier_call(MODULE_STATE_COMING)
btf_parse_module
btf_alloc_id // Published to userspace
list_add(&btf_mod->list, btf_modules)
mod->init(...)
... ^
bpf_check |
check_pseudo_btf_id |
btf_try_get_module |
returns true | ...
... | module __init in progress
return prog_fd | ...
... V
if (ret < 0)
free_module(mod)
...
close(prog_fd)
...
bpf_prog_free_deferred
module_put(used_btf.mod) // use-after-free
We fix this issue by setting a flag BTF_MODULE_F_LIVE, from the notifier
callback when MODULE_STATE_LIVE state is reached for the module, so that
we return NULL from btf_try_get_module for modules that are not fully
formed. Since try_module_get already checks that module is not in
MODULE_STATE_GOING state, and that is the only transition a live module
can make before being removed from btf_modules list, this is enough to
close the race and prevent the bug.
A later selftest patch crafts the race condition artificially to verify
that it has been fixed, and that the verifier fails to load the program
(with ENXIO).
Lastly, a couple of comments:
1. Even if this race didn't exist, it seems more appropriate to only
access resources (ksyms and kfuncs) of a fully formed module which
has been initialized completely.
2. This patch was born out of need for synchronization against module
initcall for the next patch, so it is needed for correctness even
without the aforementioned race condition. The BTF resources
initialized by module initcall are set up once and then only looked
up, so just waiting until the initcall has finished ensures correct
behavior.
Fixes: 541c3bad8dc5 ("bpf: Support BPF ksym variables in kernel modules")
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-2-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 19:39:44 +03:00
|
|
|
int flags;
|
2020-11-10 04:19:31 +03:00
|
|
|
};
|
|
|
|
|
|
|
|
static LIST_HEAD(btf_modules);
|
|
|
|
static DEFINE_MUTEX(btf_module_mutex);
|
|
|
|
|
|
|
|
static ssize_t
|
|
|
|
btf_module_read(struct file *file, struct kobject *kobj,
|
|
|
|
struct bin_attribute *bin_attr,
|
|
|
|
char *buf, loff_t off, size_t len)
|
|
|
|
{
|
|
|
|
const struct btf *btf = bin_attr->private;
|
|
|
|
|
|
|
|
memcpy(buf, btf->data + off, len);
|
|
|
|
return len;
|
|
|
|
}
|
|
|
|
|
2021-12-01 21:10:31 +03:00
|
|
|
static void purge_cand_cache(struct btf *btf);
|
|
|
|
|
2020-11-10 04:19:31 +03:00
|
|
|
static int btf_module_notify(struct notifier_block *nb, unsigned long op,
|
|
|
|
void *module)
|
|
|
|
{
|
|
|
|
struct btf_module *btf_mod, *tmp;
|
|
|
|
struct module *mod = module;
|
|
|
|
struct btf *btf;
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
if (mod->btf_data_size == 0 ||
|
bpf: Fix UAF due to race between btf_try_get_module and load_module
While working on code to populate kfunc BTF ID sets for module BTF from
its initcall, I noticed that by the time the initcall is invoked, the
module BTF can already be seen by userspace (and the BPF verifier). The
existing btf_try_get_module calls try_module_get which only fails if
mod->state == MODULE_STATE_GOING, i.e. it can increment module reference
when module initcall is happening in parallel.
Currently, BTF parsing happens from MODULE_STATE_COMING notifier
callback. At this point, the module initcalls have not been invoked.
The notifier callback parses and prepares the module BTF, allocates an
ID, which publishes it to userspace, and then adds it to the btf_modules
list allowing the kernel to invoke btf_try_get_module for the BTF.
However, at this point, the module has not been fully initialized (i.e.
its initcalls have not finished). The code in module.c can still fail
and free the module, without caring for other users. However, nothing
stops btf_try_get_module from succeeding between the state transition
from MODULE_STATE_COMING to MODULE_STATE_LIVE.
This leads to a use-after-free issue when BPF program loads
successfully in the state transition, load_module's do_init_module call
fails and frees the module, and BPF program fd on close calls module_put
for the freed module. A future patch has a test case to verify we don't
regress in this area in the future.
There are multiple points after prepare_coming_module (in load_module)
where failure can occur and module loading can return error. We
illustrate and test for the race using the last point where it can
practically occur (in module __init function).
An illustration of the race:
CPU 0 CPU 1
load_module
notifier_call(MODULE_STATE_COMING)
btf_parse_module
btf_alloc_id // Published to userspace
list_add(&btf_mod->list, btf_modules)
mod->init(...)
... ^
bpf_check |
check_pseudo_btf_id |
btf_try_get_module |
returns true | ...
... | module __init in progress
return prog_fd | ...
... V
if (ret < 0)
free_module(mod)
...
close(prog_fd)
...
bpf_prog_free_deferred
module_put(used_btf.mod) // use-after-free
We fix this issue by setting a flag BTF_MODULE_F_LIVE, from the notifier
callback when MODULE_STATE_LIVE state is reached for the module, so that
we return NULL from btf_try_get_module for modules that are not fully
formed. Since try_module_get already checks that module is not in
MODULE_STATE_GOING state, and that is the only transition a live module
can make before being removed from btf_modules list, this is enough to
close the race and prevent the bug.
A later selftest patch crafts the race condition artificially to verify
that it has been fixed, and that the verifier fails to load the program
(with ENXIO).
Lastly, a couple of comments:
1. Even if this race didn't exist, it seems more appropriate to only
access resources (ksyms and kfuncs) of a fully formed module which
has been initialized completely.
2. This patch was born out of need for synchronization against module
initcall for the next patch, so it is needed for correctness even
without the aforementioned race condition. The BTF resources
initialized by module initcall are set up once and then only looked
up, so just waiting until the initcall has finished ensures correct
behavior.
Fixes: 541c3bad8dc5 ("bpf: Support BPF ksym variables in kernel modules")
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-2-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 19:39:44 +03:00
|
|
|
(op != MODULE_STATE_COMING && op != MODULE_STATE_LIVE &&
|
|
|
|
op != MODULE_STATE_GOING))
|
2020-11-10 04:19:31 +03:00
|
|
|
goto out;
|
|
|
|
|
|
|
|
switch (op) {
|
|
|
|
case MODULE_STATE_COMING:
|
|
|
|
btf_mod = kzalloc(sizeof(*btf_mod), GFP_KERNEL);
|
|
|
|
if (!btf_mod) {
|
|
|
|
err = -ENOMEM;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
btf = btf_parse_module(mod->name, mod->btf_data, mod->btf_data_size);
|
|
|
|
if (IS_ERR(btf)) {
|
|
|
|
pr_warn("failed to validate module [%s] BTF: %ld\n",
|
|
|
|
mod->name, PTR_ERR(btf));
|
|
|
|
kfree(btf_mod);
|
|
|
|
err = PTR_ERR(btf);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
err = btf_alloc_id(btf);
|
|
|
|
if (err) {
|
|
|
|
btf_free(btf);
|
|
|
|
kfree(btf_mod);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2021-12-01 21:10:31 +03:00
|
|
|
purge_cand_cache(NULL);
|
2020-11-10 04:19:31 +03:00
|
|
|
mutex_lock(&btf_module_mutex);
|
|
|
|
btf_mod->module = module;
|
|
|
|
btf_mod->btf = btf;
|
|
|
|
list_add(&btf_mod->list, &btf_modules);
|
|
|
|
mutex_unlock(&btf_module_mutex);
|
|
|
|
|
|
|
|
if (IS_ENABLED(CONFIG_SYSFS)) {
|
|
|
|
struct bin_attribute *attr;
|
|
|
|
|
|
|
|
attr = kzalloc(sizeof(*attr), GFP_KERNEL);
|
|
|
|
if (!attr)
|
|
|
|
goto out;
|
|
|
|
|
|
|
|
sysfs_bin_attr_init(attr);
|
|
|
|
attr->attr.name = btf->name;
|
|
|
|
attr->attr.mode = 0444;
|
|
|
|
attr->size = btf->data_size;
|
|
|
|
attr->private = btf;
|
|
|
|
attr->read = btf_module_read;
|
|
|
|
|
|
|
|
err = sysfs_create_bin_file(btf_kobj, attr);
|
|
|
|
if (err) {
|
|
|
|
pr_warn("failed to register module [%s] BTF in sysfs: %d\n",
|
|
|
|
mod->name, err);
|
|
|
|
kfree(attr);
|
|
|
|
err = 0;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
btf_mod->sysfs_attr = attr;
|
|
|
|
}
|
|
|
|
|
bpf: Fix UAF due to race between btf_try_get_module and load_module
While working on code to populate kfunc BTF ID sets for module BTF from
its initcall, I noticed that by the time the initcall is invoked, the
module BTF can already be seen by userspace (and the BPF verifier). The
existing btf_try_get_module calls try_module_get which only fails if
mod->state == MODULE_STATE_GOING, i.e. it can increment module reference
when module initcall is happening in parallel.
Currently, BTF parsing happens from MODULE_STATE_COMING notifier
callback. At this point, the module initcalls have not been invoked.
The notifier callback parses and prepares the module BTF, allocates an
ID, which publishes it to userspace, and then adds it to the btf_modules
list allowing the kernel to invoke btf_try_get_module for the BTF.
However, at this point, the module has not been fully initialized (i.e.
its initcalls have not finished). The code in module.c can still fail
and free the module, without caring for other users. However, nothing
stops btf_try_get_module from succeeding between the state transition
from MODULE_STATE_COMING to MODULE_STATE_LIVE.
This leads to a use-after-free issue when BPF program loads
successfully in the state transition, load_module's do_init_module call
fails and frees the module, and BPF program fd on close calls module_put
for the freed module. A future patch has a test case to verify we don't
regress in this area in the future.
There are multiple points after prepare_coming_module (in load_module)
where failure can occur and module loading can return error. We
illustrate and test for the race using the last point where it can
practically occur (in module __init function).
An illustration of the race:
CPU 0 CPU 1
load_module
notifier_call(MODULE_STATE_COMING)
btf_parse_module
btf_alloc_id // Published to userspace
list_add(&btf_mod->list, btf_modules)
mod->init(...)
... ^
bpf_check |
check_pseudo_btf_id |
btf_try_get_module |
returns true | ...
... | module __init in progress
return prog_fd | ...
... V
if (ret < 0)
free_module(mod)
...
close(prog_fd)
...
bpf_prog_free_deferred
module_put(used_btf.mod) // use-after-free
We fix this issue by setting a flag BTF_MODULE_F_LIVE, from the notifier
callback when MODULE_STATE_LIVE state is reached for the module, so that
we return NULL from btf_try_get_module for modules that are not fully
formed. Since try_module_get already checks that module is not in
MODULE_STATE_GOING state, and that is the only transition a live module
can make before being removed from btf_modules list, this is enough to
close the race and prevent the bug.
A later selftest patch crafts the race condition artificially to verify
that it has been fixed, and that the verifier fails to load the program
(with ENXIO).
Lastly, a couple of comments:
1. Even if this race didn't exist, it seems more appropriate to only
access resources (ksyms and kfuncs) of a fully formed module which
has been initialized completely.
2. This patch was born out of need for synchronization against module
initcall for the next patch, so it is needed for correctness even
without the aforementioned race condition. The BTF resources
initialized by module initcall are set up once and then only looked
up, so just waiting until the initcall has finished ensures correct
behavior.
Fixes: 541c3bad8dc5 ("bpf: Support BPF ksym variables in kernel modules")
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-2-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 19:39:44 +03:00
|
|
|
break;
|
|
|
|
case MODULE_STATE_LIVE:
|
|
|
|
mutex_lock(&btf_module_mutex);
|
|
|
|
list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
|
|
|
|
if (btf_mod->module != module)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
btf_mod->flags |= BTF_MODULE_F_LIVE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
mutex_unlock(&btf_module_mutex);
|
2020-11-10 04:19:31 +03:00
|
|
|
break;
|
|
|
|
case MODULE_STATE_GOING:
|
|
|
|
mutex_lock(&btf_module_mutex);
|
|
|
|
list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
|
|
|
|
if (btf_mod->module != module)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
list_del(&btf_mod->list);
|
|
|
|
if (btf_mod->sysfs_attr)
|
|
|
|
sysfs_remove_bin_file(btf_kobj, btf_mod->sysfs_attr);
|
2021-12-01 21:10:31 +03:00
|
|
|
purge_cand_cache(btf_mod->btf);
|
2020-11-10 04:19:31 +03:00
|
|
|
btf_put(btf_mod->btf);
|
|
|
|
kfree(btf_mod->sysfs_attr);
|
|
|
|
kfree(btf_mod);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
mutex_unlock(&btf_module_mutex);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
out:
|
|
|
|
return notifier_from_errno(err);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct notifier_block btf_module_nb = {
|
|
|
|
.notifier_call = btf_module_notify,
|
|
|
|
};
|
|
|
|
|
|
|
|
static int __init btf_module_init(void)
|
|
|
|
{
|
|
|
|
register_module_notifier(&btf_module_nb);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
fs_initcall(btf_module_init);
|
|
|
|
#endif /* CONFIG_DEBUG_INFO_BTF_MODULES */
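The sysfs registration above is what exposes each module's raw BTF to
userspace alongside vmlinux BTF; a usage sketch:
/* each registered BTF appears as a read-only binary file, e.g.
 *   /sys/kernel/btf/vmlinux
 *   /sys/kernel/btf/<module>
 * which tools such as bpftool can parse:
 *   bpftool btf dump file /sys/kernel/btf/vmlinux format c
 */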
|
2021-01-12 10:55:18 +03:00
|
|
|
|
|
|
|
struct module *btf_try_get_module(const struct btf *btf)
|
|
|
|
{
|
|
|
|
struct module *res = NULL;
|
|
|
|
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
|
|
|
|
struct btf_module *btf_mod, *tmp;
|
|
|
|
|
|
|
|
mutex_lock(&btf_module_mutex);
|
|
|
|
list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
|
|
|
|
if (btf_mod->btf != btf)
|
|
|
|
continue;
|
|
|
|
|
bpf: Fix UAF due to race between btf_try_get_module and load_module
While working on code to populate kfunc BTF ID sets for module BTF from
its initcall, I noticed that by the time the initcall is invoked, the
module BTF can already be seen by userspace (and the BPF verifier). The
existing btf_try_get_module calls try_module_get which only fails if
mod->state == MODULE_STATE_GOING, i.e. it can increment module reference
when module initcall is happening in parallel.
Currently, BTF parsing happens from MODULE_STATE_COMING notifier
callback. At this point, the module initcalls have not been invoked.
The notifier callback parses and prepares the module BTF, allocates an
ID, which publishes it to userspace, and then adds it to the btf_modules
list allowing the kernel to invoke btf_try_get_module for the BTF.
However, at this point, the module has not been fully initialized (i.e.
its initcalls have not finished). The code in module.c can still fail
and free the module, without caring for other users. However, nothing
stops btf_try_get_module from succeeding between the state transition
from MODULE_STATE_COMING to MODULE_STATE_LIVE.
This leads to a use-after-free issue when BPF program loads
successfully in the state transition, load_module's do_init_module call
fails and frees the module, and BPF program fd on close calls module_put
for the freed module. A future patch has a test case to verify we don't
regress in this area in the future.
There are multiple points after prepare_coming_module (in load_module)
where failure can occur and module loading can return error. We
illustrate and test for the race using the last point where it can
practically occur (in module __init function).
An illustration of the race:
CPU 0 CPU 1
load_module
notifier_call(MODULE_STATE_COMING)
btf_parse_module
btf_alloc_id // Published to userspace
list_add(&btf_mod->list, btf_modules)
mod->init(...)
... ^
bpf_check |
check_pseudo_btf_id |
btf_try_get_module |
returns true | ...
... | module __init in progress
return prog_fd | ...
... V
if (ret < 0)
free_module(mod)
...
close(prog_fd)
...
bpf_prog_free_deferred
module_put(used_btf.mod) // use-after-free
We fix this issue by setting a flag BTF_MODULE_F_LIVE, from the notifier
callback when MODULE_STATE_LIVE state is reached for the module, so that
we return NULL from btf_try_get_module for modules that are not fully
formed. Since try_module_get already checks that module is not in
MODULE_STATE_GOING state, and that is the only transition a live module
can make before being removed from btf_modules list, this is enough to
close the race and prevent the bug.
A later selftest patch crafts the race condition artificially to verify
that it has been fixed, and that the verifier fails to load the program
(with ENXIO).
Lastly, a couple of comments:
1. Even if this race didn't exist, it seems more appropriate to only
access resources (ksyms and kfuncs) of a fully formed module which
has been initialized completely.
2. This patch was born out of the need for synchronization against the
module initcall for the next patch, so it is needed for correctness even
without the aforementioned race condition. The BTF resources initialized
by the module initcall are set up once and then only looked up, so simply
waiting until the initcall has finished ensures correct behavior.
Fixes: 541c3bad8dc5 ("bpf: Support BPF ksym variables in kernel modules")
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-2-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 19:39:44 +03:00
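For context, a hedged sketch of the notifier-side half of this fix: on the
MODULE_STATE_LIVE transition, the matching btf_module entry is marked live so
that btf_try_get_module (below) starts handing out references. The real logic
lives in btf_module_notify earlier in this file; this condensed form and the
helper name are assumptions based on the commit description above.

/* Sketch only: mark a module's BTF entry live once its __init finished. */
static void btf_module_mark_live_sketch(struct module *module)
{
	struct btf_module *btf_mod, *tmp;

	mutex_lock(&btf_module_mutex);
	list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
		if (btf_mod->module != module)
			continue;
		btf_mod->flags |= BTF_MODULE_F_LIVE;
		break;
	}
	mutex_unlock(&btf_module_mutex);
}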
|
|
|
/* We must only consider a module whose __init routine has
|
|
|
|
* finished, hence we must check for the BTF_MODULE_F_LIVE flag,
|
|
|
|
* which is set from the notifier callback for
|
|
|
|
* MODULE_STATE_LIVE.
|
|
|
|
*/
|
|
|
|
if ((btf_mod->flags & BTF_MODULE_F_LIVE) && try_module_get(btf_mod->module))
|
2021-01-12 10:55:18 +03:00
|
|
|
res = btf_mod->module;
|
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
mutex_unlock(&btf_module_mutex);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return res;
|
|
|
|
}
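A hedged sketch of the expected caller pattern (the helper name is
illustrative, not from this file): a non-NULL return pins the module, and the
reference must be balanced with module_put once the BTF-backed resource is no
longer in use. NULL is returned both for vmlinux BTF and for modules that are
not yet (or no longer) live.

/* Illustrative only: hold the module while using its BTF-described data. */
static int example_use_module_btf(const struct btf *btf)
{
	struct module *mod;

	mod = btf_try_get_module(btf);
	if (!mod)
		return -ENXIO;	/* not a module BTF, or module not fully formed */

	/* ... safely use kfuncs/ksyms described by this BTF ... */

	module_put(mod);
	return 0;
}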
|
2021-05-14 03:36:11 +03:00
|
|
|
|
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
call register_btf_kfunc_id_set from their initcalls (for built-in code
and modules).
The 'hook' corresponds to one of the program types, e.g. XDP, TC/SCHED_CLS,
and STRUCT_OPS, and the 'types' are check (allowed or not), acquire,
release, and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set for a given hook and type for vmlinux sets, since they are allocated
on demand, and are otherwise NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in the
verifier; this new method is faster than the existing list-searching
method and is also automatic. It also lets other code not care whether
the set is unallocated or not.
Note that module code can only make a single register_btf_kfunc_id_set
call per hook. This is why sorting is only done for in-kernel vmlinux
sets: there might be multiple sets for the same hook and type that must
be concatenated, hence sorting them is required to ensure that bsearch
in btf_id_set_contains continues to work correctly.
The next commit will update the kernel users to make use of this
infrastructure.
Finally, add a __maybe_unused annotation to the BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings at
build time.
The previous patch is also needed to provide synchronization against
initialization for the module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase, comprised of multiple register_btf_kfunc_id_set
calls, as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup functions (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for the module
BTF. At this point, the BTF may be available to userspace while its
corresponding module is still initializing. A BTF fd can then be passed
to the verifier using the bpf syscall (e.g. for a kfunc call insn).
Hence, there is a race window where the verifier may concurrently try to
look up the kfunc_set_tab. To prevent this race, we must either ensure
that the operations are serialized, or wait for the __init functions to
complete.
In the earlier registration API, this race was alleviated because the
verifier's bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until
it was added by the registration function (usually called at the end of
the module __init function, after all module resources have been
initialized). If the verifier made the check_kfunc_call before the kfunc
BTF ID was added to the list, it would fail verification (saying the
call isn't allowed). Access to the list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call): if the __init call is in progress, the module will be
in the middle of the MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, the BTF_MODULE_F_LIVE flag for the btf_module instance will
not be set, and so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 19:39:45 +03:00
|
|
|
/* Returns struct btf corresponding to the struct module
|
|
|
|
*
|
|
|
|
* This function can return NULL or ERR_PTR. Note that the caller must
|
|
|
|
* release the reference on the struct btf iff btf_is_module is true.
|
|
|
|
*/
|
|
|
|
static struct btf *btf_get_module_btf(const struct module *module)
|
|
|
|
{
|
|
|
|
struct btf *btf = NULL;
|
|
|
|
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
|
|
|
|
struct btf_module *btf_mod, *tmp;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (!module)
|
|
|
|
return bpf_get_btf_vmlinux();
|
|
|
|
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
|
|
|
|
mutex_lock(&btf_module_mutex);
|
|
|
|
list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) {
|
|
|
|
if (btf_mod->module != module)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
btf_get(btf_mod->btf);
|
|
|
|
btf = btf_mod->btf;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
mutex_unlock(&btf_module_mutex);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return btf;
|
|
|
|
}
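A hedged usage sketch of the reference rule stated in the comment above: the
caller drops the reference only when the returned BTF is a module BTF, since
vmlinux BTF obtained via bpf_get_btf_vmlinux is not refcounted here. The
helper name is illustrative only.

/* Illustrative only: balance btf_get_module_btf per its comment. */
static void example_with_module_btf(const struct module *module)
{
	struct btf *btf = btf_get_module_btf(module);

	if (IS_ERR_OR_NULL(btf))
		return;

	/* ... look up types in btf ... */

	if (btf_is_module(btf))
		btf_put(btf);	/* release only for module BTF */
}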
|
|
|
|
|
2021-05-14 03:36:11 +03:00
|
|
|
BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int, flags)
|
|
|
|
{
|
|
|
|
struct btf *btf;
|
|
|
|
long ret;
|
|
|
|
|
|
|
|
if (flags)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (name_sz <= 1 || name[name_sz - 1])
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
btf = bpf_get_btf_vmlinux();
|
|
|
|
if (IS_ERR(btf))
|
|
|
|
return PTR_ERR(btf);
|
|
|
|
|
|
|
|
ret = btf_find_by_name_kind(btf, name, kind);
|
|
|
|
/* ret is never zero, since btf_find_by_name_kind returns
|
|
|
|
* positive btf_id or negative error.
|
|
|
|
*/
|
|
|
|
if (ret < 0) {
|
|
|
|
struct btf *mod_btf;
|
|
|
|
int id;
|
|
|
|
|
|
|
|
/* If name is not found in vmlinux's BTF then search in module's BTFs */
|
|
|
|
spin_lock_bh(&btf_idr_lock);
|
|
|
|
idr_for_each_entry(&btf_idr, mod_btf, id) {
|
|
|
|
if (!btf_is_module(mod_btf))
|
|
|
|
continue;
|
|
|
|
/* linear search could be slow hence unlock/lock
|
|
|
|
* the IDR to avoid holding it for too long
|
|
|
|
*/
|
|
|
|
btf_get(mod_btf);
|
|
|
|
spin_unlock_bh(&btf_idr_lock);
|
|
|
|
ret = btf_find_by_name_kind(mod_btf, name, kind);
|
|
|
|
if (ret > 0) {
|
|
|
|
int btf_obj_fd;
|
|
|
|
|
|
|
|
btf_obj_fd = __btf_new_fd(mod_btf);
|
|
|
|
if (btf_obj_fd < 0) {
|
|
|
|
btf_put(mod_btf);
|
|
|
|
return btf_obj_fd;
|
|
|
|
}
|
|
|
|
return ret | (((u64)btf_obj_fd) << 32);
|
|
|
|
}
|
|
|
|
spin_lock_bh(&btf_idr_lock);
|
|
|
|
btf_put(mod_btf);
|
|
|
|
}
|
|
|
|
spin_unlock_bh(&btf_idr_lock);
|
|
|
|
}
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_btf_find_by_name_kind_proto = {
|
|
|
|
.func = bpf_btf_find_by_name_kind,
|
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_INTEGER,
|
2021-12-17 03:31:51 +03:00
|
|
|
.arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY,
|
2021-05-14 03:36:11 +03:00
|
|
|
.arg2_type = ARG_CONST_SIZE,
|
|
|
|
.arg3_type = ARG_ANYTHING,
|
|
|
|
.arg4_type = ARG_ANYTHING,
|
|
|
|
};
|
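The 64-bit return value of this helper packs two fields; a hedged sketch of
how a BPF-side caller (e.g. a BPF_PROG_TYPE_SYSCALL program) might split it.
Variable names below are illustrative, not from this file.

/* Sketch only: lower 32 bits = BTF type id, upper 32 bits = fd of the
 * module BTF object (0 when the type was found in vmlinux BTF).
 */
char name[] = "task_struct";
long res = bpf_btf_find_by_name_kind(name, sizeof(name), BTF_KIND_STRUCT, 0);
if (res >= 0) {
	__u32 btf_id = (__u32)res;
	int btf_obj_fd = res >> 32;	/* 0 means vmlinux BTF */
	/* ... use btf_id (and btf_obj_fd if non-zero) ... */
}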
2021-08-26 04:48:31 +03:00
|
|
|
|
2021-11-12 18:02:43 +03:00
|
|
|
BTF_ID_LIST_GLOBAL(btf_tracing_ids, MAX_BTF_TRACING_TYPE)
|
|
|
|
#define BTF_TRACING_TYPE(name, type) BTF_ID(struct, type)
|
|
|
|
BTF_TRACING_TYPE_xxx
|
|
|
|
#undef BTF_TRACING_TYPE
|
2021-10-02 04:17:51 +03:00
|
|
|
|
2022-01-14 19:39:45 +03:00
|
|
|
/* Kernel Function (kfunc) BTF ID set registration API */
|
2021-10-02 04:17:51 +03:00
|
|
|
|
2022-01-14 19:39:45 +03:00
|
|
|
static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
|
|
|
|
enum btf_kfunc_type type,
|
|
|
|
struct btf_id_set *add_set, bool vmlinux_set)
|
2021-10-02 04:17:51 +03:00
|
|
|
{
|
2022-01-14 19:39:45 +03:00
|
|
|
struct btf_kfunc_set_tab *tab;
|
|
|
|
struct btf_id_set *set;
|
|
|
|
u32 set_cnt;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!add_set->cnt)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
tab = btf->kfunc_set_tab;
|
|
|
|
if (!tab) {
|
|
|
|
tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN);
|
|
|
|
if (!tab)
|
|
|
|
return -ENOMEM;
|
|
|
|
btf->kfunc_set_tab = tab;
|
|
|
|
}
|
|
|
|
|
|
|
|
set = tab->sets[hook][type];
|
|
|
|
/* Warn when register_btf_kfunc_id_set is called twice for the same hook
|
|
|
|
* for module sets.
|
|
|
|
*/
|
|
|
|
if (WARN_ON_ONCE(set && !vmlinux_set)) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We don't need to allocate, concatenate, and sort module sets, because
|
|
|
|
* only one is allowed per hook. Hence, we can directly assign the
|
|
|
|
* pointer and return.
|
|
|
|
*/
|
|
|
|
if (!vmlinux_set) {
|
|
|
|
tab->sets[hook][type] = add_set;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* In case of vmlinux sets, there may be more than one set being
|
|
|
|
* registered per hook. To create a unified set, we allocate a new set
|
|
|
|
* and concatenate all individual sets being registered. While each set
|
|
|
|
* is individually sorted, they may become unsorted when concatenated,
|
|
|
|
* hence re-sorting the final set again is required to make binary
|
|
|
|
* searching the set using btf_id_set_contains function work.
|
|
|
|
*/
|
|
|
|
set_cnt = set ? set->cnt : 0;
|
|
|
|
|
|
|
|
if (set_cnt > U32_MAX - add_set->cnt) {
|
|
|
|
ret = -EOVERFLOW;
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (set_cnt + add_set->cnt > BTF_KFUNC_SET_MAX_CNT) {
|
|
|
|
ret = -E2BIG;
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Grow set */
|
|
|
|
set = krealloc(tab->sets[hook][type],
|
|
|
|
offsetof(struct btf_id_set, ids[set_cnt + add_set->cnt]),
|
|
|
|
GFP_KERNEL | __GFP_NOWARN);
|
|
|
|
if (!set) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto end;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* For newly allocated set, initialize set->cnt to 0 */
|
|
|
|
if (!tab->sets[hook][type])
|
|
|
|
set->cnt = 0;
|
|
|
|
tab->sets[hook][type] = set;
|
|
|
|
|
|
|
|
/* Concatenate the two sets */
|
|
|
|
memcpy(set->ids + set->cnt, add_set->ids, add_set->cnt * sizeof(set->ids[0]));
|
|
|
|
set->cnt += add_set->cnt;
|
|
|
|
|
|
|
|
sort(set->ids, set->cnt, sizeof(set->ids[0]), btf_id_cmp_func, NULL);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
end:
|
|
|
|
btf_free_kfunc_set_tab(btf);
|
|
|
|
return ret;
|
2021-10-02 04:17:51 +03:00
|
|
|
}
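The sort above exists solely to keep the binary-search lookup valid over the
concatenated vmlinux sets. For reference, a sketch of the comparator and the
lookup it serves, consistent with btf_id_cmp_func and btf_id_set_contains as
used in this file (the exact definitions live elsewhere, so treat this shape
as an assumption):

/* Sketch: IDs are kept ascending so membership tests are O(log n). */
static int btf_id_cmp_func(const void *a, const void *b)
{
	const int *pa = a, *pb = b;

	return *pa - *pb;
}

bool btf_id_set_contains(const struct btf_id_set *set, u32 id)
{
	return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL;
}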
|
|
|
|
|
2022-01-14 19:39:45 +03:00
|
|
|
static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook,
|
|
|
|
const struct btf_kfunc_id_set *kset)
|
2021-10-02 04:17:51 +03:00
|
|
|
{
|
2022-01-14 19:39:45 +03:00
|
|
|
bool vmlinux_set = !btf_is_module(btf);
|
|
|
|
int type, ret = 0;	/* stays 0 when kset has no populated type sets */
|
|
|
|
|
|
|
|
for (type = 0; type < ARRAY_SIZE(kset->sets); type++) {
|
|
|
|
if (!kset->sets[type])
|
|
|
|
continue;
|
|
|
|
|
|
|
|
ret = __btf_populate_kfunc_set(btf, hook, type, kset->sets[type], vmlinux_set);
|
|
|
|
if (ret)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return ret;
|
2021-10-02 04:17:51 +03:00
|
|
|
}
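A hedged sketch of how a subsystem or module might define and register a set
that flows into the population logic above, following the BTF_SET_START /
BTF_ID macro pattern and the btf_kfunc_id_set layout this patch introduces.
All 'example_*' identifiers are illustrative.

/* Illustrative only: a module registering one kfunc for TC programs. */
BTF_SET_START(example_check_kfunc_ids)
BTF_ID(func, example_mod_kfunc)
BTF_SET_END(example_check_kfunc_ids)

static const struct btf_kfunc_id_set example_kfunc_set = {
	.owner     = THIS_MODULE,
	.check_set = &example_check_kfunc_ids,
};

static int __init example_mod_init(void)
{
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS,
					 &example_kfunc_set);
}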
|
|
|
|
|
2022-01-14 19:39:45 +03:00
|
|
|
static bool __btf_kfunc_id_set_contains(const struct btf *btf,
|
|
|
|
enum btf_kfunc_hook hook,
|
|
|
|
enum btf_kfunc_type type,
|
|
|
|
u32 kfunc_btf_id)
|
2021-10-02 04:17:51 +03:00
|
|
|
{
|
2022-01-14 19:39:45 +03:00
|
|
|
struct btf_id_set *set;
|
2021-10-02 04:17:51 +03:00
|
|
|
|
2022-01-14 19:39:45 +03:00
|
|
|
if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX)
|
|
|
|
return false;
|
|
|
|
if (!btf->kfunc_set_tab)
|
|
|
|
return false;
|
|
|
|
set = btf->kfunc_set_tab->sets[hook][type];
|
|
|
|
if (!set)
|
|
|
|
return false;
|
|
|
|
return btf_id_set_contains(set, kfunc_btf_id);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
|
|
|
|
{
|
|
|
|
switch (prog_type) {
|
|
|
|
case BPF_PROG_TYPE_XDP:
|
|
|
|
return BTF_KFUNC_HOOK_XDP;
|
|
|
|
case BPF_PROG_TYPE_SCHED_CLS:
|
|
|
|
return BTF_KFUNC_HOOK_TC;
|
|
|
|
case BPF_PROG_TYPE_STRUCT_OPS:
|
|
|
|
return BTF_KFUNC_HOOK_STRUCT_OPS;
|
|
|
|
default:
|
|
|
|
return BTF_KFUNC_HOOK_MAX;
|
2021-10-02 04:17:51 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-14 19:39:45 +03:00
|
|
|
/* Caution:
|
|
|
|
* Reference to the module (obtained using btf_try_get_module) corresponding to
|
|
|
|
* the struct btf *MUST* be held when calling this function from verifier
|
|
|
|
* context. This is usually true as we stash references in prog's kfunc_btf_tab;
|
|
|
|
* keeping the reference for the duration of the call provides the necessary
|
|
|
|
* protection for looking up a well-formed btf->kfunc_set_tab.
|
|
|
|
*/
|
|
|
|
bool btf_kfunc_id_set_contains(const struct btf *btf,
|
|
|
|
enum bpf_prog_type prog_type,
|
|
|
|
enum btf_kfunc_type type, u32 kfunc_btf_id)
|
|
|
|
{
|
|
|
|
enum btf_kfunc_hook hook;
|
2021-10-02 04:17:53 +03:00
|
|
|
|
2022-01-14 19:39:45 +03:00
|
|
|
hook = bpf_prog_type_to_kfunc_hook(prog_type);
|
|
|
|
return __btf_kfunc_id_set_contains(btf, hook, type, kfunc_btf_id);
|
|
|
|
}
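A hedged sketch of the verifier-side call this function enables, mirroring
how a kfunc call check might consult the set. The surrounding variables
(env, desc_btf, func_id, func_name) are illustrative placeholders, not code
from this file.

/* Illustrative only: reject a kfunc call absent from the check set. */
if (!btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
			       BTF_KFUNC_TYPE_CHECK, func_id)) {
	verbose(env, "calling kernel function %s is not allowed\n", func_name);
	return -EACCES;
}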
|
2021-11-22 17:47:40 +03:00
|
|
|
|
2022-01-14 19:39:45 +03:00
/* This function must be invoked only from initcalls/module init functions */
int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
			      const struct btf_kfunc_id_set *kset)
{
	enum btf_kfunc_hook hook;
	struct btf *btf;
	int ret;

	btf = btf_get_module_btf(kset->owner);
	if (!btf) {
		if (!kset->owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
			pr_err("missing vmlinux BTF, cannot register kfuncs\n");
			return -ENOENT;
		}
		if (kset->owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) {
			pr_err("missing module BTF, cannot register kfuncs\n");
			return -ENOENT;
		}
		return 0;
	}
	if (IS_ERR(btf))
		return PTR_ERR(btf);
	hook = bpf_prog_type_to_kfunc_hook(prog_type);
	ret = btf_populate_kfunc_set(btf, hook, kset);
	/* reference is only taken for module BTF */
	if (btf_is_module(btf))
		btf_put(btf);
	return ret;
}
EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set);
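As a usage sketch, a module would register its kfuncs from its init path
roughly as follows. The kfunc bpf_kfunc_foo, the set name, and the
BPF_PROG_TYPE_SCHED_CLS hook are illustrative only; the check_set member
layout is assumed from this series:

#include <linux/init.h>
#include <linux/module.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>

/* Hypothetical module kfunc; the name is illustrative. */
noinline u32 bpf_kfunc_foo(u32 x)
{
	return x + 1;
}

BTF_SET_START(test_kfunc_check_ids)
BTF_ID(func, bpf_kfunc_foo)
BTF_SET_END(test_kfunc_check_ids)

/* Assumed layout from this series: one btf_id_set per type
 * (check/acquire/release/ret_null); unused sets stay NULL.
 */
static const struct btf_kfunc_id_set test_kfunc_set = {
	.owner     = THIS_MODULE,
	.check_set = &test_kfunc_check_ids,
};

static int __init test_kfunc_mod_init(void)
{
	/* must run from module init, per the comment above */
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS,
					 &test_kfunc_set);
}
module_init(test_kfunc_mod_init);
MODULE_LICENSE("GPL");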
Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Andrii Nakryiko says:
====================
bpf-next 2021-12-10 v2
We've added 115 non-merge commits during the last 26 day(s) which contain
a total of 182 files changed, 5747 insertions(+), 2564 deletions(-).
The main changes are:
1) Various samples fixes, from Alexander Lobakin.
2) BPF CO-RE support in kernel and light skeleton, from Alexei Starovoitov.
3) A batch of new unified APIs for libbpf, logging improvements, version
querying, etc. Also a batch of old deprecations for old APIs and various
bug fixes, in preparation for libbpf 1.0, from Andrii Nakryiko.
4) BPF documentation reorganization and improvements, from Christoph Hellwig
and Dave Tucker.
5) Support for declarative initialization of BPF_MAP_TYPE_PROG_ARRAY in
libbpf, from Hengqi Chen.
6) Verifier log fixes, from Hou Tao.
7) Runtime-bounded loops support with bpf_loop() helper, from Joanne Koong.
8) Extend branch record capturing to all platforms that support it,
from Kajol Jain.
9) Light skeleton codegen improvements, from Kumar Kartikeya Dwivedi.
10) bpftool doc-generating script improvements, from Quentin Monnet.
11) Two libbpf v0.6 bug fixes, from Shuyi Cheng and Vincent Minet.
12) Deprecation warning fix for perf/bpf_counter, from Song Liu.
13) MAX_TAIL_CALL_CNT unification and MIPS build fix for libbpf,
from Tiezhu Yang.
14) BTF_KIND_TYPE_TAG follow-up fixes, from Yonghong Song.
15) Selftests fixes and improvements, from Ilya Leoshkevich, Jean-Philippe
Brucker, Jiri Olsa, Maxim Mikityanskiy, Tirthendu Sarkar, Yucong Sun,
and others.
* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (115 commits)
libbpf: Add "bool skipped" to struct bpf_map
libbpf: Fix typo in btf__dedup@LIBBPF_0.0.2 definition
bpftool: Switch bpf_object__load_xattr() to bpf_object__load()
selftests/bpf: Remove the only use of deprecated bpf_object__load_xattr()
selftests/bpf: Add test for libbpf's custom log_buf behavior
selftests/bpf: Replace all uses of bpf_load_btf() with bpf_btf_load()
libbpf: Deprecate bpf_object__load_xattr()
libbpf: Add per-program log buffer setter and getter
libbpf: Preserve kernel error code and remove kprobe prog type guessing
libbpf: Improve logging around BPF program loading
libbpf: Allow passing user log setting through bpf_object_open_opts
libbpf: Allow passing preallocated log_buf when loading BTF into kernel
libbpf: Add OPTS-based bpf_btf_load() API
libbpf: Fix bpf_prog_load() log_buf logic for log_level 0
samples/bpf: Remove unneeded variable
bpf: Remove redundant assignment to pointer t
selftests/bpf: Fix a compilation warning
perf/bpf_counter: Use bpf_map_create instead of bpf_create_map
samples: bpf: Fix 'unknown warning group' build warning on Clang
samples: bpf: Fix xdp_sample_user.o linking with Clang
...
====================
Link: https://lore.kernel.org/r/20211210234746.2100561-1-andrii@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-12-11 02:56:10 +03:00
int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
			      const struct btf *targ_btf, __u32 targ_id)
{
	return -EOPNOTSUPP;
}
static bool bpf_core_is_flavor_sep(const char *s)
{
	/* check X___Y name pattern, where X and Y are not underscores */
	return s[0] != '_' &&				      /* X */
	       s[1] == '_' && s[2] == '_' && s[3] == '_' &&   /* ___ */
	       s[4] != '_';				      /* Y */
}
size_t bpf_core_essential_name_len(const char *name)
{
	size_t n = strlen(name);
	int i;

	for (i = n - 5; i >= 0; i--) {
		if (bpf_core_is_flavor_sep(name + i))
			return i + 1;
	}
	return n;
}
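To make the '___' flavor rules concrete, here is a self-contained userspace
restatement of the two helpers above (logic copied verbatim, names shortened);
the type names in the asserts are made up:

#include <assert.h>
#include <stddef.h>
#include <string.h>

static int is_flavor_sep(const char *s)
{
	/* same X___Y test as bpf_core_is_flavor_sep() */
	return s[0] != '_' &&
	       s[1] == '_' && s[2] == '_' && s[3] == '_' &&
	       s[4] != '_';
}

static size_t essential_name_len(const char *name)
{
	size_t n = strlen(name);
	int i;

	for (i = (int)n - 5; i >= 0; i--)
		if (is_flavor_sep(name + i))
			return i + 1;
	return n;
}

int main(void)
{
	assert(essential_name_len("task_struct") == 11);
	/* the flavor suffix is ignored, so both names compare equal
	 * once truncated to their essential prefix
	 */
	assert(essential_name_len("task_struct___flavored") == 11);
	assert(essential_name_len("x___y") == 1);
	return 0;
}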
struct bpf_cand_cache {
	const char *name;
	u32 name_len;
	u16 kind;
	u16 cnt;
	struct {
		const struct btf *btf;
		u32 id;
	} cands[];
};
static void bpf_free_cands(struct bpf_cand_cache *cands)
{
	if (!cands->cnt)
		/* empty candidate array was allocated on stack */
		return;
	kfree(cands);
}
static void bpf_free_cands_from_cache(struct bpf_cand_cache *cands)
{
	kfree(cands->name);
	kfree(cands);
}
#define VMLINUX_CAND_CACHE_SIZE 31
static struct bpf_cand_cache *vmlinux_cand_cache[VMLINUX_CAND_CACHE_SIZE];

#define MODULE_CAND_CACHE_SIZE 31
static struct bpf_cand_cache *module_cand_cache[MODULE_CAND_CACHE_SIZE];

static DEFINE_MUTEX(cand_cache_mutex);
static void __print_cand_cache(struct bpf_verifier_log *log,
			       struct bpf_cand_cache **cache,
			       int cache_size)
{
	struct bpf_cand_cache *cc;
	int i, j;

	for (i = 0; i < cache_size; i++) {
		cc = cache[i];
		if (!cc)
			continue;
		bpf_log(log, "[%d]%s(", i, cc->name);
		for (j = 0; j < cc->cnt; j++) {
			bpf_log(log, "%d", cc->cands[j].id);
			if (j < cc->cnt - 1)
				bpf_log(log, " ");
		}
		bpf_log(log, "), ");
	}
}
static void print_cand_cache(struct bpf_verifier_log *log)
{
	mutex_lock(&cand_cache_mutex);
	bpf_log(log, "vmlinux_cand_cache:");
	__print_cand_cache(log, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE);
	bpf_log(log, "\nmodule_cand_cache:");
	__print_cand_cache(log, module_cand_cache, MODULE_CAND_CACHE_SIZE);
	bpf_log(log, "\n");
	mutex_unlock(&cand_cache_mutex);
}
static u32 hash_cands(struct bpf_cand_cache *cands)
{
	return jhash(cands->name, cands->name_len, 0);
}
static struct bpf_cand_cache *check_cand_cache(struct bpf_cand_cache *cands,
					       struct bpf_cand_cache **cache,
					       int cache_size)
{
	struct bpf_cand_cache *cc = cache[hash_cands(cands) % cache_size];

	if (cc && cc->name_len == cands->name_len &&
	    !strncmp(cc->name, cands->name, cands->name_len))
		return cc;
	return NULL;
}
static size_t sizeof_cands(int cnt)
{
	return offsetof(struct bpf_cand_cache, cands[cnt]);
}
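sizeof_cands() leans on C99 flexible-array sizing: offsetof() over cands[cnt]
yields exactly the bytes needed for the header plus cnt trailing candidates.
A standalone illustration (struct trimmed to the relevant members):

#include <assert.h>
#include <stddef.h>

struct cand_cache_demo {
	unsigned short cnt;
	struct {
		const void *btf;
		unsigned int id;
	} cands[];			/* flexible array member */
};

static size_t sizeof_cands_demo(int cnt)
{
	return offsetof(struct cand_cache_demo, cands[cnt]);
}

int main(void)
{
	size_t hdr = offsetof(struct cand_cache_demo, cands);
	size_t elem = sizeof(((struct cand_cache_demo *)0)->cands[0]);

	assert(sizeof_cands_demo(0) == hdr);
	assert(sizeof_cands_demo(3) == hdr + 3 * elem);
	return 0;
}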
static struct bpf_cand_cache *populate_cand_cache(struct bpf_cand_cache *cands,
						  struct bpf_cand_cache **cache,
						  int cache_size)
{
	struct bpf_cand_cache **cc = &cache[hash_cands(cands) % cache_size], *new_cands;

	if (*cc) {
		bpf_free_cands_from_cache(*cc);
		*cc = NULL;
	}
	new_cands = kmemdup(cands, sizeof_cands(cands->cnt), GFP_KERNEL);
	if (!new_cands) {
		bpf_free_cands(cands);
		return ERR_PTR(-ENOMEM);
	}
	/* strdup the name, since it will stay in the cache.
	 * cands->name points to strings in the prog's BTF and the prog can be unloaded.
	 */
	new_cands->name = kmemdup_nul(cands->name, cands->name_len, GFP_KERNEL);
	bpf_free_cands(cands);
	if (!new_cands->name) {
		kfree(new_cands);
		return ERR_PTR(-ENOMEM);
	}
	*cc = new_cands;
	return new_cands;
}
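Note the bucket policy above: each bucket holds at most one entry, and a new
result unconditionally evicts the previous occupant. A minimal userspace
sketch of the same policy (the entry type is hypothetical):

#include <stdlib.h>
#include <string.h>

#define DEMO_CACHE_SIZE 31

struct demo_entry { char *name; int val; };
static struct demo_entry *demo_cache[DEMO_CACHE_SIZE];

/* Same replace-on-collision policy as populate_cand_cache(): free
 * whatever was cached in the bucket, then install the new entry,
 * which owns a private copy of its key.
 */
static int demo_cache_put(unsigned int hash, const char *name, int val)
{
	struct demo_entry **slot = &demo_cache[hash % DEMO_CACHE_SIZE];
	struct demo_entry *e = malloc(sizeof(*e));

	if (!e)
		return -1;
	e->name = strdup(name);
	if (!e->name) {
		free(e);
		return -1;
	}
	e->val = val;
	if (*slot) {			/* evict the previous occupant */
		free((*slot)->name);
		free(*slot);
	}
	*slot = e;
	return 0;
}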
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
static void __purge_cand_cache(struct btf *btf, struct bpf_cand_cache **cache,
			       int cache_size)
{
	struct bpf_cand_cache *cc;
	int i, j;

	for (i = 0; i < cache_size; i++) {
		cc = cache[i];
		if (!cc)
			continue;
		if (!btf) {
			/* when a new module is loaded, purge the entire
			 * module_cand_cache, since the new module might have
			 * candidates with names that match cached cands.
			 */
			bpf_free_cands_from_cache(cc);
			cache[i] = NULL;
			continue;
		}
		/* when a module is unloaded, purge cache entries
		 * that match the module's btf
		 */
		for (j = 0; j < cc->cnt; j++)
			if (cc->cands[j].btf == btf) {
				bpf_free_cands_from_cache(cc);
				cache[i] = NULL;
				break;
			}
	}
}
static void purge_cand_cache(struct btf *btf)
{
	mutex_lock(&cand_cache_mutex);
	__purge_cand_cache(btf, module_cand_cache, MODULE_CAND_CACHE_SIZE);
	mutex_unlock(&cand_cache_mutex);
}
#endif
static struct bpf_cand_cache *
bpf_core_add_cands(struct bpf_cand_cache *cands, const struct btf *targ_btf,
		   int targ_start_id)
{
	struct bpf_cand_cache *new_cands;
	const struct btf_type *t;
	const char *targ_name;
	size_t targ_essent_len;
	int n, i;

	n = btf_nr_types(targ_btf);
	for (i = targ_start_id; i < n; i++) {
		t = btf_type_by_id(targ_btf, i);
		if (btf_kind(t) != cands->kind)
			continue;

		targ_name = btf_name_by_offset(targ_btf, t->name_off);
		if (!targ_name)
			continue;

		/* the resched point is before strncmp to make sure that search
		 * for a non-existing name will have a chance to schedule().
		 */
		cond_resched();

		if (strncmp(cands->name, targ_name, cands->name_len) != 0)
			continue;

		targ_essent_len = bpf_core_essential_name_len(targ_name);
		if (targ_essent_len != cands->name_len)
			continue;

		/* most of the time there is only one candidate for a given kind+name pair */
		new_cands = kmalloc(sizeof_cands(cands->cnt + 1), GFP_KERNEL);
		if (!new_cands) {
			bpf_free_cands(cands);
			return ERR_PTR(-ENOMEM);
		}

		memcpy(new_cands, cands, sizeof_cands(cands->cnt));
		bpf_free_cands(cands);
		cands = new_cands;
		cands->cands[cands->cnt].btf = targ_btf;
		cands->cands[cands->cnt].id = i;
		cands->cnt++;
	}
	return cands;
}
static struct bpf_cand_cache *
bpf_core_find_cands(struct bpf_core_ctx *ctx, u32 local_type_id)
{
	struct bpf_cand_cache *cands, *cc, local_cand = {};
	const struct btf *local_btf = ctx->btf;
	const struct btf_type *local_type;
	const struct btf *main_btf;
	size_t local_essent_len;
	struct btf *mod_btf;
	const char *name;
	int id;

	main_btf = bpf_get_btf_vmlinux();
	if (IS_ERR(main_btf))
		return ERR_CAST(main_btf);

	local_type = btf_type_by_id(local_btf, local_type_id);
	if (!local_type)
		return ERR_PTR(-EINVAL);

	name = btf_name_by_offset(local_btf, local_type->name_off);
	if (str_is_empty(name))
		return ERR_PTR(-EINVAL);
	local_essent_len = bpf_core_essential_name_len(name);

	cands = &local_cand;
	cands->name = name;
	cands->kind = btf_kind(local_type);
	cands->name_len = local_essent_len;

	cc = check_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE);
	/* cands is a pointer to stack here */
	if (cc) {
		if (cc->cnt)
			return cc;
		goto check_modules;
	}

	/* Attempt to find target candidates in vmlinux BTF first */
	cands = bpf_core_add_cands(cands, main_btf, 1);
	if (IS_ERR(cands))
		return ERR_CAST(cands);

	/* cands is a pointer to kmalloced memory here if cands->cnt > 0 */

	/* populate cache even when cands->cnt == 0 */
	cc = populate_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE);
	if (IS_ERR(cc))
		return ERR_CAST(cc);

	/* if vmlinux BTF has any candidate, don't go for module BTFs */
	if (cc->cnt)
		return cc;

check_modules:
	/* cands is a pointer to stack here and cands->cnt == 0 */
	cc = check_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE);
	if (cc)
		/* if cache has it return it even if cc->cnt == 0 */
		return cc;

	/* If the candidate is not found in vmlinux's BTF then search in module BTFs */
	spin_lock_bh(&btf_idr_lock);
	idr_for_each_entry(&btf_idr, mod_btf, id) {
		if (!btf_is_module(mod_btf))
			continue;
		/* linear search could be slow hence unlock/lock
		 * the IDR to avoid holding it for too long
		 */
		btf_get(mod_btf);
		spin_unlock_bh(&btf_idr_lock);
		cands = bpf_core_add_cands(cands, mod_btf, btf_nr_types(main_btf));
		if (IS_ERR(cands)) {
			btf_put(mod_btf);
			return ERR_CAST(cands);
		}
		spin_lock_bh(&btf_idr_lock);
		btf_put(mod_btf);
	}
	spin_unlock_bh(&btf_idr_lock);
	/* cands is a pointer to kmalloced memory here if cands->cnt > 0
	 * or a pointer to stack if cands->cnt == 0.
	 * Copy it into the cache even when cands->cnt == 0 and
	 * return the result.
	 */
	return populate_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE);
}
int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
		   int relo_idx, void *insn)
{
	bool need_cands = relo->kind != BPF_CORE_TYPE_ID_LOCAL;
	struct bpf_core_cand_list cands = {};
	struct bpf_core_spec *specs;
	int err;

	/* ~4k of temp memory necessary to convert LLVM spec like "0:1:0:5"
	 * into arrays of btf_ids of struct fields and array indices.
	 */
	specs = kcalloc(3, sizeof(*specs), GFP_KERNEL);
	if (!specs)
		return -ENOMEM;

	if (need_cands) {
		struct bpf_cand_cache *cc;
		int i;

		mutex_lock(&cand_cache_mutex);
		cc = bpf_core_find_cands(ctx, relo->type_id);
		if (IS_ERR(cc)) {
			bpf_log(ctx->log, "target candidate search failed for %d\n",
				relo->type_id);
			err = PTR_ERR(cc);
			goto out;
		}
		if (cc->cnt) {
			cands.cands = kcalloc(cc->cnt, sizeof(*cands.cands), GFP_KERNEL);
			if (!cands.cands) {
				err = -ENOMEM;
				goto out;
			}
		}
		for (i = 0; i < cc->cnt; i++) {
			bpf_log(ctx->log,
				"CO-RE relocating %s %s: found target candidate [%d]\n",
				btf_kind_str[cc->kind], cc->name, cc->cands[i].id);
			cands.cands[i].btf = cc->cands[i].btf;
			cands.cands[i].id = cc->cands[i].id;
		}
		cands.len = cc->cnt;
		/* cand_cache_mutex needs to span the cache lookup and
		 * copy of btf pointer into bpf_core_cand_list,
		 * since the module can be unloaded while bpf_core_apply_relo_insn
		 * is working with the module's btf.
		 */
	}

	err = bpf_core_apply_relo_insn((void *)ctx->log, insn, relo->insn_off / 8,
				       relo, relo_idx, ctx->btf, &cands, specs);
out:
	kfree(specs);
	if (need_cands) {
		kfree(cands.cands);
		mutex_unlock(&cand_cache_mutex);
		if (ctx->log->level & BPF_LOG_LEVEL2)
			print_cand_cache(ctx->log);
	}
	return err;
}
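The "0:1:0:5" spec mentioned in the comment above is a colon-separated list of
access indices (struct member or array positions). A hypothetical standalone
parser, just to make the format concrete:

#include <stdio.h>
#include <stdlib.h>

#define SPEC_MAX 64

/* Parse an access spec like "0:1:0:5" into an index array.
 * Returns the number of indices, or -1 on malformed input.
 */
static int parse_core_spec(const char *spec, int *out, int max)
{
	int n = 0;
	char *end;

	while (*spec) {
		long v = strtol(spec, &end, 10);

		if (end == spec || n >= max)
			return -1;
		out[n++] = (int)v;
		if (*end == ':')
			end++;
		else if (*end != '\0')
			return -1;
		spec = end;
	}
	return n;
}

int main(void)
{
	int idx[SPEC_MAX];
	int i, n = parse_core_spec("0:1:0:5", idx, SPEC_MAX);

	for (i = 0; i < n; i++)
		printf("access step %d: index %d\n", i, idx[i]);
	return 0;
}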