bpf: Add bpf_core_add_cands() and wire it into bpf_core_apply_relo_insn().
Given BPF program's BTF root type name perform the following steps:
. search in vmlinux candidate cache.
. if (present in cache and candidate list >= 1) return candidate list.
. do a linear search through kernel BTFs for possible candidates.
. regardless of number of candidates found populate vmlinux cache.
. if (candidate list >= 1) return candidate list.
. search in module candidate cache.
. if (present in cache) return candidate list (even if list is empty).
. do a linear search through BTFs of all kernel modules collecting
  candidates from all of them.
. regardless of number of candidates found populate module cache.
. return candidate list.
Then wire the result into bpf_core_apply_relo_insn().

When BPF program is trying to CO-RE relocate a type that doesn't exist
in either vmlinux BTF or in modules BTFs these steps will perform
2 cache lookups when cache is hit.

Note the cache doesn't prevent the abuse by the program that might have
lots of relocations that cannot be resolved. Hence cond_resched().

CO-RE in the kernel requires CAP_BPF, since BTF loading requires it.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20211201181040.23337-9-alexei.starovoitov@gmail.com
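The lookup order above corresponds to the simplified sketch below. It is illustrative only: it reuses the helper names introduced by the patch (check_cand_cache(), bpf_core_add_cands(), populate_cand_cache(), the two cache arrays), drops locking, IS_ERR/allocation-failure handling and the on-stack candidate details, and the find_cands_sketch() wrapper and for_each_module_btf iterator are hypothetical shorthand, not kernel APIs.

    /* Illustrative sketch of the candidate lookup order described above,
     * mirroring bpf_core_find_cands() from the patch below.
     * Error handling, locking and refcounting are intentionally omitted.
     */
    static struct bpf_cand_cache *find_cands_sketch(struct bpf_cand_cache *local)
    {
    	struct bpf_cand_cache *cc;

    	/* 1. vmlinux candidate cache */
    	cc = check_cand_cache(local, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE);
    	if (cc && cc->cnt)
    		return cc;		/* cache hit with at least one candidate */
    	if (!cc) {
    		/* 2. linear search through vmlinux BTF; cache the result
    		 * even when the candidate list came back empty
    		 */
    		local = bpf_core_add_cands(local, bpf_get_btf_vmlinux(), 1);
    		cc = populate_cand_cache(local, vmlinux_cand_cache,
    					 VMLINUX_CAND_CACHE_SIZE);
    		if (cc->cnt)
    			return cc;
    	}

    	/* 3. module candidate cache: a cached empty list is a valid answer */
    	cc = check_cand_cache(local, module_cand_cache, MODULE_CAND_CACHE_SIZE);
    	if (cc)
    		return cc;

    	/* 4. linear search through every module BTF, cache and return */
    	for_each_module_btf(mod_btf)	/* pseudo-iterator over btf_idr */
    		local = bpf_core_add_cands(local, mod_btf,
    					   btf_nr_types(bpf_get_btf_vmlinux()));
    	return populate_cand_cache(local, module_cand_cache, MODULE_CAND_CACHE_SIZE);
    }

Caching an empty candidate list is deliberate: a program with many unresolvable relocations would otherwise rescan vmlinux BTF and every module BTF for each one, which is why the commit message calls out both the two cache lookups on the miss path and the cond_resched() in the linear search.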
Parent: 03d5b99138
Commit: 1e89106da2
kernel/bpf/btf.c | 346
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -25,6 +25,7 @@
 #include <linux/kobject.h>
 #include <linux/sysfs.h>
 #include <net/sock.h>
+#include "../tools/lib/bpf/relo_core.h"
 
 /* BTF (BPF Type Format) is the meta data format which describes
  * the data types of BPF program/map. Hence, it basically focus
@@ -6169,6 +6170,8 @@ btf_module_read(struct file *file, struct kobject *kobj,
 	return len;
 }
 
+static void purge_cand_cache(struct btf *btf);
+
 static int btf_module_notify(struct notifier_block *nb, unsigned long op,
 			     void *module)
 {
@@ -6203,6 +6206,7 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
 			goto out;
 		}
 
+		purge_cand_cache(NULL);
 		mutex_lock(&btf_module_mutex);
 		btf_mod->module = module;
 		btf_mod->btf = btf;
@@ -6245,6 +6249,7 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
 			list_del(&btf_mod->list);
 			if (btf_mod->sysfs_attr)
 				sysfs_remove_bin_file(btf_kobj, btf_mod->sysfs_attr);
+			purge_cand_cache(btf_mod->btf);
 			btf_put(btf_mod->btf);
 			kfree(btf_mod->sysfs_attr);
 			kfree(btf_mod);
@@ -6440,8 +6445,347 @@ size_t bpf_core_essential_name_len(const char *name)
 	return n;
 }
 
+struct bpf_cand_cache {
+	const char *name;
+	u32 name_len;
+	u16 kind;
+	u16 cnt;
+	struct {
+		const struct btf *btf;
+		u32 id;
+	} cands[];
+};
+
+static void bpf_free_cands(struct bpf_cand_cache *cands)
+{
+	if (!cands->cnt)
+		/* empty candidate array was allocated on stack */
+		return;
+	kfree(cands);
+}
+
+static void bpf_free_cands_from_cache(struct bpf_cand_cache *cands)
+{
+	kfree(cands->name);
+	kfree(cands);
+}
+
+#define VMLINUX_CAND_CACHE_SIZE 31
+static struct bpf_cand_cache *vmlinux_cand_cache[VMLINUX_CAND_CACHE_SIZE];
+
+#define MODULE_CAND_CACHE_SIZE 31
+static struct bpf_cand_cache *module_cand_cache[MODULE_CAND_CACHE_SIZE];
+
+static DEFINE_MUTEX(cand_cache_mutex);
+
+static void __print_cand_cache(struct bpf_verifier_log *log,
+			       struct bpf_cand_cache **cache,
+			       int cache_size)
+{
+	struct bpf_cand_cache *cc;
+	int i, j;
+
+	for (i = 0; i < cache_size; i++) {
+		cc = cache[i];
+		if (!cc)
+			continue;
+		bpf_log(log, "[%d]%s(", i, cc->name);
+		for (j = 0; j < cc->cnt; j++) {
+			bpf_log(log, "%d", cc->cands[j].id);
+			if (j < cc->cnt - 1)
+				bpf_log(log, " ");
+		}
+		bpf_log(log, "), ");
+	}
+}
+
+static void print_cand_cache(struct bpf_verifier_log *log)
+{
+	mutex_lock(&cand_cache_mutex);
+	bpf_log(log, "vmlinux_cand_cache:");
+	__print_cand_cache(log, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE);
+	bpf_log(log, "\nmodule_cand_cache:");
+	__print_cand_cache(log, module_cand_cache, MODULE_CAND_CACHE_SIZE);
+	bpf_log(log, "\n");
+	mutex_unlock(&cand_cache_mutex);
+}
+
+static u32 hash_cands(struct bpf_cand_cache *cands)
+{
+	return jhash(cands->name, cands->name_len, 0);
+}
+
+static struct bpf_cand_cache *check_cand_cache(struct bpf_cand_cache *cands,
+					       struct bpf_cand_cache **cache,
+					       int cache_size)
+{
+	struct bpf_cand_cache *cc = cache[hash_cands(cands) % cache_size];
+
+	if (cc && cc->name_len == cands->name_len &&
+	    !strncmp(cc->name, cands->name, cands->name_len))
+		return cc;
+	return NULL;
+}
+
+static size_t sizeof_cands(int cnt)
+{
+	return offsetof(struct bpf_cand_cache, cands[cnt]);
+}
+
+static struct bpf_cand_cache *populate_cand_cache(struct bpf_cand_cache *cands,
+						  struct bpf_cand_cache **cache,
+						  int cache_size)
+{
+	struct bpf_cand_cache **cc = &cache[hash_cands(cands) % cache_size], *new_cands;
+
+	if (*cc) {
+		bpf_free_cands_from_cache(*cc);
+		*cc = NULL;
+	}
+	new_cands = kmalloc(sizeof_cands(cands->cnt), GFP_KERNEL);
+	if (!new_cands) {
+		bpf_free_cands(cands);
+		return ERR_PTR(-ENOMEM);
+	}
+	memcpy(new_cands, cands, sizeof_cands(cands->cnt));
+	/* strdup the name, since it will stay in cache.
+	 * the cands->name points to strings in prog's BTF and the prog can be unloaded.
+	 */
+	new_cands->name = kmemdup_nul(cands->name, cands->name_len, GFP_KERNEL);
+	bpf_free_cands(cands);
+	if (!new_cands->name) {
+		kfree(new_cands);
+		return ERR_PTR(-ENOMEM);
+	}
+	*cc = new_cands;
+	return new_cands;
+}
+
+static void __purge_cand_cache(struct btf *btf, struct bpf_cand_cache **cache,
+			       int cache_size)
+{
+	struct bpf_cand_cache *cc;
+	int i, j;
+
+	for (i = 0; i < cache_size; i++) {
+		cc = cache[i];
+		if (!cc)
+			continue;
+		if (!btf) {
+			/* when new module is loaded purge all of module_cand_cache,
+			 * since new module might have candidates with the name
+			 * that matches cached cands.
+			 */
+			bpf_free_cands_from_cache(cc);
+			cache[i] = NULL;
+			continue;
+		}
+		/* when module is unloaded purge cache entries
+		 * that match module's btf
+		 */
+		for (j = 0; j < cc->cnt; j++)
+			if (cc->cands[j].btf == btf) {
+				bpf_free_cands_from_cache(cc);
+				cache[i] = NULL;
+				break;
+			}
+	}
+
+}
+
+static void purge_cand_cache(struct btf *btf)
+{
+	mutex_lock(&cand_cache_mutex);
+	__purge_cand_cache(btf, module_cand_cache, MODULE_CAND_CACHE_SIZE);
+	mutex_unlock(&cand_cache_mutex);
+}
+
+static struct bpf_cand_cache *
+bpf_core_add_cands(struct bpf_cand_cache *cands, const struct btf *targ_btf,
+		   int targ_start_id)
+{
+	struct bpf_cand_cache *new_cands;
+	const struct btf_type *t;
+	const char *targ_name;
+	size_t targ_essent_len;
+	int n, i;
+
+	n = btf_nr_types(targ_btf);
+	for (i = targ_start_id; i < n; i++) {
+		t = btf_type_by_id(targ_btf, i);
+		if (btf_kind(t) != cands->kind)
+			continue;
+
+		targ_name = btf_name_by_offset(targ_btf, t->name_off);
+		if (!targ_name)
+			continue;
+
+		/* the resched point is before strncmp to make sure that search
+		 * for non-existing name will have a chance to schedule().
+		 */
+		cond_resched();
+
+		if (strncmp(cands->name, targ_name, cands->name_len) != 0)
+			continue;
+
+		targ_essent_len = bpf_core_essential_name_len(targ_name);
+		if (targ_essent_len != cands->name_len)
+			continue;
+
+		/* most of the time there is only one candidate for a given kind+name pair */
+		new_cands = kmalloc(sizeof_cands(cands->cnt + 1), GFP_KERNEL);
+		if (!new_cands) {
+			bpf_free_cands(cands);
+			return ERR_PTR(-ENOMEM);
+		}
+
+		memcpy(new_cands, cands, sizeof_cands(cands->cnt));
+		bpf_free_cands(cands);
+		cands = new_cands;
+		cands->cands[cands->cnt].btf = targ_btf;
+		cands->cands[cands->cnt].id = i;
+		cands->cnt++;
+	}
+	return cands;
+}
+
+static struct bpf_cand_cache *
+bpf_core_find_cands(struct bpf_core_ctx *ctx, u32 local_type_id)
+{
+	struct bpf_cand_cache *cands, *cc, local_cand = {};
+	const struct btf *local_btf = ctx->btf;
+	const struct btf_type *local_type;
+	const struct btf *main_btf;
+	size_t local_essent_len;
+	struct btf *mod_btf;
+	const char *name;
+	int id;
+
+	main_btf = bpf_get_btf_vmlinux();
+	if (IS_ERR(main_btf))
+		return (void *)main_btf;
+
+	local_type = btf_type_by_id(local_btf, local_type_id);
+	if (!local_type)
+		return ERR_PTR(-EINVAL);
+
+	name = btf_name_by_offset(local_btf, local_type->name_off);
+	if (str_is_empty(name))
+		return ERR_PTR(-EINVAL);
+	local_essent_len = bpf_core_essential_name_len(name);
+
+	cands = &local_cand;
+	cands->name = name;
+	cands->kind = btf_kind(local_type);
+	cands->name_len = local_essent_len;
+
+	cc = check_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE);
+	/* cands is a pointer to stack here */
+	if (cc) {
+		if (cc->cnt)
+			return cc;
+		goto check_modules;
+	}
+
+	/* Attempt to find target candidates in vmlinux BTF first */
+	cands = bpf_core_add_cands(cands, main_btf, 1);
+	if (IS_ERR(cands))
+		return cands;
+
+	/* cands is a pointer to kmalloced memory here if cands->cnt > 0 */
+
+	/* populate cache even when cands->cnt == 0 */
+	cc = populate_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE);
+	if (IS_ERR(cc))
+		return cc;
+
+	/* if vmlinux BTF has any candidate, don't go for module BTFs */
+	if (cc->cnt)
+		return cc;
+
+check_modules:
+	/* cands is a pointer to stack here and cands->cnt == 0 */
+	cc = check_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE);
+	if (cc)
+		/* if cache has it return it even if cc->cnt == 0 */
+		return cc;
+
+	/* If candidate is not found in vmlinux's BTF then search in module's BTFs */
+	spin_lock_bh(&btf_idr_lock);
+	idr_for_each_entry(&btf_idr, mod_btf, id) {
+		if (!btf_is_module(mod_btf))
+			continue;
+		/* linear search could be slow hence unlock/lock
+		 * the IDR to avoiding holding it for too long
+		 */
+		btf_get(mod_btf);
+		spin_unlock_bh(&btf_idr_lock);
+		cands = bpf_core_add_cands(cands, mod_btf, btf_nr_types(main_btf));
+		if (IS_ERR(cands)) {
+			btf_put(mod_btf);
+			return cands;
+		}
+		spin_lock_bh(&btf_idr_lock);
+		btf_put(mod_btf);
+	}
+	spin_unlock_bh(&btf_idr_lock);
+	/* cands is a pointer to kmalloced memory here if cands->cnt > 0
+	 * or pointer to stack if cands->cnd == 0.
+	 * Copy it into the cache even when cands->cnt == 0 and
+	 * return the result.
+	 */
+	return populate_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE);
+}
+
 int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
 		   int relo_idx, void *insn)
 {
-	return -EOPNOTSUPP;
+	bool need_cands = relo->kind != BPF_CORE_TYPE_ID_LOCAL;
+	struct bpf_core_cand_list cands = {};
+	int err;
+
+	if (need_cands) {
+		struct bpf_cand_cache *cc;
+		int i;
+
+		mutex_lock(&cand_cache_mutex);
+		cc = bpf_core_find_cands(ctx, relo->type_id);
+		if (IS_ERR(cc)) {
+			bpf_log(ctx->log, "target candidate search failed for %d\n",
+				relo->type_id);
+			err = PTR_ERR(cc);
+			goto out;
+		}
+		if (cc->cnt) {
+			cands.cands = kcalloc(cc->cnt, sizeof(*cands.cands), GFP_KERNEL);
+			if (!cands.cands) {
+				err = -ENOMEM;
+				goto out;
+			}
+		}
+		for (i = 0; i < cc->cnt; i++) {
+			bpf_log(ctx->log,
+				"CO-RE relocating %s %s: found target candidate [%d]\n",
+				btf_kind_str[cc->kind], cc->name, cc->cands[i].id);
+			cands.cands[i].btf = cc->cands[i].btf;
+			cands.cands[i].id = cc->cands[i].id;
+		}
+		cands.len = cc->cnt;
+		/* cand_cache_mutex needs to span the cache lookup and
+		 * copy of btf pointer into bpf_core_cand_list,
+		 * since module can be unloaded while bpf_core_apply_relo_insn
+		 * is working with module's btf.
+		 */
+	}
+
+	err = bpf_core_apply_relo_insn((void *)ctx->log, insn, relo->insn_off / 8,
+				       relo, relo_idx, ctx->btf, &cands);
+out:
+	if (need_cands) {
+		kfree(cands.cands);
+		mutex_unlock(&cand_cache_mutex);
+		if (ctx->log->level & BPF_LOG_LEVEL2)
+			print_cand_cache(ctx->log);
+	}
+	return err;
 }