/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#ifndef _LINUX_BPF_VERIFIER_H
#define _LINUX_BPF_VERIFIER_H 1

#include <linux/bpf.h> /* for enum bpf_reg_type */
#include <linux/filter.h> /* for MAX_BPF_STACK */
#include <linux/tnum.h>

/* Maximum variable offset umax_value permitted when resolving memory accesses.
 * In practice this is far bigger than any realistic pointer offset; this limit
 * ensures that umax_value + (int)off + (int)size cannot overflow a u64.
 */
#define BPF_MAX_VAR_OFF	(1 << 29)
/* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO]. This ensures
 * that converting umax_value to int cannot overflow.
 */
#define BPF_MAX_VAR_SIZ	(1 << 29)

/* Liveness marks, used for registers and spilled-regs (in stack slots).
 * Read marks propagate upwards until they find a write mark; they record that
 * "one of this state's descendants read this reg" (and therefore the reg is
 * relevant for states_equal() checks).
 * Write marks collect downwards and do not propagate; they record that "the
 * straight-line code that reached this state (from its parent) wrote this reg"
 * (and therefore that reads propagated from this state or its descendants
 * should not propagate to its parent).
 * A state with a write mark can receive read marks; it just won't propagate
 * them to its parent, since the write mark is a property, not of the state,
 * but of the link between it and its parent. See mark_reg_read() and
 * mark_stack_slot_read() in kernel/bpf/verifier.c.
 */
enum bpf_reg_liveness {
	REG_LIVE_NONE = 0, /* reg hasn't been read or written this branch */
	REG_LIVE_READ, /* reg was read, so we're sensitive to initial value */
	REG_LIVE_WRITTEN, /* reg was written first, screening off later reads */
	REG_LIVE_DONE = 4, /* liveness won't be updating this register anymore */
};

struct bpf_reg_state {
	/* Ordering of fields matters. See states_equal() */
	enum bpf_reg_type type;
	union {
		/* valid when type == PTR_TO_PACKET */
		u16 range;

		/* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
		 *   PTR_TO_MAP_VALUE_OR_NULL
		 */
		struct bpf_map *map_ptr;

		/* Max size from any of the above. */
		unsigned long raw;
	};
	/* Fixed part of pointer offset, pointer types only */
	s32 off;
	/* For PTR_TO_PACKET, used to find other pointers with the same variable
	 * offset, so they can share range knowledge.
	 * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we
	 * came from, when one is tested for != NULL.
	 * For PTR_TO_SOCKET this is used to share which pointers retain the
	 * same reference to the socket, to determine proper reference freeing.
	 */
	u32 id;
	/* For scalar types (SCALAR_VALUE), this represents our knowledge of
	 * the actual value.
	 * For pointer types, this represents the variable part of the offset
	 * from the pointed-to object, and is shared with all bpf_reg_states
	 * with the same id as us.
	 */
	struct tnum var_off;
	/* Used to determine if any memory access using this register will
	 * result in a bad access.
	 * These refer to the same value as var_off, not necessarily the actual
	 * contents of the register.
	 */
	s64 smin_value; /* minimum possible (s64)value */
	s64 smax_value; /* maximum possible (s64)value */
	u64 umin_value; /* minimum possible (u64)value */
	u64 umax_value; /* maximum possible (u64)value */
	/* parentage chain for liveness checking */
	struct bpf_reg_state *parent;
	/* Inside the callee two registers can be both PTR_TO_STACK like
	 * R1=fp-8 and R2=fp-8, but one of them points to this function stack
	 * while another to the caller's stack. To differentiate them 'frameno'
	 * is used which is an index in bpf_verifier_state->frame[] array
	 * pointing to bpf_func_state.
	 */
	u32 frameno;
	enum bpf_reg_liveness live;
};

enum bpf_stack_slot_type {
	STACK_INVALID,    /* nothing was stored in this stack slot */
	STACK_SPILL,      /* register spilled into stack */
	STACK_MISC,	  /* BPF program wrote some data into this slot */
	STACK_ZERO,	  /* BPF program wrote constant zero */
};

#define BPF_REG_SIZE 8	/* size of eBPF register in bytes */

struct bpf_stack_state {
	struct bpf_reg_state spilled_ptr;
	u8 slot_type[BPF_REG_SIZE];
};

struct bpf_reference_state {
	/* Track each reference created with a unique id, even if the same
	 * instruction creates the reference multiple times (eg, via CALL).
	 */
	int id;
	/* Instruction where the allocation of this reference occurred. This
	 * is used purely to inform the user of a reference leak.
	 */
	int insn_idx;
};

/* state of the program:
 * type of all registers and stack info
 */
struct bpf_func_state {
	struct bpf_reg_state regs[MAX_BPF_REG];
	/* index of call instruction that called into this func */
	int callsite;
	/* stack frame number of this function state from pov of
	 * enclosing bpf_verifier_state.
	 * 0 = main function, 1 = first callee.
	 */
	u32 frameno;
	/* subprog number == index within subprog_stack_depth
	 * zero == main subprog
	 */
	u32 subprogno;

	/* The following fields should be last. See copy_func_state() */
	int acquired_refs;
	struct bpf_reference_state *refs;
	int allocated_stack;
	struct bpf_stack_state *stack;
};

#define MAX_CALL_FRAMES 8
struct bpf_verifier_state {
	/* call stack tracking */
	struct bpf_func_state *frame[MAX_CALL_FRAMES];
	u32 curframe;
};

#define bpf_get_spilled_reg(slot, frame)				\
	(((slot < frame->allocated_stack / BPF_REG_SIZE) &&		\
	  (frame->stack[slot].slot_type[0] == STACK_SPILL))		\
	 ? &frame->stack[slot].spilled_ptr : NULL)

/* Iterate over 'frame', setting 'reg' to either NULL or a spilled register. */
#define bpf_for_each_spilled_reg(iter, frame, reg)			\
	for (iter = 0, reg = bpf_get_spilled_reg(iter, frame);		\
	     iter < frame->allocated_stack / BPF_REG_SIZE;		\
	     iter++, reg = bpf_get_spilled_reg(iter, frame))
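
/* Usage sketch (illustrative only; 'state' is assumed to be a
 * struct bpf_func_state * held by the caller). Slots that do not hold a
 * spilled register yield a NULL 'reg' and must be skipped explicitly:
 *
 *	int i;
 *	struct bpf_reg_state *reg;
 *
 *	bpf_for_each_spilled_reg(i, state, reg) {
 *		if (!reg)
 *			continue;
 *		// e.g. inspect reg->type, reg->var_off or reg->live here
 *	}
 */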

/* linked list of verifier states used to prune search */
struct bpf_verifier_state_list {
	struct bpf_verifier_state state;
	struct bpf_verifier_state_list *next;
};

struct bpf_insn_aux_data {
	union {
		enum bpf_reg_type ptr_type;	/* pointer type for load/store insns */
		unsigned long map_state;	/* pointer/poison value for maps */
		s32 call_imm;			/* saved imm field of call insn */
	};
	int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
	int sanitize_stack_off; /* stack slot to be cleared */
	bool seen; /* this insn was processed by the verifier */
};

#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */

#define BPF_VERIFIER_TMP_LOG_SIZE	1024

struct bpf_verifier_log {
	u32 level;
	char kbuf[BPF_VERIFIER_TMP_LOG_SIZE];
	char __user *ubuf;
	u32 len_used;
	u32 len_total;
};

static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
{
	return log->len_used >= log->len_total - 1;
}

static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
{
	return log->level && log->ubuf && !bpf_verifier_log_full(log);
}
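
/* Usage sketch (illustrative only): verifier code typically guards its
 * formatted output with this check before calling the logging helpers
 * declared later in this header, e.g.:
 *
 *	if (bpf_verifier_log_needed(&env->log))
 *		bpf_verifier_log_write(env, "invalid access at insn %u\n",
 *				       env->insn_idx);
 */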

#define BPF_MAX_SUBPROGS 256

struct bpf_subprog_info {
	u32 start; /* insn idx of function entry point */
	u32 linfo_idx; /* The idx to the main_prog->aux->linfo */
	u16 stack_depth; /* max. stack depth used by this function */
};

/* single container for all structs
 * one verifier_env per bpf_check() call
 */
struct bpf_verifier_env {
	u32 insn_idx;
	u32 prev_insn_idx;
	struct bpf_prog *prog;		/* eBPF program being verified */
	const struct bpf_verifier_ops *ops;
	struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */
	int stack_size;			/* number of states to be processed */
	bool strict_alignment;		/* perform strict pointer alignment checks */
	struct bpf_verifier_state *cur_state; /* current verifier state */
	struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
	struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of maps used by eBPF program */
	u32 used_map_cnt;		/* number of used maps */
	u32 id_gen;			/* used to generate unique reg IDs */
	bool allow_ptr_leaks;
	bool seen_direct_write;
	struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
	const struct bpf_line_info *prev_linfo;
	struct bpf_verifier_log log;
	struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
	u32 subprog_cnt;
};

__printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
				      const char *fmt, va_list args);
__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
					   const char *fmt, ...);

static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
{
	struct bpf_verifier_state *cur = env->cur_state;

	return cur->frame[cur->curframe];
}

static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
{
	return cur_func(env)->regs;
}
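
/* Usage sketch (illustrative only; 'insn' is assumed to be a
 * struct bpf_insn * in the caller): callers usually fetch the current
 * frame's register file once and then index it by register number, e.g.:
 *
 *	struct bpf_reg_state *regs = cur_regs(env);
 *	struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
 */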

int bpf_prog_offload_verifier_prep(struct bpf_prog *prog);
int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
				 int insn_idx, int prev_insn_idx);
int bpf_prog_offload_finalize(struct bpf_verifier_env *env);

#endif /* _LINUX_BPF_VERIFIER_H */