2005-04-17 02:20:36 +04:00
|
|
|
/*
|
|
|
|
* Linux Socket Filter Data Structures
|
|
|
|
*/
|
|
|
|
#ifndef __LINUX_FILTER_H__
|
|
|
|
#define __LINUX_FILTER_H__
|
|
|
|
|
2011-07-27 03:09:06 +04:00
|
|
|
#include <linux/atomic.h>
|
2012-04-13 01:47:53 +04:00
|
|
|
#include <linux/compat.h>
|
2013-10-04 11:14:06 +04:00
|
|
|
#include <linux/workqueue.h>
|
2012-10-13 13:46:48 +04:00
|
|
|
#include <uapi/linux/filter.h>
|
2011-05-22 11:08:11 +04:00
|
|
|
|
2012-04-13 01:47:53 +04:00
|
|
|
#ifdef CONFIG_COMPAT
|
|
|
|
/*
|
|
|
|
* A struct sock_filter is architecture independent.
|
|
|
|
*/
|
|
|
|
/*
 * 32-bit compat mirror of struct sock_fprog: same fields, but the filter
 * pointer is carried as a compat_uptr_t so a 64-bit kernel can copy the
 * program in from 32-bit userspace.
 */
struct compat_sock_fprog {
	u16 len;			/* number of struct sock_filter blocks */
	compat_uptr_t filter;		/* struct sock_filter * */
};
|
|
|
|
#endif
|
|
|
|
|
2011-05-22 11:08:11 +04:00
|
|
|
struct sk_buff;
|
|
|
|
struct sock;
|
|
|
|
|
2008-04-10 12:33:47 +04:00
|
|
|
/*
 * A socket filter program. The instructions are stored inline in the
 * trailing insns[] array; bpf_func is the entry point used to run the
 * filter against an skb (SK_RUN_FILTER below invokes it with insns as
 * the second argument).
 */
struct sk_filter
{
	atomic_t refcnt;		/* reference count; freed when it drops to zero */
	unsigned int len;		/* Number of filter blocks */
	struct rcu_head rcu;		/* NOTE(review): presumably for RCU-deferred
					 * free of attached filters — confirm in
					 * net/core/filter.c */
	unsigned int (*bpf_func)(const struct sk_buff *skb,
				 const struct sock_filter *filter);
	union {
		/* program text; [0] is the pre-C99 trailing-array idiom
		 * (a flexible array member is not permitted inside a union) */
		struct sock_filter insns[0];
		/* NOTE(review): work appears to be used for deferred freeing
		 * (see the workqueue.h include) and overlays insns, which are
		 * dead by then — confirm against bpf_jit_free users */
		struct work_struct work;
	};
};
|
|
|
|
|
2013-10-04 11:14:06 +04:00
|
|
|
static inline unsigned int sk_filter_size(unsigned int proglen)
|
2008-04-10 12:33:47 +04:00
|
|
|
{
|
2013-10-04 11:14:06 +04:00
|
|
|
return max(sizeof(struct sk_filter),
|
|
|
|
offsetof(struct sk_filter, insns[proglen]));
|
2008-04-10 12:33:47 +04:00
|
|
|
}
|
|
|
|
|
2008-04-10 12:43:09 +04:00
|
|
|
extern int sk_filter(struct sock *sk, struct sk_buff *skb);
|
2010-12-06 23:50:09 +03:00
|
|
|
extern unsigned int sk_run_filter(const struct sk_buff *skb,
|
2010-11-19 20:49:59 +03:00
|
|
|
const struct sock_filter *filter);
|
2012-03-31 15:01:19 +04:00
|
|
|
extern int sk_unattached_filter_create(struct sk_filter **pfp,
|
|
|
|
struct sock_fprog *fprog);
|
|
|
|
extern void sk_unattached_filter_destroy(struct sk_filter *fp);
|
2005-04-17 02:20:36 +04:00
|
|
|
extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
|
2007-10-18 08:21:26 +04:00
|
|
|
extern int sk_detach_filter(struct sock *sk);
|
2011-10-18 01:04:20 +04:00
|
|
|
extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen);
|
sk-filter: Add ability to get socket filter program (v2)
The SO_ATTACH_FILTER option is set only. I propose to add the get
ability by using SO_ATTACH_FILTER in getsockopt. To be less
irritating to eyes the SO_GET_FILTER alias to it is declared. This
ability is required by checkpoint-restore project to be able to
save full state of a socket.
There are two issues with getting the filter back.
First, kernel modifies the sock_filter->code on filter load, thus in
order to return the filter element back to user we have to decode it
into user-visible constants. Fortunately the modification in question
is interconvertible.
Second, the BPF_S_ALU_DIV_K code modifies the command argument k to
speed up the run-time division by doing kernel_k = reciprocal(user_k).
Bad news is that different user_k may result in same kernel_k, so we
can't get the original user_k back. Good news is that we don't have
to do it. What we need to do is calculate a user2_k such that
reciprocal(user2_k) == reciprocal(user_k) == kernel_k
i.e. if it's re-loaded back the compiled again value will be exactly
the same as it was. That said, the user2_k can be calculated like this
user2_k = reciprocal(kernel_k)
with an exception, that if kernel_k == 0, then user2_k == 1.
The optlen argument is treated like this -- when zero, the kernel returns
the number of sock_fprog elements in the filter; otherwise it should be
large enough to hold the sock_fprog array.
changes since v1:
* Declared SO_GET_FILTER in all arch headers
* Added decode of vlan-tag codes
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2012-11-01 06:01:48 +04:00
|
|
|
extern int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned len);
|
2013-06-05 17:30:55 +04:00
|
|
|
extern void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to);
|
2011-04-20 13:27:32 +04:00
|
|
|
|
|
|
|
#ifdef CONFIG_BPF_JIT
|
2013-05-02 00:24:08 +04:00
|
|
|
#include <stdarg.h>
|
2013-03-28 19:24:53 +04:00
|
|
|
#include <linux/linkage.h>
|
|
|
|
#include <linux/printk.h>
|
|
|
|
|
2011-04-20 13:27:32 +04:00
|
|
|
extern void bpf_jit_compile(struct sk_filter *fp);
|
|
|
|
extern void bpf_jit_free(struct sk_filter *fp);
|
2013-03-22 01:22:03 +04:00
|
|
|
|
|
|
|
static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
|
|
|
|
u32 pass, void *image)
|
|
|
|
{
|
2013-05-17 20:57:37 +04:00
|
|
|
pr_err("flen=%u proglen=%u pass=%u image=%pK\n",
|
2013-03-22 01:22:03 +04:00
|
|
|
flen, proglen, pass, image);
|
|
|
|
if (image)
|
2013-05-17 20:57:37 +04:00
|
|
|
print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET,
|
2013-03-22 01:22:03 +04:00
|
|
|
16, 1, image, proglen, false);
|
|
|
|
}
|
2011-04-20 13:27:32 +04:00
|
|
|
#define SK_RUN_FILTER(FILTER, SKB) (*FILTER->bpf_func)(SKB, FILTER->insns)
|
|
|
|
#else
|
2013-10-04 11:14:06 +04:00
|
|
|
#include <linux/slab.h>
|
2011-04-20 13:27:32 +04:00
|
|
|
/* No JIT configured: compilation is a no-op and the filter stays on the
 * interpreted path (SK_RUN_FILTER maps to sk_run_filter below). */
static inline void bpf_jit_compile(struct sk_filter *fp)
{
}
|
|
|
|
/* No JIT configured: there is no executable image to tear down, so
 * freeing the filter is a plain kfree of the sk_filter allocation. */
static inline void bpf_jit_free(struct sk_filter *fp)
{
	kfree(fp);
}
|
|
|
|
#define SK_RUN_FILTER(FILTER, SKB) sk_run_filter(SKB, FILTER->insns)
|
|
|
|
#endif
|
|
|
|
|
2014-01-17 20:09:45 +04:00
|
|
|
/*
 * Report which BPF ancillary extensions this kernel supports.
 *
 * When adding a new BPF extension it is necessary to enumerate
 * it here, so userspace software which wants to know what is
 * supported can do so by inspecting the return value of this
 * function.
 *
 * Returns 0: no extensions are advertised by this version.
 */
static inline int bpf_tell_extensions(void)
{
	return 0;
}
|
|
|
|
|
2011-04-20 13:27:32 +04:00
|
|
|
/*
 * Kernel-internal instruction codes for classic BPF.
 *
 * NOTE(review): these appear to be the validated/translated forms of the
 * UAPI BPF_* opcodes (values start at 1, presumably so 0 can never be a
 * valid translated code) — confirm against sk_chk_filter() and
 * sk_decode_filter() in net/core/filter.c. Do not reorder: the values
 * are positional.
 */
enum {
	/* return */
	BPF_S_RET_K = 1,
	BPF_S_RET_A,
	/* ALU, K = immediate operand, X = index register operand */
	BPF_S_ALU_ADD_K,
	BPF_S_ALU_ADD_X,
	BPF_S_ALU_SUB_K,
	BPF_S_ALU_SUB_X,
	BPF_S_ALU_MUL_K,
	BPF_S_ALU_MUL_X,
	BPF_S_ALU_DIV_X,
	BPF_S_ALU_MOD_K,
	BPF_S_ALU_MOD_X,
	BPF_S_ALU_AND_K,
	BPF_S_ALU_AND_X,
	BPF_S_ALU_OR_K,
	BPF_S_ALU_OR_X,
	BPF_S_ALU_XOR_K,
	BPF_S_ALU_XOR_X,
	BPF_S_ALU_LSH_K,
	BPF_S_ALU_LSH_X,
	BPF_S_ALU_RSH_K,
	BPF_S_ALU_RSH_X,
	BPF_S_ALU_NEG,
	/* loads into the accumulator (A) */
	BPF_S_LD_W_ABS,
	BPF_S_LD_H_ABS,
	BPF_S_LD_B_ABS,
	BPF_S_LD_W_LEN,
	BPF_S_LD_W_IND,
	BPF_S_LD_H_IND,
	BPF_S_LD_B_IND,
	BPF_S_LD_IMM,
	/* loads into the index register (X) */
	BPF_S_LDX_W_LEN,
	BPF_S_LDX_B_MSH,
	BPF_S_LDX_IMM,
	/* register transfers */
	BPF_S_MISC_TAX,
	BPF_S_MISC_TXA,
	/* NOTE(review): DIV_K sits apart from the other ALU codes —
	 * presumably deliberate (its k operand is stored transformed;
	 * see sk_get_filter/sk_decode_filter), so keep it here */
	BPF_S_ALU_DIV_K,
	/* scratch-memory load/store */
	BPF_S_LD_MEM,
	BPF_S_LDX_MEM,
	BPF_S_ST,
	BPF_S_STX,
	/* jumps */
	BPF_S_JMP_JA,
	BPF_S_JMP_JEQ_K,
	BPF_S_JMP_JEQ_X,
	BPF_S_JMP_JGE_K,
	BPF_S_JMP_JGE_X,
	BPF_S_JMP_JGT_K,
	BPF_S_JMP_JGT_X,
	BPF_S_JMP_JSET_K,
	BPF_S_JMP_JSET_X,
	/* Ancillary data */
	BPF_S_ANC_PROTOCOL,
	BPF_S_ANC_PKTTYPE,
	BPF_S_ANC_IFINDEX,
	BPF_S_ANC_NLATTR,
	BPF_S_ANC_NLATTR_NEST,
	BPF_S_ANC_MARK,
	BPF_S_ANC_QUEUE,
	BPF_S_ANC_HATYPE,
	BPF_S_ANC_RXHASH,
	BPF_S_ANC_CPU,
	BPF_S_ANC_ALU_XOR_X,
	BPF_S_ANC_SECCOMP_LD_W,
	BPF_S_ANC_VLAN_TAG,
	BPF_S_ANC_VLAN_TAG_PRESENT,
	BPF_S_ANC_PAY_OFFSET,
};
|
|
|
|
|
2005-04-17 02:20:36 +04:00
|
|
|
#endif /* __LINUX_FILTER_H__ */
|