Merge branch 'bpf-support-for-sockets'
David Ahern says:

====================
net: Add bpf support for sockets

The recently added VRF support in Linux leverages the bind-to-device API for programs to specify an L3 domain for a socket. While SO_BINDTODEVICE has been around for ages, not every ipv4/ipv6 capable program has support for it. Even for those programs that do support it, the API requires processes to be started as root (CAP_NET_RAW) which is not desirable from a general security perspective.

This patch set leverages Daniel Mack's work to attach bpf programs to a cgroup to provide a capability to set sk_bound_dev_if for all AF_INET{6} sockets opened by a process in a cgroup when the sockets are allocated.

For example:

1. configure vrf (e.g., using ifupdown2)
       auto eth0
       iface eth0 inet dhcp
           vrf mgmt

       auto mgmt
       iface mgmt
           vrf-table auto

2. configure cgroup
       mount -t cgroup2 none /tmp/cgroupv2
       mkdir /tmp/cgroupv2/mgmt
       test_cgrp2_sock /tmp/cgroupv2/mgmt 15

3. set shell into cgroup (e.g., can be done at login using pam)
       echo $$ >> /tmp/cgroupv2/mgmt/cgroup.procs

At this point all commands run in the shell (e.g., apt) have sockets automatically bound to the VRF (see output of ss -ap 'dev == <vrf>'), including processes not running as root.

This capability enables running any program in a VRF context and is key to deploying Management VRF, a fundamental configuration for networking gear, with any Linux OS installation.

This patchset also exports the socket family, type and protocol as read-only allowing bpf filters to deny a process in a cgroup the ability to open specific types of AF_INET or AF_INET6 sockets.

v7
- comments from Alexei

v6
- add export of socket family, type and protocol
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Коммит
b5b5eca9aa
|
@ -36,31 +36,44 @@ void cgroup_bpf_update(struct cgroup *cgrp,
|
|||
struct bpf_prog *prog,
|
||||
enum bpf_attach_type type);
|
||||
|
||||
int __cgroup_bpf_run_filter(struct sock *sk,
|
||||
struct sk_buff *skb,
|
||||
enum bpf_attach_type type);
|
||||
int __cgroup_bpf_run_filter_skb(struct sock *sk,
|
||||
struct sk_buff *skb,
|
||||
enum bpf_attach_type type);
|
||||
|
||||
/* Wrappers for __cgroup_bpf_run_filter() guarded by cgroup_bpf_enabled. */
|
||||
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) \
|
||||
({ \
|
||||
int __ret = 0; \
|
||||
if (cgroup_bpf_enabled) \
|
||||
__ret = __cgroup_bpf_run_filter(sk, skb, \
|
||||
BPF_CGROUP_INET_INGRESS); \
|
||||
\
|
||||
__ret; \
|
||||
int __cgroup_bpf_run_filter_sk(struct sock *sk,
|
||||
enum bpf_attach_type type);
|
||||
|
||||
/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
|
||||
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \
|
||||
({ \
|
||||
int __ret = 0; \
|
||||
if (cgroup_bpf_enabled) \
|
||||
__ret = __cgroup_bpf_run_filter_skb(sk, skb, \
|
||||
BPF_CGROUP_INET_INGRESS); \
|
||||
\
|
||||
__ret; \
|
||||
})
|
||||
|
||||
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) \
|
||||
({ \
|
||||
int __ret = 0; \
|
||||
if (cgroup_bpf_enabled && sk && sk == skb->sk) { \
|
||||
typeof(sk) __sk = sk_to_full_sk(sk); \
|
||||
if (sk_fullsock(__sk)) \
|
||||
__ret = __cgroup_bpf_run_filter(__sk, skb, \
|
||||
BPF_CGROUP_INET_EGRESS); \
|
||||
} \
|
||||
__ret; \
|
||||
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb) \
|
||||
({ \
|
||||
int __ret = 0; \
|
||||
if (cgroup_bpf_enabled && sk && sk == skb->sk) { \
|
||||
typeof(sk) __sk = sk_to_full_sk(sk); \
|
||||
if (sk_fullsock(__sk)) \
|
||||
__ret = __cgroup_bpf_run_filter_skb(__sk, skb, \
|
||||
BPF_CGROUP_INET_EGRESS); \
|
||||
} \
|
||||
__ret; \
|
||||
})
|
||||
|
||||
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \
|
||||
({ \
|
||||
int __ret = 0; \
|
||||
if (cgroup_bpf_enabled && sk) { \
|
||||
__ret = __cgroup_bpf_run_filter_sk(sk, \
|
||||
BPF_CGROUP_INET_SOCK_CREATE); \
|
||||
} \
|
||||
__ret; \
|
||||
})
|
||||
|
||||
#else
|
||||
|
@ -72,6 +85,7 @@ static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
|
|||
|
||||
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
|
||||
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
|
||||
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
|
||||
|
||||
#endif /* CONFIG_CGROUP_BPF */
|
||||
|
||||
|
|
|
@ -389,6 +389,21 @@ struct sock {
|
|||
* Because of non atomicity rules, all
|
||||
* changes are protected by socket lock.
|
||||
*/
|
||||
unsigned int __sk_flags_offset[0];
|
||||
#ifdef __BIG_ENDIAN_BITFIELD
|
||||
#define SK_FL_PROTO_SHIFT 16
|
||||
#define SK_FL_PROTO_MASK 0x00ff0000
|
||||
|
||||
#define SK_FL_TYPE_SHIFT 0
|
||||
#define SK_FL_TYPE_MASK 0x0000ffff
|
||||
#else
|
||||
#define SK_FL_PROTO_SHIFT 8
|
||||
#define SK_FL_PROTO_MASK 0x0000ff00
|
||||
|
||||
#define SK_FL_TYPE_SHIFT 16
|
||||
#define SK_FL_TYPE_MASK 0xffff0000
|
||||
#endif
|
||||
|
||||
kmemcheck_bitfield_begin(flags);
|
||||
unsigned int sk_padding : 2,
|
||||
sk_no_check_tx : 1,
|
||||
|
|
|
@ -101,6 +101,7 @@ enum bpf_prog_type {
|
|||
BPF_PROG_TYPE_XDP,
|
||||
BPF_PROG_TYPE_PERF_EVENT,
|
||||
BPF_PROG_TYPE_CGROUP_SKB,
|
||||
BPF_PROG_TYPE_CGROUP_SOCK,
|
||||
BPF_PROG_TYPE_LWT_IN,
|
||||
BPF_PROG_TYPE_LWT_OUT,
|
||||
BPF_PROG_TYPE_LWT_XMIT,
|
||||
|
@ -109,6 +110,7 @@ enum bpf_prog_type {
|
|||
/* Attach points accepted by BPF_PROG_ATTACH / BPF_PROG_DETACH for cgroups.
 * bpf_prog_attach() pairs BPF_CGROUP_INET_INGRESS and BPF_CGROUP_INET_EGRESS
 * with BPF_PROG_TYPE_CGROUP_SKB programs, and BPF_CGROUP_INET_SOCK_CREATE
 * with BPF_PROG_TYPE_CGROUP_SOCK programs. __MAX_BPF_ATTACH_TYPE is the
 * terminating entry and must stay last.
 */
enum bpf_attach_type {
|
||||
BPF_CGROUP_INET_INGRESS,
|
||||
BPF_CGROUP_INET_EGRESS,
|
||||
BPF_CGROUP_INET_SOCK_CREATE,
|
||||
__MAX_BPF_ATTACH_TYPE
|
||||
};
|
||||
|
||||
|
@ -567,6 +569,13 @@ enum bpf_ret_code {
|
|||
/* >127 are reserved for prog type specific return codes */
|
||||
};
|
||||
|
||||
/* Context structure seen by BPF_PROG_TYPE_CGROUP_SOCK programs.
 * sock_filter_is_valid_access() only accepts aligned 4-byte accesses to
 * these fields, and only bound_dev_if may be written.
 */
struct bpf_sock {
|
||||
/* read/write: translated to the socket's sk_bound_dev_if */
__u32 bound_dev_if;
|
||||
/* read-only: translated to a load of sk_family */
__u32 family;
|
||||
/* read-only: extracted from the socket flags word (SK_FL_TYPE_*) */
__u32 type;
|
||||
/* read-only: extracted from the socket flags word (SK_FL_PROTO_*) */
__u32 protocol;
|
||||
};
|
||||
|
||||
/* User return codes for XDP prog type.
|
||||
* A valid XDP program must return one of these defined values. All other
|
||||
* return codes are reserved for future use. Unknown return codes will result
|
||||
|
|
|
@ -118,7 +118,7 @@ void __cgroup_bpf_update(struct cgroup *cgrp,
|
|||
}
|
||||
|
||||
/**
|
||||
* __cgroup_bpf_run_filter() - Run a program for packet filtering
|
||||
* __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
|
||||
* @sk: The socket sending or receiving traffic
|
||||
* @skb: The skb that is being sent or received
|
||||
* @type: The type of program to be executed
|
||||
|
@ -132,9 +132,9 @@ void __cgroup_bpf_update(struct cgroup *cgrp,
|
|||
* This function will return %-EPERM if an attached program was found
|
||||
* and if it returned != 1 during execution. In all other cases, 0 is returned.
|
||||
*/
|
||||
int __cgroup_bpf_run_filter(struct sock *sk,
|
||||
struct sk_buff *skb,
|
||||
enum bpf_attach_type type)
|
||||
int __cgroup_bpf_run_filter_skb(struct sock *sk,
|
||||
struct sk_buff *skb,
|
||||
enum bpf_attach_type type)
|
||||
{
|
||||
struct bpf_prog *prog;
|
||||
struct cgroup *cgrp;
|
||||
|
@ -164,4 +164,37 @@ int __cgroup_bpf_run_filter(struct sock *sk,
|
|||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(__cgroup_bpf_run_filter);
|
||||
EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
|
||||
|
||||
/**
|
||||
* __cgroup_bpf_run_filter_sk() - Run a program on a sock
|
||||
* @sk: sock structure to manipulate
|
||||
* @type: The type of program to be exectuted
|
||||
*
|
||||
* socket is passed is expected to be of type INET or INET6.
|
||||
*
|
||||
* The program type passed in via @type must be suitable for sock
|
||||
* filtering. No further check is performed to assert that.
|
||||
*
|
||||
* This function will return %-EPERM if any if an attached program was found
|
||||
* and if it returned != 1 during execution. In all other cases, 0 is returned.
|
||||
*/
|
||||
int __cgroup_bpf_run_filter_sk(struct sock *sk,
|
||||
enum bpf_attach_type type)
|
||||
{
|
||||
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
|
||||
struct bpf_prog *prog;
|
||||
int ret = 0;
|
||||
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
prog = rcu_dereference(cgrp->bpf.effective[type]);
|
||||
if (prog)
|
||||
ret = BPF_PROG_RUN(prog, sk) == 1 ? 0 : -EPERM;
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
|
||||
|
|
|
@ -856,6 +856,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
|
|||
{
|
||||
struct bpf_prog *prog;
|
||||
struct cgroup *cgrp;
|
||||
enum bpf_prog_type ptype;
|
||||
|
||||
if (!capable(CAP_NET_ADMIN))
|
||||
return -EPERM;
|
||||
|
@ -866,25 +867,28 @@ static int bpf_prog_attach(const union bpf_attr *attr)
|
|||
switch (attr->attach_type) {
|
||||
case BPF_CGROUP_INET_INGRESS:
|
||||
case BPF_CGROUP_INET_EGRESS:
|
||||
prog = bpf_prog_get_type(attr->attach_bpf_fd,
|
||||
BPF_PROG_TYPE_CGROUP_SKB);
|
||||
if (IS_ERR(prog))
|
||||
return PTR_ERR(prog);
|
||||
|
||||
cgrp = cgroup_get_from_fd(attr->target_fd);
|
||||
if (IS_ERR(cgrp)) {
|
||||
bpf_prog_put(prog);
|
||||
return PTR_ERR(cgrp);
|
||||
}
|
||||
|
||||
cgroup_bpf_update(cgrp, prog, attr->attach_type);
|
||||
cgroup_put(cgrp);
|
||||
ptype = BPF_PROG_TYPE_CGROUP_SKB;
|
||||
break;
|
||||
case BPF_CGROUP_INET_SOCK_CREATE:
|
||||
ptype = BPF_PROG_TYPE_CGROUP_SOCK;
|
||||
break;
|
||||
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
|
||||
if (IS_ERR(prog))
|
||||
return PTR_ERR(prog);
|
||||
|
||||
cgrp = cgroup_get_from_fd(attr->target_fd);
|
||||
if (IS_ERR(cgrp)) {
|
||||
bpf_prog_put(prog);
|
||||
return PTR_ERR(cgrp);
|
||||
}
|
||||
|
||||
cgroup_bpf_update(cgrp, prog, attr->attach_type);
|
||||
cgroup_put(cgrp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -903,6 +907,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
|
|||
switch (attr->attach_type) {
|
||||
case BPF_CGROUP_INET_INGRESS:
|
||||
case BPF_CGROUP_INET_EGRESS:
|
||||
case BPF_CGROUP_INET_SOCK_CREATE:
|
||||
cgrp = cgroup_get_from_fd(attr->target_fd);
|
||||
if (IS_ERR(cgrp))
|
||||
return PTR_ERR(cgrp);
|
||||
|
|
|
@ -2818,6 +2818,32 @@ static bool lwt_is_valid_access(int off, int size,
|
|||
return __is_valid_access(off, size, type);
|
||||
}
|
||||
|
||||
static bool sock_filter_is_valid_access(int off, int size,
|
||||
enum bpf_access_type type,
|
||||
enum bpf_reg_type *reg_type)
|
||||
{
|
||||
if (type == BPF_WRITE) {
|
||||
switch (off) {
|
||||
case offsetof(struct bpf_sock, bound_dev_if):
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (off < 0 || off + size > sizeof(struct bpf_sock))
|
||||
return false;
|
||||
|
||||
/* The verifier guarantees that size > 0. */
|
||||
if (off % size != 0)
|
||||
return false;
|
||||
|
||||
if (size != sizeof(__u32))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
|
||||
const struct bpf_prog *prog)
|
||||
{
|
||||
|
@ -3076,6 +3102,51 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg,
|
|||
return insn - insn_buf;
|
||||
}
|
||||
|
||||
static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
|
||||
int dst_reg, int src_reg,
|
||||
int ctx_off,
|
||||
struct bpf_insn *insn_buf,
|
||||
struct bpf_prog *prog)
|
||||
{
|
||||
struct bpf_insn *insn = insn_buf;
|
||||
|
||||
switch (ctx_off) {
|
||||
case offsetof(struct bpf_sock, bound_dev_if):
|
||||
BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);
|
||||
|
||||
if (type == BPF_WRITE)
|
||||
*insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
|
||||
offsetof(struct sock, sk_bound_dev_if));
|
||||
else
|
||||
*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
|
||||
offsetof(struct sock, sk_bound_dev_if));
|
||||
break;
|
||||
|
||||
case offsetof(struct bpf_sock, family):
|
||||
BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
|
||||
|
||||
*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
|
||||
offsetof(struct sock, sk_family));
|
||||
break;
|
||||
|
||||
case offsetof(struct bpf_sock, type):
|
||||
*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
|
||||
offsetof(struct sock, __sk_flags_offset));
|
||||
*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, SK_FL_TYPE_MASK);
|
||||
*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, SK_FL_TYPE_SHIFT);
|
||||
break;
|
||||
|
||||
case offsetof(struct bpf_sock, protocol):
|
||||
*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
|
||||
offsetof(struct sock, __sk_flags_offset));
|
||||
*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, SK_FL_PROTO_MASK);
|
||||
*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, SK_FL_PROTO_SHIFT);
|
||||
break;
|
||||
}
|
||||
|
||||
return insn - insn_buf;
|
||||
}
|
||||
|
||||
static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg,
|
||||
int src_reg, int ctx_off,
|
||||
struct bpf_insn *insn_buf,
|
||||
|
@ -3162,6 +3233,12 @@ static const struct bpf_verifier_ops lwt_xmit_ops = {
|
|||
.gen_prologue = tc_cls_act_prologue,
|
||||
};
|
||||
|
||||
/* Verifier callbacks for cgroup sock programs: helper lookup is shared
 * with socket filters (sk_filter_func_proto), while context access is
 * validated and rewritten by the sock_filter_* functions.
 */
static const struct bpf_verifier_ops cg_sock_ops = {
|
||||
.get_func_proto = sk_filter_func_proto,
|
||||
.is_valid_access = sock_filter_is_valid_access,
|
||||
.convert_ctx_access = sock_filter_convert_ctx_access,
|
||||
};
|
||||
|
||||
static struct bpf_prog_type_list sk_filter_type __read_mostly = {
|
||||
.ops = &sk_filter_ops,
|
||||
.type = BPF_PROG_TYPE_SOCKET_FILTER,
|
||||
|
@ -3202,6 +3279,11 @@ static struct bpf_prog_type_list lwt_xmit_type __read_mostly = {
|
|||
.type = BPF_PROG_TYPE_LWT_XMIT,
|
||||
};
|
||||
|
||||
/* Program-type list entry that ties BPF_PROG_TYPE_CGROUP_SOCK to
 * cg_sock_ops; registered via bpf_register_prog_type() in
 * register_sk_filter_ops().
 */
static struct bpf_prog_type_list cg_sock_type __read_mostly = {
|
||||
.ops = &cg_sock_ops,
|
||||
.type = BPF_PROG_TYPE_CGROUP_SOCK
|
||||
};
|
||||
|
||||
static int __init register_sk_filter_ops(void)
|
||||
{
|
||||
bpf_register_prog_type(&sk_filter_type);
|
||||
|
@ -3209,6 +3291,7 @@ static int __init register_sk_filter_ops(void)
|
|||
bpf_register_prog_type(&sched_act_type);
|
||||
bpf_register_prog_type(&xdp_type);
|
||||
bpf_register_prog_type(&cg_skb_type);
|
||||
bpf_register_prog_type(&cg_sock_type);
|
||||
bpf_register_prog_type(&lwt_in_type);
|
||||
bpf_register_prog_type(&lwt_out_type);
|
||||
bpf_register_prog_type(&lwt_xmit_type);
|
||||
|
|
|
@ -374,8 +374,18 @@ lookup_protocol:
|
|||
|
||||
if (sk->sk_prot->init) {
|
||||
err = sk->sk_prot->init(sk);
|
||||
if (err)
|
||||
if (err) {
|
||||
sk_common_release(sk);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (!kern) {
|
||||
err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
|
||||
if (err) {
|
||||
sk_common_release(sk);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
out:
|
||||
return err;
|
||||
|
|
|
@ -258,6 +258,14 @@ lookup_protocol:
|
|||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (!kern) {
|
||||
err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk);
|
||||
if (err) {
|
||||
sk_common_release(sk);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
out:
|
||||
return err;
|
||||
out_rcu_unlock:
|
||||
|
|
|
@ -23,6 +23,8 @@ hostprogs-y += map_perf_test
|
|||
hostprogs-y += test_overhead
|
||||
hostprogs-y += test_cgrp2_array_pin
|
||||
hostprogs-y += test_cgrp2_attach
|
||||
hostprogs-y += test_cgrp2_sock
|
||||
hostprogs-y += test_cgrp2_sock2
|
||||
hostprogs-y += xdp1
|
||||
hostprogs-y += xdp2
|
||||
hostprogs-y += test_current_task_under_cgroup
|
||||
|
@ -52,6 +54,8 @@ map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o
|
|||
test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o
|
||||
test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o
|
||||
test_cgrp2_attach-objs := libbpf.o test_cgrp2_attach.o
|
||||
test_cgrp2_sock-objs := libbpf.o test_cgrp2_sock.o
|
||||
test_cgrp2_sock2-objs := bpf_load.o libbpf.o test_cgrp2_sock2.o
|
||||
xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
|
||||
# reuse xdp1 source intentionally
|
||||
xdp2-objs := bpf_load.o libbpf.o xdp1_user.o
|
||||
|
@ -73,6 +77,7 @@ always += tracex3_kern.o
|
|||
always += tracex4_kern.o
|
||||
always += tracex5_kern.o
|
||||
always += tracex6_kern.o
|
||||
always += sock_flags_kern.o
|
||||
always += test_probe_write_user_kern.o
|
||||
always += trace_output_kern.o
|
||||
always += tcbpf1_kern.o
|
||||
|
@ -107,6 +112,7 @@ HOSTLOADLIBES_tracex3 += -lelf
|
|||
HOSTLOADLIBES_tracex4 += -lelf -lrt
|
||||
HOSTLOADLIBES_tracex5 += -lelf
|
||||
HOSTLOADLIBES_tracex6 += -lelf
|
||||
HOSTLOADLIBES_test_cgrp2_sock2 += -lelf
|
||||
HOSTLOADLIBES_test_probe_write_user += -lelf
|
||||
HOSTLOADLIBES_trace_output += -lelf -lrt
|
||||
HOSTLOADLIBES_lathist += -lelf
|
||||
|
|
|
@ -52,6 +52,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
|
|||
bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0;
|
||||
bool is_xdp = strncmp(event, "xdp", 3) == 0;
|
||||
bool is_perf_event = strncmp(event, "perf_event", 10) == 0;
|
||||
bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0;
|
||||
bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0;
|
||||
enum bpf_prog_type prog_type;
|
||||
char buf[256];
|
||||
int fd, efd, err, id;
|
||||
|
@ -72,6 +74,10 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
|
|||
prog_type = BPF_PROG_TYPE_XDP;
|
||||
} else if (is_perf_event) {
|
||||
prog_type = BPF_PROG_TYPE_PERF_EVENT;
|
||||
} else if (is_cgroup_skb) {
|
||||
prog_type = BPF_PROG_TYPE_CGROUP_SKB;
|
||||
} else if (is_cgroup_sk) {
|
||||
prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
|
||||
} else {
|
||||
printf("Unknown event '%s'\n", event);
|
||||
return -1;
|
||||
|
@ -85,7 +91,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
|
|||
|
||||
prog_fd[prog_cnt++] = fd;
|
||||
|
||||
if (is_xdp || is_perf_event)
|
||||
if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk)
|
||||
return 0;
|
||||
|
||||
if (is_socket) {
|
||||
|
@ -334,7 +340,8 @@ int load_bpf_file(char *path)
|
|||
memcmp(shname_prog, "tracepoint/", 11) == 0 ||
|
||||
memcmp(shname_prog, "xdp", 3) == 0 ||
|
||||
memcmp(shname_prog, "perf_event", 10) == 0 ||
|
||||
memcmp(shname_prog, "socket", 6) == 0)
|
||||
memcmp(shname_prog, "socket", 6) == 0 ||
|
||||
memcmp(shname_prog, "cgroup/", 7) == 0)
|
||||
load_and_attach(shname_prog, insns, data_prog->d_size);
|
||||
}
|
||||
}
|
||||
|
@ -353,7 +360,8 @@ int load_bpf_file(char *path)
|
|||
memcmp(shname, "tracepoint/", 11) == 0 ||
|
||||
memcmp(shname, "xdp", 3) == 0 ||
|
||||
memcmp(shname, "perf_event", 10) == 0 ||
|
||||
memcmp(shname, "socket", 6) == 0)
|
||||
memcmp(shname, "socket", 6) == 0 ||
|
||||
memcmp(shname, "cgroup/", 7) == 0)
|
||||
load_and_attach(shname, data->d_buf, data->d_size);
|
||||
}
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
extern int map_fd[MAX_MAPS];
|
||||
extern int prog_fd[MAX_PROGS];
|
||||
extern int event_fd[MAX_PROGS];
|
||||
extern int prog_cnt;
|
||||
|
||||
/* parses elf file compiled by llvm .c->.o
|
||||
* . parses 'maps' section and creates maps via BPF syscall
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
#include <uapi/linux/bpf.h>
|
||||
#include <linux/socket.h>
|
||||
#include <linux/net.h>
|
||||
#include <uapi/linux/in.h>
|
||||
#include <uapi/linux/in6.h>
|
||||
#include "bpf_helpers.h"
|
||||
|
||||
/* Cgroup sock filter: traces the family, type and protocol of every
 * socket it sees, then rejects PF_INET6 / SOCK_RAW / IPPROTO_ICMPV6
 * sockets (i.e. makes ping6 fail).
 *
 * Returns 0 to reject the socket and 1 to accept it.
 */
SEC("cgroup/sock1")
int bpf_prog1(struct bpf_sock *sk)
{
	char fmt[] = "socket: family %d type %d protocol %d\n";

	bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol);

	/* Anything that is not a raw ICMPv6 socket is let through. */
	if (sk->family != PF_INET6)
		return 1;
	if (sk->type != SOCK_RAW)
		return 1;
	if (sk->protocol != IPPROTO_ICMPV6)
		return 1;

	return 0;
}
|
||||
|
||||
/* Cgroup sock filter: traces the family, type and protocol of every
 * socket it sees, then rejects PF_INET / SOCK_RAW / IPPROTO_ICMP
 * sockets (i.e. makes ping fail).
 *
 * Returns 0 to reject the socket and 1 to accept it.
 */
SEC("cgroup/sock2")
int bpf_prog2(struct bpf_sock *sk)
{
	char fmt[] = "socket: family %d type %d protocol %d\n";

	bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol);

	/* Anything that is not a raw ICMP socket is let through. */
	if (sk->family != PF_INET)
		return 1;
	if (sk->type != SOCK_RAW)
		return 1;
	if (sk->protocol != IPPROTO_ICMP)
		return 1;

	return 0;
}
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
|
@ -0,0 +1,83 @@
|
|||
/* eBPF example program:
|
||||
*
|
||||
* - Loads eBPF program
|
||||
*
|
||||
* The eBPF program sets the sk_bound_dev_if index in new AF_INET{6}
|
||||
* sockets opened by processes in the cgroup.
|
||||
*
|
||||
* - Attaches the new program to a cgroup using BPF_PROG_ATTACH
|
||||
*/
|
||||
|
||||
#define _GNU_SOURCE
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <net/if.h>
|
||||
#include <linux/bpf.h>
|
||||
|
||||
#include "libbpf.h"
|
||||
|
||||
static int prog_load(int idx)
|
||||
{
|
||||
struct bpf_insn prog[] = {
|
||||
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
|
||||
BPF_MOV64_IMM(BPF_REG_3, idx),
|
||||
BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, bound_dev_if)),
|
||||
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, bound_dev_if)),
|
||||
BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */
|
||||
BPF_EXIT_INSN(),
|
||||
};
|
||||
|
||||
return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, prog, sizeof(prog),
|
||||
"GPL", 0);
|
||||
}
|
||||
|
||||
/* Print the command-line synopsis and return EXIT_FAILURE so callers can
 * write "return usage(argv[0]);".
 *
 * The second argument is a device *name*: main() resolves it with
 * if_nametoindex(), so the synopsis says device-name rather than
 * device-index.
 */
static int usage(const char *argv0)
{
	printf("Usage: %s cg-path device-name\n", argv0);
	return EXIT_FAILURE;
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int cg_fd, prog_fd, ret;
|
||||
unsigned int idx;
|
||||
|
||||
if (argc < 2)
|
||||
return usage(argv[0]);
|
||||
|
||||
idx = if_nametoindex(argv[2]);
|
||||
if (!idx) {
|
||||
printf("Invalid device name\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY);
|
||||
if (cg_fd < 0) {
|
||||
printf("Failed to open cgroup path: '%s'\n", strerror(errno));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
prog_fd = prog_load(idx);
|
||||
printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
|
||||
|
||||
if (prog_fd < 0) {
|
||||
printf("Failed to load prog: '%s'\n", strerror(errno));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE);
|
||||
if (ret < 0) {
|
||||
printf("Failed to attach prog to cgroup: '%s'\n",
|
||||
strerror(errno));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
|
@ -0,0 +1,47 @@
|
|||
#!/bin/bash
#
# Creates a veth pair (one end in netns at_ns0, the other enslaved to the
# VRF device "foo"), attaches the test_cgrp2_sock program for device foo
# to a cgroup, moves this shell into that cgroup and then pings the
# addresses configured in the namespace.

# Build the namespace, the veth pair, the VRF device and the addresses.
config_device() {
	ip netns add at_ns0
	ip link add veth0 type veth peer name veth0b
	ip link set veth0b up
	ip link set veth0 netns at_ns0
	ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
	ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad
	ip netns exec at_ns0 ip link set dev veth0 up
	ip link add foo type vrf table 1234
	ip link set foo up
	ip addr add 172.16.1.101/24 dev veth0b
	ip addr add 2401:db00::2/64 dev veth0b nodad
	ip link set veth0b master foo
}

# Mount cgroup2, attach the program to the "foo" cgroup and join it.
attach_bpf() {
	rm -rf /tmp/cgroupv2
	mkdir -p /tmp/cgroupv2
	mount -t cgroup2 none /tmp/cgroupv2
	mkdir -p /tmp/cgroupv2/foo
	test_cgrp2_sock /tmp/cgroupv2/foo foo
	echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
}

# Tear everything down. Errors are ignored while cleaning up; errexit and
# xtrace are switched on again on the way out, so they stay enabled for
# whatever runs after the first cleanup call.
cleanup() {
	set +ex
	ip netns delete at_ns0
	ip link del veth0
	ip link del foo
	umount /tmp/cgroupv2
	rm -rf /tmp/cgroupv2
	set -ex
}

# One IPv4 and one IPv6 ping towards the namespace addresses.
do_test() {
	ping -c1 -w1 172.16.1.100
	ping6 -c1 -w1 2401:db00::1
}

cleanup 2>/dev/null
config_device
attach_bpf
do_test
cleanup
echo "*** PASS ***"
|
|
@ -0,0 +1,66 @@
|
|||
/* eBPF example program:
|
||||
*
|
||||
* - Loads eBPF program
|
||||
*
|
||||
* The eBPF program loads a filter from file and attaches the
|
||||
* program to a cgroup using BPF_PROG_ATTACH
|
||||
*/
|
||||
|
||||
#define _GNU_SOURCE
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <net/if.h>
|
||||
#include <linux/bpf.h>
|
||||
|
||||
#include "libbpf.h"
|
||||
#include "bpf_load.h"
|
||||
|
||||
/* Write the command-line synopsis to stdout and hand back EXIT_FAILURE,
 * so that callers can simply "return usage(argv[0]);".
 */
static int usage(const char *argv0)
{
	fprintf(stdout, "Usage: %s cg-path filter-path [filter-id]\n", argv0);

	return EXIT_FAILURE;
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int cg_fd, ret, filter_id = 0;
|
||||
|
||||
if (argc < 3)
|
||||
return usage(argv[0]);
|
||||
|
||||
cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY);
|
||||
if (cg_fd < 0) {
|
||||
printf("Failed to open cgroup path: '%s'\n", strerror(errno));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (load_bpf_file(argv[2]))
|
||||
return EXIT_FAILURE;
|
||||
|
||||
printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
|
||||
|
||||
if (argc > 3)
|
||||
filter_id = atoi(argv[3]);
|
||||
|
||||
if (filter_id > prog_cnt) {
|
||||
printf("Invalid program id; program not found in file\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
ret = bpf_prog_attach(prog_fd[filter_id], cg_fd,
|
||||
BPF_CGROUP_INET_SOCK_CREATE);
|
||||
if (ret < 0) {
|
||||
printf("Failed to attach prog to cgroup: '%s'\n",
|
||||
strerror(errno));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
|
@ -0,0 +1,81 @@
|
|||
#!/bin/bash
#
# Attaches the programs from sock_flags_kern.o to a cgroup one after the
# other and checks with ping / ping6 that the expected kind of socket is
# refused while the other one still works.

# Build the namespace, the veth pair and the addresses.
function config_device {
	ip netns add at_ns0
	ip link add veth0 type veth peer name veth0b
	ip link set veth0b up
	ip link set veth0 netns at_ns0
	ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
	ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad
	ip netns exec at_ns0 ip link set dev veth0 up
	ip addr add 172.16.1.101/24 dev veth0b
	ip addr add 2401:db00::2/64 dev veth0b nodad
}

# Mount cgroup2, create the "foo" cgroup and move this shell into it.
function config_cgroup {
	rm -rf /tmp/cgroupv2
	mkdir -p /tmp/cgroupv2
	mount -t cgroup2 none /tmp/cgroupv2
	mkdir -p /tmp/cgroupv2/foo
	echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
}

# Attach program number $1 from sock_flags_kern.o to the "foo" cgroup.
# On failure, tear the setup down before exiting, as every other failure
# path in this script does.
function attach_bpf {
	test_cgrp2_sock2 /tmp/cgroupv2/foo sock_flags_kern.o $1
	if [ $? -ne 0 ]; then
		cleanup
		exit 1
	fi
}

# Remove the devices, the namespace and the cgroup mount.
function cleanup {
	ip link del veth0b
	ip netns delete at_ns0
	umount /tmp/cgroupv2
	rm -rf /tmp/cgroupv2
}

cleanup 2>/dev/null

# Setup must succeed; the tests below inspect exit codes themselves.
set -e
config_device
config_cgroup
set +e

#
# Test 1 - fail ping6
#
attach_bpf 0
ping -c1 -w1 172.16.1.100
if [ $? -ne 0 ]; then
	echo "ping failed when it should succeed"
	cleanup
	exit 1
fi

ping6 -c1 -w1 2401:db00::1
if [ $? -eq 0 ]; then
	echo "ping6 succeeded when it should not"
	cleanup
	exit 1
fi

#
# Test 2 - fail ping
#
attach_bpf 1
ping6 -c1 -w1 2401:db00::1
if [ $? -ne 0 ]; then
	echo "ping6 failed when it should succeed"
	cleanup
	exit 1
fi

ping -c1 -w1 172.16.1.100
if [ $? -eq 0 ]; then
	echo "ping succeeded when it should not"
	cleanup
	exit 1
fi

cleanup
echo
echo "*** PASS ***"
|
Загрузка…
Ссылка в новой задаче