Merge branch 'bpf-flow-dissector-fixes'
Stanislav Fomichev says: ==================== This patch series fixes the existing BPF flow dissector API to support calling BPF progs from the eth_get_headlen context (the support itself will be added in bpf-next tree). The summary of the changes: * fix VLAN handling in bpf_flow.c, we don't need to peek back and look at skb->vlan_present; add selftests * pass and use flow_keys->n_proto instead of skb->protocol * fix clamping of flow_keys->nhoff for packets with nhoff > 0 * prohibit access to most of the __sk_buff fields from BPF flow dissector progs; only data/data_end/flow_keys are allowed (all input is now passed via flow_keys) * finally, document BPF flow dissector program environment ==================== Acked-by: Willem de Bruijn <willemb@google.com> Acked-by: Petar Penkov <peterpenkov96@gmail.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
This commit is contained in:
Коммит
a090dbf25c
|
@ -0,0 +1,115 @@
|
|||
==================
|
||||
BPF Flow Dissector
|
||||
==================
|
||||
|
||||
Overview
|
||||
========
|
||||
|
||||
Flow dissector is a routine that parses metadata out of the packets. It's
|
||||
used in various places in the networking subsystem (RFS, flow hash, etc).
|
||||
|
||||
BPF flow dissector is an attempt to reimplement C-based flow dissector logic
|
||||
in BPF to gain all the benefits of BPF verifier (namely, limits on the
|
||||
number of instructions and tail calls).
|
||||
|
||||
API
|
||||
===
|
||||
|
||||
BPF flow dissector programs operate on an __sk_buff. However, only a
|
||||
limited set of fields is allowed: data, data_end and flow_keys. flow_keys
|
||||
is 'struct bpf_flow_keys' and contains flow dissector input and
|
||||
output arguments.
|
||||
|
||||
The inputs are:
|
||||
* nhoff - initial offset of the networking header
|
||||
* thoff - initial offset of the transport header, initialized to nhoff
|
||||
* n_proto - L3 protocol type, parsed out of L2 header
|
||||
|
||||
Flow dissector BPF program should fill out the rest of the 'struct
|
||||
bpf_flow_keys' fields. Input arguments nhoff/thoff/n_proto should also be
|
||||
adjusted accordingly.
|
||||
|
||||
The return code of the BPF program is either BPF_OK to indicate successful
|
||||
dissection, or BPF_DROP to indicate parsing error.
|
||||
|
||||
__sk_buff->data
|
||||
===============
|
||||
|
||||
In the VLAN-less case, this is what the initial state of the BPF flow
|
||||
dissector looks like:
|
||||
+------+------+------------+-----------+
|
||||
| DMAC | SMAC | ETHER_TYPE | L3_HEADER |
|
||||
+------+------+------------+-----------+
|
||||
^
|
||||
|
|
||||
+-- flow dissector starts here
|
||||
|
||||
skb->data + flow_keys->nhoff point to the first byte of L3_HEADER.
|
||||
flow_keys->thoff = nhoff
|
||||
flow_keys->n_proto = ETHER_TYPE
|
||||
|
||||
|
||||
In case of VLAN, flow dissector can be called in two different states.
|
||||
|
||||
Pre-VLAN parsing:
|
||||
+------+------+------+-----+-----------+-----------+
|
||||
| DMAC | SMAC | TPID | TCI |ETHER_TYPE | L3_HEADER |
|
||||
+------+------+------+-----+-----------+-----------+
|
||||
^
|
||||
|
|
||||
+-- flow dissector starts here
|
||||
|
||||
skb->data + flow_keys->nhoff point to the first byte of TCI.
|
||||
flow_keys->thoff = nhoff
|
||||
flow_keys->n_proto = TPID
|
||||
|
||||
Please note that TPID can be 802.1AD and, hence, BPF program would
|
||||
have to parse VLAN information twice for double tagged packets.
|
||||
|
||||
|
||||
Post-VLAN parsing:
|
||||
+------+------+------+-----+-----------+-----------+
|
||||
| DMAC | SMAC | TPID | TCI |ETHER_TYPE | L3_HEADER |
|
||||
+------+------+------+-----+-----------+-----------+
|
||||
^
|
||||
|
|
||||
+-- flow dissector starts here
|
||||
|
||||
skb->data + flow_keys->nhoff point to the first byte of L3_HEADER.
|
||||
flow_keys->thoff = nhoff
|
||||
flow_keys->n_proto = ETHER_TYPE
|
||||
|
||||
In this case VLAN information has been processed before the flow dissector
|
||||
and BPF flow dissector is not required to handle it.
|
||||
|
||||
|
||||
The takeaway here is as follows: BPF flow dissector program can be called with
|
||||
the optional VLAN header and should gracefully handle both cases: when single
|
||||
or double VLAN is present and when it is not present. The same program
|
||||
can be called for both cases and would have to be written carefully to
|
||||
handle both cases.
|
||||
|
||||
|
||||
Reference Implementation
|
||||
========================
|
||||
|
||||
See tools/testing/selftests/bpf/progs/bpf_flow.c for the reference
|
||||
implementation and tools/testing/selftests/bpf/flow_dissector_load.[hc] for
|
||||
the loader. bpftool can be used to load BPF flow dissector program as well.
|
||||
|
||||
The reference implementation is organized as follows:
|
||||
* jmp_table map that contains sub-programs for each supported L3 protocol
|
||||
* _dissect routine - entry point; it does input n_proto parsing and does
|
||||
bpf_tail_call to the appropriate L3 handler
|
||||
|
||||
Since BPF at this point doesn't support looping (or any jumping back),
|
||||
jmp_table is used instead to handle multiple levels of encapsulation (and
|
||||
IPv6 options).
|
||||
|
||||
|
||||
Current Limitations
|
||||
===================
|
||||
BPF flow dissector doesn't support exporting all the metadata that in-kernel
|
||||
C-based implementation can export. Notable example is single VLAN (802.1Q)
|
||||
and double VLAN (802.1AD) tags. Please refer to the 'struct bpf_flow_keys'
|
||||
for the set of information that can currently be exported from the BPF context.
|
|
@ -6613,14 +6613,8 @@ static bool flow_dissector_is_valid_access(int off, int size,
|
|||
const struct bpf_prog *prog,
|
||||
struct bpf_insn_access_aux *info)
|
||||
{
|
||||
if (type == BPF_WRITE) {
|
||||
switch (off) {
|
||||
case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (type == BPF_WRITE)
|
||||
return false;
|
||||
|
||||
switch (off) {
|
||||
case bpf_ctx_range(struct __sk_buff, data):
|
||||
|
@ -6632,11 +6626,7 @@ static bool flow_dissector_is_valid_access(int off, int size,
|
|||
case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
|
||||
info->reg_type = PTR_TO_FLOW_KEYS;
|
||||
break;
|
||||
case bpf_ctx_range(struct __sk_buff, tc_classid):
|
||||
case bpf_ctx_range(struct __sk_buff, data_meta):
|
||||
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
|
||||
case bpf_ctx_range(struct __sk_buff, tstamp):
|
||||
case bpf_ctx_range(struct __sk_buff, wire_len):
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -707,6 +707,7 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
|
|||
/* Pass parameters to the BPF program */
|
||||
memset(flow_keys, 0, sizeof(*flow_keys));
|
||||
cb->qdisc_cb.flow_keys = flow_keys;
|
||||
flow_keys->n_proto = skb->protocol;
|
||||
flow_keys->nhoff = skb_network_offset(skb);
|
||||
flow_keys->thoff = flow_keys->nhoff;
|
||||
|
||||
|
@ -716,7 +717,8 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog,
|
|||
/* Restore state */
|
||||
memcpy(cb, &cb_saved, sizeof(cb_saved));
|
||||
|
||||
flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, 0, skb->len);
|
||||
flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff,
|
||||
skb_network_offset(skb), skb->len);
|
||||
flow_keys->thoff = clamp_t(u16, flow_keys->thoff,
|
||||
flow_keys->nhoff, skb->len);
|
||||
|
||||
|
|
|
@ -39,6 +39,58 @@ static struct bpf_flow_keys pkt_v6_flow_keys = {
|
|||
.n_proto = __bpf_constant_htons(ETH_P_IPV6),
|
||||
};
|
||||
|
||||
#define VLAN_HLEN 4
|
||||
|
||||
static struct {
|
||||
struct ethhdr eth;
|
||||
__u16 vlan_tci;
|
||||
__u16 vlan_proto;
|
||||
struct iphdr iph;
|
||||
struct tcphdr tcp;
|
||||
} __packed pkt_vlan_v4 = {
|
||||
.eth.h_proto = __bpf_constant_htons(ETH_P_8021Q),
|
||||
.vlan_proto = __bpf_constant_htons(ETH_P_IP),
|
||||
.iph.ihl = 5,
|
||||
.iph.protocol = IPPROTO_TCP,
|
||||
.iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
|
||||
.tcp.urg_ptr = 123,
|
||||
.tcp.doff = 5,
|
||||
};
|
||||
|
||||
static struct bpf_flow_keys pkt_vlan_v4_flow_keys = {
|
||||
.nhoff = VLAN_HLEN,
|
||||
.thoff = VLAN_HLEN + sizeof(struct iphdr),
|
||||
.addr_proto = ETH_P_IP,
|
||||
.ip_proto = IPPROTO_TCP,
|
||||
.n_proto = __bpf_constant_htons(ETH_P_IP),
|
||||
};
|
||||
|
||||
static struct {
|
||||
struct ethhdr eth;
|
||||
__u16 vlan_tci;
|
||||
__u16 vlan_proto;
|
||||
__u16 vlan_tci2;
|
||||
__u16 vlan_proto2;
|
||||
struct ipv6hdr iph;
|
||||
struct tcphdr tcp;
|
||||
} __packed pkt_vlan_v6 = {
|
||||
.eth.h_proto = __bpf_constant_htons(ETH_P_8021AD),
|
||||
.vlan_proto = __bpf_constant_htons(ETH_P_8021Q),
|
||||
.vlan_proto2 = __bpf_constant_htons(ETH_P_IPV6),
|
||||
.iph.nexthdr = IPPROTO_TCP,
|
||||
.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
|
||||
.tcp.urg_ptr = 123,
|
||||
.tcp.doff = 5,
|
||||
};
|
||||
|
||||
static struct bpf_flow_keys pkt_vlan_v6_flow_keys = {
|
||||
.nhoff = VLAN_HLEN * 2,
|
||||
.thoff = VLAN_HLEN * 2 + sizeof(struct ipv6hdr),
|
||||
.addr_proto = ETH_P_IPV6,
|
||||
.ip_proto = IPPROTO_TCP,
|
||||
.n_proto = __bpf_constant_htons(ETH_P_IPV6),
|
||||
};
|
||||
|
||||
void test_flow_dissector(void)
|
||||
{
|
||||
struct bpf_flow_keys flow_keys;
|
||||
|
@ -68,5 +120,21 @@ void test_flow_dissector(void)
|
|||
err, errno, retval, duration, size, sizeof(flow_keys));
|
||||
CHECK_FLOW_KEYS("ipv6_flow_keys", flow_keys, pkt_v6_flow_keys);
|
||||
|
||||
err = bpf_prog_test_run(prog_fd, 10, &pkt_vlan_v4, sizeof(pkt_vlan_v4),
|
||||
&flow_keys, &size, &retval, &duration);
|
||||
CHECK(size != sizeof(flow_keys) || err || retval != 1, "vlan_ipv4",
|
||||
"err %d errno %d retval %d duration %d size %u/%lu\n",
|
||||
err, errno, retval, duration, size, sizeof(flow_keys));
|
||||
CHECK_FLOW_KEYS("vlan_ipv4_flow_keys", flow_keys,
|
||||
pkt_vlan_v4_flow_keys);
|
||||
|
||||
err = bpf_prog_test_run(prog_fd, 10, &pkt_vlan_v6, sizeof(pkt_vlan_v6),
|
||||
&flow_keys, &size, &retval, &duration);
|
||||
CHECK(size != sizeof(flow_keys) || err || retval != 1, "vlan_ipv6",
|
||||
"err %d errno %d retval %d duration %d size %u/%lu\n",
|
||||
err, errno, retval, duration, size, sizeof(flow_keys));
|
||||
CHECK_FLOW_KEYS("vlan_ipv6_flow_keys", flow_keys,
|
||||
pkt_vlan_v6_flow_keys);
|
||||
|
||||
bpf_object__close(obj);
|
||||
}
|
||||
|
|
|
@ -92,7 +92,6 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
|
|||
{
|
||||
struct bpf_flow_keys *keys = skb->flow_keys;
|
||||
|
||||
keys->n_proto = proto;
|
||||
switch (proto) {
|
||||
case bpf_htons(ETH_P_IP):
|
||||
bpf_tail_call(skb, &jmp_table, IP);
|
||||
|
@ -119,10 +118,9 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
|
|||
SEC("flow_dissector")
|
||||
int _dissect(struct __sk_buff *skb)
|
||||
{
|
||||
if (!skb->vlan_present)
|
||||
return parse_eth_proto(skb, skb->protocol);
|
||||
else
|
||||
return parse_eth_proto(skb, skb->vlan_proto);
|
||||
struct bpf_flow_keys *keys = skb->flow_keys;
|
||||
|
||||
return parse_eth_proto(skb, keys->n_proto);
|
||||
}
|
||||
|
||||
/* Parses on IPPROTO_* */
|
||||
|
@ -336,15 +334,9 @@ PROG(VLAN)(struct __sk_buff *skb)
|
|||
{
|
||||
struct bpf_flow_keys *keys = skb->flow_keys;
|
||||
struct vlan_hdr *vlan, _vlan;
|
||||
__be16 proto;
|
||||
|
||||
/* Peek back to see if single or double-tagging */
|
||||
if (bpf_skb_load_bytes(skb, keys->thoff - sizeof(proto), &proto,
|
||||
sizeof(proto)))
|
||||
return BPF_DROP;
|
||||
|
||||
/* Account for double-tagging */
|
||||
if (proto == bpf_htons(ETH_P_8021AD)) {
|
||||
if (keys->n_proto == bpf_htons(ETH_P_8021AD)) {
|
||||
vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan);
|
||||
if (!vlan)
|
||||
return BPF_DROP;
|
||||
|
@ -352,6 +344,7 @@ PROG(VLAN)(struct __sk_buff *skb)
|
|||
if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
|
||||
return BPF_DROP;
|
||||
|
||||
keys->nhoff += sizeof(*vlan);
|
||||
keys->thoff += sizeof(*vlan);
|
||||
}
|
||||
|
||||
|
@ -359,12 +352,14 @@ PROG(VLAN)(struct __sk_buff *skb)
|
|||
if (!vlan)
|
||||
return BPF_DROP;
|
||||
|
||||
keys->nhoff += sizeof(*vlan);
|
||||
keys->thoff += sizeof(*vlan);
|
||||
/* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/
|
||||
if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
|
||||
vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
|
||||
return BPF_DROP;
|
||||
|
||||
keys->n_proto = vlan->h_vlan_encapsulated_proto;
|
||||
return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto);
|
||||
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче