bpf: Change bpf_fib_lookup to return lookup status

For ACLs implemented using either FIB rules or FIB entries, the BPF
program needs the FIB lookup status to be able to drop the packet.
Since the bpf_fib_lookup API has not reached a released kernel yet,
change the return code to contain an encoding of the FIB lookup
result and return the nexthop device index in the params struct.

In addition, inform the BPF program of any post-FIB-lookup reason why
the packet needs to go up the stack.

The fib result for unicast routes must have an egress device, so remove
the check that it is non-NULL.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
This commit is contained in:
David Ahern 2018-06-26 16:21:18 -07:00 коммит произвёл Daniel Borkmann
Родитель 3203c90100
Коммит 4c79579b44
3 изменённых файлов: 81 добавлений и 41 удалений

Просмотреть файл

@ -1857,7 +1857,8 @@ union bpf_attr {
* is resolved), the nexthop address is returned in ipv4_dst * is resolved), the nexthop address is returned in ipv4_dst
* or ipv6_dst based on family, smac is set to mac address of * or ipv6_dst based on family, smac is set to mac address of
* egress device, dmac is set to nexthop mac address, rt_metric * egress device, dmac is set to nexthop mac address, rt_metric
* is set to metric from route (IPv4/IPv6 only). * is set to metric from route (IPv4/IPv6 only), and ifindex
* is set to the device index of the nexthop from the FIB lookup.
* *
* *plen* argument is the size of the passed in struct. * *plen* argument is the size of the passed in struct.
* *flags* argument can be a combination of one or more of the * *flags* argument can be a combination of one or more of the
@ -1873,9 +1874,10 @@ union bpf_attr {
* *ctx* is either **struct xdp_md** for XDP programs or * *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs. * **struct sk_buff** tc cls_act programs.
* Return * Return
* Egress device index on success, 0 if packet needs to continue * * < 0 if any input argument is invalid
* up the stack for further processing or a negative error in case * * 0 on success (packet is forwarded, nexthop neighbor exists)
* of failure. * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
* * packet is not forwarded or needs assist from full stack
* *
* int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags) * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
* Description * Description
@ -2612,6 +2614,18 @@ struct bpf_raw_tracepoint_args {
#define BPF_FIB_LOOKUP_DIRECT BIT(0) #define BPF_FIB_LOOKUP_DIRECT BIT(0)
#define BPF_FIB_LOOKUP_OUTPUT BIT(1) #define BPF_FIB_LOOKUP_OUTPUT BIT(1)
/* Result codes returned by the bpf_fib_lookup() helper.
 *
 * Enumerators are numbered implicitly from 0, so
 * BPF_FIB_LKUP_RET_SUCCESS == 0 and every other code is > 0.
 * A positive code explains why the packet is not forwarded or needs
 * assistance from the full stack; negative return values from the
 * helper are not part of this enum and indicate invalid input
 * arguments.
 */
enum {
BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */
BPF_FIB_LKUP_RET_BLACKHOLE, /* dest is blackholed; can be dropped */
BPF_FIB_LKUP_RET_UNREACHABLE, /* dest is unreachable; can be dropped */
BPF_FIB_LKUP_RET_PROHIBIT, /* dest not allowed; can be dropped */
BPF_FIB_LKUP_RET_NOT_FWDED, /* packet is not forwarded */
BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */
BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
};
struct bpf_fib_lookup { struct bpf_fib_lookup {
/* input: network family for lookup (AF_INET, AF_INET6) /* input: network family for lookup (AF_INET, AF_INET6)
* output: network family of egress nexthop * output: network family of egress nexthop
@ -2625,7 +2639,11 @@ struct bpf_fib_lookup {
/* total length of packet from network header - used for MTU check */ /* total length of packet from network header - used for MTU check */
__u16 tot_len; __u16 tot_len;
__u32 ifindex; /* L3 device index for lookup */
/* input: L3 device index for lookup
* output: device index from FIB lookup
*/
__u32 ifindex;
union { union {
/* inputs to lookup */ /* inputs to lookup */

Просмотреть файл

@ -4073,8 +4073,9 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
memcpy(params->smac, dev->dev_addr, ETH_ALEN); memcpy(params->smac, dev->dev_addr, ETH_ALEN);
params->h_vlan_TCI = 0; params->h_vlan_TCI = 0;
params->h_vlan_proto = 0; params->h_vlan_proto = 0;
params->ifindex = dev->ifindex;
return dev->ifindex; return 0;
} }
#endif #endif
@ -4098,7 +4099,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
/* verify forwarding is enabled on this interface */ /* verify forwarding is enabled on this interface */
in_dev = __in_dev_get_rcu(dev); in_dev = __in_dev_get_rcu(dev);
if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev))) if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev)))
return 0; return BPF_FIB_LKUP_RET_FWD_DISABLED;
if (flags & BPF_FIB_LOOKUP_OUTPUT) { if (flags & BPF_FIB_LOOKUP_OUTPUT) {
fl4.flowi4_iif = 1; fl4.flowi4_iif = 1;
@ -4123,7 +4124,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
tb = fib_get_table(net, tbid); tb = fib_get_table(net, tbid);
if (unlikely(!tb)) if (unlikely(!tb))
return 0; return BPF_FIB_LKUP_RET_NOT_FWDED;
err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF); err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
} else { } else {
@ -4135,8 +4136,20 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF); err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF);
} }
if (err || res.type != RTN_UNICAST) if (err) {
return 0; /* map fib lookup errors to RTN_ type */
if (err == -EINVAL)
return BPF_FIB_LKUP_RET_BLACKHOLE;
if (err == -EHOSTUNREACH)
return BPF_FIB_LKUP_RET_UNREACHABLE;
if (err == -EACCES)
return BPF_FIB_LKUP_RET_PROHIBIT;
return BPF_FIB_LKUP_RET_NOT_FWDED;
}
if (res.type != RTN_UNICAST)
return BPF_FIB_LKUP_RET_NOT_FWDED;
if (res.fi->fib_nhs > 1) if (res.fi->fib_nhs > 1)
fib_select_path(net, &res, &fl4, NULL); fib_select_path(net, &res, &fl4, NULL);
@ -4144,19 +4157,16 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (check_mtu) { if (check_mtu) {
mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst); mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
if (params->tot_len > mtu) if (params->tot_len > mtu)
return 0; return BPF_FIB_LKUP_RET_FRAG_NEEDED;
} }
nh = &res.fi->fib_nh[res.nh_sel]; nh = &res.fi->fib_nh[res.nh_sel];
/* do not handle lwt encaps right now */ /* do not handle lwt encaps right now */
if (nh->nh_lwtstate) if (nh->nh_lwtstate)
return 0; return BPF_FIB_LKUP_RET_UNSUPP_LWT;
dev = nh->nh_dev; dev = nh->nh_dev;
if (unlikely(!dev))
return 0;
if (nh->nh_gw) if (nh->nh_gw)
params->ipv4_dst = nh->nh_gw; params->ipv4_dst = nh->nh_gw;
@ -4166,10 +4176,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
* rcu_read_lock_bh is not needed here * rcu_read_lock_bh is not needed here
*/ */
neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst); neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst);
if (neigh) if (!neigh)
return bpf_fib_set_fwd_params(params, neigh, dev); return BPF_FIB_LKUP_RET_NO_NEIGH;
return 0; return bpf_fib_set_fwd_params(params, neigh, dev);
} }
#endif #endif
@ -4190,7 +4200,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
/* link local addresses are never forwarded */ /* link local addresses are never forwarded */
if (rt6_need_strict(dst) || rt6_need_strict(src)) if (rt6_need_strict(dst) || rt6_need_strict(src))
return 0; return BPF_FIB_LKUP_RET_NOT_FWDED;
dev = dev_get_by_index_rcu(net, params->ifindex); dev = dev_get_by_index_rcu(net, params->ifindex);
if (unlikely(!dev)) if (unlikely(!dev))
@ -4198,7 +4208,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
idev = __in6_dev_get_safely(dev); idev = __in6_dev_get_safely(dev);
if (unlikely(!idev || !net->ipv6.devconf_all->forwarding)) if (unlikely(!idev || !net->ipv6.devconf_all->forwarding))
return 0; return BPF_FIB_LKUP_RET_FWD_DISABLED;
if (flags & BPF_FIB_LOOKUP_OUTPUT) { if (flags & BPF_FIB_LOOKUP_OUTPUT) {
fl6.flowi6_iif = 1; fl6.flowi6_iif = 1;
@ -4225,7 +4235,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
tb = ipv6_stub->fib6_get_table(net, tbid); tb = ipv6_stub->fib6_get_table(net, tbid);
if (unlikely(!tb)) if (unlikely(!tb))
return 0; return BPF_FIB_LKUP_RET_NOT_FWDED;
f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict); f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict);
} else { } else {
@ -4238,11 +4248,23 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
} }
if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry)) if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry))
return 0; return BPF_FIB_LKUP_RET_NOT_FWDED;
if (unlikely(f6i->fib6_flags & RTF_REJECT || if (unlikely(f6i->fib6_flags & RTF_REJECT)) {
f6i->fib6_type != RTN_UNICAST)) switch (f6i->fib6_type) {
return 0; case RTN_BLACKHOLE:
return BPF_FIB_LKUP_RET_BLACKHOLE;
case RTN_UNREACHABLE:
return BPF_FIB_LKUP_RET_UNREACHABLE;
case RTN_PROHIBIT:
return BPF_FIB_LKUP_RET_PROHIBIT;
default:
return BPF_FIB_LKUP_RET_NOT_FWDED;
}
}
if (f6i->fib6_type != RTN_UNICAST)
return BPF_FIB_LKUP_RET_NOT_FWDED;
if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0) if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0)
f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6, f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6,
@ -4252,11 +4274,11 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (check_mtu) { if (check_mtu) {
mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src); mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src);
if (params->tot_len > mtu) if (params->tot_len > mtu)
return 0; return BPF_FIB_LKUP_RET_FRAG_NEEDED;
} }
if (f6i->fib6_nh.nh_lwtstate) if (f6i->fib6_nh.nh_lwtstate)
return 0; return BPF_FIB_LKUP_RET_UNSUPP_LWT;
if (f6i->fib6_flags & RTF_GATEWAY) if (f6i->fib6_flags & RTF_GATEWAY)
*dst = f6i->fib6_nh.nh_gw; *dst = f6i->fib6_nh.nh_gw;
@ -4270,10 +4292,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
*/ */
neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128, neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
ndisc_hashfn, dst, dev); ndisc_hashfn, dst, dev);
if (neigh) if (!neigh)
return bpf_fib_set_fwd_params(params, neigh, dev); return BPF_FIB_LKUP_RET_NO_NEIGH;
return 0; return bpf_fib_set_fwd_params(params, neigh, dev);
} }
#endif #endif
@ -4315,7 +4337,7 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
struct bpf_fib_lookup *, params, int, plen, u32, flags) struct bpf_fib_lookup *, params, int, plen, u32, flags)
{ {
struct net *net = dev_net(skb->dev); struct net *net = dev_net(skb->dev);
int index = -EAFNOSUPPORT; int rc = -EAFNOSUPPORT;
if (plen < sizeof(*params)) if (plen < sizeof(*params))
return -EINVAL; return -EINVAL;
@ -4326,25 +4348,25 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
switch (params->family) { switch (params->family) {
#if IS_ENABLED(CONFIG_INET) #if IS_ENABLED(CONFIG_INET)
case AF_INET: case AF_INET:
index = bpf_ipv4_fib_lookup(net, params, flags, false); rc = bpf_ipv4_fib_lookup(net, params, flags, false);
break; break;
#endif #endif
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_IPV6)
case AF_INET6: case AF_INET6:
index = bpf_ipv6_fib_lookup(net, params, flags, false); rc = bpf_ipv6_fib_lookup(net, params, flags, false);
break; break;
#endif #endif
} }
if (index > 0) { if (!rc) {
struct net_device *dev; struct net_device *dev;
dev = dev_get_by_index_rcu(net, index); dev = dev_get_by_index_rcu(net, params->ifindex);
if (!is_skb_forwardable(dev, skb)) if (!is_skb_forwardable(dev, skb))
index = 0; rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;
} }
return index; return rc;
} }
static const struct bpf_func_proto bpf_skb_fib_lookup_proto = { static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {

Просмотреть файл

@ -48,9 +48,9 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
struct ethhdr *eth = data; struct ethhdr *eth = data;
struct ipv6hdr *ip6h; struct ipv6hdr *ip6h;
struct iphdr *iph; struct iphdr *iph;
int out_index;
u16 h_proto; u16 h_proto;
u64 nh_off; u64 nh_off;
int rc;
nh_off = sizeof(*eth); nh_off = sizeof(*eth);
if (data + nh_off > data_end) if (data + nh_off > data_end)
@ -101,7 +101,7 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
fib_params.ifindex = ctx->ingress_ifindex; fib_params.ifindex = ctx->ingress_ifindex;
out_index = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags); rc = bpf_fib_lookup(ctx, &fib_params, sizeof(fib_params), flags);
/* verify egress index has xdp support /* verify egress index has xdp support
* TO-DO bpf_map_lookup_elem(&tx_port, &key) fails with * TO-DO bpf_map_lookup_elem(&tx_port, &key) fails with
@ -109,7 +109,7 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
* NOTE: without verification that egress index supports XDP * NOTE: without verification that egress index supports XDP
* forwarding packets are dropped. * forwarding packets are dropped.
*/ */
if (out_index > 0) { if (rc == 0) {
if (h_proto == htons(ETH_P_IP)) if (h_proto == htons(ETH_P_IP))
ip_decrease_ttl(iph); ip_decrease_ttl(iph);
else if (h_proto == htons(ETH_P_IPV6)) else if (h_proto == htons(ETH_P_IPV6))
@ -117,7 +117,7 @@ static __always_inline int xdp_fwd_flags(struct xdp_md *ctx, u32 flags)
memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN); memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
memcpy(eth->h_source, fib_params.smac, ETH_ALEN); memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
return bpf_redirect_map(&tx_port, out_index, 0); return bpf_redirect_map(&tx_port, fib_params.ifindex, 0);
} }
return XDP_PASS; return XDP_PASS;