bpf: Derive source IP addr via bpf_*_fib_lookup()

commit dab4e1f06cabb6834de14264394ccab197007302 upstream.

Extend the bpf_fib_lookup() helper so that it returns the source
IPv4/IPv6 address if the BPF_FIB_LOOKUP_SRC flag is set.

For example, the following snippet can be used to derive the desired
source IP address:

    struct bpf_fib_lookup p = { .ipv4_dst = ip4->daddr };

    ret = bpf_skb_fib_lookup(skb, &p, sizeof(p),
            BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH);
    if (ret != BPF_FIB_LKUP_RET_SUCCESS)
        return TC_ACT_SHOT;

    /* p.ipv4_src now contains the derived source address */

The inability to derive the proper source address may cause malfunctions
in BPF-based dataplanes for hosts containing netdevs with more than one
routable IP address or for multi-homed hosts.

For example, Cilium implements packet masquerading in BPF. If an
egress netdev to which Cilium's BPF prog is attached has multiple IP
addresses, then only one [hardcoded] IP address can be used for
masquerading. This breaks connectivity if any other IP address should have
been selected instead, for example, when both a public and a private
address are attached to the same egress interface.

The change was tested with Cilium [1].

Nikolay Aleksandrov helped to figure out the IPv6 addr selection.

[1]: https://github.com/cilium/cilium/pull/28283

Signed-off-by: Martynas Pumputis <m@lambda.lt>
Link: https://lore.kernel.org/r/20231007081415.33502-2-m@lambda.lt
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
Martynas Pumputis 2023-10-07 10:14:14 +02:00 коммит произвёл Greg Kroah-Hartman
Родитель 39b4ee40d2
Коммит 68dbe92d67
5 изменённых файлов: 43 добавлений и 1 удалений

Просмотреть файл

@ -81,6 +81,11 @@ struct ipv6_bpf_stub {
const struct in6_addr *daddr, __be16 dport, const struct in6_addr *daddr, __be16 dport,
int dif, int sdif, struct udp_table *tbl, int dif, int sdif, struct udp_table *tbl,
struct sk_buff *skb); struct sk_buff *skb);
int (*ipv6_dev_get_saddr)(struct net *net,
const struct net_device *dst_dev,
const struct in6_addr *daddr,
unsigned int prefs,
struct in6_addr *saddr);
}; };
extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; extern const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;

Просмотреть файл

@ -3023,6 +3023,11 @@ union bpf_attr {
* and *params*->smac will not be set as output. A common * and *params*->smac will not be set as output. A common
* use case is to call **bpf_redirect_neigh**\ () after * use case is to call **bpf_redirect_neigh**\ () after
* doing **bpf_fib_lookup**\ (). * doing **bpf_fib_lookup**\ ().
* **BPF_FIB_LOOKUP_SRC**
* Derive and set source IP addr in *params*->ipv{4,6}_src
* for the nexthop. If the src addr cannot be derived,
* **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
* case, *params*->dmac and *params*->smac are not set either.
* *
* *ctx* is either **struct xdp_md** for XDP programs or * *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs. * **struct sk_buff** tc cls_act programs.
@ -6051,6 +6056,7 @@ enum {
BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
BPF_FIB_LOOKUP_TBID = (1U << 3), BPF_FIB_LOOKUP_TBID = (1U << 3),
BPF_FIB_LOOKUP_SRC = (1U << 4),
}; };
enum { enum {
@ -6063,6 +6069,7 @@ enum {
BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */
}; };
struct bpf_fib_lookup { struct bpf_fib_lookup {
@ -6097,6 +6104,9 @@ struct bpf_fib_lookup {
__u32 rt_metric; __u32 rt_metric;
}; };
/* input: source address to consider for lookup
* output: source address result from lookup
*/
union { union {
__be32 ipv4_src; __be32 ipv4_src;
__u32 ipv6_src[4]; /* in6_addr; network order */ __u32 ipv6_src[4]; /* in6_addr; network order */

Просмотреть файл

@ -5504,6 +5504,9 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
params->rt_metric = res.fi->fib_priority; params->rt_metric = res.fi->fib_priority;
params->ifindex = dev->ifindex; params->ifindex = dev->ifindex;
if (flags & BPF_FIB_LOOKUP_SRC)
params->ipv4_src = fib_result_prefsrc(net, &res);
/* xdp and cls_bpf programs are run in RCU-bh so /* xdp and cls_bpf programs are run in RCU-bh so
* rcu_read_lock_bh is not needed here * rcu_read_lock_bh is not needed here
*/ */
@ -5646,6 +5649,18 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
params->rt_metric = res.f6i->fib6_metric; params->rt_metric = res.f6i->fib6_metric;
params->ifindex = dev->ifindex; params->ifindex = dev->ifindex;
if (flags & BPF_FIB_LOOKUP_SRC) {
if (res.f6i->fib6_prefsrc.plen) {
*src = res.f6i->fib6_prefsrc.addr;
} else {
err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev,
&fl6.daddr, 0,
src);
if (err)
return BPF_FIB_LKUP_RET_NO_SRC_ADDR;
}
}
if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH) if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
goto set_fwd_params; goto set_fwd_params;
@ -5664,7 +5679,8 @@ set_fwd_params:
#endif #endif
#define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \ #define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID) BPF_FIB_LOOKUP_SKIP_NEIGH | BPF_FIB_LOOKUP_TBID | \
BPF_FIB_LOOKUP_SRC)
BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx, BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
struct bpf_fib_lookup *, params, int, plen, u32, flags) struct bpf_fib_lookup *, params, int, plen, u32, flags)

Просмотреть файл

@ -1061,6 +1061,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
.inet6_bind = __inet6_bind, .inet6_bind = __inet6_bind,
.udp6_lib_lookup = __udp6_lib_lookup, .udp6_lib_lookup = __udp6_lib_lookup,
.ipv6_dev_get_saddr = ipv6_dev_get_saddr,
}; };
static int __init inet6_init(void) static int __init inet6_init(void)

Просмотреть файл

@ -3023,6 +3023,11 @@ union bpf_attr {
* and *params*->smac will not be set as output. A common * and *params*->smac will not be set as output. A common
* use case is to call **bpf_redirect_neigh**\ () after * use case is to call **bpf_redirect_neigh**\ () after
* doing **bpf_fib_lookup**\ (). * doing **bpf_fib_lookup**\ ().
* **BPF_FIB_LOOKUP_SRC**
* Derive and set source IP addr in *params*->ipv{4,6}_src
* for the nexthop. If the src addr cannot be derived,
* **BPF_FIB_LKUP_RET_NO_SRC_ADDR** is returned. In this
* case, *params*->dmac and *params*->smac are not set either.
* *
* *ctx* is either **struct xdp_md** for XDP programs or * *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs. * **struct sk_buff** tc cls_act programs.
@ -6051,6 +6056,7 @@ enum {
BPF_FIB_LOOKUP_OUTPUT = (1U << 1), BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2), BPF_FIB_LOOKUP_SKIP_NEIGH = (1U << 2),
BPF_FIB_LOOKUP_TBID = (1U << 3), BPF_FIB_LOOKUP_TBID = (1U << 3),
BPF_FIB_LOOKUP_SRC = (1U << 4),
}; };
enum { enum {
@ -6063,6 +6069,7 @@ enum {
BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */ BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
BPF_FIB_LKUP_RET_NO_SRC_ADDR, /* failed to derive IP src addr */
}; };
struct bpf_fib_lookup { struct bpf_fib_lookup {
@ -6097,6 +6104,9 @@ struct bpf_fib_lookup {
__u32 rt_metric; __u32 rt_metric;
}; };
/* input: source address to consider for lookup
* output: source address result from lookup
*/
union { union {
__be32 ipv4_src; __be32 ipv4_src;
__u32 ipv6_src[4]; /* in6_addr; network order */ __u32 ipv6_src[4]; /* in6_addr; network order */