bpf: xdp: Add XDP example for head adjustment
The XDP prog checks whether the incoming packet matches any VIP:PORT combination in the BPF hashmap. If it does, the prog encapsulates the packet in an IPv4/v6 header as instructed by the value stored in the BPF hashmap and then sends it out with XDP_TX. The VIP:PORT -> IP-Encap-Info mapping can be specified through the command-line arguments of the user prog. Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: Martin KaFai Lau <kafai@fb.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Родитель
ea3349a035
Коммит
12d8bb64e3
|
@ -33,6 +33,7 @@ hostprogs-y += trace_event
|
||||||
hostprogs-y += sampleip
|
hostprogs-y += sampleip
|
||||||
hostprogs-y += tc_l2_redirect
|
hostprogs-y += tc_l2_redirect
|
||||||
hostprogs-y += lwt_len_hist
|
hostprogs-y += lwt_len_hist
|
||||||
|
hostprogs-y += xdp_tx_iptunnel
|
||||||
|
|
||||||
test_lru_dist-objs := test_lru_dist.o libbpf.o
|
test_lru_dist-objs := test_lru_dist.o libbpf.o
|
||||||
sock_example-objs := sock_example.o libbpf.o
|
sock_example-objs := sock_example.o libbpf.o
|
||||||
|
@ -67,6 +68,7 @@ trace_event-objs := bpf_load.o libbpf.o trace_event_user.o
|
||||||
sampleip-objs := bpf_load.o libbpf.o sampleip_user.o
|
sampleip-objs := bpf_load.o libbpf.o sampleip_user.o
|
||||||
tc_l2_redirect-objs := bpf_load.o libbpf.o tc_l2_redirect_user.o
|
tc_l2_redirect-objs := bpf_load.o libbpf.o tc_l2_redirect_user.o
|
||||||
lwt_len_hist-objs := bpf_load.o libbpf.o lwt_len_hist_user.o
|
lwt_len_hist-objs := bpf_load.o libbpf.o lwt_len_hist_user.o
|
||||||
|
xdp_tx_iptunnel-objs := bpf_load.o libbpf.o xdp_tx_iptunnel_user.o
|
||||||
|
|
||||||
# Tell kbuild to always build the programs
|
# Tell kbuild to always build the programs
|
||||||
always := $(hostprogs-y)
|
always := $(hostprogs-y)
|
||||||
|
@ -99,6 +101,7 @@ always += test_current_task_under_cgroup_kern.o
|
||||||
always += trace_event_kern.o
|
always += trace_event_kern.o
|
||||||
always += sampleip_kern.o
|
always += sampleip_kern.o
|
||||||
always += lwt_len_hist_kern.o
|
always += lwt_len_hist_kern.o
|
||||||
|
always += xdp_tx_iptunnel_kern.o
|
||||||
|
|
||||||
HOSTCFLAGS += -I$(objtree)/usr/include
|
HOSTCFLAGS += -I$(objtree)/usr/include
|
||||||
HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
|
HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
|
||||||
|
@ -129,6 +132,7 @@ HOSTLOADLIBES_trace_event += -lelf
|
||||||
HOSTLOADLIBES_sampleip += -lelf
|
HOSTLOADLIBES_sampleip += -lelf
|
||||||
HOSTLOADLIBES_tc_l2_redirect += -l elf
|
HOSTLOADLIBES_tc_l2_redirect += -l elf
|
||||||
HOSTLOADLIBES_lwt_len_hist += -l elf
|
HOSTLOADLIBES_lwt_len_hist += -l elf
|
||||||
|
HOSTLOADLIBES_xdp_tx_iptunnel += -lelf
|
||||||
|
|
||||||
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
|
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
|
||||||
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
|
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
|
||||||
|
|
|
@ -57,6 +57,8 @@ static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) =
|
||||||
(void *) BPF_FUNC_skb_set_tunnel_opt;
|
(void *) BPF_FUNC_skb_set_tunnel_opt;
|
||||||
static unsigned long long (*bpf_get_prandom_u32)(void) =
|
static unsigned long long (*bpf_get_prandom_u32)(void) =
|
||||||
(void *) BPF_FUNC_get_prandom_u32;
|
(void *) BPF_FUNC_get_prandom_u32;
|
||||||
|
static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
|
||||||
|
(void *) BPF_FUNC_xdp_adjust_head;
|
||||||
|
|
||||||
/* llvm builtin functions that eBPF C program may use to
|
/* llvm builtin functions that eBPF C program may use to
|
||||||
* emit BPF_LD_ABS and BPF_LD_IND instructions
|
* emit BPF_LD_ABS and BPF_LD_IND instructions
|
||||||
|
|
|
@ -12,6 +12,10 @@
|
||||||
#include <linux/bpf.h>
|
#include <linux/bpf.h>
|
||||||
#include <linux/filter.h>
|
#include <linux/filter.h>
|
||||||
#include <linux/perf_event.h>
|
#include <linux/perf_event.h>
|
||||||
|
#include <linux/netlink.h>
|
||||||
|
#include <linux/rtnetlink.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/socket.h>
|
||||||
#include <sys/syscall.h>
|
#include <sys/syscall.h>
|
||||||
#include <sys/ioctl.h>
|
#include <sys/ioctl.h>
|
||||||
#include <sys/mman.h>
|
#include <sys/mman.h>
|
||||||
|
@ -450,3 +454,93 @@ struct ksym *ksym_search(long key)
|
||||||
/* out of range. return _stext */
|
/* out of range. return _stext */
|
||||||
return &syms[0];
|
return &syms[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Attach an XDP program to (or detach it from) a network interface by
 * sending an RTM_SETLINK request with a nested IFLA_XDP/IFLA_XDP_FD
 * attribute over a NETLINK_ROUTE socket and waiting for the ACK.
 *
 * @ifindex: index of the interface to modify
 * @fd:      value placed in the IFLA_XDP_FD attribute (callers in this
 *           sample pass a program fd to attach and -1 to detach)
 *
 * Returns 0 when the kernel acknowledged the request, -1 on any failure
 * (a diagnostic is printed to stdout).
 */
int set_link_xdp_fd(int ifindex, int fd)
{
	struct sockaddr_nl sa;
	int sock, seq = 0, len, ret = -1;
	char buf[4096];
	struct nlattr *nla, *nla_xdp;
	struct {
		struct nlmsghdr nh;
		struct ifinfomsg ifinfo;
		char attrbuf[64];
	} req;
	struct nlmsghdr *nh;
	struct nlmsgerr *err;
	socklen_t addrlen;
	__u32 nl_pid;

	memset(&sa, 0, sizeof(sa));
	sa.nl_family = AF_NETLINK;

	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (sock < 0) {
		printf("open netlink socket: %s\n", strerror(errno));
		return -1;
	}

	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
		printf("bind to netlink: %s\n", strerror(errno));
		goto cleanup;
	}

	/*
	 * We bound with nl_pid == 0, so the kernel picked the port ID.
	 * It is not guaranteed to equal getpid() (e.g. when the process
	 * already owns another netlink socket), so ask the socket for it.
	 */
	addrlen = sizeof(sa);
	if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
		printf("getsockname on netlink: %s\n", strerror(errno));
		goto cleanup;
	}
	nl_pid = sa.nl_pid;

	memset(&req, 0, sizeof(req));
	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
	req.nh.nlmsg_type = RTM_SETLINK;
	req.nh.nlmsg_pid = 0;
	req.nh.nlmsg_seq = ++seq;
	req.ifinfo.ifi_family = AF_UNSPEC;
	req.ifinfo.ifi_index = ifindex;

	/* Outer nested attribute, placed right after the ifinfomsg. */
	nla = (struct nlattr *)(((char *)&req)
				+ NLMSG_ALIGN(req.nh.nlmsg_len));
	nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;

	/* Inner attribute carrying the fd. */
	nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN);
	nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
	nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
	memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
	nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len;

	req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);

	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
		printf("send to netlink: %s\n", strerror(errno));
		goto cleanup;
	}

	len = recv(sock, buf, sizeof(buf), 0);
	if (len < 0) {
		printf("recv from netlink: %s\n", strerror(errno));
		goto cleanup;
	}

	for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
	     nh = NLMSG_NEXT(nh, len)) {
		if (nh->nlmsg_pid != nl_pid) {
			printf("Wrong pid %d, expected %d\n",
			       nh->nlmsg_pid, nl_pid);
			goto cleanup;
		}
		if (nh->nlmsg_seq != seq) {
			printf("Wrong seq %d, expected %d\n",
			       nh->nlmsg_seq, seq);
			goto cleanup;
		}
		switch (nh->nlmsg_type) {
		case NLMSG_ERROR:
			err = (struct nlmsgerr *)NLMSG_DATA(nh);
			/* error == 0 is the positive ACK we asked for */
			if (!err->error)
				continue;
			printf("nlmsg error %s\n", strerror(-err->error));
			goto cleanup;
		case NLMSG_DONE:
			break;
		default:
			break;
		}
	}

	ret = 0;

cleanup:
	close(sock);
	return ret;
}
|
||||||
|
|
|
@ -31,4 +31,5 @@ struct ksym {
|
||||||
|
|
||||||
int load_kallsyms(void);
|
int load_kallsyms(void);
|
||||||
struct ksym *ksym_search(long key);
|
struct ksym *ksym_search(long key);
|
||||||
|
int set_link_xdp_fd(int ifindex, int fd);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -5,111 +5,18 @@
|
||||||
* License as published by the Free Software Foundation.
|
* License as published by the Free Software Foundation.
|
||||||
*/
|
*/
|
||||||
#include <linux/bpf.h>
|
#include <linux/bpf.h>
|
||||||
#include <linux/netlink.h>
|
|
||||||
#include <linux/rtnetlink.h>
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <sys/socket.h>
|
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
#include "bpf_load.h"
|
#include "bpf_load.h"
|
||||||
#include "bpf_util.h"
|
#include "bpf_util.h"
|
||||||
#include "libbpf.h"
|
#include "libbpf.h"
|
||||||
|
|
||||||
static int set_link_xdp_fd(int ifindex, int fd)
|
|
||||||
{
|
|
||||||
struct sockaddr_nl sa;
|
|
||||||
int sock, seq = 0, len, ret = -1;
|
|
||||||
char buf[4096];
|
|
||||||
struct nlattr *nla, *nla_xdp;
|
|
||||||
struct {
|
|
||||||
struct nlmsghdr nh;
|
|
||||||
struct ifinfomsg ifinfo;
|
|
||||||
char attrbuf[64];
|
|
||||||
} req;
|
|
||||||
struct nlmsghdr *nh;
|
|
||||||
struct nlmsgerr *err;
|
|
||||||
|
|
||||||
memset(&sa, 0, sizeof(sa));
|
|
||||||
sa.nl_family = AF_NETLINK;
|
|
||||||
|
|
||||||
sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
|
|
||||||
if (sock < 0) {
|
|
||||||
printf("open netlink socket: %s\n", strerror(errno));
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
|
|
||||||
printf("bind to netlink: %s\n", strerror(errno));
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
memset(&req, 0, sizeof(req));
|
|
||||||
req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
|
|
||||||
req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
|
|
||||||
req.nh.nlmsg_type = RTM_SETLINK;
|
|
||||||
req.nh.nlmsg_pid = 0;
|
|
||||||
req.nh.nlmsg_seq = ++seq;
|
|
||||||
req.ifinfo.ifi_family = AF_UNSPEC;
|
|
||||||
req.ifinfo.ifi_index = ifindex;
|
|
||||||
nla = (struct nlattr *)(((char *)&req)
|
|
||||||
+ NLMSG_ALIGN(req.nh.nlmsg_len));
|
|
||||||
nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
|
|
||||||
|
|
||||||
nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN);
|
|
||||||
nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
|
|
||||||
nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
|
|
||||||
memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
|
|
||||||
nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len;
|
|
||||||
|
|
||||||
req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
|
|
||||||
|
|
||||||
if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
|
|
||||||
printf("send to netlink: %s\n", strerror(errno));
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
len = recv(sock, buf, sizeof(buf), 0);
|
|
||||||
if (len < 0) {
|
|
||||||
printf("recv from netlink: %s\n", strerror(errno));
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
|
|
||||||
nh = NLMSG_NEXT(nh, len)) {
|
|
||||||
if (nh->nlmsg_pid != getpid()) {
|
|
||||||
printf("Wrong pid %d, expected %d\n",
|
|
||||||
nh->nlmsg_pid, getpid());
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
if (nh->nlmsg_seq != seq) {
|
|
||||||
printf("Wrong seq %d, expected %d\n",
|
|
||||||
nh->nlmsg_seq, seq);
|
|
||||||
goto cleanup;
|
|
||||||
}
|
|
||||||
switch (nh->nlmsg_type) {
|
|
||||||
case NLMSG_ERROR:
|
|
||||||
err = (struct nlmsgerr *)NLMSG_DATA(nh);
|
|
||||||
if (!err->error)
|
|
||||||
continue;
|
|
||||||
printf("nlmsg error %s\n", strerror(-err->error));
|
|
||||||
goto cleanup;
|
|
||||||
case NLMSG_DONE:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ret = 0;
|
|
||||||
|
|
||||||
cleanup:
|
|
||||||
close(sock);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int ifindex;
|
static int ifindex;
|
||||||
|
|
||||||
static void int_exit(int sig)
|
static void int_exit(int sig)
|
||||||
|
|
|
@ -0,0 +1,37 @@
|
||||||
|
/* Copyright (c) 2016 Facebook
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of version 2 of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation.
|
||||||
|
*/
|
||||||
|
#ifndef _SAMPLES_BPF_XDP_TX_IPTNL_COMMON_H
|
||||||
|
#define _SAMPLES_BPF_XDP_TX_IPTNL_COMMON_H
|
||||||
|
|
||||||
|
#include <linux/types.h>
|
||||||
|
|
||||||
|
#define MAX_IPTNL_ENTRIES 256U
|
||||||
|
|
||||||
|
/*
 * Lookup key of the vip2tnl hash map: identifies one virtual service.
 *
 * The BPF program fills daddr/dport straight from the packet headers and
 * user space stores dport with htons(), so both are in network byte order.
 *
 * The key is compared as raw bytes, so every byte must be deterministic.
 * The trailing padding is therefore spelled out as a real member, which
 * guarantees that "struct vip v = {}" zeroes it. Size and member offsets
 * are identical to the layout with implicit padding.
 */
struct vip {
	union {
		__u32 v6[4];
		__u32 v4;
	} daddr;		/* destination address of the service */
	__u16 dport;		/* destination port, 0 if not TCP/UDP */
	__u16 family;		/* AF_INET or AF_INET6 */
	__u8 protocol;		/* IP protocol number */
	__u8 pad[3];		/* explicit tail padding, keep zero */
};
|
||||||
|
|
||||||
|
/*
 * Value stored in the vip2tnl hash map: everything needed to build the
 * outer (tunnel) headers for a matched packet.
 */
struct iptnl_info {
	/* source address written into the outer IP header */
	union {
		__u32 v6[4];
		__u32 v4;
	} saddr;
	/* destination address written into the outer IP header */
	union {
		__u32 v6[4];
		__u32 v4;
	} daddr;
	/* AF_INET or AF_INET6: address family of saddr/daddr */
	__u16 family;
	/* destination MAC written into the outgoing Ethernet header */
	__u8 dmac[6];
};
|
||||||
|
|
||||||
|
#endif
|
|
@ -0,0 +1,236 @@
|
||||||
|
/* Copyright (c) 2016 Facebook
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of version 2 of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This program shows how to use bpf_xdp_adjust_head() by
|
||||||
|
* encapsulating the incoming packet in an IPv4/v6 header
|
||||||
|
* and then XDP_TX it out.
|
||||||
|
*/
|
||||||
|
#include <uapi/linux/bpf.h>
|
||||||
|
#include <linux/in.h>
|
||||||
|
#include <linux/if_ether.h>
|
||||||
|
#include <linux/if_packet.h>
|
||||||
|
#include <linux/if_vlan.h>
|
||||||
|
#include <linux/ip.h>
|
||||||
|
#include <linux/ipv6.h>
|
||||||
|
#include "bpf_helpers.h"
|
||||||
|
#include "xdp_tx_iptunnel_common.h"
|
||||||
|
|
||||||
|
/*
 * Per-CPU packet counters, one slot per IP protocol number (0..255).
 * count_tx() bumps the slot of every packet that is encapsulated.
 */
struct bpf_map_def SEC("maps") rxcnt = {
	.type		= BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size	= sizeof(__u32),
	.value_size	= sizeof(__u64),
	.max_entries	= 256,
};
|
||||||
|
|
||||||
|
/*
 * Service table: maps a VIP key (struct vip) to the tunnel parameters
 * (struct iptnl_info) used to encapsulate matching packets.
 */
struct bpf_map_def SEC("maps") vip2tnl = {
	.type		= BPF_MAP_TYPE_HASH,
	.key_size	= sizeof(struct vip),
	.value_size	= sizeof(struct iptnl_info),
	.max_entries	= MAX_IPTNL_ENTRIES,
};
|
||||||
|
|
||||||
|
/* Increment this CPU's rxcnt slot for the given IP protocol number. */
static __always_inline void count_tx(u32 protocol)
{
	u64 *slot = bpf_map_lookup_elem(&rxcnt, &protocol);

	/* The lookup result must be NULL-checked before it is written. */
	if (!slot)
		return;

	*slot += 1;
}
|
||||||
|
|
||||||
|
static __always_inline int get_dport(void *trans_data, void *data_end,
|
||||||
|
u8 protocol)
|
||||||
|
{
|
||||||
|
struct tcphdr *th;
|
||||||
|
struct udphdr *uh;
|
||||||
|
|
||||||
|
switch (protocol) {
|
||||||
|
case IPPROTO_TCP:
|
||||||
|
th = (struct tcphdr *)trans_data;
|
||||||
|
if (th + 1 > data_end)
|
||||||
|
return -1;
|
||||||
|
return th->dest;
|
||||||
|
case IPPROTO_UDP:
|
||||||
|
uh = (struct udphdr *)trans_data;
|
||||||
|
if (uh + 1 > data_end)
|
||||||
|
return -1;
|
||||||
|
return uh->dest;
|
||||||
|
default:
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static __always_inline void set_ethhdr(struct ethhdr *new_eth,
|
||||||
|
const struct ethhdr *old_eth,
|
||||||
|
const struct iptnl_info *tnl,
|
||||||
|
__be16 h_proto)
|
||||||
|
{
|
||||||
|
memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source));
|
||||||
|
memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest));
|
||||||
|
new_eth->h_proto = h_proto;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __always_inline int handle_ipv4(struct xdp_md *xdp)
|
||||||
|
{
|
||||||
|
void *data_end = (void *)(long)xdp->data_end;
|
||||||
|
void *data = (void *)(long)xdp->data;
|
||||||
|
struct iptnl_info *tnl;
|
||||||
|
struct ethhdr *new_eth;
|
||||||
|
struct ethhdr *old_eth;
|
||||||
|
struct iphdr *iph = data + sizeof(struct ethhdr);
|
||||||
|
u16 *next_iph_u16;
|
||||||
|
u16 payload_len;
|
||||||
|
struct vip vip = {};
|
||||||
|
int dport;
|
||||||
|
u32 csum = 0;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (iph + 1 > data_end)
|
||||||
|
return XDP_DROP;
|
||||||
|
|
||||||
|
dport = get_dport(iph + 1, data_end, iph->protocol);
|
||||||
|
if (dport == -1)
|
||||||
|
return XDP_DROP;
|
||||||
|
|
||||||
|
vip.protocol = iph->protocol;
|
||||||
|
vip.family = AF_INET;
|
||||||
|
vip.daddr.v4 = iph->daddr;
|
||||||
|
vip.dport = dport;
|
||||||
|
payload_len = ntohs(iph->tot_len);
|
||||||
|
|
||||||
|
tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
|
||||||
|
/* It only does v4-in-v4 */
|
||||||
|
if (!tnl || tnl->family != AF_INET)
|
||||||
|
return XDP_PASS;
|
||||||
|
|
||||||
|
/* The vip key is found. Add an IP header and send it out */
|
||||||
|
|
||||||
|
if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
|
||||||
|
return XDP_DROP;
|
||||||
|
|
||||||
|
data = (void *)(long)xdp->data;
|
||||||
|
data_end = (void *)(long)xdp->data_end;
|
||||||
|
|
||||||
|
new_eth = data;
|
||||||
|
iph = data + sizeof(*new_eth);
|
||||||
|
old_eth = data + sizeof(*iph);
|
||||||
|
|
||||||
|
if (new_eth + 1 > data_end ||
|
||||||
|
old_eth + 1 > data_end ||
|
||||||
|
iph + 1 > data_end)
|
||||||
|
return XDP_DROP;
|
||||||
|
|
||||||
|
set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IP));
|
||||||
|
|
||||||
|
iph->version = 4;
|
||||||
|
iph->ihl = sizeof(*iph) >> 2;
|
||||||
|
iph->frag_off = 0;
|
||||||
|
iph->protocol = IPPROTO_IPIP;
|
||||||
|
iph->check = 0;
|
||||||
|
iph->tos = 0;
|
||||||
|
iph->tot_len = htons(payload_len + sizeof(*iph));
|
||||||
|
iph->daddr = tnl->daddr.v4;
|
||||||
|
iph->saddr = tnl->saddr.v4;
|
||||||
|
iph->ttl = 8;
|
||||||
|
|
||||||
|
next_iph_u16 = (u16 *)iph;
|
||||||
|
#pragma clang loop unroll(full)
|
||||||
|
for (i = 0; i < sizeof(*iph) >> 1; i++)
|
||||||
|
csum += *next_iph_u16++;
|
||||||
|
|
||||||
|
iph->check = ~((csum & 0xffff) + (csum >> 16));
|
||||||
|
|
||||||
|
count_tx(vip.protocol);
|
||||||
|
|
||||||
|
return XDP_TX;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __always_inline int handle_ipv6(struct xdp_md *xdp)
|
||||||
|
{
|
||||||
|
void *data_end = (void *)(long)xdp->data_end;
|
||||||
|
void *data = (void *)(long)xdp->data;
|
||||||
|
struct iptnl_info *tnl;
|
||||||
|
struct ethhdr *new_eth;
|
||||||
|
struct ethhdr *old_eth;
|
||||||
|
struct ipv6hdr *ip6h = data + sizeof(struct ethhdr);
|
||||||
|
__u16 payload_len;
|
||||||
|
struct vip vip = {};
|
||||||
|
int dport;
|
||||||
|
|
||||||
|
if (ip6h + 1 > data_end)
|
||||||
|
return XDP_DROP;
|
||||||
|
|
||||||
|
dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr);
|
||||||
|
if (dport == -1)
|
||||||
|
return XDP_DROP;
|
||||||
|
|
||||||
|
vip.protocol = ip6h->nexthdr;
|
||||||
|
vip.family = AF_INET6;
|
||||||
|
memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr));
|
||||||
|
vip.dport = dport;
|
||||||
|
payload_len = ip6h->payload_len;
|
||||||
|
|
||||||
|
tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
|
||||||
|
/* It only does v6-in-v6 */
|
||||||
|
if (!tnl || tnl->family != AF_INET6)
|
||||||
|
return XDP_PASS;
|
||||||
|
|
||||||
|
/* The vip key is found. Add an IP header and send it out */
|
||||||
|
|
||||||
|
if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
|
||||||
|
return XDP_DROP;
|
||||||
|
|
||||||
|
data = (void *)(long)xdp->data;
|
||||||
|
data_end = (void *)(long)xdp->data_end;
|
||||||
|
|
||||||
|
new_eth = data;
|
||||||
|
ip6h = data + sizeof(*new_eth);
|
||||||
|
old_eth = data + sizeof(*ip6h);
|
||||||
|
|
||||||
|
if (new_eth + 1 > data_end ||
|
||||||
|
old_eth + 1 > data_end ||
|
||||||
|
ip6h + 1 > data_end)
|
||||||
|
return XDP_DROP;
|
||||||
|
|
||||||
|
set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IPV6));
|
||||||
|
|
||||||
|
ip6h->version = 6;
|
||||||
|
ip6h->priority = 0;
|
||||||
|
memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
|
||||||
|
ip6h->payload_len = htons(ntohs(payload_len) + sizeof(*ip6h));
|
||||||
|
ip6h->nexthdr = IPPROTO_IPV6;
|
||||||
|
ip6h->hop_limit = 8;
|
||||||
|
memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
|
||||||
|
memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6));
|
||||||
|
|
||||||
|
count_tx(vip.protocol);
|
||||||
|
|
||||||
|
return XDP_TX;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * XDP entry point: dispatch on the EtherType to the IPv4 or IPv6
 * handler. Frames too short for an Ethernet header are dropped; any
 * other EtherType is passed on unchanged.
 */
SEC("xdp_tx_iptunnel")
int _xdp_tx_iptunnel(struct xdp_md *xdp)
{
	void *pkt_end = (void *)(long)xdp->data_end;
	void *pkt = (void *)(long)xdp->data;
	struct ethhdr *eth = pkt;
	__u16 ether_type;

	if (eth + 1 > pkt_end)
		return XDP_DROP;

	ether_type = eth->h_proto;

	if (ether_type == htons(ETH_P_IP))
		return handle_ipv4(xdp);

	if (ether_type == htons(ETH_P_IPV6))
		return handle_ipv6(xdp);

	return XDP_PASS;
}
|
||||||
|
|
||||||
|
char _license[] SEC("license") = "GPL";
|
|
@ -0,0 +1,256 @@
|
||||||
|
/* Copyright (c) 2016 Facebook
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of version 2 of the GNU General Public
|
||||||
|
* License as published by the Free Software Foundation.
|
||||||
|
*/
|
||||||
|
#include <linux/bpf.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <signal.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <sys/resource.h>
|
||||||
|
#include <arpa/inet.h>
|
||||||
|
#include <netinet/ether.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include "bpf_load.h"
|
||||||
|
#include "libbpf.h"
|
||||||
|
#include "bpf_util.h"
|
||||||
|
#include "xdp_tx_iptunnel_common.h"
|
||||||
|
|
||||||
|
#define STATS_INTERVAL_S 2U
|
||||||
|
|
||||||
|
static int ifindex = -1;
|
||||||
|
|
||||||
|
static void int_exit(int sig)
|
||||||
|
{
|
||||||
|
if (ifindex > -1)
|
||||||
|
set_link_xdp_fd(ifindex, -1);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* simple per-protocol drop counter
|
||||||
|
*/
|
||||||
|
static void poll_stats(unsigned int kill_after_s)
|
||||||
|
{
|
||||||
|
const unsigned int nr_protos = 256;
|
||||||
|
unsigned int nr_cpus = bpf_num_possible_cpus();
|
||||||
|
time_t started_at = time(NULL);
|
||||||
|
__u64 values[nr_cpus], prev[nr_protos][nr_cpus];
|
||||||
|
__u32 proto;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
memset(prev, 0, sizeof(prev));
|
||||||
|
|
||||||
|
while (!kill_after_s || time(NULL) - started_at <= kill_after_s) {
|
||||||
|
sleep(STATS_INTERVAL_S);
|
||||||
|
|
||||||
|
for (proto = 0; proto < nr_protos; proto++) {
|
||||||
|
__u64 sum = 0;
|
||||||
|
|
||||||
|
assert(bpf_lookup_elem(map_fd[0], &proto, values) == 0);
|
||||||
|
for (i = 0; i < nr_cpus; i++)
|
||||||
|
sum += (values[i] - prev[proto][i]);
|
||||||
|
|
||||||
|
if (sum)
|
||||||
|
printf("proto %u: sum:%10llu pkts, rate:%10llu pkts/s\n",
|
||||||
|
proto, sum, sum / STATS_INTERVAL_S);
|
||||||
|
memcpy(prev[proto], values, sizeof(values));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Print the program description and the list of command-line options. */
static void usage(const char *cmd)
{
	fputs("Start a XDP prog which encapsulates incoming packets\n"
	      "in an IPv4/v6 header and XDP_TX it out. The dst <VIP:PORT>\n"
	      "is used to select packets to encapsulate\n\n", stdout);

	printf("Usage: %s [...]\n"
	       " -i <ifindex> Interface Index\n"
	       " -a <vip-service-address> IPv4 or IPv6\n"
	       " -p <vip-service-port> A port range (e.g. 433-444) is also allowed\n"
	       " -s <source-ip> Used in the IPTunnel header\n"
	       " -d <dest-ip> Used in the IPTunnel header\n"
	       " -m <dest-MAC> Used in sending the IP Tunneled pkt\n"
	       " -T <stop-after-X-seconds> Default: 0 (forever)\n"
	       " -P <IP-Protocol> Default is TCP\n"
	       " -h Display this help\n",
	       cmd);
}
|
||||||
|
|
||||||
|
/*
 * Parse a textual IPv4 or IPv6 address.
 *
 * @ipstr: address string
 * @addr:  output buffer of four 32-bit words; an IPv4 address is stored
 *         in addr[0] and the remaining three words are cleared
 *
 * Returns AF_INET6 or AF_INET on success. On failure a message is
 * written to stderr and AF_UNSPEC is returned.
 */
static int parse_ipstr(const char *ipstr, unsigned int *addr)
{
	/* IPv6 is tried first, then IPv4. */
	if (inet_pton(AF_INET6, ipstr, addr) == 1)
		return AF_INET6;

	if (inet_pton(AF_INET, ipstr, addr) != 1) {
		fprintf(stderr, "%s is an invalid IP\n", ipstr);
		return AF_UNSPEC;
	}

	memset(&addr[1], 0, 3 * sizeof(addr[0]));
	return AF_INET;
}
|
||||||
|
|
||||||
|
static int parse_ports(const char *port_str, int *min_port, int *max_port)
|
||||||
|
{
|
||||||
|
char *end;
|
||||||
|
long tmp_min_port;
|
||||||
|
long tmp_max_port;
|
||||||
|
|
||||||
|
tmp_min_port = strtol(optarg, &end, 10);
|
||||||
|
if (tmp_min_port < 1 || tmp_min_port > 65535) {
|
||||||
|
fprintf(stderr, "Invalid port(s):%s\n", optarg);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*end == '-') {
|
||||||
|
end++;
|
||||||
|
tmp_max_port = strtol(end, NULL, 10);
|
||||||
|
if (tmp_max_port < 1 || tmp_max_port > 65535) {
|
||||||
|
fprintf(stderr, "Invalid port(s):%s\n", optarg);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
tmp_max_port = tmp_min_port;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tmp_min_port > tmp_max_port) {
|
||||||
|
fprintf(stderr, "Invalid port(s):%s\n", optarg);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tmp_max_port - tmp_min_port + 1 > MAX_IPTNL_ENTRIES) {
|
||||||
|
fprintf(stderr, "Port range (%s) is larger than %u\n",
|
||||||
|
port_str, MAX_IPTNL_ENTRIES);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
*min_port = tmp_min_port;
|
||||||
|
*max_port = tmp_max_port;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
unsigned char opt_flags[256] = {};
|
||||||
|
unsigned int kill_after_s = 0;
|
||||||
|
const char *optstr = "i:a:p:s:d:m:T:P:h";
|
||||||
|
int min_port = 0, max_port = 0;
|
||||||
|
struct iptnl_info tnl = {};
|
||||||
|
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
|
||||||
|
struct vip vip = {};
|
||||||
|
char filename[256];
|
||||||
|
int opt;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
tnl.family = AF_UNSPEC;
|
||||||
|
vip.protocol = IPPROTO_TCP;
|
||||||
|
|
||||||
|
for (i = 0; i < strlen(optstr); i++)
|
||||||
|
if (optstr[i] != 'h' && 'a' <= optstr[i] && optstr[i] <= 'z')
|
||||||
|
opt_flags[(unsigned char)optstr[i]] = 1;
|
||||||
|
|
||||||
|
while ((opt = getopt(argc, argv, optstr)) != -1) {
|
||||||
|
unsigned short family;
|
||||||
|
unsigned int *v6;
|
||||||
|
|
||||||
|
switch (opt) {
|
||||||
|
case 'i':
|
||||||
|
ifindex = atoi(optarg);
|
||||||
|
break;
|
||||||
|
case 'a':
|
||||||
|
vip.family = parse_ipstr(optarg, vip.daddr.v6);
|
||||||
|
if (vip.family == AF_UNSPEC)
|
||||||
|
return 1;
|
||||||
|
break;
|
||||||
|
case 'p':
|
||||||
|
if (parse_ports(optarg, &min_port, &max_port))
|
||||||
|
return 1;
|
||||||
|
break;
|
||||||
|
case 'P':
|
||||||
|
vip.protocol = atoi(optarg);
|
||||||
|
break;
|
||||||
|
case 's':
|
||||||
|
case 'd':
|
||||||
|
if (opt == 's')
|
||||||
|
v6 = tnl.saddr.v6;
|
||||||
|
else
|
||||||
|
v6 = tnl.daddr.v6;
|
||||||
|
|
||||||
|
family = parse_ipstr(optarg, v6);
|
||||||
|
if (family == AF_UNSPEC)
|
||||||
|
return 1;
|
||||||
|
if (tnl.family == AF_UNSPEC) {
|
||||||
|
tnl.family = family;
|
||||||
|
} else if (tnl.family != family) {
|
||||||
|
fprintf(stderr,
|
||||||
|
"The IP version of the src and dst addresses used in the IP encapsulation does not match\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'm':
|
||||||
|
if (!ether_aton_r(optarg,
|
||||||
|
(struct ether_addr *)tnl.dmac)) {
|
||||||
|
fprintf(stderr, "Invalid mac address:%s\n",
|
||||||
|
optarg);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 'T':
|
||||||
|
kill_after_s = atoi(optarg);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
usage(argv[0]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
opt_flags[opt] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < strlen(optstr); i++) {
|
||||||
|
if (opt_flags[(unsigned int)optstr[i]]) {
|
||||||
|
fprintf(stderr, "Missing argument -%c\n", optstr[i]);
|
||||||
|
usage(argv[0]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (setrlimit(RLIMIT_MEMLOCK, &r)) {
|
||||||
|
perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
|
||||||
|
|
||||||
|
if (load_bpf_file(filename)) {
|
||||||
|
printf("%s", bpf_log_buf);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!prog_fd[0]) {
|
||||||
|
printf("load_bpf_file: %s\n", strerror(errno));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
signal(SIGINT, int_exit);
|
||||||
|
|
||||||
|
while (min_port <= max_port) {
|
||||||
|
vip.dport = htons(min_port++);
|
||||||
|
if (bpf_update_elem(map_fd[1], &vip, &tnl, BPF_NOEXIST)) {
|
||||||
|
perror("bpf_update_elem(&vip2tnl)");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (set_link_xdp_fd(ifindex, prog_fd[0]) < 0) {
|
||||||
|
printf("link set xdp fd failed\n");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
poll_stats(kill_after_s);
|
||||||
|
|
||||||
|
set_link_xdp_fd(ifindex, -1);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
Загрузка…
Ссылка в новой задаче