2016-12-08 02:53:14 +03:00
|
|
|
/* Copyright (c) 2016 Facebook
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of version 2 of the GNU General Public
|
|
|
|
* License as published by the Free Software Foundation.
|
|
|
|
*/
|
|
|
|
#include <linux/bpf.h>
|
2017-04-27 19:11:13 +03:00
|
|
|
#include <linux/if_link.h>
|
2016-12-08 02:53:14 +03:00
|
|
|
#include <assert.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <signal.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <sys/resource.h>
|
|
|
|
#include <arpa/inet.h>
|
|
|
|
#include <netinet/ether.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <time.h>
|
|
|
|
#include "bpf_load.h"
|
|
|
|
#include "libbpf.h"
|
|
|
|
#include "bpf_util.h"
|
|
|
|
#include "xdp_tx_iptunnel_common.h"
|
|
|
|
|
|
|
|
/* Seconds slept between two statistics printouts in poll_stats(); also the
 * divisor used there to turn a per-interval packet count into a pkts/s rate.
 */
#define STATS_INTERVAL_S 2U
|
|
|
|
|
|
|
|
/* Interface index taken from the -i option. It stays at -1 until that option
 * is parsed, and int_exit() only calls set_link_xdp_fd() when it is > -1.
 */
static int ifindex = -1;
|
2017-05-01 12:26:20 +03:00
|
|
|
/* Flags handed to every set_link_xdp_fd() call. The -S and -N options OR in
 * XDP_FLAGS_SKB_MODE and XDP_FLAGS_DRV_MODE respectively.
 */
static __u32 xdp_flags = 0;
|
2016-12-08 02:53:14 +03:00
|
|
|
|
|
|
|
static void int_exit(int sig)
|
|
|
|
{
|
|
|
|
if (ifindex > -1)
|
2017-05-01 12:26:20 +03:00
|
|
|
set_link_xdp_fd(ifindex, -1, xdp_flags);
|
2016-12-08 02:53:14 +03:00
|
|
|
exit(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* simple per-protocol drop counter
|
|
|
|
*/
|
|
|
|
static void poll_stats(unsigned int kill_after_s)
|
|
|
|
{
|
|
|
|
const unsigned int nr_protos = 256;
|
|
|
|
unsigned int nr_cpus = bpf_num_possible_cpus();
|
|
|
|
time_t started_at = time(NULL);
|
|
|
|
__u64 values[nr_cpus], prev[nr_protos][nr_cpus];
|
|
|
|
__u32 proto;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
memset(prev, 0, sizeof(prev));
|
|
|
|
|
|
|
|
while (!kill_after_s || time(NULL) - started_at <= kill_after_s) {
|
|
|
|
sleep(STATS_INTERVAL_S);
|
|
|
|
|
|
|
|
for (proto = 0; proto < nr_protos; proto++) {
|
|
|
|
__u64 sum = 0;
|
|
|
|
|
samples/bpf: Make samples more libbpf-centric
Switch all of the sample code to use the function names from
tools/lib/bpf so that they're consistent with that, and to declare their
own log buffers. This allow the next commit to be purely devoted to
getting rid of the duplicate library in samples/bpf.
Committer notes:
Testing it:
On a fedora rawhide container, with clang/llvm 3.9, sharing the host
linux kernel git tree:
# make O=/tmp/build/linux/ headers_install
# make O=/tmp/build/linux -C samples/bpf/
Since I forgot to make it privileged, just tested it outside the
container, using what it generated:
# uname -a
Linux jouet 4.9.0-rc8+ #1 SMP Mon Dec 12 11:20:49 BRT 2016 x86_64 x86_64 x86_64 GNU/Linux
# cd /var/lib/docker/devicemapper/mnt/c43e09a53ff56c86a07baf79847f00e2cc2a17a1e2220e1adbf8cbc62734feda/rootfs/tmp/build/linux/samples/bpf/
# ls -la offwaketime
-rwxr-xr-x. 1 root root 24200 Dec 15 12:19 offwaketime
# file offwaketime
offwaketime: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, for GNU/Linux 2.6.32, BuildID[sha1]=c940d3f127d5e66cdd680e42d885cb0b64f8a0e4, not stripped
# readelf -SW offwaketime_kern.o | grep PROGBITS
[ 2] .text PROGBITS 0000000000000000 000040 000000 00 AX 0 0 4
[ 3] kprobe/try_to_wake_up PROGBITS 0000000000000000 000040 0000d8 00 AX 0 0 8
[ 5] tracepoint/sched/sched_switch PROGBITS 0000000000000000 000118 000318 00 AX 0 0 8
[ 7] maps PROGBITS 0000000000000000 000430 000050 00 WA 0 0 4
[ 8] license PROGBITS 0000000000000000 000480 000004 00 WA 0 0 1
[ 9] version PROGBITS 0000000000000000 000484 000004 00 WA 0 0 4
# ./offwaketime | head -5
swapper/1;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule;__schedule;-;---;; 106
CPU 0/KVM;entry_SYSCALL_64_fastpath;sys_ioctl;do_vfs_ioctl;kvm_vcpu_ioctl;kvm_arch_vcpu_ioctl_run;kvm_vcpu_block;schedule;__schedule;-;try_to_wake_up;swake_up_locked;swake_up;apic_timer_expired;apic_timer_fn;__hrtimer_run_queues;hrtimer_interrupt;local_apic_timer_interrupt;smp_apic_timer_interrupt;__irqentry_text_start;cpuidle_enter;call_cpuidle;cpu_startup_entry;start_secondary;;swapper/3 2
Compositor;entry_SYSCALL_64_fastpath;sys_futex;do_futex;futex_wait;futex_wait_queue_me;schedule;__schedule;-;try_to_wake_up;futex_requeue;do_futex;sys_futex;entry_SYSCALL_64_fastpath;;SoftwareVsyncTh 5
firefox;entry_SYSCALL_64_fastpath;sys_poll;do_sys_poll;poll_schedule_timeout;schedule_hrtimeout_range;schedule_hrtimeout_range_clock;schedule;__schedule;-;try_to_wake_up;pollwake;__wake_up_common;__wake_up_sync_key;pipe_write;__vfs_write;vfs_write;sys_write;entry_SYSCALL_64_fastpath;;Timer 13
JS Helper;entry_SYSCALL_64_fastpath;sys_futex;do_futex;futex_wait;futex_wait_queue_me;schedule;__schedule;-;try_to_wake_up;do_futex;sys_futex;entry_SYSCALL_64_fastpath;;firefox 2
#
Signed-off-by: Joe Stringer <joe@ovn.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: netdev@vger.kernel.org
Link: http://lkml.kernel.org/r/20161214224342.12858-2-joe@ovn.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-12-15 01:43:38 +03:00
|
|
|
assert(bpf_map_lookup_elem(map_fd[0], &proto, values) == 0);
|
2016-12-08 02:53:14 +03:00
|
|
|
for (i = 0; i < nr_cpus; i++)
|
|
|
|
sum += (values[i] - prev[proto][i]);
|
|
|
|
|
|
|
|
if (sum)
|
|
|
|
printf("proto %u: sum:%10llu pkts, rate:%10llu pkts/s\n",
|
|
|
|
proto, sum, sum / STATS_INTERVAL_S);
|
|
|
|
memcpy(prev[proto], values, sizeof(values));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Print the command-line help text to stdout.
 *
 * @cmd: program name substituted into the "Usage:" line.
 */
static void usage(const char *cmd)
{
	/* One entry per option, printed in this order after the usage line. */
	static const char *const option_lines[] = {
		" -i <ifindex> Interface Index\n",
		" -a <vip-service-address> IPv4 or IPv6\n",
		" -p <vip-service-port> A port range (e.g. 433-444) is also allowed\n",
		" -s <source-ip> Used in the IPTunnel header\n",
		" -d <dest-ip> Used in the IPTunnel header\n",
		" -m <dest-MAC> Used in sending the IP Tunneled pkt\n",
		" -T <stop-after-X-seconds> Default: 0 (forever)\n",
		" -P <IP-Protocol> Default is TCP\n",
		" -S use skb-mode\n",
		" -N enforce native mode\n",
		" -h Display this help\n",
	};
	size_t line;

	fputs("Start a XDP prog which encapsulates incoming packets\n"
	      "in an IPv4/v6 header and XDP_TX it out. The dst <VIP:PORT>\n"
	      "is used to select packets to encapsulate\n\n", stdout);
	printf("Usage: %s [...]\n", cmd);

	for (line = 0; line < sizeof(option_lines) / sizeof(option_lines[0]); line++)
		fputs(option_lines[line], stdout);
}
|
|
|
|
|
|
|
|
/* Parse a textual IP address into a four-word buffer.
 *
 * @ipstr: address string; it is tried as IPv6 first, then as IPv4.
 * @addr:  output buffer of at least four unsigned ints. For an IPv4 address
 *         the result is stored in addr[0] and addr[1..3] are set to 0.
 *
 * Returns AF_INET6 or AF_INET according to the family that parsed. If neither
 * does, a message is written to stderr and AF_UNSPEC is returned.
 */
static int parse_ipstr(const char *ipstr, unsigned int *addr)
{
	int word;

	if (inet_pton(AF_INET6, ipstr, addr) == 1)
		return AF_INET6;

	if (inet_pton(AF_INET, ipstr, addr) != 1) {
		fprintf(stderr, "%s is an invalid IP\n", ipstr);
		return AF_UNSPEC;
	}

	/* Clear the words an IPv4 address does not occupy. */
	for (word = 1; word < 4; word++)
		addr[word] = 0;

	return AF_INET;
}
|
|
|
|
|
|
|
|
static int parse_ports(const char *port_str, int *min_port, int *max_port)
|
|
|
|
{
|
|
|
|
char *end;
|
|
|
|
long tmp_min_port;
|
|
|
|
long tmp_max_port;
|
|
|
|
|
|
|
|
tmp_min_port = strtol(optarg, &end, 10);
|
|
|
|
if (tmp_min_port < 1 || tmp_min_port > 65535) {
|
|
|
|
fprintf(stderr, "Invalid port(s):%s\n", optarg);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (*end == '-') {
|
|
|
|
end++;
|
|
|
|
tmp_max_port = strtol(end, NULL, 10);
|
|
|
|
if (tmp_max_port < 1 || tmp_max_port > 65535) {
|
|
|
|
fprintf(stderr, "Invalid port(s):%s\n", optarg);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
tmp_max_port = tmp_min_port;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tmp_min_port > tmp_max_port) {
|
|
|
|
fprintf(stderr, "Invalid port(s):%s\n", optarg);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tmp_max_port - tmp_min_port + 1 > MAX_IPTNL_ENTRIES) {
|
|
|
|
fprintf(stderr, "Port range (%s) is larger than %u\n",
|
|
|
|
port_str, MAX_IPTNL_ENTRIES);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
*min_port = tmp_min_port;
|
|
|
|
*max_port = tmp_max_port;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int main(int argc, char **argv)
|
|
|
|
{
|
|
|
|
unsigned char opt_flags[256] = {};
|
|
|
|
unsigned int kill_after_s = 0;
|
2017-05-12 02:04:45 +03:00
|
|
|
const char *optstr = "i:a:p:s:d:m:T:P:SNh";
|
2016-12-08 02:53:14 +03:00
|
|
|
int min_port = 0, max_port = 0;
|
|
|
|
struct iptnl_info tnl = {};
|
|
|
|
struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
|
|
|
|
struct vip vip = {};
|
|
|
|
char filename[256];
|
|
|
|
int opt;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
tnl.family = AF_UNSPEC;
|
|
|
|
vip.protocol = IPPROTO_TCP;
|
|
|
|
|
|
|
|
for (i = 0; i < strlen(optstr); i++)
|
|
|
|
if (optstr[i] != 'h' && 'a' <= optstr[i] && optstr[i] <= 'z')
|
|
|
|
opt_flags[(unsigned char)optstr[i]] = 1;
|
|
|
|
|
|
|
|
while ((opt = getopt(argc, argv, optstr)) != -1) {
|
|
|
|
unsigned short family;
|
|
|
|
unsigned int *v6;
|
|
|
|
|
|
|
|
switch (opt) {
|
|
|
|
case 'i':
|
|
|
|
ifindex = atoi(optarg);
|
|
|
|
break;
|
|
|
|
case 'a':
|
|
|
|
vip.family = parse_ipstr(optarg, vip.daddr.v6);
|
|
|
|
if (vip.family == AF_UNSPEC)
|
|
|
|
return 1;
|
|
|
|
break;
|
|
|
|
case 'p':
|
|
|
|
if (parse_ports(optarg, &min_port, &max_port))
|
|
|
|
return 1;
|
|
|
|
break;
|
|
|
|
case 'P':
|
|
|
|
vip.protocol = atoi(optarg);
|
|
|
|
break;
|
|
|
|
case 's':
|
|
|
|
case 'd':
|
|
|
|
if (opt == 's')
|
|
|
|
v6 = tnl.saddr.v6;
|
|
|
|
else
|
|
|
|
v6 = tnl.daddr.v6;
|
|
|
|
|
|
|
|
family = parse_ipstr(optarg, v6);
|
|
|
|
if (family == AF_UNSPEC)
|
|
|
|
return 1;
|
|
|
|
if (tnl.family == AF_UNSPEC) {
|
|
|
|
tnl.family = family;
|
|
|
|
} else if (tnl.family != family) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"The IP version of the src and dst addresses used in the IP encapsulation does not match\n");
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 'm':
|
|
|
|
if (!ether_aton_r(optarg,
|
|
|
|
(struct ether_addr *)tnl.dmac)) {
|
|
|
|
fprintf(stderr, "Invalid mac address:%s\n",
|
|
|
|
optarg);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 'T':
|
|
|
|
kill_after_s = atoi(optarg);
|
|
|
|
break;
|
2017-04-27 19:11:13 +03:00
|
|
|
case 'S':
|
2017-05-01 12:26:15 +03:00
|
|
|
xdp_flags |= XDP_FLAGS_SKB_MODE;
|
2017-05-12 02:04:45 +03:00
|
|
|
break;
|
|
|
|
case 'N':
|
|
|
|
xdp_flags |= XDP_FLAGS_DRV_MODE;
|
2017-04-27 19:11:13 +03:00
|
|
|
break;
|
2016-12-08 02:53:14 +03:00
|
|
|
default:
|
|
|
|
usage(argv[0]);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
opt_flags[opt] = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < strlen(optstr); i++) {
|
|
|
|
if (opt_flags[(unsigned int)optstr[i]]) {
|
|
|
|
fprintf(stderr, "Missing argument -%c\n", optstr[i]);
|
|
|
|
usage(argv[0]);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (setrlimit(RLIMIT_MEMLOCK, &r)) {
|
|
|
|
perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
|
|
|
|
|
|
|
|
if (load_bpf_file(filename)) {
|
|
|
|
printf("%s", bpf_log_buf);
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!prog_fd[0]) {
|
|
|
|
printf("load_bpf_file: %s\n", strerror(errno));
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
signal(SIGINT, int_exit);
|
2017-05-11 22:52:30 +03:00
|
|
|
signal(SIGTERM, int_exit);
|
2016-12-08 02:53:14 +03:00
|
|
|
|
|
|
|
while (min_port <= max_port) {
|
|
|
|
vip.dport = htons(min_port++);
|
samples/bpf: Make samples more libbpf-centric
Switch all of the sample code to use the function names from
tools/lib/bpf so that they're consistent with that, and to declare their
own log buffers. This allow the next commit to be purely devoted to
getting rid of the duplicate library in samples/bpf.
Committer notes:
Testing it:
On a fedora rawhide container, with clang/llvm 3.9, sharing the host
linux kernel git tree:
# make O=/tmp/build/linux/ headers_install
# make O=/tmp/build/linux -C samples/bpf/
Since I forgot to make it privileged, just tested it outside the
container, using what it generated:
# uname -a
Linux jouet 4.9.0-rc8+ #1 SMP Mon Dec 12 11:20:49 BRT 2016 x86_64 x86_64 x86_64 GNU/Linux
# cd /var/lib/docker/devicemapper/mnt/c43e09a53ff56c86a07baf79847f00e2cc2a17a1e2220e1adbf8cbc62734feda/rootfs/tmp/build/linux/samples/bpf/
# ls -la offwaketime
-rwxr-xr-x. 1 root root 24200 Dec 15 12:19 offwaketime
# file offwaketime
offwaketime: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, for GNU/Linux 2.6.32, BuildID[sha1]=c940d3f127d5e66cdd680e42d885cb0b64f8a0e4, not stripped
# readelf -SW offwaketime_kern.o | grep PROGBITS
[ 2] .text PROGBITS 0000000000000000 000040 000000 00 AX 0 0 4
[ 3] kprobe/try_to_wake_up PROGBITS 0000000000000000 000040 0000d8 00 AX 0 0 8
[ 5] tracepoint/sched/sched_switch PROGBITS 0000000000000000 000118 000318 00 AX 0 0 8
[ 7] maps PROGBITS 0000000000000000 000430 000050 00 WA 0 0 4
[ 8] license PROGBITS 0000000000000000 000480 000004 00 WA 0 0 1
[ 9] version PROGBITS 0000000000000000 000484 000004 00 WA 0 0 4
# ./offwaketime | head -5
swapper/1;start_secondary;cpu_startup_entry;schedule_preempt_disabled;schedule;__schedule;-;---;; 106
CPU 0/KVM;entry_SYSCALL_64_fastpath;sys_ioctl;do_vfs_ioctl;kvm_vcpu_ioctl;kvm_arch_vcpu_ioctl_run;kvm_vcpu_block;schedule;__schedule;-;try_to_wake_up;swake_up_locked;swake_up;apic_timer_expired;apic_timer_fn;__hrtimer_run_queues;hrtimer_interrupt;local_apic_timer_interrupt;smp_apic_timer_interrupt;__irqentry_text_start;cpuidle_enter;call_cpuidle;cpu_startup_entry;start_secondary;;swapper/3 2
Compositor;entry_SYSCALL_64_fastpath;sys_futex;do_futex;futex_wait;futex_wait_queue_me;schedule;__schedule;-;try_to_wake_up;futex_requeue;do_futex;sys_futex;entry_SYSCALL_64_fastpath;;SoftwareVsyncTh 5
firefox;entry_SYSCALL_64_fastpath;sys_poll;do_sys_poll;poll_schedule_timeout;schedule_hrtimeout_range;schedule_hrtimeout_range_clock;schedule;__schedule;-;try_to_wake_up;pollwake;__wake_up_common;__wake_up_sync_key;pipe_write;__vfs_write;vfs_write;sys_write;entry_SYSCALL_64_fastpath;;Timer 13
JS Helper;entry_SYSCALL_64_fastpath;sys_futex;do_futex;futex_wait;futex_wait_queue_me;schedule;__schedule;-;try_to_wake_up;do_futex;sys_futex;entry_SYSCALL_64_fastpath;;firefox 2
#
Signed-off-by: Joe Stringer <joe@ovn.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: netdev@vger.kernel.org
Link: http://lkml.kernel.org/r/20161214224342.12858-2-joe@ovn.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
2016-12-15 01:43:38 +03:00
|
|
|
if (bpf_map_update_elem(map_fd[1], &vip, &tnl, BPF_NOEXIST)) {
|
|
|
|
perror("bpf_map_update_elem(&vip2tnl)");
|
2016-12-08 02:53:14 +03:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-05-01 12:26:15 +03:00
|
|
|
if (set_link_xdp_fd(ifindex, prog_fd[0], xdp_flags) < 0) {
|
2016-12-08 02:53:14 +03:00
|
|
|
printf("link set xdp fd failed\n");
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
poll_stats(kill_after_s);
|
|
|
|
|
2017-05-01 12:26:15 +03:00
|
|
|
set_link_xdp_fd(ifindex, -1, xdp_flags);
|
2016-12-08 02:53:14 +03:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|