net: add skeleton of bpfilter kernel module
bpfilter.ko consists of bpfilter_kern.c (normal kernel module code) and user mode helper code that is embedded into bpfilter.ko. The steps to build bpfilter.ko are the following: - main.c is compiled by HOSTCC into the bpfilter_umh elf executable file - with quite a bit of objcopy and Makefile magic the bpfilter_umh elf file is converted into the bpfilter_umh.o object file with _binary_net_bpfilter_bpfilter_umh_start and _end symbols. Example: $ nm ./bld_x64/net/bpfilter/bpfilter_umh.o 0000000000004cf8 T _binary_net_bpfilter_bpfilter_umh_end 0000000000004cf8 A _binary_net_bpfilter_bpfilter_umh_size 0000000000000000 T _binary_net_bpfilter_bpfilter_umh_start - bpfilter_umh.o and bpfilter_kern.o are linked together into bpfilter.ko. bpfilter_kern.c is normal kernel module code that calls the fork_usermode_blob() helper to execute part of its own data as a user mode process. Notice that _binary_net_bpfilter_bpfilter_umh_start - end is placed into the .init.rodata section, so it's freed as soon as the __init function of bpfilter.ko is finished. As part of __init, bpfilter.ko does the first request/reply action via the two unix pipes provided by the fork_usermode_blob() helper to make sure that umh is healthy. If not, it will kill it via pid. Later bpfilter_process_sockopt() will be called from bpfilter hooks in get/setsockopt() to pass iptables commands into umh via bpfilter.ko. If admin does 'rmmod bpfilter', the __exit code of bpfilter.ko will kill umh as well. Signed-off-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Родитель
449325b52b
Коммит
d2ba09c17a
|
@ -0,0 +1,15 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINUX_BPFILTER_H
|
||||
#define _LINUX_BPFILTER_H
|
||||
|
||||
#include <uapi/linux/bpfilter.h>
|
||||
|
||||
struct sock;
|
||||
int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char *optval,
|
||||
unsigned int optlen);
|
||||
int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char *optval,
|
||||
int *optlen);
|
||||
extern int (*bpfilter_process_sockopt)(struct sock *sk, int optname,
|
||||
char __user *optval,
|
||||
unsigned int optlen, bool is_set);
|
||||
#endif
|
|
@ -0,0 +1,21 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _UAPI_LINUX_BPFILTER_H
|
||||
#define _UAPI_LINUX_BPFILTER_H
|
||||
|
||||
#include <linux/if.h>
|
||||
|
||||
/*
 * setsockopt() option numbers routed to bpfilter; the valid range is
 * [BPFILTER_IPT_SO_SET_REPLACE, BPFILTER_IPT_SET_MAX).
 * NOTE(review): values presumably mirror the iptables IPT_SO_SET_* numbers
 * -- confirm against the netfilter uapi headers.
 */
enum {
	BPFILTER_IPT_SO_SET_REPLACE = 64,
	BPFILTER_IPT_SO_SET_ADD_COUNTERS = 65,
	BPFILTER_IPT_SET_MAX,	/* one past the last set option */
};
|
||||
|
||||
/*
 * getsockopt() option numbers routed to bpfilter; the valid range is
 * [BPFILTER_IPT_SO_GET_INFO, BPFILTER_IPT_GET_MAX).
 * NOTE(review): values presumably mirror the iptables IPT_SO_GET_* numbers
 * -- confirm against the netfilter uapi headers.
 */
enum {
	BPFILTER_IPT_SO_GET_INFO = 64,
	BPFILTER_IPT_SO_GET_ENTRIES = 65,
	BPFILTER_IPT_SO_GET_REVISION_MATCH = 66,
	BPFILTER_IPT_SO_GET_REVISION_TARGET = 67,
	BPFILTER_IPT_GET_MAX,	/* one past the last get option */
};
|
||||
|
||||
#endif /* _UAPI_LINUX_BPFILTER_H */
|
|
@ -202,6 +202,8 @@ source "net/bridge/netfilter/Kconfig"
|
|||
|
||||
endif
|
||||
|
||||
source "net/bpfilter/Kconfig"
|
||||
|
||||
source "net/dccp/Kconfig"
|
||||
source "net/sctp/Kconfig"
|
||||
source "net/rds/Kconfig"
|
||||
|
|
|
@ -20,6 +20,7 @@ obj-$(CONFIG_TLS) += tls/
|
|||
obj-$(CONFIG_XFRM) += xfrm/
|
||||
obj-$(CONFIG_UNIX) += unix/
|
||||
obj-$(CONFIG_NET) += ipv6/
|
||||
obj-$(CONFIG_BPFILTER) += bpfilter/
|
||||
obj-$(CONFIG_PACKET) += packet/
|
||||
obj-$(CONFIG_NET_KEY) += key/
|
||||
obj-$(CONFIG_BRIDGE) += bridge/
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
menuconfig BPFILTER
|
||||
bool "BPF based packet filtering framework (BPFILTER)"
|
||||
default n
|
||||
depends on NET && BPF
|
||||
help
|
||||
This builds the experimental bpfilter framework, which aims to
|
||||
provide netfilter-compatible functionality via BPF.
|
||||
|
||||
if BPFILTER
|
||||
config BPFILTER_UMH
|
||||
tristate "bpfilter kernel module with user mode helper"
|
||||
default m
|
||||
help
|
||||
This builds the bpfilter kernel module with an embedded user mode helper.
|
||||
endif
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# Makefile for the Linux BPFILTER layer.
|
||||
#
|
||||
|
||||
hostprogs-y := bpfilter_umh
|
||||
bpfilter_umh-objs := main.o
|
||||
HOSTCFLAGS += -I. -Itools/include/
|
||||
ifeq ($(CONFIG_BPFILTER_UMH), y)
|
||||
# builtin bpfilter_umh should be compiled with -static
|
||||
# since rootfs isn't mounted at the time of __init
|
||||
# function is called and do_execv won't find elf interpreter
|
||||
HOSTLDFLAGS += -static
|
||||
endif
|
||||
|
||||
# a bit of elf magic to convert bpfilter_umh binary into a binary blob
|
||||
# inside bpfilter_umh.o elf file referenced by
|
||||
# _binary_net_bpfilter_bpfilter_umh_start symbol
|
||||
# which bpfilter_kern.c passes further into umh blob loader at run-time
|
||||
quiet_cmd_copy_umh = GEN $@
|
||||
cmd_copy_umh = echo ':' > $(obj)/.bpfilter_umh.o.cmd; \
|
||||
$(OBJCOPY) -I binary -O $(CONFIG_OUTPUT_FORMAT) \
|
||||
-B `$(OBJDUMP) -f $<|grep architecture|cut -d, -f1|cut -d' ' -f2` \
|
||||
--rename-section .data=.init.rodata $< $@
|
||||
|
||||
$(obj)/bpfilter_umh.o: $(obj)/bpfilter_umh
|
||||
$(call cmd,copy_umh)
|
||||
|
||||
obj-$(CONFIG_BPFILTER_UMH) += bpfilter.o
|
||||
bpfilter-objs += bpfilter_kern.o bpfilter_umh.o
|
|
@ -0,0 +1,111 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/umh.h>
|
||||
#include <linux/bpfilter.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/signal.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/file.h>
|
||||
#include "msgfmt.h"
|
||||
|
||||
#define UMH_start _binary_net_bpfilter_bpfilter_umh_start
|
||||
#define UMH_end _binary_net_bpfilter_bpfilter_umh_end
|
||||
|
||||
extern char UMH_start;
|
||||
extern char UMH_end;
|
||||
|
||||
static struct umh_info info;
|
||||
/* since ip_getsockopt() can run in parallel, serialize access to umh */
|
||||
static DEFINE_MUTEX(bpfilter_lock);
|
||||
|
||||
static void shutdown_umh(struct umh_info *info)
|
||||
{
|
||||
struct task_struct *tsk;
|
||||
|
||||
tsk = pid_task(find_vpid(info->pid), PIDTYPE_PID);
|
||||
if (tsk)
|
||||
force_sig(SIGKILL, tsk);
|
||||
fput(info->pipe_to_umh);
|
||||
fput(info->pipe_from_umh);
|
||||
}
|
||||
|
||||
static void __stop_umh(void)
|
||||
{
|
||||
if (bpfilter_process_sockopt) {
|
||||
bpfilter_process_sockopt = NULL;
|
||||
shutdown_umh(&info);
|
||||
}
|
||||
}
|
||||
|
||||
static void stop_umh(void)
|
||||
{
|
||||
mutex_lock(&bpfilter_lock);
|
||||
__stop_umh();
|
||||
mutex_unlock(&bpfilter_lock);
|
||||
}
|
||||
|
||||
static int __bpfilter_process_sockopt(struct sock *sk, int optname,
|
||||
char __user *optval,
|
||||
unsigned int optlen, bool is_set)
|
||||
{
|
||||
struct mbox_request req;
|
||||
struct mbox_reply reply;
|
||||
loff_t pos;
|
||||
ssize_t n;
|
||||
int ret;
|
||||
|
||||
req.is_set = is_set;
|
||||
req.pid = current->pid;
|
||||
req.cmd = optname;
|
||||
req.addr = (long)optval;
|
||||
req.len = optlen;
|
||||
mutex_lock(&bpfilter_lock);
|
||||
n = __kernel_write(info.pipe_to_umh, &req, sizeof(req), &pos);
|
||||
if (n != sizeof(req)) {
|
||||
pr_err("write fail %zd\n", n);
|
||||
__stop_umh();
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
pos = 0;
|
||||
n = kernel_read(info.pipe_from_umh, &reply, sizeof(reply), &pos);
|
||||
if (n != sizeof(reply)) {
|
||||
pr_err("read fail %zd\n", n);
|
||||
__stop_umh();
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
ret = reply.status;
|
||||
out:
|
||||
mutex_unlock(&bpfilter_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * load_umh - module init: start the embedded helper and publish the hook.
 *
 * Forks the helper image found between UMH_start and UMH_end, sends one
 * request with command 0 as a health check, and only then installs
 * __bpfilter_process_sockopt as the global sockopt handler.
 *
 * Return: 0 on success, the fork_usermode_blob() error, or -EFAULT when the
 * health check fails.
 */
static int __init load_umh(void)
{
	int err;

	/* fork usermode process */
	err = fork_usermode_blob(&UMH_start, &UMH_end - &UMH_start, &info);
	if (err)
		return err;
	pr_info("Loaded bpfilter_umh pid %d\n", info.pid);

	/* health check that usermode process started correctly */
	if (__bpfilter_process_sockopt(NULL, 0, NULL, 0, false) != 0) {
		/*
		 * The hook has not been published yet, so stop_umh() would
		 * find bpfilter_process_sockopt == NULL and do nothing,
		 * leaking the helper process and both pipe references.
		 * Nothing else can reach the helper at this point, so tear
		 * it down directly.
		 */
		shutdown_umh(&info);
		return -EFAULT;
	}
	bpfilter_process_sockopt = &__bpfilter_process_sockopt;
	return 0;
}
|
||||
|
||||
static void __exit fini_umh(void)
|
||||
{
|
||||
stop_umh();
|
||||
}
|
||||
module_init(load_umh);
|
||||
module_exit(fini_umh);
|
||||
MODULE_LICENSE("GPL");
|
|
@ -0,0 +1,63 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
#define _GNU_SOURCE
|
||||
#include <sys/uio.h>
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/socket.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include "include/uapi/linux/bpf.h"
|
||||
#include <asm/unistd.h>
|
||||
#include "msgfmt.h"
|
||||
|
||||
/* Descriptor used for diagnostic output; opened in main(). */
int debug_fd;
|
||||
|
||||
static int handle_get_cmd(struct mbox_request *cmd)
|
||||
{
|
||||
switch (cmd->cmd) {
|
||||
case 0:
|
||||
return 0;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return -ENOPROTOOPT;
|
||||
}
|
||||
|
||||
/* Handle a "set" request: every command is rejected with -ENOPROTOOPT. */
static int handle_set_cmd(struct mbox_request *cmd)
{
	(void)cmd;	/* request contents are not inspected */

	return -ENOPROTOOPT;
}
|
||||
|
||||
static void loop(void)
|
||||
{
|
||||
while (1) {
|
||||
struct mbox_request req;
|
||||
struct mbox_reply reply;
|
||||
int n;
|
||||
|
||||
n = read(0, &req, sizeof(req));
|
||||
if (n != sizeof(req)) {
|
||||
dprintf(debug_fd, "invalid request %d\n", n);
|
||||
return;
|
||||
}
|
||||
|
||||
reply.status = req.is_set ?
|
||||
handle_set_cmd(&req) :
|
||||
handle_get_cmd(&req);
|
||||
|
||||
n = write(1, &reply, sizeof(reply));
|
||||
if (n != sizeof(reply)) {
|
||||
dprintf(debug_fd, "reply failed %d\n", n);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
debug_fd = open("/dev/console", 00000002 | 00000100);
|
||||
dprintf(debug_fd, "Started bpfilter\n");
|
||||
loop();
|
||||
close(debug_fd);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _NET_BPFILTER_MSGFMT_H
|
||||
#define _NET_BPFILTER_MSGFMT_H
|
||||
|
||||
struct mbox_request {
|
||||
__u64 addr;
|
||||
__u32 len;
|
||||
__u32 is_set;
|
||||
__u32 cmd;
|
||||
__u32 pid;
|
||||
};
|
||||
|
||||
struct mbox_reply {
|
||||
__u32 status;
|
||||
};
|
||||
|
||||
#endif
|
|
@ -16,6 +16,8 @@ obj-y := route.o inetpeer.o protocol.o \
|
|||
inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \
|
||||
metrics.o
|
||||
|
||||
obj-$(CONFIG_BPFILTER) += bpfilter/
|
||||
|
||||
obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
|
||||
obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
|
||||
obj-$(CONFIG_PROC_FS) += proc.o
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
obj-$(CONFIG_BPFILTER) += sockopt.o
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/bpfilter.h>
|
||||
#include <uapi/linux/bpf.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/kmod.h>
|
||||
|
||||
int (*bpfilter_process_sockopt)(struct sock *sk, int optname,
|
||||
char __user *optval,
|
||||
unsigned int optlen, bool is_set);
|
||||
EXPORT_SYMBOL_GPL(bpfilter_process_sockopt);
|
||||
|
||||
/*
 * Dispatch a sockopt request through the bpfilter hook, loading the
 * "bpfilter" module first when no handler is installed.
 *
 * Returns the request_module() error if loading fails, -ECHILD if the hook
 * is still NULL after a successful load, otherwise the handler's result.
 *
 * NOTE(review): the hook is re-read without any visible synchronisation, so
 * it could presumably be cleared between the check and the call -- confirm.
 */
int bpfilter_mbox_request(struct sock *sk, int optname, char __user *optval,
			  unsigned int optlen, bool is_set)
{
	int err;

	if (bpfilter_process_sockopt)
		goto dispatch;

	err = request_module("bpfilter");
	if (err)
		return err;
	if (!bpfilter_process_sockopt)
		return -ECHILD;

dispatch:
	return bpfilter_process_sockopt(sk, optname, optval, optlen, is_set);
}
|
||||
|
||||
/* setsockopt entry point: forwards the request with is_set == true. */
int bpfilter_ip_set_sockopt(struct sock *sk, int optname, char __user *optval,
			    unsigned int optlen)
{
	const bool is_set = true;

	return bpfilter_mbox_request(sk, optname, optval, optlen, is_set);
}
|
||||
|
||||
/*
 * getsockopt entry point: fetches the caller's length from user space, then
 * forwards the request with is_set == false. Returns -EFAULT when the length
 * cannot be read.
 */
int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
			    int __user *optlen)
{
	int user_len;

	if (get_user(user_len, optlen) != 0)
		return -EFAULT;

	return bpfilter_mbox_request(sk, optname, optval, user_len, false);
}
|
|
@ -47,6 +47,8 @@
|
|||
#include <linux/errqueue.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
#include <linux/bpfilter.h>
|
||||
|
||||
/*
|
||||
* SOL_IP control messages.
|
||||
*/
|
||||
|
@ -1244,6 +1246,11 @@ int ip_setsockopt(struct sock *sk, int level,
|
|||
return -ENOPROTOOPT;
|
||||
|
||||
err = do_ip_setsockopt(sk, level, optname, optval, optlen);
|
||||
#ifdef CONFIG_BPFILTER
|
||||
if (optname >= BPFILTER_IPT_SO_SET_REPLACE &&
|
||||
optname < BPFILTER_IPT_SET_MAX)
|
||||
err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen);
|
||||
#endif
|
||||
#ifdef CONFIG_NETFILTER
|
||||
/* we need to exclude all possible ENOPROTOOPTs except default case */
|
||||
if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
|
||||
|
@ -1552,6 +1559,11 @@ int ip_getsockopt(struct sock *sk, int level,
|
|||
int err;
|
||||
|
||||
err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
|
||||
#ifdef CONFIG_BPFILTER
|
||||
if (optname >= BPFILTER_IPT_SO_GET_INFO &&
|
||||
optname < BPFILTER_IPT_GET_MAX)
|
||||
err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
|
||||
#endif
|
||||
#ifdef CONFIG_NETFILTER
|
||||
/* we need to exclude all possible ENOPROTOOPTs except default case */
|
||||
if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
|
||||
|
@ -1584,6 +1596,11 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
|
|||
err = do_ip_getsockopt(sk, level, optname, optval, optlen,
|
||||
MSG_CMSG_COMPAT);
|
||||
|
||||
#ifdef CONFIG_BPFILTER
|
||||
if (optname >= BPFILTER_IPT_SO_GET_INFO &&
|
||||
optname < BPFILTER_IPT_GET_MAX)
|
||||
err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
|
||||
#endif
|
||||
#ifdef CONFIG_NETFILTER
|
||||
/* we need to exclude all possible ENOPROTOOPTs except default case */
|
||||
if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
|
||||
|
|
Загрузка…
Ссылка в новой задаче