Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for net-next:

1) Clean up and consolidate ct ecache infrastructure by merging ct and
   expect notifiers, from Florian Westphal.

2) Missing counters and timestamp in nfnetlink_queue and _log conntrack
   information.

3) Missing error check for xt_register_template() in iptables mangle,
   as a incremental fix for the previous pull request, also from
   Florian Westphal.

4) Add netfilter hooks for the SRv6 lightweigh tunnel driver, from
   Ryoga Sato. The hooks are enabled via nf_hooks_lwtunnel sysctl
   to make sure existing netfilter rulesets do not break. There is
   a static key to disable the hooks by default.

   The pktgen_bench_xmit_mode_netif_receive.sh shows no noticeable
   impact in the seg6_input path for non-netfilter users: similar
   numbers with and without this patch.

   This is a sample of the perf report output:

    11.67%  kpktgend_0       [ipv6]                    [k] ipv6_get_saddr_eval
     7.89%  kpktgend_0       [ipv6]                    [k] __ipv6_addr_label
     7.52%  kpktgend_0       [ipv6]                    [k] __ipv6_dev_get_saddr
     6.63%  kpktgend_0       [kernel.vmlinux]          [k] asm_exc_nmi
     4.74%  kpktgend_0       [ipv6]                    [k] fib6_node_lookup_1
     3.48%  kpktgend_0       [kernel.vmlinux]          [k] pskb_expand_head
     3.33%  kpktgend_0       [ipv6]                    [k] ip6_rcv_core.isra.29
     3.33%  kpktgend_0       [ipv6]                    [k] seg6_do_srh_encap
     2.53%  kpktgend_0       [ipv6]                    [k] ipv6_dev_get_saddr
     2.45%  kpktgend_0       [ipv6]                    [k] fib6_table_lookup
     2.24%  kpktgend_0       [kernel.vmlinux]          [k] ___cache_free
     2.16%  kpktgend_0       [ipv6]                    [k] ip6_pol_route
     2.11%  kpktgend_0       [kernel.vmlinux]          [k] __ipv6_addr_type
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2021-08-30 10:57:54 +01:00
Родитель 724812d856 7a3f5b0de3
Коммит 9dfa859da0
14 изменённых файлов: 362 добавлений и 251 удалений

Просмотреть файл

@ -184,6 +184,13 @@ nf_conntrack_gre_timeout_stream - INTEGER (seconds)
This extended timeout will be used in case there is an GRE stream
detected.
nf_hooks_lwtunnel - BOOLEAN
- 0 - disabled (default)
- not 0 - enabled
If this option is enabled, the lightweight tunnel netfilter hooks are
enabled. This option cannot be disabled once it is enabled.
nf_flowtable_tcp_timeout - INTEGER (seconds)
default 30

Просмотреть файл

@ -51,6 +51,9 @@ struct lwtunnel_encap_ops {
};
#ifdef CONFIG_LWTUNNEL
DECLARE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
void lwtstate_free(struct lwtunnel_state *lws);
static inline struct lwtunnel_state *

Просмотреть файл

@ -72,14 +72,20 @@ struct nf_ct_event {
int report;
};
struct nf_ct_event_notifier {
int (*fcn)(unsigned int events, struct nf_ct_event *item);
struct nf_exp_event {
struct nf_conntrack_expect *exp;
u32 portid;
int report;
};
int nf_conntrack_register_notifier(struct net *net,
struct nf_ct_event_notifier *nb);
void nf_conntrack_unregister_notifier(struct net *net,
struct nf_ct_event_notifier *nb);
struct nf_ct_event_notifier {
int (*ct_event)(unsigned int events, const struct nf_ct_event *item);
int (*exp_event)(unsigned int events, const struct nf_exp_event *item);
};
void nf_conntrack_register_notifier(struct net *net,
const struct nf_ct_event_notifier *nb);
void nf_conntrack_unregister_notifier(struct net *net);
void nf_ct_deliver_cached_events(struct nf_conn *ct);
int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
@ -151,22 +157,6 @@ nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct)
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct nf_exp_event {
struct nf_conntrack_expect *exp;
u32 portid;
int report;
};
struct nf_exp_event_notifier {
int (*fcn)(unsigned int events, struct nf_exp_event *item);
};
int nf_ct_expect_register_notifier(struct net *net,
struct nf_exp_event_notifier *nb);
void nf_ct_expect_unregister_notifier(struct net *net,
struct nf_exp_event_notifier *nb);
void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
struct nf_conntrack_expect *exp,
u32 portid, int report);

Просмотреть файл

@ -0,0 +1,7 @@
#include <linux/sysctl.h>
#include <linux/types.h>
#ifdef CONFIG_SYSCTL
int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
#endif

Просмотреть файл

@ -113,7 +113,6 @@ struct netns_ct {
struct ct_pcpu __percpu *pcpu_lists;
struct ip_conntrack_stat __percpu *stat;
struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
struct nf_ip_net nf_ct_proto;
#if defined(CONFIG_NF_CONNTRACK_LABELS)
unsigned int labels_used;

Просмотреть файл

@ -23,6 +23,9 @@
#include <net/ip6_fib.h>
#include <net/rtnh.h>
DEFINE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_enabled);
#ifdef CONFIG_MODULES
static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type)

Просмотреть файл

@ -112,6 +112,8 @@ static int __init iptable_mangle_init(void)
{
int ret = xt_register_template(&packet_mangler,
iptable_mangle_table_init);
if (ret < 0)
return ret;
mangle_ops = xt_hook_ops_alloc(&packet_mangler, iptable_mangle_hook);
if (IS_ERR(mangle_ops)) {

Просмотреть файл

@ -26,6 +26,8 @@
#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
#endif
#include <net/lwtunnel.h>
#include <linux/netfilter.h>
static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
{
@ -295,11 +297,19 @@ static int seg6_do_srh(struct sk_buff *skb)
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
nf_reset_ct(skb);
return 0;
}
static int seg6_input(struct sk_buff *skb)
static int seg6_input_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
return dst_input(skb);
}
static int seg6_input_core(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct dst_entry *dst = NULL;
@ -337,10 +347,41 @@ static int seg6_input(struct sk_buff *skb)
if (unlikely(err))
return err;
return dst_input(skb);
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
dev_net(skb->dev), NULL, skb, NULL,
skb_dst(skb)->dev, seg6_input_finish);
return seg6_input_finish(dev_net(skb->dev), NULL, skb);
}
static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
static int seg6_input_nf(struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;
struct net *net = dev_net(skb->dev);
switch (skb->protocol) {
case htons(ETH_P_IP):
return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, NULL,
skb, NULL, dev, seg6_input_core);
case htons(ETH_P_IPV6):
return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, NULL,
skb, NULL, dev, seg6_input_core);
}
return -EINVAL;
}
static int seg6_input(struct sk_buff *skb)
{
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return seg6_input_nf(skb);
return seg6_input_core(dev_net(skb->dev), NULL, skb);
}
static int seg6_output_core(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct dst_entry *dst = NULL;
@ -387,12 +428,40 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (unlikely(err))
goto drop;
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
NULL, skb_dst(skb)->dev, dst_output);
return dst_output(net, sk, skb);
drop:
kfree_skb(skb);
return err;
}
static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;
switch (skb->protocol) {
case htons(ETH_P_IP):
return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb,
NULL, dev, seg6_output_core);
case htons(ETH_P_IPV6):
return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb,
NULL, dev, seg6_output_core);
}
return -EINVAL;
}
static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return seg6_output_nf(net, sk, skb);
return seg6_output_core(net, sk, skb);
}
static int seg6_build_state(struct net *net, struct nlattr *nla,
unsigned int family, const void *cfg,
struct lwtunnel_state **ts,

Просмотреть файл

@ -30,6 +30,8 @@
#include <net/seg6_local.h>
#include <linux/etherdevice.h>
#include <linux/bpf.h>
#include <net/lwtunnel.h>
#include <linux/netfilter.h>
#define SEG6_F_ATTR(i) BIT(i)
@ -413,12 +415,33 @@ drop:
return -EINVAL;
}
static int input_action_end_dx6_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct in6_addr *nhaddr = NULL;
struct seg6_local_lwt *slwt;
slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
/* The inner packet is not associated to any local interface,
* so we do not call netif_rx().
*
* If slwt->nh6 is set to ::, then lookup the nexthop for the
* inner packet's DA. Otherwise, use the specified nexthop.
*/
if (!ipv6_addr_any(&slwt->nh6))
nhaddr = &slwt->nh6;
seg6_lookup_nexthop(skb, nhaddr, 0);
return dst_input(skb);
}
/* decapsulate and forward to specified nexthop */
static int input_action_end_dx6(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
struct in6_addr *nhaddr = NULL;
/* this function accepts IPv6 encapsulated packets, with either
* an SRH with SL=0, or no SRH.
*/
@ -429,40 +452,30 @@ static int input_action_end_dx6(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto drop;
/* The inner packet is not associated to any local interface,
* so we do not call netif_rx().
*
* If slwt->nh6 is set to ::, then lookup the nexthop for the
* inner packet's DA. Otherwise, use the specified nexthop.
*/
if (!ipv6_addr_any(&slwt->nh6))
nhaddr = &slwt->nh6;
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
nf_reset_ct(skb);
seg6_lookup_nexthop(skb, nhaddr, 0);
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
dev_net(skb->dev), NULL, skb, NULL,
skb_dst(skb)->dev, input_action_end_dx6_finish);
return dst_input(skb);
return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb);
drop:
kfree_skb(skb);
return -EINVAL;
}
static int input_action_end_dx4(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct seg6_local_lwt *slwt;
struct iphdr *iph;
__be32 nhaddr;
int err;
if (!decap_and_validate(skb, IPPROTO_IPIP))
goto drop;
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto drop;
skb->protocol = htons(ETH_P_IP);
slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
iph = ip_hdr(skb);
@ -470,14 +483,34 @@ static int input_action_end_dx4(struct sk_buff *skb,
skb_dst_drop(skb);
skb_set_transport_header(skb, sizeof(struct iphdr));
err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
if (err)
goto drop;
if (err) {
kfree_skb(skb);
return -EINVAL;
}
return dst_input(skb);
}
static int input_action_end_dx4(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
if (!decap_and_validate(skb, IPPROTO_IPIP))
goto drop;
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto drop;
skb->protocol = htons(ETH_P_IP);
skb_set_transport_header(skb, sizeof(struct iphdr));
nf_reset_ct(skb);
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
dev_net(skb->dev), NULL, skb, NULL,
skb_dst(skb)->dev, input_action_end_dx4_finish);
return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb);
drop:
kfree_skb(skb);
return -EINVAL;
@ -645,6 +678,7 @@ static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
skb_dst_drop(skb);
skb_set_transport_header(skb, hdrlen);
nf_reset_ct(skb);
return end_dt_vrf_rcv(skb, family, vrf);
@ -1078,7 +1112,8 @@ static void seg6_local_update_counters(struct seg6_local_lwt *slwt,
u64_stats_update_end(&pcounters->syncp);
}
static int seg6_local_input(struct sk_buff *skb)
static int seg6_local_input_core(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct seg6_action_desc *desc;
@ -1086,11 +1121,6 @@ static int seg6_local_input(struct sk_buff *skb)
unsigned int len = skb->len;
int rc;
if (skb->protocol != htons(ETH_P_IPV6)) {
kfree_skb(skb);
return -EINVAL;
}
slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
desc = slwt->desc;
@ -1104,6 +1134,21 @@ static int seg6_local_input(struct sk_buff *skb)
return rc;
}
static int seg6_local_input(struct sk_buff *skb)
{
if (skb->protocol != htons(ETH_P_IPV6)) {
kfree_skb(skb);
return -EINVAL;
}
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
dev_net(skb->dev), NULL, skb, skb->dev, NULL,
seg6_local_input_core);
return seg6_local_input_core(dev_net(skb->dev), NULL, skb);
}
static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_ACTION] = { .type = NLA_U32 },
[SEG6_LOCAL_SRH] = { .type = NLA_BINARY },

Просмотреть файл

@ -212,3 +212,6 @@ obj-$(CONFIG_IP_SET) += ipset/
# IPVS
obj-$(CONFIG_IP_VS) += ipvs/
# lwtunnel
obj-$(CONFIG_LWTUNNEL) += nf_hooks_lwtunnel.o

Просмотреть файл

@ -130,103 +130,32 @@ static void ecache_work(struct work_struct *work)
schedule_delayed_work(&cnet->ecache_dwork, delay);
}
int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct,
u32 portid, int report)
static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e,
const unsigned int events,
const unsigned long missed,
const struct nf_ct_event *item)
{
int ret = 0;
struct net *net = nf_ct_net(ct);
struct nf_conn *ct = item->ct;
struct net *net = nf_ct_net(item->ct);
struct nf_ct_event_notifier *notify;
struct nf_conntrack_ecache *e;
rcu_read_lock();
notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
if (!notify)
goto out_unlock;
e = nf_ct_ecache_find(ct);
if (!e)
goto out_unlock;
if (nf_ct_is_confirmed(ct)) {
struct nf_ct_event item = {
.ct = ct,
.portid = e->portid ? e->portid : portid,
.report = report
};
/* This is a resent of a destroy event? If so, skip missed */
unsigned long missed = e->portid ? 0 : e->missed;
if (!((eventmask | missed) & e->ctmask))
goto out_unlock;
ret = notify->fcn(eventmask | missed, &item);
if (unlikely(ret < 0 || missed)) {
spin_lock_bh(&ct->lock);
if (ret < 0) {
/* This is a destroy event that has been
* triggered by a process, we store the PORTID
* to include it in the retransmission.
*/
if (eventmask & (1 << IPCT_DESTROY)) {
if (e->portid == 0 && portid != 0)
e->portid = portid;
e->state = NFCT_ECACHE_DESTROY_FAIL;
} else {
e->missed |= eventmask;
}
} else {
e->missed &= ~missed;
}
spin_unlock_bh(&ct->lock);
}
}
out_unlock:
rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report);
/* deliver cached events and clear cache entry - must be called with locally
* disabled softirqs */
void nf_ct_deliver_cached_events(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
unsigned long events, missed;
struct nf_ct_event_notifier *notify;
struct nf_conntrack_ecache *e;
struct nf_ct_event item;
int ret;
rcu_read_lock();
notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
if (notify == NULL)
goto out_unlock;
if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
goto out_unlock;
e = nf_ct_ecache_find(ct);
if (e == NULL)
goto out_unlock;
events = xchg(&e->cache, 0);
/* We make a copy of the missed event cache without taking
* the lock, thus we may send missed events twice. However,
* this does not harm and it happens very rarely. */
missed = e->missed;
if (!((events | missed) & e->ctmask))
goto out_unlock;
return 0;
item.ct = ct;
item.portid = 0;
item.report = 0;
rcu_read_lock();
ret = notify->fcn(events | missed, &item);
notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
if (!notify) {
rcu_read_unlock();
return 0;
}
if (likely(ret == 0 && !missed))
goto out_unlock;
ret = notify->ct_event(events | missed, item);
rcu_read_unlock();
if (likely(ret >= 0 && missed == 0))
return 0;
spin_lock_bh(&ct->lock);
if (ret < 0)
@ -235,8 +164,73 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct)
e->missed &= ~missed;
spin_unlock_bh(&ct->lock);
out_unlock:
rcu_read_unlock();
return ret;
}
int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct,
u32 portid, int report)
{
struct nf_conntrack_ecache *e;
struct nf_ct_event item;
unsigned long missed;
int ret;
if (!nf_ct_is_confirmed(ct))
return 0;
e = nf_ct_ecache_find(ct);
if (!e)
return 0;
memset(&item, 0, sizeof(item));
item.ct = ct;
item.portid = e->portid ? e->portid : portid;
item.report = report;
/* This is a resent of a destroy event? If so, skip missed */
missed = e->portid ? 0 : e->missed;
ret = __nf_conntrack_eventmask_report(e, events, missed, &item);
if (unlikely(ret < 0 && (events & (1 << IPCT_DESTROY)))) {
/* This is a destroy event that has been triggered by a process,
* we store the PORTID to include it in the retransmission.
*/
if (e->portid == 0 && portid != 0)
e->portid = portid;
e->state = NFCT_ECACHE_DESTROY_FAIL;
}
return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report);
/* deliver cached events and clear cache entry - must be called with locally
* disabled softirqs */
void nf_ct_deliver_cached_events(struct nf_conn *ct)
{
struct nf_conntrack_ecache *e;
struct nf_ct_event item;
unsigned long events;
if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
return;
e = nf_ct_ecache_find(ct);
if (e == NULL)
return;
events = xchg(&e->cache, 0);
item.ct = ct;
item.portid = 0;
item.report = 0;
/* We make a copy of the missed event cache without taking
* the lock, thus we may send missed events twice. However,
* this does not harm and it happens very rarely.
*/
__nf_conntrack_eventmask_report(e, events, e->missed, &item);
}
EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
@ -246,11 +240,11 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
{
struct net *net = nf_ct_exp_net(exp);
struct nf_exp_event_notifier *notify;
struct nf_ct_event_notifier *notify;
struct nf_conntrack_ecache *e;
rcu_read_lock();
notify = rcu_dereference(net->ct.nf_expect_event_cb);
notify = rcu_dereference(net->ct.nf_conntrack_event_cb);
if (!notify)
goto out_unlock;
@ -264,86 +258,35 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
.portid = portid,
.report = report
};
notify->fcn(1 << event, &item);
notify->exp_event(1 << event, &item);
}
out_unlock:
rcu_read_unlock();
}
int nf_conntrack_register_notifier(struct net *net,
struct nf_ct_event_notifier *new)
void nf_conntrack_register_notifier(struct net *net,
const struct nf_ct_event_notifier *new)
{
int ret;
struct nf_ct_event_notifier *notify;
mutex_lock(&nf_ct_ecache_mutex);
notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
lockdep_is_held(&nf_ct_ecache_mutex));
if (notify != NULL) {
ret = -EBUSY;
goto out_unlock;
}
WARN_ON_ONCE(notify);
rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new);
ret = 0;
out_unlock:
mutex_unlock(&nf_ct_ecache_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
void nf_conntrack_unregister_notifier(struct net *net,
struct nf_ct_event_notifier *new)
void nf_conntrack_unregister_notifier(struct net *net)
{
struct nf_ct_event_notifier *notify;
mutex_lock(&nf_ct_ecache_mutex);
notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,
lockdep_is_held(&nf_ct_ecache_mutex));
BUG_ON(notify != new);
RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);
mutex_unlock(&nf_ct_ecache_mutex);
/* synchronize_rcu() is called from ctnetlink_exit. */
/* synchronize_rcu() is called after netns pre_exit */
}
EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
int nf_ct_expect_register_notifier(struct net *net,
struct nf_exp_event_notifier *new)
{
int ret;
struct nf_exp_event_notifier *notify;
mutex_lock(&nf_ct_ecache_mutex);
notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,
lockdep_is_held(&nf_ct_ecache_mutex));
if (notify != NULL) {
ret = -EBUSY;
goto out_unlock;
}
rcu_assign_pointer(net->ct.nf_expect_event_cb, new);
ret = 0;
out_unlock:
mutex_unlock(&nf_ct_ecache_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);
void nf_ct_expect_unregister_notifier(struct net *net,
struct nf_exp_event_notifier *new)
{
struct nf_exp_event_notifier *notify;
mutex_lock(&nf_ct_ecache_mutex);
notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,
lockdep_is_held(&nf_ct_ecache_mutex));
BUG_ON(notify != new);
RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL);
mutex_unlock(&nf_ct_ecache_mutex);
/* synchronize_rcu() is called from ctnetlink_exit. */
}
EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state)
{
struct nf_conntrack_net *cnet = nf_ct_pernet(net);

Просмотреть файл

@ -706,7 +706,7 @@ static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct)
}
static int
ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item)
{
const struct nf_conntrack_zone *zone;
struct net *net;
@ -2669,6 +2669,8 @@ ctnetlink_glue_build_size(const struct nf_conn *ct)
+ nla_total_size(0) /* CTA_HELP */
+ nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
+ ctnetlink_secctx_size(ct)
+ ctnetlink_acct_size(ct)
+ ctnetlink_timestamp_size(ct)
#if IS_ENABLED(CONFIG_NF_NAT)
+ 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
+ 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */
@ -2726,6 +2728,10 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
if (ctnetlink_dump_protoinfo(skb, ct, false) < 0)
goto nla_put_failure;
if (ctnetlink_dump_acct(skb, ct, IPCTNL_MSG_CT_GET) < 0 ||
ctnetlink_dump_timestamp(skb, ct) < 0)
goto nla_put_failure;
if (ctnetlink_dump_helpinfo(skb, ct) < 0)
goto nla_put_failure;
@ -3104,7 +3110,7 @@ nla_put_failure:
#ifdef CONFIG_NF_CONNTRACK_EVENTS
static int
ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
ctnetlink_expect_event(unsigned int events, const struct nf_exp_event *item)
{
struct nf_conntrack_expect *exp = item->exp;
struct net *net = nf_ct_exp_net(exp);
@ -3755,11 +3761,8 @@ static int ctnetlink_stat_exp_cpu(struct sk_buff *skb,
#ifdef CONFIG_NF_CONNTRACK_EVENTS
static struct nf_ct_event_notifier ctnl_notifier = {
.fcn = ctnetlink_conntrack_event,
};
static struct nf_exp_event_notifier ctnl_notifier_exp = {
.fcn = ctnetlink_expect_event,
.ct_event = ctnetlink_conntrack_event,
.exp_event = ctnetlink_expect_event,
};
#endif
@ -3852,52 +3855,21 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP);
static int __net_init ctnetlink_net_init(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
int ret;
ret = nf_conntrack_register_notifier(net, &ctnl_notifier);
if (ret < 0) {
pr_err("ctnetlink_init: cannot register notifier.\n");
goto err_out;
}
ret = nf_ct_expect_register_notifier(net, &ctnl_notifier_exp);
if (ret < 0) {
pr_err("ctnetlink_init: cannot expect register notifier.\n");
goto err_unreg_notifier;
}
nf_conntrack_register_notifier(net, &ctnl_notifier);
#endif
return 0;
#ifdef CONFIG_NF_CONNTRACK_EVENTS
err_unreg_notifier:
nf_conntrack_unregister_notifier(net, &ctnl_notifier);
err_out:
return ret;
#endif
}
static void ctnetlink_net_exit(struct net *net)
static void ctnetlink_net_pre_exit(struct net *net)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
nf_ct_expect_unregister_notifier(net, &ctnl_notifier_exp);
nf_conntrack_unregister_notifier(net, &ctnl_notifier);
nf_conntrack_unregister_notifier(net);
#endif
}
static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list)
{
struct net *net;
list_for_each_entry(net, net_exit_list, exit_list)
ctnetlink_net_exit(net);
/* wait for other cpus until they are done with ctnl_notifiers */
synchronize_rcu();
}
static struct pernet_operations ctnetlink_net_ops = {
.init = ctnetlink_net_init,
.exit_batch = ctnetlink_net_exit_batch,
.pre_exit = ctnetlink_net_pre_exit,
};
static int __init ctnetlink_init(void)

Просмотреть файл

@ -22,6 +22,9 @@
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_timestamp.h>
#ifdef CONFIG_LWTUNNEL
#include <net/netfilter/nf_hooks_lwtunnel.h>
#endif
#include <linux/rculist_nulls.h>
static bool enable_hooks __read_mostly;
@ -612,6 +615,9 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_GRE,
NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM,
#endif
#ifdef CONFIG_LWTUNNEL
NF_SYSCTL_CT_LWTUNNEL,
#endif
__NF_SYSCTL_CT_LAST_SYSCTL,
};
@ -958,6 +964,15 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
#endif
#ifdef CONFIG_LWTUNNEL
[NF_SYSCTL_CT_LWTUNNEL] = {
.procname = "nf_hooks_lwtunnel",
.data = NULL,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = nf_hooks_lwtunnel_sysctl_handler,
},
#endif
{}
};

Просмотреть файл

@ -0,0 +1,53 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/sysctl.h>
#include <net/lwtunnel.h>
#include <net/netfilter/nf_hooks_lwtunnel.h>
static inline int nf_hooks_lwtunnel_get(void)
{
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return 1;
else
return 0;
}
static inline int nf_hooks_lwtunnel_set(int enable)
{
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) {
if (!enable)
return -EBUSY;
} else if (enable) {
static_branch_enable(&nf_hooks_lwtunnel_enabled);
}
return 0;
}
#ifdef CONFIG_SYSCTL
int nf_hooks_lwtunnel_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int proc_nf_hooks_lwtunnel_enabled = 0;
struct ctl_table tmp = {
.procname = table->procname,
.data = &proc_nf_hooks_lwtunnel_enabled,
.maxlen = sizeof(int),
.mode = table->mode,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
};
int ret;
if (!write)
proc_nf_hooks_lwtunnel_enabled = nf_hooks_lwtunnel_get();
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && ret == 0)
ret = nf_hooks_lwtunnel_set(proc_nf_hooks_lwtunnel_enabled);
return ret;
}
EXPORT_SYMBOL_GPL(nf_hooks_lwtunnel_sysctl_handler);
#endif /* CONFIG_SYSCTL */