From 1702ad79d31ef236adcd9999fae7617e8eda515d Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 9 Sep 2020 13:29:01 +0200 Subject: [PATCH 01/11] netfilter: conntrack: proc: rename stat column Rename 'searched' column to 'clashres' (same len). conntrack(8) using the old /proc interface (ctnetlink not available) shows: cpu=0 entries=4784 clashres=2292 [..] Another alternative is to add another column, but this increases the number of always-0 columns. Fixes: bc92470413f3af1 ("netfilter: conntrack: add clash resolution stat counter") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_standalone.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 0ff39740797d..46c5557c1fec 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -428,14 +428,14 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v) const struct ip_conntrack_stat *st = v; if (v == SEQ_START_TOKEN) { - seq_puts(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n"); + seq_puts(seq, "entries clashres found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n"); return 0; } seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", nr_conntracks, - st->clash_resolve, /* was: searched */ + st->clash_resolve, st->found, 0, st->invalid, From 8a8b9047a8975ad4bdee34b7affad66edfbe626d Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 16 Sep 2020 22:16:56 +0800 Subject: [PATCH 02/11] netfilter: nf_tables: Remove ununsed function nft_data_debug It is never used, so can be removed. Signed-off-by: YueHaibing Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 8ceca0e419b3..c4c526507ddb 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -148,13 +148,6 @@ static inline void nft_data_copy(u32 *dst, const struct nft_data *src, memcpy(dst, src, len); } -static inline void nft_data_debug(const struct nft_data *data) -{ - pr_debug("data[0]=%x data[1]=%x data[2]=%x data[3]=%x\n", - data->data[0], data->data[1], - data->data[2], data->data[3]); -} - /** * struct nft_ctx - nf_tables rule/set context * From 18cd9b00fffe416f8c17990519dd80f05b312617 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 18 Sep 2020 21:16:56 +0800 Subject: [PATCH 03/11] ipvs: Remove unused macros They are not used since commit e4ff67513096 ("ipvs: add sync_maxlen parameter for the sync daemon") Signed-off-by: YueHaibing Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_sync.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 2b8abbfe018c..16b48064f715 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -242,9 +242,6 @@ struct ip_vs_sync_thread_data { | IPVS Sync Connection (1) | */ -#define SYNC_MESG_HEADER_LEN 4 -#define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */ - /* Version 0 header */ struct ip_vs_sync_mesg_v0 { __u8 nr_conns; From bc7a708235f41face74f9746461f94d94e8e33ae Mon Sep 17 00:00:00 2001 From: "Jose M. Guisado Gomez" Date: Sun, 27 Sep 2020 10:36:22 +0200 Subject: [PATCH 04/11] netfilter: nf_tables: fix userdata memleak When userdata was introduced for tables and objects its allocation was only freed inside the error path of the new{table, object} functions. Free user data inside corresponding destroy functions for tables and objects. Fixes: b131c96496b3 ("netfilter: nf_tables: add userdata support for nft_object") Fixes: 7a81575b806e ("netfilter: nf_tables: add userdata attributes to nft_table") Signed-off-by: Jose M. Guisado Gomez Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 84c0c1aaae99..b3c3c3fc1969 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1211,6 +1211,7 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx) rhltable_destroy(&ctx->table->chains_ht); kfree(ctx->table->name); + kfree(ctx->table->udata); kfree(ctx->table); } @@ -6231,6 +6232,7 @@ static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj) module_put(obj->ops->type->owner); kfree(obj->key.name); + kfree(obj->udata); kfree(obj); } From 85db827a57a9311ddb3d7f9fa18d812517b2ab9a Mon Sep 17 00:00:00 2001 From: "Jose M. Guisado Gomez" Date: Mon, 28 Sep 2020 14:24:57 +0200 Subject: [PATCH 05/11] netfilter: nf_tables: use nla_memdup to copy udata When userdata support was added to tables and objects, user data coming from user space was allocated and copied using kzalloc + nla_memcpy. Use nla_memdup to copy userdata of tables and objects. Signed-off-by: Jose M. Guisado Gomez Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b3c3c3fc1969..0473316aa392 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -986,7 +986,6 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, struct nft_table *table; struct nft_ctx ctx; u32 flags = 0; - u16 udlen = 0; int err; lockdep_assert_held(&net->nft.commit_mutex); @@ -1023,13 +1022,11 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, goto err_strdup; if (nla[NFTA_TABLE_USERDATA]) { - udlen = nla_len(nla[NFTA_TABLE_USERDATA]); - table->udata = kzalloc(udlen, GFP_KERNEL); + table->udata = nla_memdup(nla[NFTA_TABLE_USERDATA], GFP_KERNEL); if (table->udata == NULL) goto err_table_udata; - nla_memcpy(table->udata, nla[NFTA_TABLE_USERDATA], udlen); - table->udlen = udlen; + table->udlen = nla_len(nla[NFTA_TABLE_USERDATA]); } err = rhltable_init(&table->chains_ht, &nft_chain_ht_params); @@ -5900,7 +5897,6 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, struct nft_object *obj; struct nft_ctx ctx; u32 objtype; - u16 udlen; int err; if (!nla[NFTA_OBJ_TYPE] || @@ -5957,13 +5953,11 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, } if (nla[NFTA_OBJ_USERDATA]) { - udlen = nla_len(nla[NFTA_OBJ_USERDATA]); - obj->udata = kzalloc(udlen, GFP_KERNEL); + obj->udata = nla_memdup(nla[NFTA_OBJ_USERDATA], GFP_KERNEL); if (obj->udata == NULL) goto err_userdata; - nla_memcpy(obj->udata, nla[NFTA_OBJ_USERDATA], udlen); - obj->udlen = udlen; + obj->udlen = nla_len(nla[NFTA_OBJ_USERDATA]); } err = nft_trans_obj_add(&ctx, NFT_MSG_NEWOBJ, obj); From 002f2176532093753cb6ced61e5ea7b8904c6cae Mon Sep 17 00:00:00 2001 From: "Jose M. Guisado Gomez" Date: Mon, 28 Sep 2020 14:27:10 +0200 Subject: [PATCH 06/11] netfilter: nf_tables: add userdata attributes to nft_chain Enables storing userdata for nft_chain. Field udata points to user data and udlen stores its length. Adds new attribute flag NFTA_CHAIN_USERDATA. Signed-off-by: Jose M. Guisado Gomez Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 ++ include/uapi/linux/netfilter/nf_tables.h | 2 ++ net/netfilter/nf_tables_api.c | 33 ++++++++++++++++++------ 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index c4c526507ddb..0bd2a081ae39 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -945,6 +945,8 @@ struct nft_chain { bound:1, genmask:2; char *name; + u16 udlen; + u8 *udata; /* Only used during control plane commit phase: */ struct nft_rule **rules_next; diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 3c2469b43742..352ee51707a1 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -208,6 +208,7 @@ enum nft_chain_flags { * @NFTA_CHAIN_COUNTERS: counter specification of the chain (NLA_NESTED: nft_counter_attributes) * @NFTA_CHAIN_FLAGS: chain flags * @NFTA_CHAIN_ID: uniquely identifies a chain in a transaction (NLA_U32) + * @NFTA_CHAIN_USERDATA: user data (NLA_BINARY) */ enum nft_chain_attributes { NFTA_CHAIN_UNSPEC, @@ -222,6 +223,7 @@ enum nft_chain_attributes { NFTA_CHAIN_PAD, NFTA_CHAIN_FLAGS, NFTA_CHAIN_ID, + NFTA_CHAIN_USERDATA, __NFTA_CHAIN_MAX }; #define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0473316aa392..3cfff31e4818 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1304,6 +1304,8 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { [NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED }, [NFTA_CHAIN_FLAGS] = { .type = NLA_U32 }, [NFTA_CHAIN_ID] = { .type = NLA_U32 }, + [NFTA_CHAIN_USERDATA] = { .type = NLA_BINARY, + .len = NFT_USERDATA_MAXLEN }, }; static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = { @@ -1445,6 +1447,10 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use))) goto nla_put_failure; + if (chain->udata && + nla_put(skb, NFTA_CHAIN_USERDATA, chain->udlen, chain->udata)) + goto nla_put_failure; + nlmsg_end(skb, nlh); return 0; @@ -1682,9 +1688,11 @@ void nf_tables_chain_destroy(struct nft_ctx *ctx) free_percpu(rcu_dereference_raw(basechain->stats)); } kfree(chain->name); + kfree(chain->udata); kfree(basechain); } else { kfree(chain->name); + kfree(chain->udata); kfree(chain); } } @@ -2038,7 +2046,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, } else { if (!(flags & NFT_CHAIN_BINDING)) { err = -EINVAL; - goto err1; + goto err_destroy_chain; } snprintf(name, sizeof(name), "__chain%llu", ++chain_id); @@ -2047,13 +2055,22 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (!chain->name) { err = -ENOMEM; - goto err1; + goto err_destroy_chain; + } + + if (nla[NFTA_CHAIN_USERDATA]) { + chain->udata = nla_memdup(nla[NFTA_CHAIN_USERDATA], GFP_KERNEL); + if (chain->udata == NULL) { + err = -ENOMEM; + goto err_destroy_chain; + } + chain->udlen = nla_len(nla[NFTA_CHAIN_USERDATA]); } rules = nf_tables_chain_alloc_rules(chain, 0); if (!rules) { err = -ENOMEM; - goto err1; + goto err_destroy_chain; } *rules = NULL; @@ -2062,12 +2079,12 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, err = nf_tables_register_hook(net, table, chain); if (err < 0) - goto err1; + goto err_destroy_chain; trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); if (IS_ERR(trans)) { err = PTR_ERR(trans); - goto err2; + goto err_unregister_hook; } nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET; @@ -2077,15 +2094,15 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, err = nft_chain_add(table, chain); if (err < 0) { nft_trans_destroy(trans); - goto err2; + goto err_unregister_hook; } table->use++; return 0; -err2: +err_unregister_hook: nf_tables_unregister_hook(net, table, chain); -err1: +err_destroy_chain: nf_tables_chain_destroy(ctx); return err; From 82ec6630f9fcd129ebd839a6c862d0dbffe9eafc Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 18 Sep 2020 21:17:29 +0800 Subject: [PATCH 07/11] netfilter: nf_tables_offload: Remove unused macro FLOW_SETUP_BLOCK commit 9a32669fecfb ("netfilter: nf_tables_offload: support indr block call") left behind this. Signed-off-by: YueHaibing Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_offload.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 9ef37c1b7b3b..7c7e06624dc3 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -323,8 +323,6 @@ static int nft_indr_block_offload_cmd(struct nft_base_chain *basechain, return nft_block_setup(basechain, &bo, cmd); } -#define FLOW_SETUP_BLOCK TC_SETUP_BLOCK - static int nft_chain_offload_cmd(struct nft_base_chain *basechain, struct net_device *dev, enum flow_block_command cmd) From 9446ab34ace256e5e470c5aa221d46e544ad895e Mon Sep 17 00:00:00 2001 From: Vasily Averin Date: Fri, 25 Sep 2020 11:56:02 +0300 Subject: [PATCH 08/11] netfilter: ipset: enable memory accounting for ipset allocations Currently netadmin inside non-trusted container can quickly allocate whole node's memory via request of huge ipset hashtable. Other ipset-related memory allocations should be restricted too. v2: fixed typo ALLOC -> ACCOUNT Signed-off-by: Vasily Averin Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_core.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 920b7c4331f0..6f35832f0de3 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -250,22 +250,7 @@ EXPORT_SYMBOL_GPL(ip_set_type_unregister); void * ip_set_alloc(size_t size) { - void *members = NULL; - - if (size < KMALLOC_MAX_SIZE) - members = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); - - if (members) { - pr_debug("%p: allocated with kmalloc\n", members); - return members; - } - - members = vzalloc(size); - if (!members) - return NULL; - pr_debug("%p: allocated with vmalloc\n", members); - - return members; + return kvzalloc(size, GFP_KERNEL_ACCOUNT); } EXPORT_SYMBOL_GPL(ip_set_alloc); From ab6c41eefd46b92e4f5bcdbbc6c1ea39ed148274 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 2 Oct 2020 13:51:29 +0200 Subject: [PATCH 09/11] netfilter: nfnetlink: place subsys mutexes in distinct lockdep classes From time to time there are lockdep reports similar to this one: WARNING: possible circular locking dependency detected ------------------------------------------------------ 000000004f61aa56 (&table[i].mutex){+.+.}, at: nfnl_lock [nfnetlink] but task is already holding lock: [..] (&net->nft.commit_mutex){+.+.}, at: nf_tables_valid_genid [nf_tables] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&net->nft.commit_mutex){+.+.}: [..] nf_tables_valid_genid+0x18/0x60 [nf_tables] nfnetlink_rcv_batch+0x24c/0x620 [nfnetlink] nfnetlink_rcv+0x110/0x140 [nfnetlink] netlink_unicast+0x12c/0x1e0 [..] sys_sendmsg+0x18/0x40 linux_sparc_syscall+0x34/0x44 -> #0 (&table[i].mutex){+.+.}: [..] nfnl_lock+0x24/0x40 [nfnetlink] ip_set_nfnl_get_byindex+0x19c/0x280 [ip_set] set_match_v1_checkentry+0x14/0xc0 [xt_set] xt_check_match+0x238/0x260 [x_tables] __nft_match_init+0x160/0x180 [nft_compat] [..] sys_sendmsg+0x18/0x40 linux_sparc_syscall+0x34/0x44 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&net->nft.commit_mutex); lock(&table[i].mutex); lock(&net->nft.commit_mutex); lock(&table[i].mutex); Lockdep considers this an ABBA deadlock because the different nfnl subsys mutexes reside in the same lockdep class, but this is a false positive. CPU1 table[i] refers to the nftables subsys mutex, whereas CPU1 locks the ipset subsys mutex. Yi Che reported a similar lockdep splat, this time between ipset and ctnetlink subsys mutexes. Time to place them in distinct classes to avoid these warnings. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 3a2e64e13b22..2daa1f6ae344 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -46,6 +46,23 @@ static struct { const struct nfnetlink_subsystem __rcu *subsys; } table[NFNL_SUBSYS_COUNT]; +static struct lock_class_key nfnl_lockdep_keys[NFNL_SUBSYS_COUNT]; + +static const char *const nfnl_lockdep_names[NFNL_SUBSYS_COUNT] = { + [NFNL_SUBSYS_NONE] = "nfnl_subsys_none", + [NFNL_SUBSYS_CTNETLINK] = "nfnl_subsys_ctnetlink", + [NFNL_SUBSYS_CTNETLINK_EXP] = "nfnl_subsys_ctnetlink_exp", + [NFNL_SUBSYS_QUEUE] = "nfnl_subsys_queue", + [NFNL_SUBSYS_ULOG] = "nfnl_subsys_ulog", + [NFNL_SUBSYS_OSF] = "nfnl_subsys_osf", + [NFNL_SUBSYS_IPSET] = "nfnl_subsys_ipset", + [NFNL_SUBSYS_ACCT] = "nfnl_subsys_acct", + [NFNL_SUBSYS_CTNETLINK_TIMEOUT] = "nfnl_subsys_cttimeout", + [NFNL_SUBSYS_CTHELPER] = "nfnl_subsys_cthelper", + [NFNL_SUBSYS_NFTABLES] = "nfnl_subsys_nftables", + [NFNL_SUBSYS_NFT_COMPAT] = "nfnl_subsys_nftcompat", +}; + static const int nfnl_group2type[NFNLGRP_MAX+1] = { [NFNLGRP_CONNTRACK_NEW] = NFNL_SUBSYS_CTNETLINK, [NFNLGRP_CONNTRACK_UPDATE] = NFNL_SUBSYS_CTNETLINK, @@ -632,7 +649,7 @@ static int __init nfnetlink_init(void) BUG_ON(nfnl_group2type[i] == NFNL_SUBSYS_NONE); for (i=0; i Date: Fri, 2 Oct 2020 15:50:56 +0200 Subject: [PATCH 10/11] netfilter: nf_tables: Enable fast nft_cmp for inverted matches Add a boolean indicating NFT_CMP_NEQ. To include it into the match decision, it is sufficient to XOR it with the data comparison's result. While being at it, store the mask that is calculated during expression init and free the eval routine from having to recalculate it each time. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 2 ++ net/netfilter/nf_tables_core.c | 3 +-- net/netfilter/nft_cmp.c | 13 +++++++------ 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 78516de14d31..df2d91c814cb 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -25,8 +25,10 @@ void nf_tables_core_module_exit(void); struct nft_cmp_fast_expr { u32 data; + u32 mask; enum nft_registers sreg:8; u8 len; + bool inv; }; struct nft_immediate_expr { diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 587897a2498b..e92feacaf551 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -51,9 +51,8 @@ static void nft_cmp_fast_eval(const struct nft_expr *expr, struct nft_regs *regs) { const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); - u32 mask = nft_cmp_fast_mask(priv->len); - if ((regs->data[priv->sreg] & mask) == priv->data) + if (((regs->data[priv->sreg] & priv->mask) == priv->data) ^ priv->inv) return; regs->verdict.code = NFT_BREAK; } diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 16f4d84599ac..bc079d68a536 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -167,7 +167,6 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx, struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); struct nft_data_desc desc; struct nft_data data; - u32 mask; int err; err = nft_data_init(NULL, &data, sizeof(data), &desc, @@ -181,10 +180,11 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx, return err; desc.len *= BITS_PER_BYTE; - mask = nft_cmp_fast_mask(desc.len); - priv->data = data.data[0] & mask; + priv->mask = nft_cmp_fast_mask(desc.len); + priv->data = data.data[0] & priv->mask; priv->len = desc.len; + priv->inv = ntohl(nla_get_be32(tb[NFTA_CMP_OP])) != NFT_CMP_EQ; return 0; } @@ -201,7 +201,7 @@ static int nft_cmp_fast_offload(struct nft_offload_ctx *ctx, }, .sreg = priv->sreg, .len = priv->len / BITS_PER_BYTE, - .op = NFT_CMP_EQ, + .op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ, }; return __nft_cmp_offload(ctx, flow, &cmp); @@ -210,11 +210,12 @@ static int nft_cmp_fast_offload(struct nft_offload_ctx *ctx, static int nft_cmp_fast_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); + enum nft_cmp_ops op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ; struct nft_data data; if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg)) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_CMP_OP, htonl(NFT_CMP_EQ))) + if (nla_put_be32(skb, NFTA_CMP_OP, htonl(op))) goto nla_put_failure; data.data[0] = priv->data; @@ -272,7 +273,7 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) goto err1; } - if (desc.len <= sizeof(u32) && op == NFT_CMP_EQ) + if (desc.len <= sizeof(u32) && (op == NFT_CMP_EQ || op == NFT_CMP_NEQ)) return &nft_cmp_fast_ops; return &nft_cmp_ops; From 10fdd6d80e4c21ad48f3860d723f5b3b5965477b Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Thu, 1 Oct 2020 18:57:44 +0200 Subject: [PATCH 11/11] netfilter: nf_tables: Implement fast bitwise expression A typical use of bitwise expression is to mask out parts of an IP address when matching on the network part only. Optimize for this common use with a fast variant for NFT_BITWISE_BOOL-type expressions operating on 32bit-sized values. Signed-off-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables_core.h | 9 ++ net/netfilter/nf_tables_core.c | 12 +++ net/netfilter/nft_bitwise.c | 141 +++++++++++++++++++++++-- 3 files changed, 156 insertions(+), 6 deletions(-) diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index df2d91c814cb..8657e6815b07 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -23,6 +23,13 @@ extern struct nft_object_type nft_secmark_obj_type; int nf_tables_core_module_init(void); void nf_tables_core_module_exit(void); +struct nft_bitwise_fast_expr { + u32 mask; + u32 xor; + enum nft_registers sreg:8; + enum nft_registers dreg:8; +}; + struct nft_cmp_fast_expr { u32 data; u32 mask; @@ -68,6 +75,8 @@ struct nft_payload_set { extern const struct nft_expr_ops nft_payload_fast_ops; +extern const struct nft_expr_ops nft_bitwise_fast_ops; + extern struct static_key_false nft_counters_enabled; extern struct static_key_false nft_trace_enabled; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index e92feacaf551..dbc2e945c98e 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -47,6 +47,16 @@ static inline void nft_trace_packet(struct nft_traceinfo *info, } } +static void nft_bitwise_fast_eval(const struct nft_expr *expr, + struct nft_regs *regs) +{ + const struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr); + u32 *src = ®s->data[priv->sreg]; + u32 *dst = ®s->data[priv->dreg]; + + *dst = (*src & priv->mask) ^ priv->xor; +} + static void nft_cmp_fast_eval(const struct nft_expr *expr, struct nft_regs *regs) { @@ -175,6 +185,8 @@ next_rule: nft_rule_for_each_expr(expr, last, rule) { if (expr->ops == &nft_cmp_fast_ops) nft_cmp_fast_eval(expr, ®s); + else if (expr->ops == &nft_bitwise_fast_ops) + nft_bitwise_fast_eval(expr, ®s); else if (expr->ops != &nft_payload_fast_ops || !nft_payload_fast_eval(expr, ®s, pkt)) expr_call_ops_eval(expr, ®s, pkt); diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index bc37d6c59db4..bbd773d74377 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -163,11 +163,6 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, u32 len; int err; - if (!tb[NFTA_BITWISE_SREG] || - !tb[NFTA_BITWISE_DREG] || - !tb[NFTA_BITWISE_LEN]) - return -EINVAL; - err = nft_parse_u32_check(tb[NFTA_BITWISE_LEN], U8_MAX, &len); if (err < 0) return err; @@ -292,9 +287,143 @@ static const struct nft_expr_ops nft_bitwise_ops = { .offload = nft_bitwise_offload, }; +static int +nft_bitwise_extract_u32_data(const struct nlattr * const tb, u32 *out) +{ + struct nft_data_desc desc; + struct nft_data data; + int err = 0; + + err = nft_data_init(NULL, &data, sizeof(data), &desc, tb); + if (err < 0) + return err; + + if (desc.type != NFT_DATA_VALUE || desc.len != sizeof(u32)) { + err = -EINVAL; + goto err; + } + *out = data.data[0]; +err: + nft_data_release(&data, desc.type); + return err; +} + +static int nft_bitwise_fast_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr); + int err; + + priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]); + err = nft_validate_register_load(priv->sreg, sizeof(u32)); + if (err < 0) + return err; + + priv->dreg = nft_parse_register(tb[NFTA_BITWISE_DREG]); + err = nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, sizeof(u32)); + if (err < 0) + return err; + + if (tb[NFTA_BITWISE_DATA]) + return -EINVAL; + + if (!tb[NFTA_BITWISE_MASK] || + !tb[NFTA_BITWISE_XOR]) + return -EINVAL; + + err = nft_bitwise_extract_u32_data(tb[NFTA_BITWISE_MASK], &priv->mask); + if (err < 0) + return err; + + err = nft_bitwise_extract_u32_data(tb[NFTA_BITWISE_XOR], &priv->xor); + if (err < 0) + return err; + + return 0; +} + +static int +nft_bitwise_fast_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr); + struct nft_data data; + + if (nft_dump_register(skb, NFTA_BITWISE_SREG, priv->sreg)) + return -1; + if (nft_dump_register(skb, NFTA_BITWISE_DREG, priv->dreg)) + return -1; + if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(sizeof(u32)))) + return -1; + if (nla_put_be32(skb, NFTA_BITWISE_OP, htonl(NFT_BITWISE_BOOL))) + return -1; + + data.data[0] = priv->mask; + if (nft_data_dump(skb, NFTA_BITWISE_MASK, &data, + NFT_DATA_VALUE, sizeof(u32)) < 0) + return -1; + + data.data[0] = priv->xor; + if (nft_data_dump(skb, NFTA_BITWISE_XOR, &data, + NFT_DATA_VALUE, sizeof(u32)) < 0) + return -1; + + return 0; +} + +static int nft_bitwise_fast_offload(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow, + const struct nft_expr *expr) +{ + const struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr); + struct nft_offload_reg *reg = &ctx->regs[priv->dreg]; + + if (priv->xor || priv->sreg != priv->dreg || reg->len != sizeof(u32)) + return -EOPNOTSUPP; + + reg->mask.data[0] = priv->mask; + return 0; +} + +const struct nft_expr_ops nft_bitwise_fast_ops = { + .type = &nft_bitwise_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise_fast_expr)), + .eval = NULL, /* inlined */ + .init = nft_bitwise_fast_init, + .dump = nft_bitwise_fast_dump, + .offload = nft_bitwise_fast_offload, +}; + +static const struct nft_expr_ops * +nft_bitwise_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + int err; + u32 len; + + if (!tb[NFTA_BITWISE_LEN] || + !tb[NFTA_BITWISE_SREG] || + !tb[NFTA_BITWISE_DREG]) + return ERR_PTR(-EINVAL); + + err = nft_parse_u32_check(tb[NFTA_BITWISE_LEN], U8_MAX, &len); + if (err < 0) + return ERR_PTR(err); + + if (len != sizeof(u32)) + return &nft_bitwise_ops; + + if (tb[NFTA_BITWISE_OP] && + ntohl(nla_get_be32(tb[NFTA_BITWISE_OP])) != NFT_BITWISE_BOOL) + return &nft_bitwise_ops; + + return &nft_bitwise_fast_ops; +} + struct nft_expr_type nft_bitwise_type __read_mostly = { .name = "bitwise", - .ops = &nft_bitwise_ops, + .select_ops = nft_bitwise_select_ops, .policy = nft_bitwise_policy, .maxattr = NFTA_BITWISE_MAX, .owner = THIS_MODULE,