Merge branch 'master' of git://1984.lsi.us.es/nf-next

Pablo Neira Ayuso says:

====================
The following patchset contains Netfilter/IPVS updates for
your net-next tree. They are:

* Better performance in nfnetlink_queue by avoiding copy from the
  packet to netlink message, from Eric Dumazet.

* Remove unnecessary locking in the exit path of ebt_ulog, from Gao Feng.

* Use new function ipv6_iface_scope_id in nf_ct_ipv6, from Hannes Frederic Sowa.

* A couple of sparse fixes for IPVS, from Julian Anastasov.

* Use xor hashing in nfnetlink_queue, as suggested by Eric Dumazet, from
  myself.

* Allow dumping expectations per master conntrack via ctnetlink, from myself.

* A couple of cleanups to use PTR_RET in module init path, from Silviu-Mihai
  Popescu.

* Remove nf_conntrack module a bit faster if netns are in use, from
  Vladimir Davydov.

* Use csum_partial in ip6t_NPT, from YOSHIFUJI Hideaki.

* Sparse fix for nf_conntrack, from Stephen Hemminger.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2013-03-25 12:11:44 -04:00
Родитель f5a03cf461 dece40e848
Коммит da13482534
14 изменённых файлов: 230 добавлений и 84 удалений

Просмотреть файл

@ -459,7 +459,7 @@ struct ip_vs_estimator {
struct ip_vs_stats { struct ip_vs_stats {
struct ip_vs_stats_user ustats; /* statistics */ struct ip_vs_stats_user ustats; /* statistics */
struct ip_vs_estimator est; /* estimator */ struct ip_vs_estimator est; /* estimator */
struct ip_vs_cpu_stats *cpustats; /* per cpu counters */ struct ip_vs_cpu_stats __percpu *cpustats; /* per cpu counters */
spinlock_t lock; /* spin lock */ spinlock_t lock; /* spin lock */
struct ip_vs_stats_user ustats0; /* reset values */ struct ip_vs_stats_user ustats0; /* reset values */
}; };

Просмотреть файл

@ -27,6 +27,7 @@ extern unsigned int nf_conntrack_in(struct net *net,
extern int nf_conntrack_init_net(struct net *net); extern int nf_conntrack_init_net(struct net *net);
extern void nf_conntrack_cleanup_net(struct net *net); extern void nf_conntrack_cleanup_net(struct net *net);
extern void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list);
extern int nf_conntrack_proto_pernet_init(struct net *net); extern int nf_conntrack_proto_pernet_init(struct net *net);
extern void nf_conntrack_proto_pernet_fini(struct net *net); extern void nf_conntrack_proto_pernet_fini(struct net *net);

Просмотреть файл

@ -319,12 +319,11 @@ static void __exit ebt_ulog_fini(void)
for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) {
ub = &ulog_buffers[i]; ub = &ulog_buffers[i];
del_timer(&ub->timer); del_timer(&ub->timer);
spin_lock_bh(&ub->lock);
if (ub->skb) { if (ub->skb) {
kfree_skb(ub->skb); kfree_skb(ub->skb);
ub->skb = NULL; ub->skb = NULL;
} }
spin_unlock_bh(&ub->lock);
} }
netlink_kernel_release(ebtulognl); netlink_kernel_release(ebtulognl);
} }

Просмотреть файл

@ -64,9 +64,7 @@ static int ebt_broute(struct sk_buff *skb)
static int __net_init broute_net_init(struct net *net) static int __net_init broute_net_init(struct net *net)
{ {
net->xt.broute_table = ebt_register_table(net, &broute_table); net->xt.broute_table = ebt_register_table(net, &broute_table);
if (IS_ERR(net->xt.broute_table)) return PTR_RET(net->xt.broute_table);
return PTR_ERR(net->xt.broute_table);
return 0;
} }
static void __net_exit broute_net_exit(struct net *net) static void __net_exit broute_net_exit(struct net *net)

Просмотреть файл

@ -48,9 +48,7 @@ static int __net_init arptable_filter_net_init(struct net *net)
net->ipv4.arptable_filter = net->ipv4.arptable_filter =
arpt_register_table(net, &packet_filter, repl); arpt_register_table(net, &packet_filter, repl);
kfree(repl); kfree(repl);
if (IS_ERR(net->ipv4.arptable_filter)) return PTR_RET(net->ipv4.arptable_filter);
return PTR_ERR(net->ipv4.arptable_filter);
return 0;
} }
static void __net_exit arptable_filter_net_exit(struct net *net) static void __net_exit arptable_filter_net_exit(struct net *net)

Просмотреть файл

@ -18,9 +18,8 @@
static int ip6t_npt_checkentry(const struct xt_tgchk_param *par) static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
{ {
struct ip6t_npt_tginfo *npt = par->targinfo; struct ip6t_npt_tginfo *npt = par->targinfo;
__wsum src_sum = 0, dst_sum = 0;
struct in6_addr pfx; struct in6_addr pfx;
unsigned int i; __wsum src_sum, dst_sum;
if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64) if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64)
return -EINVAL; return -EINVAL;
@ -33,12 +32,8 @@ static int ip6t_npt_checkentry(const struct xt_tgchk_param *par)
if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6)) if (!ipv6_addr_equal(&pfx, &npt->dst_pfx.in6))
return -EINVAL; return -EINVAL;
for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) { src_sum = csum_partial(&npt->src_pfx.in6, sizeof(npt->src_pfx.in6), 0);
src_sum = csum_add(src_sum, dst_sum = csum_partial(&npt->dst_pfx.in6, sizeof(npt->dst_pfx.in6), 0);
(__force __wsum)npt->src_pfx.in6.s6_addr16[i]);
dst_sum = csum_add(dst_sum,
(__force __wsum)npt->dst_pfx.in6.s6_addr16[i]);
}
npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum)); npt->adjustment = ~csum_fold(csum_sub(src_sum, dst_sum));
return 0; return 0;

Просмотреть файл

@ -330,12 +330,8 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
sizeof(sin6.sin6_addr)); sizeof(sin6.sin6_addr));
nf_ct_put(ct); nf_ct_put(ct);
sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr,
if (ipv6_addr_type(&sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL) sk->sk_bound_dev_if);
sin6.sin6_scope_id = sk->sk_bound_dev_if;
else
sin6.sin6_scope_id = 0;
return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0; return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
} }

Просмотреть файл

@ -69,10 +69,7 @@ EXPORT_SYMBOL(ip_vs_conn_put);
EXPORT_SYMBOL(ip_vs_get_debug_level); EXPORT_SYMBOL(ip_vs_get_debug_level);
#endif #endif
int ip_vs_net_id __read_mostly; static int ip_vs_net_id __read_mostly;
#ifdef IP_VS_GENERIC_NETNS
EXPORT_SYMBOL(ip_vs_net_id);
#endif
/* netns cnt used for uniqueness */ /* netns cnt used for uniqueness */
static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0); static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
@ -1181,9 +1178,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
iph.len)))) { iph.len)))) {
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) { if (af == AF_INET6) {
struct net *net =
dev_net(skb_dst(skb)->dev);
if (!skb->dev) if (!skb->dev)
skb->dev = net->loopback_dev; skb->dev = net->loopback_dev;
icmpv6_send(skb, icmpv6_send(skb,

Просмотреть файл

@ -271,16 +271,18 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
{ {
register unsigned int porth = ntohs(port); register unsigned int porth = ntohs(port);
__be32 addr_fold = addr->ip; __be32 addr_fold = addr->ip;
__u32 ahash;
#ifdef CONFIG_IP_VS_IPV6 #ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) if (af == AF_INET6)
addr_fold = addr->ip6[0]^addr->ip6[1]^ addr_fold = addr->ip6[0]^addr->ip6[1]^
addr->ip6[2]^addr->ip6[3]; addr->ip6[2]^addr->ip6[3];
#endif #endif
addr_fold ^= ((size_t)net>>8); ahash = ntohl(addr_fold);
ahash ^= ((size_t) net >> 8);
return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth) return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) &
& IP_VS_SVC_TAB_MASK; IP_VS_SVC_TAB_MASK;
} }
/* /*

Просмотреть файл

@ -56,7 +56,7 @@
* Make a summary from each cpu * Make a summary from each cpu
*/ */
static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
struct ip_vs_cpu_stats *stats) struct ip_vs_cpu_stats __percpu *stats)
{ {
int i; int i;

Просмотреть файл

@ -48,6 +48,7 @@
#include <net/netfilter/nf_conntrack_labels.h> #include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
#define NF_CONNTRACK_VERSION "0.5.0" #define NF_CONNTRACK_VERSION "0.5.0"
@ -1364,20 +1365,37 @@ void nf_conntrack_cleanup_end(void)
*/ */
void nf_conntrack_cleanup_net(struct net *net) void nf_conntrack_cleanup_net(struct net *net)
{ {
LIST_HEAD(single);
list_add(&net->exit_list, &single);
nf_conntrack_cleanup_net_list(&single);
}
void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
{
int busy;
struct net *net;
/* /*
* This makes sure all current packets have passed through * This makes sure all current packets have passed through
* netfilter framework. Roll on, two-stage module * netfilter framework. Roll on, two-stage module
* delete... * delete...
*/ */
synchronize_net(); synchronize_net();
i_see_dead_people: i_see_dead_people:
busy = 0;
list_for_each_entry(net, net_exit_list, exit_list) {
nf_ct_iterate_cleanup(net, kill_all, NULL); nf_ct_iterate_cleanup(net, kill_all, NULL);
nf_ct_release_dying_list(net); nf_ct_release_dying_list(net);
if (atomic_read(&net->ct.count) != 0) { if (atomic_read(&net->ct.count) != 0)
busy = 1;
}
if (busy) {
schedule(); schedule();
goto i_see_dead_people; goto i_see_dead_people;
} }
list_for_each_entry(net, net_exit_list, exit_list) {
nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
nf_conntrack_proto_pernet_fini(net); nf_conntrack_proto_pernet_fini(net);
nf_conntrack_helper_pernet_fini(net); nf_conntrack_helper_pernet_fini(net);
@ -1388,6 +1406,7 @@ void nf_conntrack_cleanup_net(struct net *net)
kmem_cache_destroy(net->ct.nf_conntrack_cachep); kmem_cache_destroy(net->ct.nf_conntrack_cachep);
kfree(net->ct.slabname); kfree(net->ct.slabname);
free_percpu(net->ct.stat); free_percpu(net->ct.stat);
}
} }
void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)

Просмотреть файл

@ -2409,6 +2409,92 @@ out:
return skb->len; return skb->len;
} }
static int
ctnetlink_exp_ct_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
{
struct nf_conntrack_expect *exp, *last;
struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
struct nf_conn *ct = cb->data;
struct nf_conn_help *help = nfct_help(ct);
u_int8_t l3proto = nfmsg->nfgen_family;
if (cb->args[0])
return 0;
rcu_read_lock();
last = (struct nf_conntrack_expect *)cb->args[1];
restart:
hlist_for_each_entry(exp, &help->expectations, lnode) {
if (l3proto && exp->tuple.src.l3num != l3proto)
continue;
if (cb->args[1]) {
if (exp != last)
continue;
cb->args[1] = 0;
}
if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
IPCTNL_MSG_EXP_NEW,
exp) < 0) {
if (!atomic_inc_not_zero(&exp->use))
continue;
cb->args[1] = (unsigned long)exp;
goto out;
}
}
if (cb->args[1]) {
cb->args[1] = 0;
goto restart;
}
cb->args[0] = 1;
out:
rcu_read_unlock();
if (last)
nf_ct_expect_put(last);
return skb->len;
}
static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const cda[])
{
int err;
struct net *net = sock_net(ctnl);
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
u16 zone = 0;
struct netlink_dump_control c = {
.dump = ctnetlink_exp_ct_dump_table,
.done = ctnetlink_exp_done,
};
err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3);
if (err < 0)
return err;
if (cda[CTA_EXPECT_ZONE]) {
err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
if (err < 0)
return err;
}
h = nf_conntrack_find_get(net, zone, &tuple);
if (!h)
return -ENOENT;
ct = nf_ct_tuplehash_to_ctrack(h);
c.data = ct;
err = netlink_dump_start(ctnl, skb, nlh, &c);
nf_ct_put(ct);
return err;
}
static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
[CTA_EXPECT_MASTER] = { .type = NLA_NESTED }, [CTA_EXPECT_MASTER] = { .type = NLA_NESTED },
[CTA_EXPECT_TUPLE] = { .type = NLA_NESTED }, [CTA_EXPECT_TUPLE] = { .type = NLA_NESTED },
@ -2439,12 +2525,16 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
int err; int err;
if (nlh->nlmsg_flags & NLM_F_DUMP) { if (nlh->nlmsg_flags & NLM_F_DUMP) {
if (cda[CTA_EXPECT_MASTER])
return ctnetlink_dump_exp_ct(ctnl, skb, nlh, cda);
else {
struct netlink_dump_control c = { struct netlink_dump_control c = {
.dump = ctnetlink_exp_dump_table, .dump = ctnetlink_exp_dump_table,
.done = ctnetlink_exp_done, .done = ctnetlink_exp_done,
}; };
return netlink_dump_start(ctnl, skb, nlh, &c); return netlink_dump_start(ctnl, skb, nlh, &c);
} }
}
err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
if (err < 0) if (err < 0)

Просмотреть файл

@ -545,16 +545,20 @@ out_init:
return ret; return ret;
} }
static void nf_conntrack_pernet_exit(struct net *net) static void nf_conntrack_pernet_exit(struct list_head *net_exit_list)
{ {
struct net *net;
list_for_each_entry(net, net_exit_list, exit_list) {
nf_conntrack_standalone_fini_sysctl(net); nf_conntrack_standalone_fini_sysctl(net);
nf_conntrack_standalone_fini_proc(net); nf_conntrack_standalone_fini_proc(net);
nf_conntrack_cleanup_net(net); }
nf_conntrack_cleanup_net_list(net_exit_list);
} }
static struct pernet_operations nf_conntrack_net_ops = { static struct pernet_operations nf_conntrack_net_ops = {
.init = nf_conntrack_pernet_init, .init = nf_conntrack_pernet_init,
.exit = nf_conntrack_pernet_exit, .exit_batch = nf_conntrack_pernet_exit,
}; };
static int __init nf_conntrack_standalone_init(void) static int __init nf_conntrack_standalone_init(void)

Просмотреть файл

@ -73,7 +73,7 @@ static struct hlist_head instance_table[INSTANCE_BUCKETS] __read_mostly;
static inline u_int8_t instance_hashfn(u_int16_t queue_num) static inline u_int8_t instance_hashfn(u_int16_t queue_num)
{ {
return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS; return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;
} }
static struct nfqnl_instance * static struct nfqnl_instance *
@ -217,14 +217,59 @@ nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
spin_unlock_bh(&queue->lock); spin_unlock_bh(&queue->lock);
} }
static void
nfqnl_zcopy(struct sk_buff *to, const struct sk_buff *from, int len, int hlen)
{
int i, j = 0;
int plen = 0; /* length of skb->head fragment */
struct page *page;
unsigned int offset;
/* dont bother with small payloads */
if (len <= skb_tailroom(to)) {
skb_copy_bits(from, 0, skb_put(to, len), len);
return;
}
if (hlen) {
skb_copy_bits(from, 0, skb_put(to, hlen), hlen);
len -= hlen;
} else {
plen = min_t(int, skb_headlen(from), len);
if (plen) {
page = virt_to_head_page(from->head);
offset = from->data - (unsigned char *)page_address(page);
__skb_fill_page_desc(to, 0, page, offset, plen);
get_page(page);
j = 1;
len -= plen;
}
}
to->truesize += len + plen;
to->len += len + plen;
to->data_len += len + plen;
for (i = 0; i < skb_shinfo(from)->nr_frags; i++) {
if (!len)
break;
skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i];
skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len);
len -= skb_shinfo(to)->frags[j].size;
skb_frag_ref(to, j);
j++;
}
skb_shinfo(to)->nr_frags = j;
}
static struct sk_buff * static struct sk_buff *
nfqnl_build_packet_message(struct nfqnl_instance *queue, nfqnl_build_packet_message(struct nfqnl_instance *queue,
struct nf_queue_entry *entry, struct nf_queue_entry *entry,
__be32 **packet_id_ptr) __be32 **packet_id_ptr)
{ {
sk_buff_data_t old_tail;
size_t size; size_t size;
size_t data_len = 0, cap_len = 0; size_t data_len = 0, cap_len = 0;
int hlen = 0;
struct sk_buff *skb; struct sk_buff *skb;
struct nlattr *nla; struct nlattr *nla;
struct nfqnl_msg_packet_hdr *pmsg; struct nfqnl_msg_packet_hdr *pmsg;
@ -246,8 +291,10 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
#endif #endif
+ nla_total_size(sizeof(u_int32_t)) /* mark */ + nla_total_size(sizeof(u_int32_t)) /* mark */
+ nla_total_size(sizeof(struct nfqnl_msg_packet_hw)) + nla_total_size(sizeof(struct nfqnl_msg_packet_hw))
+ nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp) + nla_total_size(sizeof(u_int32_t)); /* cap_len */
+ nla_total_size(sizeof(u_int32_t))); /* cap_len */
if (entskb->tstamp.tv64)
size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
outdev = entry->outdev; outdev = entry->outdev;
@ -265,7 +312,16 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
if (data_len == 0 || data_len > entskb->len) if (data_len == 0 || data_len > entskb->len)
data_len = entskb->len; data_len = entskb->len;
size += nla_total_size(data_len);
if (!entskb->head_frag ||
skb_headlen(entskb) < L1_CACHE_BYTES ||
skb_shinfo(entskb)->nr_frags >= MAX_SKB_FRAGS)
hlen = skb_headlen(entskb);
if (skb_has_frag_list(entskb))
hlen = entskb->len;
hlen = min_t(int, data_len, hlen);
size += sizeof(struct nlattr) + hlen;
cap_len = entskb->len; cap_len = entskb->len;
break; break;
} }
@ -277,7 +333,6 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
if (!skb) if (!skb)
return NULL; return NULL;
old_tail = skb->tail;
nlh = nlmsg_put(skb, 0, 0, nlh = nlmsg_put(skb, 0, 0,
NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET,
sizeof(struct nfgenmsg), 0); sizeof(struct nfgenmsg), 0);
@ -382,31 +437,26 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
goto nla_put_failure; goto nla_put_failure;
} }
if (data_len) {
struct nlattr *nla;
int sz = nla_attr_size(data_len);
if (skb_tailroom(skb) < nla_total_size(data_len)) {
printk(KERN_WARNING "nf_queue: no tailroom!\n");
kfree_skb(skb);
return NULL;
}
nla = (struct nlattr *)skb_put(skb, nla_total_size(data_len));
nla->nla_type = NFQA_PAYLOAD;
nla->nla_len = sz;
if (skb_copy_bits(entskb, 0, nla_data(nla), data_len))
BUG();
}
if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
goto nla_put_failure; goto nla_put_failure;
if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len))) if (cap_len > 0 && nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
goto nla_put_failure; goto nla_put_failure;
nlh->nlmsg_len = skb->tail - old_tail; if (data_len) {
struct nlattr *nla;
if (skb_tailroom(skb) < sizeof(*nla) + hlen)
goto nla_put_failure;
nla = (struct nlattr *)skb_put(skb, sizeof(*nla));
nla->nla_type = NFQA_PAYLOAD;
nla->nla_len = nla_attr_size(data_len);
nfqnl_zcopy(skb, entskb, data_len, hlen);
}
nlh->nlmsg_len = skb->len;
return skb; return skb;
nla_put_failure: nla_put_failure: