Merge branch 'inet-frags-next'

Nikolay Aleksandrov says:

====================
inet: frags: cleanup and kmem_cache use

This patchset does a couple of small cleanups in patches 1-5 and then in
patch 06 it introduces the use of kmem_cache for allocation/freeing of
inet_frag_queue+header objects.

v2: Broke up patch 02 into 3 patches as David suggested

Here are the results of a couple of netperf runs:
netperf options: -l 30 -I95,5 -i 15,10 -m 64k

- 10 gig before the patchset
MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.155.1 () port 0 AF_INET : +/-2.500% @ 95% conf.
Socket  Message  Elapsed      Messages
Size    Size     Time         Okay Errors   Throughput
bytes   bytes    secs            #      #   10^6bits/sec

212992   64000   30.00      442466      0    7551.39
212992           30.00      439130           7494.45

- 10 gig after the patchset
MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.155.1 () port 0 AF_INET : +/-2.500% @ 95% conf.
Socket  Message  Elapsed      Messages
Size    Size     Time         Okay Errors   Throughput
bytes   bytes    secs            #      #   10^6bits/sec

212992   64000   30.00      458846      0    7830.94
212992           30.00      457575           7809.25

- Virtio before the patchset
MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.144.1 () port 0 AF_INET : +/-2.500% @ 95% conf.
Socket  Message  Elapsed      Messages
Size    Size     Time         Okay Errors   Throughput
bytes   bytes    secs            #      #   10^6bits/sec

212992   64000   30.00      735000      0    12543.96
212992           30.00      560322           9562.79

- Virtio after the patchset
MIGRATED UDP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.144.1 () port 0 AF_INET : +/-2.500% @ 95% conf.
Socket  Message  Elapsed      Messages
Size    Size     Time         Okay Errors   Throughput
bytes   bytes    secs            #      #   10^6bits/sec

212992   64000   30.00      731729      0    12488.14
212992           30.00      647241           11046.21
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2014-08-02 15:31:40 -07:00
Parent a5536e1094 d4ad4d22e7
Commit 7e32aa4d8c
6 changed files with 134 additions and 80 deletions

View file

@ -15,25 +15,49 @@ struct netns_frags {
int low_thresh; int low_thresh;
}; };
/**
* fragment queue flags
*
* @INET_FRAG_FIRST_IN: first fragment has arrived
* @INET_FRAG_LAST_IN: final fragment has arrived
* @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction
* @INET_FRAG_EVICTED: frag queue is being evicted
*/
enum {
INET_FRAG_FIRST_IN = BIT(0),
INET_FRAG_LAST_IN = BIT(1),
INET_FRAG_COMPLETE = BIT(2),
INET_FRAG_EVICTED = BIT(3)
};
/**
* struct inet_frag_queue - fragment queue
*
* @lock: spinlock protecting the queue
* @timer: queue expiration timer
* @list: hash bucket list
* @refcnt: reference count of the queue
* @fragments: received fragments head
* @fragments_tail: received fragments tail
* @stamp: timestamp of the last received fragment
* @len: total length of the original datagram
* @meat: length of received fragments so far
* @flags: fragment queue flags
* @max_size: (ipv4 only) maximum received fragment size with IP_DF set
* @net: namespace that this frag belongs to
*/
struct inet_frag_queue { struct inet_frag_queue {
spinlock_t lock; spinlock_t lock;
struct timer_list timer; /* when will this queue expire? */ struct timer_list timer;
struct hlist_node list; struct hlist_node list;
atomic_t refcnt; atomic_t refcnt;
struct sk_buff *fragments; /* list of received fragments */ struct sk_buff *fragments;
struct sk_buff *fragments_tail; struct sk_buff *fragments_tail;
ktime_t stamp; ktime_t stamp;
int len; /* total length of orig datagram */ int len;
int meat; int meat;
__u8 last_in; /* first/last segment arrived? */ __u8 flags;
#define INET_FRAG_EVICTED 8
#define INET_FRAG_COMPLETE 4
#define INET_FRAG_FIRST_IN 2
#define INET_FRAG_LAST_IN 1
u16 max_size; u16 max_size;
struct netns_frags *net; struct netns_frags *net;
}; };
@ -77,9 +101,11 @@ struct inet_frags {
void (*destructor)(struct inet_frag_queue *); void (*destructor)(struct inet_frag_queue *);
void (*skb_free)(struct sk_buff *); void (*skb_free)(struct sk_buff *);
void (*frag_expire)(unsigned long data); void (*frag_expire)(unsigned long data);
struct kmem_cache *frags_cachep;
const char *frags_cache_name;
}; };
void inet_frags_init(struct inet_frags *); int inet_frags_init(struct inet_frags *);
void inet_frags_fini(struct inet_frags *); void inet_frags_fini(struct inet_frags *);
void inet_frags_init_net(struct netns_frags *nf); void inet_frags_init_net(struct netns_frags *nf);

View file

@ -30,6 +30,8 @@
#include "reassembly.h" #include "reassembly.h"
static const char lowpan_frags_cache_name[] = "lowpan-frags";
struct lowpan_frag_info { struct lowpan_frag_info {
__be16 d_tag; __be16 d_tag;
u16 d_size; u16 d_size;
@ -99,7 +101,7 @@ static void lowpan_frag_expire(unsigned long data)
spin_lock(&fq->q.lock); spin_lock(&fq->q.lock);
if (fq->q.last_in & INET_FRAG_COMPLETE) if (fq->q.flags & INET_FRAG_COMPLETE)
goto out; goto out;
inet_frag_kill(&fq->q, &lowpan_frags); inet_frag_kill(&fq->q, &lowpan_frags);
@ -142,7 +144,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
struct net_device *dev; struct net_device *dev;
int end, offset; int end, offset;
if (fq->q.last_in & INET_FRAG_COMPLETE) if (fq->q.flags & INET_FRAG_COMPLETE)
goto err; goto err;
offset = lowpan_cb(skb)->d_offset << 3; offset = lowpan_cb(skb)->d_offset << 3;
@ -154,14 +156,14 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
* or have different end, the segment is corrupted. * or have different end, the segment is corrupted.
*/ */
if (end < fq->q.len || if (end < fq->q.len ||
((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len)) ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len))
goto err; goto err;
fq->q.last_in |= INET_FRAG_LAST_IN; fq->q.flags |= INET_FRAG_LAST_IN;
fq->q.len = end; fq->q.len = end;
} else { } else {
if (end > fq->q.len) { if (end > fq->q.len) {
/* Some bits beyond end -> corruption. */ /* Some bits beyond end -> corruption. */
if (fq->q.last_in & INET_FRAG_LAST_IN) if (fq->q.flags & INET_FRAG_LAST_IN)
goto err; goto err;
fq->q.len = end; fq->q.len = end;
} }
@ -201,13 +203,13 @@ found:
if (frag_type == LOWPAN_DISPATCH_FRAG1) { if (frag_type == LOWPAN_DISPATCH_FRAG1) {
/* Calculate uncomp. 6lowpan header to estimate full size */ /* Calculate uncomp. 6lowpan header to estimate full size */
fq->q.meat += lowpan_uncompress_size(skb, NULL); fq->q.meat += lowpan_uncompress_size(skb, NULL);
fq->q.last_in |= INET_FRAG_FIRST_IN; fq->q.flags |= INET_FRAG_FIRST_IN;
} else { } else {
fq->q.meat += skb->len; fq->q.meat += skb->len;
} }
add_frag_mem_limit(&fq->q, skb->truesize); add_frag_mem_limit(&fq->q, skb->truesize);
if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
fq->q.meat == fq->q.len) { fq->q.meat == fq->q.len) {
int res; int res;
unsigned long orefdst = skb->_skb_refdst; unsigned long orefdst = skb->_skb_refdst;
@ -571,7 +573,10 @@ int __init lowpan_net_frag_init(void)
lowpan_frags.qsize = sizeof(struct frag_queue); lowpan_frags.qsize = sizeof(struct frag_queue);
lowpan_frags.match = lowpan_frag_match; lowpan_frags.match = lowpan_frag_match;
lowpan_frags.frag_expire = lowpan_frag_expire; lowpan_frags.frag_expire = lowpan_frag_expire;
inet_frags_init(&lowpan_frags); lowpan_frags.frags_cache_name = lowpan_frags_cache_name;
ret = inet_frags_init(&lowpan_frags);
if (ret)
goto err_pernet;
return ret; return ret;
err_pernet: err_pernet:

View file

@ -151,9 +151,7 @@ evict_again:
goto evict_again; goto evict_again;
} }
/* suppress xmit of (icmp) error packet */ fq->flags |= INET_FRAG_EVICTED;
fq->last_in &= ~INET_FRAG_FIRST_IN;
fq->last_in |= INET_FRAG_EVICTED;
hlist_del(&fq->list); hlist_del(&fq->list);
hlist_add_head(&fq->list, &expired); hlist_add_head(&fq->list, &expired);
++evicted; ++evicted;
@ -200,7 +198,7 @@ static void inet_frag_schedule_worker(struct inet_frags *f)
schedule_work(&f->frags_work); schedule_work(&f->frags_work);
} }
void inet_frags_init(struct inet_frags *f) int inet_frags_init(struct inet_frags *f)
{ {
int i; int i;
@ -215,6 +213,12 @@ void inet_frags_init(struct inet_frags *f)
seqlock_init(&f->rnd_seqlock); seqlock_init(&f->rnd_seqlock);
f->last_rebuild_jiffies = 0; f->last_rebuild_jiffies = 0;
f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
NULL);
if (!f->frags_cachep)
return -ENOMEM;
return 0;
} }
EXPORT_SYMBOL(inet_frags_init); EXPORT_SYMBOL(inet_frags_init);
@ -227,6 +231,7 @@ EXPORT_SYMBOL(inet_frags_init_net);
void inet_frags_fini(struct inet_frags *f) void inet_frags_fini(struct inet_frags *f)
{ {
cancel_work_sync(&f->frags_work); cancel_work_sync(&f->frags_work);
kmem_cache_destroy(f->frags_cachep);
} }
EXPORT_SYMBOL(inet_frags_fini); EXPORT_SYMBOL(inet_frags_fini);
@ -289,16 +294,16 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
if (del_timer(&fq->timer)) if (del_timer(&fq->timer))
atomic_dec(&fq->refcnt); atomic_dec(&fq->refcnt);
if (!(fq->last_in & INET_FRAG_COMPLETE)) { if (!(fq->flags & INET_FRAG_COMPLETE)) {
fq_unlink(fq, f); fq_unlink(fq, f);
atomic_dec(&fq->refcnt); atomic_dec(&fq->refcnt);
fq->last_in |= INET_FRAG_COMPLETE; fq->flags |= INET_FRAG_COMPLETE;
} }
} }
EXPORT_SYMBOL(inet_frag_kill); EXPORT_SYMBOL(inet_frag_kill);
static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f, static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
struct sk_buff *skb) struct sk_buff *skb)
{ {
if (f->skb_free) if (f->skb_free)
f->skb_free(skb); f->skb_free(skb);
@ -311,7 +316,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
struct netns_frags *nf; struct netns_frags *nf;
unsigned int sum, sum_truesize = 0; unsigned int sum, sum_truesize = 0;
WARN_ON(!(q->last_in & INET_FRAG_COMPLETE)); WARN_ON(!(q->flags & INET_FRAG_COMPLETE));
WARN_ON(del_timer(&q->timer) != 0); WARN_ON(del_timer(&q->timer) != 0);
/* Release all fragment data. */ /* Release all fragment data. */
@ -329,13 +334,14 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
if (f->destructor) if (f->destructor)
f->destructor(q); f->destructor(q);
kfree(q); kmem_cache_free(f->frags_cachep, q);
} }
EXPORT_SYMBOL(inet_frag_destroy); EXPORT_SYMBOL(inet_frag_destroy);
static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
struct inet_frag_queue *qp_in, struct inet_frags *f, struct inet_frag_queue *qp_in,
void *arg) struct inet_frags *f,
void *arg)
{ {
struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f); struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f);
struct inet_frag_queue *qp; struct inet_frag_queue *qp;
@ -349,7 +355,7 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
if (qp->net == nf && f->match(qp, arg)) { if (qp->net == nf && f->match(qp, arg)) {
atomic_inc(&qp->refcnt); atomic_inc(&qp->refcnt);
spin_unlock(&hb->chain_lock); spin_unlock(&hb->chain_lock);
qp_in->last_in |= INET_FRAG_COMPLETE; qp_in->flags |= INET_FRAG_COMPLETE;
inet_frag_put(qp_in, f); inet_frag_put(qp_in, f);
return qp; return qp;
} }
@ -368,7 +374,8 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
} }
static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
struct inet_frags *f, void *arg) struct inet_frags *f,
void *arg)
{ {
struct inet_frag_queue *q; struct inet_frag_queue *q;
@ -377,7 +384,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
return NULL; return NULL;
} }
q = kzalloc(f->qsize, GFP_ATOMIC); q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
if (q == NULL) if (q == NULL)
return NULL; return NULL;
@ -393,7 +400,8 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
} }
static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
struct inet_frags *f, void *arg) struct inet_frags *f,
void *arg)
{ {
struct inet_frag_queue *q; struct inet_frag_queue *q;
@ -405,7 +413,8 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
} }
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
struct inet_frags *f, void *key, unsigned int hash) struct inet_frags *f, void *key,
unsigned int hash)
{ {
struct inet_frag_bucket *hb; struct inet_frag_bucket *hb;
struct inet_frag_queue *q; struct inet_frag_queue *q;

View file

@ -55,6 +55,7 @@
*/ */
static int sysctl_ipfrag_max_dist __read_mostly = 64; static int sysctl_ipfrag_max_dist __read_mostly = 64;
static const char ip_frag_cache_name[] = "ip4-frags";
struct ipfrag_skb_cb struct ipfrag_skb_cb
{ {
@ -185,20 +186,22 @@ static void ip_expire(unsigned long arg)
spin_lock(&qp->q.lock); spin_lock(&qp->q.lock);
if (qp->q.last_in & INET_FRAG_COMPLETE) if (qp->q.flags & INET_FRAG_COMPLETE)
goto out; goto out;
ipq_kill(qp); ipq_kill(qp);
if (!(qp->q.last_in & INET_FRAG_EVICTED))
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { if (!(qp->q.flags & INET_FRAG_EVICTED)) {
struct sk_buff *head = qp->q.fragments; struct sk_buff *head = qp->q.fragments;
const struct iphdr *iph; const struct iphdr *iph;
int err; int err;
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
if (!(qp->q.flags & INET_FRAG_FIRST_IN) || !qp->q.fragments)
goto out;
rcu_read_lock(); rcu_read_lock();
head->dev = dev_get_by_index_rcu(net, qp->iif); head->dev = dev_get_by_index_rcu(net, qp->iif);
if (!head->dev) if (!head->dev)
@ -211,8 +214,7 @@ static void ip_expire(unsigned long arg)
if (err) if (err)
goto out_rcu_unlock; goto out_rcu_unlock;
/* /* Only an end host needs to send an ICMP
* Only an end host needs to send an ICMP
* "Fragment Reassembly Timeout" message, per RFC792. * "Fragment Reassembly Timeout" message, per RFC792.
*/ */
if (qp->user == IP_DEFRAG_AF_PACKET || if (qp->user == IP_DEFRAG_AF_PACKET ||
@ -221,7 +223,6 @@ static void ip_expire(unsigned long arg)
(skb_rtable(head)->rt_type != RTN_LOCAL))) (skb_rtable(head)->rt_type != RTN_LOCAL)))
goto out_rcu_unlock; goto out_rcu_unlock;
/* Send an ICMP "Fragment Reassembly Timeout" message. */ /* Send an ICMP "Fragment Reassembly Timeout" message. */
icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
out_rcu_unlock: out_rcu_unlock:
@ -302,7 +303,7 @@ static int ip_frag_reinit(struct ipq *qp)
} while (fp); } while (fp);
sub_frag_mem_limit(&qp->q, sum_truesize); sub_frag_mem_limit(&qp->q, sum_truesize);
qp->q.last_in = 0; qp->q.flags = 0;
qp->q.len = 0; qp->q.len = 0;
qp->q.meat = 0; qp->q.meat = 0;
qp->q.fragments = NULL; qp->q.fragments = NULL;
@ -323,7 +324,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
int err = -ENOENT; int err = -ENOENT;
u8 ecn; u8 ecn;
if (qp->q.last_in & INET_FRAG_COMPLETE) if (qp->q.flags & INET_FRAG_COMPLETE)
goto err; goto err;
if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
@ -350,9 +351,9 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
* or have different end, the segment is corrupted. * or have different end, the segment is corrupted.
*/ */
if (end < qp->q.len || if (end < qp->q.len ||
((qp->q.last_in & INET_FRAG_LAST_IN) && end != qp->q.len)) ((qp->q.flags & INET_FRAG_LAST_IN) && end != qp->q.len))
goto err; goto err;
qp->q.last_in |= INET_FRAG_LAST_IN; qp->q.flags |= INET_FRAG_LAST_IN;
qp->q.len = end; qp->q.len = end;
} else { } else {
if (end&7) { if (end&7) {
@ -362,7 +363,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
} }
if (end > qp->q.len) { if (end > qp->q.len) {
/* Some bits beyond end -> corruption. */ /* Some bits beyond end -> corruption. */
if (qp->q.last_in & INET_FRAG_LAST_IN) if (qp->q.flags & INET_FRAG_LAST_IN)
goto err; goto err;
qp->q.len = end; qp->q.len = end;
} }
@ -471,13 +472,13 @@ found:
qp->ecn |= ecn; qp->ecn |= ecn;
add_frag_mem_limit(&qp->q, skb->truesize); add_frag_mem_limit(&qp->q, skb->truesize);
if (offset == 0) if (offset == 0)
qp->q.last_in |= INET_FRAG_FIRST_IN; qp->q.flags |= INET_FRAG_FIRST_IN;
if (ip_hdr(skb)->frag_off & htons(IP_DF) && if (ip_hdr(skb)->frag_off & htons(IP_DF) &&
skb->len + ihl > qp->q.max_size) skb->len + ihl > qp->q.max_size)
qp->q.max_size = skb->len + ihl; qp->q.max_size = skb->len + ihl;
if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && if (qp->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
qp->q.meat == qp->q.len) { qp->q.meat == qp->q.len) {
unsigned long orefdst = skb->_skb_refdst; unsigned long orefdst = skb->_skb_refdst;
@ -860,5 +861,7 @@ void __init ipfrag_init(void)
ip4_frags.qsize = sizeof(struct ipq); ip4_frags.qsize = sizeof(struct ipq);
ip4_frags.match = ip4_frag_match; ip4_frags.match = ip4_frag_match;
ip4_frags.frag_expire = ip_expire; ip4_frags.frag_expire = ip_expire;
inet_frags_init(&ip4_frags); ip4_frags.frags_cache_name = ip_frag_cache_name;
if (inet_frags_init(&ip4_frags))
panic("IP: failed to allocate ip4_frags cache\n");
} }

View file

@ -50,6 +50,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
static const char nf_frags_cache_name[] = "nf-frags";
struct nf_ct_frag6_skb_cb struct nf_ct_frag6_skb_cb
{ {
@ -222,7 +223,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
int offset, end; int offset, end;
u8 ecn; u8 ecn;
if (fq->q.last_in & INET_FRAG_COMPLETE) { if (fq->q.flags & INET_FRAG_COMPLETE) {
pr_debug("Already completed\n"); pr_debug("Already completed\n");
goto err; goto err;
} }
@ -253,11 +254,11 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
* or have different end, the segment is corrupted. * or have different end, the segment is corrupted.
*/ */
if (end < fq->q.len || if (end < fq->q.len ||
((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len)) { ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) {
pr_debug("already received last fragment\n"); pr_debug("already received last fragment\n");
goto err; goto err;
} }
fq->q.last_in |= INET_FRAG_LAST_IN; fq->q.flags |= INET_FRAG_LAST_IN;
fq->q.len = end; fq->q.len = end;
} else { } else {
/* Check if the fragment is rounded to 8 bytes. /* Check if the fragment is rounded to 8 bytes.
@ -272,7 +273,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
} }
if (end > fq->q.len) { if (end > fq->q.len) {
/* Some bits beyond end -> corruption. */ /* Some bits beyond end -> corruption. */
if (fq->q.last_in & INET_FRAG_LAST_IN) { if (fq->q.flags & INET_FRAG_LAST_IN) {
pr_debug("last packet already reached.\n"); pr_debug("last packet already reached.\n");
goto err; goto err;
} }
@ -354,7 +355,7 @@ found:
*/ */
if (offset == 0) { if (offset == 0) {
fq->nhoffset = nhoff; fq->nhoffset = nhoff;
fq->q.last_in |= INET_FRAG_FIRST_IN; fq->q.flags |= INET_FRAG_FIRST_IN;
} }
return 0; return 0;
@ -617,7 +618,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
goto ret_orig; goto ret_orig;
} }
if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
fq->q.meat == fq->q.len) { fq->q.meat == fq->q.len) {
ret_skb = nf_ct_frag6_reasm(fq, dev); ret_skb = nf_ct_frag6_reasm(fq, dev);
if (ret_skb == NULL) if (ret_skb == NULL)
@ -677,12 +678,15 @@ int nf_ct_frag6_init(void)
nf_frags.qsize = sizeof(struct frag_queue); nf_frags.qsize = sizeof(struct frag_queue);
nf_frags.match = ip6_frag_match; nf_frags.match = ip6_frag_match;
nf_frags.frag_expire = nf_ct_frag6_expire; nf_frags.frag_expire = nf_ct_frag6_expire;
inet_frags_init(&nf_frags); nf_frags.frags_cache_name = nf_frags_cache_name;
ret = inet_frags_init(&nf_frags);
if (ret)
goto out;
ret = register_pernet_subsys(&nf_ct_net_ops); ret = register_pernet_subsys(&nf_ct_net_ops);
if (ret) if (ret)
inet_frags_fini(&nf_frags); inet_frags_fini(&nf_frags);
out:
return ret; return ret;
} }

View file

@ -60,6 +60,8 @@
#include <net/inet_frag.h> #include <net/inet_frag.h>
#include <net/inet_ecn.h> #include <net/inet_ecn.h>
static const char ip6_frag_cache_name[] = "ip6-frags";
struct ip6frag_skb_cb struct ip6frag_skb_cb
{ {
struct inet6_skb_parm h; struct inet6_skb_parm h;
@ -131,7 +133,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
spin_lock(&fq->q.lock); spin_lock(&fq->q.lock);
if (fq->q.last_in & INET_FRAG_COMPLETE) if (fq->q.flags & INET_FRAG_COMPLETE)
goto out; goto out;
inet_frag_kill(&fq->q, frags); inet_frag_kill(&fq->q, frags);
@ -141,19 +143,20 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
if (!dev) if (!dev)
goto out_rcu_unlock; goto out_rcu_unlock;
if (!(fq->q.last_in & INET_FRAG_EVICTED))
IP6_INC_STATS_BH(net, __in6_dev_get(dev),
IPSTATS_MIB_REASMTIMEOUT);
IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
/* Don't send error if the first segment did not arrive. */ if (fq->q.flags & INET_FRAG_EVICTED)
if (!(fq->q.last_in & INET_FRAG_FIRST_IN) || !fq->q.fragments)
goto out_rcu_unlock; goto out_rcu_unlock;
/* IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
But use as source device on which LAST ARRIVED
segment was received. And do not use fq->dev /* Don't send error if the first segment did not arrive. */
pointer directly, device might already disappeared. if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments)
goto out_rcu_unlock;
/* But use as source device on which LAST ARRIVED
* segment was received. And do not use fq->dev
* pointer directly, device might already disappeared.
*/ */
fq->q.fragments->dev = dev; fq->q.fragments->dev = dev;
icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
@ -209,7 +212,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
struct net *net = dev_net(skb_dst(skb)->dev); struct net *net = dev_net(skb_dst(skb)->dev);
u8 ecn; u8 ecn;
if (fq->q.last_in & INET_FRAG_COMPLETE) if (fq->q.flags & INET_FRAG_COMPLETE)
goto err; goto err;
offset = ntohs(fhdr->frag_off) & ~0x7; offset = ntohs(fhdr->frag_off) & ~0x7;
@ -240,9 +243,9 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
* or have different end, the segment is corrupted. * or have different end, the segment is corrupted.
*/ */
if (end < fq->q.len || if (end < fq->q.len ||
((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len)) ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len))
goto err; goto err;
fq->q.last_in |= INET_FRAG_LAST_IN; fq->q.flags |= INET_FRAG_LAST_IN;
fq->q.len = end; fq->q.len = end;
} else { } else {
/* Check if the fragment is rounded to 8 bytes. /* Check if the fragment is rounded to 8 bytes.
@ -260,7 +263,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
} }
if (end > fq->q.len) { if (end > fq->q.len) {
/* Some bits beyond end -> corruption. */ /* Some bits beyond end -> corruption. */
if (fq->q.last_in & INET_FRAG_LAST_IN) if (fq->q.flags & INET_FRAG_LAST_IN)
goto err; goto err;
fq->q.len = end; fq->q.len = end;
} }
@ -335,10 +338,10 @@ found:
*/ */
if (offset == 0) { if (offset == 0) {
fq->nhoffset = nhoff; fq->nhoffset = nhoff;
fq->q.last_in |= INET_FRAG_FIRST_IN; fq->q.flags |= INET_FRAG_FIRST_IN;
} }
if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
fq->q.meat == fq->q.len) { fq->q.meat == fq->q.len) {
int res; int res;
unsigned long orefdst = skb->_skb_refdst; unsigned long orefdst = skb->_skb_refdst;
@ -355,8 +358,8 @@ found:
discard_fq: discard_fq:
inet_frag_kill(&fq->q, &ip6_frags); inet_frag_kill(&fq->q, &ip6_frags);
err: err:
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_REASMFAILS); IPSTATS_MIB_REASMFAILS);
kfree_skb(skb); kfree_skb(skb);
return -1; return -1;
} }
@ -566,7 +569,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
return -1; return -1;
fail_hdr: fail_hdr:
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS); IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_INHDRERRORS);
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb)); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb));
return -1; return -1;
} }
@ -746,7 +750,10 @@ int __init ipv6_frag_init(void)
ip6_frags.qsize = sizeof(struct frag_queue); ip6_frags.qsize = sizeof(struct frag_queue);
ip6_frags.match = ip6_frag_match; ip6_frags.match = ip6_frag_match;
ip6_frags.frag_expire = ip6_frag_expire; ip6_frags.frag_expire = ip6_frag_expire;
inet_frags_init(&ip6_frags); ip6_frags.frags_cache_name = ip6_frag_cache_name;
ret = inet_frags_init(&ip6_frags);
if (ret)
goto err_pernet;
out: out:
return ret; return ret;