Merge branch 'inet-frag-fixes'

Florian Westphal says:

====================
inet: ip defrag bug fixes

Johan Schuijt and Frank Schreuder reported a crash and a softlockup after the
inet workqueue eviction change:

general protection fault: 0000 [#1] SMP
CPU: 0 PID: 4 Comm: kworker/0:0 Not tainted 3.18.18-transip-1.5 #1
Workqueue: events inet_frag_worker
task: ffff880224935130 ti: ffff880224938000 task.ti: ffff880224938000
RIP: 0010:[<ffffffff8149288c>] [<ffffffff8149288c>] inet_evict_bucket+0xfc/0x160
RSP: 0018:ffff88022493bd58  EFLAGS: 00010286
RAX: ffff88021f4f3e80 RBX: dead000000100100 RCX: 000000000000006b
RDX: 000000000000006c RSI: ffff88021f4f3e80 RDI: dead0000001000a8
RBP: 0000000000000002 R08: ffff880222273900 R09: ffff880036e49200
R10: ffff8800c6e86500 R11: ffff880036f45500 R12: ffffffff81a87100
R13: ffff88022493bd70 R14: 0000000000000000 R15: ffff8800c9b26280
[..]
Call Trace:
 [<ffffffff814929e0>] ? inet_frag_worker+0x60/0x210
 [<ffffffff8107e3a2>] ? process_one_work+0x142/0x3b0
 [<ffffffff8107eb94>] ? worker_thread+0x114/0x440
[..]

A second issue results in a softlockup, since the evictor may restart the
eviction loop a (potentially) unlimited number of times while local
softirqs are disabled.

Frank reports that the test system remained stable for 14 hours of testing
(before, the crash occurred within half an hour in their setup).
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2015-07-26 21:00:15 -07:00
Родитель 81296fc673 caaecdd3d3
Коммит 64b892ad23
6 изменённых файлов: 42 добавлений и 47 удалений

Просмотреть файл

@ -21,13 +21,11 @@ struct netns_frags {
* @INET_FRAG_FIRST_IN: first fragment has arrived * @INET_FRAG_FIRST_IN: first fragment has arrived
* @INET_FRAG_LAST_IN: final fragment has arrived * @INET_FRAG_LAST_IN: final fragment has arrived
* @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction * @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction
* @INET_FRAG_EVICTED: frag queue is being evicted
*/ */
enum { enum {
INET_FRAG_FIRST_IN = BIT(0), INET_FRAG_FIRST_IN = BIT(0),
INET_FRAG_LAST_IN = BIT(1), INET_FRAG_LAST_IN = BIT(1),
INET_FRAG_COMPLETE = BIT(2), INET_FRAG_COMPLETE = BIT(2),
INET_FRAG_EVICTED = BIT(3)
}; };
/** /**
@ -45,6 +43,7 @@ enum {
* @flags: fragment queue flags * @flags: fragment queue flags
* @max_size: maximum received fragment size * @max_size: maximum received fragment size
* @net: namespace that this frag belongs to * @net: namespace that this frag belongs to
* @list_evictor: list of queues to forcefully evict (e.g. due to low memory)
*/ */
struct inet_frag_queue { struct inet_frag_queue {
spinlock_t lock; spinlock_t lock;
@ -59,6 +58,7 @@ struct inet_frag_queue {
__u8 flags; __u8 flags;
u16 max_size; u16 max_size;
struct netns_frags *net; struct netns_frags *net;
struct hlist_node list_evictor;
}; };
#define INETFRAGS_HASHSZ 1024 #define INETFRAGS_HASHSZ 1024
@ -125,6 +125,11 @@ static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f
inet_frag_destroy(q, f); inet_frag_destroy(q, f);
} }
static inline bool inet_frag_evicting(struct inet_frag_queue *q)
{
return !hlist_unhashed(&q->list_evictor);
}
/* Memory Tracking Functions. */ /* Memory Tracking Functions. */
/* The default percpu_counter batch size is not big enough to scale to /* The default percpu_counter batch size is not big enough to scale to
@ -139,14 +144,14 @@ static inline int frag_mem_limit(struct netns_frags *nf)
return percpu_counter_read(&nf->mem); return percpu_counter_read(&nf->mem);
} }
static inline void sub_frag_mem_limit(struct inet_frag_queue *q, int i) static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)
{ {
__percpu_counter_add(&q->net->mem, -i, frag_percpu_counter_batch); __percpu_counter_add(&nf->mem, -i, frag_percpu_counter_batch);
} }
static inline void add_frag_mem_limit(struct inet_frag_queue *q, int i) static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
{ {
__percpu_counter_add(&q->net->mem, i, frag_percpu_counter_batch); __percpu_counter_add(&nf->mem, i, frag_percpu_counter_batch);
} }
static inline void init_frag_mem_limit(struct netns_frags *nf) static inline void init_frag_mem_limit(struct netns_frags *nf)

Просмотреть файл

@ -207,7 +207,7 @@ found:
} else { } else {
fq->q.meat += skb->len; fq->q.meat += skb->len;
} }
add_frag_mem_limit(&fq->q, skb->truesize); add_frag_mem_limit(fq->q.net, skb->truesize);
if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
fq->q.meat == fq->q.len) { fq->q.meat == fq->q.len) {
@ -287,7 +287,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
clone->data_len = clone->len; clone->data_len = clone->len;
head->data_len -= clone->len; head->data_len -= clone->len;
head->len -= clone->len; head->len -= clone->len;
add_frag_mem_limit(&fq->q, clone->truesize); add_frag_mem_limit(fq->q.net, clone->truesize);
} }
WARN_ON(head == NULL); WARN_ON(head == NULL);
@ -310,7 +310,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
} }
fp = next; fp = next;
} }
sub_frag_mem_limit(&fq->q, sum_truesize); sub_frag_mem_limit(fq->q.net, sum_truesize);
head->next = NULL; head->next = NULL;
head->dev = dev; head->dev = dev;

Просмотреть файл

@ -131,34 +131,22 @@ inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
unsigned int evicted = 0; unsigned int evicted = 0;
HLIST_HEAD(expired); HLIST_HEAD(expired);
evict_again:
spin_lock(&hb->chain_lock); spin_lock(&hb->chain_lock);
hlist_for_each_entry_safe(fq, n, &hb->chain, list) { hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
if (!inet_fragq_should_evict(fq)) if (!inet_fragq_should_evict(fq))
continue; continue;
if (!del_timer(&fq->timer)) { if (!del_timer(&fq->timer))
/* q expiring right now thus increment its refcount so continue;
* it won't be freed under us and wait until the timer
* has finished executing then destroy it
*/
atomic_inc(&fq->refcnt);
spin_unlock(&hb->chain_lock);
del_timer_sync(&fq->timer);
inet_frag_put(fq, f);
goto evict_again;
}
fq->flags |= INET_FRAG_EVICTED; hlist_add_head(&fq->list_evictor, &expired);
hlist_del(&fq->list);
hlist_add_head(&fq->list, &expired);
++evicted; ++evicted;
} }
spin_unlock(&hb->chain_lock); spin_unlock(&hb->chain_lock);
hlist_for_each_entry_safe(fq, n, &expired, list) hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
f->frag_expire((unsigned long) fq); f->frag_expire((unsigned long) fq);
return evicted; return evicted;
@ -240,18 +228,20 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
int i; int i;
nf->low_thresh = 0; nf->low_thresh = 0;
local_bh_disable();
evict_again: evict_again:
local_bh_disable();
seq = read_seqbegin(&f->rnd_seqlock); seq = read_seqbegin(&f->rnd_seqlock);
for (i = 0; i < INETFRAGS_HASHSZ ; i++) for (i = 0; i < INETFRAGS_HASHSZ ; i++)
inet_evict_bucket(f, &f->hash[i]); inet_evict_bucket(f, &f->hash[i]);
if (read_seqretry(&f->rnd_seqlock, seq))
goto evict_again;
local_bh_enable(); local_bh_enable();
cond_resched();
if (read_seqretry(&f->rnd_seqlock, seq) ||
percpu_counter_sum(&nf->mem))
goto evict_again;
percpu_counter_destroy(&nf->mem); percpu_counter_destroy(&nf->mem);
} }
@ -284,8 +274,8 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
struct inet_frag_bucket *hb; struct inet_frag_bucket *hb;
hb = get_frag_bucket_locked(fq, f); hb = get_frag_bucket_locked(fq, f);
if (!(fq->flags & INET_FRAG_EVICTED)) hlist_del(&fq->list);
hlist_del(&fq->list); fq->flags |= INET_FRAG_COMPLETE;
spin_unlock(&hb->chain_lock); spin_unlock(&hb->chain_lock);
} }
@ -297,7 +287,6 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
if (!(fq->flags & INET_FRAG_COMPLETE)) { if (!(fq->flags & INET_FRAG_COMPLETE)) {
fq_unlink(fq, f); fq_unlink(fq, f);
atomic_dec(&fq->refcnt); atomic_dec(&fq->refcnt);
fq->flags |= INET_FRAG_COMPLETE;
} }
} }
EXPORT_SYMBOL(inet_frag_kill); EXPORT_SYMBOL(inet_frag_kill);
@ -330,11 +319,12 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
fp = xp; fp = xp;
} }
sum = sum_truesize + f->qsize; sum = sum_truesize + f->qsize;
sub_frag_mem_limit(q, sum);
if (f->destructor) if (f->destructor)
f->destructor(q); f->destructor(q);
kmem_cache_free(f->frags_cachep, q); kmem_cache_free(f->frags_cachep, q);
sub_frag_mem_limit(nf, sum);
} }
EXPORT_SYMBOL(inet_frag_destroy); EXPORT_SYMBOL(inet_frag_destroy);
@ -390,7 +380,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
q->net = nf; q->net = nf;
f->constructor(q, arg); f->constructor(q, arg);
add_frag_mem_limit(q, f->qsize); add_frag_mem_limit(nf, f->qsize);
setup_timer(&q->timer, f->frag_expire, (unsigned long)q); setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
spin_lock_init(&q->lock); spin_lock_init(&q->lock);

Просмотреть файл

@ -202,7 +202,7 @@ static void ip_expire(unsigned long arg)
ipq_kill(qp); ipq_kill(qp);
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
if (!(qp->q.flags & INET_FRAG_EVICTED)) { if (!inet_frag_evicting(&qp->q)) {
struct sk_buff *head = qp->q.fragments; struct sk_buff *head = qp->q.fragments;
const struct iphdr *iph; const struct iphdr *iph;
int err; int err;
@ -309,7 +309,7 @@ static int ip_frag_reinit(struct ipq *qp)
kfree_skb(fp); kfree_skb(fp);
fp = xp; fp = xp;
} while (fp); } while (fp);
sub_frag_mem_limit(&qp->q, sum_truesize); sub_frag_mem_limit(qp->q.net, sum_truesize);
qp->q.flags = 0; qp->q.flags = 0;
qp->q.len = 0; qp->q.len = 0;
@ -455,7 +455,7 @@ found:
qp->q.fragments = next; qp->q.fragments = next;
qp->q.meat -= free_it->len; qp->q.meat -= free_it->len;
sub_frag_mem_limit(&qp->q, free_it->truesize); sub_frag_mem_limit(qp->q.net, free_it->truesize);
kfree_skb(free_it); kfree_skb(free_it);
} }
} }
@ -479,7 +479,7 @@ found:
qp->q.stamp = skb->tstamp; qp->q.stamp = skb->tstamp;
qp->q.meat += skb->len; qp->q.meat += skb->len;
qp->ecn |= ecn; qp->ecn |= ecn;
add_frag_mem_limit(&qp->q, skb->truesize); add_frag_mem_limit(qp->q.net, skb->truesize);
if (offset == 0) if (offset == 0)
qp->q.flags |= INET_FRAG_FIRST_IN; qp->q.flags |= INET_FRAG_FIRST_IN;
@ -587,7 +587,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
head->len -= clone->len; head->len -= clone->len;
clone->csum = 0; clone->csum = 0;
clone->ip_summed = head->ip_summed; clone->ip_summed = head->ip_summed;
add_frag_mem_limit(&qp->q, clone->truesize); add_frag_mem_limit(qp->q.net, clone->truesize);
} }
skb_push(head, head->data - skb_network_header(head)); skb_push(head, head->data - skb_network_header(head));
@ -615,7 +615,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
} }
fp = next; fp = next;
} }
sub_frag_mem_limit(&qp->q, sum_truesize); sub_frag_mem_limit(qp->q.net, sum_truesize);
head->next = NULL; head->next = NULL;
head->dev = dev; head->dev = dev;

Просмотреть файл

@ -348,7 +348,7 @@ found:
fq->ecn |= ecn; fq->ecn |= ecn;
if (payload_len > fq->q.max_size) if (payload_len > fq->q.max_size)
fq->q.max_size = payload_len; fq->q.max_size = payload_len;
add_frag_mem_limit(&fq->q, skb->truesize); add_frag_mem_limit(fq->q.net, skb->truesize);
/* The first fragment. /* The first fragment.
* nhoffset is obtained from the first fragment, of course. * nhoffset is obtained from the first fragment, of course.
@ -430,7 +430,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
clone->ip_summed = head->ip_summed; clone->ip_summed = head->ip_summed;
NFCT_FRAG6_CB(clone)->orig = NULL; NFCT_FRAG6_CB(clone)->orig = NULL;
add_frag_mem_limit(&fq->q, clone->truesize); add_frag_mem_limit(fq->q.net, clone->truesize);
} }
/* We have to remove fragment header from datagram and to relocate /* We have to remove fragment header from datagram and to relocate
@ -454,7 +454,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
head->csum = csum_add(head->csum, fp->csum); head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize; head->truesize += fp->truesize;
} }
sub_frag_mem_limit(&fq->q, head->truesize); sub_frag_mem_limit(fq->q.net, head->truesize);
head->ignore_df = 1; head->ignore_df = 1;
head->next = NULL; head->next = NULL;

Просмотреть файл

@ -144,7 +144,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
if (fq->q.flags & INET_FRAG_EVICTED) if (inet_frag_evicting(&fq->q))
goto out_rcu_unlock; goto out_rcu_unlock;
IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
@ -330,7 +330,7 @@ found:
fq->q.stamp = skb->tstamp; fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len; fq->q.meat += skb->len;
fq->ecn |= ecn; fq->ecn |= ecn;
add_frag_mem_limit(&fq->q, skb->truesize); add_frag_mem_limit(fq->q.net, skb->truesize);
/* The first fragment. /* The first fragment.
* nhoffset is obtained from the first fragment, of course. * nhoffset is obtained from the first fragment, of course.
@ -443,7 +443,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
head->len -= clone->len; head->len -= clone->len;
clone->csum = 0; clone->csum = 0;
clone->ip_summed = head->ip_summed; clone->ip_summed = head->ip_summed;
add_frag_mem_limit(&fq->q, clone->truesize); add_frag_mem_limit(fq->q.net, clone->truesize);
} }
/* We have to remove fragment header from datagram and to relocate /* We have to remove fragment header from datagram and to relocate
@ -481,7 +481,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
} }
fp = next; fp = next;
} }
sub_frag_mem_limit(&fq->q, sum_truesize); sub_frag_mem_limit(fq->q.net, sum_truesize);
head->next = NULL; head->next = NULL;
head->dev = dev; head->dev = dev;