Merge branch 'net_sched-reduce-the-number-of-qdisc-resets'

Cong Wang says:

====================
net_sched: reduce the number of qdisc resets

This patchset aims to reduce the number of qdisc resets during
qdisc tear down. Patches 1-3 are preparation for the following
patches; in particular, patches 2 and 3 add a few tracepoints
so that we can observe the whole lifetime of qdiscs. Patches 4
and 5 are the ones that do the actual work. Please find more
details in each patch description.

Vaclav Zindulka tested this patchset: with his large ruleset of
over 13k qdiscs, teardown time dropped from 22 s to 520 ms.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2020-05-27 15:05:50 -07:00
Parents: b3037ac501 759ae57f1b
Commit: bdad7f9441
3 changed files: 110 additions and 43 deletions

View file

@ -8,6 +8,8 @@
#include <linux/netdevice.h>
#include <linux/tracepoint.h>
#include <linux/ftrace.h>
#include <linux/pkt_sched.h>
#include <net/sch_generic.h>
TRACE_EVENT(qdisc_dequeue,
@ -44,6 +46,79 @@ TRACE_EVENT(qdisc_dequeue,
__entry->txq_state, __entry->packets, __entry->skbaddr )
);
/*
 * Tracepoint fired at the start of qdisc_reset(): records which qdisc
 * (device name, ops->id kind) is being reset, plus its parent and
 * handle TC classids (printed as major:minor).
 *
 * Fix: __string()/__assign_str() expect a character string, but
 * qdisc_dev(q) returns a struct net_device *.  That only worked by
 * accident because 'name' is the first member of struct net_device;
 * pass the name field explicitly.
 */
TRACE_EVENT(qdisc_reset,

	TP_PROTO(struct Qdisc *q),

	TP_ARGS(q),

	TP_STRUCT__entry(
		__string(	dev,	qdisc_dev(q)->name	)
		__string(	kind,	q->ops->id		)
		__field(	u32,	parent			)
		__field(	u32,	handle			)
	),

	TP_fast_assign(
		__assign_str(dev, qdisc_dev(q)->name);
		__assign_str(kind, q->ops->id);
		__entry->parent = q->parent;
		__entry->handle = q->handle;
	),

	TP_printk("dev=%s kind=%s parent=%x:%x handle=%x:%x", __get_str(dev),
		  __get_str(kind), TC_H_MAJ(__entry->parent) >> 16, TC_H_MIN(__entry->parent),
		  TC_H_MAJ(__entry->handle) >> 16, TC_H_MIN(__entry->handle))
);
/*
 * Tracepoint fired from qdisc_destroy(): same payload as qdisc_reset
 * (device name, qdisc kind, parent/handle classids) so a qdisc's full
 * lifetime can be followed from create to destroy.
 *
 * Fix: __string()/__assign_str() expect a character string, but
 * qdisc_dev(q) returns a struct net_device *.  That only worked by
 * accident because 'name' is the first member of struct net_device;
 * pass the name field explicitly.
 */
TRACE_EVENT(qdisc_destroy,

	TP_PROTO(struct Qdisc *q),

	TP_ARGS(q),

	TP_STRUCT__entry(
		__string(	dev,	qdisc_dev(q)->name	)
		__string(	kind,	q->ops->id		)
		__field(	u32,	parent			)
		__field(	u32,	handle			)
	),

	TP_fast_assign(
		__assign_str(dev, qdisc_dev(q)->name);
		__assign_str(kind, q->ops->id);
		__entry->parent = q->parent;
		__entry->handle = q->handle;
	),

	TP_printk("dev=%s kind=%s parent=%x:%x handle=%x:%x", __get_str(dev),
		  __get_str(kind), TC_H_MAJ(__entry->parent) >> 16, TC_H_MIN(__entry->parent),
		  TC_H_MAJ(__entry->handle) >> 16, TC_H_MIN(__entry->handle))
);
/*
 * Tracepoint fired when a qdisc is successfully created (both from
 * qdisc_create() and qdisc_create_dflt()): records the device name,
 * the qdisc kind (ops->id) and the parent classid.  No handle is
 * recorded here — unlike qdisc_reset/qdisc_destroy, only the ops,
 * device and parent are available at this call site.
 */
TRACE_EVENT(qdisc_create,
TP_PROTO(const struct Qdisc_ops *ops, struct net_device *dev, u32 parent),
TP_ARGS(ops, dev, parent),
TP_STRUCT__entry(
__string(	dev,	dev->name	)
__string(	kind,	ops->id	)
__field(	u32,	parent	)
),
TP_fast_assign(
__assign_str(dev, dev->name);
__assign_str(kind, ops->id);
__entry->parent = parent;
),
/* parent printed as TC classid major:minor */
TP_printk("dev=%s kind=%s parent=%x:%x",
__get_str(dev), __get_str(kind),
TC_H_MAJ(__entry->parent) >> 16, TC_H_MIN(__entry->parent))
);
#endif /* _TRACE_QDISC_H */
/* This part must be outside protection */

View file

@ -32,6 +32,8 @@
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <trace/events/qdisc.h>
/*
Short review.
@ -1283,6 +1285,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
}
qdisc_hash_add(sch, false);
trace_qdisc_create(ops, dev, parent);
return sch;

View file

@ -896,8 +896,10 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
}
sch->parent = parentid;
if (!ops->init || ops->init(sch, NULL, extack) == 0)
if (!ops->init || ops->init(sch, NULL, extack) == 0) {
trace_qdisc_create(ops, dev_queue->dev, parentid);
return sch;
}
qdisc_put(sch);
return NULL;
@ -911,6 +913,8 @@ void qdisc_reset(struct Qdisc *qdisc)
const struct Qdisc_ops *ops = qdisc->ops;
struct sk_buff *skb, *tmp;
trace_qdisc_reset(qdisc);
if (ops->reset)
ops->reset(qdisc);
@ -949,7 +953,6 @@ static void qdisc_free_cb(struct rcu_head *head)
static void qdisc_destroy(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops = qdisc->ops;
struct sk_buff *skb, *tmp;
#ifdef CONFIG_NET_SCHED
qdisc_hash_del(qdisc);
@ -957,23 +960,16 @@ static void qdisc_destroy(struct Qdisc *qdisc)
qdisc_put_stab(rtnl_dereference(qdisc->stab));
#endif
gen_kill_estimator(&qdisc->rate_est);
if (ops->reset)
ops->reset(qdisc);
qdisc_reset(qdisc);
if (ops->destroy)
ops->destroy(qdisc);
module_put(ops->owner);
dev_put(qdisc_dev(qdisc));
skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
__skb_unlink(skb, &qdisc->gso_skb);
kfree_skb_list(skb);
}
skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
__skb_unlink(skb, &qdisc->skb_bad_txq);
kfree_skb_list(skb);
}
trace_qdisc_destroy(qdisc);
call_rcu(&qdisc->rcu, qdisc_free_cb);
}
@ -1132,6 +1128,28 @@ void dev_activate(struct net_device *dev)
}
EXPORT_SYMBOL(dev_activate);
/*
 * Mark @qdisc deactivated and reset it, under the appropriate locks.
 *
 * Built-in qdiscs (TCQ_F_BUILTIN) are never deactivated.  If the
 * qdisc already carries __QDISC_STATE_DEACTIVATED we return early so
 * the reset runs at most once per deactivation — this is the point of
 * the patchset: avoid redundant qdisc_reset() calls during teardown.
 *
 * NOTE(review): the early DEACTIVATED test is done before taking the
 * locks; presumably RTNL serializes callers — confirm.
 */
static void qdisc_deactivate(struct Qdisc *qdisc)
{
bool nolock = qdisc->flags & TCQ_F_NOLOCK;
if (qdisc->flags & TCQ_F_BUILTIN)
return;
if (test_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state))
return;
/* lockless (TCQ_F_NOLOCK) qdiscs additionally serialize on ->seqlock,
 * taken before the per-qdisc tree lock
 */
if (nolock)
spin_lock_bh(&qdisc->seqlock);
spin_lock_bh(qdisc_lock(qdisc));
set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
qdisc_reset(qdisc);
spin_unlock_bh(qdisc_lock(qdisc));
if (nolock)
spin_unlock_bh(&qdisc->seqlock);
}
static void dev_deactivate_queue(struct net_device *dev,
struct netdev_queue *dev_queue,
void *_qdisc_default)
@ -1141,21 +1159,8 @@ static void dev_deactivate_queue(struct net_device *dev,
qdisc = rtnl_dereference(dev_queue->qdisc);
if (qdisc) {
bool nolock = qdisc->flags & TCQ_F_NOLOCK;
if (nolock)
spin_lock_bh(&qdisc->seqlock);
spin_lock_bh(qdisc_lock(qdisc));
if (!(qdisc->flags & TCQ_F_BUILTIN))
set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
qdisc_deactivate(qdisc);
rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
qdisc_reset(qdisc);
spin_unlock_bh(qdisc_lock(qdisc));
if (nolock)
spin_unlock_bh(&qdisc->seqlock);
}
}
@ -1186,16 +1191,6 @@ static bool some_qdisc_is_busy(struct net_device *dev)
return false;
}
/*
 * Reset the sleeping qdisc attached to @dev_queue, if one is present.
 * Shaped as a callback for netdev_for_each_tx_queue(); the @dev and
 * @none arguments are unused.
 */
static void dev_qdisc_reset(struct net_device *dev,
			    struct netdev_queue *dev_queue,
			    void *none)
{
	struct Qdisc *sleeping = dev_queue->qdisc_sleeping;

	if (!sleeping)
		return;

	qdisc_reset(sleeping);
}
/**
* dev_deactivate_many - deactivate transmissions on several devices
* @head: list of devices to deactivate
@ -1232,12 +1227,6 @@ void dev_deactivate_many(struct list_head *head)
*/
schedule_timeout_uninterruptible(1);
}
/* The new qdisc is assigned at this point so we can safely
* unwind stale skb lists and qdisc statistics
*/
netdev_for_each_tx_queue(dev, dev_qdisc_reset, NULL);
if (dev_ingress_queue(dev))
dev_qdisc_reset(dev, dev_ingress_queue(dev), NULL);
}
}