From 2a90f7e1d5d04e4f1060268e0b55a2c702bbd67a Mon Sep 17 00:00:00 2001
From: Simon Guinot
Date: Mon, 16 Jan 2017 18:08:31 +0100
Subject: [PATCH 1/2] net: mvneta: add xmit_more support

Based on the xmit_more flag of the skb, TX descriptors can be
concatenated before flushing. This commit delays the Tx descriptor
flush if the queue is running and if there are more skbs to send.

Due to a limitation of the MVNETA_TXQ_UPDATE_REG(q) registers, at most
255 descriptors can be flushed at once. Because of that, a new macro
(MVNETA_TXQ_DEC_SENT_MASK) is added to ensure that the number of
concatenated descriptors never exceeds that value.

Signed-off-by: Simon Guinot
Signed-off-by: Marcin Wojtas
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/marvell/mvneta.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 3607d8febbcf..9624537219a8 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -224,6 +224,7 @@
 #define MVNETA_TXQ_SENT_THRESH_MASK(coal)	((coal) << 16)
 #define MVNETA_TXQ_UPDATE_REG(q)		(0x3c60 + ((q) << 2))
 #define      MVNETA_TXQ_DEC_SENT_SHIFT	16
+#define      MVNETA_TXQ_DEC_SENT_MASK	0xff
 #define MVNETA_TXQ_STATUS_REG(q)		(0x3c40 + ((q) << 2))
 #define      MVNETA_TXQ_SENT_DESC_SHIFT	16
 #define      MVNETA_TXQ_SENT_DESC_MASK	0x3fff0000
@@ -525,6 +526,7 @@ struct mvneta_tx_queue {
 	 * descriptor ring
 	 */
 	int count;
+	int pending;
 	int tx_stop_threshold;
 	int tx_wake_threshold;
 
@@ -818,8 +820,9 @@ static void mvneta_txq_pend_desc_add(struct mvneta_port *pp,
 	/* Only 255 descriptors can be added at once ; Assume caller
 	 * process TX desriptors in quanta less than 256
 	 */
-	val = pend_desc;
+	val = pend_desc + txq->pending;
 	mvreg_write(pp, MVNETA_TXQ_UPDATE_REG(txq->id), val);
+	txq->pending = 0;
 }
 
 /* Get pointer to next TX descriptor to be processed (send) by HW */
@@ -2399,11 +2402,15 @@ out:
 		struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);
 
 		txq->count += frags;
-		mvneta_txq_pend_desc_add(pp, txq, frags);
-
 		if (txq->count >= txq->tx_stop_threshold)
 			netif_tx_stop_queue(nq);
 
+		if (!skb->xmit_more || netif_xmit_stopped(nq) ||
+		    txq->pending + frags > MVNETA_TXQ_DEC_SENT_MASK)
+			mvneta_txq_pend_desc_add(pp, txq, frags);
+		else
+			txq->pending += frags;
+
 		u64_stats_update_begin(&stats->syncp);
 		stats->tx_packets++;
 		stats->tx_bytes += len;
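The decision logic added above boils down to: defer the doorbell write
while the stack promises more packets (skb->xmit_more), the queue is
still running, and the deferred total stays within the 255-descriptor
register limit. The standalone sketch below illustrates only that
decision; struct txq, flush_pending() and tx_one() are simplified
stand-ins for the driver's types and helpers, and the printf stands in
for the MVNETA_TXQ_UPDATE_REG write:

/* Illustration of the xmit_more batching decision added by this patch.
 * Types and helper names are simplified stand-ins, not driver code.
 */
#include <stdbool.h>
#include <stdio.h>

#define TXQ_DEC_SENT_MASK 0xff	/* HW accepts at most 255 per write */

struct txq {
	int pending;	/* descriptors queued but not yet flushed to HW */
};

/* Mirrors mvneta_txq_pend_desc_add(): report all pending descriptors
 * to the hardware in one register write, then reset the counter.
 */
static void flush_pending(struct txq *q, int frags)
{
	printf("HW write: %d descriptors\n", q->pending + frags);
	q->pending = 0;
}

/* Decision made at the end of the xmit path for each skb. */
static void tx_one(struct txq *q, int frags, bool xmit_more, bool stopped)
{
	if (!xmit_more || stopped ||
	    q->pending + frags > TXQ_DEC_SENT_MASK)
		flush_pending(q, frags);	/* cannot defer any longer */
	else
		q->pending += frags;		/* defer: more skbs coming */
}

int main(void)
{
	struct txq q = { .pending = 0 };

	tx_one(&q, 2, true, false);	/* deferred, pending = 2 */
	tx_one(&q, 3, true, false);	/* deferred, pending = 5 */
	tx_one(&q, 1, false, false);	/* batch ends: one HW write of 6 */
	return 0;
}

Run as-is, the three calls produce a single "HW write: 6 descriptors"
line, i.e. one register write for three skbs instead of three.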
From a29b6235560a1ed10c8e1a73bfc616a66b802b90 Mon Sep 17 00:00:00 2001
From: Marcin Wojtas
Date: Mon, 16 Jan 2017 18:08:32 +0100
Subject: [PATCH 2/2] net: mvneta: add BQL support

Tests showed that when the whole bandwidth is consumed, the latency for
various kinds of traffic can reach high values. With a saturated link
(e.g. with iperf from target to host) a simple ping could take a
significant amount of time.

BQL proved to improve this situation when implemented in the mvneta
driver. Measurements of ping latency for 3 link speeds:

Speed (Mbps) | Latency w/o BQL | Latency with BQL
          10 |         7-14 ms |           3.5 ms
         100 |         2-12 ms |           0.6 ms
        1000 |   often timeout |        up to 2 ms

Decreasing the latency as above results in a slight performance cost:
4 kpps (-1.4%) when pushing 64B packets via two bridged interfaces of
an Armada 38x. For 1500B packets in the same setup, the mpstat tool
showed +8% CPU occupation (default affinity, second CPU idle). This
cost seems reasonable to take, considering the latency improvements.

This commit adds the byte queue limit mechanism to the mvneta driver.

Signed-off-by: Marcin Wojtas
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/marvell/mvneta.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 9624537219a8..6dcc951af0ff 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1759,8 +1759,10 @@ static struct mvneta_tx_queue *mvneta_tx_done_policy(struct mvneta_port *pp,
 
 /* Free tx queue skbuffs */
 static void mvneta_txq_bufs_free(struct mvneta_port *pp,
-				 struct mvneta_tx_queue *txq, int num)
+				 struct mvneta_tx_queue *txq, int num,
+				 struct netdev_queue *nq)
 {
+	unsigned int bytes_compl = 0, pkts_compl = 0;
 	int i;
 
 	for (i = 0; i < num; i++) {
@@ -1768,6 +1770,11 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
 			txq->txq_get_index;
 		struct sk_buff *skb = txq->tx_skb[txq->txq_get_index];
 
+		if (skb) {
+			bytes_compl += skb->len;
+			pkts_compl++;
+		}
+
 		mvneta_txq_inc_get(txq);
 
 		if (!IS_TSO_HEADER(txq, tx_desc->buf_phys_addr))
@@ -1778,6 +1785,8 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp,
 			continue;
 		dev_kfree_skb_any(skb);
 	}
+
+	netdev_tx_completed_queue(nq, pkts_compl, bytes_compl);
 }
 
 /* Handle end of transmission */
@@ -1791,7 +1800,7 @@ static void mvneta_txq_done(struct mvneta_port *pp,
 	if (!tx_done)
 		return;
 
-	mvneta_txq_bufs_free(pp, txq, tx_done);
+	mvneta_txq_bufs_free(pp, txq, tx_done, nq);
 
 	txq->count -= tx_done;
 
@@ -2401,6 +2410,8 @@ out:
 		struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
 		struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);
 
+		netdev_tx_sent_queue(nq, len);
+
 		txq->count += frags;
 		if (txq->count >= txq->tx_stop_threshold)
 			netif_tx_stop_queue(nq);
@@ -2429,9 +2440,10 @@ static void mvneta_txq_done_force(struct mvneta_port *pp,
 				  struct mvneta_tx_queue *txq)
 
 {
+	struct netdev_queue *nq = netdev_get_tx_queue(pp->dev, txq->id);
 	int tx_done = txq->count;
 
-	mvneta_txq_bufs_free(pp, txq, tx_done);
+	mvneta_txq_bufs_free(pp, txq, tx_done, nq);
 
 	/* reset txq */
 	txq->count = 0;
@@ -2957,6 +2969,8 @@ static int mvneta_txq_init(struct mvneta_port *pp,
 static void mvneta_txq_deinit(struct mvneta_port *pp,
 			      struct mvneta_tx_queue *txq)
 {
+	struct netdev_queue *nq = netdev_get_tx_queue(pp->dev, txq->id);
+
 	kfree(txq->tx_skb);
 
 	if (txq->tso_hdrs)
@@ -2968,6 +2982,8 @@ static void mvneta_txq_deinit(struct mvneta_port *pp,
 				  txq->size * MVNETA_DESC_ALIGNED_SIZE,
 				  txq->descs, txq->descs_phys);
 
+	netdev_tx_reset_queue(nq);
+
 	txq->descs = NULL;
 	txq->last_desc = 0;
 	txq->next_desc_to_proc = 0;
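Taken together, the hooks in this patch follow the standard three-call
BQL contract from include/linux/netdevice.h. The skeleton below is a
schematic illustration, not mvneta code: drv_xmit(), drv_tx_done() and
drv_txq_reset() are hypothetical driver functions, while
netdev_tx_sent_queue(), netdev_tx_completed_queue() and
netdev_tx_reset_queue() are the real API calls the patch uses:

#include <linux/netdevice.h>

/* 1. On the xmit path, after the descriptors for skb are queued,
 *    report the bytes handed to the hardware so BQL can size its
 *    in-flight limit.
 */
static netdev_tx_t drv_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct netdev_queue *nq = netdev_get_tx_queue(dev, 0);

	/* ... fill TX descriptors for skb ... */
	netdev_tx_sent_queue(nq, skb->len);
	return NETDEV_TX_OK;
}

/* 2. On TX completion, report how many packets and bytes finished;
 *    BQL may restart a queue it had stopped.
 */
static void drv_tx_done(struct net_device *dev, unsigned int pkts,
			unsigned int bytes)
{
	netdev_tx_completed_queue(netdev_get_tx_queue(dev, 0), pkts, bytes);
}

/* 3. When a queue is torn down or reset, clear BQL's accounting so
 *    stale counts cannot wedge the queue after reinitialization.
 */
static void drv_txq_reset(struct net_device *dev)
{
	netdev_tx_reset_queue(netdev_get_tx_queue(dev, 0));
}

With these hooks in place, BQL adjusts the per-queue byte limit
automatically; its state is visible under
/sys/class/net/<iface>/queues/tx-<n>/byte_queue_limits/.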