net: mvneta: Improve the buffer allocation method for SWBM

With system having a small memory (around 256MB), the state "cannot
allocate memory to refill with new buffer" is reach pretty quickly.

By this patch we changed buffer allocation method to a better handling of
this use case by avoiding memory allocation issues.

Signed-off-by: Yelena Krivosheev <yelena@marvell.com>
[gregory: extract from a larger patch]
Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Yelena Krivosheev 2018-07-18 18:10:57 +02:00 коммит произвёл David S. Miller
Родитель f945cec88c
Коммит 562e2f467e
1 изменённых файлов: 182 добавлений и 97 удалений

Просмотреть файл

@ -483,7 +483,10 @@ struct mvneta_port {
#define MVNETA_RXD_ERR_RESOURCE (BIT(17) | BIT(18))
#define MVNETA_RXD_ERR_CODE_MASK (BIT(17) | BIT(18))
#define MVNETA_RXD_L3_IP4 BIT(25)
#define MVNETA_RXD_FIRST_LAST_DESC (BIT(26) | BIT(27))
#define MVNETA_RXD_LAST_DESC BIT(26)
#define MVNETA_RXD_FIRST_DESC BIT(27)
#define MVNETA_RXD_FIRST_LAST_DESC (MVNETA_RXD_FIRST_DESC | \
MVNETA_RXD_LAST_DESC)
#define MVNETA_RXD_L4_CSUM_OK BIT(30)
#if defined(__LITTLE_ENDIAN)
@ -611,6 +614,14 @@ struct mvneta_rx_queue {
/* Index of the next RX DMA descriptor to process */
int next_desc_to_proc;
/* Index of first RX DMA descriptor to refill */
int first_to_refill;
u32 refill_num;
/* pointer to uncomplete skb buffer */
struct sk_buff *skb;
int left_size;
/* error counters */
u32 skb_alloc_err;
u32 refill_err;
@ -626,6 +637,7 @@ static int txq_number = 8;
static int rxq_def;
static int rx_copybreak __read_mostly = 256;
static int rx_header_size __read_mostly = 128;
/* HW BM need that each port be identify by a unique ID */
static int global_port_id;
@ -1689,13 +1701,6 @@ static void mvneta_rx_error(struct mvneta_port *pp,
{
u32 status = rx_desc->status;
if (!mvneta_rxq_desc_is_first_last(status)) {
netdev_err(pp->dev,
"bad rx status %08x (buffer oversize), size=%d\n",
status, rx_desc->data_size);
return;
}
switch (status & MVNETA_RXD_ERR_CODE_MASK) {
case MVNETA_RXD_ERR_CRC:
netdev_err(pp->dev, "bad rx status %08x (crc error), size=%d\n",
@ -1882,6 +1887,8 @@ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp,
for (i = 0; i < rxq->size; i++) {
struct mvneta_rx_desc *rx_desc = rxq->descs + i;
void *data = rxq->buf_virt_addr[i];
if (!data || !(rx_desc->buf_phys_addr))
continue;
dma_unmap_single(pp->dev->dev.parent, rx_desc->buf_phys_addr,
MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE);
@ -1889,117 +1896,183 @@ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp,
}
}
static inline
int mvneta_rx_refill_queue(struct mvneta_port *pp, struct mvneta_rx_queue *rxq)
{
struct mvneta_rx_desc *rx_desc;
int curr_desc = rxq->first_to_refill;
int i;
for (i = 0; (i < rxq->refill_num) && (i < 64); i++) {
rx_desc = rxq->descs + curr_desc;
if (!(rx_desc->buf_phys_addr)) {
if (mvneta_rx_refill(pp, rx_desc, rxq, GFP_ATOMIC)) {
pr_err("Can't refill queue %d. Done %d from %d\n",
rxq->id, i, rxq->refill_num);
rxq->refill_err++;
break;
}
}
curr_desc = MVNETA_QUEUE_NEXT_DESC(rxq, curr_desc);
}
rxq->refill_num -= i;
rxq->first_to_refill = curr_desc;
return i;
}
/* Main rx processing when using software buffer management */
static int mvneta_rx_swbm(struct napi_struct *napi,
struct mvneta_port *pp, int rx_todo,
struct mvneta_port *pp, int budget,
struct mvneta_rx_queue *rxq)
{
struct net_device *dev = pp->dev;
int rx_done;
int rx_todo, rx_proc;
int refill = 0;
u32 rcvd_pkts = 0;
u32 rcvd_bytes = 0;
/* Get number of received packets */
rx_done = mvneta_rxq_busy_desc_num_get(pp, rxq);
if (rx_todo > rx_done)
rx_todo = rx_done;
rx_done = 0;
rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq);
rx_proc = 0;
/* Fairness NAPI loop */
while (rx_done < rx_todo) {
while ((rcvd_pkts < budget) && (rx_proc < rx_todo)) {
struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq);
struct sk_buff *skb;
unsigned char *data;
struct page *page;
dma_addr_t phys_addr;
u32 rx_status, frag_size;
int rx_bytes, err, index;
u32 rx_status, index;
int rx_bytes, skb_size, copy_size;
int frag_num, frag_size, frag_offset;
rx_done++;
rx_status = rx_desc->status;
rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE);
index = rx_desc - rxq->descs;
page = (struct page *)rxq->buf_virt_addr[index];
data = page_address(page);
/* Prefetch header */
prefetch(data);
phys_addr = rx_desc->buf_phys_addr - pp->rx_offset_correction;
if (!mvneta_rxq_desc_is_first_last(rx_status) ||
(rx_status & MVNETA_RXD_ERR_SUMMARY)) {
mvneta_rx_error(pp, rx_desc);
err_drop_frame:
dev->stats.rx_errors++;
/* leave the descriptor untouched */
continue;
}
phys_addr = rx_desc->buf_phys_addr;
rx_status = rx_desc->status;
rx_proc++;
rxq->refill_num++;
if (rx_bytes <= rx_copybreak) {
/* better copy a small frame and not unmap the DMA region */
skb = netdev_alloc_skb_ip_align(dev, rx_bytes);
if (unlikely(!skb)) {
if (rx_status & MVNETA_RXD_FIRST_DESC) {
/* Check errors only for FIRST descriptor */
if (rx_status & MVNETA_RXD_ERR_SUMMARY) {
mvneta_rx_error(pp, rx_desc);
dev->stats.rx_errors++;
/* leave the descriptor untouched */
continue;
}
rx_bytes = rx_desc->data_size -
(ETH_FCS_LEN + MVNETA_MH_SIZE);
/* Allocate small skb for each new packet */
skb_size = max(rx_copybreak, rx_header_size);
rxq->skb = netdev_alloc_skb_ip_align(dev, skb_size);
if (unlikely(!rxq->skb)) {
netdev_err(dev,
"Can't allocate skb on queue %d\n",
rxq->id);
dev->stats.rx_dropped++;
rxq->skb_alloc_err++;
goto err_drop_frame;
continue;
}
copy_size = min(skb_size, rx_bytes);
dma_sync_single_range_for_cpu(dev->dev.parent,
phys_addr,
MVNETA_MH_SIZE + NET_SKB_PAD,
rx_bytes,
DMA_FROM_DEVICE);
skb_put_data(skb, data + MVNETA_MH_SIZE + NET_SKB_PAD,
rx_bytes);
/* Copy data from buffer to SKB, skip Marvell header */
memcpy(rxq->skb->data, data + MVNETA_MH_SIZE,
copy_size);
skb_put(rxq->skb, copy_size);
rxq->left_size = rx_bytes - copy_size;
skb->protocol = eth_type_trans(skb, dev);
mvneta_rx_csum(pp, rx_status, skb);
napi_gro_receive(napi, skb);
mvneta_rx_csum(pp, rx_status, rxq->skb);
if (rxq->left_size == 0) {
int size = copy_size + MVNETA_MH_SIZE;
rcvd_pkts++;
rcvd_bytes += rx_bytes;
dma_sync_single_range_for_cpu(dev->dev.parent,
phys_addr, 0,
size,
DMA_FROM_DEVICE);
/* leave the descriptor and buffer untouched */
/* leave the descriptor and buffer untouched */
} else {
/* refill descriptor with new buffer later */
rx_desc->buf_phys_addr = 0;
frag_num = 0;
frag_offset = copy_size + MVNETA_MH_SIZE;
frag_size = min(rxq->left_size,
(int)(PAGE_SIZE - frag_offset));
skb_add_rx_frag(rxq->skb, frag_num, page,
frag_offset, frag_size,
PAGE_SIZE);
dma_unmap_single(dev->dev.parent, phys_addr,
PAGE_SIZE, DMA_FROM_DEVICE);
rxq->left_size -= frag_size;
}
} else {
/* Middle or Last descriptor */
if (unlikely(!rxq->skb)) {
pr_debug("no skb for rx_status 0x%x\n",
rx_status);
continue;
}
if (!rxq->left_size) {
/* last descriptor has only FCS */
/* and can be discarded */
dma_sync_single_range_for_cpu(dev->dev.parent,
phys_addr, 0,
ETH_FCS_LEN,
DMA_FROM_DEVICE);
/* leave the descriptor and buffer untouched */
} else {
/* refill descriptor with new buffer later */
rx_desc->buf_phys_addr = 0;
frag_num = skb_shinfo(rxq->skb)->nr_frags;
frag_offset = 0;
frag_size = min(rxq->left_size,
(int)(PAGE_SIZE - frag_offset));
skb_add_rx_frag(rxq->skb, frag_num, page,
frag_offset, frag_size,
PAGE_SIZE);
dma_unmap_single(dev->dev.parent, phys_addr,
PAGE_SIZE,
DMA_FROM_DEVICE);
rxq->left_size -= frag_size;
}
} /* Middle or Last descriptor */
if (!(rx_status & MVNETA_RXD_LAST_DESC))
/* no last descriptor this time */
continue;
if (rxq->left_size) {
pr_err("get last desc, but left_size (%d) != 0\n",
rxq->left_size);
dev_kfree_skb_any(rxq->skb);
rxq->left_size = 0;
rxq->skb = NULL;
continue;
}
/* Refill processing */
err = mvneta_rx_refill(pp, rx_desc, rxq, GFP_KERNEL);
if (err) {
netdev_err(dev, "Linux processing - Can't refill\n");
rxq->refill_err++;
goto err_drop_frame;
}
frag_size = pp->frag_size;
skb = build_skb(data, frag_size > PAGE_SIZE ? 0 : frag_size);
/* After refill old buffer has to be unmapped regardless
* the skb is successfully built or not.
*/
dma_unmap_single(dev->dev.parent, phys_addr,
MVNETA_RX_BUF_SIZE(pp->pkt_size),
DMA_FROM_DEVICE);
if (!skb)
goto err_drop_frame;
rcvd_pkts++;
rcvd_bytes += rx_bytes;
rcvd_bytes += rxq->skb->len;
/* Linux processing */
skb_reserve(skb, MVNETA_MH_SIZE + NET_SKB_PAD);
skb_put(skb, rx_bytes);
rxq->skb->protocol = eth_type_trans(rxq->skb, dev);
skb->protocol = eth_type_trans(skb, dev);
if (dev->features & NETIF_F_GRO)
napi_gro_receive(napi, rxq->skb);
else
netif_receive_skb(rxq->skb);
mvneta_rx_csum(pp, rx_status, skb);
napi_gro_receive(napi, skb);
/* clean uncomplete skb pointer in queue */
rxq->skb = NULL;
rxq->left_size = 0;
}
if (rcvd_pkts) {
@ -2011,10 +2084,13 @@ err_drop_frame:
u64_stats_update_end(&stats->syncp);
}
/* Update rxq management counters */
mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_done);
/* return some buffers to hardware queue, one at a time is too slow */
refill = mvneta_rx_refill_queue(pp, rxq);
return rx_done;
/* Update rxq management counters */
mvneta_rxq_desc_num_update(pp, rxq, rx_proc, refill);
return rcvd_pkts;
}
/* Main rx processing when using hardware buffer management */
@ -2823,21 +2899,23 @@ static void mvneta_rxq_hw_init(struct mvneta_port *pp,
mvreg_write(pp, MVNETA_RXQ_BASE_ADDR_REG(rxq->id), rxq->descs_phys);
mvreg_write(pp, MVNETA_RXQ_SIZE_REG(rxq->id), rxq->size);
/* Set Offset */
mvneta_rxq_offset_set(pp, rxq, NET_SKB_PAD - pp->rx_offset_correction);
/* Set coalescing pkts and time */
mvneta_rx_pkts_coal_set(pp, rxq, rxq->pkts_coal);
mvneta_rx_time_coal_set(pp, rxq, rxq->time_coal);
if (!pp->bm_priv) {
/* Fill RXQ with buffers from RX pool */
mvneta_rxq_buf_size_set(pp, rxq,
MVNETA_RX_BUF_SIZE(pp->pkt_size));
/* Set Offset */
mvneta_rxq_offset_set(pp, rxq, 0);
mvneta_rxq_buf_size_set(pp, rxq, pp->frag_size);
mvneta_rxq_bm_disable(pp, rxq);
mvneta_rxq_fill(pp, rxq, rxq->size);
} else {
/* Set Offset */
mvneta_rxq_offset_set(pp, rxq,
NET_SKB_PAD - pp->rx_offset_correction);
mvneta_rxq_bm_enable(pp, rxq);
/* Fill RXQ with buffers from RX pool */
mvneta_rxq_long_pool_set(pp, rxq);
mvneta_rxq_short_pool_set(pp, rxq);
mvneta_rxq_non_occup_desc_add(pp, rxq, rxq->size);
@ -2866,6 +2944,9 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp,
{
mvneta_rxq_drop_pkts(pp, rxq);
if (rxq->skb)
dev_kfree_skb_any(rxq->skb);
if (rxq->descs)
dma_free_coherent(pp->dev->dev.parent,
rxq->size * MVNETA_DESC_ALIGNED_SIZE,
@ -2876,6 +2957,10 @@ static void mvneta_rxq_deinit(struct mvneta_port *pp,
rxq->last_desc = 0;
rxq->next_desc_to_proc = 0;
rxq->descs_phys = 0;
rxq->first_to_refill = 0;
rxq->refill_num = 0;
rxq->skb = NULL;
rxq->left_size = 0;
}
static int mvneta_txq_sw_init(struct mvneta_port *pp,
@ -4366,14 +4451,6 @@ static int mvneta_probe(struct platform_device *pdev)
pp->dn = dn;
pp->rxq_def = rxq_def;
/* Set RX packet offset correction for platforms, whose
* NET_SKB_PAD, exceeds 64B. It should be 64B for 64-bit
* platforms and 0B for 32-bit ones.
*/
pp->rx_offset_correction =
max(0, NET_SKB_PAD - MVNETA_RX_PKT_OFFSET_CORRECTION);
pp->indir[0] = rxq_def;
/* Get special SoC configurations */
@ -4461,6 +4538,7 @@ static int mvneta_probe(struct platform_device *pdev)
SET_NETDEV_DEV(dev, &pdev->dev);
pp->id = global_port_id++;
pp->rx_offset_correction = 0; /* not relevant for SW BM */
/* Obtain access to BM resources if enabled and already initialized */
bm_node = of_parse_phandle(dn, "buffer-manager", 0);
@ -4475,6 +4553,13 @@ static int mvneta_probe(struct platform_device *pdev)
pp->bm_priv = NULL;
}
}
/* Set RX packet offset correction for platforms, whose
* NET_SKB_PAD, exceeds 64B. It should be 64B for 64-bit
* platforms and 0B for 32-bit ones.
*/
pp->rx_offset_correction = max(0,
NET_SKB_PAD -
MVNETA_RX_PKT_OFFSET_CORRECTION);
}
of_node_put(bm_node);