[dpdk-dev] The VMXNET3 PMD suddenly stops receiving packets after heavy incoming traffic

Message ID 1437699218-8783-1-git-send-email-mac_leehk@yahoo.com.hk (mailing list archive)
State Changes Requested, archived
Delegated to: Bruce Richardson

Commit Message

Lee Marco July 24, 2015, 12:53 a.m. UTC
  The RX path of the VMXNET3 PMD can deadlock when a large amount of
traffic comes in. The root cause is an mbuf allocation failure in
vmxnet3_post_rx_bufs(), for which there is no error handling when it is
called from vmxnet3_recv_pkts(). The RXD is left without a "free" mbuf,
but the counter still increments. Eventually, no packets can be received.

The fix is to allocate a new mbuf first; if the allocation fails,
the old mbuf is reused. If the allocation succeeds,
vmxnet3_post_rx_bufs() will call vmxnet3_renew_desc()
and the RXD will be renewed inside.

Signed-off-by: Marco Lee <mac_leehk@yahoo.com.hk/marco.lee@ruckuswireless.com>
---
 drivers/net/vmxnet3/vmxnet3_rxtx.c |   37 +++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)
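
In outline, the fix reshapes the receive loop as below. This is a simplified
sketch distilled from the full patch at the bottom of this page, with ring
bookkeeping and the surrounding loop trimmed:

	/* Per completed RX descriptor inside vmxnet3_recv_pkts();
	 * rxm is the mbuf that just received a packet. */
	struct rte_mbuf *rep = rte_rxmbuf_alloc(rxq->mb_pool);	/* allocate replacement first */
	...
	if (unlikely(rep == NULL)) {
		rep = rxm;	/* pool empty: recycle the received mbuf, drop the packet */
		goto rcd_done;
	}
	rx_pkts[nb_rx++] = rxm;	/* success: hand the packet to the application */
rcd_done:
	vmxnet3_renew_desc(rxq, ring_idx, rep);	/* repost rep so the ring never runs dry */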
  

Comments

Thomas Monjalon July 24, 2015, 7:58 a.m. UTC | #1
Hi Marco,

It seems to be a new version of the patch.
Please, in order to ease patch management and review, could you add:
	- a version number
	- a changelog
	- threading with previous patches
It can be done with:
	- subject-prefix option
	- annotate option
	- in-reply-to option
as explained here:
	http://dpdk.org/dev#send
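
For example, a threaded v2 could be prepared and sent like this (filenames
are illustrative; the in-reply-to id is this patch's own Message-ID from the
header above):

	git format-patch -1 --subject-prefix="PATCH v2" \
		--in-reply-to='<1437699218-8783-1-git-send-email-mac_leehk@yahoo.com.hk>'
	git send-email --annotate --to dev@dpdk.org 0001-*.patch

With --annotate the mail opens in an editor, where the changelog can be
added after the "---" separator so it is not applied as part of the patch.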

Thanks
  
Thomas Monjalon Oct. 21, 2015, 4:39 p.m. UTC | #2
Any update?
Yong, have you looked at it?

2015-07-24 09:58, Thomas Monjalon:
> Hi Marco,
> 
> It seems to be a new version of the patch.
> Please, in order to ease patch management and review, could you add:
> 	- a version number
> 	- a changelog
> 	- threading with previous patches
> It can be done with:
> 	- subject-prefix option
> 	- annotate option
> 	- in-reply-to option
> as explained here:
> 	http://dpdk.org/dev#send
> 
> Thanks
  
Yong Wang Dec. 22, 2015, 9:36 p.m. UTC | #3
On 7/23/15, 5:53 PM, "dev on behalf of Marco Lee" <dev-bounces@dpdk.org on behalf of mac_leehk@yahoo.com.hk> wrote:


>The RX of VMXNET3 PMD will have deadlock when a lot of traffic coming in.

>The root cause is due to mbuf allocation fail in vmxnet3_post_rx_bufs()

>and there is no error handling when it is called from vmxnet3_recv_pkts().

>The RXD will not have "free" mbuf for it but the counter still increment.


Can you describe what counter this refers to?

>Finally, no packet can be received.

>

>This fix is allocate the mbuf first, if the allocation is failed,

>then reuse the old mbuf. If the allocation is success,

>the vmxnet3_post_rx_bufs() will call vmxnet3_renew_desc()

>and RXD will be renew inside.


I didn’t see this part of logic implemented.

>
>Signed-off-by: Marco Lee <mac_leehk@yahoo.com.hk/marco.lee@ruckuswireless.com>
>---
> drivers/net/vmxnet3/vmxnet3_rxtx.c |   37 +++++++++++++++++++++++++++++++++++-
> 1 file changed, 36 insertions(+), 1 deletion(-)
>
>diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c b/drivers/net/vmxnet3/vmxnet3_rxtx.c
>index 39ad6ef..cbed438 100644
>--- a/drivers/net/vmxnet3/vmxnet3_rxtx.c
>+++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c
>@@ -421,6 +421,35 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> 	return nb_tx;
> }
> 
>+static inline void
>+vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
>+		struct rte_mbuf *mbuf)
>+{
>+	uint32_t  val = 0;
>+	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
>+

Remove this blank line.

>+	struct Vmxnet3_RxDesc *rxd;
>+	vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
>+
>+	rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
>+

nit: this can be merged with the above definition.

>+	if (ring->rid == 0)
>+		val = VMXNET3_RXD_BTYPE_HEAD;
>+	else
>+		val = VMXNET3_RXD_BTYPE_BODY;
>+	

Remove the trailing space here.
 
>+
>+	buf_info->m = mbuf;
>+	buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
>+	buf_info->bufPA = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf);
>+
>+	rxd->addr = buf_info->bufPA;
>+	rxd->btype = val;
>+	rxd->len = buf_info->len;
>+	rxd->gen = ring->gen;
>+
>+	vmxnet3_cmd_ring_adv_next2fill(ring);
>+}
> /*
>  *  Allocates mbufs and clusters. Post rx descriptors with buffer details
>  *  so that device can receive packets in those buffers.
>@@ -578,6 +607,8 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
> 		if (nb_rx >= nb_pkts)
> 			break;
> 
>+		struct rte_mbuf *rep;

What does rep mean? Can you rename it to something easier to understand
(say newm, or m2)?

Also, please move the definition to the top of this block.

>+		rep = rte_rxmbuf_alloc(rxq->mb_pool);
> 		idx = rcd->rxdIdx;
> 		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
> 		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
>@@ -651,13 +682,17 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
> 
> 		vmxnet3_rx_offload(rcd, rxm);
> 
>+		if (unlikely(rep == NULL)) {
>+			rep = rxm;
>+			goto rcd_done;
>+		}

Should this be moved earlier? Also need to update the rx_buf_alloc_failure
counter.
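
A minimal sketch of the counter update being asked for here, assuming the
rx_buf_alloc_failure field already present in the queue's stats (this is not
part of the submitted patch):

	if (unlikely(rep == NULL)) {
		/* Record the failed allocation before recycling rxm. */
		rxq->stats.rx_buf_alloc_failure++;
		rep = rxm;	/* reuse the received mbuf; this packet is dropped */
		goto rcd_done;
	}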

> 		rx_pkts[nb_rx++] = rxm;
> rcd_done:
> 		rxq->cmd_ring[ring_idx].next2comp = idx;
> 		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
> 
> 		/* It's time to allocate some new buf and renew descriptors */
>-		vmxnet3_post_rx_bufs(rxq, ring_idx);
>+		vmxnet3_renew_desc(rxq, ring_idx, rep);
> 		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
> 			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
> 					       rxq->cmd_ring[ring_idx].next2fill);
>-- 
>1.7.9.5
>
  

Patch

diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c b/drivers/net/vmxnet3/vmxnet3_rxtx.c
index 39ad6ef..cbed438 100644
--- a/drivers/net/vmxnet3/vmxnet3_rxtx.c
+++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c
@@ -421,6 +421,35 @@  vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	return nb_tx;
 }
 
+static inline void
+vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id,
+		struct rte_mbuf *mbuf)
+{
+	uint32_t  val = 0;
+	struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];
+
+	struct Vmxnet3_RxDesc *rxd;
+	vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill];
+
+	rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);
+
+	if (ring->rid == 0)
+		val = VMXNET3_RXD_BTYPE_HEAD;
+	else
+		val = VMXNET3_RXD_BTYPE_BODY;
+	
+
+	buf_info->m = mbuf;
+	buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM);
+	buf_info->bufPA = RTE_MBUF_DATA_DMA_ADDR_DEFAULT(mbuf);
+
+	rxd->addr = buf_info->bufPA;
+	rxd->btype = val;
+	rxd->len = buf_info->len;
+	rxd->gen = ring->gen;
+
+	vmxnet3_cmd_ring_adv_next2fill(ring);
+}
 /*
  *  Allocates mbufs and clusters. Post rx descriptors with buffer details
  *  so that device can receive packets in those buffers.
@@ -578,6 +607,8 @@  vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		if (nb_rx >= nb_pkts)
 			break;
 
+		struct rte_mbuf *rep;
+		rep = rte_rxmbuf_alloc(rxq->mb_pool);
 		idx = rcd->rxdIdx;
 		ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);
 		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
@@ -651,13 +682,17 @@  vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 
 		vmxnet3_rx_offload(rcd, rxm);
 
+		if (unlikely(rep == NULL)) {
+			rep = rxm;
+			goto rcd_done;
+		}
 		rx_pkts[nb_rx++] = rxm;
 rcd_done:
 		rxq->cmd_ring[ring_idx].next2comp = idx;
 		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
 
 		/* It's time to allocate some new buf and renew descriptors */
-		vmxnet3_post_rx_bufs(rxq, ring_idx);
+		vmxnet3_renew_desc(rxq, ring_idx, rep);
 		if (unlikely(rxq->shared->ctrl.updateRxProd)) {
 			VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),
 					       rxq->cmd_ring[ring_idx].next2fill);