[dpdk-dev] [PATCH 4/7] vmxnet3: add support for multi-segment transmit

Yong Wang yongwang at vmware.com
Thu Mar 5 20:35:51 CET 2015


A quick glance over v2 shows that it only made the change for the max
segment check.  I am not sure whether all the other comments on v1 of
this patch were missed or ignored.  If it's the latter, can you explain
your reasoning for not addressing them?

On 2/25/15, 6:31 PM, "Stephen Hemminger" <stephen at networkplumber.org>
wrote:

>Change the sending loop to support multi-segment mbufs.
>The VMXNET3 API has start-of-packet and end-of-packet flags, so it
>is not hard to send multi-segment mbufs.
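>
>In outline, a condensed sketch of the loop added below (same ring and
>descriptor names as the code; error handling omitted):
>
>	first2fill = txq->cmd_ring.next2fill;
>	for (m_seg = txm; m_seg != NULL; m_seg = m_seg->next) {
>		gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
>		/* fill buffer address and length for this segment */
>		vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
>	}
>	/* gdesc now points at the last segment: mark end of packet */
>	gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
>	/* the SOP descriptor at first2fill is published last */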
>
>Also, update the descriptor as a 32-bit value rather than toggling
>bitfields, which is slower and error-prone.
>Based on code in the earlier driver and the Linux kernel driver.
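>
>For example, assigning the bitfields one at a time typically compiles
>to a read-modify-write per field:
>
>	txd->len = txm->data_len;	/* read-modify-write */
>	txd->gen = txq->cmd_ring.gen;	/* read-modify-write */
>
>while building the word once allows a single 32-bit store:
>
>	gdesc->dword[2] = (txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT) |
>			  m_seg->data_len;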
>
>Add a compiler barrier to make sure that the updates of the earlier
>descriptors are completed prior to the update of the generation bit on
>the start-of-packet descriptor.
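>
>That is, the device treats a descriptor as valid once its gen bit
>matches the ring's current generation, so the SOP gen bit must flip
>only after all other descriptor stores are visible:
>
>	/* ... all descriptors for the packet written above ... */
>	rte_compiler_barrier();
>	/* flipping the SOP gen bit hands the packet to the device */
>	gdesc->dword[2] ^= VMXNET3_TXD_GEN;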
>
>Signed-off-by: Stephen Hemminger <stephen at networkplumber.org>
>---
>v2 -- incorporate # of segments check
>
> lib/librte_pmd_vmxnet3/vmxnet3_ring.h |   1 +
> lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c | 137 ++++++++++++++++------------------
> 2 files changed, 65 insertions(+), 73 deletions(-)
>
>diff --git a/lib/librte_pmd_vmxnet3/vmxnet3_ring.h b/lib/librte_pmd_vmxnet3/vmxnet3_ring.h
>index ebe6268..612487e 100644
>--- a/lib/librte_pmd_vmxnet3/vmxnet3_ring.h
>+++ b/lib/librte_pmd_vmxnet3/vmxnet3_ring.h
>@@ -125,6 +125,7 @@ struct vmxnet3_txq_stats {
> 				     * the counters below track droppings due to
> 				     * different reasons
> 				     */
>+	uint64_t	drop_too_many_segs;
> 	uint64_t	drop_tso;
> 	uint64_t	tx_ring_full;
> };
>diff --git a/lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c b/lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c
>index 38ac811..884b57f 100644
>--- a/lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c
>+++ b/lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c
>@@ -312,20 +312,22 @@ vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq)
> 		VMXNET3_ASSERT(txq->cmd_ring.base[tcd->txdIdx].txd.eop == 1);
> #endif
> 		mbuf = txq->cmd_ring.buf_info[tcd->txdIdx].m;
>-		if (unlikely(mbuf == NULL))
>-			rte_panic("EOP desc does not point to a valid mbuf");
>-		else
>-			rte_pktmbuf_free(mbuf);
>+		rte_pktmbuf_free_seg(mbuf);
>+		txq->cmd_ring.buf_info[tcd->txdIdx].m = NULL;
> 
>+		while (txq->cmd_ring.next2comp != tcd->txdIdx) {
>+			mbuf = txq->cmd_ring.buf_info[txq->cmd_ring.next2comp].m;
>+			txq->cmd_ring.buf_info[txq->cmd_ring.next2comp].m = NULL;
>+			rte_pktmbuf_free_seg(mbuf);
> 
>-		txq->cmd_ring.buf_info[tcd->txdIdx].m = NULL;
>-		/* Mark the txd for which tcd was generated as completed */
>-		vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
>+			/* Mark the txd for which tcd was generated as completed */
>+			vmxnet3_cmd_ring_adv_next2comp(&txq->cmd_ring);
>+			completed++;
>+		}
> 
> 		vmxnet3_comp_ring_adv_next2proc(comp_ring);
> 		tcd = (struct Vmxnet3_TxCompDesc *)(comp_ring->base +
> 						    comp_ring->next2proc);
>-		completed++;
> 	}
> 
> 	PMD_TX_LOG(DEBUG, "Processed %d tx comps & command descs.", completed);
>@@ -336,13 +338,8 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> 		  uint16_t nb_pkts)
> {
> 	uint16_t nb_tx;
>-	Vmxnet3_TxDesc *txd = NULL;
>-	vmxnet3_buf_info_t *tbi = NULL;
>-	struct vmxnet3_hw *hw;
>-	struct rte_mbuf *txm;
> 	vmxnet3_tx_queue_t *txq = tx_queue;
>-
>-	hw = txq->hw;
>+	struct vmxnet3_hw *hw = txq->hw;
> 
> 	if (unlikely(txq->stopped)) {
> 		PMD_TX_LOG(DEBUG, "Tx queue is stopped.");
>@@ -354,75 +351,69 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> 
> 	nb_tx = 0;
> 	while (nb_tx < nb_pkts) {
>+		Vmxnet3_GenericDesc *gdesc;
>+		vmxnet3_buf_info_t *tbi;
>+		uint32_t first2fill, avail, dw2;
>+		struct rte_mbuf *txm = tx_pkts[nb_tx];
>+		struct rte_mbuf *m_seg = txm;
>+
>+		/* Is this packet excessively fragmented? If so, drop it */
>+		if (unlikely(txm->nb_segs > VMXNET3_MAX_TXD_PER_PKT)) {
>+			++txq->stats.drop_too_many_segs;
>+			++txq->stats.drop_total;
>+			rte_pktmbuf_free(txm);
>+			++nb_tx;
>+			continue;
>+		}
> 
>-		if (vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring)) {
>-			int copy_size = 0;
>-
>-			txm = tx_pkts[nb_tx];
>-			/* Don't support scatter packets yet, free them if met */
>-			if (txm->nb_segs != 1) {
>-				PMD_TX_LOG(DEBUG, "Don't support scatter packets yet, drop!");
>-				rte_pktmbuf_free(tx_pkts[nb_tx]);
>-				txq->stats.drop_total++;
>-
>-				nb_tx++;
>-				continue;
>-			}
>-
>-			txd = (Vmxnet3_TxDesc *)(txq->cmd_ring.base + txq->cmd_ring.next2fill);
>-			if (rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) {
>-				struct Vmxnet3_TxDataDesc *tdd;
>-
>-				tdd = txq->data_ring.base + txq->cmd_ring.next2fill;
>-				copy_size = rte_pktmbuf_pkt_len(txm);
>-				rte_memcpy(tdd->data, rte_pktmbuf_mtod(txm, char *), copy_size);
>-			}
>+		/* Is command ring full? */
>+		avail = vmxnet3_cmd_ring_desc_avail(&txq->cmd_ring);
>+		if (txm->nb_segs > avail) {
>+			++txq->stats.tx_ring_full;
>+			break;
>+		}
> 
>-			/* Fill the tx descriptor */
>+		/* use the previous gen bit for the SOP desc */
>+		dw2 = (txq->cmd_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT;
>+		first2fill = txq->cmd_ring.next2fill;
>+		do {
>+			/* Remember the transmit buffer for cleanup */
> 			tbi = txq->cmd_ring.buf_info + txq->cmd_ring.next2fill;
>-			tbi->bufPA = RTE_MBUF_DATA_DMA_ADDR(txm);
>-			if (copy_size)
>-				txd->addr = rte_cpu_to_le_64(txq->data_ring.basePA +
>-							txq->cmd_ring.next2fill *
>-							sizeof(struct Vmxnet3_TxDataDesc));
>-			else
>-				txd->addr = tbi->bufPA;
>-			txd->len = txm->data_len;
>+			tbi->m = m_seg;
> 
>-			/* Mark the last descriptor as End of Packet. */
>-			txd->cq = 1;
>-			txd->eop = 1;
>+			/* NB: the following assumes that the VMXNET3
>+			   maximum transmit buffer size (16K) is greater
>+			   than the maximum mbuf segment size. */
>+			gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill;
>+			gdesc->txd.addr = RTE_MBUF_DATA_DMA_ADDR(m_seg);
>+			gdesc->dword[2] = dw2 | m_seg->data_len;
>+			gdesc->dword[3] = 0;
> 
>-			/* Add VLAN tag if requested */
>-			if (txm->ol_flags & PKT_TX_VLAN_PKT) {
>-				txd->ti = 1;
>-				txd->tci = rte_cpu_to_le_16(txm->vlan_tci);
>-			}
>+			/* move to the next2fill descriptor */
>+			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
> 
>-			/* Record current mbuf for freeing it later in tx complete */
>-#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER
>-			VMXNET3_ASSERT(txm);
>-#endif
>-			tbi->m = txm;
>+			/* use the right gen for non-SOP desc */
>+			dw2 = txq->cmd_ring.gen << VMXNET3_TXD_GEN_SHIFT;
>+		} while ((m_seg = m_seg->next) != NULL);
> 
>-			/* Set the offloading mode to default */
>-			txd->hlen = 0;
>-			txd->om = VMXNET3_OM_NONE;
>-			txd->msscof = 0;
>+		/* Update the EOP descriptor */
>+		gdesc->dword[3] |= VMXNET3_TXD_EOP | VMXNET3_TXD_CQ;
> 
>-			/* finally flip the GEN bit of the SOP desc  */
>-			txd->gen = txq->cmd_ring.gen;
>-			txq->shared->ctrl.txNumDeferred++;
>+		/* Add VLAN tag if present */
>+		gdesc = txq->cmd_ring.base + first2fill;
>+		if (txm->ol_flags & PKT_TX_VLAN_PKT) {
>+			gdesc->txd.ti = 1;
>+			gdesc->txd.tci = txm->vlan_tci;
>+		}
> 
>-			/* move to the next2fill descriptor */
>-			vmxnet3_cmd_ring_adv_next2fill(&txq->cmd_ring);
>-			nb_tx++;
>+		/* TODO: Add transmit checksum offload here */
> 
>-		} else {
>-			PMD_TX_LOG(DEBUG, "No free tx cmd desc(s)");
>-			txq->stats.drop_total += (nb_pkts - nb_tx);
>-			break;
>-		}
>+		/* flip the GEN bit on the SOP */
>+		rte_compiler_barrier();
>+		gdesc->dword[2] ^= VMXNET3_TXD_GEN;
>+
>+		txq->shared->ctrl.txNumDeferred++;
>+		nb_tx++;
> 	}
> 
> 	PMD_TX_LOG(DEBUG, "vmxnet3 txThreshold: %u", txq->shared->ctrl.txThreshold);
>-- 
>2.1.4
>


