[dpdk-dev] [PATCH 2/2] net/af_xdp: make reserve/submit peek/release consistent

Xiaolong Ye xiaolong.ye at intel.com
Fri Apr 12 16:48:44 CEST 2019


As David pointed out, if we reserve N slots but only submit n of them,
we end up with an incorrect view of the number of available slots later
on, and we also get a wrong idx the next time xsk_ring_prod__reserve()
is called. The same applies to xsk_ring_cons__peek() and
xsk_ring_cons__release().

This patch ensures that reserve/submit and peek/release always operate
on matching counts.
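
For reference, a minimal sketch (not part of the patch, assuming the
libbpf <bpf/xsk.h> helpers this driver already uses) of the rule being
enforced: whatever count reserve()/peek() returns is the count that
must be passed to submit()/release():

#include <stdint.h>
#include <bpf/xsk.h>	/* header path may differ per libbpf install */

/* Producer side: only submit what was actually reserved and filled. */
static void fill_sketch(struct xsk_ring_prod *fq, const uint64_t *addrs,
			uint32_t n)
{
	uint32_t idx, i;
	/* with libbpf, reserve() is all-or-nothing: it returns n or 0 */
	uint32_t reserved = xsk_ring_prod__reserve(fq, n, &idx);

	for (i = 0; i < reserved; i++)
		*xsk_ring_prod__fill_addr(fq, idx + i) = addrs[i];

	xsk_ring_prod__submit(fq, reserved);
}

/* Consumer side: release exactly the count that peek() returned. */
static void drain_sketch(struct xsk_ring_cons *rx, uint32_t n)
{
	uint32_t idx;
	uint32_t rcvd = xsk_ring_cons__peek(rx, n, &idx);

	/* ... process descriptors idx .. idx + rcvd - 1 ... */

	xsk_ring_cons__release(rx, rcvd);
}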

Fixes: f1debd77efaf ("net/af_xdp: introduce AF_XDP PMD")

Reported-by: David Marchand <david.marchand at redhat.com>
Signed-off-by: Xiaolong Ye <xiaolong.ye at intel.com>
---
 drivers/net/af_xdp/rte_eth_af_xdp.c | 80 +++++++++++++++--------------
 1 file changed, 41 insertions(+), 39 deletions(-)

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 5cc643ce2..76a6a8331 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -138,22 +138,19 @@ reserve_fill_queue(struct xsk_umem_info *umem, int reserve_size)
 {
 	struct xsk_ring_prod *fq = &umem->fq;
 	uint32_t idx;
-	int i, ret;
-
-	ret = xsk_ring_prod__reserve(fq, reserve_size, &idx);
-	if (unlikely(!ret)) {
-		AF_XDP_LOG(ERR, "Failed to reserve enough fq descs.\n");
-		return ret;
-	}
+	int i;
 
 	for (i = 0; i < reserve_size; i++) {
 		__u64 *fq_addr;
 		void *addr = NULL;
 		if (rte_ring_dequeue(umem->buf_ring, &addr)) {
-			i--;
 			break;
 		}
-		fq_addr = xsk_ring_prod__fill_addr(fq, idx++);
+		if (unlikely(!xsk_ring_prod__reserve(fq, 1, &idx))) {
+			AF_XDP_LOG(WARNING, "Failed to reserve 1 fq desc.\n");
+			break;
+		}
+		fq_addr = xsk_ring_prod__fill_addr(fq, idx);
 		*fq_addr = (uint64_t)addr;
 	}
 
@@ -179,6 +176,9 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 	nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_TX_BATCH_SIZE);
 
+	if (unlikely(rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, nb_pkts) != 0))
+		return 0;
+
 	rcvd = xsk_ring_cons__peek(rx, nb_pkts, &idx_rx);
 	if (rcvd == 0)
 		return 0;
@@ -186,9 +186,6 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	if (xsk_prod_nb_free(fq, free_thresh) >= free_thresh)
 		(void)reserve_fill_queue(umem, ETH_AF_XDP_RX_BATCH_SIZE);
 
-	if (unlikely(rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, rcvd) != 0))
-		return 0;
-
 	for (i = 0; i < rcvd; i++) {
 		const struct xdp_desc *desc;
 		uint64_t addr;
@@ -211,6 +208,10 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 	xsk_ring_cons__release(rx, rcvd);
 
+	/* free the extra mbufs */
+	for (i = rcvd; i < nb_pkts; i++)
+		rte_pktmbuf_free(mbufs[i]);
+
 	/* statistics */
 	rxq->stats.rx_pkts += (rcvd - dropped);
 	rxq->stats.rx_bytes += rx_bytes;
@@ -261,55 +262,56 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct xsk_umem_info *umem = txq->pair->umem;
 	struct rte_mbuf *mbuf;
 	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
+	struct rte_mbuf *valid_bufs[ETH_AF_XDP_TX_BATCH_SIZE];
 	unsigned long tx_bytes = 0;
-	int i, valid = 0;
+	int i;
+	uint16_t nb_valid = 0;
 	uint32_t idx_tx;
+	uint32_t buf_len = ETH_AF_XDP_FRAME_SIZE - ETH_AF_XDP_DATA_HEADROOM;
 
 	nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_TX_BATCH_SIZE);
 
 	pull_umem_cq(umem, nb_pkts);
 
-	nb_pkts = rte_ring_dequeue_bulk(umem->buf_ring, addrs,
-					nb_pkts, NULL);
-	if (nb_pkts == 0)
+	for (i = 0; i < nb_pkts; i++) {
+		if (bufs[i]->pkt_len <= buf_len)
+			valid_bufs[nb_valid++] = bufs[i];
+		else
+			rte_pktmbuf_free(bufs[i]);
+	}
+
+	nb_valid = rte_ring_dequeue_bulk(umem->buf_ring, addrs,
+					nb_valid, NULL);
+	if (nb_valid == 0)
 		return 0;
 
-	if (xsk_ring_prod__reserve(&txq->tx, nb_pkts, &idx_tx) != nb_pkts) {
+	if (xsk_ring_prod__reserve(&txq->tx, nb_valid, &idx_tx) != nb_valid) {
 		kick_tx(txq);
-		rte_ring_enqueue_bulk(umem->buf_ring, addrs, nb_pkts, NULL);
+		rte_ring_enqueue_bulk(umem->buf_ring, addrs, nb_valid, NULL);
 		return 0;
 	}
 
-	for (i = 0; i < nb_pkts; i++) {
+	for (i = 0; i < nb_valid; i++) {
 		struct xdp_desc *desc;
 		void *pkt;
-		uint32_t buf_len = ETH_AF_XDP_FRAME_SIZE
-					- ETH_AF_XDP_DATA_HEADROOM;
 		desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx + i);
-		mbuf = bufs[i];
-		if (mbuf->pkt_len <= buf_len) {
-			desc->addr = (uint64_t)addrs[valid];
-			desc->len = mbuf->pkt_len;
-			pkt = xsk_umem__get_data(umem->mz->addr,
-						 desc->addr);
-			rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
-			       desc->len);
-			valid++;
-			tx_bytes += mbuf->pkt_len;
-		}
+		mbuf = valid_bufs[i];
+		desc->addr = (uint64_t)addrs[i];
+		desc->len = mbuf->pkt_len;
+		pkt = xsk_umem__get_data(umem->mz->addr,
+					 desc->addr);
+		rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
+			   desc->len);
+		tx_bytes += mbuf->pkt_len;
 		rte_pktmbuf_free(mbuf);
 	}
 
-	xsk_ring_prod__submit(&txq->tx, nb_pkts);
+	xsk_ring_prod__submit(&txq->tx, nb_valid);
 
 	kick_tx(txq);
 
-	if (valid < nb_pkts)
-		rte_ring_enqueue_bulk(umem->buf_ring, &addrs[valid],
-				 nb_pkts - valid, NULL);
-
-	txq->stats.err_pkts += nb_pkts - valid;
-	txq->stats.tx_pkts += valid;
+	txq->stats.err_pkts += nb_pkts - nb_valid;
+	txq->stats.tx_pkts += nb_valid;
 	txq->stats.tx_bytes += tx_bytes;
 
 	return nb_pkts;
-- 
2.17.1


