[dpdk-dev] [PATCH] net/thunderx: reduce writes to mbuf

Jerin Jacob jerin.jacob at caviumnetworks.com
Fri Apr 14 11:41:07 CEST 2017


With the mbuf rework, we now have 8 contiguous bytes to be
rearmed in the mbuf at 8B naturally aligned address.
Use single 8B write to avoid multiple 2B writes in Rx path.

Signed-off-by: Jerin Jacob <jerin.jacob at caviumnetworks.com>
Signed-off-by: Maciej Czekaj <maciej.czekaj at caviumnetworks.com>
---
 drivers/net/thunderx/nicvf_ethdev.c | 18 ++++++++++++++++++
 drivers/net/thunderx/nicvf_rxtx.c   | 28 ++++++++++------------------
 drivers/net/thunderx/nicvf_rxtx.h   | 27 +++++++++++++++++++++++++++
 drivers/net/thunderx/nicvf_struct.h | 11 +++++++++++
 4 files changed, 66 insertions(+), 18 deletions(-)

diff --git a/drivers/net/thunderx/nicvf_ethdev.c b/drivers/net/thunderx/nicvf_ethdev.c
index 6c3670aee..76a2ae649 100644
--- a/drivers/net/thunderx/nicvf_ethdev.c
+++ b/drivers/net/thunderx/nicvf_ethdev.c
@@ -1231,6 +1231,23 @@ nicvf_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t qidx)
 	return nicvf_vf_stop_tx_queue(dev, nic, qidx);
 }
 
+/*
+ * Build the per-queue 8B mbuf "rearm" template.
+ *
+ * After the mbuf rework, data_off/refcnt/nb_segs/port form 8 contiguous,
+ * naturally aligned bytes covered by rearm_data (checked at build time by
+ * the RTE_BUILD_BUG_ON below), so they can be captured once here and later
+ * restored with a single 64-bit store per packet in the Rx path.
+ * Called once from nicvf_dev_rx_queue_setup().
+ */
+static inline void
+nicvf_rxq_mbuf_setup(struct nicvf_rxq *rxq)
+{
+	uintptr_t p;
+	struct rte_mbuf mb_def;
+
+	RTE_BUILD_BUG_ON(sizeof(union mbuf_initializer) != 8);
+	mb_def.nb_segs = 1;
+	mb_def.data_off = RTE_PKTMBUF_HEADROOM;
+	mb_def.port = rxq->port_id;
+	rte_mbuf_refcnt_set(&mb_def, 1);
+
+	/* Prevent compiler reordering: rearm_data covers previous fields */
+	rte_compiler_barrier();
+	p = (uintptr_t)&mb_def.rearm_data;
+	rxq->mbuf_initializer.value = *(uint64_t *)p;
+}
 
 static int
 nicvf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t qidx,
@@ -1323,6 +1340,7 @@ nicvf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t qidx,
 	else
 		rxq->rbptr_offset = NICVF_CQE_RBPTR_WORD;
 
+	nicvf_rxq_mbuf_setup(rxq);
 
 	/* Alloc completion queue */
 	if (nicvf_qset_cq_alloc(dev, nic, rxq, rxq->queue_id, nb_desc)) {
diff --git a/drivers/net/thunderx/nicvf_rxtx.c b/drivers/net/thunderx/nicvf_rxtx.c
index fc43b747a..003ab0693 100644
--- a/drivers/net/thunderx/nicvf_rxtx.c
+++ b/drivers/net/thunderx/nicvf_rxtx.c
@@ -430,9 +430,9 @@ nicvf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	union cq_entry_t *desc = rxq->desc;
 	const uint64_t cqe_mask = rxq->qlen_mask;
 	uint64_t rb0_ptr, mbuf_phys_off = rxq->mbuf_phys_off;
+	const uint64_t mbuf_init = rxq->mbuf_initializer.value;
 	uint32_t cqe_head = rxq->head & cqe_mask;
 	int32_t available_space = rxq->available_space;
-	uint8_t port_id = rxq->port_id;
 	const uint8_t rbptr_offset = rxq->rbptr_offset;
 
 	to_process = nicvf_rx_pkts_to_process(rxq, nb_pkts, available_space);
@@ -448,17 +448,12 @@ nicvf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		rb0_ptr = *((uint64_t *)cqe_rx + rbptr_offset);
 		pkt = (struct rte_mbuf *)nicvf_mbuff_phy2virt
 				(rb0_ptr - cqe_rx_w1.align_pad, mbuf_phys_off);
-
 		pkt->ol_flags = 0;
-		pkt->port = port_id;
 		pkt->data_len = cqe_rx_w3.rb0_sz;
-		pkt->data_off = RTE_PKTMBUF_HEADROOM + cqe_rx_w1.align_pad;
-		pkt->nb_segs = 1;
 		pkt->pkt_len = cqe_rx_w3.rb0_sz;
 		pkt->packet_type = nicvf_rx_classify_pkt(cqe_rx_w0);
-
+		nicvf_mbuff_init_update(pkt, mbuf_init, cqe_rx_w1.align_pad);
 		nicvf_rx_offload(cqe_rx_w0, cqe_rx_w2, pkt);
-		rte_mbuf_refcnt_set(pkt, 1);
 		rx_pkts[i] = pkt;
 		cqe_head = (cqe_head + 1) & cqe_mask;
 		nicvf_prefetch_store_keep(pkt);
@@ -481,8 +476,9 @@ nicvf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 
 static inline uint16_t __hot
 nicvf_process_cq_mseg_entry(struct cqe_rx_t *cqe_rx,
-			uint64_t mbuf_phys_off, uint8_t port_id,
-			struct rte_mbuf **rx_pkt, uint8_t rbptr_offset)
+			uint64_t mbuf_phys_off,
+			struct rte_mbuf **rx_pkt, uint8_t rbptr_offset,
+			uint64_t mbuf_init)
 {
 	struct rte_mbuf *pkt, *seg, *prev;
 	cqe_rx_word0_t cqe_rx_w0;
@@ -501,12 +497,10 @@ nicvf_process_cq_mseg_entry(struct cqe_rx_t *cqe_rx,
 			(rb_ptr[0] - cqe_rx_w1.align_pad, mbuf_phys_off);
 
 	pkt->ol_flags = 0;
-	pkt->port = port_id;
-	pkt->data_off = RTE_PKTMBUF_HEADROOM + cqe_rx_w1.align_pad;
-	pkt->nb_segs = nb_segs;
 	pkt->pkt_len = cqe_rx_w1.pkt_len;
 	pkt->data_len = rb_sz[nicvf_frag_num(0)];
-	rte_mbuf_refcnt_set(pkt, 1);
+	nicvf_mbuff_init_mseg_update(
+				pkt, mbuf_init, cqe_rx_w1.align_pad, nb_segs);
 	pkt->packet_type = nicvf_rx_classify_pkt(cqe_rx_w0);
 	nicvf_rx_offload(cqe_rx_w0, cqe_rx_w2, pkt);
 
@@ -518,9 +512,7 @@ nicvf_process_cq_mseg_entry(struct cqe_rx_t *cqe_rx,
 
 		prev->next = seg;
 		seg->data_len = rb_sz[nicvf_frag_num(seg_idx)];
-		seg->port = port_id;
-		seg->data_off = RTE_PKTMBUF_HEADROOM;
-		rte_mbuf_refcnt_set(seg, 1);
+		nicvf_mbuff_init_update(seg, mbuf_init, 0);
 
 		prev = seg;
 	}
@@ -541,7 +533,7 @@ nicvf_recv_pkts_multiseg(void *rx_queue, struct rte_mbuf **rx_pkts,
 	uint32_t i, to_process, cqe_head, buffers_consumed = 0;
 	int32_t available_space = rxq->available_space;
 	uint16_t nb_segs;
-	const uint8_t port_id = rxq->port_id;
+	const uint64_t mbuf_init = rxq->mbuf_initializer.value;
 	const uint8_t rbptr_offset = rxq->rbptr_offset;
 
 	cqe_head = rxq->head & cqe_mask;
@@ -552,7 +544,7 @@ nicvf_recv_pkts_multiseg(void *rx_queue, struct rte_mbuf **rx_pkts,
 		cq_entry = &desc[cqe_head];
 		cqe_rx = (struct cqe_rx_t *)cq_entry;
 		nb_segs = nicvf_process_cq_mseg_entry(cqe_rx, mbuf_phys_off,
-				port_id, rx_pkts + i, rbptr_offset);
+			rx_pkts + i, rbptr_offset, mbuf_init);
 		buffers_consumed += nb_segs;
 		cqe_head = (cqe_head + 1) & cqe_mask;
 		nicvf_prefetch_store_keep(rx_pkts[i]);
diff --git a/drivers/net/thunderx/nicvf_rxtx.h b/drivers/net/thunderx/nicvf_rxtx.h
index 9dad8a5ab..3631ff22f 100644
--- a/drivers/net/thunderx/nicvf_rxtx.h
+++ b/drivers/net/thunderx/nicvf_rxtx.h
@@ -84,6 +84,33 @@ fill_sq_desc_gather(union sq_entry_t *entry, struct rte_mbuf *pkt)
 }
 #endif
 
+/*
+ * Rearm a single-segment mbuf with one 64-bit store: write the cached
+ * queue template (data_off/refcnt/nb_segs/port) into pkt->rearm_data,
+ * adding the CQE alignment pad 'apad' to data_off.
+ *
+ * On little-endian, data_off occupies the low 16 bits of the word, so a
+ * plain 64-bit add updates it in place — assumes apad is small enough
+ * that data_off never carries into refcnt (align_pad comes from a narrow
+ * CQE bitfield; TODO confirm width). Big-endian updates the field directly.
+ */
+static inline void
+nicvf_mbuff_init_update(struct rte_mbuf *pkt, const uint64_t mbuf_init,
+				uint16_t apad)
+{
+	union mbuf_initializer init = {.value = mbuf_init};
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+	init.fields.data_off += apad;
+#else
+	init.value += apad;
+#endif
+	*(uint64_t *)(&pkt->rearm_data) = init.value;
+}
+
+/*
+ * Rearm the head mbuf of a multi-segment packet. Same single 64-bit store
+ * scheme as nicvf_mbuff_init_update(), but the template's nb_segs (1) is
+ * overwritten with the actual segment count before the store. nb_segs is
+ * set after the endian-specific data_off adjustment, so a little-endian
+ * carry (if any) cannot corrupt it — assumes apad never carries past
+ * refcnt; TODO confirm align_pad field width.
+ */
+static inline void
+nicvf_mbuff_init_mseg_update(struct rte_mbuf *pkt, const uint64_t mbuf_init,
+						uint16_t apad, uint16_t nb_segs)
+{
+	union mbuf_initializer init = {.value = mbuf_init};
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+	init.fields.data_off += apad;
+#else
+	init.value += apad;
+#endif
+	init.fields.nb_segs = nb_segs;
+	*(uint64_t *)(&pkt->rearm_data) = init.value;
+}
+
 uint32_t nicvf_dev_rx_queue_count(struct rte_eth_dev *dev, uint16_t queue_idx);
 uint32_t nicvf_dev_rbdr_refill(struct rte_eth_dev *dev, uint16_t queue_idx);
 
diff --git a/drivers/net/thunderx/nicvf_struct.h b/drivers/net/thunderx/nicvf_struct.h
index 5bc6d5773..34c41b791 100644
--- a/drivers/net/thunderx/nicvf_struct.h
+++ b/drivers/net/thunderx/nicvf_struct.h
@@ -72,10 +72,21 @@ struct nicvf_txq {
 	uint16_t tx_free_thresh;
 } __rte_cache_aligned;
 
+/*
+ * 8B overlay of the rte_mbuf rearm area: data_off, refcnt, nb_segs and
+ * port as four consecutive 16-bit fields, with 'value' giving the whole
+ * word for a single 64-bit store in the Rx path. Field order must match
+ * struct rte_mbuf starting at rearm_data; sizeof is verified by the
+ * RTE_BUILD_BUG_ON in nicvf_rxq_mbuf_setup().
+ */
+union mbuf_initializer {
+	struct {
+		uint16_t data_off;
+		uint16_t refcnt;
+		uint16_t nb_segs;
+		uint16_t port;
+	} fields;
+	uint64_t value;
+};
+
 struct nicvf_rxq {
 	uint64_t mbuf_phys_off;
 	uintptr_t cq_status;
 	uintptr_t cq_door;
+	union mbuf_initializer mbuf_initializer;
 	nicvf_phys_addr_t phys;
 	union cq_entry_t *desc;
 	struct nicvf_rbdr *shared_rbdr;
-- 
2.12.2



More information about the dev mailing list