[dpdk-dev] [PATCH 1/4] cxgbe: improve latency for slow traffic

Rahul Lakkireddy rahul.lakkireddy at chelsio.com
Sat May 27 05:47:57 CEST 2017


TX coalescing waits for ETH_COALESCE_PKT_NUM packets to accumulate
across bursts before transmitting them.  For slow traffic, such as
100 PPS, this approach adds latency since packets arrive one at a
time and tx coalescing has to wait for ETH_COALESCE_PKT_NUM packets
to arrive before transmitting anything.

To fix this:

- Update rx path to detect new responses via the queue's status page,
  instead of the per-descriptor generation bit, and receive packets
  only when either the ingress interrupt timer threshold (5 us) or
  the ingress interrupt packet count threshold (32 packets) fires,
  whichever happens first (see the first sketch below).

- If the number of packets coalesced so far reaches the number of
  packets passed to the tx burst function, stop coalescing and
  transmit these packets immediately, since no more packets will
  arrive in this burst (see the second sketch below).

Also add a compile-time option, CONFIG_RTE_LIBRTE_CXGBE_TPUT, to
favor throughput over latency; it is enabled by default.
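
For illustration, the rx-side change boils down to comparing the
software consumer index against the producer index that the hardware
posts in the queue's status page.  A minimal sketch of the new stop
condition in process_responses() (loop body elided; q->stat is the
status-page pointer added by this patch):

    while (likely(budget_left)) {
            /* Stop once the consumer index catches up with the
             * producer index written by hardware to the status page.
             */
            if (q->cidx == ntohs(q->stat->pidx))
                    break;
            /* ... process the response at q->cur_desc ... */
    }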
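
Similarly, the tx-side early-ship condition in tx_do_packet_coalesce()
amounts to the following (condensed from the sge.c hunk below):

    /* Ship the coalesced work request once the limit is reached,
     * or, when built for latency, as soon as the whole burst has
     * been coalesced and no further packets are coming.
     */
    if (++q->coalesce.idx == ETH_COALESCE_PKT_NUM
    #ifndef RTE_LIBRTE_CXGBE_TPUT
        || q->coalesce.idx >= nb_pkts
    #endif
        )
            ship_tx_pkt_coalesce_wr(adap, txq);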

Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy at chelsio.com>
Signed-off-by: Kumar Sanghvi <kumaras at chelsio.com>
---
 config/common_base                      |   3 +-
 doc/guides/nics/cxgbe.rst               |   4 ++
 doc/guides/rel_notes/release_17_08.rst  |   5 ++
 drivers/net/cxgbe/base/adapter.h        |   4 +-
 drivers/net/cxgbe/base/t4_regs_values.h |   2 +-
 drivers/net/cxgbe/base/t4fw_interface.h |   8 +++
 drivers/net/cxgbe/cxgbe_compat.h        |  11 +++-
 drivers/net/cxgbe/cxgbe_ethdev.c        |   3 +-
 drivers/net/cxgbe/cxgbe_main.c          |   5 +-
 drivers/net/cxgbe/sge.c                 | 109 ++++++++++++++++----------------
 10 files changed, 92 insertions(+), 62 deletions(-)

diff --git a/config/common_base b/config/common_base
index 67ef2ec..b2a6ff6 100644
--- a/config/common_base
+++ b/config/common_base
@@ -240,7 +240,7 @@ CONFIG_RTE_LIBRTE_BNX2X_MF_SUPPORT=n
 CONFIG_RTE_LIBRTE_BNX2X_DEBUG_PERIODIC=n
 
 #
-# Compile burst-oriented Chelsio Terminator 10GbE/40GbE (CXGBE) PMD
+# Compile burst-oriented Chelsio Terminator (CXGBE) PMD
 #
 CONFIG_RTE_LIBRTE_CXGBE_PMD=y
 CONFIG_RTE_LIBRTE_CXGBE_DEBUG=n
@@ -248,6 +248,7 @@ CONFIG_RTE_LIBRTE_CXGBE_DEBUG_REG=n
 CONFIG_RTE_LIBRTE_CXGBE_DEBUG_MBOX=n
 CONFIG_RTE_LIBRTE_CXGBE_DEBUG_TX=n
 CONFIG_RTE_LIBRTE_CXGBE_DEBUG_RX=n
+CONFIG_RTE_LIBRTE_CXGBE_TPUT=y
 
 #
 # Compile burst-oriented Cisco ENIC PMD driver
diff --git a/doc/guides/nics/cxgbe.rst b/doc/guides/nics/cxgbe.rst
index 176c189..8651a7b 100644
--- a/doc/guides/nics/cxgbe.rst
+++ b/doc/guides/nics/cxgbe.rst
@@ -130,6 +130,10 @@ enabling debugging options may affect system performance.
 
   Toggle display of receiving data path run-time check messages.
 
+- ``CONFIG_RTE_LIBRTE_CXGBE_TPUT`` (default **y**)
+
+  Toggle behaviour to prefer Throughput or Latency.
+
 .. _driver-compilation:
 
 Driver compilation and testing
diff --git a/doc/guides/rel_notes/release_17_08.rst b/doc/guides/rel_notes/release_17_08.rst
index 39a3398..bd4ea2c 100644
--- a/doc/guides/rel_notes/release_17_08.rst
+++ b/doc/guides/rel_notes/release_17_08.rst
@@ -79,6 +79,11 @@ EAL
 Drivers
 ~~~~~~~
 
+* **net/cxgbe: latency and performance improvements**
+
+  TX and RX path reworked to improve performance.  Also reduced latency
+  for slow traffic.
+
 
 Libraries
 ~~~~~~~~~
diff --git a/drivers/net/cxgbe/base/adapter.h b/drivers/net/cxgbe/base/adapter.h
index cc89e49..58c6903 100644
--- a/drivers/net/cxgbe/base/adapter.h
+++ b/drivers/net/cxgbe/base/adapter.h
@@ -148,6 +148,7 @@ struct sge_rspq {                   /* state for an SGE response queue */
 
 	void __iomem *bar2_addr;    /* address of BAR2 Queue registers */
 	unsigned int bar2_qid;      /* Queue ID for BAR2 Queue registers */
+	struct sge_qstat *stat;
 
 	unsigned int cidx;          /* consumer index */
 	unsigned int gts_idx;	    /* last gts write sent */
@@ -708,7 +709,8 @@ void reclaim_completed_tx(struct sge_txq *q);
 void t4_free_sge_resources(struct adapter *adap);
 void t4_sge_tx_monitor_start(struct adapter *adap);
 void t4_sge_tx_monitor_stop(struct adapter *adap);
-int t4_eth_xmit(struct sge_eth_txq *txq, struct rte_mbuf *mbuf);
+int t4_eth_xmit(struct sge_eth_txq *txq, struct rte_mbuf *mbuf,
+		uint16_t nb_pkts);
 int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
 		     const struct pkt_gl *gl);
 int t4_sge_init(struct adapter *adap);
diff --git a/drivers/net/cxgbe/base/t4_regs_values.h b/drivers/net/cxgbe/base/t4_regs_values.h
index 1326594..9085ff6d 100644
--- a/drivers/net/cxgbe/base/t4_regs_values.h
+++ b/drivers/net/cxgbe/base/t4_regs_values.h
@@ -82,7 +82,7 @@
 /*
  * Ingress Context field values
  */
-#define X_UPDATEDELIVERY_INTERRUPT	1
+#define X_UPDATEDELIVERY_STATUS_PAGE	2
 
 #define X_RSPD_TYPE_FLBUF		0
 #define X_RSPD_TYPE_CPL			1
diff --git a/drivers/net/cxgbe/base/t4fw_interface.h b/drivers/net/cxgbe/base/t4fw_interface.h
index fcc61bf..6283fe9 100644
--- a/drivers/net/cxgbe/base/t4fw_interface.h
+++ b/drivers/net/cxgbe/base/t4fw_interface.h
@@ -84,6 +84,7 @@ enum fw_memtype {
 enum fw_wr_opcodes {
 	FW_ETH_TX_PKT_WR	= 0x08,
 	FW_ETH_TX_PKTS_WR	= 0x09,
+	FW_ETH_TX_PKTS2_WR      = 0x78,
 };
 
 /*
@@ -591,6 +592,13 @@ struct fw_iq_cmd {
 #define G_FW_IQ_CMD_IQESIZE(x)	\
 	(((x) >> S_FW_IQ_CMD_IQESIZE) & M_FW_IQ_CMD_IQESIZE)
 
+#define S_FW_IQ_CMD_IQRO                30
+#define M_FW_IQ_CMD_IQRO                0x1
+#define V_FW_IQ_CMD_IQRO(x)             ((x) << S_FW_IQ_CMD_IQRO)
+#define G_FW_IQ_CMD_IQRO(x)             \
+	(((x) >> S_FW_IQ_CMD_IQRO) & M_FW_IQ_CMD_IQRO)
+#define F_FW_IQ_CMD_IQRO                V_FW_IQ_CMD_IQRO(1U)
+
 #define S_FW_IQ_CMD_IQFLINTCONGEN	27
 #define M_FW_IQ_CMD_IQFLINTCONGEN	0x1
 #define V_FW_IQ_CMD_IQFLINTCONGEN(x)	((x) << S_FW_IQ_CMD_IQFLINTCONGEN)
diff --git a/drivers/net/cxgbe/cxgbe_compat.h b/drivers/net/cxgbe/cxgbe_compat.h
index 1551cbf..03bba9f 100644
--- a/drivers/net/cxgbe/cxgbe_compat.h
+++ b/drivers/net/cxgbe/cxgbe_compat.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2014-2015 Chelsio Communications.
+ *   Copyright(c) 2014-2017 Chelsio Communications.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -226,6 +226,15 @@ static inline int cxgbe_fls(int x)
 	return x ? sizeof(x) * 8 - __builtin_clz(x) : 0;
 }
 
+/**
+ * cxgbe_ffs - find first bit set
+ * @x: the word to search
+ */
+static inline int cxgbe_ffs(int x)
+{
+	return x ? __builtin_ffs(x) : 0;
+}
+
 static inline unsigned long ilog2(unsigned long n)
 {
 	unsigned int e = 0;
diff --git a/drivers/net/cxgbe/cxgbe_ethdev.c b/drivers/net/cxgbe/cxgbe_ethdev.c
index ac70f22..7282575 100644
--- a/drivers/net/cxgbe/cxgbe_ethdev.c
+++ b/drivers/net/cxgbe/cxgbe_ethdev.c
@@ -104,7 +104,8 @@ static uint16_t cxgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		pkts_remain = nb_pkts - total_sent;
 
 		for (pkts_sent = 0; pkts_sent < pkts_remain; pkts_sent++) {
-			ret = t4_eth_xmit(txq, tx_pkts[total_sent + pkts_sent]);
+			ret = t4_eth_xmit(txq, tx_pkts[total_sent + pkts_sent],
+					  nb_pkts);
 			if (ret < 0)
 				break;
 		}
diff --git a/drivers/net/cxgbe/cxgbe_main.c b/drivers/net/cxgbe/cxgbe_main.c
index 42238ef..2522354 100644
--- a/drivers/net/cxgbe/cxgbe_main.c
+++ b/drivers/net/cxgbe/cxgbe_main.c
@@ -301,7 +301,7 @@ void cfg_queues(struct rte_eth_dev *eth_dev)
 		for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
 			struct sge_eth_rxq *r = &s->ethrxq[i];
 
-			init_rspq(adap, &r->rspq, 0, 0, 1024, 64);
+			init_rspq(adap, &r->rspq, 5, 32, 1024, 64);
 			r->usembufs = 1;
 			r->fl.size = (r->usembufs ? 1024 : 72);
 		}
@@ -445,6 +445,9 @@ static int adap_init0_tweaks(struct adapter *adapter)
 			 V_CREDITCNT(M_CREDITCNT) | M_CREDITCNTPACKING,
 			 V_CREDITCNT(3) | V_CREDITCNTPACKING(1));
 
+	t4_set_reg_field(adapter, A_SGE_INGRESS_RX_THRESHOLD,
+			 V_THRESHOLD_3(M_THRESHOLD_3), V_THRESHOLD_3(32U));
+
 	t4_set_reg_field(adapter, A_SGE_CONTROL2, V_IDMAARBROUNDROBIN(1U),
 			 V_IDMAARBROUNDROBIN(1U));
 
diff --git a/drivers/net/cxgbe/sge.c b/drivers/net/cxgbe/sge.c
index 020879a..d98c3f6 100644
--- a/drivers/net/cxgbe/sge.c
+++ b/drivers/net/cxgbe/sge.c
@@ -848,7 +848,7 @@ static inline void ship_tx_pkt_coalesce_wr(struct adapter *adap,
 
 	/* fill the pkts WR header */
 	wr = (void *)&q->desc[q->pidx];
-	wr->op_pkd = htonl(V_FW_WR_OP(FW_ETH_TX_PKTS_WR));
+	wr->op_pkd = htonl(V_FW_WR_OP(FW_ETH_TX_PKTS2_WR));
 
 	wr_mid = V_FW_WR_LEN16(DIV_ROUND_UP(q->coalesce.flits, 2));
 	ndesc = flits_to_desc(q->coalesce.flits);
@@ -971,7 +971,7 @@ static inline int tx_do_packet_coalesce(struct sge_eth_txq *txq,
 					struct rte_mbuf *mbuf,
 					int flits, struct adapter *adap,
 					const struct port_info *pi,
-					dma_addr_t *addr)
+					dma_addr_t *addr, uint16_t nb_pkts)
 {
 	u64 cntrl, *end;
 	struct sge_txq *q = &txq->q;
@@ -981,6 +981,10 @@ static inline int tx_do_packet_coalesce(struct sge_eth_txq *txq,
 	struct tx_sw_desc *sd;
 	unsigned int idx = q->coalesce.idx, len = mbuf->pkt_len;
 
+#ifdef RTE_LIBRTE_CXGBE_TPUT
+	RTE_SET_USED(nb_pkts);
+#endif
+
 	if (q->coalesce.type == 0) {
 		mc = (struct ulp_txpkt *)q->coalesce.ptr;
 		mc->cmd_dest = htonl(V_ULPTX_CMD(4) | V_ULP_TXPKT_DEST(0) |
@@ -1050,7 +1054,11 @@ static inline int tx_do_packet_coalesce(struct sge_eth_txq *txq,
 	sd->coalesce.idx = (idx & 1) + 1;
 
 	/* send the coaelsced work request if max reached */
-	if (++q->coalesce.idx == ETH_COALESCE_PKT_NUM)
+	if (++q->coalesce.idx == ETH_COALESCE_PKT_NUM
+#ifndef RTE_LIBRTE_CXGBE_TPUT
+	    || q->coalesce.idx >= nb_pkts
+#endif
+	    )
 		ship_tx_pkt_coalesce_wr(adap, txq);
 	return 0;
 }
@@ -1062,7 +1070,8 @@ static inline int tx_do_packet_coalesce(struct sge_eth_txq *txq,
  *
  * Add a packet to an SGE Ethernet Tx queue.  Runs with softirqs disabled.
  */
-int t4_eth_xmit(struct sge_eth_txq *txq, struct rte_mbuf *mbuf)
+int t4_eth_xmit(struct sge_eth_txq *txq, struct rte_mbuf *mbuf,
+		uint16_t nb_pkts)
 {
 	const struct port_info *pi;
 	struct cpl_tx_pkt_lso_core *lso;
@@ -1116,7 +1125,7 @@ int t4_eth_xmit(struct sge_eth_txq *txq, struct rte_mbuf *mbuf)
 			}
 			rte_prefetch0((volatile void *)addr);
 			return tx_do_packet_coalesce(txq, mbuf, cflits, adap,
-						     pi, addr);
+						     pi, addr, nb_pkts);
 		} else {
 			return -EBUSY;
 		}
@@ -1398,20 +1407,6 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
 	return 0;
 }
 
-/**
- * is_new_response - check if a response is newly written
- * @r: the response descriptor
- * @q: the response queue
- *
- * Returns true if a response descriptor contains a yet unprocessed
- * response.
- */
-static inline bool is_new_response(const struct rsp_ctrl *r,
-				   const struct sge_rspq *q)
-{
-	return (r->u.type_gen >> S_RSPD_GEN) == q->gen;
-}
-
 #define CXGB4_MSG_AN ((void *)1)
 
 /**
@@ -1453,12 +1448,12 @@ static int process_responses(struct sge_rspq *q, int budget,
 	struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, rspq);
 
 	while (likely(budget_left)) {
+		if (q->cidx == ntohs(q->stat->pidx))
+			break;
+
 		rc = (const struct rsp_ctrl *)
 		     ((const char *)q->cur_desc + (q->iqe_len - sizeof(*rc)));
 
-		if (!is_new_response(rc, q))
-			break;
-
 		/*
 		 * Ensure response has been read
 		 */
@@ -1548,35 +1543,6 @@ static int process_responses(struct sge_rspq *q, int budget,
 
 		rspq_next(q);
 		budget_left--;
-
-		if (R_IDXDIFF(q, gts_idx) >= 64) {
-			unsigned int cidx_inc = R_IDXDIFF(q, gts_idx);
-			unsigned int params;
-			u32 val;
-
-			if (fl_cap(&rxq->fl) - rxq->fl.avail >= 64)
-				__refill_fl(q->adapter, &rxq->fl);
-			params = V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX);
-			q->next_intr_params = params;
-			val = V_CIDXINC(cidx_inc) | V_SEINTARM(params);
-
-			if (unlikely(!q->bar2_addr))
-				t4_write_reg(q->adapter, MYPF_REG(A_SGE_PF_GTS),
-					     val |
-					     V_INGRESSQID((u32)q->cntxt_id));
-			else {
-				writel(val | V_INGRESSQID(q->bar2_qid),
-				       (void *)((uintptr_t)q->bar2_addr +
-				       SGE_UDB_GTS));
-				/*
-				 * This Write memory Barrier will force the
-				 * write to the User Doorbell area to be
-				 * flushed.
-				 */
-				wmb();
-			}
-			q->gts_idx = q->cidx;
-		}
 	}
 
 	/*
@@ -1594,10 +1560,38 @@ static int process_responses(struct sge_rspq *q, int budget,
 int cxgbe_poll(struct sge_rspq *q, struct rte_mbuf **rx_pkts,
 	       unsigned int budget, unsigned int *work_done)
 {
-	int err = 0;
+	struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, rspq);
+	unsigned int cidx_inc;
+	unsigned int params;
+	u32 val;
 
 	*work_done = process_responses(q, budget, rx_pkts);
-	return err;
+
+	if (*work_done) {
+		cidx_inc = R_IDXDIFF(q, gts_idx);
+
+		if (q->offset >= 0 && fl_cap(&rxq->fl) - rxq->fl.avail >= 64)
+			__refill_fl(q->adapter, &rxq->fl);
+
+		params = q->intr_params;
+		q->next_intr_params = params;
+		val = V_CIDXINC(cidx_inc) | V_SEINTARM(params);
+
+		if (unlikely(!q->bar2_addr)) {
+			t4_write_reg(q->adapter, MYPF_REG(A_SGE_PF_GTS),
+				     val | V_INGRESSQID((u32)q->cntxt_id));
+		} else {
+			writel(val | V_INGRESSQID(q->bar2_qid),
+			       (void *)((uintptr_t)q->bar2_addr + SGE_UDB_GTS));
+			/* This Write memory Barrier will force the
+			 * write to the User Doorbell area to be
+			 * flushed.
+			 */
+			wmb();
+		}
+		q->gts_idx = q->cidx;
+	}
+	return 0;
 }
 
 /**
@@ -1687,18 +1681,20 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 		      V_FW_IQ_CMD_IQASYNCH(fwevtq) |
 		      V_FW_IQ_CMD_VIID(pi->viid) |
 		      V_FW_IQ_CMD_IQANDST(intr_idx < 0) |
-		      V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT) |
+		      V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_STATUS_PAGE) |
 		      V_FW_IQ_CMD_IQANDSTINDEX(intr_idx >= 0 ? intr_idx :
 							       -intr_idx - 1));
 	c.iqdroprss_to_iqesize =
-		htons(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
+		htons(V_FW_IQ_CMD_IQPCIECH(cong > 0 ? cxgbe_ffs(cong) - 1 :
+						      pi->tx_chan) |
 		      F_FW_IQ_CMD_IQGTSMODE |
 		      V_FW_IQ_CMD_IQINTCNTTHRESH(iq->pktcnt_idx) |
 		      V_FW_IQ_CMD_IQESIZE(ilog2(iq->iqe_len) - 4));
 	c.iqsize = htons(iq->size);
 	c.iqaddr = cpu_to_be64(iq->phys_addr);
 	if (cong >= 0)
-		c.iqns_to_fl0congen = htonl(F_FW_IQ_CMD_IQFLINTCONGEN);
+		c.iqns_to_fl0congen = htonl(F_FW_IQ_CMD_IQFLINTCONGEN |
+					    F_FW_IQ_CMD_IQRO);
 
 	if (fl) {
 		struct sge_eth_rxq *rxq = container_of(fl, struct sge_eth_rxq,
@@ -1773,6 +1769,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 	iq->bar2_addr = bar2_address(adap, iq->cntxt_id, T4_BAR2_QTYPE_INGRESS,
 				     &iq->bar2_qid);
 	iq->size--;                           /* subtract status entry */
+	iq->stat = (void *)&iq->desc[iq->size * 8];
 	iq->eth_dev = eth_dev;
 	iq->handler = hnd;
 	iq->port_id = pi->port_id;
-- 
2.5.3