mlx5_tx_burst_empw() falls back to legacy Tx descriptor for multi-segmented
packets without taking advantage of inlining. In many cases, the 1st
segment can be inlined and this could make device fetch only one segment
instead of two. This helps saving PCIe bandwitdth when trasmitting out
multi-segmented packets with still using the Enhanced Multi-Packet Send for
other packets.
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
drivers/net/mlx5/mlx5_rxtx.c | 53 +++++++++++++++++++++++++++++++----
drivers/net/mlx5/mlx5_rxtx.h | 50 +++++++++++++++++++++++++++++++++
drivers/net/mlx5/mlx5_rxtx_vec.c | 27 +-----------------
drivers/net/mlx5/mlx5_rxtx_vec_neon.h | 2 +-
drivers/net/mlx5/mlx5_rxtx_vec_sse.h | 2 +-
5 files changed, 100 insertions(+), 34 deletions(-)
@@ -1304,10 +1304,10 @@ mlx5_empw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
}
/**
- * DPDK callback for TX with Enhanced MPW support.
+ * TX with Enhanced MPW support.
*
- * @param dpdk_txq
- * Generic pointer to TX queue structure.
+ * @param txq
+ * Pointer to TX queue structure.
* @param[in] pkts
* Packets to transmit.
* @param pkts_n
@@ -1316,10 +1316,10 @@ mlx5_empw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
* @return
* Number of packets successfully transmitted (<= pkts_n).
*/
-uint16_t
-mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
+static inline uint16_t
+txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
+ uint16_t pkts_n)
{
- struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
uint16_t elts_head = txq->elts_head;
const uint16_t elts_n = 1 << txq->elts_n;
const uint16_t elts_m = elts_n - 1;
@@ -1585,6 +1585,47 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
}
/**
+ * DPDK callback for TX with Enhanced MPW support.
+ *
+ * @param dpdk_txq
+ * Generic pointer to TX queue structure.
+ * @param[in] pkts
+ * Packets to transmit.
+ * @param pkts_n
+ * Number of packets in array.
+ *
+ * @return
+ * Number of packets successfully transmitted (<= pkts_n).
+ */
+uint16_t
+mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+ struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
+ uint16_t nb_tx = 0;
+
+ while (pkts_n > nb_tx) {
+ uint16_t n;
+ uint16_t ret;
+
+ n = txq_count_contig_multi_seg(&pkts[nb_tx], pkts_n - nb_tx);
+ if (n) {
+ ret = mlx5_tx_burst(dpdk_txq, &pkts[nb_tx], n);
+ if (!ret)
+ break;
+ nb_tx += ret;
+ }
+ n = txq_count_contig_single_seg(&pkts[nb_tx], pkts_n - nb_tx);
+ if (n) {
+ ret = txq_burst_empw(txq, &pkts[nb_tx], n);
+ if (!ret)
+ break;
+ nb_tx += ret;
+ }
+ }
+ return nb_tx;
+}
+
+/**
* Translate RX completion flags to packet type.
*
* @param[in] cqe
@@ -652,4 +652,54 @@ txq_ol_cksum_to_cs(struct mlx5_txq_data *txq_data, struct rte_mbuf *buf)
return cs_flags;
}
+/**
+ * Count the number of contiguous single segment packets.
+ *
+ * @param pkts
+ * Pointer to array of packets.
+ * @param pkts_n
+ * Number of packets.
+ *
+ * @return
+ * Number of contiguous single segment packets.
+ */
+static __rte_always_inline unsigned int
+txq_count_contig_single_seg(struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+ unsigned int pos;
+
+ if (!pkts_n)
+ return 0;
+ /* Count the number of contiguous single segment packets. */
+ for (pos = 0; pos < pkts_n; ++pos)
+ if (NB_SEGS(pkts[pos]) > 1)
+ break;
+ return pos;
+}
+
+/**
+ * Count the number of contiguous multi-segment packets.
+ *
+ * @param pkts
+ * Pointer to array of packets.
+ * @param pkts_n
+ * Number of packets.
+ *
+ * @return
+ * Number of contiguous multi-segment packets.
+ */
+static __rte_always_inline unsigned int
+txq_count_contig_multi_seg(struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+ unsigned int pos;
+
+ if (!pkts_n)
+ return 0;
+ /* Count the number of contiguous multi-segment packets. */
+ for (pos = 0; pos < pkts_n; ++pos)
+ if (NB_SEGS(pkts[pos]) == 1)
+ break;
+ return pos;
+}
+
#endif /* RTE_PMD_MLX5_RXTX_H_ */
@@ -68,31 +68,6 @@
#endif
/**
- * Count the number of continuous single segment packets.
- *
- * @param pkts
- * Pointer to array of packets.
- * @param pkts_n
- * Number of packets.
- *
- * @return
- * Number of continuous single segment packets.
- */
-static inline unsigned int
-txq_check_multiseg(struct rte_mbuf **pkts, uint16_t pkts_n)
-{
- unsigned int pos;
-
- if (!pkts_n)
- return 0;
- /* Count the number of continuous single segment packets. */
- for (pos = 0; pos < pkts_n; ++pos)
- if (NB_SEGS(pkts[pos]) > 1)
- break;
- return pos;
-}
-
-/**
* Count the number of packets having same ol_flags and calculate cs_flags.
*
* @param txq
@@ -192,7 +167,7 @@ mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
pkts_n - nb_tx);
n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
if (!(txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS))
- n = txq_check_multiseg(&pkts[nb_tx], n);
+ n = txq_count_contig_single_seg(&pkts[nb_tx], n);
if (!(txq->flags & ETH_TXQ_FLAGS_NOOFFLOADS))
n = txq_calc_offload(txq, &pkts[nb_tx], n, &cs_flags);
ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags);
@@ -222,7 +222,7 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
* Send burst of packets with Enhanced MPW. If it encounters a multi-seg packet,
* it returns to make it processed by txq_scatter_v(). All the packets in
* the pkts list should be single segment packets having same offload flags.
- * This must be checked by txq_check_multiseg() and txq_calc_offload().
+ * This must be checked by txq_count_contig_single_seg() and txq_calc_offload().
*
* @param txq
* Pointer to TX queue structure.
@@ -223,7 +223,7 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
* Send burst of packets with Enhanced MPW. If it encounters a multi-seg packet,
* it returns to make it processed by txq_scatter_v(). All the packets in
* the pkts list should be single segment packets having same offload flags.
- * This must be checked by txq_check_multiseg() and txq_calc_offload().
+ * This must be checked by txq_count_contig_single_seg() and txq_calc_offload().
*
* @param txq
* Pointer to TX queue structure.