[dpdk-dev] [PATCH v2 01/13] net/cnxk: add multi seg Rx vector routine
pbhagavatula at marvell.com
pbhagavatula at marvell.com
Sat Jun 19 13:01:41 CEST 2021
From: Pavan Nikhilesh <pbhagavatula at marvell.com>
Add multi-segment Rx vector routine: form the primary mbufs using the
vector path, then switch to the scalar path when extracting segments.
Signed-off-by: Pavan Nikhilesh <pbhagavatula at marvell.com>
---
Depends-on: http://patches.dpdk.org/project/dpdk/list/?series=17394
drivers/net/cnxk/cn10k_rx.c | 31 +++++++++++------
drivers/net/cnxk/cn10k_rx.h | 51 +++++++++++++++++++++-------
drivers/net/cnxk/cn10k_rx_vec_mseg.c | 17 ++++++++++
drivers/net/cnxk/cn9k_rx.c | 31 +++++++++++------
drivers/net/cnxk/cn9k_rx.h | 51 +++++++++++++++++++++-------
drivers/net/cnxk/cn9k_rx_vec_mseg.c | 18 ++++++++++
drivers/net/cnxk/meson.build | 2 ++
7 files changed, 157 insertions(+), 44 deletions(-)
create mode 100644 drivers/net/cnxk/cn10k_rx_vec_mseg.c
create mode 100644 drivers/net/cnxk/cn9k_rx_vec_mseg.c
diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c
index 5c956c06b4..3a9fd71309 100644
--- a/drivers/net/cnxk/cn10k_rx.c
+++ b/drivers/net/cnxk/cn10k_rx.c
@@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev,
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
+
+ rte_atomic_thread_fence(__ATOMIC_RELEASE);
}
void
@@ -60,20 +62,29 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
#undef R
};
- /* For PTP enabled, scalar rx function should be chosen as most of the
- * PTP apps are implemented to rx burst 1 pkt.
- */
- if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
- pick_rx_func(eth_dev, nix_eth_rx_burst);
- else
- pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
+ const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_nix_recv_pkts_vec_mseg_##name,
- if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
- pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
/* Copy multi seg version with no offload for tear down sequence */
if (rte_eal_process_type() == RTE_PROC_PRIMARY)
dev->rx_pkt_burst_no_offload =
nix_eth_rx_burst_mseg[0][0][0][0][0][0];
- rte_mb();
+
+ /* For PTP enabled, scalar rx function should be chosen as most of the
+ * PTP apps are implemented to rx burst 1 pkt.
+ */
+ if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+ if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+ return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+ return pick_rx_func(eth_dev, nix_eth_rx_burst);
+ }
+
+ if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+ return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
+ return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
}
diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index 1cc37cbaa0..5926ff7f46 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -119,8 +119,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
sg = *(const uint64_t *)(rx + 1);
nb_segs = (sg >> 48) & 0x3;
- mbuf->nb_segs = nb_segs;
+
+ if (nb_segs == 1) {
+ mbuf->next = NULL;
+ return;
+ }
+
+ mbuf->pkt_len = rx->pkt_lenm1 + 1;
mbuf->data_len = sg & 0xFFFF;
+ mbuf->nb_segs = nb_segs;
sg = sg >> 16;
eol = ((const rte_iova_t *)(rx + 1) + ((rx->desc_sizem1 + 1) << 1));
@@ -195,15 +202,14 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
ol_flags = nix_update_match_id(rx->match_id, ol_flags, mbuf);
mbuf->ol_flags = ol_flags;
- *(uint64_t *)(&mbuf->rearm_data) = val;
mbuf->pkt_len = len;
+ mbuf->data_len = len;
+ *(uint64_t *)(&mbuf->rearm_data) = val;
- if (flag & NIX_RX_MULTI_SEG_F) {
+ if (flag & NIX_RX_MULTI_SEG_F)
nix_cqe_xtract_mseg(rx, mbuf, val);
- } else {
- mbuf->data_len = len;
+ else
mbuf->next = NULL;
- }
}
static inline uint16_t
@@ -481,16 +487,34 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
- /* Update that no more segments */
- mbuf0->next = NULL;
- mbuf1->next = NULL;
- mbuf2->next = NULL;
- mbuf3->next = NULL;
-
/* Store the mbufs to rx_pkts */
vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
+ if (flags & NIX_RX_MULTI_SEG_F) {
+ /* Multi segment is enabled, build mseg list for
+ * individual mbufs in scalar mode.
+ */
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(0) + 8), mbuf0,
+ mbuf_initializer);
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(1) + 8), mbuf1,
+ mbuf_initializer);
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(2) + 8), mbuf2,
+ mbuf_initializer);
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(3) + 8), mbuf3,
+ mbuf_initializer);
+ } else {
+ /* Update that no more segments */
+ mbuf0->next = NULL;
+ mbuf1->next = NULL;
+ mbuf2->next = NULL;
+ mbuf3->next = NULL;
+ }
+
/* Prefetch mbufs */
roc_prefetch_store_keep(mbuf0);
roc_prefetch_store_keep(mbuf1);
@@ -645,6 +669,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
\
uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_##name( \
+ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
+ \
+ uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);
NIX_RX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
new file mode 100644
index 0000000000..04d1e46c82
--- /dev/null
+++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_ethdev.h"
+#include "cn10k_rx.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
+ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \
+ { \
+ return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
+ (flags) | NIX_RX_MULTI_SEG_F); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c
index 0acedd0a1f..d293d4eac3 100644
--- a/drivers/net/cnxk/cn9k_rx.c
+++ b/drivers/net/cnxk/cn9k_rx.c
@@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev,
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
+
+ rte_atomic_thread_fence(__ATOMIC_RELEASE);
}
void
@@ -60,20 +62,29 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
#undef R
};
- /* For PTP enabled, scalar rx function should be chosen as most of the
- * PTP apps are implemented to rx burst 1 pkt.
- */
- if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
- pick_rx_func(eth_dev, nix_eth_rx_burst);
- else
- pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
+ const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_nix_recv_pkts_vec_mseg_##name,
- if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
- pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
/* Copy multi seg version with no offload for tear down sequence */
if (rte_eal_process_type() == RTE_PROC_PRIMARY)
dev->rx_pkt_burst_no_offload =
nix_eth_rx_burst_mseg[0][0][0][0][0][0];
- rte_mb();
+
+ /* For PTP enabled, scalar rx function should be chosen as most of the
+ * PTP apps are implemented to rx burst 1 pkt.
+ */
+ if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+ if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+ return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+ return pick_rx_func(eth_dev, nix_eth_rx_burst);
+ }
+
+ if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+ return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
+ return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
}
diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h
index 10ef5c6905..5ae9e8195c 100644
--- a/drivers/net/cnxk/cn9k_rx.h
+++ b/drivers/net/cnxk/cn9k_rx.h
@@ -120,8 +120,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
sg = *(const uint64_t *)(rx + 1);
nb_segs = (sg >> 48) & 0x3;
- mbuf->nb_segs = nb_segs;
+
+ if (nb_segs == 1) {
+ mbuf->next = NULL;
+ return;
+ }
+
+ mbuf->pkt_len = rx->pkt_lenm1 + 1;
mbuf->data_len = sg & 0xFFFF;
+ mbuf->nb_segs = nb_segs;
sg = sg >> 16;
eol = ((const rte_iova_t *)(rx + 1) +
@@ -198,15 +205,14 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
nix_update_match_id(rx->cn9k.match_id, ol_flags, mbuf);
mbuf->ol_flags = ol_flags;
- *(uint64_t *)(&mbuf->rearm_data) = val;
mbuf->pkt_len = len;
+ mbuf->data_len = len;
+ *(uint64_t *)(&mbuf->rearm_data) = val;
- if (flag & NIX_RX_MULTI_SEG_F) {
+ if (flag & NIX_RX_MULTI_SEG_F)
nix_cqe_xtract_mseg(rx, mbuf, val);
- } else {
- mbuf->data_len = len;
+ else
mbuf->next = NULL;
- }
}
static inline uint16_t
@@ -484,16 +490,34 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
- /* Update that no more segments */
- mbuf0->next = NULL;
- mbuf1->next = NULL;
- mbuf2->next = NULL;
- mbuf3->next = NULL;
-
/* Store the mbufs to rx_pkts */
vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
+ if (flags & NIX_RX_MULTI_SEG_F) {
+ /* Multi segment is enabled, build mseg list for
+ * individual mbufs in scalar mode.
+ */
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(0) + 8), mbuf0,
+ mbuf_initializer);
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(1) + 8), mbuf1,
+ mbuf_initializer);
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(2) + 8), mbuf2,
+ mbuf_initializer);
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(3) + 8), mbuf3,
+ mbuf_initializer);
+ } else {
+ /* Update that no more segments */
+ mbuf0->next = NULL;
+ mbuf1->next = NULL;
+ mbuf2->next = NULL;
+ mbuf3->next = NULL;
+ }
+
/* Prefetch mbufs */
roc_prefetch_store_keep(mbuf0);
roc_prefetch_store_keep(mbuf1);
@@ -647,6 +671,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
\
uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name( \
+ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
+ \
+ uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);
NIX_RX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn9k_rx_vec_mseg.c b/drivers/net/cnxk/cn9k_rx_vec_mseg.c
new file mode 100644
index 0000000000..e46d8a4749
--- /dev/null
+++ b/drivers/net/cnxk/cn9k_rx_vec_mseg.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_ethdev.h"
+#include "cn9k_rx.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \
+ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \
+ { \
+ return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
+ (flags) | \
+ NIX_RX_MULTI_SEG_F); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build
index 2071d0dcb2..aa8c7253fb 100644
--- a/drivers/net/cnxk/meson.build
+++ b/drivers/net/cnxk/meson.build
@@ -23,6 +23,7 @@ sources += files('cn9k_ethdev.c',
'cn9k_rx.c',
'cn9k_rx_mseg.c',
'cn9k_rx_vec.c',
+ 'cn9k_rx_vec_mseg.c',
'cn9k_tx.c',
'cn9k_tx_mseg.c',
'cn9k_tx_vec.c')
@@ -32,6 +33,7 @@ sources += files('cn10k_ethdev.c',
'cn10k_rx.c',
'cn10k_rx_mseg.c',
'cn10k_rx_vec.c',
+ 'cn10k_rx_vec_mseg.c',
'cn10k_tx.c',
'cn10k_tx_mseg.c',
'cn10k_tx_vec.c')
--
2.17.1
More information about the dev
mailing list