[dpdk-dev] [PATCH v5 1/6] net/cnxk: add multi seg Rx vector routine

Jerin Jacob jerinjacobk at gmail.com
Tue Jun 29 18:20:56 CEST 2021


On Tue, Jun 29, 2021 at 1:14 PM <pbhagavatula at marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula at marvell.com>
>
> Add multi-segment Rx vector routine: form the primary mbufs using the
> vector path, then switch to the scalar path when extracting segments.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula at marvell.com>
> Series-acked-by:  Nithin Dabilpuram <ndabilpuram at marvell.com>


Series applied to dpdk-next-net-mrvl/for-dpdk-main. Thanks.


> ---
>  v5 Changes:
>  - Fix incorrect mbuf assignment.
>  v4 Changes:
>  - Split patches for easier merge.
>  - Rebase on dpdk-next-net-mrvl.
>  v3 Changes:
>  - Spell check.
>
>  drivers/net/cnxk/cn10k_rx.c          | 31 +++++++++++------
>  drivers/net/cnxk/cn10k_rx.h          | 51 +++++++++++++++++++++-------
>  drivers/net/cnxk/cn10k_rx_vec_mseg.c | 17 ++++++++++
>  drivers/net/cnxk/cn9k_rx.c           | 31 +++++++++++------
>  drivers/net/cnxk/cn9k_rx.h           | 51 +++++++++++++++++++++-------
>  drivers/net/cnxk/cn9k_rx_vec_mseg.c  | 18 ++++++++++
>  drivers/net/cnxk/meson.build         |  2 ++
>  7 files changed, 157 insertions(+), 44 deletions(-)
>  create mode 100644 drivers/net/cnxk/cn10k_rx_vec_mseg.c
>  create mode 100644 drivers/net/cnxk/cn9k_rx_vec_mseg.c
>
> diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c
> index 5c956c06b..3a9fd7130 100644
> --- a/drivers/net/cnxk/cn10k_rx.c
> +++ b/drivers/net/cnxk/cn10k_rx.c
> @@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev,
>                 [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
>                 [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
>                 [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
> +
> +       rte_atomic_thread_fence(__ATOMIC_RELEASE);
>  }
>
>  void
> @@ -60,20 +62,29 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
>  #undef R
>         };
>
> -       /* For PTP enabled, scalar rx function should be chosen as most of the
> -        * PTP apps are implemented to rx burst 1 pkt.
> -        */
> -       if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
> -               pick_rx_func(eth_dev, nix_eth_rx_burst);
> -       else
> -               pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
> +       const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
> +#define R(name, f5, f4, f3, f2, f1, f0, flags)                                 \
> +       [f5][f4][f3][f2][f1][f0] = cn10k_nix_recv_pkts_vec_mseg_##name,
>
> -       if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> -               pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
> +               NIX_RX_FASTPATH_MODES
> +#undef R
> +       };
>
>         /* Copy multi seg version with no offload for tear down sequence */
>         if (rte_eal_process_type() == RTE_PROC_PRIMARY)
>                 dev->rx_pkt_burst_no_offload =
>                         nix_eth_rx_burst_mseg[0][0][0][0][0][0];
> -       rte_mb();
> +
> +       /* For PTP enabled, scalar rx function should be chosen as most of the
> +        * PTP apps are implemented to rx burst 1 pkt.
> +        */
> +       if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
> +               if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> +                       return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
> +               return pick_rx_func(eth_dev, nix_eth_rx_burst);
> +       }
> +
> +       if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> +               return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
> +       return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
>  }
> diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
> index 1cc37cbaa..5926ff7f4 100644
> --- a/drivers/net/cnxk/cn10k_rx.h
> +++ b/drivers/net/cnxk/cn10k_rx.h
> @@ -119,8 +119,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
>
>         sg = *(const uint64_t *)(rx + 1);
>         nb_segs = (sg >> 48) & 0x3;
> -       mbuf->nb_segs = nb_segs;
> +
> +       if (nb_segs == 1) {
> +               mbuf->next = NULL;
> +               return;
> +       }
> +
> +       mbuf->pkt_len = rx->pkt_lenm1 + 1;
>         mbuf->data_len = sg & 0xFFFF;
> +       mbuf->nb_segs = nb_segs;
>         sg = sg >> 16;
>
>         eol = ((const rte_iova_t *)(rx + 1) + ((rx->desc_sizem1 + 1) << 1));
> @@ -195,15 +202,14 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
>                 ol_flags = nix_update_match_id(rx->match_id, ol_flags, mbuf);
>
>         mbuf->ol_flags = ol_flags;
> -       *(uint64_t *)(&mbuf->rearm_data) = val;
>         mbuf->pkt_len = len;
> +       mbuf->data_len = len;
> +       *(uint64_t *)(&mbuf->rearm_data) = val;
>
> -       if (flag & NIX_RX_MULTI_SEG_F) {
> +       if (flag & NIX_RX_MULTI_SEG_F)
>                 nix_cqe_xtract_mseg(rx, mbuf, val);
> -       } else {
> -               mbuf->data_len = len;
> +       else
>                 mbuf->next = NULL;
> -       }
>  }
>
>  static inline uint16_t
> @@ -481,16 +487,34 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
>                 vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
>                 vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
>
> -               /* Update that no more segments */
> -               mbuf0->next = NULL;
> -               mbuf1->next = NULL;
> -               mbuf2->next = NULL;
> -               mbuf3->next = NULL;
> -
>                 /* Store the mbufs to rx_pkts */
>                 vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
>                 vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
>
> +               if (flags & NIX_RX_MULTI_SEG_F) {
> +                       /* Multi segment is enable build mseg list for
> +                        * individual mbufs in scalar mode.
> +                        */
> +                       nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> +                                           (cq0 + CQE_SZ(0) + 8), mbuf0,
> +                                           mbuf_initializer);
> +                       nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> +                                           (cq0 + CQE_SZ(1) + 8), mbuf1,
> +                                           mbuf_initializer);
> +                       nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> +                                           (cq0 + CQE_SZ(2) + 8), mbuf2,
> +                                           mbuf_initializer);
> +                       nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> +                                           (cq0 + CQE_SZ(3) + 8), mbuf3,
> +                                           mbuf_initializer);
> +               } else {
> +                       /* Update that no more segments */
> +                       mbuf0->next = NULL;
> +                       mbuf1->next = NULL;
> +                       mbuf2->next = NULL;
> +                       mbuf3->next = NULL;
> +               }
> +
>                 /* Prefetch mbufs */
>                 roc_prefetch_store_keep(mbuf0);
>                 roc_prefetch_store_keep(mbuf1);
> @@ -645,6 +669,9 @@ R(vlan_ts_mark_cksum_ptype_rss,     1, 1, 1, 1, 1, 1,                              \
>                 void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);     \
>                                                                                \
>         uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_##name(      \
> +               void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);     \
> +                                                                              \
> +       uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
>                 void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);
>
>  NIX_RX_FASTPATH_MODES
> diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
> new file mode 100644
> index 000000000..04d1e46c8
> --- /dev/null
> +++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
> @@ -0,0 +1,17 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(C) 2021 Marvell.
> + */
> +
> +#include "cn10k_ethdev.h"
> +#include "cn10k_rx.h"
> +
> +#define R(name, f5, f4, f3, f2, f1, f0, flags)                                 \
> +       uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
> +               void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts)      \
> +       {                                                                      \
> +               return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts,     \
> +                                         (flags) | NIX_RX_MULTI_SEG_F);       \
> +       }
> +
> +NIX_RX_FASTPATH_MODES
> +#undef R
> diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c
> index 0acedd0a1..d293d4eac 100644
> --- a/drivers/net/cnxk/cn9k_rx.c
> +++ b/drivers/net/cnxk/cn9k_rx.c
> @@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev,
>                 [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
>                 [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
>                 [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
> +
> +       rte_atomic_thread_fence(__ATOMIC_RELEASE);
>  }
>
>  void
> @@ -60,20 +62,29 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
>  #undef R
>         };
>
> -       /* For PTP enabled, scalar rx function should be chosen as most of the
> -        * PTP apps are implemented to rx burst 1 pkt.
> -        */
> -       if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
> -               pick_rx_func(eth_dev, nix_eth_rx_burst);
> -       else
> -               pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
> +       const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
> +#define R(name, f5, f4, f3, f2, f1, f0, flags)                                 \
> +       [f5][f4][f3][f2][f1][f0] = cn9k_nix_recv_pkts_vec_mseg_##name,
>
> -       if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> -               pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
> +               NIX_RX_FASTPATH_MODES
> +#undef R
> +       };
>
>         /* Copy multi seg version with no offload for tear down sequence */
>         if (rte_eal_process_type() == RTE_PROC_PRIMARY)
>                 dev->rx_pkt_burst_no_offload =
>                         nix_eth_rx_burst_mseg[0][0][0][0][0][0];
> -       rte_mb();
> +
> +       /* For PTP enabled, scalar rx function should be chosen as most of the
> +        * PTP apps are implemented to rx burst 1 pkt.
> +        */
> +       if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
> +               if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> +                       return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
> +               return pick_rx_func(eth_dev, nix_eth_rx_burst);
> +       }
> +
> +       if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> +               return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
> +       return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
>  }
> diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h
> index 10ef5c690..5ae9e8195 100644
> --- a/drivers/net/cnxk/cn9k_rx.h
> +++ b/drivers/net/cnxk/cn9k_rx.h
> @@ -120,8 +120,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
>
>         sg = *(const uint64_t *)(rx + 1);
>         nb_segs = (sg >> 48) & 0x3;
> -       mbuf->nb_segs = nb_segs;
> +
> +       if (nb_segs == 1) {
> +               mbuf->next = NULL;
> +               return;
> +       }
> +
> +       mbuf->pkt_len = rx->pkt_lenm1 + 1;
>         mbuf->data_len = sg & 0xFFFF;
> +       mbuf->nb_segs = nb_segs;
>         sg = sg >> 16;
>
>         eol = ((const rte_iova_t *)(rx + 1) +
> @@ -198,15 +205,14 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
>                         nix_update_match_id(rx->cn9k.match_id, ol_flags, mbuf);
>
>         mbuf->ol_flags = ol_flags;
> -       *(uint64_t *)(&mbuf->rearm_data) = val;
>         mbuf->pkt_len = len;
> +       mbuf->data_len = len;
> +       *(uint64_t *)(&mbuf->rearm_data) = val;
>
> -       if (flag & NIX_RX_MULTI_SEG_F) {
> +       if (flag & NIX_RX_MULTI_SEG_F)
>                 nix_cqe_xtract_mseg(rx, mbuf, val);
> -       } else {
> -               mbuf->data_len = len;
> +       else
>                 mbuf->next = NULL;
> -       }
>  }
>
>  static inline uint16_t
> @@ -484,16 +490,34 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
>                 vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
>                 vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
>
> -               /* Update that no more segments */
> -               mbuf0->next = NULL;
> -               mbuf1->next = NULL;
> -               mbuf2->next = NULL;
> -               mbuf3->next = NULL;
> -
>                 /* Store the mbufs to rx_pkts */
>                 vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
>                 vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
>
> +               if (flags & NIX_RX_MULTI_SEG_F) {
> +                       /* Multi segment is enable build mseg list for
> +                        * individual mbufs in scalar mode.
> +                        */
> +                       nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> +                                           (cq0 + CQE_SZ(0) + 8), mbuf0,
> +                                           mbuf_initializer);
> +                       nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> +                                           (cq0 + CQE_SZ(1) + 8), mbuf1,
> +                                           mbuf_initializer);
> +                       nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> +                                           (cq0 + CQE_SZ(2) + 8), mbuf2,
> +                                           mbuf_initializer);
> +                       nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> +                                           (cq0 + CQE_SZ(3) + 8), mbuf3,
> +                                           mbuf_initializer);
> +               } else {
> +                       /* Update that no more segments */
> +                       mbuf0->next = NULL;
> +                       mbuf1->next = NULL;
> +                       mbuf2->next = NULL;
> +                       mbuf3->next = NULL;
> +               }
> +
>                 /* Prefetch mbufs */
>                 roc_prefetch_store_keep(mbuf0);
>                 roc_prefetch_store_keep(mbuf1);
> @@ -647,6 +671,9 @@ R(vlan_ts_mark_cksum_ptype_rss,     1, 1, 1, 1, 1, 1,                              \
>                 void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);     \
>                                                                                \
>         uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name(       \
> +               void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);     \
> +                                                                              \
> +       uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name(  \
>                 void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);
>
>  NIX_RX_FASTPATH_MODES
> diff --git a/drivers/net/cnxk/cn9k_rx_vec_mseg.c b/drivers/net/cnxk/cn9k_rx_vec_mseg.c
> new file mode 100644
> index 000000000..e46d8a474
> --- /dev/null
> +++ b/drivers/net/cnxk/cn9k_rx_vec_mseg.c
> @@ -0,0 +1,18 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(C) 2021 Marvell.
> + */
> +
> +#include "cn9k_ethdev.h"
> +#include "cn9k_rx.h"
> +
> +#define R(name, f5, f4, f3, f2, f1, f0, flags)                                 \
> +       uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name(  \
> +               void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts)      \
> +       {                                                                      \
> +               return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts,      \
> +                                                (flags) |                     \
> +                                                        NIX_RX_MULTI_SEG_F);  \
> +       }
> +
> +NIX_RX_FASTPATH_MODES
> +#undef R
> diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build
> index 2071d0dcb..aa8c7253f 100644
> --- a/drivers/net/cnxk/meson.build
> +++ b/drivers/net/cnxk/meson.build
> @@ -23,6 +23,7 @@ sources += files('cn9k_ethdev.c',
>                  'cn9k_rx.c',
>                  'cn9k_rx_mseg.c',
>                  'cn9k_rx_vec.c',
> +                'cn9k_rx_vec_mseg.c',
>                  'cn9k_tx.c',
>                  'cn9k_tx_mseg.c',
>                  'cn9k_tx_vec.c')
> @@ -32,6 +33,7 @@ sources += files('cn10k_ethdev.c',
>                  'cn10k_rx.c',
>                  'cn10k_rx_mseg.c',
>                  'cn10k_rx_vec.c',
> +                'cn10k_rx_vec_mseg.c',
>                  'cn10k_tx.c',
>                  'cn10k_tx_mseg.c',
>                  'cn10k_tx_vec.c')
> --
> 2.17.1
>


More information about the dev mailing list