[dpdk-dev] [PATCH v5 6/8] net/ice: support Rx AVX2 vector
Maxime Coquelin
maxime.coquelin at redhat.com
Fri Mar 22 11:12:07 CET 2019
On 3/22/19 3:58 AM, Wenzhuo Lu wrote:
> Signed-off-by: Wenzhuo Lu <wenzhuo.lu at intel.com>
> ---
> drivers/net/ice/Makefile | 19 ++
> drivers/net/ice/ice_rxtx.c | 16 +-
> drivers/net/ice/ice_rxtx.h | 2 +
> drivers/net/ice/ice_rxtx_vec_avx2.c | 622 ++++++++++++++++++++++++++++++++++++
> drivers/net/ice/meson.build | 15 +
> 5 files changed, 671 insertions(+), 3 deletions(-)
> create mode 100644 drivers/net/ice/ice_rxtx_vec_avx2.c
>
> diff --git a/drivers/net/ice/Makefile b/drivers/net/ice/Makefile
> index 92594bb..5ba59f4 100644
> --- a/drivers/net/ice/Makefile
> +++ b/drivers/net/ice/Makefile
> @@ -58,4 +58,23 @@ ifeq ($(CONFIG_RTE_ARCH_X86), y)
> SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_rxtx_vec_sse.c
> endif
>
> +ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX2,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX2)
> + CC_AVX2_SUPPORT=1
> +else
> + CC_AVX2_SUPPORT=\
> + $(shell $(CC) -march=core-avx2 -dM -E - </dev/null 2>&1 | \
> + grep -q AVX2 && echo 1)
> + ifeq ($(CC_AVX2_SUPPORT), 1)
> + ifeq ($(CONFIG_RTE_TOOLCHAIN_ICC),y)
> + CFLAGS_ice_rxtx_vec_avx2.o += -march=core-avx2
> + else
> + CFLAGS_ice_rxtx_vec_avx2.o += -mavx2
> + endif
> + endif
> +endif
> +
> +ifeq ($(CC_AVX2_SUPPORT), 1)
> + SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_rxtx_vec_avx2.c
> +endif
> +
> include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
> index f9ecffa..6191f34 100644
> --- a/drivers/net/ice/ice_rxtx.c
> +++ b/drivers/net/ice/ice_rxtx.c
> @@ -1494,7 +1494,8 @@
>
> #ifdef RTE_ARCH_X86
> if (dev->rx_pkt_burst == ice_recv_pkts_vec ||
> - dev->rx_pkt_burst == ice_recv_scattered_pkts_vec)
> + dev->rx_pkt_burst == ice_recv_scattered_pkts_vec ||
> + dev->rx_pkt_burst == ice_recv_pkts_vec_avx2)
> return ptypes;
> #endif
>
> @@ -2236,21 +2237,30 @@ void __attribute__((cold))
> #ifdef RTE_ARCH_X86
> struct ice_rx_queue *rxq;
> int i;
> + bool use_avx2 = false;
>
> if (!ice_rx_vec_dev_check(dev)) {
> for (i = 0; i < dev->data->nb_rx_queues; i++) {
> rxq = dev->data->rx_queues[i];
> (void)ice_rxq_vec_setup(rxq);
> }
> +
> + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
> + rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
> + use_avx2 = true;
> +
> if (dev->data->scattered_rx) {
> PMD_DRV_LOG(DEBUG,
> "Using Vector Scattered Rx (port %d).",
> dev->data->port_id);
> dev->rx_pkt_burst = ice_recv_scattered_pkts_vec;
> } else {
> - PMD_DRV_LOG(DEBUG, "Using Vector Rx (port %d).",
> + PMD_DRV_LOG(DEBUG, "Using %sVector Rx (port %d).",
> + use_avx2 ? "avx2 " : "",
> dev->data->port_id);
> - dev->rx_pkt_burst = ice_recv_pkts_vec;
> + dev->rx_pkt_burst = use_avx2 ?
> + ice_recv_pkts_vec_avx2 :
> + ice_recv_pkts_vec;
> }
>
> return;
> diff --git a/drivers/net/ice/ice_rxtx.h b/drivers/net/ice/ice_rxtx.h
> index 1dde4e7..d1c9b92 100644
> --- a/drivers/net/ice/ice_rxtx.h
> +++ b/drivers/net/ice/ice_rxtx.h
> @@ -179,4 +179,6 @@ uint16_t ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> uint16_t nb_pkts);
> uint16_t ice_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
> uint16_t nb_pkts);
> +uint16_t ice_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
> + uint16_t nb_pkts);
> #endif /* _ICE_RXTX_H_ */
> diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c b/drivers/net/ice/ice_rxtx_vec_avx2.c
> new file mode 100644
> index 0000000..763fa9f
> --- /dev/null
> +++ b/drivers/net/ice/ice_rxtx_vec_avx2.c
> @@ -0,0 +1,622 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2019 Intel Corporation
> + */
> +
> +#include "ice_rxtx_vec_common.h"
> +
> +#include <x86intrin.h>
> +
> +#ifndef __INTEL_COMPILER
> +#pragma GCC diagnostic ignored "-Wcast-qual"
> +#endif
> +
> +static inline void
> +ice_rxq_rearm(struct ice_rx_queue *rxq)
> +{
> + int i;
> + uint16_t rx_id;
> + volatile union ice_rx_desc *rxdp;
> + struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
> +
> + rxdp = rxq->rx_ring + rxq->rxrearm_start;
> +
> + /* Pull 'n' more MBUFs into the software ring */
> + if (rte_mempool_get_bulk(rxq->mp,
> + (void *)rxep,
> + ICE_RXQ_REARM_THRESH) < 0) {
> + if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
> + rxq->nb_rx_desc) {
> + __m128i dma_addr0;
> +
> + dma_addr0 = _mm_setzero_si128();
> + for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
> + rxep[i].mbuf = &rxq->fake_mbuf;
> + _mm_store_si128((__m128i *)&rxdp[i].read,
> + dma_addr0);
> + }
> + }
> + rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
> + ICE_RXQ_REARM_THRESH;
> + return;
> + }
> +
> +#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
I see the same is done for other Intel NICs, but I wonder what the
performance cost of making it dynamic would be, if any.
Having it dynamic (as a devarg, for instance) would make it possible to
change the value when the user is using DPDK from a distro. It would
also help testing coverage.

Btw, how do you select this option with the meson build system?
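
To illustrate the devarg suggestion above, here is a rough sketch of how
the PMD could pick up such an option at probe time (the "rx-desc-16b"
key and the helper below are hypothetical, only meant to show the idea):

#include <stdbool.h>
#include <string.h>
#include <errno.h>

#include <rte_common.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>

/* Hypothetical devarg key, not an existing ice PMD option. */
#define ICE_RX_DESC_16B_ARG "rx-desc-16b"

static int
parse_bool_arg(const char *key __rte_unused, const char *value,
               void *opaque)
{
        bool *flag = opaque;

        /* Treat "1" as enabled, anything else as disabled. */
        *flag = (strcmp(value, "1") == 0);
        return 0;
}

static int
ice_parse_rx_desc_devarg(struct rte_devargs *devargs, bool *use_16b_desc)
{
        static const char * const valid_keys[] = {
                ICE_RX_DESC_16B_ARG, NULL };
        struct rte_kvargs *kvlist;

        *use_16b_desc = false;
        if (devargs == NULL)
                return 0;

        kvlist = rte_kvargs_parse(devargs->args, valid_keys);
        if (kvlist == NULL)
                return -EINVAL;

        rte_kvargs_process(kvlist, ICE_RX_DESC_16B_ARG,
                           parse_bool_arg, use_16b_desc);
        rte_kvargs_free(kvlist);
        return 0;
}

Of course both descriptor layouts would have to be built in for that to
work, so there may be some cost in code size and an extra branch when
selecting the Rx path, hence my question about the performance impact.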