[dpdk-dev] [PATCH v5 6/8] net/ice: support Rx AVX2 vector

Maxime Coquelin maxime.coquelin at redhat.com
Fri Mar 22 11:12:07 CET 2019



On 3/22/19 3:58 AM, Wenzhuo Lu wrote:
> Signed-off-by: Wenzhuo Lu <wenzhuo.lu at intel.com>
> ---
>   drivers/net/ice/Makefile            |  19 ++
>   drivers/net/ice/ice_rxtx.c          |  16 +-
>   drivers/net/ice/ice_rxtx.h          |   2 +
>   drivers/net/ice/ice_rxtx_vec_avx2.c | 622 ++++++++++++++++++++++++++++++++++++
>   drivers/net/ice/meson.build         |  15 +
>   5 files changed, 671 insertions(+), 3 deletions(-)
>   create mode 100644 drivers/net/ice/ice_rxtx_vec_avx2.c
> 
> diff --git a/drivers/net/ice/Makefile b/drivers/net/ice/Makefile
> index 92594bb..5ba59f4 100644
> --- a/drivers/net/ice/Makefile
> +++ b/drivers/net/ice/Makefile
> @@ -58,4 +58,23 @@ ifeq ($(CONFIG_RTE_ARCH_X86), y)
>   SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_rxtx_vec_sse.c
>   endif
>   
> +ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX2,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX2)
> +	CC_AVX2_SUPPORT=1
> +else
> +	CC_AVX2_SUPPORT=\
> +	$(shell $(CC) -march=core-avx2 -dM -E - </dev/null 2>&1 | \
> +	grep -q AVX2 && echo 1)
> +	ifeq ($(CC_AVX2_SUPPORT), 1)
> +		ifeq ($(CONFIG_RTE_TOOLCHAIN_ICC),y)
> +			CFLAGS_ice_rxtx_vec_avx2.o += -march=core-avx2
> +		else
> +			CFLAGS_ice_rxtx_vec_avx2.o += -mavx2
> +		endif
> +	endif
> +endif
> +
> +ifeq ($(CC_AVX2_SUPPORT), 1)
> +	SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_rxtx_vec_avx2.c
> +endif
> +
>   include $(RTE_SDK)/mk/rte.lib.mk
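
(The meson.build hunk is not quoted here, but I assume the AVX2 file
selection follows the same pattern as i40e: build ice_rxtx_vec_avx2.c
either when AVX2 is already in the machine baseline, or when the
compiler accepts -mavx2. A sketch of that pattern -- names and the
dependency list are illustrative, taken from the other Intel drivers:

if arch_subdir == 'x86'
	sources += files('ice_rxtx_vec_sse.c')
	# build the AVX2 Rx path if the baseline already has AVX2,
	# or if the compiler can be asked for it explicitly
	if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2')
		sources += files('ice_rxtx_vec_avx2.c')
	elif cc.has_argument('-mavx2')
		ice_avx2_lib = static_library('ice_avx2_lib',
				'ice_rxtx_vec_avx2.c',
				dependencies: [static_rte_ethdev,
					static_rte_mempool],
				include_directories: includes,
				c_args: [cflags, '-mavx2'])
		objs += ice_avx2_lib.extract_objects('ice_rxtx_vec_avx2.c')
	endif
endif
)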
> diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
> index f9ecffa..6191f34 100644
> --- a/drivers/net/ice/ice_rxtx.c
> +++ b/drivers/net/ice/ice_rxtx.c
> @@ -1494,7 +1494,8 @@
>   
>   #ifdef RTE_ARCH_X86
>   	if (dev->rx_pkt_burst == ice_recv_pkts_vec ||
> -	    dev->rx_pkt_burst == ice_recv_scattered_pkts_vec)
> +	    dev->rx_pkt_burst == ice_recv_scattered_pkts_vec ||
> +	    dev->rx_pkt_burst == ice_recv_pkts_vec_avx2)
>   		return ptypes;
>   #endif
>   
> @@ -2236,21 +2237,30 @@ void __attribute__((cold))
>   #ifdef RTE_ARCH_X86
>   	struct ice_rx_queue *rxq;
>   	int i;
> +	bool use_avx2 = false;
>   
>   	if (!ice_rx_vec_dev_check(dev)) {
>   		for (i = 0; i < dev->data->nb_rx_queues; i++) {
>   			rxq = dev->data->rx_queues[i];
>   			(void)ice_rxq_vec_setup(rxq);
>   		}
> +
> +		if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
> +		    rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
> +			use_avx2 = true;
> +
>   		if (dev->data->scattered_rx) {
>   			PMD_DRV_LOG(DEBUG,
>   				    "Using Vector Scattered Rx (port %d).",
>   				    dev->data->port_id);
>   			dev->rx_pkt_burst = ice_recv_scattered_pkts_vec;
>   		} else {
> -			PMD_DRV_LOG(DEBUG, "Using Vector Rx (port %d).",
> +			PMD_DRV_LOG(DEBUG, "Using %sVector Rx (port %d).",
> +				    use_avx2 ? "avx2 " : "",
>   				    dev->data->port_id);
> -			dev->rx_pkt_burst = ice_recv_pkts_vec;
> +			dev->rx_pkt_burst = use_avx2 ?
> +					    ice_recv_pkts_vec_avx2 :
> +					    ice_recv_pkts_vec;
>   		}
>   
>   		return;
> diff --git a/drivers/net/ice/ice_rxtx.h b/drivers/net/ice/ice_rxtx.h
> index 1dde4e7..d1c9b92 100644
> --- a/drivers/net/ice/ice_rxtx.h
> +++ b/drivers/net/ice/ice_rxtx.h
> @@ -179,4 +179,6 @@ uint16_t ice_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>   				     uint16_t nb_pkts);
>   uint16_t ice_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
>   			   uint16_t nb_pkts);
> +uint16_t ice_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
> +				uint16_t nb_pkts);
>   #endif /* _ICE_RXTX_H_ */
> diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c b/drivers/net/ice/ice_rxtx_vec_avx2.c
> new file mode 100644
> index 0000000..763fa9f
> --- /dev/null
> +++ b/drivers/net/ice/ice_rxtx_vec_avx2.c
> @@ -0,0 +1,622 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2019 Intel Corporation
> + */
> +
> +#include "ice_rxtx_vec_common.h"
> +
> +#include <x86intrin.h>
> +
> +#ifndef __INTEL_COMPILER
> +#pragma GCC diagnostic ignored "-Wcast-qual"
> +#endif
> +
> +static inline void
> +ice_rxq_rearm(struct ice_rx_queue *rxq)
> +{
> +	int i;
> +	uint16_t rx_id;
> +	volatile union ice_rx_desc *rxdp;
> +	struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
> +
> +	rxdp = rxq->rx_ring + rxq->rxrearm_start;
> +
> +	/* Pull 'n' more MBUFs into the software ring */
> +	if (rte_mempool_get_bulk(rxq->mp,
> +				 (void *)rxep,
> +				 ICE_RXQ_REARM_THRESH) < 0) {
> +		if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
> +		    rxq->nb_rx_desc) {
> +			__m128i dma_addr0;
> +
> +			dma_addr0 = _mm_setzero_si128();
> +			for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
> +				rxep[i].mbuf = &rxq->fake_mbuf;
> +				_mm_store_si128((__m128i *)&rxdp[i].read,
> +						dma_addr0);
> +			}
> +		}
> +		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
> +			ICE_RXQ_REARM_THRESH;
> +		return;
> +	}
> +
> +#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC

I see the same is done for other Intel NICs, but I wonder: what would be
the performance cost, if any, of making it dynamic?

Making it dynamic (as a devarg, for instance) would make it possible to
change the value when the user is running DPDK from a distro package. It
would also help testing coverage.
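
To make the suggestion more concrete, here is a minimal sketch of such a
devarg, assuming a hypothetical "rx-desc-16b" option parsed with
rte_kvargs (the name is made up, and both descriptor layouts would of
course still need to be compiled in so the Rx path can be chosen at
runtime):

#include <stdbool.h>
#include <string.h>
#include <errno.h>

#include <rte_common.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>

/* hypothetical devarg: rx-desc-16b=1 selects 16B Rx descriptors */
#define ICE_RX_DESC_16B_ARG "rx-desc-16b"

static int
parse_bool_arg(const char *key __rte_unused, const char *value, void *args)
{
	bool *flag = args;

	/* accept "1"/"0" only, to keep the sketch simple */
	*flag = (strcmp(value, "1") == 0);
	return 0;
}

static int
ice_parse_devargs(struct rte_devargs *devargs, bool *use_16b_desc)
{
	static const char * const valid_keys[] = { ICE_RX_DESC_16B_ARG, NULL };
	struct rte_kvargs *kvlist;

	*use_16b_desc = false;	/* keep 32B descriptors by default */
	if (!devargs)
		return 0;

	kvlist = rte_kvargs_parse(devargs->args, valid_keys);
	if (!kvlist)
		return -EINVAL;

	rte_kvargs_process(kvlist, ICE_RX_DESC_16B_ARG,
			   parse_bool_arg, use_16b_desc);
	rte_kvargs_free(kvlist);
	return 0;
}

The init path could then pick the 16B or 32B descriptor handling per
port, instead of relying on the build-time #ifdef.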

Btw, how do you select this option with the meson build system?


