[dpdk-dev] [PATCH v5 3/8] net/ice: support vector SSE in RX

Maxime Coquelin maxime.coquelin at redhat.com
Fri Mar 22 10:42:57 CET 2019



On 3/22/19 3:58 AM, Wenzhuo Lu wrote:
> Signed-off-by: Wenzhuo Lu <wenzhuo.lu at intel.com>
> ---
>   doc/guides/nics/features/ice_vec.ini  |  33 +++
>   drivers/net/ice/Makefile              |   3 +
>   drivers/net/ice/ice_ethdev.c          |   2 -
>   drivers/net/ice/ice_ethdev.h          |   2 +
>   drivers/net/ice/ice_rxtx.c            |  27 +-
>   drivers/net/ice/ice_rxtx.h            |  21 +-
>   drivers/net/ice/ice_rxtx_vec_common.h | 155 +++++++++++
>   drivers/net/ice/ice_rxtx_vec_sse.c    | 496 ++++++++++++++++++++++++++++++++++
>   drivers/net/ice/meson.build           |   4 +
>   9 files changed, 737 insertions(+), 6 deletions(-)
>   create mode 100644 doc/guides/nics/features/ice_vec.ini
>   create mode 100644 drivers/net/ice/ice_rxtx_vec_common.h
>   create mode 100644 drivers/net/ice/ice_rxtx_vec_sse.c
> 
> diff --git a/doc/guides/nics/features/ice_vec.ini b/doc/guides/nics/features/ice_vec.ini
> new file mode 100644
> index 0000000..1a19788
> --- /dev/null
> +++ b/doc/guides/nics/features/ice_vec.ini
> @@ -0,0 +1,33 @@
> +;
> +; Supported features of the 'ice_vec' network poll mode driver.
> +;
> +; Refer to default.ini for the full list of available PMD features.
> +;
> +[Features]
> +Speed capabilities   = Y
> +Link status          = Y
> +Link status event    = Y
> +Rx interrupt         = Y
> +Queue start/stop     = Y
> +MTU update           = Y
> +Jumbo frame          = Y
> +Scattered Rx         = Y
> +Promiscuous mode     = Y
> +Allmulticast mode    = Y
> +Unicast MAC filter   = Y
> +Multicast MAC filter = Y
> +RSS hash             = Y
> +RSS key update       = Y
> +RSS reta update      = Y
> +VLAN filter          = Y
> +Packet type parsing  = Y
> +Rx descriptor status = Y
> +Basic stats          = Y
> +Extended stats       = Y
> +FW version           = Y
> +Module EEPROM dump   = Y
> +BSD nic_uio          = Y
> +Linux UIO            = Y
> +Linux VFIO           = Y
> +x86-32               = Y
> +x86-64               = Y
> diff --git a/drivers/net/ice/Makefile b/drivers/net/ice/Makefile
> index 61846ca..92594bb 100644
> --- a/drivers/net/ice/Makefile
> +++ b/drivers/net/ice/Makefile
> @@ -54,5 +54,8 @@ SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_flow.c
>   
>   SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_ethdev.c
>   SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_rxtx.c
> +ifeq ($(CONFIG_RTE_ARCH_X86), y)
> +SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_rxtx_vec_sse.c
> +endif
>   
>   include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
> index b804be1..8e7c7db 100644
> --- a/drivers/net/ice/ice_ethdev.c
> +++ b/drivers/net/ice/ice_ethdev.c
> @@ -2,8 +2,6 @@
>    * Copyright(c) 2018 Intel Corporation
>    */
>   
> -#include <rte_ethdev_pci.h>
> -
>   #include "base/ice_sched.h"
>   #include "ice_ethdev.h"
>   #include "ice_rxtx.h"
> diff --git a/drivers/net/ice/ice_ethdev.h b/drivers/net/ice/ice_ethdev.h
> index 3cefa5b..151a09e 100644
> --- a/drivers/net/ice/ice_ethdev.h
> +++ b/drivers/net/ice/ice_ethdev.h
> @@ -7,6 +7,8 @@
>   
>   #include <rte_kvargs.h>
>   
> +#include <rte_ethdev_pci.h>
> +
>   #include "base/ice_common.h"
>   #include "base/ice_adminq_cmd.h"
>   
> diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
> index d540ed1..ebb1cab 100644
> --- a/drivers/net/ice/ice_rxtx.c
> +++ b/drivers/net/ice/ice_rxtx.c
> @@ -7,8 +7,6 @@
>   
>   #include "ice_rxtx.h"
>   
> -#define ICE_TD_CMD ICE_TX_DESC_CMD_EOP
> -
>   #define ICE_TX_CKSUM_OFFLOAD_MASK (		 \
>   		PKT_TX_IP_CKSUM |		 \
>   		PKT_TX_L4_MASK |		 \
> @@ -319,6 +317,9 @@
>   	rxq->nb_rx_hold = 0;
>   	rxq->pkt_first_seg = NULL;
>   	rxq->pkt_last_seg = NULL;
> +
> +	rxq->rxrearm_start = 0;
> +	rxq->rxrearm_nb = 0;
>   }
>   
>   int
> @@ -1490,6 +1491,12 @@
>   #endif
>   	    dev->rx_pkt_burst == ice_recv_scattered_pkts)
>   		return ptypes;
> +
> +#ifdef RTE_ARCH_X86
> +	if (dev->rx_pkt_burst == ice_recv_pkts_vec)
> +		return ptypes;
> +#endif
> +
>   	return NULL;
>   }
>   
> @@ -2225,6 +2232,22 @@ void __attribute__((cold))
>   	PMD_INIT_FUNC_TRACE();
>   	struct ice_adapter *ad =
>   		ICE_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
> +#ifdef RTE_ARCH_X86
> +	struct ice_rx_queue *rxq;
> +	int i;
> +
> +	if (!ice_rx_vec_dev_check(dev)) {
> +		for (i = 0; i < dev->data->nb_rx_queues; i++) {
> +			rxq = dev->data->rx_queues[i];
> +			(void)ice_rxq_vec_setup(rxq);
> +		}
> +		PMD_DRV_LOG(DEBUG, "Using Vector Rx (port %d).",
> +			    dev->data->port_id);
> +		dev->rx_pkt_burst = ice_recv_pkts_vec;
> +
> +		return;
> +	}
> +#endif
>   
>   	if (dev->data->scattered_rx) {
>   		/* Set the non-LRO scattered function */
> diff --git a/drivers/net/ice/ice_rxtx.h b/drivers/net/ice/ice_rxtx.h
> index 78b4928..656ca0d 100644
> --- a/drivers/net/ice/ice_rxtx.h
> +++ b/drivers/net/ice/ice_rxtx.h
> @@ -27,6 +27,15 @@
>   
>   #define ICE_SUPPORT_CHAIN_NUM 5
>   
> +#define ICE_TD_CMD                      ICE_TX_DESC_CMD_EOP
> +
> +#define ICE_VPMD_RX_BURST           32
> +#define ICE_VPMD_TX_BURST           32
> +#define ICE_RXQ_REARM_THRESH        32
> +#define ICE_MAX_RX_BURST            ICE_RXQ_REARM_THRESH
> +#define ICE_TX_MAX_FREE_BUF_SZ      64
> +#define ICE_DESCS_PER_LOOP          4
> +
>   typedef void (*ice_rx_release_mbufs_t)(struct ice_rx_queue *rxq);
>   typedef void (*ice_tx_release_mbufs_t)(struct ice_tx_queue *txq);
>   
> @@ -45,13 +54,16 @@ struct ice_rx_queue {
>   	uint16_t nb_rx_hold; /* number of held free RX desc */
>   	struct rte_mbuf *pkt_first_seg; /**< first segment of current packet */
>   	struct rte_mbuf *pkt_last_seg; /**< last segment of current packet */
> -#ifdef RTE_LIBRTE_ICE_RX_ALLOW_BULK_ALLOC
>   	uint16_t rx_nb_avail; /**< number of staged packets ready */
>   	uint16_t rx_next_avail; /**< index of next staged packets */
>   	uint16_t rx_free_trigger; /**< triggers rx buffer allocation */
>   	struct rte_mbuf fake_mbuf; /**< dummy mbuf */
>   	struct rte_mbuf *rx_stage[ICE_RX_MAX_BURST * 2];
> -#endif
> +
> +	uint16_t rxrearm_nb;	/**< number of remaining to be re-armed */
> +	uint16_t rxrearm_start;	/**< the idx we start the re-arming from */
> +	uint64_t mbuf_initializer; /**< value to init mbufs */
> +
>   	uint8_t port_id; /* device port ID */
>   	uint8_t crc_len; /* 0 if CRC stripped, 4 otherwise */
>   	uint16_t queue_id; /* RX queue index */
> @@ -156,4 +168,9 @@ void ice_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
>   int ice_tx_descriptor_status(void *tx_queue, uint16_t offset);
>   void ice_set_default_ptype_table(struct rte_eth_dev *dev);
>   const uint32_t *ice_dev_supported_ptypes_get(struct rte_eth_dev *dev);
> +
> +int ice_rx_vec_dev_check(struct rte_eth_dev *dev);
> +int ice_rxq_vec_setup(struct ice_rx_queue *rxq);
> +uint16_t ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> +			   uint16_t nb_pkts);
>   #endif /* _ICE_RXTX_H_ */
> diff --git a/drivers/net/ice/ice_rxtx_vec_common.h b/drivers/net/ice/ice_rxtx_vec_common.h
> new file mode 100644
> index 0000000..cfef91b
> --- /dev/null
> +++ b/drivers/net/ice/ice_rxtx_vec_common.h
> @@ -0,0 +1,155 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2019 Intel Corporation
> + */
> +
> +#ifndef _ICE_RXTX_VEC_COMMON_H_
> +#define _ICE_RXTX_VEC_COMMON_H_
> +
> +#include "ice_rxtx.h"
> +
> +static inline uint16_t
> +reassemble_packets(struct ice_rx_queue *rxq, struct rte_mbuf **rx_bufs,
As this is in the header file, I think it could be better to prefix it
with 'ice_'. Or maybe with 'ice_rx_' as it seems to be rx-only.
> +		   uint16_t nb_bufs, uint8_t *split_flags)
> +{
> +	struct rte_mbuf *pkts[ICE_VPMD_RX_BURST] = {0}; /*finished pkts*/
> +	struct rte_mbuf *start = rxq->pkt_first_seg;
> +	struct rte_mbuf *end =  rxq->pkt_last_seg;
> +	unsigned int pkt_idx, buf_idx;
> +
> +	for (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) {
> +		if (end) {
> +			/* processing a split packet */
> +			end->next = rx_bufs[buf_idx];
> +			rx_bufs[buf_idx]->data_len += rxq->crc_len;
> +
> +			start->nb_segs++;
> +			start->pkt_len += rx_bufs[buf_idx]->data_len;
> +			end = end->next;
> +
> +			if (!split_flags[buf_idx]) {
> +				/* it's the last packet of the set */
> +				start->hash = end->hash;
> +				start->ol_flags = end->ol_flags;
> +				/* we need to strip crc for the whole packet */
> +				start->pkt_len -= rxq->crc_len;
> +				if (end->data_len > rxq->crc_len) {
> +					end->data_len -= rxq->crc_len;
> +				} else {
> +					/* free up last mbuf */
> +					struct rte_mbuf *secondlast = start;
> +
> +					start->nb_segs--;
> +					while (secondlast->next != end)
> +						secondlast = secondlast->next;
> +					secondlast->data_len -= (rxq->crc_len -
> +							end->data_len);
> +					secondlast->next = NULL;
> +					rte_pktmbuf_free_seg(end);
> +				}
> +				pkts[pkt_idx++] = start;
> +				start = NULL;
> +				end = NULL;
> +			}
> +		} else {
> +			/* not processing a split packet */
> +			if (!split_flags[buf_idx]) {
> +				/* not a split packet, save and skip */
> +				pkts[pkt_idx++] = rx_bufs[buf_idx];
> +				continue;
> +			}
> +			start = rx_bufs[buf_idx];
> +			end = start;
> +			rx_bufs[buf_idx]->data_len += rxq->crc_len;
> +			rx_bufs[buf_idx]->pkt_len += rxq->crc_len;
> +		}
> +	}
> +
> +	/* save the partial packet for next time */
> +	rxq->pkt_first_seg = start;
> +	rxq->pkt_last_seg = end;
> +	rte_memcpy(rx_bufs, pkts, pkt_idx * (sizeof(*pkts)));
> +	return pkt_idx;
> +}
> +
> +static inline void
> +_ice_rx_queue_release_mbufs_vec(struct ice_rx_queue *rxq)
> +{
> +	const unsigned int mask = rxq->nb_rx_desc - 1;
> +	unsigned int i;
> +
> +	if (!rxq->sw_ring || rxq->rxrearm_nb >= rxq->nb_rx_desc)
> +		return;

Maybe not a big deal, but I understand that !rxq->sw_ring is not the
common case, more an error. If so, the if condition could be split in
two, and having the first one tagged with unlikely.

Looking at Tx patch, you should also ensure that rxq != NULL and also
print a debug/error message to be consistent.

> +
> +	/* free all mbufs that are valid in the ring */
> +	if (rxq->rxrearm_nb == 0) {
> +		for (i = 0; i < rxq->nb_rx_desc; i++) {
> +			if (rxq->sw_ring[i].mbuf)
> +				rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
> +		}
> +	} else {
> +		for (i = rxq->rx_tail;
> +		     i != rxq->rxrearm_start;
> +		     i = (i + 1) & mask) {
> +			if (rxq->sw_ring[i].mbuf)
> +				rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
> +		}
> +	}
> +
> +	rxq->rxrearm_nb = rxq->nb_rx_desc;
> +
> +	/* set all entries to NULL */
> +	memset(rxq->sw_ring, 0, sizeof(rxq->sw_ring[0]) * rxq->nb_rx_desc);
> +}

...

> diff --git a/drivers/net/ice/ice_rxtx_vec_sse.c b/drivers/net/ice/ice_rxtx_vec_sse.c
> new file mode 100644
> index 0000000..f6fe9ef
> --- /dev/null
> +++ b/drivers/net/ice/ice_rxtx_vec_sse.c

...

> +
> +/**
> + * Notice:
> + * - nb_pkts < ICE_DESCS_PER_LOOP, just return no packet
> + * - nb_pkts > ICE_VPMD_RX_BURST, only scan ICE_VPMD_RX_BURST
> + *   numbers of DD bits
> + */
> +uint16_t
> +ice_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> +		  uint16_t nb_pkts)
> +{
> +	return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);

Same as below comment.

> +}
> +
> +static void __attribute__((cold))
> +ice_rx_queue_release_mbufs_vec(struct ice_rx_queue *rxq)
> +{
> +	_ice_rx_queue_release_mbufs_vec(rxq);

What is the point of having _ice_rx_queue_release_mbufs_vec as it is
only called once here?

> +}
> +
> +int __attribute__((cold))
> +ice_rxq_vec_setup(struct ice_rx_queue *rxq)
> +{
> +	if (!rxq)
> +		return -1;
> +
> +	rxq->rx_rel_mbufs = ice_rx_queue_release_mbufs_vec;
> +	return ice_rxq_vec_setup_default(rxq);
> +}
> +
> +int __attribute__((cold))
> +ice_rx_vec_dev_check(struct rte_eth_dev *dev)
> +{
> +	return ice_rx_vec_dev_check_default(dev);
> +}
> diff --git a/drivers/net/ice/meson.build b/drivers/net/ice/meson.build
> index 857dc0e..469264d 100644
> --- a/drivers/net/ice/meson.build
> +++ b/drivers/net/ice/meson.build
> @@ -11,3 +11,7 @@ sources = files(
>   
>   deps += ['hash']
>   includes += include_directories('base')
> +
> +if arch_subdir == 'x86'
> +	sources += files('ice_rxtx_vec_sse.c')
> +endif
> 


More information about the dev mailing list