[dpdk-dev] [PATCH v5 1/6] ethdev: fix max Rx packet length

Ananyev, Konstantin konstantin.ananyev at intel.com
Fri Oct 8 17:57:14 CEST 2021



> There is confusion about setting the max Rx packet length; this patch aims
> to clarify it.
> 
> 'rte_eth_dev_configure()' API accepts max Rx packet size via
> 'uint32_t max_rx_pkt_len' field of the config struct 'struct
> rte_eth_conf'.
> 
> Also, the 'rte_eth_dev_set_mtu()' API can be used to set the MTU, and the
> result is stored in '(struct rte_eth_dev)->data->mtu'.
> 
> These two APIs are related but they work in a disconnected way: they
> store the set values in different variables, which makes it hard to
> figure out which one to use. Also, having two different methods for
> related functionality is confusing for users.
> 
> Other issues causing confusion are:
> * maximum transmission unit (MTU) is payload of the Ethernet frame. And
>   'max_rx_pkt_len' is the size of the Ethernet frame. Difference is
>   Ethernet frame overhead, and this overhead may be different from
>   device to device based on what device supports, like VLAN and QinQ.
> * 'max_rx_pkt_len' is only valid when the application requested jumbo
>   frame, which adds additional confusion, and some APIs and PMDs already
>   disregard this documented behavior.
> * For the jumbo frame enabled case, 'max_rx_pkt_len' is a mandatory
>   field; this adds configuration complexity for the application.
> 
> As a solution, both APIs get the MTU as a parameter, and both save the
> result in the same variable '(struct rte_eth_dev)->data->mtu'. For this,
> 'max_rx_pkt_len' is updated to 'mtu', and it is always valid, independent
> of jumbo frame.
> 
> For 'rte_eth_dev_configure()', 'dev->data->dev_conf.rxmode.mtu' is the
> user request; it should be used only within the configure function, and
> the result should be stored in '(struct rte_eth_dev)->data->mtu'. After
> that point both the application and the PMD use the MTU from this variable.
> 
> When the application doesn't provide an MTU during
> 'rte_eth_dev_configure()', the default 'RTE_ETHER_MTU' value is used.
> 
> Additional clarification is done on scattered Rx configuration, in
> relation to MTU and Rx buffer size.
> MTU is used to configure the device for the physical Rx/Tx size limitation;
> the Rx buffer is where Rx packets are stored, and many PMDs use the mbuf
> data buffer size as the Rx buffer size.
> PMDs compare the MTU against the Rx buffer size to decide whether to enable
> scattered Rx. If scattered Rx is not supported by the device, an MTU bigger
> than the Rx buffer size should fail.

LGTM in general, one question below.

...

> diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
> index daf5ca924221..4d0584af52e3 100644
> --- a/lib/ethdev/rte_ethdev.c
> +++ b/lib/ethdev/rte_ethdev.c
> @@ -1324,6 +1324,19 @@ eth_dev_validate_offloads(uint16_t port_id, uint64_t req_offloads,
>  	return ret;
>  }
> 
> +static uint16_t
> +eth_dev_get_overhead_len(uint32_t max_rx_pktlen, uint16_t max_mtu)
> +{
> +	uint16_t overhead_len;
> +
> +	if (max_mtu != UINT16_MAX && max_rx_pktlen > max_mtu)
> +		overhead_len = max_rx_pktlen - max_mtu;

In theory there could be an overflow here (the uint32_t difference is truncated when stored in the uint16_t 'overhead_len'), though I do realize that in practice it is an unlikely situation.
Anyway, why uint16_t? Why not uint32_t for all variables here?
Just so as not to worry about such things.

> +	else
> +		overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
> +
> +	return overhead_len;
> +}
> +
>  int
>  rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>  		      const struct rte_eth_conf *dev_conf)
> @@ -1331,6 +1344,7 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>  	struct rte_eth_dev *dev;
>  	struct rte_eth_dev_info dev_info;
>  	struct rte_eth_conf orig_conf;
> +	uint32_t max_rx_pktlen;
>  	uint16_t overhead_len;
>  	int diag;
>  	int ret;
> @@ -1381,11 +1395,8 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>  		goto rollback;
> 
>  	/* Get the real Ethernet overhead length */
> -	if (dev_info.max_mtu != UINT16_MAX &&
> -	    dev_info.max_rx_pktlen > dev_info.max_mtu)
> -		overhead_len = dev_info.max_rx_pktlen - dev_info.max_mtu;
> -	else
> -		overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
> +	overhead_len = eth_dev_get_overhead_len(dev_info.max_rx_pktlen,
> +			dev_info.max_mtu);
> 
>  	/* If number of queues specified by application for both Rx and Tx is
>  	 * zero, use driver preferred values. This cannot be done individually
> @@ -1454,49 +1465,45 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>  	}
> 
>  	/*
> -	 * If jumbo frames are enabled, check that the maximum RX packet
> -	 * length is supported by the configured device.
> +	 * Check that the maximum RX packet length is supported by the
> +	 * configured device.
>  	 */
> -	if (dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
> -		if (dev_conf->rxmode.max_rx_pkt_len > dev_info.max_rx_pktlen) {
> -			RTE_ETHDEV_LOG(ERR,
> -				"Ethdev port_id=%u max_rx_pkt_len %u > max valid value %u\n",
> -				port_id, dev_conf->rxmode.max_rx_pkt_len,
> -				dev_info.max_rx_pktlen);
> -			ret = -EINVAL;
> -			goto rollback;
> -		} else if (dev_conf->rxmode.max_rx_pkt_len < RTE_ETHER_MIN_LEN) {
> -			RTE_ETHDEV_LOG(ERR,
> -				"Ethdev port_id=%u max_rx_pkt_len %u < min valid value %u\n",
> -				port_id, dev_conf->rxmode.max_rx_pkt_len,
> -				(unsigned int)RTE_ETHER_MIN_LEN);
> -			ret = -EINVAL;
> -			goto rollback;
> -		}
> +	if (dev_conf->rxmode.mtu == 0)
> +		dev->data->dev_conf.rxmode.mtu = RTE_ETHER_MTU;
> +	max_rx_pktlen = dev->data->dev_conf.rxmode.mtu + overhead_len;
> +	if (max_rx_pktlen > dev_info.max_rx_pktlen) {
> +		RTE_ETHDEV_LOG(ERR,
> +			"Ethdev port_id=%u max_rx_pktlen %u > max valid value %u\n",
> +			port_id, max_rx_pktlen, dev_info.max_rx_pktlen);
> +		ret = -EINVAL;
> +		goto rollback;
> +	} else if (max_rx_pktlen < RTE_ETHER_MIN_LEN) {
> +		RTE_ETHDEV_LOG(ERR,
> +			"Ethdev port_id=%u max_rx_pktlen %u < min valid value %u\n",
> +			port_id, max_rx_pktlen, RTE_ETHER_MIN_LEN);
> +		ret = -EINVAL;
> +		goto rollback;
> +	}
> 
> -		/* Scale the MTU size to adapt max_rx_pkt_len */
> -		dev->data->mtu = dev->data->dev_conf.rxmode.max_rx_pkt_len -
> -				overhead_len;
> -	} else {
> -		uint16_t pktlen = dev_conf->rxmode.max_rx_pkt_len;
> -		if (pktlen < RTE_ETHER_MIN_MTU + overhead_len ||
> -		    pktlen > RTE_ETHER_MTU + overhead_len)
> +	if ((dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) == 0) {
> +		if (dev->data->dev_conf.rxmode.mtu < RTE_ETHER_MIN_MTU ||
> +				dev->data->dev_conf.rxmode.mtu > RTE_ETHER_MTU)
>  			/* Use default value */
> -			dev->data->dev_conf.rxmode.max_rx_pkt_len =
> -						RTE_ETHER_MTU + overhead_len;
> +			dev->data->dev_conf.rxmode.mtu = RTE_ETHER_MTU;
>  	}
> 
> +	dev->data->mtu = dev->data->dev_conf.rxmode.mtu;
> +
>  	/*
>  	 * If LRO is enabled, check that the maximum aggregated packet
>  	 * size is supported by the configured device.
>  	 */
>  	if (dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_TCP_LRO) {
>  		if (dev_conf->rxmode.max_lro_pkt_size == 0)
> -			dev->data->dev_conf.rxmode.max_lro_pkt_size =
> -				dev->data->dev_conf.rxmode.max_rx_pkt_len;
> +			dev->data->dev_conf.rxmode.max_lro_pkt_size = max_rx_pktlen;
>  		ret = eth_dev_check_lro_pkt_size(port_id,
>  				dev->data->dev_conf.rxmode.max_lro_pkt_size,
> -				dev->data->dev_conf.rxmode.max_rx_pkt_len,
> +				max_rx_pktlen,
>  				dev_info.max_lro_pkt_size);
>  		if (ret != 0)
>  			goto rollback;
> @@ -2156,13 +2163,20 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
>  	 * If LRO is enabled, check that the maximum aggregated packet
>  	 * size is supported by the configured device.
>  	 */
> +	/* Get the real Ethernet overhead length */
>  	if (local_conf.offloads & DEV_RX_OFFLOAD_TCP_LRO) {
> +		uint16_t overhead_len;
> +		uint32_t max_rx_pktlen;
> +		int ret;
> +
> +		overhead_len = eth_dev_get_overhead_len(dev_info.max_rx_pktlen,
> +				dev_info.max_mtu);
> +		max_rx_pktlen = dev->data->mtu + overhead_len;
>  		if (dev->data->dev_conf.rxmode.max_lro_pkt_size == 0)
> -			dev->data->dev_conf.rxmode.max_lro_pkt_size =
> -				dev->data->dev_conf.rxmode.max_rx_pkt_len;
> -		int ret = eth_dev_check_lro_pkt_size(port_id,
> +			dev->data->dev_conf.rxmode.max_lro_pkt_size = max_rx_pktlen;
> +		ret = eth_dev_check_lro_pkt_size(port_id,
>  				dev->data->dev_conf.rxmode.max_lro_pkt_size,
> -				dev->data->dev_conf.rxmode.max_rx_pkt_len,
> +				max_rx_pktlen,
>  				dev_info.max_lro_pkt_size);
>  		if (ret != 0)
>  			return ret;
> diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
> index afdc53b674cc..9fba2bd73c84 100644
> --- a/lib/ethdev/rte_ethdev.h
> +++ b/lib/ethdev/rte_ethdev.h
> @@ -416,7 +416,7 @@ enum rte_eth_tx_mq_mode {
>  struct rte_eth_rxmode {
>  	/** The multi-queue packet distribution mode to be used, e.g. RSS. */
>  	enum rte_eth_rx_mq_mode mq_mode;
> -	uint32_t max_rx_pkt_len;  /**< Only used if JUMBO_FRAME enabled. */
> +	uint32_t mtu;  /**< Requested MTU. */
>  	/** Maximum allowed size of LRO aggregated packet. */
>  	uint32_t max_lro_pkt_size;
>  	uint16_t split_hdr_size;  /**< hdr buf size (header_split enabled).*/
> diff --git a/lib/ethdev/rte_ethdev_trace.h b/lib/ethdev/rte_ethdev_trace.h
> index 0036bda7465c..1491c815c312 100644
> --- a/lib/ethdev/rte_ethdev_trace.h
> +++ b/lib/ethdev/rte_ethdev_trace.h
> @@ -28,7 +28,7 @@ RTE_TRACE_POINT(
>  	rte_trace_point_emit_u16(nb_tx_q);
>  	rte_trace_point_emit_u32(dev_conf->link_speeds);
>  	rte_trace_point_emit_u32(dev_conf->rxmode.mq_mode);
> -	rte_trace_point_emit_u32(dev_conf->rxmode.max_rx_pkt_len);
> +	rte_trace_point_emit_u32(dev_conf->rxmode.mtu);
>  	rte_trace_point_emit_u64(dev_conf->rxmode.offloads);
>  	rte_trace_point_emit_u32(dev_conf->txmode.mq_mode);
>  	rte_trace_point_emit_u64(dev_conf->txmode.offloads);
> --
> 2.31.1



More information about the dev mailing list