[v2,01/22] ethdev: fix MTU size exceeds max rx packet length

Message ID 20201217092312.27033-2-stevex.yang@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series fix rx packets dropped issue |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Steve Yang Dec. 17, 2020, 9:22 a.m. UTC
  If max rx packet length is smaller then MTU + Ether overhead, that will
drop all MTU size packets.

Update the MTU size according to the max rx packet and Ether overhead.

Fixes: 59d0ecdbf0e1 ("ethdev: MTU accessors")

Signed-off-by: Steve Yang <stevex.yang@intel.com>
---
 lib/librte_ethdev/rte_ethdev.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)
  

Comments

Andrew Rybchenko Dec. 28, 2020, 2:51 p.m. UTC | #1
On 12/17/20 12:22 PM, Steve Yang wrote:
> If max rx packet length is smaller then MTU + Ether overhead, that will
> drop all MTU size packets.
> 
> Update the MTU size according to the max rx packet and Ether overhead.
> 
> Fixes: 59d0ecdbf0e1 ("ethdev: MTU accessors")
> 
> Signed-off-by: Steve Yang <stevex.yang@intel.com>
> ---
>  lib/librte_ethdev/rte_ethdev.c | 21 ++++++++++++++++++---
>  1 file changed, 18 insertions(+), 3 deletions(-)
> 
> diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
> index 17ddacc78d..ff6a1e675f 100644
> --- a/lib/librte_ethdev/rte_ethdev.c
> +++ b/lib/librte_ethdev/rte_ethdev.c
> @@ -1292,6 +1292,7 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>  	struct rte_eth_dev *dev;
>  	struct rte_eth_dev_info dev_info;
>  	struct rte_eth_conf orig_conf;
> +	uint16_t overhead_len;
>  	int diag;
>  	int ret;
>  
> @@ -1323,6 +1324,15 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>  	if (ret != 0)
>  		goto rollback;
>  
> +	/* Get the real Ethernet overhead length */
> +	if (dev_info.max_mtu &&

First of all I'm not sure that we need to handle 0 value
separately. Or it should be checked separately and trigger
an error since it is a driver mis-behaviour.

If kept, it should be compared vs 0 explicitly in accordance
with DPDK coding style.

> +	    dev_info.max_mtu != UINT16_MAX &&
> +	    dev_info.max_rx_pktlen &&

It should be compared vs 0 explicitly in accordance
with DPDK coding style.

> +	    dev_info.max_rx_pktlen > dev_info.max_mtu)
> +		overhead_len = dev_info.max_rx_pktlen - dev_info.max_mtu;
> +	else
> +		overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
> +
>  	/* If number of queues specified by application for both Rx and Tx is
>  	 * zero, use driver preferred values. This cannot be done individually
>  	 * as it is valid for either Tx or Rx (but not both) to be zero.
> @@ -1410,13 +1420,18 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>  			goto rollback;
>  		}
>  	} else {
> -		if (dev_conf->rxmode.max_rx_pkt_len < RTE_ETHER_MIN_LEN ||
> -			dev_conf->rxmode.max_rx_pkt_len > RTE_ETHER_MAX_LEN)
> +		uint16_t pktlen = dev_conf->rxmode.max_rx_pkt_len;
> +		if (pktlen < RTE_ETHER_MIN_MTU + overhead_len ||
> +			pktlen > RTE_ETHER_MTU + overhead_len)

Alignment looks misleading. Either two tabs or just 4 spaces.

>  			/* Use default value */
>  			dev->data->dev_conf.rxmode.max_rx_pkt_len =
> -							RTE_ETHER_MAX_LEN;
> +						RTE_ETHER_MTU + overhead_len;
>  	}
>  
> +	/* Scale the MTU size to adapt max_rx_pkt_len */
> +	dev->data->mtu = dev->data->dev_conf.rxmode.max_rx_pkt_len -
> +				overhead_len;
> +

Is it expected side effect that re-configure always resets
previously set MTU. I.e.:
   configure -> set_mtu -> start -> stop -> re-configure
and set MTU is lost.

>  	/*
>  	 * If LRO is enabled, check that the maximum aggregated packet
>  	 * size is supported by the configured device.
>
  
Lijun Ou Dec. 30, 2020, 10:19 a.m. UTC | #2
在 2020/12/17 17:22, Steve Yang 写道:
> If max rx packet length is smaller then MTU + Ether overhead, that will
> drop all MTU size packets.
> 
> Update the MTU size according to the max rx packet and Ether overhead.
> 
> Fixes: 59d0ecdbf0e1 ("ethdev: MTU accessors")
> 
> Signed-off-by: Steve Yang <stevex.yang@intel.com>
> ---
>   lib/librte_ethdev/rte_ethdev.c | 21 ++++++++++++++++++---
>   1 file changed, 18 insertions(+), 3 deletions(-)
> 
> diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
> index 17ddacc78d..ff6a1e675f 100644
> --- a/lib/librte_ethdev/rte_ethdev.c
> +++ b/lib/librte_ethdev/rte_ethdev.c
> @@ -1292,6 +1292,7 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>   	struct rte_eth_dev *dev;
>   	struct rte_eth_dev_info dev_info;
>   	struct rte_eth_conf orig_conf;
> +	uint16_t overhead_len;
>   	int diag;
>   	int ret;
>   
> @@ -1323,6 +1324,15 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>   	if (ret != 0)
>   		goto rollback;
>   
> +	/* Get the real Ethernet overhead length */
> +	if (dev_info.max_mtu &&
> +	    dev_info.max_mtu != UINT16_MAX &&
> +	    dev_info.max_rx_pktlen &&
> +	    dev_info.max_rx_pktlen > dev_info.max_mtu)
> +		overhead_len = dev_info.max_rx_pktlen - dev_info.max_mtu;
> +	else
> +		overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
> +
>   	/* If number of queues specified by application for both Rx and Tx is
>   	 * zero, use driver preferred values. This cannot be done individually
>   	 * as it is valid for either Tx or Rx (but not both) to be zero.
> @@ -1410,13 +1420,18 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>   			goto rollback;
>   		}
>   	} else {
> -		if (dev_conf->rxmode.max_rx_pkt_len < RTE_ETHER_MIN_LEN ||
> -			dev_conf->rxmode.max_rx_pkt_len > RTE_ETHER_MAX_LEN)
> +		uint16_t pktlen = dev_conf->rxmode.max_rx_pkt_len;
> +		if (pktlen < RTE_ETHER_MIN_MTU + overhead_len ||
> +			pktlen > RTE_ETHER_MTU + overhead_len)
>   			/* Use default value */
>   			dev->data->dev_conf.rxmode.max_rx_pkt_len =
> -							RTE_ETHER_MAX_LEN;
> +						RTE_ETHER_MTU + overhead_len;
>   	}
>   
> +	/* Scale the MTU size to adapt max_rx_pkt_len */
> +	dev->data->mtu = dev->data->dev_conf.rxmode.max_rx_pkt_len -
> +				overhead_len;
> +
Hi
   I think the dev->data->mtu should be updated after configured 
success. So the update in this position seems unreasonable. 
'max_rx_pkt_len' is only used when jumbo_frame enabled, as follows:
struct rte_eth_rxmode {
	.....
	uint32_t max_rx_pkt_len;  /**< Only used if JUMBO_FRAME enabled. */
	/** Maximum allowed size of LRO aggregated packet. */
	....};

So If DEV_RX_OFFLOAD_JUMBO_FRAME is set to rxmode.offload, driver should 
configure mtu to hardware according to 'max_rx_pkt_len' and update 
dev->data->mtu. This seems more reasonable. And some PMD drivers are 
already doing this.

In addition, validity check for 'max_rx_pkt_len' in 
rte_eth_dev_configure API may be error. It should be greater than 
'RTE_ETHER_MTU + overhead_len'.
Because driver must enable DEV_RX_OFFLOAD_JUMBO_FRAME when user set mtu 
with greater than 1500 by 'rte_eth_dev_set_mtu' API.

What do you think?

Thanks
Lijun Ou

>   	/*
>   	 * If LRO is enabled, check that the maximum aggregated packet
>   	 * size is supported by the configured device.
>
  
Ferruh Yigit Jan. 13, 2021, 10:25 a.m. UTC | #3
On 1/6/2021 3:36 AM, Yang, SteveX wrote:
> Hi Lijun,
> 
> Thanks for your review. Please check my comments inline.
> Regards,
> Steve Yang.
> 
>> -----Original Message-----
>> From: oulijun <oulijun@huawei.com>
>> Sent: Wednesday, December 30, 2020 6:20 PM
>> To: Yang, SteveX <stevex.yang@intel.com>; dev@dpdk.org
>> Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>; Xing, Beilei
>> <beilei.xing@intel.com>; Iremonger, Bernard
>> <bernard.iremonger@intel.com>; asomalap@amd.com;
>> rahul.lakkireddy@chelsio.com; hemant.agrawal@nxp.com;
>> sachin.saxena@oss.nxp.com; Guo, Jia <jia.guo@intel.com>; Wang, Haiyue
>> <haiyue.wang@intel.com>; g.singh@nxp.com; xuanziyang2@huawei.com;
>> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com;
>> xavier.huwei@huawei.com; humin29@huawei.com;
>> yisen.zhuang@huawei.com; Wu, Jingjing <jingjing.wu@intel.com>; Yang,
>> Qiming <qiming.yang@intel.com>; Zhang, Qi Z <qi.z.zhang@intel.com>; Xu,
>> Rosen <rosen.xu@intel.com>; sthotton@marvell.com;
>> srinivasan@marvell.com; heinrich.kuhn@netronome.com;
>> hkalra@marvell.com; jerinj@marvell.com; ndabilpuram@marvell.com;
>> kirankumark@marvell.com; rmody@marvell.com; shshaikh@marvell.com;
>> andrew.rybchenko@oktetlabs.ru; mczekaj@marvell.com;
>> thomas@monjalon.net; Yigit, Ferruh <ferruh.yigit@intel.com>;
>> ivan.boule@6wind.com; Ananyev, Konstantin
>> <konstantin.ananyev@intel.com>; samuel.gauthier@6wind.com;
>> david.marchand@6wind.com; shahafs@mellanox.com;
>> stephen@networkplumber.org; maxime.coquelin@redhat.com;
>> olivier.matz@6wind.com; lihuisong@huawei.com; shreyansh.jain@nxp.com;
>> wei.dai@intel.com; fengchunsong@huawei.com; chenhao164@huawei.com;
>> tangchengchang@hisilicon.com; Zhang, Helin <helin.zhang@intel.com>;
>> yanglong.wu@intel.com; xiaolong.ye@intel.com; Xu, Ting
>> <ting.xu@intel.com>; Li, Xiaoyun <xiaoyun.li@intel.com>; Wei, Dan
>> <dan.wei@intel.com>; Pei, Andy <andy.pei@intel.com>;
>> vattunuru@marvell.com; skori@marvell.com; sony.chacko@qlogic.com;
>> Richardson, Bruce <bruce.richardson@intel.com>; ivan.malov@oktetlabs.ru;
>> rad@semihalf.com; slawomir.rosek@semihalf.com;
>> kamil.rytarowski@caviumnetworks.com; Zhao1, Wei <wei.zhao1@intel.com>;
>> Jiang, JunyuX <junyux.jiang@intel.com>; kumaras@chelsio.com;
>> girish.nandibasappa@amd.com; rolf.neugebauer@netronome.com;
>> alejandro.lucero@netronome.com
>> Subject: Re: [PATCH v2 01/22] ethdev: fix MTU size exceeds max rx packet
>> length
>>
>>
>>
>> 在 2020/12/17 17:22, Steve Yang 写道:
>>> If max rx packet length is smaller then MTU + Ether overhead, that
>>> will drop all MTU size packets.
>>>
>>> Update the MTU size according to the max rx packet and Ether overhead.
>>>
>>> Fixes: 59d0ecdbf0e1 ("ethdev: MTU accessors")
>>>
>>> Signed-off-by: Steve Yang <stevex.yang@intel.com>
>>> ---
>>>    lib/librte_ethdev/rte_ethdev.c | 21 ++++++++++++++++++---
>>>    1 file changed, 18 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/lib/librte_ethdev/rte_ethdev.c
>>> b/lib/librte_ethdev/rte_ethdev.c index 17ddacc78d..ff6a1e675f 100644
>>> --- a/lib/librte_ethdev/rte_ethdev.c
>>> +++ b/lib/librte_ethdev/rte_ethdev.c
>>> @@ -1292,6 +1292,7 @@ rte_eth_dev_configure(uint16_t port_id,
>> uint16_t nb_rx_q, uint16_t nb_tx_q,
>>>    struct rte_eth_dev *dev;
>>>    struct rte_eth_dev_info dev_info;
>>>    struct rte_eth_conf orig_conf;
>>> +uint16_t overhead_len;
>>>    int diag;
>>>    int ret;
>>>
>>> @@ -1323,6 +1324,15 @@ rte_eth_dev_configure(uint16_t port_id,
>> uint16_t nb_rx_q, uint16_t nb_tx_q,
>>>    if (ret != 0)
>>>    goto rollback;
>>>
>>> +/* Get the real Ethernet overhead length */
>>> +if (dev_info.max_mtu &&
>>> +    dev_info.max_mtu != UINT16_MAX &&
>>> +    dev_info.max_rx_pktlen &&
>>> +    dev_info.max_rx_pktlen > dev_info.max_mtu)
>>> +overhead_len = dev_info.max_rx_pktlen -
>> dev_info.max_mtu;
>>> +else
>>> +overhead_len = RTE_ETHER_HDR_LEN +
>> RTE_ETHER_CRC_LEN;
>>> +
>>>    /* If number of queues specified by application for both Rx and Tx is
>>>     * zero, use driver preferred values. This cannot be done individually
>>>     * as it is valid for either Tx or Rx (but not both) to be zero.
>>> @@ -1410,13 +1420,18 @@ rte_eth_dev_configure(uint16_t port_id,
>> uint16_t nb_rx_q, uint16_t nb_tx_q,
>>>    goto rollback;
>>>    }
>>>    } else {
>>> -if (dev_conf->rxmode.max_rx_pkt_len <
>> RTE_ETHER_MIN_LEN ||
>>> -dev_conf->rxmode.max_rx_pkt_len >
>> RTE_ETHER_MAX_LEN)
>>> +uint16_t pktlen = dev_conf->rxmode.max_rx_pkt_len;
>>> +if (pktlen < RTE_ETHER_MIN_MTU + overhead_len ||
>>> +pktlen > RTE_ETHER_MTU + overhead_len)
>>>    /* Use default value */
>>>    dev->data->dev_conf.rxmode.max_rx_pkt_len =
>>> -
>> RTE_ETHER_MAX_LEN;
>>> +RTE_ETHER_MTU +
>> overhead_len;
>>>    }
>>>
>>> +/* Scale the MTU size to adapt max_rx_pkt_len */
>>> +dev->data->mtu = dev->data->dev_conf.rxmode.max_rx_pkt_len -
>>> +overhead_len;
>>> +
>> Hi
>>     I think the dev->data->mtu should be updated after configured success. So
>> the update in this position seems unreasonable.
> 
> Good suggestion, I've checked some PMDs, and the corresponding assignments are existed within their 'dev_configure_ops'.
> For example: [bnxt_ethdev.c # 1100]
> ```
> if (rx_offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
> eth_dev->data->mtu =
> eth_dev->data->dev_conf.rxmode.max_rx_pkt_len -
> RTE_ETHER_HDR_LEN - RTE_ETHER_CRC_LEN - VLAN_TAG_SIZE *
> BNXT_NUM_VLANS;
> bnxt_mtu_set_op(eth_dev, eth_dev->data->mtu);
> }
> ```
> Hence, I will move this 'mtu' assignment to line after '(*dev->dev_ops->dev_configure)(dev)'.

There is a 'rollback' already for the similar reason.

What do you think to store the old MTU and restore it in the rollback if needed? 
So you don't need to change where MTU set.

> Actually, it doesn't matter if it is the JUMBO frame, the 'mtu' must keep pace with 'max_rx_pkt_len' anytime.
> E.g.: if 'mtu' is 1500, and 'max_rx_pkt_len' is set to 1510 by command line, that 'mtu' must be reduced to '1510 - 18 = 1492' in 'dev_configure' phase, even though it is not a Jumbo frame.
> 

According to the API definition:
  max_rx_pkt_len;  /**< Only used if JUMBO_FRAME enabled. */

The concern was removing this check from the ethdev may break some PMDs that 
does not follow the API and use the 'max_rx_pkt_len' even if JUMBO frame offload 
set.

For this release, we can afford to break the PMDs implementing wrong and fix 
them after testing.

>> 'max_rx_pkt_len' is only used when jumbo_frame enabled, as follows:
>> struct rte_eth_rxmode {
>> .....
>> uint32_t max_rx_pkt_len;  /**< Only used if JUMBO_FRAME
>> enabled. */
>> /** Maximum allowed size of LRO aggregated packet. */
>> ....};
>>
>> So If DEV_RX_OFFLOAD_JUMBO_FRAME is set to rxmode.offload, driver
>> should configure mtu to hardware according to 'max_rx_pkt_len' and update
>> dev->data->mtu. This seems more reasonable. And some PMD drivers are
>> already doing this.
> 
>>
>> In addition, validity check for 'max_rx_pkt_len' in
>> rte_eth_dev_configure API may be error. It should be greater than
>> 'RTE_ETHER_MTU + overhead_len'.
>> Because driver must enable DEV_RX_OFFLOAD_JUMBO_FRAME when user
>> set mtu
>> with greater than 1500 by 'rte_eth_dev_set_mtu' API.
>>
> 
> I'm not sure if it is following validity check you mentioned.
>>> +if (pktlen < RTE_ETHER_MIN_MTU + overhead_len ||
>>> +pktlen > RTE_ETHER_MTU + overhead_len)
> If yes, no issue here, because the 'rte_eth_dev_configure' is only used for initializing device,
> and just gives out an initial 'max_rx_pkt_len' value by default. Once user tries to change mtu via 'set mtu',
> the 'max_rx_pkt_len' will be updated to expected value in the 'mtu_set' ops.
> Another aspect, that is the 'disable DEV_RX_OFFLOAD_JUMBO' branch, 'max_rx_pkt_len' must be limited to
> effective max value for non-Jumbo frame.
> 

In the 'disable DEV_RX_OFFLOAD_JUMBO' branch, 'max_rx_pkt_len' should be invalid 
according API doc as mentioned above, I guess Lijun refers to this.

Overall what about updating as following, in 'rte_eth_dev_configure()':

if (offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
   // max_rx_pkt_len checks
   old_mtu = mtu
   mtu = max_rx_pkt_len - overhead_len
}

...

rollback:
  mtu = old_mtu;


>> What do you think?
>>
>> Thanks
>> Lijun Ou
>>
>>>    /*
>>>     * If LRO is enabled, check that the maximum aggregated packet
>>>     * size is supported by the configured device.
>>>
  
Ferruh Yigit Jan. 13, 2021, 10:32 a.m. UTC | #4
On 12/28/2020 2:51 PM, Andrew Rybchenko wrote:
> On 12/17/20 12:22 PM, Steve Yang wrote:
>> If max rx packet length is smaller then MTU + Ether overhead, that will
>> drop all MTU size packets.
>>
>> Update the MTU size according to the max rx packet and Ether overhead.
>>
>> Fixes: 59d0ecdbf0e1 ("ethdev: MTU accessors")
>>
>> Signed-off-by: Steve Yang <stevex.yang@intel.com>
>> ---
>>   lib/librte_ethdev/rte_ethdev.c | 21 ++++++++++++++++++---
>>   1 file changed, 18 insertions(+), 3 deletions(-)
>>
>> diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
>> index 17ddacc78d..ff6a1e675f 100644
>> --- a/lib/librte_ethdev/rte_ethdev.c
>> +++ b/lib/librte_ethdev/rte_ethdev.c
>> @@ -1292,6 +1292,7 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>>   	struct rte_eth_dev *dev;
>>   	struct rte_eth_dev_info dev_info;
>>   	struct rte_eth_conf orig_conf;
>> +	uint16_t overhead_len;
>>   	int diag;
>>   	int ret;
>>   
>> @@ -1323,6 +1324,15 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>>   	if (ret != 0)
>>   		goto rollback;
>>   
>> +	/* Get the real Ethernet overhead length */
>> +	if (dev_info.max_mtu &&
> 
> First of all I'm not sure that we need to handle 0 value
> separately. Or it should be checked separately and trigger
> an error since it is a driver mis-behaviour.
> 

Agree. Most probably we can drop it, "dev_info.max_mtu != UINT16_MAX" covers the 
case driver doesn't provide any value.

> If kept, it should be compared vs 0 explicitly in accordance
> with DPDK coding style.
> 
>> +	    dev_info.max_mtu != UINT16_MAX &&
>> +	    dev_info.max_rx_pktlen &&
> 
> It should be compared vs 0 explicitly in accordance
> with DPDK coding style.
> 
>> +	    dev_info.max_rx_pktlen > dev_info.max_mtu)
>> +		overhead_len = dev_info.max_rx_pktlen - dev_info.max_mtu;
>> +	else
>> +		overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
>> +
>>   	/* If number of queues specified by application for both Rx and Tx is
>>   	 * zero, use driver preferred values. This cannot be done individually
>>   	 * as it is valid for either Tx or Rx (but not both) to be zero.
>> @@ -1410,13 +1420,18 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
>>   			goto rollback;
>>   		}
>>   	} else {
>> -		if (dev_conf->rxmode.max_rx_pkt_len < RTE_ETHER_MIN_LEN ||
>> -			dev_conf->rxmode.max_rx_pkt_len > RTE_ETHER_MAX_LEN)
>> +		uint16_t pktlen = dev_conf->rxmode.max_rx_pkt_len;
>> +		if (pktlen < RTE_ETHER_MIN_MTU + overhead_len ||
>> +			pktlen > RTE_ETHER_MTU + overhead_len)
> 
> Alignment looks misleading. Either two tabs or just 4 spaces.
> 
>>   			/* Use default value */
>>   			dev->data->dev_conf.rxmode.max_rx_pkt_len =
>> -							RTE_ETHER_MAX_LEN;
>> +						RTE_ETHER_MTU + overhead_len;
>>   	}
>>   
>> +	/* Scale the MTU size to adapt max_rx_pkt_len */
>> +	dev->data->mtu = dev->data->dev_conf.rxmode.max_rx_pkt_len -
>> +				overhead_len;
>> +
> 
> Is it expected side effect that re-configure always resets
> previously set MTU. I.e.:
>     configure -> set_mtu -> start -> stop -> re-configure
> and set MTU is lost.
> 

This is the problem of two APIs updating same/related values, when device 
re-configure with a given 'max_rx_pkt_len', can we know if the intentions is 
update to the value to new provided 'max_rx_pkt_len' or not?

For this case if user want to keep the MTU value, can read the MTU from device 
first and set 'max_rx_pkt_len' according it.

And we can reduce to updating the MTU in the configure() only when JUMBO frame 
offload is requested, that should be when the 'max_rx_pkt_len' is valid only.

>>   	/*
>>   	 * If LRO is enabled, check that the maximum aggregated packet
>>   	 * size is supported by the configured device.
>>
>
  
Ferruh Yigit Jan. 13, 2021, 11:04 a.m. UTC | #5
On 1/6/2021 3:36 AM, Yang, SteveX wrote:

<...>

>>> If max rx packet length is smaller then MTU + Ether overhead, that
>>> will drop all MTU size packets.
>>>
>>> Update the MTU size according to the max rx packet and Ether overhead.
>>>

Can you please elaborate the explanation a little more, it is hard to understand 
the problem with above description.

Problem is:
"
Ethdev is using default Ethernet overhead to decide if provided 'max_rx_pkt_len' 
value is bigger than max (non jumbo) MTU value, and limits it to MAX if it is.

Since the application/driver used Ethernet overhead is different than the ethdev 
one, check result is wrong.

If the driver is using Ethernet overhead bigger than the default one, the 
provided 'max_rx_pkt_len' is trimmed down, and in the driver when correct 
Ethernet overhead is used to convert back, the resulting MTU is less than the 
intended one, causing some packets to be dropped.

Like,
app     -> max_rx_pkt_len = 1500/*mtu*/ + 22/*overhead*/ = 1522
ethdev  -> 1522 > 1518/*MAX*/; max_rx_pkt_len = 1518
driver  -> MTU = 1518 - 22 = 1496
Packets with size 1497-1500 are dropped although intention is to be able to 
send/receive them.

The fix is to make ethdev use the correct Ethernet overhead for port, instead of 
default one.
"

Addition to above, the code reviews suggest slight difference, like dropping the 
ethdev check completely for non-jumbo packets, please reflect them to the commit 
log if changes are implemented.

>>> Fixes: 59d0ecdbf0e1 ("ethdev: MTU accessors")
>>>
>>> Signed-off-by: Steve Yang <stevex.yang@intel.com>
>>> ---
  

Patch

diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
index 17ddacc78d..ff6a1e675f 100644
--- a/lib/librte_ethdev/rte_ethdev.c
+++ b/lib/librte_ethdev/rte_ethdev.c
@@ -1292,6 +1292,7 @@  rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 	struct rte_eth_dev *dev;
 	struct rte_eth_dev_info dev_info;
 	struct rte_eth_conf orig_conf;
+	uint16_t overhead_len;
 	int diag;
 	int ret;
 
@@ -1323,6 +1324,15 @@  rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 	if (ret != 0)
 		goto rollback;
 
+	/* Get the real Ethernet overhead length */
+	if (dev_info.max_mtu &&
+	    dev_info.max_mtu != UINT16_MAX &&
+	    dev_info.max_rx_pktlen &&
+	    dev_info.max_rx_pktlen > dev_info.max_mtu)
+		overhead_len = dev_info.max_rx_pktlen - dev_info.max_mtu;
+	else
+		overhead_len = RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
+
 	/* If number of queues specified by application for both Rx and Tx is
 	 * zero, use driver preferred values. This cannot be done individually
 	 * as it is valid for either Tx or Rx (but not both) to be zero.
@@ -1410,13 +1420,18 @@  rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
 			goto rollback;
 		}
 	} else {
-		if (dev_conf->rxmode.max_rx_pkt_len < RTE_ETHER_MIN_LEN ||
-			dev_conf->rxmode.max_rx_pkt_len > RTE_ETHER_MAX_LEN)
+		uint16_t pktlen = dev_conf->rxmode.max_rx_pkt_len;
+		if (pktlen < RTE_ETHER_MIN_MTU + overhead_len ||
+			pktlen > RTE_ETHER_MTU + overhead_len)
 			/* Use default value */
 			dev->data->dev_conf.rxmode.max_rx_pkt_len =
-							RTE_ETHER_MAX_LEN;
+						RTE_ETHER_MTU + overhead_len;
 	}
 
+	/* Scale the MTU size to adapt max_rx_pkt_len */
+	dev->data->mtu = dev->data->dev_conf.rxmode.max_rx_pkt_len -
+				overhead_len;
+
 	/*
 	 * If LRO is enabled, check that the maximum aggregated packet
 	 * size is supported by the configured device.