[v3,3/5] net/tap: fix check for mbuf's nb_segs failure

Message ID 1586233383-1084-1-git-send-email-wangyunjian@huawei.com (mailing list archive)
State Changes Requested, archived
Delegated to: Ferruh Yigit
Headers
Series fixes for tap |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation success Compilation OK

Commit Message

Yunjian Wang April 7, 2020, 4:23 a.m. UTC
  From: Yunjian Wang <wangyunjian@huawei.com>

Currently the rxq->pool is a concatenation (chain) of mbufs, but its nb_segs is 1.
When doing sanity checks on the mbuf, it fails.

Fixes: 0781f5762cfe ("net/tap: support segmented mbufs")
CC: stable@dpdk.org

Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
---
 drivers/net/tap/rte_eth_tap.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)
  

Comments

Ferruh Yigit April 7, 2020, 3:15 p.m. UTC | #1
On 4/7/2020 5:23 AM, wangyunjian wrote:
> From: Yunjian Wang <wangyunjian@huawei.com>
> 
> Now the rxq->pool is mbuf concatenation, But its nb_segs is 1.
> When do some sanity checks on the mbuf, it fails.

+1, 'rxq->pool' seems to be the Rx ring representation as linked mbufs, and the empty
ones have their 'nb_segs' values set to 1.

> 
> Fixes: 0781f5762cfe ("net/tap: support segmented mbufs")
> CC: stable@dpdk.org
> 
> Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
> ---
>  drivers/net/tap/rte_eth_tap.c | 27 ++++++++++++++++++++++-----
>  1 file changed, 22 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index a9ba0ca68..703fcceb9 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -339,6 +339,23 @@ tap_rx_offload_get_queue_capa(void)
>  	       DEV_RX_OFFLOAD_TCP_CKSUM;
>  }
>  
> +static void
> +tap_rxq_pool_free(struct rte_mbuf *pool)
> +{
> +	struct rte_mbuf *mbuf = pool;
> +	uint16_t nb_segs = 1;
> +
> +	if (mbuf == NULL)
> +		return;
> +
> +	while (mbuf->next) {
> +		mbuf = mbuf->next;
> +		nb_segs++;
> +	}
> +	pool->nb_segs = nb_segs;
> +	rte_pktmbuf_free(pool);
> +}

Since you are already iterating the chain, why not free immediately instead of
calculating the nb_segs and making API go through the chain again, what about
following:

tap_rxq_pool_free(struct rte_mbuf *pool)
{
    struct rte_mbuf *next;







    while (pool) {



         next = pool->next;



         rte_pktmbuf_free(pool);



         pool = next;



    }
}

> +
>  /* Callback to handle the rx burst of packets to the correct interface and
>   * file descriptor(s) in a multi-queue setup.
>   */
> @@ -389,7 +406,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>  					goto end;
>  
>  				seg->next = NULL;
> -				rte_pktmbuf_free(mbuf);
> +				tap_rxq_pool_free(mbuf);

As far as I can see 'mbuf' should have correct 'nb_segs' value, and it can
continue to use 'rte_pktmbuf_free()'. If you can observe the problem can you
please try this?

>  
>  				goto end;
>  			}
> @@ -1033,7 +1050,7 @@ tap_dev_close(struct rte_eth_dev *dev)
>  			rxq = &internals->rxq[i];
>  			close(process_private->rxq_fds[i]);
>  			process_private->rxq_fds[i] = -1;
> -			rte_pktmbuf_free(rxq->pool);
> +			tap_rxq_pool_free(rxq->pool);
>  			rte_free(rxq->iovecs);
>  			rxq->pool = NULL;
>  			rxq->iovecs = NULL;
> @@ -1072,7 +1089,7 @@ tap_rx_queue_release(void *queue)
>  	if (process_private->rxq_fds[rxq->queue_id] > 0) {
>  		close(process_private->rxq_fds[rxq->queue_id]);
>  		process_private->rxq_fds[rxq->queue_id] = -1;
> -		rte_pktmbuf_free(rxq->pool);
> +		tap_rxq_pool_free(rxq->pool);
>  		rte_free(rxq->iovecs);
>  		rxq->pool = NULL;
>  		rxq->iovecs = NULL;
> @@ -1480,7 +1497,7 @@ tap_rx_queue_setup(struct rte_eth_dev *dev,
>  	return 0;
>  
>  error:
> -	rte_pktmbuf_free(rxq->pool);
> +	tap_rxq_pool_free(rxq->pool);
>  	rxq->pool = NULL;
>  	rte_free(rxq->iovecs);
>  	rxq->iovecs = NULL;
> @@ -2435,7 +2452,7 @@ rte_pmd_tap_remove(struct rte_vdev_device *dev)
>  			rxq = &internals->rxq[i];
>  			close(process_private->rxq_fds[i]);
>  			process_private->rxq_fds[i] = -1;
> -			rte_pktmbuf_free(rxq->pool);
> +			tap_rxq_pool_free(rxq->pool);
>  			rte_free(rxq->iovecs);
>  			rxq->pool = NULL;
>  			rxq->iovecs = NULL;
>
  
Stephen Hemminger April 7, 2020, 3:38 p.m. UTC | #2
On Tue, 7 Apr 2020 16:15:16 +0100
Ferruh Yigit <ferruh.yigit@intel.com> wrote:

> > +static void
> > +tap_rxq_pool_free(struct rte_mbuf *pool)
> > +{
> > +	struct rte_mbuf *mbuf = pool;
> > +	uint16_t nb_segs = 1;
> > +
> > +	if (mbuf == NULL)
> > +		return;
> > +
> > +	while (mbuf->next) {
> > +		mbuf = mbuf->next;
> > +		nb_segs++;
> > +	}
> > +	pool->nb_segs = nb_segs;
> > +	rte_pktmbuf_free(pool);
> > +}  

Since the mbuf is going to be freed, why bother with nb_segs?
Since rte_pktmbuf_free takes NULL as an argument, and frees the m->next chain,
I don't see why not just
	rte_pktmbuf_free(pool)
  
Ferruh Yigit April 7, 2020, 3:45 p.m. UTC | #3
On 4/7/2020 4:38 PM, Stephen Hemminger wrote:
> On Tue, 7 Apr 2020 16:15:16 +0100
> Ferruh Yigit <ferruh.yigit@intel.com> wrote:
> 
>>> +static void
>>> +tap_rxq_pool_free(struct rte_mbuf *pool)
>>> +{
>>> +	struct rte_mbuf *mbuf = pool;
>>> +	uint16_t nb_segs = 1;
>>> +
>>> +	if (mbuf == NULL)
>>> +		return;
>>> +
>>> +	while (mbuf->next) {
>>> +		mbuf = mbuf->next;
>>> +		nb_segs++;
>>> +	}
>>> +	pool->nb_segs = nb_segs;
>>> +	rte_pktmbuf_free(pool);
>>> +}  
> 
> Since mbuf is going to be free, why bother with nb_segs.
> Since rte_pktmbuf_free takes NULL as an argument, and frees the m->next chain
> I don't see why not just
> 	rte_pktmbuf_free(pool)
> 

The chain is not constructed properly and 'nb_segs' is wrong, so a single
'rte_pktmbuf_free()' call won't free the whole chain, only the first mbuf.

This implementation is fixing 'nb_segs' so that 'rte_pktmbuf_free()' can work
as you suggested.

Or, I suggest iterating the list and fixing all mbufs, instead of fixing 'nb_segs';
this may be one iteration less.
  
Ferruh Yigit April 7, 2020, 3:49 p.m. UTC | #4
On 4/7/2020 4:45 PM, Ferruh Yigit wrote:
> On 4/7/2020 4:38 PM, Stephen Hemminger wrote:
>> On Tue, 7 Apr 2020 16:15:16 +0100
>> Ferruh Yigit <ferruh.yigit@intel.com> wrote:
>>
>>>> +static void
>>>> +tap_rxq_pool_free(struct rte_mbuf *pool)
>>>> +{
>>>> +	struct rte_mbuf *mbuf = pool;
>>>> +	uint16_t nb_segs = 1;
>>>> +
>>>> +	if (mbuf == NULL)
>>>> +		return;
>>>> +
>>>> +	while (mbuf->next) {
>>>> +		mbuf = mbuf->next;
>>>> +		nb_segs++;
>>>> +	}
>>>> +	pool->nb_segs = nb_segs;
>>>> +	rte_pktmbuf_free(pool);
>>>> +}  
>>
>> Since mbuf is going to be free, why bother with nb_segs.
>> Since rte_pktmbuf_free takes NULL as an argument, and frees the m->next chain
>> I don't see why not just
>> 	rte_pktmbuf_free(pool)
>>
> 
> Chain is not constructed properly, 'nb_segs' is wrong, only 'rte_pktmbuf_free()'
> call won't free all the chain but first mbuf.

No, my bad. It will free all and will work as expected. But this is to fix the
mbuf sanity checks when they are enabled.

> 
> This implementation is fixing 'nb_segs' sot that 'rte_pktmbuf_free()' can work
> as you suggested.
> 
> Or I suggest iterate the list and fix all mbufs, instead of fixing 'nb_segs',
> this may be one iteration less.
>
  
Ferruh Yigit April 7, 2020, 3:58 p.m. UTC | #5
On 4/7/2020 4:15 PM, Ferruh Yigit wrote:
> On 4/7/2020 5:23 AM, wangyunjian wrote:
>> From: Yunjian Wang <wangyunjian@huawei.com>
>>
>> Now the rxq->pool is mbuf concatenation, But its nb_segs is 1.
>> When do some sanity checks on the mbuf, it fails.
> 
> +1, 'rxq->pool' seems Rx ring representation as linked mbufs and empty ones has
> 'nb_segs' values as 1.
> 
>>
>> Fixes: 0781f5762cfe ("net/tap: support segmented mbufs")
>> CC: stable@dpdk.org
>>
>> Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
>> ---
>>  drivers/net/tap/rte_eth_tap.c | 27 ++++++++++++++++++++++-----
>>  1 file changed, 22 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
>> index a9ba0ca68..703fcceb9 100644
>> --- a/drivers/net/tap/rte_eth_tap.c
>> +++ b/drivers/net/tap/rte_eth_tap.c
>> @@ -339,6 +339,23 @@ tap_rx_offload_get_queue_capa(void)
>>  	       DEV_RX_OFFLOAD_TCP_CKSUM;
>>  }
>>  
>> +static void
>> +tap_rxq_pool_free(struct rte_mbuf *pool)
>> +{
>> +	struct rte_mbuf *mbuf = pool;
>> +	uint16_t nb_segs = 1;
>> +
>> +	if (mbuf == NULL)
>> +		return;
>> +
>> +	while (mbuf->next) {
>> +		mbuf = mbuf->next;
>> +		nb_segs++;
>> +	}
>> +	pool->nb_segs = nb_segs;
>> +	rte_pktmbuf_free(pool);
>> +}
> 
> Since you are already iterating the chain, why not free immediately instead of
> calculating the nb_segs and making API go through the chain again, what about
> following:
> 
> tap_rxq_pool_free(struct rte_mbuf *pool)
> {
>     struct rte_mbuf *next;
>     while (pool) {
>          next = pool->next;
>          rte_pktmbuf_free(pool);
>          pool = next;
>     }
> }

Ignore this please, this may be still complaining in mbuf sanity check, so OK to
your usage.

> 
>> +
>>  /* Callback to handle the rx burst of packets to the correct interface and
>>   * file descriptor(s) in a multi-queue setup.
>>   */
>> @@ -389,7 +406,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>>  					goto end;
>>  
>>  				seg->next = NULL;
>> -				rte_pktmbuf_free(mbuf);
>> +				tap_rxq_pool_free(mbuf);
> 
> As far as I can see 'mbuf' should have correct 'nb_segs' value, and it can
> continue to use 'rte_pktmbuf_free()'. If you can observe the problem can you
> please try this?
> 
>>  
>>  				goto end;
>>  			}
>> @@ -1033,7 +1050,7 @@ tap_dev_close(struct rte_eth_dev *dev)
>>  			rxq = &internals->rxq[i];
>>  			close(process_private->rxq_fds[i]);
>>  			process_private->rxq_fds[i] = -1;
>> -			rte_pktmbuf_free(rxq->pool);
>> +			tap_rxq_pool_free(rxq->pool);
>>  			rte_free(rxq->iovecs);
>>  			rxq->pool = NULL;
>>  			rxq->iovecs = NULL;
>> @@ -1072,7 +1089,7 @@ tap_rx_queue_release(void *queue)
>>  	if (process_private->rxq_fds[rxq->queue_id] > 0) {
>>  		close(process_private->rxq_fds[rxq->queue_id]);
>>  		process_private->rxq_fds[rxq->queue_id] = -1;
>> -		rte_pktmbuf_free(rxq->pool);
>> +		tap_rxq_pool_free(rxq->pool);
>>  		rte_free(rxq->iovecs);
>>  		rxq->pool = NULL;
>>  		rxq->iovecs = NULL;
>> @@ -1480,7 +1497,7 @@ tap_rx_queue_setup(struct rte_eth_dev *dev,
>>  	return 0;
>>  
>>  error:
>> -	rte_pktmbuf_free(rxq->pool);
>> +	tap_rxq_pool_free(rxq->pool);
>>  	rxq->pool = NULL;
>>  	rte_free(rxq->iovecs);
>>  	rxq->iovecs = NULL;
>> @@ -2435,7 +2452,7 @@ rte_pmd_tap_remove(struct rte_vdev_device *dev)
>>  			rxq = &internals->rxq[i];
>>  			close(process_private->rxq_fds[i]);
>>  			process_private->rxq_fds[i] = -1;
>> -			rte_pktmbuf_free(rxq->pool);
>> +			tap_rxq_pool_free(rxq->pool);
>>  			rte_free(rxq->iovecs);
>>  			rxq->pool = NULL;
>>  			rxq->iovecs = NULL;
>>
>
  
Stephen Hemminger April 7, 2020, 4:08 p.m. UTC | #6
On Tue, 7 Apr 2020 16:45:59 +0100
Ferruh Yigit <ferruh.yigit@intel.com> wrote:

> On 4/7/2020 4:38 PM, Stephen Hemminger wrote:
> > On Tue, 7 Apr 2020 16:15:16 +0100
> > Ferruh Yigit <ferruh.yigit@intel.com> wrote:
> >   
> >>> +static void
> >>> +tap_rxq_pool_free(struct rte_mbuf *pool)
> >>> +{
> >>> +	struct rte_mbuf *mbuf = pool;
> >>> +	uint16_t nb_segs = 1;
> >>> +
> >>> +	if (mbuf == NULL)
> >>> +		return;
> >>> +
> >>> +	while (mbuf->next) {
> >>> +		mbuf = mbuf->next;
> >>> +		nb_segs++;
> >>> +	}
> >>> +	pool->nb_segs = nb_segs;
> >>> +	rte_pktmbuf_free(pool);
> >>> +}    
> > 
> > Since mbuf is going to be free, why bother with nb_segs.
> > Since rte_pktmbuf_free takes NULL as an argument, and frees the m->next chain
> > I don't see why not just
> > 	rte_pktmbuf_free(pool)
> >   
> 
> Chain is not constructed properly, 'nb_segs' is wrong, only 'rte_pktmbuf_free()'
> call won't free all the chain but first mbuf.
> 
> This implementation is fixing 'nb_segs' sot that 'rte_pktmbuf_free()' can work
> as you suggested.
> 
> Or I suggest iterate the list and fix all mbufs, instead of fixing 'nb_segs',
> this may be one iteration less.

If you look at implementation of rte_pktmbuf_free() in current DPDK version
it does not care what nb_segs is set to.
  
Yunjian Wang April 8, 2020, 1:10 a.m. UTC | #7
> -----Original Message-----
> From: Stephen Hemminger [mailto:stephen@networkplumber.org]
> Sent: Wednesday, April 8, 2020 12:08 AM
> To: Ferruh Yigit <ferruh.yigit@intel.com>
> Cc: wangyunjian <wangyunjian@huawei.com>; dev@dpdk.org;
> keith.wiles@intel.com; Lilijun (Jerry) <jerry.lilijun@huawei.com>; xudingke
> <xudingke@huawei.com>; stable@dpdk.org
> Subject: Re: [dpdk-dev] [dpdk-stable] [PATCH v3 3/5] net/tap: fix check for
> mbuf's nb_segs failure
> 
> On Tue, 7 Apr 2020 16:45:59 +0100
> Ferruh Yigit <ferruh.yigit@intel.com> wrote:
> 
> > On 4/7/2020 4:38 PM, Stephen Hemminger wrote:
> > > On Tue, 7 Apr 2020 16:15:16 +0100
> > > Ferruh Yigit <ferruh.yigit@intel.com> wrote:
> > >
> > >>> +static void
> > >>> +tap_rxq_pool_free(struct rte_mbuf *pool) {
> > >>> +	struct rte_mbuf *mbuf = pool;
> > >>> +	uint16_t nb_segs = 1;
> > >>> +
> > >>> +	if (mbuf == NULL)
> > >>> +		return;
> > >>> +
> > >>> +	while (mbuf->next) {
> > >>> +		mbuf = mbuf->next;
> > >>> +		nb_segs++;
> > >>> +	}
> > >>> +	pool->nb_segs = nb_segs;
> > >>> +	rte_pktmbuf_free(pool);
> > >>> +}
> > >
> > > Since mbuf is going to be free, why bother with nb_segs.
> > > Since rte_pktmbuf_free takes NULL as an argument, and frees the
> > > m->next chain I don't see why not just
> > > 	rte_pktmbuf_free(pool)
> > >
> >
> > Chain is not constructed properly, 'nb_segs' is wrong, only
> 'rte_pktmbuf_free()'
> > call won't free all the chain but first mbuf.
> >
> > This implementation is fixing 'nb_segs' sot that 'rte_pktmbuf_free()'
> > can work as you suggested.
> >
> > Or I suggest iterate the list and fix all mbufs, instead of fixing
> > 'nb_segs', this may be one iteration less.
> 
> If you look at implementation of rte_pktmbuf_free() in current DPDK version it
> does not care what nb_segs is set to.

I found this problem with mbuf debug enabled.

Thanks
Yunjian
  

Patch

diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index a9ba0ca68..703fcceb9 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -339,6 +339,23 @@  tap_rx_offload_get_queue_capa(void)
 	       DEV_RX_OFFLOAD_TCP_CKSUM;
 }
 
+static void
+tap_rxq_pool_free(struct rte_mbuf *pool)
+{
+	struct rte_mbuf *mbuf = pool;
+	uint16_t nb_segs = 1;
+
+	if (mbuf == NULL)
+		return;
+
+	while (mbuf->next) {
+		mbuf = mbuf->next;
+		nb_segs++;
+	}
+	pool->nb_segs = nb_segs;
+	rte_pktmbuf_free(pool);
+}
+
 /* Callback to handle the rx burst of packets to the correct interface and
  * file descriptor(s) in a multi-queue setup.
  */
@@ -389,7 +406,7 @@  pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 					goto end;
 
 				seg->next = NULL;
-				rte_pktmbuf_free(mbuf);
+				tap_rxq_pool_free(mbuf);
 
 				goto end;
 			}
@@ -1033,7 +1050,7 @@  tap_dev_close(struct rte_eth_dev *dev)
 			rxq = &internals->rxq[i];
 			close(process_private->rxq_fds[i]);
 			process_private->rxq_fds[i] = -1;
-			rte_pktmbuf_free(rxq->pool);
+			tap_rxq_pool_free(rxq->pool);
 			rte_free(rxq->iovecs);
 			rxq->pool = NULL;
 			rxq->iovecs = NULL;
@@ -1072,7 +1089,7 @@  tap_rx_queue_release(void *queue)
 	if (process_private->rxq_fds[rxq->queue_id] > 0) {
 		close(process_private->rxq_fds[rxq->queue_id]);
 		process_private->rxq_fds[rxq->queue_id] = -1;
-		rte_pktmbuf_free(rxq->pool);
+		tap_rxq_pool_free(rxq->pool);
 		rte_free(rxq->iovecs);
 		rxq->pool = NULL;
 		rxq->iovecs = NULL;
@@ -1480,7 +1497,7 @@  tap_rx_queue_setup(struct rte_eth_dev *dev,
 	return 0;
 
 error:
-	rte_pktmbuf_free(rxq->pool);
+	tap_rxq_pool_free(rxq->pool);
 	rxq->pool = NULL;
 	rte_free(rxq->iovecs);
 	rxq->iovecs = NULL;
@@ -2435,7 +2452,7 @@  rte_pmd_tap_remove(struct rte_vdev_device *dev)
 			rxq = &internals->rxq[i];
 			close(process_private->rxq_fds[i]);
 			process_private->rxq_fds[i] = -1;
-			rte_pktmbuf_free(rxq->pool);
+			tap_rxq_pool_free(rxq->pool);
 			rte_free(rxq->iovecs);
 			rxq->pool = NULL;
 			rxq->iovecs = NULL;