vfio: free mp_reply msgs in failure cases

Message ID 156595762238.18723.10089009448135563310.stgit@jrharri1-skx (mailing list archive)
State Accepted, archived
Delegated to: David Marchand
Headers
Series vfio: free mp_reply msgs in failure cases |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/iol-Compile-Testing success Compile Testing PASS
ci/intel-Performance-Testing success Performance Testing PASS
ci/mellanox-Performance-Testing success Performance Testing PASS

Commit Message

Harris, James R Aug. 16, 2019, 12:13 p.m. UTC
  The code checks both rte_mp_request_sync() return
code and that the number of messages in the reply
equals 1.  If rte_mp_request_sync() succeeds but
there was more than one message, those messages
would get leaked.

Found via code review by Anatoly Burakov of patches
that used the vhost code as a template for using
rte_mp_request_sync().

Signed-off-by: Jim Harris <james.r.harris@intel.com>
---
 lib/librte_eal/linux/eal/eal_vfio.c |   16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)
  

Comments

Burakov, Anatoly Aug. 20, 2019, 1:13 p.m. UTC | #1
On 16-Aug-19 1:13 PM, Jim Harris wrote:
> The code checks both rte_mp_request_sync() return
> code and that the number of messages in the reply
> equals 1.  If rte_mp_request_sync() succeeds but
> there was more than one message, those messages
> would get leaked.
> 
> Found via code review by Anatoly Burakov of patches
> that used the vhost code as a template for using
> rte_mp_request_sync().
> 
> Signed-off-by: Jim Harris <james.r.harris@intel.com>
> ---
>   lib/librte_eal/linux/eal/eal_vfio.c |   16 ++++++++--------
>   1 file changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/lib/librte_eal/linux/eal/eal_vfio.c b/lib/librte_eal/linux/eal/eal_vfio.c
> index 501c74f23..d9541b122 100644
> --- a/lib/librte_eal/linux/eal/eal_vfio.c
> +++ b/lib/librte_eal/linux/eal/eal_vfio.c
> @@ -264,7 +264,7 @@ vfio_open_group_fd(int iommu_group_num)
>   	int vfio_group_fd;
>   	char filename[PATH_MAX];
>   	struct rte_mp_msg mp_req, *mp_rep;
> -	struct rte_mp_reply mp_reply;
> +	struct rte_mp_reply mp_reply = {0};
>   	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
>   	struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
>   
> @@ -320,9 +320,9 @@ vfio_open_group_fd(int iommu_group_num)
>   			RTE_LOG(ERR, EAL, "  bad VFIO group fd\n");
>   			vfio_group_fd = 0;
>   		}
> -		free(mp_reply.msgs);
>   	}
>   
> +	free(mp_reply.msgs);

That's not quite correct. This fixes the missing free() when 
nb_received mismatches, but it /adds/ an unnecessary free() when 
rte_mp_request_sync() returns -1. Same for the other places, I believe.
  
Harris, James R Aug. 20, 2019, 1:16 p.m. UTC | #2
> On Aug 20, 2019, at 6:13 AM, Burakov, Anatoly <anatoly.burakov@intel.com> wrote:
> 
>> On 16-Aug-19 1:13 PM, Jim Harris wrote:
>> The code checks both rte_mp_request_sync() return
>> code and that the number of messages in the reply
>> equals 1.  If rte_mp_request_sync() succeeds but
>> there was more than one message, those messages
>> would get leaked.
>> Found via code review by Anatoly Burakov of patches
>> that used the vhost code as a template for using
>> rte_mp_request_sync().
>> Signed-off-by: Jim Harris <james.r.harris@intel.com>
>> ---
>>  lib/librte_eal/linux/eal/eal_vfio.c |   16 ++++++++--------
>>  1 file changed, 8 insertions(+), 8 deletions(-)
>> diff --git a/lib/librte_eal/linux/eal/eal_vfio.c b/lib/librte_eal/linux/eal/eal_vfio.c
>> index 501c74f23..d9541b122 100644
>> --- a/lib/librte_eal/linux/eal/eal_vfio.c
>> +++ b/lib/librte_eal/linux/eal/eal_vfio.c
>> @@ -264,7 +264,7 @@ vfio_open_group_fd(int iommu_group_num)
>>      int vfio_group_fd;
>>      char filename[PATH_MAX];
>>      struct rte_mp_msg mp_req, *mp_rep;
>> -    struct rte_mp_reply mp_reply;
>> +    struct rte_mp_reply mp_reply = {0};
>>      struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
>>      struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
>>  @@ -320,9 +320,9 @@ vfio_open_group_fd(int iommu_group_num)
>>              RTE_LOG(ERR, EAL, "  bad VFIO group fd\n");
>>              vfio_group_fd = 0;
>>          }
>> -        free(mp_reply.msgs);
>>      }
>>  +    free(mp_reply.msgs);
> 
> That's not quite correct. This fixes the problem of missing free() when nb_received mismatches, but this /adds/ a problem of doing an unnecessary free() when rte_mp_request_sync() returns -1. Same for other places, i believe.

This would just resolve to free(NULL) in the -1 case.

Jim

> 
> -- 
> Thanks,
> Anatoly
  
Burakov, Anatoly Aug. 20, 2019, 1:22 p.m. UTC | #3
On 20-Aug-19 2:16 PM, Harris, James R wrote:
> 
> 
>> On Aug 20, 2019, at 6:13 AM, Burakov, Anatoly <anatoly.burakov@intel.com> wrote:
>>
>>> On 16-Aug-19 1:13 PM, Jim Harris wrote:
>>> The code checks both rte_mp_request_sync() return
>>> code and that the number of messages in the reply
>>> equals 1.  If rte_mp_request_sync() succeeds but
>>> there was more than one message, those messages
>>> would get leaked.
>>> Found via code review by Anatoly Burakov of patches
>>> that used the vhost code as a template for using
>>> rte_mp_request_sync().
>>> Signed-off-by: Jim Harris <james.r.harris@intel.com>
>>> ---
>>>   lib/librte_eal/linux/eal/eal_vfio.c |   16 ++++++++--------
>>>   1 file changed, 8 insertions(+), 8 deletions(-)
>>> diff --git a/lib/librte_eal/linux/eal/eal_vfio.c b/lib/librte_eal/linux/eal/eal_vfio.c
>>> index 501c74f23..d9541b122 100644
>>> --- a/lib/librte_eal/linux/eal/eal_vfio.c
>>> +++ b/lib/librte_eal/linux/eal/eal_vfio.c
>>> @@ -264,7 +264,7 @@ vfio_open_group_fd(int iommu_group_num)
>>>       int vfio_group_fd;
>>>       char filename[PATH_MAX];
>>>       struct rte_mp_msg mp_req, *mp_rep;
>>> -    struct rte_mp_reply mp_reply;
>>> +    struct rte_mp_reply mp_reply = {0};
>>>       struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
>>>       struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
>>>   @@ -320,9 +320,9 @@ vfio_open_group_fd(int iommu_group_num)
>>>               RTE_LOG(ERR, EAL, "  bad VFIO group fd\n");
>>>               vfio_group_fd = 0;
>>>           }
>>> -        free(mp_reply.msgs);
>>>       }
>>>   +    free(mp_reply.msgs);
>>
>> That's not quite correct. This fixes the problem of missing free() when nb_received mismatches, but this /adds/ a problem of doing an unnecessary free() when rte_mp_request_sync() returns -1. Same for other places, i believe.
> 
> This would just resolve to free(NULL) in the -1 case.
> 

Ah, you're right! We did fix that bug :)

With that in mind,

Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>
  
David Marchand Oct. 14, 2019, 11:17 a.m. UTC | #4
On Fri, Aug 16, 2019 at 9:19 PM Jim Harris <james.r.harris@intel.com> wrote:
>
> The code checks both rte_mp_request_sync() return
> code and that the number of messages in the reply
> equals 1.  If rte_mp_request_sync() succeeds but
> there was more than one message, those messages
> would get leaked.
>
> Found via code review by Anatoly Burakov of patches
> that used the vhost code as a template for using
> rte_mp_request_sync().

The patch looks fine, I just want to make sure its title reflects what it fixes.
Can you give some insight into how common this issue is? Are there
known cases where it happens?


I might have spotted another issue (could be worth a followup patch
later if confirmed), please see below.

>
> Signed-off-by: Jim Harris <james.r.harris@intel.com>
> ---
>  lib/librte_eal/linux/eal/eal_vfio.c |   16 ++++++++--------
>  1 file changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/lib/librte_eal/linux/eal/eal_vfio.c b/lib/librte_eal/linux/eal/eal_vfio.c
> index 501c74f23..d9541b122 100644
> --- a/lib/librte_eal/linux/eal/eal_vfio.c
> +++ b/lib/librte_eal/linux/eal/eal_vfio.c

[snip]

> @@ -1021,7 +1021,7 @@ int
>  vfio_get_default_container_fd(void)
>  {
>         struct rte_mp_msg mp_req, *mp_rep;
> -       struct rte_mp_reply mp_reply;
> +       struct rte_mp_reply mp_reply = {0};
>         struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
>         struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
>
> @@ -1049,9 +1049,9 @@ vfio_get_default_container_fd(void)
>                         free(mp_reply.msgs);
>                         return mp_rep->fds[0];

Do we have a use-after-free on mp_rep, which points to &mp_reply.msgs[0]?


>                 }
> -               free(mp_reply.msgs);
>         }
>
> +       free(mp_reply.msgs);
>         RTE_LOG(ERR, EAL, "  cannot request default container fd\n");
>         return -1;
>  }
  
Harris, James R Oct. 14, 2019, 1:49 p.m. UTC | #5
On 10/14/19, 4:18 AM, "David Marchand" <david.marchand@redhat.com> wrote:

    On Fri, Aug 16, 2019 at 9:19 PM Jim Harris <james.r.harris@intel.com> wrote:
    >
    > The code checks both rte_mp_request_sync() return
    > code and that the number of messages in the reply
    > equals 1.  If rte_mp_request_sync() succeeds but
    > there was more than one message, those messages
    > would get leaked.
    >
    > Found via code review by Anatoly Burakov of patches
    > that used the vhost code as a template for using
    > rte_mp_request_sync().
    
    The patch looks fine, I just want to make sure its title reflect what it fixes.
    Can you give some insights of how common this issue is? If there are
    known cases where it happens?

Hi David,

I don't think this issue is common at all.  I don't have any known cases in mind - it was only found via code inspection.
    
    I might have spotted another issue (could be worth a followup patch
    later if confirmed), please see below.
    
    >
    > Signed-off-by: Jim Harris <james.r.harris@intel.com>
    > ---
    >  lib/librte_eal/linux/eal/eal_vfio.c |   16 ++++++++--------
    >  1 file changed, 8 insertions(+), 8 deletions(-)
    >
    > diff --git a/lib/librte_eal/linux/eal/eal_vfio.c b/lib/librte_eal/linux/eal/eal_vfio.c
    > index 501c74f23..d9541b122 100644
    > --- a/lib/librte_eal/linux/eal/eal_vfio.c
    > +++ b/lib/librte_eal/linux/eal/eal_vfio.c
    
    [snip]
    
    > @@ -1021,7 +1021,7 @@ int
    >  vfio_get_default_container_fd(void)
    >  {
    >         struct rte_mp_msg mp_req, *mp_rep;
    > -       struct rte_mp_reply mp_reply;
    > +       struct rte_mp_reply mp_reply = {0};
    >         struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
    >         struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
    >
    > @@ -1049,9 +1049,9 @@ vfio_get_default_container_fd(void)
    >                         free(mp_reply.msgs);
    >                         return mp_rep->fds[0];
    
    Do we have a use after free on mp_rep which points to &mp_reply.msgs[0] ?
    
You're right.  It needs to save mp_rep->fds[0] into a local variable before we free that array.  That would be a good follow-up patch!

-Jim

    >                 }
    > -               free(mp_reply.msgs);
    >         }
    >
    > +       free(mp_reply.msgs);
    >         RTE_LOG(ERR, EAL, "  cannot request default container fd\n");
    >         return -1;
    >  }
    
    
    
    -- 
    David Marchand
  
David Marchand Oct. 14, 2019, 2:47 p.m. UTC | #6
On Mon, Oct 14, 2019 at 3:49 PM Harris, James R
<james.r.harris@intel.com> wrote:
> On 10/14/19, 4:18 AM, "David Marchand" <david.marchand@redhat.com> wrote:
>
>     On Fri, Aug 16, 2019 at 9:19 PM Jim Harris <james.r.harris@intel.com> wrote:
>     >
>     > The code checks both rte_mp_request_sync() return
>     > code and that the number of messages in the reply
>     > equals 1.  If rte_mp_request_sync() succeeds but
>     > there was more than one message, those messages
>     > would get leaked.
>     >
>     > Found via code review by Anatoly Burakov of patches
>     > that used the vhost code as a template for using
>     > rte_mp_request_sync().
>
>     The patch looks fine, I just want to make sure its title reflect what it fixes.
>     Can you give some insights of how common this issue is? If there are
>     known cases where it happens?
>
> Hi David,
>
> I don't think this issue is common at all.  I don't have any known cases in mind - it was only found via code inspection.

Anatoly, Jim,

I'm not really inspired for the title; what do you think of:
vfio: fix potential leak with multiprocess

Plus, it deserves a Fixes: line.
Fixes: 83a73c5fef66 ("vfio: use generic multi-process channel")
Cc: stable@dpdk.org

If you are okay with this, I will do the change when applying.
  
Harris, James R Oct. 14, 2019, 2:50 p.m. UTC | #7
> On Oct 14, 2019, at 7:47 AM, David Marchand <david.marchand@redhat.com> wrote:
> 
> On Mon, Oct 14, 2019 at 3:49 PM Harris, James R
> <james.r.harris@intel.com> wrote:
>> On 10/14/19, 4:18 AM, "David Marchand" <david.marchand@redhat.com> wrote:
>> 
>>>    On Fri, Aug 16, 2019 at 9:19 PM Jim Harris <james.r.harris@intel.com> wrote:
>>> 
>>> The code checks both rte_mp_request_sync() return
>>> code and that the number of messages in the reply
>>> equals 1.  If rte_mp_request_sync() succeeds but
>>> there was more than one message, those messages
>>> would get leaked.
>>> 
>>> Found via code review by Anatoly Burakov of patches
>>> that used the vhost code as a template for using
>>> rte_mp_request_sync().
>> 
>>    The patch looks fine, I just want to make sure its title reflect what it fixes.
>>    Can you give some insights of how common this issue is? If there are
>>    known cases where it happens?
>> 
>> Hi David,
>> 
>> I don't think this issue is common at all.  I don't have any known cases in mind - it was only found via code inspection.
> 
> Anatoly, Jim,
> 
> Not really inspired for the title, what do you think of:
> vfio: fix potential leak with multiprocess
> 
> Plus, it deserves a Fixes: line.
> Fixes: 83a73c5fef66 ("vfio: use generic multi-process channel")
> Cc: stable@dpdk.org
> 
> If you are okay with this, I will do the change when applying.

I am ok with those changes. Thanks!

> 
> -- 
> David Marchand
>
  
David Marchand Oct. 15, 2019, 6:38 p.m. UTC | #8
On Mon, Oct 14, 2019 at 4:47 PM David Marchand
<david.marchand@redhat.com> wrote:
>
> On Mon, Oct 14, 2019 at 3:49 PM Harris, James R
> <james.r.harris@intel.com> wrote:
> > On 10/14/19, 4:18 AM, "David Marchand" <david.marchand@redhat.com> wrote:
> >
> >     On Fri, Aug 16, 2019 at 9:19 PM Jim Harris <james.r.harris@intel.com> wrote:
> >     >
> >     > The code checks both rte_mp_request_sync() return
> >     > code and that the number of messages in the reply
> >     > equals 1.  If rte_mp_request_sync() succeeds but
> >     > there was more than one message, those messages
> >     > would get leaked.
> >     >
> >     > Found via code review by Anatoly Burakov of patches
> >     > that used the vhost code as a template for using
> >     > rte_mp_request_sync().
> >
> >     The patch looks fine, I just want to make sure its title reflect what it fixes.
> >     Can you give some insights of how common this issue is? If there are
> >     known cases where it happens?
> >
> > Hi David,
> >
> > I don't think this issue is common at all.  I don't have any known cases in mind - it was only found via code inspection.
>
> Anatoly, Jim,
>
> Not really inspired for the title, what do you think of:
> vfio: fix potential leak with multiprocess
>
> Plus, it deserves a Fixes: line.
> Fixes: 83a73c5fef66 ("vfio: use generic multi-process channel")
> Cc: stable@dpdk.org
>
> If you are okay with this, I will do the change when applying.

Applied, thanks.



--
David Marchand
  

Patch

diff --git a/lib/librte_eal/linux/eal/eal_vfio.c b/lib/librte_eal/linux/eal/eal_vfio.c
index 501c74f23..d9541b122 100644
--- a/lib/librte_eal/linux/eal/eal_vfio.c
+++ b/lib/librte_eal/linux/eal/eal_vfio.c
@@ -264,7 +264,7 @@  vfio_open_group_fd(int iommu_group_num)
 	int vfio_group_fd;
 	char filename[PATH_MAX];
 	struct rte_mp_msg mp_req, *mp_rep;
-	struct rte_mp_reply mp_reply;
+	struct rte_mp_reply mp_reply = {0};
 	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
 	struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
 
@@ -320,9 +320,9 @@  vfio_open_group_fd(int iommu_group_num)
 			RTE_LOG(ERR, EAL, "  bad VFIO group fd\n");
 			vfio_group_fd = 0;
 		}
-		free(mp_reply.msgs);
 	}
 
+	free(mp_reply.msgs);
 	if (vfio_group_fd < 0)
 		RTE_LOG(ERR, EAL, "  cannot request group fd\n");
 	return vfio_group_fd;
@@ -554,7 +554,7 @@  static int
 vfio_sync_default_container(void)
 {
 	struct rte_mp_msg mp_req, *mp_rep;
-	struct rte_mp_reply mp_reply;
+	struct rte_mp_reply mp_reply = {0};
 	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
 	struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
 	int iommu_type_id;
@@ -584,8 +584,8 @@  vfio_sync_default_container(void)
 		p = (struct vfio_mp_param *)mp_rep->param;
 		if (p->result == SOCKET_OK)
 			iommu_type_id = p->iommu_type_id;
-		free(mp_reply.msgs);
 	}
+	free(mp_reply.msgs);
 	if (iommu_type_id < 0) {
 		RTE_LOG(ERR, EAL, "Could not get IOMMU type for default container\n");
 		return -1;
@@ -1021,7 +1021,7 @@  int
 vfio_get_default_container_fd(void)
 {
 	struct rte_mp_msg mp_req, *mp_rep;
-	struct rte_mp_reply mp_reply;
+	struct rte_mp_reply mp_reply = {0};
 	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
 	struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
 
@@ -1049,9 +1049,9 @@  vfio_get_default_container_fd(void)
 			free(mp_reply.msgs);
 			return mp_rep->fds[0];
 		}
-		free(mp_reply.msgs);
 	}
 
+	free(mp_reply.msgs);
 	RTE_LOG(ERR, EAL, "  cannot request default container fd\n");
 	return -1;
 }
@@ -1127,7 +1127,7 @@  rte_vfio_get_container_fd(void)
 {
 	int ret, vfio_container_fd;
 	struct rte_mp_msg mp_req, *mp_rep;
-	struct rte_mp_reply mp_reply;
+	struct rte_mp_reply mp_reply = {0};
 	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
 	struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
 
@@ -1181,9 +1181,9 @@  rte_vfio_get_container_fd(void)
 			free(mp_reply.msgs);
 			return vfio_container_fd;
 		}
-		free(mp_reply.msgs);
 	}
 
+	free(mp_reply.msgs);
 	RTE_LOG(ERR, EAL, "  cannot request container fd\n");
 	return -1;
 }