[dpdk-dev] [PATCH 2/2] vhost: add pipe event for optimizing negotiating

Maxime Coquelin maxime.coquelin at redhat.com
Fri Mar 30 09:57:15 CEST 2018


Hi Xiangxia,

On 03/29/2018 01:24 PM, Tonghao Zhang wrote:
> On Thu, Mar 29, 2018 at 3:32 PM, Tan, Jianfeng <jianfeng.tan at intel.com> wrote:
>> Hi Xiangxia,
>>
>>> -----Original Message-----
>>> From: xiangxia.m.yue at gmail.com [mailto:xiangxia.m.yue at gmail.com]
>>> Sent: Wednesday, March 28, 2018 1:49 PM
>>> To: Tan, Jianfeng
>>> Cc: dev at dpdk.org; Tonghao Zhang
>>> Subject: [PATCH 2/2] vhost: add pipe event for optimizing negotiating
>>>
>>> From: Tonghao Zhang <xiangxia.m.yue at gmail.com>
>>>
>>> When vhost—user connects qemu successfully, dpdk will call
>>
>> Typo: "-"
> v2 will update it. :)
> 
>>> the vhost_user_add_connection to add unix socket fd to poll.
>>> And fdset_add only set the socket fd to a fdentry while poll
>>> may sleep now. In a general case, this is no problem. But if
>>> we use hot update for vhost-user, most downtime of VMs network
>>> is 750+ms. This patch adds pipe event, so after connections are
>>> ok, dpdk rebuild the poll immediately. With this patch, the
>>> most downtime is 20~30ms.
>>>
>>> Signed-off-by: Tonghao Zhang <xiangxia.m.yue at gmail.com>
>>> ---
>>>   lib/librte_vhost/fd_man.c | 49
>>> +++++++++++++++++++++++++++++++++++++++++++++++
>>>   lib/librte_vhost/fd_man.h | 16 ++++++++++++++++
>>>   lib/librte_vhost/socket.c | 14 ++++++++++++++
>>>   3 files changed, 79 insertions(+)
>>>
>>> diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c
>>> index 181711c..7716757 100644
>>> --- a/lib/librte_vhost/fd_man.c
>>> +++ b/lib/librte_vhost/fd_man.c
>>> @@ -15,6 +15,7 @@
>>>   #include <rte_log.h>
>>>
>>>   #include "fd_man.h"
>>> +#include "vhost.h"
>>
>> This is a nice finding and solution, however, I don't think we shall include vhost header file in fd related files. Actually, I did not find out why you need to include this header file.
> 
> Hi Jianfeng, thanks for your review. In the fdset_pipe_init function,
> I call the RTE_LOG with VHOST_CONFIG, when init pipe not successfully.
> So I included the vhost header file.

Maybe better to create VHOST_SOCKET in socket.c, than including vhost.h 
that creates a layer violation:

#define RTE_LOGTYPE_VHOST_SOCKET RTE_LOGTYPE_USER1

Is it Ok for you? I so no need to resend, I can handle the change when
applying.

Thanks,
Maxime
> 
> 
>> Thanks,
>> Jianfeng
>>
>>>
>>>   #define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
>>>
>>> @@ -272,3 +273,51 @@
>>>
>>>        return NULL;
>>>   }
>>> +
>>> +static void
>>> +fdset_pipe_read_cb(int readfd, void *dat __rte_unused,
>>> +                int *remove __rte_unused)
>>> +{
>>> +     char charbuf[16];
>>> +     read(readfd, charbuf, sizeof(charbuf));
>>> +}
>>> +
>>> +void
>>> +fdset_pipe_uninit(struct fdset *fdset)
>>> +{
>>> +     fdset_del(fdset, fdset->u.readfd);
>>> +     close(fdset->u.readfd);
>>> +     close(fdset->u.writefd);
>>> +}
>>> +
>>> +int
>>> +fdset_pipe_init(struct fdset *fdset)
>>> +{
>>> +     int ret;
>>> +
>>> +     if (pipe(fdset->u.pipefd) < 0) {
>>> +             RTE_LOG(ERR, VHOST_CONFIG,
>>> +                     "failed to create pipe for vhost fdset\n");
>>> +             return -1;
>>> +     }
>>> +
>>> +     ret = fdset_add(fdset, fdset->u.readfd,
>>> +                     fdset_pipe_read_cb, NULL, NULL);
>>> +
>>> +     if (ret < 0) {
>>> +             RTE_LOG(ERR, VHOST_CONFIG,
>>> +                     "failed to add pipe readfd %d into vhost server
>>> fdset\n",
>>> +                     fdset->u.readfd);
>>> +
>>> +             fdset_pipe_uninit(fdset);
>>> +             return -1;
>>> +     }
>>> +
>>> +     return 0;
>>> +}
>>> +
>>> +void
>>> +fdset_pipe_notify(struct fdset *fdset)
>>> +{
>>> +     write(fdset->u.writefd, "1", 1);
>>> +}
>>> diff --git a/lib/librte_vhost/fd_man.h b/lib/librte_vhost/fd_man.h
>>> index 3a9276c..76a42fb 100644
>>> --- a/lib/librte_vhost/fd_man.h
>>> +++ b/lib/librte_vhost/fd_man.h
>>> @@ -25,6 +25,16 @@ struct fdset {
>>>        struct fdentry fd[MAX_FDS];
>>>        pthread_mutex_t fd_mutex;
>>>        int num;        /* current fd number of this fdset */
>>> +
>>> +     union pipefds {
>>> +             struct {
>>> +                     int pipefd[2];
>>> +             };
>>> +             struct {
>>> +                     int readfd;
>>> +                     int writefd;
>>> +             };
>>> +     } u;
>>>   };
>>>
>>>
>>> @@ -37,4 +47,10 @@ int fdset_add(struct fdset *pfdset, int fd,
>>>
>>>   void *fdset_event_dispatch(void *arg);
>>>
>>> +int fdset_pipe_init(struct fdset *fdset);
>>> +
>>> +void fdset_pipe_uninit(struct fdset *fdset);
>>> +
>>> +void fdset_pipe_notify(struct fdset *fdset);
>>> +
>>>   #endif
>>> diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
>>> index 95bed78..795239c 100644
>>> --- a/lib/librte_vhost/socket.c
>>> +++ b/lib/librte_vhost/socket.c
>>> @@ -231,6 +231,8 @@ struct vhost_user {
>>>        pthread_mutex_lock(&vsocket->conn_mutex);
>>>        TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
>>>        pthread_mutex_unlock(&vsocket->conn_mutex);
>>> +
>>> +     fdset_pipe_notify(&vhost_user.fdset);
>>>        return;
>>>
>>>   err:
>>> @@ -829,11 +831,23 @@ struct vhost_device_ops const *
>>>                return -1;
>>>
>>>        if (fdset_tid == 0) {
>>> +             /**
>>> +              * create a pipe which will be waited by poll and notified to
>>> +              * rebuild the wait list of poll.
>>> +              */
>>> +             if (fdset_pipe_init(&vhost_user.fdset) < 0) {
>>> +                     RTE_LOG(ERR, VHOST_CONFIG,
>>> +                             "failed to create pipe for vhost fdset\n");
>>> +                     return -1;
>>> +             }
>>> +
>>>                int ret = pthread_create(&fdset_tid, NULL, fdset_event_dispatch,
>>>                                     &vhost_user.fdset);
>>>                if (ret != 0) {
>>>                        RTE_LOG(ERR, VHOST_CONFIG,
>>>                                "failed to create fdset handling thread");
>>> +
>>> +                     fdset_pipe_uninit(&vhost_user.fdset);
>>>                        return -1;
>>>                } else {
>>>                        snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
>>> --
>>> 1.8.3.1
>>


More information about the dev mailing list