[dpdk-dev,1/4] vhost: move fdset functions from fd_man.c to fd_man.h

Message ID 20180214145330.4679-2-zhiyong.yang@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Maxime Coquelin
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Yang, Zhiyong Feb. 14, 2018, 2:53 p.m. UTC
  The patch moves fdset related funcitons from fd_man.c to fd_man.h in
order to reuse these funcitons from the perspective of PMDs.

Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
---
 lib/librte_vhost/Makefile |   3 +-
 lib/librte_vhost/fd_man.c | 274 ----------------------------------------------
 lib/librte_vhost/fd_man.h | 258 +++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 253 insertions(+), 282 deletions(-)
 delete mode 100644 lib/librte_vhost/fd_man.c
  

Comments

Maxime Coquelin Feb. 27, 2018, 5:51 p.m. UTC | #1
Hi Zhiyong,

On 02/14/2018 03:53 PM, Zhiyong Yang wrote:
> The patch moves fdset related funcitons from fd_man.c to fd_man.h in
> order to reuse these funcitons from the perspective of PMDs.
> 
> Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> ---
>   lib/librte_vhost/Makefile |   3 +-
>   lib/librte_vhost/fd_man.c | 274 ----------------------------------------------
>   lib/librte_vhost/fd_man.h | 258 +++++++++++++++++++++++++++++++++++++++++--
>   3 files changed, 253 insertions(+), 282 deletions(-)
>   delete mode 100644 lib/librte_vhost/fd_man.c

I disagree with the patch.
It is a good thing to reuse the code, but to do it, you need to extend
the vhost lib API.

New API need to be prefixed with rte_vhost_, and be declared in
rte_vhost.h.

And no need to move the functions from the .c to the .h file, as it
moreover makes you inline them, which is not necessary here.

> diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
> index 5d6c6abae..e201df79c 100644
> --- a/lib/librte_vhost/Makefile
> +++ b/lib/librte_vhost/Makefile
> @@ -21,10 +21,11 @@ endif
>   LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev -lrte_net
>   
>   # all source are stored in SRCS-y
> -SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \
> +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := iotlb.c socket.c vhost.c \
>   					vhost_user.c virtio_net.c
>   
>   # install includes
>   SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h
> +SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += fd_man.h
>   
>   include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c
> deleted file mode 100644
> index 181711c2a..000000000
> --- a/lib/librte_vhost/fd_man.c
> +++ /dev/null
> @@ -1,274 +0,0 @@
> -/* SPDX-License-Identifier: BSD-3-Clause
> - * Copyright(c) 2010-2014 Intel Corporation
> - */
> -
> -#include <stdint.h>
> -#include <stdio.h>
> -#include <stdlib.h>
> -#include <sys/socket.h>
> -#include <sys/time.h>
> -#include <sys/types.h>
> -#include <unistd.h>
> -#include <string.h>
> -
> -#include <rte_common.h>
> -#include <rte_log.h>
> -
> -#include "fd_man.h"
> -
> -#define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
> -
> -static int
> -get_last_valid_idx(struct fdset *pfdset, int last_valid_idx)
> -{
> -	int i;
> -
> -	for (i = last_valid_idx; i >= 0 && pfdset->fd[i].fd == -1; i--)
> -		;
> -
> -	return i;
> -}
> -
> -static void
> -fdset_move(struct fdset *pfdset, int dst, int src)
> -{
> -	pfdset->fd[dst]    = pfdset->fd[src];
> -	pfdset->rwfds[dst] = pfdset->rwfds[src];
> -}
> -
> -static void
> -fdset_shrink_nolock(struct fdset *pfdset)
> -{
> -	int i;
> -	int last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1);
> -
> -	for (i = 0; i < last_valid_idx; i++) {
> -		if (pfdset->fd[i].fd != -1)
> -			continue;
> -
> -		fdset_move(pfdset, i, last_valid_idx);
> -		last_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1);
> -	}
> -	pfdset->num = last_valid_idx + 1;
> -}
> -
> -/*
> - * Find deleted fd entries and remove them
> - */
> -static void
> -fdset_shrink(struct fdset *pfdset)
> -{
> -	pthread_mutex_lock(&pfdset->fd_mutex);
> -	fdset_shrink_nolock(pfdset);
> -	pthread_mutex_unlock(&pfdset->fd_mutex);
> -}
> -
> -/**
> - * Returns the index in the fdset for a given fd.
> - * @return
> - *   index for the fd, or -1 if fd isn't in the fdset.
> - */
> -static int
> -fdset_find_fd(struct fdset *pfdset, int fd)
> -{
> -	int i;
> -
> -	for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++)
> -		;
> -
> -	return i == pfdset->num ? -1 : i;
> -}
> -
> -static void
> -fdset_add_fd(struct fdset *pfdset, int idx, int fd,
> -	fd_cb rcb, fd_cb wcb, void *dat)
> -{
> -	struct fdentry *pfdentry = &pfdset->fd[idx];
> -	struct pollfd *pfd = &pfdset->rwfds[idx];
> -
> -	pfdentry->fd  = fd;
> -	pfdentry->rcb = rcb;
> -	pfdentry->wcb = wcb;
> -	pfdentry->dat = dat;
> -
> -	pfd->fd = fd;
> -	pfd->events  = rcb ? POLLIN : 0;
> -	pfd->events |= wcb ? POLLOUT : 0;
> -	pfd->revents = 0;
> -}
> -
> -void
> -fdset_init(struct fdset *pfdset)
> -{
> -	int i;
> -
> -	if (pfdset == NULL)
> -		return;
> -
> -	for (i = 0; i < MAX_FDS; i++) {
> -		pfdset->fd[i].fd = -1;
> -		pfdset->fd[i].dat = NULL;
> -	}
> -	pfdset->num = 0;
> -}
> -
> -/**
> - * Register the fd in the fdset with read/write handler and context.
> - */
> -int
> -fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
> -{
> -	int i;
> -
> -	if (pfdset == NULL || fd == -1)
> -		return -1;
> -
> -	pthread_mutex_lock(&pfdset->fd_mutex);
> -	i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
> -	if (i == -1) {
> -		fdset_shrink_nolock(pfdset);
> -		i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
> -		if (i == -1) {
> -			pthread_mutex_unlock(&pfdset->fd_mutex);
> -			return -2;
> -		}
> -	}
> -
> -	fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
> -	pthread_mutex_unlock(&pfdset->fd_mutex);
> -
> -	return 0;
> -}
> -
> -/**
> - *  Unregister the fd from the fdset.
> - *  Returns context of a given fd or NULL.
> - */
> -void *
> -fdset_del(struct fdset *pfdset, int fd)
> -{
> -	int i;
> -	void *dat = NULL;
> -
> -	if (pfdset == NULL || fd == -1)
> -		return NULL;
> -
> -	do {
> -		pthread_mutex_lock(&pfdset->fd_mutex);
> -
> -		i = fdset_find_fd(pfdset, fd);
> -		if (i != -1 && pfdset->fd[i].busy == 0) {
> -			/* busy indicates r/wcb is executing! */
> -			dat = pfdset->fd[i].dat;
> -			pfdset->fd[i].fd = -1;
> -			pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
> -			pfdset->fd[i].dat = NULL;
> -			i = -1;
> -		}
> -		pthread_mutex_unlock(&pfdset->fd_mutex);
> -	} while (i != -1);
> -
> -	return dat;
> -}
> -
> -
> -/**
> - * This functions runs in infinite blocking loop until there is no fd in
> - * pfdset. It calls corresponding r/w handler if there is event on the fd.
> - *
> - * Before the callback is called, we set the flag to busy status; If other
> - * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
> - * will wait until the flag is reset to zero(which indicates the callback is
> - * finished), then it could free the context after fdset_del.
> - */
> -void *
> -fdset_event_dispatch(void *arg)
> -{
> -	int i;
> -	struct pollfd *pfd;
> -	struct fdentry *pfdentry;
> -	fd_cb rcb, wcb;
> -	void *dat;
> -	int fd, numfds;
> -	int remove1, remove2;
> -	int need_shrink;
> -	struct fdset *pfdset = arg;
> -	int val;
> -
> -	if (pfdset == NULL)
> -		return NULL;
> -
> -	while (1) {
> -
> -		/*
> -		 * When poll is blocked, other threads might unregister
> -		 * listenfds from and register new listenfds into fdset.
> -		 * When poll returns, the entries for listenfds in the fdset
> -		 * might have been updated. It is ok if there is unwanted call
> -		 * for new listenfds.
> -		 */
> -		pthread_mutex_lock(&pfdset->fd_mutex);
> -		numfds = pfdset->num;
> -		pthread_mutex_unlock(&pfdset->fd_mutex);
> -
> -		val = poll(pfdset->rwfds, numfds, 1000 /* millisecs */);
> -		if (val < 0)
> -			continue;
> -
> -		need_shrink = 0;
> -		for (i = 0; i < numfds; i++) {
> -			pthread_mutex_lock(&pfdset->fd_mutex);
> -
> -			pfdentry = &pfdset->fd[i];
> -			fd = pfdentry->fd;
> -			pfd = &pfdset->rwfds[i];
> -
> -			if (fd < 0) {
> -				need_shrink = 1;
> -				pthread_mutex_unlock(&pfdset->fd_mutex);
> -				continue;
> -			}
> -
> -			if (!pfd->revents) {
> -				pthread_mutex_unlock(&pfdset->fd_mutex);
> -				continue;
> -			}
> -
> -			remove1 = remove2 = 0;
> -
> -			rcb = pfdentry->rcb;
> -			wcb = pfdentry->wcb;
> -			dat = pfdentry->dat;
> -			pfdentry->busy = 1;
> -
> -			pthread_mutex_unlock(&pfdset->fd_mutex);
> -
> -			if (rcb && pfd->revents & (POLLIN | FDPOLLERR))
> -				rcb(fd, dat, &remove1);
> -			if (wcb && pfd->revents & (POLLOUT | FDPOLLERR))
> -				wcb(fd, dat, &remove2);
> -			pfdentry->busy = 0;
> -			/*
> -			 * fdset_del needs to check busy flag.
> -			 * We don't allow fdset_del to be called in callback
> -			 * directly.
> -			 */
> -			/*
> -			 * When we are to clean up the fd from fdset,
> -			 * because the fd is closed in the cb,
> -			 * the old fd val could be reused by when creates new
> -			 * listen fd in another thread, we couldn't call
> -			 * fd_set_del.
> -			 */
> -			if (remove1 || remove2) {
> -				pfdentry->fd = -1;
> -				need_shrink = 1;
> -			}
> -		}
> -
> -		if (need_shrink)
> -			fdset_shrink(pfdset);
> -	}
> -
> -	return NULL;
> -}
> diff --git a/lib/librte_vhost/fd_man.h b/lib/librte_vhost/fd_man.h
> index 3a9276c3c..b1c628251 100644
> --- a/lib/librte_vhost/fd_man.h
> +++ b/lib/librte_vhost/fd_man.h
> @@ -4,11 +4,11 @@
>   
>   #ifndef _FD_MAN_H_
>   #define _FD_MAN_H_
> -#include <stdint.h>
> -#include <pthread.h>
>   #include <poll.h>
> +#include <stdio.h>
>   
>   #define MAX_FDS 1024
> +#define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
>   
>   typedef void (*fd_cb)(int fd, void *dat, int *remove);
>   
> @@ -27,14 +27,258 @@ struct fdset {
>   	int num;	/* current fd number of this fdset */
>   };
>   
> +static inline int
> +get_last_valid_idx(struct fdset *pfdset, int last_valid_idx)
> +{
> +	int i;
>   
> -void fdset_init(struct fdset *pfdset);
> +	for (i = last_valid_idx; i >= 0 && pfdset->fd[i].fd == -1; i--)
> +		;
>   
> -int fdset_add(struct fdset *pfdset, int fd,
> -	fd_cb rcb, fd_cb wcb, void *dat);
> +	return i;
> +}
>   
> -void *fdset_del(struct fdset *pfdset, int fd);
> +static inline void
> +fdset_move(struct fdset *pfdset, int dst, int src)
> +{
> +	pfdset->fd[dst]    = pfdset->fd[src];
> +	pfdset->rwfds[dst] = pfdset->rwfds[src];
> +}
>   
> -void *fdset_event_dispatch(void *arg);
> +static inline void
> +fdset_shrink_nolock(struct fdset *pfdset)
> +{
> +	int i;
> +	int last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1);
> +
> +	for (i = 0; i < last_valid_idx; i++) {
> +		if (pfdset->fd[i].fd != -1)
> +			continue;
> +
> +		fdset_move(pfdset, i, last_valid_idx);
> +		last_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1);
> +	}
> +	pfdset->num = last_valid_idx + 1;
> +}
> +
> +/*
> + * Find deleted fd entries and remove them
> + */
> +static inline void
> +fdset_shrink(struct fdset *pfdset)
> +{
> +	pthread_mutex_lock(&pfdset->fd_mutex);
> +	fdset_shrink_nolock(pfdset);
> +	pthread_mutex_unlock(&pfdset->fd_mutex);
> +}
> +
> +/**
> + * Returns the index in the fdset for a given fd.
> + * @return
> + *   index for the fd, or -1 if fd isn't in the fdset.
> + */
> +static inline int
> +fdset_find_fd(struct fdset *pfdset, int fd)
> +{
> +	int i;
> +
> +	for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++)
> +		;
> +
> +	return i == pfdset->num ? -1 : i;
> +}
> +
> +static inline void
> +fdset_add_fd(struct fdset *pfdset, int idx, int fd,
> +	fd_cb rcb, fd_cb wcb, void *dat)
> +{
> +	struct fdentry *pfdentry = &pfdset->fd[idx];
> +	struct pollfd *pfd = &pfdset->rwfds[idx];
> +
> +	pfdentry->fd  = fd;
> +	pfdentry->rcb = rcb;
> +	pfdentry->wcb = wcb;
> +	pfdentry->dat = dat;
> +
> +	pfd->fd = fd;
> +	pfd->events  = rcb ? POLLIN : 0;
> +	pfd->events |= wcb ? POLLOUT : 0;
> +	pfd->revents = 0;
> +}
> +
> +static inline void
> +fdset_init(struct fdset *pfdset)
> +{
> +	int i;
> +
> +	if (pfdset == NULL)
> +		return;
> +
> +	for (i = 0; i < MAX_FDS; i++) {
> +		pfdset->fd[i].fd = -1;
> +		pfdset->fd[i].dat = NULL;
> +	}
> +	pfdset->num = 0;
> +}
> +
> +/**
> + * Register the fd in the fdset with read/write handler and context.
> + */
> +static inline int
> +fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
> +{
> +	int i;
> +
> +	if (pfdset == NULL || fd == -1)
> +		return -1;
> +
> +	pthread_mutex_lock(&pfdset->fd_mutex);
> +	i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
> +	if (i == -1) {
> +		fdset_shrink_nolock(pfdset);
> +		i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
> +		if (i == -1) {
> +			pthread_mutex_unlock(&pfdset->fd_mutex);
> +			return -2;
> +		}
> +	}
> +
> +	fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
> +	pthread_mutex_unlock(&pfdset->fd_mutex);
> +
> +	return 0;
> +}
> +
> +/**
> + *  Unregister the fd from the fdset.
> + *  Returns context of a given fd or NULL.
> + */
> +static inline void *
> +fdset_del(struct fdset *pfdset, int fd)
> +{
> +	int i;
> +	void *dat = NULL;
> +
> +	if (pfdset == NULL || fd == -1)
> +		return NULL;
> +
> +	do {
> +		pthread_mutex_lock(&pfdset->fd_mutex);
> +
> +		i = fdset_find_fd(pfdset, fd);
> +		if (i != -1 && pfdset->fd[i].busy == 0) {
> +			/* busy indicates r/wcb is executing! */
> +			dat = pfdset->fd[i].dat;
> +			pfdset->fd[i].fd = -1;
> +			pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
> +			pfdset->fd[i].dat = NULL;
> +			i = -1;
> +		}
> +		pthread_mutex_unlock(&pfdset->fd_mutex);
> +	} while (i != -1);
> +
> +	return dat;
> +}
> +
> +/**
> + * This functions runs in infinite blocking loop until there is no fd in
> + * pfdset. It calls corresponding r/w handler if there is event on the fd.
> + *
> + * Before the callback is called, we set the flag to busy status; If other
> + * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
> + * will wait until the flag is reset to zero(which indicates the callback is
> + * finished), then it could free the context after fdset_del.
> + */
> +static inline void *
> +fdset_event_dispatch(void *arg)
> +{
> +	int i;
> +	struct pollfd *pfd;
> +	struct fdentry *pfdentry;
> +	fd_cb rcb, wcb;
> +	void *dat;
> +	int fd, numfds;
> +	int remove1, remove2;
> +	int need_shrink;
> +	struct fdset *pfdset = arg;
> +	int val;
> +
> +	if (pfdset == NULL)
> +		return NULL;
> +
> +	while (1) {
> +
> +		/*
> +		 * When poll is blocked, other threads might unregister
> +		 * listenfds from and register new listenfds into fdset.
> +		 * When poll returns, the entries for listenfds in the fdset
> +		 * might have been updated. It is ok if there is unwanted call
> +		 * for new listenfds.
> +		 */
> +		pthread_mutex_lock(&pfdset->fd_mutex);
> +		numfds = pfdset->num;
> +		pthread_mutex_unlock(&pfdset->fd_mutex);
> +
> +		val = poll(pfdset->rwfds, numfds, 1000 /* millisecs */);
> +		if (val < 0)
> +			continue;
> +
> +		need_shrink = 0;
> +		for (i = 0; i < numfds; i++) {
> +			pthread_mutex_lock(&pfdset->fd_mutex);
> +
> +			pfdentry = &pfdset->fd[i];
> +			fd = pfdentry->fd;
> +			pfd = &pfdset->rwfds[i];
> +
> +			if (fd < 0) {
> +				need_shrink = 1;
> +				pthread_mutex_unlock(&pfdset->fd_mutex);
> +				continue;
> +			}
> +
> +			if (!pfd->revents) {
> +				pthread_mutex_unlock(&pfdset->fd_mutex);
> +				continue;
> +			}
> +
> +			remove1 = remove2 = 0;
> +
> +			rcb = pfdentry->rcb;
> +			wcb = pfdentry->wcb;
> +			dat = pfdentry->dat;
> +			pfdentry->busy = 1;
> +
> +			pthread_mutex_unlock(&pfdset->fd_mutex);
> +
> +			if (rcb && pfd->revents & (POLLIN | FDPOLLERR))
> +				rcb(fd, dat, &remove1);
> +			if (wcb && pfd->revents & (POLLOUT | FDPOLLERR))
> +				wcb(fd, dat, &remove2);
> +			pfdentry->busy = 0;
> +			/*
> +			 * fdset_del needs to check busy flag.
> +			 * We don't allow fdset_del to be called in callback
> +			 * directly.
> +			 */
> +			/*
> +			 * When we are to clean up the fd from fdset,
> +			 * because the fd is closed in the cb,
> +			 * the old fd val could be reused by when creates new
> +			 * listen fd in another thread, we couldn't call
> +			 * fd_set_del.
> +			 */
> +			if (remove1 || remove2) {
> +				pfdentry->fd = -1;
> +				need_shrink = 1;
> +			}
> +		}
> +
> +		if (need_shrink)
> +			fdset_shrink(pfdset);
> +	}
> +
> +	return NULL;
> +}
>   
>   #endif
>
  
Yang, Zhiyong Feb. 28, 2018, 1:36 a.m. UTC | #2
> -----Original Message-----

> From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]

> Sent: Wednesday, February 28, 2018 1:52 AM

> To: Yang, Zhiyong <zhiyong.yang@intel.com>; dev@dpdk.org;

> yliu@fridaylinux.org; Tan, Jianfeng <jianfeng.tan@intel.com>; Bie, Tiwei

> <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>

> Cc: Wang, Dong1 <dong1.wang@intel.com>

> Subject: Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to

> fd_man.h

> 

> Hi Zhiyong,

> 

> On 02/14/2018 03:53 PM, Zhiyong Yang wrote:

> > The patch moves fdset related funcitons from fd_man.c to fd_man.h in

> > order to reuse these funcitons from the perspective of PMDs.

> >

> > Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>

> > ---

> >   lib/librte_vhost/Makefile |   3 +-

> >   lib/librte_vhost/fd_man.c | 274 ----------------------------------------------

> >   lib/librte_vhost/fd_man.h | 258

> +++++++++++++++++++++++++++++++++++++++++--

> >   3 files changed, 253 insertions(+), 282 deletions(-)

> >   delete mode 100644 lib/librte_vhost/fd_man.c

> 

> I disagree with the patch.

> It is a good thing to reuse the code, but to do it, you need to extend the

> vhost lib API.

> 

> New API need to be prefixed with rte_vhost_, and be declared in

> rte_vhost.h.

> 

> And no need to move the functions from the .c to the .h file, as it moreover

> makes you inline them, which is not necessary here.


Thanks for your reviewing the series firstly, Maxime. :)

I considered to do it as you said. However I still preferred this one at last.
Here are my reasons.
1) As far as I know, this set of functions are used privately in librte_vhost before this feature.
No strong request from the perspective of DPDK application. If I understand well,  It is enough to expose the functions to all PMDs
And it is better to keep internal use in DPDK.

2) These functions help to implement vhost user, but they are not strongly related to other APIs of vhost user which have already exposed.
if we want to expose them as APIs at lib layer, many functions and related data structure has to be exposed in rte_vhost.h. it looks messy.
Your opinion?

thanks
Zhiyong
  
Maxime Coquelin Feb. 28, 2018, 8:45 a.m. UTC | #3
On 02/28/2018 02:36 AM, Yang, Zhiyong wrote:
> 
> 
>> -----Original Message-----
>> From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
>> Sent: Wednesday, February 28, 2018 1:52 AM
>> To: Yang, Zhiyong <zhiyong.yang@intel.com>; dev@dpdk.org;
>> yliu@fridaylinux.org; Tan, Jianfeng <jianfeng.tan@intel.com>; Bie, Tiwei
>> <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>
>> Cc: Wang, Dong1 <dong1.wang@intel.com>
>> Subject: Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to
>> fd_man.h
>>
>> Hi Zhiyong,
>>
>> On 02/14/2018 03:53 PM, Zhiyong Yang wrote:
>>> The patch moves fdset related funcitons from fd_man.c to fd_man.h in
>>> order to reuse these funcitons from the perspective of PMDs.
>>>
>>> Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
>>> ---
>>>    lib/librte_vhost/Makefile |   3 +-
>>>    lib/librte_vhost/fd_man.c | 274 ----------------------------------------------
>>>    lib/librte_vhost/fd_man.h | 258
>> +++++++++++++++++++++++++++++++++++++++++--
>>>    3 files changed, 253 insertions(+), 282 deletions(-)
>>>    delete mode 100644 lib/librte_vhost/fd_man.c
>>
>> I disagree with the patch.
>> It is a good thing to reuse the code, but to do it, you need to extend the
>> vhost lib API.
>>
>> New API need to be prefixed with rte_vhost_, and be declared in
>> rte_vhost.h.
>>
>> And no need to move the functions from the .c to the .h file, as it moreover
>> makes you inline them, which is not necessary here.
> 
> Thanks for your reviewing the series firstly, Maxime. :)
> 
> I considered to do it as you said. However I still preferred this one at last.
> Here are my reasons.
> 1) As far as I know, this set of functions are used privately in librte_vhost before this feature.
> No strong request from the perspective of DPDK application. If I understand well,  It is enough to expose the functions to all PMDs
> And it is better to keep internal use in DPDK.

But what the patch is doing is adding fd_man.h to the API, without doing
it properly. fd_man.h will be installed with other header files, and any
external application can use it.

> 
> 2) These functions help to implement vhost user, but they are not strongly related to other APIs of vhost user which have already exposed.
> if we want to expose them as APIs at lib layer, many functions and related data structure has to be exposed in rte_vhost.h. it looks messy.
> Your opinion?

Yes, it is not really vhost-related, it could be part of a more generic
library. It is maybe better to duplicate these lines, or to move this
code in a existing or new library.

Cheers,
Maxime

> thanks
> Zhiyong
>
  
Jianfeng Tan March 1, 2018, 6:02 a.m. UTC | #4
> -----Original Message-----

> From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]

> Sent: Wednesday, February 28, 2018 4:45 PM

> To: Yang, Zhiyong; dev@dpdk.org; yliu@fridaylinux.org; Tan, Jianfeng; Bie,

> Tiwei; Wang, Zhihong

> Cc: Wang, Dong1

> Subject: Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to

> fd_man.h

> 

> 

> 

> On 02/28/2018 02:36 AM, Yang, Zhiyong wrote:

> >

> >

> >> -----Original Message-----

> >> From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]

> >> Sent: Wednesday, February 28, 2018 1:52 AM

> >> To: Yang, Zhiyong <zhiyong.yang@intel.com>; dev@dpdk.org;

> >> yliu@fridaylinux.org; Tan, Jianfeng <jianfeng.tan@intel.com>; Bie, Tiwei

> >> <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>

> >> Cc: Wang, Dong1 <dong1.wang@intel.com>

> >> Subject: Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to

> >> fd_man.h

> >>

> >> Hi Zhiyong,

> >>

> >> On 02/14/2018 03:53 PM, Zhiyong Yang wrote:

> >>> The patch moves fdset related funcitons from fd_man.c to fd_man.h in

> >>> order to reuse these funcitons from the perspective of PMDs.

> >>>

> >>> Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>

> >>> ---

> >>>    lib/librte_vhost/Makefile |   3 +-

> >>>    lib/librte_vhost/fd_man.c | 274 ----------------------------------------------

> >>>    lib/librte_vhost/fd_man.h | 258

> >> +++++++++++++++++++++++++++++++++++++++++--

> >>>    3 files changed, 253 insertions(+), 282 deletions(-)

> >>>    delete mode 100644 lib/librte_vhost/fd_man.c

> >>

> >> I disagree with the patch.

> >> It is a good thing to reuse the code, but to do it, you need to extend the

> >> vhost lib API.

> >>

> >> New API need to be prefixed with rte_vhost_, and be declared in

> >> rte_vhost.h.

> >>

> >> And no need to move the functions from the .c to the .h file, as it

> moreover

> >> makes you inline them, which is not necessary here.

> >

> > Thanks for your reviewing the series firstly, Maxime. :)

> >

> > I considered to do it as you said. However I still preferred this one at last.

> > Here are my reasons.

> > 1) As far as I know, this set of functions are used privately in librte_vhost

> before this feature.

> > No strong request from the perspective of DPDK application. If I

> understand well,  It is enough to expose the functions to all PMDs

> > And it is better to keep internal use in DPDK.

> 

> But what the patch is doing is adding fd_man.h to the API, without doing

> it properly. fd_man.h will be installed with other header files, and any

> external application can use it.

> 

> >

> > 2) These functions help to implement vhost user, but they are not strongly

> related to other APIs of vhost user which have already exposed.

> > if we want to expose them as APIs at lib layer, many functions and related

> data structure has to be exposed in rte_vhost.h. it looks messy.

> > Your opinion?

> 

> Yes, it is not really vhost-related, it could be part of a more generic

> library. It is maybe better to duplicate these lines, or to move this

> code in a existing or new library.


I vote to move it to generic library, maybe eal. Poll() has better compatibility even though poll() is not as performant as epoll().

Thomas, how do you think?

Thanks,
Jianfeng 

> 

> Cheers,

> Maxime

> 

> > thanks

> > Zhiyong

> >
  
Thomas Monjalon March 1, 2018, 2:13 p.m. UTC | #5
01/03/2018 07:02, Tan, Jianfeng:
> From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > On 02/28/2018 02:36 AM, Yang, Zhiyong wrote:
> > > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > >> On 02/14/2018 03:53 PM, Zhiyong Yang wrote:
> > >>>    lib/librte_vhost/Makefile |   3 +-
> > >>>    lib/librte_vhost/fd_man.c | 274 ----------------------------------------------
> > >>>    lib/librte_vhost/fd_man.h | 258
> > >> +++++++++++++++++++++++++++++++++++++++++--
> > >>>    3 files changed, 253 insertions(+), 282 deletions(-)
> > >>>    delete mode 100644 lib/librte_vhost/fd_man.c
> > >>
> > >> I disagree with the patch.
> > >> It is a good thing to reuse the code, but to do it, you need to extend the
> > >> vhost lib API.
> > >>
> > >> New API need to be prefixed with rte_vhost_, and be declared in
> > >> rte_vhost.h.
> > >>
> > >> And no need to move the functions from the .c to the .h file, as it
> > moreover
> > >> makes you inline them, which is not necessary here.
> > >
> > > Thanks for your reviewing the series firstly, Maxime. :)
> > >
> > > I considered to do it as you said. However I still preferred this one at last.
> > > Here are my reasons.
> > > 1) As far as I know, this set of functions are used privately in librte_vhost
> > before this feature.
> > > No strong request from the perspective of DPDK application. If I
> > understand well,  It is enough to expose the functions to all PMDs
> > > And it is better to keep internal use in DPDK.
> > 
> > But what the patch is doing is adding fd_man.h to the API, without doing
> > it properly. fd_man.h will be installed with other header files, and any
> > external application can use it.
> > 
> > >
> > > 2) These functions help to implement vhost user, but they are not strongly
> > related to other APIs of vhost user which have already exposed.
> > > if we want to expose them as APIs at lib layer, many functions and related
> > data structure has to be exposed in rte_vhost.h. it looks messy.
> > > Your opinion?
> > 
> > Yes, it is not really vhost-related, it could be part of a more generic
> > library. It is maybe better to duplicate these lines, or to move this
> > code in a existing or new library.
> 
> I vote to move it to generic library, maybe eal. Poll() has better compatibility even though poll() is not as performant as epoll().
> 
> Thomas, how do you think?

I don't see why it should be exported outside of DPDK, except for PMDs.
I would tend to keep it internal but I understand that it would mean
duplicating some code, which is not ideal.
Please could you show what would be the content of the .h in EAL?
  
Yang, Zhiyong March 5, 2018, 7:43 a.m. UTC | #6
> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> Sent: Thursday, March 1, 2018 10:14 PM
> To: Tan, Jianfeng <jianfeng.tan@intel.com>
> Cc: Maxime Coquelin <maxime.coquelin@redhat.com>; Yang, Zhiyong
> <zhiyong.yang@intel.com>; dev@dpdk.org; yliu@fridaylinux.org; Bie, Tiwei
> <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>; Wang,
> Dong1 <dong1.wang@intel.com>
> Subject: Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to
> fd_man.h
> 
> 01/03/2018 07:02, Tan, Jianfeng:
> > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > > On 02/28/2018 02:36 AM, Yang, Zhiyong wrote:
> > > > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > > >> On 02/14/2018 03:53 PM, Zhiyong Yang wrote:
> > > >>>    lib/librte_vhost/Makefile |   3 +-
> > > >>>    lib/librte_vhost/fd_man.c | 274 -------------------------------------------
> ---
> > > >>>    lib/librte_vhost/fd_man.h | 258
> > > >> +++++++++++++++++++++++++++++++++++++++++--
> > > >>>    3 files changed, 253 insertions(+), 282 deletions(-)
> > > >>>    delete mode 100644 lib/librte_vhost/fd_man.c
> > > >>
> > > >> I disagree with the patch.
> > > >> It is a good thing to reuse the code, but to do it, you need to
> > > >> extend the vhost lib API.
> > > >>
> > > >> New API need to be prefixed with rte_vhost_, and be declared in
> > > >> rte_vhost.h.
> > > >>
> > > >> And no need to move the functions from the .c to the .h file, as
> > > >> it
> > > moreover
> > > >> makes you inline them, which is not necessary here.
> > > >
> > > > Thanks for your reviewing the series firstly, Maxime. :)
> > > >
> > > > I considered to do it as you said. However I still preferred this one at last.
> > > > Here are my reasons.
> > > > 1) As far as I know, this set of functions are used privately in
> > > > librte_vhost
> > > before this feature.
> > > > No strong request from the perspective of DPDK application. If I
> > > understand well,  It is enough to expose the functions to all PMDs
> > > > And it is better to keep internal use in DPDK.
> > >
> > > But what the patch is doing is adding fd_man.h to the API, without
> > > doing it properly. fd_man.h will be installed with other header
> > > files, and any external application can use it.
> > >
> > > >
> > > > 2) These functions help to implement vhost user, but they are not
> > > > strongly
> > > related to other APIs of vhost user which have already exposed.
> > > > if we want to expose them as APIs at lib layer, many functions and
> > > > related
> > > data structure has to be exposed in rte_vhost.h. it looks messy.
> > > > Your opinion?
> > >
> > > Yes, it is not really vhost-related, it could be part of a more
> > > generic library. It is maybe better to duplicate these lines, or to
> > > move this code in a existing or new library.
> >
> > I vote to move it to generic library, maybe eal. Poll() has better
> compatibility even though poll() is not as performant as epoll().
> >
> > Thomas, how do you think?
> 
> I don't see why it should be exported outside of DPDK, except for PMDs.
> I would tend to keep it internal but I understand that it would mean
> duplicating some code, which is not ideal.
> Please could you show what would be the content of the .h in EAL?
> 

If needed to expose them in eal.h, 
I think that they should be the whole fdset mechanism as followings.

typedef void (*fd_cb)(int fd, void *dat, int *remove);

struct fdentry {
	int fd;		/* -1 indicates this entry is empty */
	fd_cb rcb;	/* callback when this fd is readable. */
	fd_cb wcb;	/* callback when this fd is writeable.*/
	void *dat;	/* fd context */
	int busy;	/* whether this entry is being used in cb. */
};

struct fdset {
	struct pollfd rwfds[MAX_FDS];
	struct fdentry fd[MAX_FDS];
	pthread_mutex_t fd_mutex;
	int num;	/* current fd number of this fdset */
};

void fdset_init(struct fdset *pfdset);    (not used in the patchset)

int fdset_add(struct fdset *pfdset, int fd,
	fd_cb rcb, fd_cb wcb, void *dat);     (used in this patchset)

void *fdset_del(struct fdset *pfdset, int fd); (not used in the patchset)

void *fdset_event_dispatch(void *arg);   (used in this patchset)

seems that we have 4 options.
1) expose them in librte_vhost
2) expose them in other existing or new libs. for example,  eal.
3) duplicate the code lines at PMD layer.
4) do it as the patch does that.

thanks
Zhiyong
  
Thomas Monjalon March 5, 2018, 8:54 a.m. UTC | #7
05/03/2018 08:43, Yang, Zhiyong:
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > 01/03/2018 07:02, Tan, Jianfeng:
> > > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > > > On 02/28/2018 02:36 AM, Yang, Zhiyong wrote:
> > > > > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > > > >> On 02/14/2018 03:53 PM, Zhiyong Yang wrote:
> > > > >>>    lib/librte_vhost/Makefile |   3 +-
> > > > >>>    lib/librte_vhost/fd_man.c | 274 -------------------------------------------
> > ---
> > > > >>>    lib/librte_vhost/fd_man.h | 258
> > > > >> +++++++++++++++++++++++++++++++++++++++++--
> > > > >>>    3 files changed, 253 insertions(+), 282 deletions(-)
> > > > >>>    delete mode 100644 lib/librte_vhost/fd_man.c
> > > > >>
> > > > >> I disagree with the patch.
> > > > >> It is a good thing to reuse the code, but to do it, you need to
> > > > >> extend the vhost lib API.
> > > > >>
> > > > >> New API need to be prefixed with rte_vhost_, and be declared in
> > > > >> rte_vhost.h.
> > > > >>
> > > > >> And no need to move the functions from the .c to the .h file, as
> > > > >> it
> > > > moreover
> > > > >> makes you inline them, which is not necessary here.
> > > > >
> > > > > Thanks for your reviewing the series firstly, Maxime. :)
> > > > >
> > > > > I considered to do it as you said. However I still preferred this one at last.
> > > > > Here are my reasons.
> > > > > 1) As far as I know, this set of functions are used privately in
> > > > > librte_vhost
> > > > before this feature.
> > > > > No strong request from the perspective of DPDK application. If I
> > > > understand well,  It is enough to expose the functions to all PMDs
> > > > > And it is better to keep internal use in DPDK.
> > > >
> > > > But what the patch is doing is adding fd_man.h to the API, without
> > > > doing it properly. fd_man.h will be installed with other header
> > > > files, and any external application can use it.
> > > >
> > > > >
> > > > > 2) These functions help to implement vhost user, but they are not
> > > > > strongly
> > > > related to other APIs of vhost user which have already exposed.
> > > > > if we want to expose them as APIs at lib layer, many functions and
> > > > > related
> > > > data structure has to be exposed in rte_vhost.h. it looks messy.
> > > > > Your opinion?
> > > >
> > > > Yes, it is not really vhost-related, it could be part of a more
> > > > generic library. It is maybe better to duplicate these lines, or to
> > > > move this code in a existing or new library.
> > >
> > > I vote to move it to generic library, maybe eal. Poll() has better
> > compatibility even though poll() is not as performant as epoll().
> > >
> > > Thomas, how do you think?
> > 
> > I don't see why it should be exported outside of DPDK, except for PMDs.
> > I would tend to keep it internal but I understand that it would mean
> > duplicating some code, which is not ideal.
> > Please could you show what would be the content of the .h in EAL?
> > 
> 
> If needed to expose them in eal.h, 
> I think that they should be the whole fdset mechanism as followings.
> 
> typedef void (*fd_cb)(int fd, void *dat, int *remove);
> 
> struct fdentry {
> 	int fd;		/* -1 indicates this entry is empty */
> 	fd_cb rcb;	/* callback when this fd is readable. */
> 	fd_cb wcb;	/* callback when this fd is writeable.*/
> 	void *dat;	/* fd context */
> 	int busy;	/* whether this entry is being used in cb. */
> };
> 
> struct fdset {
> 	struct pollfd rwfds[MAX_FDS];
> 	struct fdentry fd[MAX_FDS];
> 	pthread_mutex_t fd_mutex;
> 	int num;	/* current fd number of this fdset */
> };
> 
> void fdset_init(struct fdset *pfdset);    (not used in the patchset)
> 
> int fdset_add(struct fdset *pfdset, int fd,
> 	fd_cb rcb, fd_cb wcb, void *dat);     (used in this patchset)
> 
> void *fdset_del(struct fdset *pfdset, int fd); (not used in the patchset)
> 
> void *fdset_event_dispatch(void *arg);   (used in this patchset)
> 
> seems that we have 4 options.
> 1) expose them in librte_vhost
> 2) expose them in other existing or new libs. for example,  eal.
> 3) duplicate the code lines at PMD layer.
> 4) do it as the patch does that.

It looks to be very close of the interrupt thread.
Can we have all merged in an unique event dispatcher thread?
  
Yang, Zhiyong March 13, 2018, 8:46 a.m. UTC | #8
Hi Thomas,

> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> Sent: Monday, March 5, 2018 4:55 PM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>
> Cc: Tan, Jianfeng <jianfeng.tan@intel.com>; Maxime Coquelin
> <maxime.coquelin@redhat.com>; dev@dpdk.org; yliu@fridaylinux.org; Bie,
> Tiwei <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>;
> Wang, Dong1 <dong1.wang@intel.com>
> Subject: Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to
> fd_man.h
> 
> 05/03/2018 08:43, Yang, Zhiyong:
> > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > 01/03/2018 07:02, Tan, Jianfeng:
> > > > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > > > > On 02/28/2018 02:36 AM, Yang, Zhiyong wrote:
> > > > > > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > > > > >> On 02/14/2018 03:53 PM, Zhiyong Yang wrote:
> > > > > >>>    lib/librte_vhost/Makefile |   3 +-
> > > > > >>>    lib/librte_vhost/fd_man.c | 274
> > > > > >>> -------------------------------------------
> > > ---
> > > > > >>>    lib/librte_vhost/fd_man.h | 258
> > > > > >> +++++++++++++++++++++++++++++++++++++++++--
> > > > > >>>    3 files changed, 253 insertions(+), 282 deletions(-)
> > > > > >>>    delete mode 100644 lib/librte_vhost/fd_man.c
> > > > > >>
> > > > > >> I disagree with the patch.
> > > > > >> It is a good thing to reuse the code, but to do it, you need
> > > > > >> to extend the vhost lib API.
> > > > > >>
> > > > > >> New API need to be prefixed with rte_vhost_, and be declared
> > > > > >> in rte_vhost.h.
> > > > > >>
> > > > > >> And no need to move the functions from the .c to the .h file,
> > > > > >> as it
> > > > > moreover
> > > > > >> makes you inline them, which is not necessary here.
> > > > > >
> > > > > > Thanks for your reviewing the series firstly, Maxime. :)
> > > > > >
> > > > > > I considered to do it as you said. However I still preferred this one at
> last.
> > > > > > Here are my reasons.
> > > > > > 1) As far as I know, this set of functions are used privately
> > > > > > in librte_vhost
> > > > > before this feature.
> > > > > > No strong request from the perspective of DPDK application. If
> > > > > > I
> > > > > understand well,  It is enough to expose the functions to all
> > > > > PMDs
> > > > > > And it is better to keep internal use in DPDK.
> > > > >
> > > > > But what the patch is doing is adding fd_man.h to the API,
> > > > > without doing it properly. fd_man.h will be installed with other
> > > > > header files, and any external application can use it.
> > > > >
> > > > > >
> > > > > > 2) These functions help to implement vhost user, but they are
> > > > > > not strongly
> > > > > related to other APIs of vhost user which have already exposed.
> > > > > > if we want to expose them as APIs at lib layer, many functions
> > > > > > and related
> > > > > data structure has to be exposed in rte_vhost.h. it looks messy.
> > > > > > Your opinion?
> > > > >
> > > > > Yes, it is not really vhost-related, it could be part of a more
> > > > > generic library. It is maybe better to duplicate these lines, or
> > > > > to move this code in a existing or new library.
> > > >
> > > > I vote to move it to generic library, maybe eal. Poll() has better
> > > compatibility even though poll() is not as performant as epoll().
> > > >
> > > > Thomas, how do you think?
> > >
> > > I don't see why it should be exported outside of DPDK, except for PMDs.
> > > I would tend to keep it internal but I understand that it would mean
> > > duplicating some code, which is not ideal.
> > > Please could you show what would be the content of the .h in EAL?
> > >
> >
> > If needed to expose them in eal.h,
> > I think that they should be the whole fdset mechanism as followings.
> >
> > typedef void (*fd_cb)(int fd, void *dat, int *remove);
> >
> > struct fdentry {
> > 	int fd;		/* -1 indicates this entry is empty */
> > 	fd_cb rcb;	/* callback when this fd is readable. */
> > 	fd_cb wcb;	/* callback when this fd is writeable.*/
> > 	void *dat;	/* fd context */
> > 	int busy;	/* whether this entry is being used in cb. */
> > };
> >
> > struct fdset {
> > 	struct pollfd rwfds[MAX_FDS];
> > 	struct fdentry fd[MAX_FDS];
> > 	pthread_mutex_t fd_mutex;
> > 	int num;	/* current fd number of this fdset */
> > };
> >
> > void fdset_init(struct fdset *pfdset);    (not used in the patchset)
> >
> > int fdset_add(struct fdset *pfdset, int fd,
> > 	fd_cb rcb, fd_cb wcb, void *dat);     (used in this patchset)
> >
> > void *fdset_del(struct fdset *pfdset, int fd); (not used in the
> > patchset)
> >
> > void *fdset_event_dispatch(void *arg);   (used in this patchset)
> >
> > seems that we have 4 options.
> > 1) expose them in librte_vhost
> > 2) expose them in other existing or new libs. for example,  eal.
> > 3) duplicate the code lines at PMD layer.
> > 4) do it as the patch does that.
> 
> It looks to be very close of the interrupt thread.
> Can we have all merged in an unique event dispatcher thread?
> 

If I understand right, do you mean that we can merge them in lib eal ?  right?

Thanks
Zhiyong
  
Thomas Monjalon March 13, 2018, 9:43 a.m. UTC | #9
13/03/2018 09:46, Yang, Zhiyong:
> Hi Thomas,
> 
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > 05/03/2018 08:43, Yang, Zhiyong:
> > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > 01/03/2018 07:02, Tan, Jianfeng:
> > > > > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > > > > > On 02/28/2018 02:36 AM, Yang, Zhiyong wrote:
> > > > > > > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > > > > > >> On 02/14/2018 03:53 PM, Zhiyong Yang wrote:
> > > > > > >>>    lib/librte_vhost/Makefile |   3 +-
> > > > > > >>>    lib/librte_vhost/fd_man.c | 274
> > > > > > >>> -------------------------------------------
> > > > ---
> > > > > > >>>    lib/librte_vhost/fd_man.h | 258
> > > > > > >> +++++++++++++++++++++++++++++++++++++++++--
> > > > > > >>>    3 files changed, 253 insertions(+), 282 deletions(-)
> > > > > > >>>    delete mode 100644 lib/librte_vhost/fd_man.c
> > > > > > >>
> > > > > > >> I disagree with the patch.
> > > > > > >> It is a good thing to reuse the code, but to do it, you need
> > > > > > >> to extend the vhost lib API.
> > > > > > >>
> > > > > > >> New API need to be prefixed with rte_vhost_, and be declared
> > > > > > >> in rte_vhost.h.
> > > > > > >>
> > > > > > >> And no need to move the functions from the .c to the .h file,
> > > > > > >> as it
> > > > > > moreover
> > > > > > >> makes you inline them, which is not necessary here.
> > > > > > >
> > > > > > > Thanks for your reviewing the series firstly, Maxime. :)
> > > > > > >
> > > > > > > I considered to do it as you said. However I still preferred this one at
> > last.
> > > > > > > Here are my reasons.
> > > > > > > 1) As far as I know, this set of functions are used privately
> > > > > > > in librte_vhost
> > > > > > before this feature.
> > > > > > > No strong request from the perspective of DPDK application. If
> > > > > > > I
> > > > > > understand well,  It is enough to expose the functions to all
> > > > > > PMDs
> > > > > > > And it is better to keep internal use in DPDK.
> > > > > >
> > > > > > But what the patch is doing is adding fd_man.h to the API,
> > > > > > without doing it properly. fd_man.h will be installed with other
> > > > > > header files, and any external application can use it.
> > > > > >
> > > > > > >
> > > > > > > 2) These functions help to implement vhost user, but they are
> > > > > > > not strongly
> > > > > > related to other APIs of vhost user which have already exposed.
> > > > > > > if we want to expose them as APIs at lib layer, many functions
> > > > > > > and related
> > > > > > data structure has to be exposed in rte_vhost.h. it looks messy.
> > > > > > > Your opinion?
> > > > > >
> > > > > > Yes, it is not really vhost-related, it could be part of a more
> > > > > > generic library. It is maybe better to duplicate these lines, or
> > > > > > to move this code in a existing or new library.
> > > > >
> > > > > I vote to move it to generic library, maybe eal. Poll() has better
> > > > compatibility even though poll() is not as performant as epoll().
> > > > >
> > > > > Thomas, how do you think?
> > > >
> > > > I don't see why it should be exported outside of DPDK, except for PMDs.
> > > > I would tend to keep it internal but I understand that it would mean
> > > > duplicating some code, which is not ideal.
> > > > Please could you show what would be the content of the .h in EAL?
> > > >
> > >
> > > If needed to expose them in eal.h,
> > > I think that they should be the whole fdset mechanism as followings.
> > >
> > > typedef void (*fd_cb)(int fd, void *dat, int *remove);
> > >
> > > struct fdentry {
> > > 	int fd;		/* -1 indicates this entry is empty */
> > > 	fd_cb rcb;	/* callback when this fd is readable. */
> > > 	fd_cb wcb;	/* callback when this fd is writeable.*/
> > > 	void *dat;	/* fd context */
> > > 	int busy;	/* whether this entry is being used in cb. */
> > > };
> > >
> > > struct fdset {
> > > 	struct pollfd rwfds[MAX_FDS];
> > > 	struct fdentry fd[MAX_FDS];
> > > 	pthread_mutex_t fd_mutex;
> > > 	int num;	/* current fd number of this fdset */
> > > };
> > >
> > > void fdset_init(struct fdset *pfdset);    (not used in the patchset)
> > >
> > > int fdset_add(struct fdset *pfdset, int fd,
> > > 	fd_cb rcb, fd_cb wcb, void *dat);     (used in this patchset)
> > >
> > > void *fdset_del(struct fdset *pfdset, int fd); (not used in the
> > > patchset)
> > >
> > > void *fdset_event_dispatch(void *arg);   (used in this patchset)
> > >
> > > seems that we have 4 options.
> > > 1) expose them in librte_vhost
> > > 2) expose them in other existing or new libs. for example,  eal.
> > > 3) duplicate the code lines at PMD layer.
> > > 4) do it as the patch does that.
> > 
> > It looks to be very close of the interrupt thread.
> > Can we have all merged in an unique event dispatcher thread?
> > 
> 
> If I understand right, do you mean that we can merge them in lib eal ?  right?

Yes merge with interrupt thread in EAL.
I didn't look at the details, but it seems the right place for such thing.
  
Yang, Zhiyong March 13, 2018, 9:50 a.m. UTC | #10
> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> Sent: Tuesday, March 13, 2018 5:43 PM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>
> Cc: Tan, Jianfeng <jianfeng.tan@intel.com>; Maxime Coquelin
> <maxime.coquelin@redhat.com>; dev@dpdk.org; yliu@fridaylinux.org; Bie,
> Tiwei <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>;
> Wang, Dong1 <dong1.wang@intel.com>
> Subject: Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to
> fd_man.h
> 
> 13/03/2018 09:46, Yang, Zhiyong:
> > Hi Thomas,
> >
> > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > 05/03/2018 08:43, Yang, Zhiyong:
> > > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > > 01/03/2018 07:02, Tan, Jianfeng:
> > > > > > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > > > > > > On 02/28/2018 02:36 AM, Yang, Zhiyong wrote:
> > > > > > > > From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]
> > > > > > > >> On 02/14/2018 03:53 PM, Zhiyong Yang wrote:
> > > > > > > >>>    lib/librte_vhost/Makefile |   3 +-
> > > > > > > >>>    lib/librte_vhost/fd_man.c | 274
> > > > > > > >>> -------------------------------------------
> > > > > ---
> > > > > > > >>>    lib/librte_vhost/fd_man.h | 258
> > > > > > > >> +++++++++++++++++++++++++++++++++++++++++--
> > > > > > > >>>    3 files changed, 253 insertions(+), 282 deletions(-)
> > > > > > > >>>    delete mode 100644 lib/librte_vhost/fd_man.c
> > > > > > > >>
> > > > > > > >> I disagree with the patch.
> > > > > > > >> It is a good thing to reuse the code, but to do it, you
> > > > > > > >> need to extend the vhost lib API.
> > > > > > > >>
> > > > > > > >> New API need to be prefixed with rte_vhost_, and be
> > > > > > > >> declared in rte_vhost.h.
> > > > > > > >>
> > > > > > > >> And no need to move the functions from the .c to the .h
> > > > > > > >> file, as it
> > > > > > > moreover
> > > > > > > >> makes you inline them, which is not necessary here.
> > > > > > > >
> > > > > > > > Thanks for your reviewing the series firstly, Maxime. :)
> > > > > > > >
> > > > > > > > I considered to do it as you said. However I still
> > > > > > > > preferred this one at
> > > last.
> > > > > > > > Here are my reasons.
> > > > > > > > 1) As far as I know, this set of functions are used
> > > > > > > > privately in librte_vhost
> > > > > > > before this feature.
> > > > > > > > No strong request from the perspective of DPDK
> > > > > > > > application. If I
> > > > > > > understand well,  It is enough to expose the functions to
> > > > > > > all PMDs
> > > > > > > > And it is better to keep internal use in DPDK.
> > > > > > >
> > > > > > > But what the patch is doing is adding fd_man.h to the API,
> > > > > > > without doing it properly. fd_man.h will be installed with
> > > > > > > other header files, and any external application can use it.
> > > > > > >
> > > > > > > >
> > > > > > > > 2) These functions help to implement vhost user, but they
> > > > > > > > are not strongly
> > > > > > > related to other APIs of vhost user which have already exposed.
> > > > > > > > if we want to expose them as APIs at lib layer, many
> > > > > > > > functions and related
> > > > > > > data structure has to be exposed in rte_vhost.h. it looks messy.
> > > > > > > > Your opinion?
> > > > > > >
> > > > > > > Yes, it is not really vhost-related, it could be part of a
> > > > > > > more generic library. It is maybe better to duplicate these
> > > > > > > lines, or to move this code in a existing or new library.
> > > > > >
> > > > > > I vote to move it to generic library, maybe eal. Poll() has
> > > > > > better
> > > > > compatibility even though poll() is not as performant as epoll().
> > > > > >
> > > > > > Thomas, how do you think?
> > > > >
> > > > > I don't see why it should be exported outside of DPDK, except for
> PMDs.
> > > > > I would tend to keep it internal but I understand that it would
> > > > > mean duplicating some code, which is not ideal.
> > > > > Please could you show what would be the content of the .h in EAL?
> > > > >
> > > >
> > > > If needed to expose them in eal.h, I think that they should be the
> > > > whole fdset mechanism as followings.
> > > >
> > > > typedef void (*fd_cb)(int fd, void *dat, int *remove);
> > > >
> > > > struct fdentry {
> > > > 	int fd;		/* -1 indicates this entry is empty */
> > > > 	fd_cb rcb;	/* callback when this fd is readable. */
> > > > 	fd_cb wcb;	/* callback when this fd is writeable.*/
> > > > 	void *dat;	/* fd context */
> > > > 	int busy;	/* whether this entry is being used in cb. */
> > > > };
> > > >
> > > > struct fdset {
> > > > 	struct pollfd rwfds[MAX_FDS];
> > > > 	struct fdentry fd[MAX_FDS];
> > > > 	pthread_mutex_t fd_mutex;
> > > > 	int num;	/* current fd number of this fdset */
> > > > };
> > > >
> > > > void fdset_init(struct fdset *pfdset);    (not used in the patchset)
> > > >
> > > > int fdset_add(struct fdset *pfdset, int fd,
> > > > 	fd_cb rcb, fd_cb wcb, void *dat);     (used in this patchset)
> > > >
> > > > void *fdset_del(struct fdset *pfdset, int fd); (not used in the
> > > > patchset)
> > > >
> > > > void *fdset_event_dispatch(void *arg);   (used in this patchset)
> > > >
> > > > seems that we have 4 options.
> > > > 1) expose them in librte_vhost
> > > > 2) expose them in other existing or new libs. for example,  eal.
> > > > 3) duplicate the code lines at PMD layer.
> > > > 4) do it as the patch does that.
> > >
> > > It looks to be very close of the interrupt thread.
> > > Can we have all merged in an unique event dispatcher thread?
> > >
> >
> > If I understand right, do you mean that we can merge them in lib eal ?  right?
> 
> Yes merge with interrupt thread in EAL.
> I didn't look at the details, but it seems the right place for such thing.
> 
Ok,  we have to expose them as new APIs.  Expect that somebody as DPDK users can use and like them as well. :)

Thanks
Zhiyong
  
Thomas Monjalon March 15, 2018, 9:32 a.m. UTC | #11
13/03/2018 10:50, Yang, Zhiyong:
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > 13/03/2018 09:46, Yang, Zhiyong:
> > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > 05/03/2018 08:43, Yang, Zhiyong:
> > > > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > > > I don't see why it should be exported outside of DPDK, except for
> > PMDs.
> > > > > > I would tend to keep it internal but I understand that it would
> > > > > > mean duplicating some code, which is not ideal.
> > > > > > Please could you show what would be the content of the .h in EAL?
> > > > > >
> > > > >
> > > > > If needed to expose them in eal.h, I think that they should be the
> > > > > whole fdset mechanism as followings.
> > > > >
> > > > > typedef void (*fd_cb)(int fd, void *dat, int *remove);
> > > > >
> > > > > struct fdentry {
> > > > > 	int fd;		/* -1 indicates this entry is empty */
> > > > > 	fd_cb rcb;	/* callback when this fd is readable. */
> > > > > 	fd_cb wcb;	/* callback when this fd is writeable.*/
> > > > > 	void *dat;	/* fd context */
> > > > > 	int busy;	/* whether this entry is being used in cb. */
> > > > > };
> > > > >
> > > > > struct fdset {
> > > > > 	struct pollfd rwfds[MAX_FDS];
> > > > > 	struct fdentry fd[MAX_FDS];
> > > > > 	pthread_mutex_t fd_mutex;
> > > > > 	int num;	/* current fd number of this fdset */
> > > > > };
> > > > >
> > > > > void fdset_init(struct fdset *pfdset);    (not used in the patchset)
> > > > >
> > > > > int fdset_add(struct fdset *pfdset, int fd,
> > > > > 	fd_cb rcb, fd_cb wcb, void *dat);     (used in this patchset)
> > > > >
> > > > > void *fdset_del(struct fdset *pfdset, int fd); (not used in the
> > > > > patchset)
> > > > >
> > > > > void *fdset_event_dispatch(void *arg);   (used in this patchset)
> > > > >
> > > > > seems that we have 4 options.
> > > > > 1) expose them in librte_vhost
> > > > > 2) expose them in other existing or new libs. for example,  eal.
> > > > > 3) duplicate the code lines at PMD layer.
> > > > > 4) do it as the patch does that.
> > > >
> > > > It looks to be very close of the interrupt thread.
> > > > Can we have all merged in an unique event dispatcher thread?
> > >
> > > If I understand right, do you mean that we can merge them in lib eal ?  right?
> > 
> > Yes merge with interrupt thread in EAL.
> > I didn't look at the details, but it seems the right place for such thing.
> > 
> Ok,  we have to expose them as new APIs.  Expect that somebody as DPDK users can use and like them as well. :)

I think you missed my initial question:
Is it possible to merge the vhost events needs in the EAL interrupt thread?
  
Yang, Zhiyong March 15, 2018, 9:45 a.m. UTC | #12
In a container environment if the vhost-user backend restarts, there's no way
for it to reconnect to virtio-user currently. To address this, support for
server mode is added. In this mode the socket file is created by virtio-user,
which the backend then connects to. This means that if the backend restarts,
it can reconnect to virtio-user and continue communications.

The series add support for the feature and target for 18.05 release.

Virtio-user with server mode creates socket file and then starts to wait for the
first connection from vhost user with client mode in blocking mode.

Virtio-user with server mode supports many times' vhost reconnections with same configurations. 

Virtio-user supports only one connection at the same time in server/client mode.

How to test?
The following scripts are as reference.

./x86_64-native-linuxapp-gcc/app/testpmd -c 0x3 -n 4 -m 256,0 --no-pci \
--file-prefix=testpmd0 --vdev=net_virtio_user0,mac=00:11:22:33:44:10, \
path=/tmp/sock0,server=1,queues=1 -- -i --rxq=1 --txq=1 --no-numa

./x86_64-native-linuxapp-gcc/app/testpmd -c 0x3e000 -n 4 --socket-mem 256,0 \
--vdev 'net_vhost0,iface=/tmp/sock0,client=1,queues=1' -- -i --rxq=1 --txq=1 \
--nb-cores=1 --no-numa

step1 : at the virio-user side, run "start"
step2: at the vhost-user side run "start tx_first 40000"

Changes in V2:
1. split two patch 1/5 and 2/5 from v1 patchset to fix some existing issues which is not
strongly related to support for server mode according to Maxime's comments.
2. move fdset related functions to librte_eal from librte_vhost exposed as
new APIs according to Thomas' comments.
3. release note is added in the patch 5/5.
4. squash data structure change patch into 4/5 according to Maxime's suggestion.

Zhiyong Yang (5):
  net/virtio: fix add pointer checking
  net/virtio: add checking for cvq
  eal: expose fdset related APIs
  net/virtio-user: add support for server mode
  net/vhost: add memory checking

 doc/guides/rel_notes/release_18_05.rst             |   7 +
 drivers/net/vhost/rte_eth_vhost.c                  |   9 +
 drivers/net/virtio/virtio_ethdev.c                 |  10 +-
 drivers/net/virtio/virtio_user/vhost_user.c        |  77 +++++-
 drivers/net/virtio/virtio_user/virtio_user_dev.c   |  44 ++--
 drivers/net/virtio/virtio_user/virtio_user_dev.h   |   8 +
 drivers/net/virtio/virtio_user_ethdev.c            |  82 +++++-
 lib/librte_eal/common/include/rte_eal_interrupts.h |  56 +++++
 lib/librte_eal/linuxapp/eal/eal_interrupts.c       | 257 +++++++++++++++++++
 lib/librte_eal/rte_eal_version.map                 |  10 +
 lib/librte_vhost/Makefile                          |   2 +-
 lib/librte_vhost/fd_man.c                          | 274 ---------------------
 lib/librte_vhost/fd_man.h                          |  40 ---
 lib/librte_vhost/socket.c                          |  22 +-
 14 files changed, 548 insertions(+), 350 deletions(-)
 delete mode 100644 lib/librte_vhost/fd_man.c
 delete mode 100644 lib/librte_vhost/fd_man.h
  
Yang, Zhiyong March 16, 2018, 8:43 a.m. UTC | #13
> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> Sent: Thursday, March 15, 2018 5:33 PM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>
> Cc: Tan, Jianfeng <jianfeng.tan@intel.com>; Maxime Coquelin
> <maxime.coquelin@redhat.com>; dev@dpdk.org; yliu@fridaylinux.org; Bie,
> Tiwei <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>;
> Wang, Dong1 <dong1.wang@intel.com>
> Subject: Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to
> fd_man.h
> 
> 13/03/2018 10:50, Yang, Zhiyong:
> > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > 13/03/2018 09:46, Yang, Zhiyong:
> > > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > > 05/03/2018 08:43, Yang, Zhiyong:
> > > > > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > > > > I don't see why it should be exported outside of DPDK,
> > > > > > > except for
> > > PMDs.
> > > > > > > I would tend to keep it internal but I understand that it
> > > > > > > would mean duplicating some code, which is not ideal.
> > > > > > > Please could you show what would be the content of the .h in EAL?
> > > > > > >
> > > > > >
> > > > > > If needed to expose them in eal.h, I think that they should be
> > > > > > the whole fdset mechanism as followings.
> > > > > >
> > > > > > typedef void (*fd_cb)(int fd, void *dat, int *remove);
> > > > > >
> > > > > > struct fdentry {
> > > > > > 	int fd;		/* -1 indicates this entry is empty */
> > > > > > 	fd_cb rcb;	/* callback when this fd is readable. */
> > > > > > 	fd_cb wcb;	/* callback when this fd is writeable.*/
> > > > > > 	void *dat;	/* fd context */
> > > > > > 	int busy;	/* whether this entry is being used in cb. */
> > > > > > };
> > > > > >
> > > > > > struct fdset {
> > > > > > 	struct pollfd rwfds[MAX_FDS];
> > > > > > 	struct fdentry fd[MAX_FDS];
> > > > > > 	pthread_mutex_t fd_mutex;
> > > > > > 	int num;	/* current fd number of this fdset */
> > > > > > };
> > > > > >
> > > > > > void fdset_init(struct fdset *pfdset);    (not used in the patchset)
> > > > > >
> > > > > > int fdset_add(struct fdset *pfdset, int fd,
> > > > > > 	fd_cb rcb, fd_cb wcb, void *dat);     (used in this patchset)
> > > > > >
> > > > > > void *fdset_del(struct fdset *pfdset, int fd); (not used in
> > > > > > the
> > > > > > patchset)
> > > > > >
> > > > > > void *fdset_event_dispatch(void *arg);   (used in this patchset)
> > > > > >
> > > > > > seems that we have 4 options.
> > > > > > 1) expose them in librte_vhost
> > > > > > 2) expose them in other existing or new libs. for example,  eal.
> > > > > > 3) duplicate the code lines at PMD layer.
> > > > > > 4) do it as the patch does that.
> > > > >
> > > > > It looks to be very close of the interrupt thread.
> > > > > Can we have all merged in an unique event dispatcher thread?
> > > >
> > > > If I understand right, do you mean that we can merge them in lib eal ?
> right?
> > >
> > > Yes merge with interrupt thread in EAL.
> > > I didn't look at the details, but it seems the right place for such thing.
> > >
> > Ok,  we have to expose them as new APIs.  Expect that somebody as DPDK
> > users can use and like them as well. :)
> 
> I think you missed my initial question:
> Is it possible to merge the vhost events needs in the EAL interrupt thread?
> 

Sorry to miss this question.
Compared to vhost event mechanism(poll), Eal interrupt uses epoll,
From my basic understanding,  linux and freeBSD both support poll,
Epoll is supported by Linux only. 

Hi Maxime, 
I want to know your opinion about Thomas's question.

This patchset aim to support for virtio-user server mode and just need to call event functions,
So, Let me try librte_eal epoll mechanism if support for server mode.


Thanks
Zhiyong
  
Yang, Zhiyong March 21, 2018, 6:51 a.m. UTC | #14
Hi Thomas, Maxime, Jianfeng,

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Yang, Zhiyong
> Sent: Friday, March 16, 2018 4:44 PM
> To: Thomas Monjalon <thomas@monjalon.net>
> Cc: Tan, Jianfeng <jianfeng.tan@intel.com>; Maxime Coquelin
> <maxime.coquelin@redhat.com>; dev@dpdk.org; yliu@fridaylinux.org; Bie,
> Tiwei <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>;
> Wang, Dong1 <dong1.wang@intel.com>
> Subject: Re: [dpdk-dev] [PATCH 1/4] vhost: move fdset functions from
> fd_man.c to fd_man.h
> 
> 
> 
> > -----Original Message-----
> > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > Sent: Thursday, March 15, 2018 5:33 PM
> > To: Yang, Zhiyong <zhiyong.yang@intel.com>
> > Cc: Tan, Jianfeng <jianfeng.tan@intel.com>; Maxime Coquelin
> > <maxime.coquelin@redhat.com>; dev@dpdk.org; yliu@fridaylinux.org; Bie,
> > Tiwei <tiwei.bie@intel.com>; Wang, Zhihong <zhihong.wang@intel.com>;
> > Wang, Dong1 <dong1.wang@intel.com>
> > Subject: Re: [PATCH 1/4] vhost: move fdset functions from fd_man.c to
> > fd_man.h
> >
> > 13/03/2018 10:50, Yang, Zhiyong:
> > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > 13/03/2018 09:46, Yang, Zhiyong:
> > > > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > > > 05/03/2018 08:43, Yang, Zhiyong:
> > > > > > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > > > > > I don't see why it should be exported outside of DPDK,
> > > > > > > > except for
> > > > PMDs.
> > > > > > > > I would tend to keep it internal but I understand that it
> > > > > > > > would mean duplicating some code, which is not ideal.
> > > > > > > > Please could you show what would be the content of the .h in
> EAL?
> > > > > > > >
> > > > > > >
> > > > > > > If needed to expose them in eal.h, I think that they should
> > > > > > > be the whole fdset mechanism as followings.
> > > > > > >
> > > > > > > typedef void (*fd_cb)(int fd, void *dat, int *remove);
> > > > > > >
> > > > > > > struct fdentry {
> > > > > > > 	int fd;		/* -1 indicates this entry is empty */
> > > > > > > 	fd_cb rcb;	/* callback when this fd is readable. */
> > > > > > > 	fd_cb wcb;	/* callback when this fd is writeable.*/
> > > > > > > 	void *dat;	/* fd context */
> > > > > > > 	int busy;	/* whether this entry is being used in cb. */
> > > > > > > };
> > > > > > >
> > > > > > > struct fdset {
> > > > > > > 	struct pollfd rwfds[MAX_FDS];
> > > > > > > 	struct fdentry fd[MAX_FDS];
> > > > > > > 	pthread_mutex_t fd_mutex;
> > > > > > > 	int num;	/* current fd number of this fdset */
> > > > > > > };
> > > > > > >
> > > > > > > void fdset_init(struct fdset *pfdset);    (not used in the patchset)
> > > > > > >
> > > > > > > int fdset_add(struct fdset *pfdset, int fd,
> > > > > > > 	fd_cb rcb, fd_cb wcb, void *dat);     (used in this patchset)
> > > > > > >
> > > > > > > void *fdset_del(struct fdset *pfdset, int fd); (not used in
> > > > > > > the
> > > > > > > patchset)
> > > > > > >
> > > > > > > void *fdset_event_dispatch(void *arg);   (used in this patchset)
> > > > > > >
> > > > > > > seems that we have 4 options.
> > > > > > > 1) expose them in librte_vhost
> > > > > > > 2) expose them in other existing or new libs. for example,  eal.
> > > > > > > 3) duplicate the code lines at PMD layer.
> > > > > > > 4) do it as the patch does that.
> > > > > >
> > > > > > It looks to be very close of the interrupt thread.
> > > > > > Can we have all merged in an unique event dispatcher thread?
> > > > >
> > > > > If I understand right, do you mean that we can merge them in lib eal ?
> > right?
> > > >
> > > > Yes merge with interrupt thread in EAL.
> > > > I didn't look at the details, but it seems the right place for such thing.
> > > >
> > > Ok,  we have to expose them as new APIs.  Expect that somebody as
> > > DPDK users can use and like them as well. :)
> >
> > I think you missed my initial question:
> > Is it possible to merge the vhost events needs in the EAL interrupt thread?
> >
> 
> Sorry to miss this question.
> Compared to vhost event mechanism(poll), Eal interrupt uses epoll, From my
> basic understanding,  linux and freeBSD both support poll, Epoll is supported
> by Linux only.
> 
> Hi Maxime,
> I want to know your opinion about Thomas's question.
> 
> This patchset aim to support for virtio-user server mode and just need to call
> event functions, So, Let me try librte_eal epoll mechanism if support for
> server mode.
> 

I have implemented the same functionality calling librte_eal  epoll mechanism instead of
Vhost event and V3 has been sent out.  Please help review and welcome any comments.

Thanks
Zhiyong
  

Patch

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 5d6c6abae..e201df79c 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -21,10 +21,11 @@  endif
 LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev -lrte_net
 
 # all source are stored in SRCS-y
-SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \
+SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := iotlb.c socket.c vhost.c \
 					vhost_user.c virtio_net.c
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += fd_man.h
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c
deleted file mode 100644
index 181711c2a..000000000
--- a/lib/librte_vhost/fd_man.c
+++ /dev/null
@@ -1,274 +0,0 @@ 
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
- */
-
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/socket.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <string.h>
-
-#include <rte_common.h>
-#include <rte_log.h>
-
-#include "fd_man.h"
-
-#define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
-
-static int
-get_last_valid_idx(struct fdset *pfdset, int last_valid_idx)
-{
-	int i;
-
-	for (i = last_valid_idx; i >= 0 && pfdset->fd[i].fd == -1; i--)
-		;
-
-	return i;
-}
-
-static void
-fdset_move(struct fdset *pfdset, int dst, int src)
-{
-	pfdset->fd[dst]    = pfdset->fd[src];
-	pfdset->rwfds[dst] = pfdset->rwfds[src];
-}
-
-static void
-fdset_shrink_nolock(struct fdset *pfdset)
-{
-	int i;
-	int last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1);
-
-	for (i = 0; i < last_valid_idx; i++) {
-		if (pfdset->fd[i].fd != -1)
-			continue;
-
-		fdset_move(pfdset, i, last_valid_idx);
-		last_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1);
-	}
-	pfdset->num = last_valid_idx + 1;
-}
-
-/*
- * Find deleted fd entries and remove them
- */
-static void
-fdset_shrink(struct fdset *pfdset)
-{
-	pthread_mutex_lock(&pfdset->fd_mutex);
-	fdset_shrink_nolock(pfdset);
-	pthread_mutex_unlock(&pfdset->fd_mutex);
-}
-
-/**
- * Returns the index in the fdset for a given fd.
- * @return
- *   index for the fd, or -1 if fd isn't in the fdset.
- */
-static int
-fdset_find_fd(struct fdset *pfdset, int fd)
-{
-	int i;
-
-	for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++)
-		;
-
-	return i == pfdset->num ? -1 : i;
-}
-
-static void
-fdset_add_fd(struct fdset *pfdset, int idx, int fd,
-	fd_cb rcb, fd_cb wcb, void *dat)
-{
-	struct fdentry *pfdentry = &pfdset->fd[idx];
-	struct pollfd *pfd = &pfdset->rwfds[idx];
-
-	pfdentry->fd  = fd;
-	pfdentry->rcb = rcb;
-	pfdentry->wcb = wcb;
-	pfdentry->dat = dat;
-
-	pfd->fd = fd;
-	pfd->events  = rcb ? POLLIN : 0;
-	pfd->events |= wcb ? POLLOUT : 0;
-	pfd->revents = 0;
-}
-
-void
-fdset_init(struct fdset *pfdset)
-{
-	int i;
-
-	if (pfdset == NULL)
-		return;
-
-	for (i = 0; i < MAX_FDS; i++) {
-		pfdset->fd[i].fd = -1;
-		pfdset->fd[i].dat = NULL;
-	}
-	pfdset->num = 0;
-}
-
-/**
- * Register the fd in the fdset with read/write handler and context.
- */
-int
-fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
-{
-	int i;
-
-	if (pfdset == NULL || fd == -1)
-		return -1;
-
-	pthread_mutex_lock(&pfdset->fd_mutex);
-	i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
-	if (i == -1) {
-		fdset_shrink_nolock(pfdset);
-		i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
-		if (i == -1) {
-			pthread_mutex_unlock(&pfdset->fd_mutex);
-			return -2;
-		}
-	}
-
-	fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
-	pthread_mutex_unlock(&pfdset->fd_mutex);
-
-	return 0;
-}
-
-/**
- *  Unregister the fd from the fdset.
- *  Returns context of a given fd or NULL.
- */
-void *
-fdset_del(struct fdset *pfdset, int fd)
-{
-	int i;
-	void *dat = NULL;
-
-	if (pfdset == NULL || fd == -1)
-		return NULL;
-
-	do {
-		pthread_mutex_lock(&pfdset->fd_mutex);
-
-		i = fdset_find_fd(pfdset, fd);
-		if (i != -1 && pfdset->fd[i].busy == 0) {
-			/* busy indicates r/wcb is executing! */
-			dat = pfdset->fd[i].dat;
-			pfdset->fd[i].fd = -1;
-			pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
-			pfdset->fd[i].dat = NULL;
-			i = -1;
-		}
-		pthread_mutex_unlock(&pfdset->fd_mutex);
-	} while (i != -1);
-
-	return dat;
-}
-
-
-/**
- * This functions runs in infinite blocking loop until there is no fd in
- * pfdset. It calls corresponding r/w handler if there is event on the fd.
- *
- * Before the callback is called, we set the flag to busy status; If other
- * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
- * will wait until the flag is reset to zero(which indicates the callback is
- * finished), then it could free the context after fdset_del.
- */
-void *
-fdset_event_dispatch(void *arg)
-{
-	int i;
-	struct pollfd *pfd;
-	struct fdentry *pfdentry;
-	fd_cb rcb, wcb;
-	void *dat;
-	int fd, numfds;
-	int remove1, remove2;
-	int need_shrink;
-	struct fdset *pfdset = arg;
-	int val;
-
-	if (pfdset == NULL)
-		return NULL;
-
-	while (1) {
-
-		/*
-		 * When poll is blocked, other threads might unregister
-		 * listenfds from and register new listenfds into fdset.
-		 * When poll returns, the entries for listenfds in the fdset
-		 * might have been updated. It is ok if there is unwanted call
-		 * for new listenfds.
-		 */
-		pthread_mutex_lock(&pfdset->fd_mutex);
-		numfds = pfdset->num;
-		pthread_mutex_unlock(&pfdset->fd_mutex);
-
-		val = poll(pfdset->rwfds, numfds, 1000 /* millisecs */);
-		if (val < 0)
-			continue;
-
-		need_shrink = 0;
-		for (i = 0; i < numfds; i++) {
-			pthread_mutex_lock(&pfdset->fd_mutex);
-
-			pfdentry = &pfdset->fd[i];
-			fd = pfdentry->fd;
-			pfd = &pfdset->rwfds[i];
-
-			if (fd < 0) {
-				need_shrink = 1;
-				pthread_mutex_unlock(&pfdset->fd_mutex);
-				continue;
-			}
-
-			if (!pfd->revents) {
-				pthread_mutex_unlock(&pfdset->fd_mutex);
-				continue;
-			}
-
-			remove1 = remove2 = 0;
-
-			rcb = pfdentry->rcb;
-			wcb = pfdentry->wcb;
-			dat = pfdentry->dat;
-			pfdentry->busy = 1;
-
-			pthread_mutex_unlock(&pfdset->fd_mutex);
-
-			if (rcb && pfd->revents & (POLLIN | FDPOLLERR))
-				rcb(fd, dat, &remove1);
-			if (wcb && pfd->revents & (POLLOUT | FDPOLLERR))
-				wcb(fd, dat, &remove2);
-			pfdentry->busy = 0;
-			/*
-			 * fdset_del needs to check busy flag.
-			 * We don't allow fdset_del to be called in callback
-			 * directly.
-			 */
-			/*
-			 * When we are to clean up the fd from fdset,
-			 * because the fd is closed in the cb,
-			 * the old fd val could be reused by when creates new
-			 * listen fd in another thread, we couldn't call
-			 * fd_set_del.
-			 */
-			if (remove1 || remove2) {
-				pfdentry->fd = -1;
-				need_shrink = 1;
-			}
-		}
-
-		if (need_shrink)
-			fdset_shrink(pfdset);
-	}
-
-	return NULL;
-}
diff --git a/lib/librte_vhost/fd_man.h b/lib/librte_vhost/fd_man.h
index 3a9276c3c..b1c628251 100644
--- a/lib/librte_vhost/fd_man.h
+++ b/lib/librte_vhost/fd_man.h
@@ -4,11 +4,11 @@ 
 
 #ifndef _FD_MAN_H_
 #define _FD_MAN_H_
-#include <stdint.h>
-#include <pthread.h>
 #include <poll.h>
+#include <stdio.h>
 
 #define MAX_FDS 1024
+#define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
 
 typedef void (*fd_cb)(int fd, void *dat, int *remove);
 
@@ -27,14 +27,258 @@  struct fdset {
 	int num;	/* current fd number of this fdset */
 };
 
+static inline int
+get_last_valid_idx(struct fdset *pfdset, int last_valid_idx)
+{
+	int i;
 
-void fdset_init(struct fdset *pfdset);
+	for (i = last_valid_idx; i >= 0 && pfdset->fd[i].fd == -1; i--)
+		;
 
-int fdset_add(struct fdset *pfdset, int fd,
-	fd_cb rcb, fd_cb wcb, void *dat);
+	return i;
+}
 
-void *fdset_del(struct fdset *pfdset, int fd);
+static inline void
+fdset_move(struct fdset *pfdset, int dst, int src)
+{
+	pfdset->fd[dst]    = pfdset->fd[src];
+	pfdset->rwfds[dst] = pfdset->rwfds[src];
+}
 
-void *fdset_event_dispatch(void *arg);
+static inline void
+fdset_shrink_nolock(struct fdset *pfdset)
+{
+	int i;
+	int last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1);
+
+	for (i = 0; i < last_valid_idx; i++) {
+		if (pfdset->fd[i].fd != -1)
+			continue;
+
+		fdset_move(pfdset, i, last_valid_idx);
+		last_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1);
+	}
+	pfdset->num = last_valid_idx + 1;
+}
+
+/*
+ * Find deleted fd entries and remove them
+ */
+static inline void
+fdset_shrink(struct fdset *pfdset)
+{
+	pthread_mutex_lock(&pfdset->fd_mutex);
+	fdset_shrink_nolock(pfdset);
+	pthread_mutex_unlock(&pfdset->fd_mutex);
+}
+
+/**
+ * Returns the index in the fdset for a given fd.
+ * @return
+ *   index for the fd, or -1 if fd isn't in the fdset.
+ */
+static inline int
+fdset_find_fd(struct fdset *pfdset, int fd)
+{
+	int i;
+
+	for (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++)
+		;
+
+	return i == pfdset->num ? -1 : i;
+}
+
+static inline void
+fdset_add_fd(struct fdset *pfdset, int idx, int fd,
+	fd_cb rcb, fd_cb wcb, void *dat)
+{
+	struct fdentry *pfdentry = &pfdset->fd[idx];
+	struct pollfd *pfd = &pfdset->rwfds[idx];
+
+	pfdentry->fd  = fd;
+	pfdentry->rcb = rcb;
+	pfdentry->wcb = wcb;
+	pfdentry->dat = dat;
+
+	pfd->fd = fd;
+	pfd->events  = rcb ? POLLIN : 0;
+	pfd->events |= wcb ? POLLOUT : 0;
+	pfd->revents = 0;
+}
+
+static inline void
+fdset_init(struct fdset *pfdset)
+{
+	int i;
+
+	if (pfdset == NULL)
+		return;
+
+	for (i = 0; i < MAX_FDS; i++) {
+		pfdset->fd[i].fd = -1;
+		pfdset->fd[i].dat = NULL;
+	}
+	pfdset->num = 0;
+}
+
+/**
+ * Register the fd in the fdset with read/write handler and context.
+ */
+static inline int
+fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)
+{
+	int i;
+
+	if (pfdset == NULL || fd == -1)
+		return -1;
+
+	pthread_mutex_lock(&pfdset->fd_mutex);
+	i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
+	if (i == -1) {
+		fdset_shrink_nolock(pfdset);
+		i = pfdset->num < MAX_FDS ? pfdset->num++ : -1;
+		if (i == -1) {
+			pthread_mutex_unlock(&pfdset->fd_mutex);
+			return -2;
+		}
+	}
+
+	fdset_add_fd(pfdset, i, fd, rcb, wcb, dat);
+	pthread_mutex_unlock(&pfdset->fd_mutex);
+
+	return 0;
+}
+
+/**
+ *  Unregister the fd from the fdset.
+ *  Returns context of a given fd or NULL.
+ */
+static inline void *
+fdset_del(struct fdset *pfdset, int fd)
+{
+	int i;
+	void *dat = NULL;
+
+	if (pfdset == NULL || fd == -1)
+		return NULL;
+
+	do {
+		pthread_mutex_lock(&pfdset->fd_mutex);
+
+		i = fdset_find_fd(pfdset, fd);
+		if (i != -1 && pfdset->fd[i].busy == 0) {
+			/* busy indicates r/wcb is executing! */
+			dat = pfdset->fd[i].dat;
+			pfdset->fd[i].fd = -1;
+			pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;
+			pfdset->fd[i].dat = NULL;
+			i = -1;
+		}
+		pthread_mutex_unlock(&pfdset->fd_mutex);
+	} while (i != -1);
+
+	return dat;
+}
+
+/**
+ * This functions runs in infinite blocking loop until there is no fd in
+ * pfdset. It calls corresponding r/w handler if there is event on the fd.
+ *
+ * Before the callback is called, we set the flag to busy status; If other
+ * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it
+ * will wait until the flag is reset to zero(which indicates the callback is
+ * finished), then it could free the context after fdset_del.
+ */
+static inline void *
+fdset_event_dispatch(void *arg)
+{
+	int i;
+	struct pollfd *pfd;
+	struct fdentry *pfdentry;
+	fd_cb rcb, wcb;
+	void *dat;
+	int fd, numfds;
+	int remove1, remove2;
+	int need_shrink;
+	struct fdset *pfdset = arg;
+	int val;
+
+	if (pfdset == NULL)
+		return NULL;
+
+	while (1) {
+
+		/*
+		 * When poll is blocked, other threads might unregister
+		 * listenfds from and register new listenfds into fdset.
+		 * When poll returns, the entries for listenfds in the fdset
+		 * might have been updated. It is ok if there is unwanted call
+		 * for new listenfds.
+		 */
+		pthread_mutex_lock(&pfdset->fd_mutex);
+		numfds = pfdset->num;
+		pthread_mutex_unlock(&pfdset->fd_mutex);
+
+		val = poll(pfdset->rwfds, numfds, 1000 /* millisecs */);
+		if (val < 0)
+			continue;
+
+		need_shrink = 0;
+		for (i = 0; i < numfds; i++) {
+			pthread_mutex_lock(&pfdset->fd_mutex);
+
+			pfdentry = &pfdset->fd[i];
+			fd = pfdentry->fd;
+			pfd = &pfdset->rwfds[i];
+
+			if (fd < 0) {
+				need_shrink = 1;
+				pthread_mutex_unlock(&pfdset->fd_mutex);
+				continue;
+			}
+
+			if (!pfd->revents) {
+				pthread_mutex_unlock(&pfdset->fd_mutex);
+				continue;
+			}
+
+			remove1 = remove2 = 0;
+
+			rcb = pfdentry->rcb;
+			wcb = pfdentry->wcb;
+			dat = pfdentry->dat;
+			pfdentry->busy = 1;
+
+			pthread_mutex_unlock(&pfdset->fd_mutex);
+
+			if (rcb && pfd->revents & (POLLIN | FDPOLLERR))
+				rcb(fd, dat, &remove1);
+			if (wcb && pfd->revents & (POLLOUT | FDPOLLERR))
+				wcb(fd, dat, &remove2);
+			pfdentry->busy = 0;
+			/*
+			 * fdset_del needs to check busy flag.
+			 * We don't allow fdset_del to be called in callback
+			 * directly.
+			 */
+			/*
+			 * When we are to clean up the fd from fdset,
+			 * because the fd is closed in the cb,
+			 * the old fd val could be reused by when creates new
+			 * listen fd in another thread, we couldn't call
+			 * fd_set_del.
+			 */
+			if (remove1 || remove2) {
+				pfdentry->fd = -1;
+				need_shrink = 1;
+			}
+		}
+
+		if (need_shrink)
+			fdset_shrink(pfdset);
+	}
+
+	return NULL;
+}
 
 #endif