[dpdk-dev,11/12] net/vhost: support to run in the secondary process

Message ID 1503654052-84730-12-git-send-email-jianfeng.tan@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Yuanhan Liu
Headers

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation fail Compilation issues

Commit Message

Jianfeng Tan Aug. 25, 2017, 9:40 a.m. UTC
  Support to run vhost-pmd vdev in the secondary process. We obtain
information, like memory regions, kickfd, callfd, through
primary/secondary communication channel.

And by invoking rte_vhost_set_vring_effective_fd, we can set the
kickfd which can be recognized by the secondary process.

Signed-off-by: Jianfeng Tan <jianfeng.tan@intel.com>
---
 drivers/net/vhost/rte_eth_vhost.c | 158 +++++++++++++++++++++++++++++++++++---
 1 file changed, 146 insertions(+), 12 deletions(-)
  

Comments

Yuanhan Liu Sept. 21, 2017, 4:29 a.m. UTC | #1
On Fri, Aug 25, 2017 at 09:40:51AM +0000, Jianfeng Tan wrote:
>  static int
> +share_device(int vid)
> +{
> +	uint32_t i, vring_num;
> +	int len;
> +	int fds[8];
> +	struct rte_vhost_memory *mem;
> +	struct vhost_params *params;
> +	struct rte_vhost_vring vring;
> +
> +	/* share mem table */
> +	if (rte_vhost_get_mem_table(vid, &mem) < 0) {
> +		printf("Failed to get mem table\n");

No printf in DPDK lib and pmd, use RTE_LOG instead.

[...]
> +static int
> +eth_dev_vhost_attach(struct rte_vdev_device *dev)
> +{
> +	struct rte_eth_dev *eth_dev = NULL;
> +	struct rte_eth_dev_data *data = NULL;
> +
> +	RTE_LOG(INFO, PMD, "Attach vhost user port\n");
> +
> +	/* reserve an ethdev entry */
> +	eth_dev = rte_eth_vdev_allocate(dev, sizeof(struct pmd_internal));
> +	if (eth_dev == NULL)
> +		goto error;

I'd suggest to "return -1" directly here, without introducing goto.

> +
> +	eth_dev->dev_ops = &ops;
> +
> +	/* finally assign rx and tx ops */
> +	eth_dev->rx_pkt_burst = eth_vhost_rx;
> +	eth_dev->tx_pkt_burst = eth_vhost_tx;
> +
> +	data = eth_dev->data;
> +
> +	return data->port_id;
> +
> +error:
> +	return -1;
> +}
> +
>  static inline int
>  open_iface(const char *key __rte_unused, const char *value, void *extra_args)
>  {
> @@ -1154,6 +1244,39 @@ open_int(const char *key __rte_unused, const char *value, void *extra_args)
>  }
>  
>  static int
> +vhost_pmd_action(const char *params, int len __rte_unused,
> +		 int fds[], int fds_num)
> +{
> +	int i;
> +	void *base_addr;
> +	const struct vhost_params *p = (const struct vhost_params *)params;

The cast could be avoided if you define the action prototype with
"const void *params".

> +	const struct rte_vhost_mem_region *regions;
> +
> +	switch (p->type) {
> +	case VHOST_MSG_TYPE_REGIONS:
> +		regions = (const void *)p->regions;

Unnecessary cast.

> +		for (i = 0; i < fds_num; ++i) {
> +			base_addr = mmap(regions[i].mmap_addr,
> +					 regions[i].mmap_size,
> +					 PROT_READ | PROT_WRITE,
> +					 MAP_FIXED | MAP_SHARED, fds[i], 0);
> +			if (base_addr != regions[i].mmap_addr) {
> +				RTE_LOG(INFO, PMD, "mmap error");

The log level should be error, nothing will work if it happens. Also, the
log message should be more informative.

	--yliu
  

Patch

diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index 0dac5e6..7120105 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -33,6 +33,7 @@ 
 #include <unistd.h>
 #include <pthread.h>
 #include <stdbool.h>
+#include <sys/mman.h>
 
 #include <rte_mbuf.h>
 #include <rte_ethdev.h>
@@ -46,6 +47,16 @@ 
 
 #include "rte_eth_vhost.h"
 
+#define VHOST_MSG_TYPE_REGIONS	1
+#define VHOST_MSG_TYPE_SET_FDS	2
+
+struct vhost_params {
+	int type;
+	int vid;
+	int vring_idx;
+	struct rte_vhost_mem_region regions[0];
+};
+
 enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
 
 #define ETH_VHOST_IFACE_ARG		"iface"
@@ -550,6 +561,67 @@  update_queuing_status(struct rte_eth_dev *dev)
 }
 
 static int
+share_device(int vid)
+{
+	uint32_t i, vring_num;
+	int len;
+	int fds[8];
+	struct rte_vhost_memory *mem;
+	struct vhost_params *params;
+	struct rte_vhost_vring vring;
+
+	/* share mem table */
+	if (rte_vhost_get_mem_table(vid, &mem) < 0) {
+		printf("Failed to get mem table\n");
+		return 0;
+	}
+	for (i = 0; i < mem->nregions; ++i)
+		fds[i] = mem->regions[i].fd;
+
+	len = sizeof(struct rte_vhost_mem_region) * mem->nregions;
+	params = malloc(sizeof(*params) + len);
+	if (params == NULL) {
+		printf("Failed to allocate memory\n");
+		return -1;
+	}
+
+	params->type = VHOST_MSG_TYPE_REGIONS;
+	params->vid = vid;
+	memcpy(params->regions, mem->regions, len);
+
+	if (rte_eal_primary_secondary_sendmsg("vhost pmd", params,
+					      sizeof(*params) + len,
+					      fds, mem->nregions) < 0) {
+		printf("Failed to share mem table\n");
+		free(params);
+		return -1;
+	}
+
+	/* share callfd and kickfd */
+	params->type = VHOST_MSG_TYPE_SET_FDS;
+	vring_num = rte_vhost_get_vring_num(vid);
+	for (i = 0; i < vring_num; i++) {
+		if (rte_vhost_get_vhost_vring(vid, i, &vring) < 0) {
+			printf("Failed to get vring, idx = %d\n", i);
+			free(params);
+			return -1;
+		}
+
+		params->vring_idx = i;
+		fds[0] = vring.callfd;
+		fds[1] = vring.kickfd;
+		if (rte_eal_primary_secondary_sendmsg("vhost pmd", params,
+					sizeof(*params),
+					fds, 2) < 0) {
+			printf("Failed to set fds\n");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+static int
 new_device(int vid)
 {
 	struct rte_eth_dev *eth_dev;
@@ -610,6 +682,8 @@  new_device(int vid)
 	_rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC,
 				      NULL, NULL);
 
+	share_device(vid);
+
 	return 0;
 }
 
@@ -1025,13 +1099,6 @@  eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
 	RTE_LOG(INFO, PMD, "Creating VHOST-USER backend on numa socket %u\n",
 		numa_node);
 
-	/* now do all data allocation - for eth_dev structure and internal
-	 * (private) data
-	 */
-	data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
-	if (data == NULL)
-		goto error;
-
 	list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node);
 	if (list == NULL)
 		goto error;
@@ -1073,11 +1140,7 @@  eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
 	rte_spinlock_init(&vring_state->lock);
 	vring_states[eth_dev->data->port_id] = vring_state;
 
-	/* We'll replace the 'data' originally allocated by eth_dev. So the
-	 * vhost PMD resources won't be shared between multi processes.
-	 */
-	rte_memcpy(data, eth_dev->data, sizeof(*data));
-	eth_dev->data = data;
+	data = eth_dev->data;
 
 	data->nb_rx_queues = queues;
 	data->nb_tx_queues = queues;
@@ -1125,6 +1188,33 @@  eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
 	return -1;
 }
 
+static int
+eth_dev_vhost_attach(struct rte_vdev_device *dev)
+{
+	struct rte_eth_dev *eth_dev = NULL;
+	struct rte_eth_dev_data *data = NULL;
+
+	RTE_LOG(INFO, PMD, "Attach vhost user port\n");
+
+	/* reserve an ethdev entry */
+	eth_dev = rte_eth_vdev_allocate(dev, sizeof(struct pmd_internal));
+	if (eth_dev == NULL)
+		goto error;
+
+	eth_dev->dev_ops = &ops;
+
+	/* finally assign rx and tx ops */
+	eth_dev->rx_pkt_burst = eth_vhost_rx;
+	eth_dev->tx_pkt_burst = eth_vhost_tx;
+
+	data = eth_dev->data;
+
+	return data->port_id;
+
+error:
+	return -1;
+}
+
 static inline int
 open_iface(const char *key __rte_unused, const char *value, void *extra_args)
 {
@@ -1154,6 +1244,39 @@  open_int(const char *key __rte_unused, const char *value, void *extra_args)
 }
 
 static int
+vhost_pmd_action(const char *params, int len __rte_unused,
+		 int fds[], int fds_num)
+{
+	int i;
+	void *base_addr;
+	const struct vhost_params *p = (const struct vhost_params *)params;
+	const struct rte_vhost_mem_region *regions;
+
+	switch (p->type) {
+	case VHOST_MSG_TYPE_REGIONS:
+		regions = (const void *)p->regions;
+		for (i = 0; i < fds_num; ++i) {
+			base_addr = mmap(regions[i].mmap_addr,
+					 regions[i].mmap_size,
+					 PROT_READ | PROT_WRITE,
+					 MAP_FIXED | MAP_SHARED, fds[i], 0);
+			if (base_addr != regions[i].mmap_addr) {
+				RTE_LOG(INFO, PMD, "mmap error");
+				break;
+			}
+		}
+		break;
+	case VHOST_MSG_TYPE_SET_FDS:
+		rte_vhost_set_vring_effective_fd(p->vid,
+						 p->vring_idx,
+			       			 fds[0], fds[1]);
+		break;
+	}
+
+	return 0;
+}
+
+static int
 rte_pmd_vhost_probe(struct rte_vdev_device *dev)
 {
 	struct rte_kvargs *kvlist = NULL;
@@ -1167,6 +1290,17 @@  rte_pmd_vhost_probe(struct rte_vdev_device *dev)
 	RTE_LOG(INFO, PMD, "Initializing pmd_vhost for %s\n",
 		rte_vdev_device_name(dev));
 
+	if (rte_eal_primary_secondary_add_action("vhost pmd",
+						 vhost_pmd_action) < 0) {
+		RTE_LOG(ERR, PMD, "vhost fails to add action\n");
+		return -1;
+	}
+
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+		ret = eth_dev_vhost_attach(dev);
+	       	return	(ret >= 0) ? 0 : -1;
+	}
+
 	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
 	if (kvlist == NULL)
 		return -1;