[v3] net/tap: add queues when attaching from secondary process

Message ID 20180720111552.14132-1-thomas@monjalon.net (mailing list archive)
State Changes Requested, archived
Delegated to: Ferruh Yigit
Headers
Series [v3] net/tap: add queues when attaching from secondary process |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Thomas Monjalon July 20, 2018, 11:15 a.m. UTC
  From: Raslan Darawsheh <rasland@mellanox.com>

In the case the device is created by the primary process,
the secondary must request some file descriptors to attach the queues.
The file descriptors are shared via IPC Unix socket.

Thanks to the IPC synchronization, the secondary process
is now able to do Rx/Tx on a TAP created by the primary process.

Signed-off-by: Raslan Darawsheh <rasland@mellanox.com>
Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
---
Note: there is a bug in EAL IPC regarding fd translation.
A fix will be sent later for EAL.

v3:
   - split some long lines
v2:
   - translate file descriptors via IPC API
   - add documentation
---
 doc/guides/nics/tap.rst                |  16 +++
 doc/guides/rel_notes/release_18_08.rst |   5 +
 drivers/net/tap/Makefile               |   1 +
 drivers/net/tap/rte_eth_tap.c          | 131 ++++++++++++++++++++++++-
 4 files changed, 152 insertions(+), 1 deletion(-)
  

Comments

Wiles, Keith July 20, 2018, 3:35 p.m. UTC | #1
> On Jul 20, 2018, at 4:15 AM, Thomas Monjalon <thomas@monjalon.net> wrote:
> 
> From: Raslan Darawsheh <rasland@mellanox.com>
> 
> In the case the device is created by the primary process,
> the secondary must request some file descriptors to attach the queues.
> The file descriptors are shared via IPC Unix socket.
> 
> Thanks to the IPC synchronization, the secondary process
> is now able to do Rx/Tx on a TAP created by the primary process.
> 
> Signed-off-by: Raslan Darawsheh <rasland@mellanox.com>
> Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
> ---
> Note: there is a bug in EAL IPC regarding fd translation.
> A fix will be sent later for EAL.
> 
> v3:
>   - split some long lines
> v2:
>   - translate file descriptors via IPC API
>   - add documentation
> ---
> doc/guides/nics/tap.rst                |  16 +++
> doc/guides/rel_notes/release_18_08.rst |   5 +
> drivers/net/tap/Makefile               |   1 +
> drivers/net/tap/rte_eth_tap.c          | 131 ++++++++++++++++++++++++-
> 4 files changed, 152 insertions(+), 1 deletion(-)
> 
> diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
> index 27148681c..d1f3e1c24 100644
> --- a/doc/guides/nics/tap.rst
> +++ b/doc/guides/nics/tap.rst
> @@ -152,6 +152,22 @@ Distribute IPv4 TCP packets using RSS to a given MAC address over queues 0-3::
>    testpmd> flow create 0 priority 4 ingress pattern eth dst is 0a:0b:0c:0d:0e:0f \
>             / ipv4 / tcp / end actions rss queues 0 1 2 3 end / end
> 
> +Multi-process sharing
> +---------------------
> +
> +It is possible to attach an existing TAP device in a secondary process,
> +by declaring it as a vdev with the same name as in the primary process,
> +and without any parameter.
> +
> +The port attached in a secondary process will give access to the
> +statistics and the queues.
> +Therefore it can be used for monitoring or Rx/Tx processing.
> +
> +The IPC synchronization of Rx/Tx queues is currently limited:
> +
> +  - Only 8 queues
> +  - Synchronized on probing, but not on later port update
> +
> Example
> -------
> 
> diff --git a/doc/guides/rel_notes/release_18_08.rst b/doc/guides/rel_notes/release_18_08.rst
> index dd611b571..ec6a81236 100644
> --- a/doc/guides/rel_notes/release_18_08.rst
> +++ b/doc/guides/rel_notes/release_18_08.rst
> @@ -74,6 +74,11 @@ New Features
>   * Add handlers to add/delete VxLAN port number.
>   * Add devarg to specify ingress VLAN rewrite mode.
> 
> +* **Added TAP Rx/Tx queues sharing with a secondary process.**
> +
> +  A secondary process can attach a TAP device created in the primary process,
> +  probe the queues, and process Rx/Tx in a secondary process.
> +
> 
> API Changes
> -----------
> diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
> index 324336535..3dcf05a72 100644
> --- a/drivers/net/tap/Makefile
> +++ b/drivers/net/tap/Makefile
> @@ -27,6 +27,7 @@ LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash
> LDLIBS += -lrte_bus_vdev -lrte_gso
> 
> CFLAGS += -DTAP_MAX_QUEUES=$(TAP_MAX_QUEUES)
> +CFLAGS += -DALLOW_EXPERIMENTAL_API
> 
> #
> # all source are stored in SRCS-y
> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index 4493507ed..98cbdf614 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -16,6 +16,8 @@
> #include <rte_debug.h>
> #include <rte_ip.h>
> #include <rte_string_fns.h>
> +#include <rte_ethdev.h>
> +#include <rte_errno.h>
> 
> #include <assert.h>
> #include <sys/types.h>
> @@ -62,6 +64,9 @@
> #define TAP_GSO_MBUFS_NUM \
> 	(TAP_GSO_MBUFS_PER_CORE * TAP_GSO_MBUF_CACHE_SIZE)
> 
> +/* IPC key for queue fds sync */
> +#define TAP_MP_KEY "tap_mp_sync_queues"
> +
> static struct rte_vdev_driver pmd_tap_drv;
> static struct rte_vdev_driver pmd_tun_drv;
> 
> @@ -100,6 +105,17 @@ enum ioctl_mode {
> 	REMOTE_ONLY,
> };
> 
> +/* Message header to synchronize queues via IPC */
> +struct ipc_queues {
> +	char port_name[RTE_DEV_NAME_MAX_LEN];
> +	int rxq_count;
> +	int txq_count;
> +	/*
> +	 * The file descriptors are in the dedicated part
> +	 * of the Unix message to be translated by the kernel.
> +	 */
> +};
> +
> static int tap_intr_handle_set(struct rte_eth_dev *dev, int set);
> 
> /**
> @@ -1920,6 +1936,100 @@ rte_pmd_tun_probe(struct rte_vdev_device *dev)
> 	return ret;
> }
> 
> +/* Request queue file descriptors from secondary to primary. */
> +static int
> +tap_mp_attach_queues(const char *port_name, struct rte_eth_dev *dev)
> +{
> +	int ret;
> +	struct pmd_internals *devpriv;
> +	struct timespec timeout = {.tv_sec = 1, .tv_nsec = 0};
> +	struct rte_mp_msg request, *reply;
> +	struct rte_mp_reply replies;
> +	struct ipc_queues *request_param = (struct ipc_queues *)request.param;
> +	struct ipc_queues *reply_param;
> +	int queue, fd_iterator;
> +
> +	/* Prepare the request */
> +	strcpy(request.name, TAP_MP_KEY);
> +	strcpy(request_param->port_name, port_name);
> +	request.len_param = sizeof(*request_param);
> +
> +	/* Send request and receive reply */
> +	ret = rte_mp_request_sync(&request, &replies, &timeout);
> +	if (ret < 0) {
> +		TAP_LOG(ERR, "Failed to request queues from primary: %d",
> +			rte_errno);
> +		return -1;
> +	}
> +	/* FIXME: handle replies.nb_received > 1 */

I am not a big fan of having TODO or FIXME comments in the code. Can we remove them and just describe the problem and what would happen or not happen if the condition occurs? If we need to add this support in the future, then we need to put these in an enhancement tracker or someplace else.
> +	reply = &replies.msgs[0];
> +	reply_param = (struct ipc_queues *)reply->param;
> +	TAP_LOG(DEBUG, "Received IPC reply for %s", reply_param->port_name);
> +
> +	/* Attach the queues from received file descriptors */
> +	devpriv = dev->data->dev_private;
> +	fd_iterator = 0;
> +	for (queue = 0; queue < reply_param->rxq_count; queue++)
> +		devpriv->rxq[queue].fd = reply->fds[fd_iterator++];
> +	for (queue = 0; queue < reply_param->txq_count; queue++)
> +		devpriv->txq[queue].fd = reply->fds[fd_iterator++];
> +
> +	return 0;
> +}
> +
> +/* Send the queue file descriptors from the primary process to secondary. */
> +static int
> +tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer)
> +{
> +	struct rte_eth_dev *dev;
> +	struct pmd_internals *devpriv;
> +	struct rte_mp_msg reply;
> +	const struct ipc_queues *request_param =
> +		(const struct ipc_queues *)request->param;
> +	struct ipc_queues *reply_param =
> +		(struct ipc_queues *)reply.param;
> +	uint16_t port_id;
> +	int queue;
> +	int ret;
> +
> +	/* Get requested port */
> +	TAP_LOG(DEBUG, "Received IPC request for %s", request_param->port_name);
> +	ret = rte_eth_dev_get_port_by_name(request_param->port_name, &port_id);
> +	if (ret) {
> +		TAP_LOG(ERR, "Failed to get port id for %s",
> +			request_param->port_name);
> +		return -1;
> +	}
> +	dev = &rte_eth_devices[port_id];
> +	devpriv = dev->data->dev_private;
> +
> +	/* Fill file descriptors for all queues */
> +	reply.num_fds = 0;
> +	reply_param->rxq_count = 0;
> +	for (queue = 0; queue < dev->data->nb_rx_queues; queue++) {
> +		reply.fds[reply.num_fds++] = devpriv->rxq[queue].fd;
> +		reply_param->rxq_count++;
> +	}
> +	reply_param->txq_count = 0;
> +	for (queue = 0; queue < dev->data->nb_tx_queues; queue++) {
> +		reply.fds[reply.num_fds++] = devpriv->txq[queue].fd;
> +		reply_param->txq_count++;
> +	}
> +	/* FIXME: split message if more queues than RTE_MP_MAX_FD_NUM */

Here too.
> +	RTE_ASSERT(reply.num_fds <= RTE_MP_MAX_FD_NUM);
> +
> +	/* Send reply */
> +	strcpy(reply.name, request->name);
> +	strcpy(reply_param->port_name, request_param->port_name);

Normally we use the snprintf() or strlcpy() functions for the above; should we do that here too?

> +	reply.len_param = sizeof(*reply_param);
> +	if (rte_mp_reply(&reply, peer) < 0) {
> +		TAP_LOG(ERR, "Failed to reply an IPC request to sync queues");
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
> /* Open a TAP interface device.
>  */
> static int
> @@ -1946,8 +2056,18 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
> 			TAP_LOG(ERR, "Failed to probe %s", name);
> 			return -1;
> 		}
> -		/* TODO: request info from primary to set up Rx and Tx */
> 		eth_dev->dev_ops = &ops;
> +		eth_dev->rx_pkt_burst = pmd_rx_burst;
> +		eth_dev->tx_pkt_burst = pmd_tx_burst;
> +
> +		if (!rte_eal_primary_proc_alive(NULL)) {
> +			TAP_LOG(ERR, "Primary process is missing");
> +			return -1;
> +		}
> +		ret = tap_mp_attach_queues(name, eth_dev);
> +		if (ret != 0)
> +			return -1;

Does the call above need to be wrapped in an "if secondary process" check, or is this for both primary and secondary?
> +
> 		rte_eth_dev_probing_finish(eth_dev);
> 		return 0;
> 	}
> @@ -1998,10 +2118,19 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
> 	ret = eth_dev_tap_create(dev, tap_name, remote_iface, &user_mac,
> 		ETH_TUNTAP_TYPE_TAP);
> 
> +	/* Register IPC feed callback */
> +	ret = rte_mp_action_register(TAP_MP_KEY, tap_mp_sync_queues);
> +	if (ret < 0 && rte_errno != EEXIST) {
> +		TAP_LOG(ERR, "%s: Failed to register IPC callback: %s",
> +			tuntap_name, strerror(rte_errno));
> +		goto leave;
> +	}

Same for this one as above?
> +
> leave:
> 	if (ret == -1) {
> 		TAP_LOG(ERR, "Failed to create pmd for %s as %s",
> 			name, tap_name);
> +		rte_mp_action_unregister(TAP_MP_KEY);
> 		tap_unit--;		/* Restore the unit number */
> 	}
> 	rte_kvargs_free(kvlist);
> -- 
> 2.17.1
> 

This looks fine to me except for the couple tests and the FIXME comments. Let me know about the other comments and with the FIXME changes I can Ack it.

Regards,
Keith
  
Thomas Monjalon July 20, 2018, 9:51 p.m. UTC | #2
20/07/2018 17:35, Wiles, Keith:
> > On Jul 20, 2018, at 4:15 AM, Thomas Monjalon <thomas@monjalon.net> wrote:
> > +	/* FIXME: handle replies.nb_received > 1 */
> 
> I am not a big fan of having TODO or FIXME comments in the code.

What don't you like in such comments?

> Can we remove them and just describe the problem and what would happen
> or not happen if the condition occurs?

You mean describing the problem in the code?

> If we need to add this support in the future then we need to put these
> in a enhancement tracker or someplace else.

The limitation is documented in the guide (limit of 8 queues).

> > +	reply = &replies.msgs[0];

[...]
> > +	/* FIXME: split message if more queues than RTE_MP_MAX_FD_NUM */
> 
> Here too.

This limitation is related to the previous one (send only one message,
receive only one message).

> > +	RTE_ASSERT(reply.num_fds <= RTE_MP_MAX_FD_NUM);
> > +
> > +	/* Send reply */
> > +	strcpy(reply.name, request->name);
> > +	strcpy(reply_param->port_name, request_param->port_name);
> 
> Normally we use the snprintf or strlcpy() functions for the above should we do that here too?

Yes it looks to be a good idea.


> > @@ -1946,8 +2056,18 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
> > 			TAP_LOG(ERR, "Failed to probe %s", name);
> > 			return -1;
> > 		}
> > -		/* TODO: request info from primary to set up Rx and Tx */
> > 		eth_dev->dev_ops = &ops;
> > +		eth_dev->rx_pkt_burst = pmd_rx_burst;
> > +		eth_dev->tx_pkt_burst = pmd_tx_burst;
> > +
> > +		if (!rte_eal_primary_proc_alive(NULL)) {
> > +			TAP_LOG(ERR, "Primary process is missing");
> > +			return -1;
> > +		}
> > +		ret = tap_mp_attach_queues(name, eth_dev);
> > +		if (ret != 0)
> > +			return -1;
> 
> Does the call above need to be wrapped using if secondary process or is this for both primary and secondary?

It is already in a "secondary only" block.

> > +	/* Register IPC feed callback */
> > +	ret = rte_mp_action_register(TAP_MP_KEY, tap_mp_sync_queues);
> > +	if (ret < 0 && rte_errno != EEXIST) {
> > +		TAP_LOG(ERR, "%s: Failed to register IPC callback: %s",
> > +			tuntap_name, strerror(rte_errno));
> > +		goto leave;
> > +	}
> 
> Same for this one as above?

This code path is executed only in the primary, or when a port is created in a secondary.
I think it is fine.

However I am thinking it should be registered only once for all TAP ports.
  
Wiles, Keith July 21, 2018, 1:44 p.m. UTC | #3
> On Jul 20, 2018, at 4:51 PM, Thomas Monjalon <thomas@monjalon.net> wrote:
> 
> 20/07/2018 17:35, Wiles, Keith:
>>> On Jul 20, 2018, at 4:15 AM, Thomas Monjalon <thomas@monjalon.net> wrote:
>>> +	/* FIXME: handle replies.nb_received > 1 */
>> 
>> I am not a big fan of having TODO or FIXME comments in the code.
> 
> What don't you like in such comments?

We should not have FIXME or TODO in the code; it makes the code look incomplete. If we need to fix something, then fix it or put it on a todo list, not in the code. The same goes for TODO, which to me means a future enhancement: we just need to add it to a future todo list.

If the code in these sections has a limitation, then describe the limitation and remove the FIXMEs and TODOs from the code.

> 
>> Can we remove them and just describe the problem and what would happen
>> or not happen if the condition occurs?
> 
> You mean describing the problem in the code?
> 
>> If we need to add this support in the future then we need to put these
>> in a enhancement tracker or someplace else.
> 
> The limitation is documented in the guide (limit of 8 queues).
> 
>>> +	reply = &replies.msgs[0];
> 
> [...]
>>> +	/* FIXME: split message if more queues than RTE_MP_MAX_FD_NUM */
>> 
>> Here too.
> 
> This limitation is related to the previous one (send only one message,
> receive only message).
> 
>>> +	RTE_ASSERT(reply.num_fds <= RTE_MP_MAX_FD_NUM);
>>> +
>>> +	/* Send reply */
>>> +	strcpy(reply.name, request->name);
>>> +	strcpy(reply_param->port_name, request_param->port_name);
>> 
>> Normally we use the snprintf or strlcpy() functions for the above should we do that here too?
> 
> Yes it looks to be a good idea.
> 
> 
>>> @@ -1946,8 +2056,18 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
>>> 			TAP_LOG(ERR, "Failed to probe %s", name);
>>> 			return -1;
>>> 		}
>>> -		/* TODO: request info from primary to set up Rx and Tx */
>>> 		eth_dev->dev_ops = &ops;
>>> +		eth_dev->rx_pkt_burst = pmd_rx_burst;
>>> +		eth_dev->tx_pkt_burst = pmd_tx_burst;
>>> +
>>> +		if (!rte_eal_primary_proc_alive(NULL)) {
>>> +			TAP_LOG(ERR, "Primary process is missing");
>>> +			return -1;
>>> +		}
>>> +		ret = tap_mp_attach_queues(name, eth_dev);
>>> +		if (ret != 0)
>>> +			return -1;
>> 
>> Does the call above need to be wrapped using if secondary process or is this for both primary and secondary?
> 
> It is already in a "secondary only" block.
> 
>>> +	/* Register IPC feed callback */
>>> +	ret = rte_mp_action_register(TAP_MP_KEY, tap_mp_sync_queues);
>>> +	if (ret < 0 && rte_errno != EEXIST) {
>>> +		TAP_LOG(ERR, "%s: Failed to register IPC callback: %s",
>>> +			tuntap_name, strerror(rte_errno));
>>> +		goto leave;
>>> +	}
>> 
>> Same for this one as above?
> 
> This code path is executed only in primary or creation of port in secondary.
> I think it is fine.
> 
> However I am thinking it should be registered only once for all TAP ports.
> 
> 

Regards,
Keith
  
Ferruh Yigit Aug. 23, 2018, 11:51 a.m. UTC | #4
On 7/20/2018 12:15 PM, Thomas Monjalon wrote:
> From: Raslan Darawsheh <rasland@mellanox.com>
> 
> In the case the device is created by the primary process,
> the secondary must request some file descriptors to attach the queues.
> The file descriptors are shared via IPC Unix socket.
> 
> Thanks to the IPC synchronization, the secondary process
> is now able to do Rx/Tx on a TAP created by the primary process.
> 
> Signed-off-by: Raslan Darawsheh <rasland@mellanox.com>
> Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
> ---
> Note: there is a bug in EAL IPC regarding fd translation.
> A fix will be sent later for EAL.
> 
> v3:
>    - split some long lines
> v2:
>    - translate file descriptors via IPC API
>    - add documentation
> ---
>  doc/guides/nics/tap.rst                |  16 +++
>  doc/guides/rel_notes/release_18_08.rst |   5 +

Needs to be 18.11 release notes now.

<...>

> @@ -1946,8 +2056,18 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
>  			TAP_LOG(ERR, "Failed to probe %s", name);
>  			return -1;
>  		}
> -		/* TODO: request info from primary to set up Rx and Tx */
>  		eth_dev->dev_ops = &ops;
> +		eth_dev->rx_pkt_burst = pmd_rx_burst;
> +		eth_dev->tx_pkt_burst = pmd_tx_burst;
> +
> +		if (!rte_eal_primary_proc_alive(NULL)) {
> +			TAP_LOG(ERR, "Primary process is missing");
> +			return -1;
> +		}
> +		ret = tap_mp_attach_queues(name, eth_dev);
> +		if (ret != 0)
> +			return -1;
> +

Should update rte_pmd_tun_probe() too?

>  		rte_eth_dev_probing_finish(eth_dev);
>  		return 0;
>  	}
> @@ -1998,10 +2118,19 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
>  	ret = eth_dev_tap_create(dev, tap_name, remote_iface, &user_mac,
>  		ETH_TUNTAP_TYPE_TAP);
>  
> +	/* Register IPC feed callback */
> +	ret = rte_mp_action_register(TAP_MP_KEY, tap_mp_sync_queues);
> +	if (ret < 0 && rte_errno != EEXIST) {
> +		TAP_LOG(ERR, "%s: Failed to register IPC callback: %s",
> +			tuntap_name, strerror(rte_errno));
> +		goto leave;
> +	}

This is causing an error if there are multiple tap devices, since it tries to
register multiple times.
  

Patch

diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
index 27148681c..d1f3e1c24 100644
--- a/doc/guides/nics/tap.rst
+++ b/doc/guides/nics/tap.rst
@@ -152,6 +152,22 @@  Distribute IPv4 TCP packets using RSS to a given MAC address over queues 0-3::
    testpmd> flow create 0 priority 4 ingress pattern eth dst is 0a:0b:0c:0d:0e:0f \
             / ipv4 / tcp / end actions rss queues 0 1 2 3 end / end
 
+Multi-process sharing
+---------------------
+
+It is possible to attach an existing TAP device in a secondary process,
+by declaring it as a vdev with the same name as in the primary process,
+and without any parameter.
+
+The port attached in a secondary process will give access to the
+statistics and the queues.
+Therefore it can be used for monitoring or Rx/Tx processing.
+
+The IPC synchronization of Rx/Tx queues is currently limited:
+
+  - Only 8 queues
+  - Synchronized on probing, but not on later port update
+
 Example
 -------
 
diff --git a/doc/guides/rel_notes/release_18_08.rst b/doc/guides/rel_notes/release_18_08.rst
index dd611b571..ec6a81236 100644
--- a/doc/guides/rel_notes/release_18_08.rst
+++ b/doc/guides/rel_notes/release_18_08.rst
@@ -74,6 +74,11 @@  New Features
   * Add handlers to add/delete VxLAN port number.
   * Add devarg to specify ingress VLAN rewrite mode.
 
+* **Added TAP Rx/Tx queues sharing with a secondary process.**
+
+  A secondary process can attach a TAP device created in the primary process,
+  probe the queues, and process Rx/Tx in a secondary process.
+
 
 API Changes
 -----------
diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
index 324336535..3dcf05a72 100644
--- a/drivers/net/tap/Makefile
+++ b/drivers/net/tap/Makefile
@@ -27,6 +27,7 @@  LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash
 LDLIBS += -lrte_bus_vdev -lrte_gso
 
 CFLAGS += -DTAP_MAX_QUEUES=$(TAP_MAX_QUEUES)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 
 #
 # all source are stored in SRCS-y
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 4493507ed..98cbdf614 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -16,6 +16,8 @@ 
 #include <rte_debug.h>
 #include <rte_ip.h>
 #include <rte_string_fns.h>
+#include <rte_ethdev.h>
+#include <rte_errno.h>
 
 #include <assert.h>
 #include <sys/types.h>
@@ -62,6 +64,9 @@ 
 #define TAP_GSO_MBUFS_NUM \
 	(TAP_GSO_MBUFS_PER_CORE * TAP_GSO_MBUF_CACHE_SIZE)
 
+/* IPC key for queue fds sync */
+#define TAP_MP_KEY "tap_mp_sync_queues"
+
 static struct rte_vdev_driver pmd_tap_drv;
 static struct rte_vdev_driver pmd_tun_drv;
 
@@ -100,6 +105,17 @@  enum ioctl_mode {
 	REMOTE_ONLY,
 };
 
+/* Message header to synchronize queues via IPC */
+struct ipc_queues {
+	char port_name[RTE_DEV_NAME_MAX_LEN];
+	int rxq_count;
+	int txq_count;
+	/*
+	 * The file descriptors are in the dedicated part
+	 * of the Unix message to be translated by the kernel.
+	 */
+};
+
 static int tap_intr_handle_set(struct rte_eth_dev *dev, int set);
 
 /**
@@ -1920,6 +1936,100 @@  rte_pmd_tun_probe(struct rte_vdev_device *dev)
 	return ret;
 }
 
+/* Request queue file descriptors from secondary to primary. */
+static int
+tap_mp_attach_queues(const char *port_name, struct rte_eth_dev *dev)
+{
+	int ret;
+	struct pmd_internals *devpriv;
+	struct timespec timeout = {.tv_sec = 1, .tv_nsec = 0};
+	struct rte_mp_msg request, *reply;
+	struct rte_mp_reply replies;
+	struct ipc_queues *request_param = (struct ipc_queues *)request.param;
+	struct ipc_queues *reply_param;
+	int queue, fd_iterator;
+
+	/* Prepare the request */
+	strcpy(request.name, TAP_MP_KEY);
+	strcpy(request_param->port_name, port_name);
+	request.len_param = sizeof(*request_param);
+
+	/* Send request and receive reply */
+	ret = rte_mp_request_sync(&request, &replies, &timeout);
+	if (ret < 0) {
+		TAP_LOG(ERR, "Failed to request queues from primary: %d",
+			rte_errno);
+		return -1;
+	}
+	/* FIXME: handle replies.nb_received > 1 */
+	reply = &replies.msgs[0];
+	reply_param = (struct ipc_queues *)reply->param;
+	TAP_LOG(DEBUG, "Received IPC reply for %s", reply_param->port_name);
+
+	/* Attach the queues from received file descriptors */
+	devpriv = dev->data->dev_private;
+	fd_iterator = 0;
+	for (queue = 0; queue < reply_param->rxq_count; queue++)
+		devpriv->rxq[queue].fd = reply->fds[fd_iterator++];
+	for (queue = 0; queue < reply_param->txq_count; queue++)
+		devpriv->txq[queue].fd = reply->fds[fd_iterator++];
+
+	return 0;
+}
+
+/* Send the queue file descriptors from the primary process to secondary. */
+static int
+tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer)
+{
+	struct rte_eth_dev *dev;
+	struct pmd_internals *devpriv;
+	struct rte_mp_msg reply;
+	const struct ipc_queues *request_param =
+		(const struct ipc_queues *)request->param;
+	struct ipc_queues *reply_param =
+		(struct ipc_queues *)reply.param;
+	uint16_t port_id;
+	int queue;
+	int ret;
+
+	/* Get requested port */
+	TAP_LOG(DEBUG, "Received IPC request for %s", request_param->port_name);
+	ret = rte_eth_dev_get_port_by_name(request_param->port_name, &port_id);
+	if (ret) {
+		TAP_LOG(ERR, "Failed to get port id for %s",
+			request_param->port_name);
+		return -1;
+	}
+	dev = &rte_eth_devices[port_id];
+	devpriv = dev->data->dev_private;
+
+	/* Fill file descriptors for all queues */
+	reply.num_fds = 0;
+	reply_param->rxq_count = 0;
+	for (queue = 0; queue < dev->data->nb_rx_queues; queue++) {
+		reply.fds[reply.num_fds++] = devpriv->rxq[queue].fd;
+		reply_param->rxq_count++;
+	}
+	reply_param->txq_count = 0;
+	for (queue = 0; queue < dev->data->nb_tx_queues; queue++) {
+		reply.fds[reply.num_fds++] = devpriv->txq[queue].fd;
+		reply_param->txq_count++;
+	}
+	/* FIXME: split message if more queues than RTE_MP_MAX_FD_NUM */
+	RTE_ASSERT(reply.num_fds <= RTE_MP_MAX_FD_NUM);
+
+	/* Send reply */
+	strcpy(reply.name, request->name);
+	strcpy(reply_param->port_name, request_param->port_name);
+	reply.len_param = sizeof(*reply_param);
+	if (rte_mp_reply(&reply, peer) < 0) {
+		TAP_LOG(ERR, "Failed to reply an IPC request to sync queues");
+		return -1;
+	}
+
+	return 0;
+}
+
 /* Open a TAP interface device.
  */
 static int
@@ -1946,8 +2056,18 @@  rte_pmd_tap_probe(struct rte_vdev_device *dev)
 			TAP_LOG(ERR, "Failed to probe %s", name);
 			return -1;
 		}
-		/* TODO: request info from primary to set up Rx and Tx */
 		eth_dev->dev_ops = &ops;
+		eth_dev->rx_pkt_burst = pmd_rx_burst;
+		eth_dev->tx_pkt_burst = pmd_tx_burst;
+
+		if (!rte_eal_primary_proc_alive(NULL)) {
+			TAP_LOG(ERR, "Primary process is missing");
+			return -1;
+		}
+		ret = tap_mp_attach_queues(name, eth_dev);
+		if (ret != 0)
+			return -1;
+
 		rte_eth_dev_probing_finish(eth_dev);
 		return 0;
 	}
@@ -1998,10 +2118,19 @@  rte_pmd_tap_probe(struct rte_vdev_device *dev)
 	ret = eth_dev_tap_create(dev, tap_name, remote_iface, &user_mac,
 		ETH_TUNTAP_TYPE_TAP);
 
+	/* Register IPC feed callback */
+	ret = rte_mp_action_register(TAP_MP_KEY, tap_mp_sync_queues);
+	if (ret < 0 && rte_errno != EEXIST) {
+		TAP_LOG(ERR, "%s: Failed to register IPC callback: %s",
+			tuntap_name, strerror(rte_errno));
+		goto leave;
+	}
+
 leave:
 	if (ret == -1) {
 		TAP_LOG(ERR, "Failed to create pmd for %s as %s",
 			name, tap_name);
+		rte_mp_action_unregister(TAP_MP_KEY);
 		tap_unit--;		/* Restore the unit number */
 	}
 	rte_kvargs_free(kvlist);