[dpdk-dev] [PATCH 2/4] net/mlx5: replace IPC socket with EAL API

Yongseok Koh yskoh at mellanox.com
Mon Mar 18 22:29:53 CET 2019


On Thu, Mar 14, 2019 at 05:36:32AM -0700, Shahaf Shuler wrote:
> Hi Koh, 
> 
> Thursday, March 7, 2019 9:33 AM, Yongseok Koh:
> > Subject: [PATCH 2/4] net/mlx5: replace IPC socket with EAL API
> > 
> > Socket API is used for IPC in order for secondary process to acquire Verb
> > command file descriptor. The FD is used to remap UAR address. The new
> > multi-process APIs (rte_mp) in EAL are newly introduced. mlx5_socket.c is
> > replaced with mlx5_mp.c, which uses the new APIs.
> > 
> > As it is PMD global infrastructure, only one IPC channel is established.
> > All the IPC message types may have port_id in the message if there is need
> > to reference a specific device.
> > 
> > Signed-off-by: Yongseok Koh <yskoh at mellanox.com>
> 
> [...]
> 
> >  /**
> > diff --git a/drivers/net/mlx5/mlx5_mp.c b/drivers/net/mlx5/mlx5_mp.c new
> > file mode 100644 index 0000000000..19a1f25f0e
> > --- /dev/null
> > +++ b/drivers/net/mlx5/mlx5_mp.c
> > @@ -0,0 +1,126 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright 2019 6WIND S.A.
> > + * Copyright 2019 Mellanox Technologies, Ltd  */
> > +
> > +#include <assert.h>
> > +#include <stdio.h>
> > +#include <time.h>
> > +
> > +#include <rte_eal.h>
> > +#include <rte_ethdev_driver.h>
> > +#include <rte_string_fns.h>
> > +
> > +#include "mlx5.h"
> > +#include "mlx5_utils.h"
> > +
> > +/**
> > + * Initialize IPC message.
> > + *
> > + * @param[in] dev
> > + *   Pointer to Ethernet structure.
> > + * @param[out] msg
> > + *   Pointer to message to fill in.
> > + * @param[in] type
> > + *   Message type.
> > + */
> > +static inline void
> > +mp_init_msg(struct rte_eth_dev *dev, struct rte_mp_msg *msg,
> > +	    enum mlx5_mp_req_type type)
> > +{
> > +	struct mlx5_mp_param *param = (struct mlx5_mp_param *)msg-
> > >param;
> > +
> > +	memset(msg, 0, sizeof(*msg));
> > +	strlcpy(msg->name, MLX5_MP_NAME, sizeof(msg->name));
> > +	msg->len_param = sizeof(*param);
> > +	param->type = type;
> > +	param->port_id = dev->data->port_id;
> > +}
> > +
> > +/**
> > + * IPC message handler of primary process.
> > + *
> > + * @param[in] dev
> > + *   Pointer to Ethernet structure.
> > + * @param[in] peer
> > + *   Pointer to the peer socket path.
> > + *
> > + * @return
> > + *   0 on success, a negative errno value otherwise and rte_errno is set.
> > + */
> > +static int
> > +mp_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer)
> > {
> > +	struct rte_mp_msg mp_res;
> > +	struct mlx5_mp_param *res = (struct mlx5_mp_param
> > *)mp_res.param;
> > +	const struct mlx5_mp_param *param =
> > +		(const struct mlx5_mp_param *)mp_msg->param;
> > +	struct rte_eth_dev *dev = &rte_eth_devices[param->port_id];
> 
> Need to check dev is a valid one before we continue. If such case happen need error log (like you have for invalid req). 

Good point.

> > +	struct mlx5_priv *priv = dev->data->dev_private;
> > +	int ret = 0;
> > +
> > +	assert(rte_eal_process_type() == RTE_PROC_PRIMARY);
> > +	switch (param->type) {
> > +	case MLX5_MP_REQ_VERBS_CMD_FD:
> > +		mp_init_msg(dev, &mp_res, param->type);
> > +		mp_res.num_fds = 1;
> > +		mp_res.fds[0] = priv->ctx->cmd_fd;
> > +		res->result = 0;
> > +		ret = rte_mp_reply(&mp_res, peer);
> > +		break;
> > +	default:
> > +		rte_errno = EINVAL;
> > +		DRV_LOG(ERR, "port %u invalid mp request type",
> > +			dev->data->port_id);
> > +		return -rte_errno;
> > +	}
> > +	return ret;
> > +}
> > +
> > +/**
> > + * Request Verbs command file descriptor for mmap to the primary process.
> > + *
> > + * @param[in] dev
> > + *   Pointer to Ethernet structure.
> > + *
> > + * @return
> > + *   fd on success, a negative errno value otherwise and rte_errno is set.
> > + */
> > +int
> > +mlx5_mp_req_verbs_cmd_fd(struct rte_eth_dev *dev) {
> > +	struct rte_mp_msg mp_req;
> > +	struct rte_mp_msg *mp_res;
> > +	struct rte_mp_reply mp_rep;
> > +	struct mlx5_mp_param *res __rte_unused;
> > +	struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
> > +	int cmd_fd;
> > +	int ret;
> > +
> > +	assert(rte_eal_process_type() == RTE_PROC_SECONDARY);
> > +	mp_init_msg(dev, &mp_req, MLX5_MP_REQ_VERBS_CMD_FD);
> > +	ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);
> > +	if (ret) {
> > +		DRV_LOG(ERR,
> > +			"port %u failed to get command FD from primary
> > process",
> > +			dev->data->port_id);
> > +		return -rte_errno;
> > +	}
> > +	assert(mp_rep.nb_received == 1);
> > +	mp_res = &mp_rep.msgs[0];
> > +	res = (struct mlx5_mp_param *)mp_res->param;
> > +	assert(!res->result);
> 
> Above should not be an assert rather and actual check. 

The reason was because only zero is set by the handler. If the request is
successful, res->result has to have zero. Otherwise, that should be memory
corruption or bug in the rte_mp lib. So, I think it is better to have 'if'
instead of 'assert' so that we can prevent returning invalid cmd_fd.

Thanks,
Yongseok

> > +	assert(mp_res->num_fds == 1);
> > +	cmd_fd = mp_res->fds[0];
> > +	free(mp_rep.msgs);
> > +	DRV_LOG(DEBUG, "port %u command FD from primary is %d",
> > +		dev->data->port_id, cmd_fd);
> > +	return cmd_fd;
> > +}
> > +
> > +void
> > +mlx5_mp_init(void)
> > +{
> > +	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> > +		rte_mp_action_register(MLX5_MP_NAME,
> > mp_primary_handle); }
> > diff --git a/drivers/net/mlx5/mlx5_socket.c
> > b/drivers/net/mlx5/mlx5_socket.c deleted file mode 100644 index
> > 41cac3c6aa..0000000000
> > --- a/drivers/net/mlx5/mlx5_socket.c
> > +++ /dev/null
> > @@ -1,306 +0,0 @@
> > -/* SPDX-License-Identifier: BSD-3-Clause
> > - * Copyright 2016 6WIND S.A.
> > - * Copyright 2016 Mellanox Technologies, Ltd
> > - */
> > -
> > -#include <sys/types.h>
> > -#include <sys/socket.h>
> > -#include <sys/un.h>
> > -#include <fcntl.h>
> > -#include <stdio.h>
> > -#include <unistd.h>
> > -#include <sys/stat.h>
> > -
> > -#include "mlx5.h"
> > -#include "mlx5_utils.h"
> > -
> > -/**
> > - * Initialise the socket to communicate with the secondary process
> > - *
> > - * @param[in] dev
> > - *   Pointer to Ethernet device.
> > - *
> > - * @return
> > - *   0 on success, a negative errno value otherwise and rte_errno is set.
> > - */
> > -int
> > -mlx5_socket_init(struct rte_eth_dev *dev) -{
> > -	struct mlx5_priv *priv = dev->data->dev_private;
> > -	struct sockaddr_un sun = {
> > -		.sun_family = AF_UNIX,
> > -	};
> > -	int ret;
> > -	int flags;
> > -
> > -	/*
> > -	 * Close the last socket that was used to communicate
> > -	 * with the secondary process
> > -	 */
> > -	if (priv->primary_socket)
> > -		mlx5_socket_uninit(dev);
> > -	/*
> > -	 * Initialise the socket to communicate with the secondary
> > -	 * process.
> > -	 */
> > -	ret = socket(AF_UNIX, SOCK_STREAM, 0);
> > -	if (ret < 0) {
> > -		rte_errno = errno;
> > -		DRV_LOG(WARNING, "port %u secondary process not
> > supported: %s",
> > -			dev->data->port_id, strerror(errno));
> > -		goto error;
> > -	}
> > -	priv->primary_socket = ret;
> > -	flags = fcntl(priv->primary_socket, F_GETFL, 0);
> > -	if (flags == -1) {
> > -		rte_errno = errno;
> > -		goto error;
> > -	}
> > -	ret = fcntl(priv->primary_socket, F_SETFL, flags | O_NONBLOCK);
> > -	if (ret < 0) {
> > -		rte_errno = errno;
> > -		goto error;
> > -	}
> > -	snprintf(sun.sun_path, sizeof(sun.sun_path), "/var/tmp/%s_%d",
> > -		 MLX5_DRIVER_NAME, priv->primary_socket);
> > -	remove(sun.sun_path);
> > -	ret = bind(priv->primary_socket, (const struct sockaddr *)&sun,
> > -		   sizeof(sun));
> > -	if (ret < 0) {
> > -		rte_errno = errno;
> > -		DRV_LOG(WARNING,
> > -			"port %u cannot bind socket, secondary process not"
> > -			" supported: %s",
> > -			dev->data->port_id, strerror(errno));
> > -		goto close;
> > -	}
> > -	ret = listen(priv->primary_socket, 0);
> > -	if (ret < 0) {
> > -		rte_errno = errno;
> > -		DRV_LOG(WARNING, "port %u secondary process not
> > supported: %s",
> > -			dev->data->port_id, strerror(errno));
> > -		goto close;
> > -	}
> > -	return 0;
> > -close:
> > -	remove(sun.sun_path);
> > -error:
> > -	claim_zero(close(priv->primary_socket));
> > -	priv->primary_socket = 0;
> > -	return -rte_errno;
> > -}
> > -
> > -/**
> > - * Un-Initialise the socket to communicate with the secondary process
> > - *
> > - * @param[in] dev
> > - */
> > -void
> > -mlx5_socket_uninit(struct rte_eth_dev *dev) -{
> > -	struct mlx5_priv *priv = dev->data->dev_private;
> > -
> > -	MKSTR(path, "/var/tmp/%s_%d", MLX5_DRIVER_NAME, priv-
> > >primary_socket);
> > -	claim_zero(close(priv->primary_socket));
> > -	priv->primary_socket = 0;
> > -	claim_zero(remove(path));
> > -}
> > -
> > -/**
> > - * Handle socket interrupts.
> > - *
> > - * @param dev
> > - *   Pointer to Ethernet device.
> > - */
> > -void
> > -mlx5_socket_handle(struct rte_eth_dev *dev) -{
> > -	struct mlx5_priv *priv = dev->data->dev_private;
> > -	int conn_sock;
> > -	int ret = 0;
> > -	struct cmsghdr *cmsg = NULL;
> > -	struct ucred *cred = NULL;
> > -	char buf[CMSG_SPACE(sizeof(struct ucred))] = { 0 };
> > -	char vbuf[1024] = { 0 };
> > -	struct iovec io = {
> > -		.iov_base = vbuf,
> > -		.iov_len = sizeof(*vbuf),
> > -	};
> > -	struct msghdr msg = {
> > -		.msg_iov = &io,
> > -		.msg_iovlen = 1,
> > -		.msg_control = buf,
> > -		.msg_controllen = sizeof(buf),
> > -	};
> > -	int *fd;
> > -
> > -	/* Accept the connection from the client. */
> > -	conn_sock = accept(priv->primary_socket, NULL, NULL);
> > -	if (conn_sock < 0) {
> > -		DRV_LOG(WARNING, "port %u connection failed: %s",
> > -			dev->data->port_id, strerror(errno));
> > -		return;
> > -	}
> > -	ret = setsockopt(conn_sock, SOL_SOCKET, SO_PASSCRED, &(int){1},
> > -					 sizeof(int));
> > -	if (ret < 0) {
> > -		ret = errno;
> > -		DRV_LOG(WARNING, "port %u cannot change socket
> > options: %s",
> > -			dev->data->port_id, strerror(rte_errno));
> > -		goto error;
> > -	}
> > -	ret = recvmsg(conn_sock, &msg, MSG_WAITALL);
> > -	if (ret < 0) {
> > -		ret = errno;
> > -		DRV_LOG(WARNING, "port %u received an empty message:
> > %s",
> > -			dev->data->port_id, strerror(rte_errno));
> > -		goto error;
> > -	}
> > -	/* Expect to receive credentials only. */
> > -	cmsg = CMSG_FIRSTHDR(&msg);
> > -	if (cmsg == NULL) {
> > -		DRV_LOG(WARNING, "port %u no message", dev->data-
> > >port_id);
> > -		goto error;
> > -	}
> > -	if ((cmsg->cmsg_type == SCM_CREDENTIALS) &&
> > -		(cmsg->cmsg_len >= sizeof(*cred))) {
> > -		cred = (struct ucred *)CMSG_DATA(cmsg);
> > -		assert(cred != NULL);
> > -	}
> > -	cmsg = CMSG_NXTHDR(&msg, cmsg);
> > -	if (cmsg != NULL) {
> > -		DRV_LOG(WARNING, "port %u message wrongly
> > formatted",
> > -			dev->data->port_id);
> > -		goto error;
> > -	}
> > -	/* Make sure all the ancillary data was received and valid. */
> > -	if ((cred == NULL) || (cred->uid != getuid()) ||
> > -	    (cred->gid != getgid())) {
> > -		DRV_LOG(WARNING, "port %u wrong credentials",
> > -			dev->data->port_id);
> > -		goto error;
> > -	}
> > -	/* Set-up the ancillary data. */
> > -	cmsg = CMSG_FIRSTHDR(&msg);
> > -	assert(cmsg != NULL);
> > -	cmsg->cmsg_level = SOL_SOCKET;
> > -	cmsg->cmsg_type = SCM_RIGHTS;
> > -	cmsg->cmsg_len = CMSG_LEN(sizeof(priv->ctx->cmd_fd));
> > -	fd = (int *)CMSG_DATA(cmsg);
> > -	*fd = priv->ctx->cmd_fd;
> > -	ret = sendmsg(conn_sock, &msg, 0);
> > -	if (ret < 0)
> > -		DRV_LOG(WARNING, "port %u cannot send response",
> > -			dev->data->port_id);
> > -error:
> > -	close(conn_sock);
> > -}
> > -
> > -/**
> > - * Connect to the primary process.
> > - *
> > - * @param[in] dev
> > - *   Pointer to Ethernet structure.
> > - *
> > - * @return
> > - *   fd on success, negative errno value otherwise and rte_errno is set.
> > - */
> > -int
> > -mlx5_socket_connect(struct rte_eth_dev *dev) -{
> > -	struct mlx5_priv *priv = dev->data->dev_private;
> > -	struct sockaddr_un sun = {
> > -		.sun_family = AF_UNIX,
> > -	};
> > -	int socket_fd = -1;
> > -	int *fd = NULL;
> > -	int ret;
> > -	struct ucred *cred;
> > -	char buf[CMSG_SPACE(sizeof(*cred))] = { 0 };
> > -	char vbuf[1024] = { 0 };
> > -	struct iovec io = {
> > -		.iov_base = vbuf,
> > -		.iov_len = sizeof(*vbuf),
> > -	};
> > -	struct msghdr msg = {
> > -		.msg_control = buf,
> > -		.msg_controllen = sizeof(buf),
> > -		.msg_iov = &io,
> > -		.msg_iovlen = 1,
> > -	};
> > -	struct cmsghdr *cmsg;
> > -
> > -	ret = socket(AF_UNIX, SOCK_STREAM, 0);
> > -	if (ret < 0) {
> > -		rte_errno = errno;
> > -		DRV_LOG(WARNING, "port %u cannot connect to primary",
> > -			dev->data->port_id);
> > -		goto error;
> > -	}
> > -	socket_fd = ret;
> > -	snprintf(sun.sun_path, sizeof(sun.sun_path), "/var/tmp/%s_%d",
> > -		 MLX5_DRIVER_NAME, priv->primary_socket);
> > -	ret = connect(socket_fd, (const struct sockaddr *)&sun, sizeof(sun));
> > -	if (ret < 0) {
> > -		rte_errno = errno;
> > -		DRV_LOG(WARNING, "port %u cannot connect to primary",
> > -			dev->data->port_id);
> > -		goto error;
> > -	}
> > -	cmsg = CMSG_FIRSTHDR(&msg);
> > -	if (cmsg == NULL) {
> > -		rte_errno = EINVAL;
> > -		DRV_LOG(DEBUG, "port %u cannot get first message",
> > -			dev->data->port_id);
> > -		goto error;
> > -	}
> > -	cmsg->cmsg_level = SOL_SOCKET;
> > -	cmsg->cmsg_type = SCM_CREDENTIALS;
> > -	cmsg->cmsg_len = CMSG_LEN(sizeof(*cred));
> > -	cred = (struct ucred *)CMSG_DATA(cmsg);
> > -	if (cred == NULL) {
> > -		rte_errno = EINVAL;
> > -		DRV_LOG(DEBUG, "port %u no credentials received",
> > -			dev->data->port_id);
> > -		goto error;
> > -	}
> > -	cred->pid = getpid();
> > -	cred->uid = getuid();
> > -	cred->gid = getgid();
> > -	ret = sendmsg(socket_fd, &msg, MSG_DONTWAIT);
> > -	if (ret < 0) {
> > -		rte_errno = errno;
> > -		DRV_LOG(WARNING,
> > -			"port %u cannot send credentials to primary: %s",
> > -			dev->data->port_id, strerror(errno));
> > -		goto error;
> > -	}
> > -	ret = recvmsg(socket_fd, &msg, MSG_WAITALL);
> > -	if (ret <= 0) {
> > -		rte_errno = errno;
> > -		DRV_LOG(WARNING, "port %u no message from primary:
> > %s",
> > -			dev->data->port_id, strerror(errno));
> > -		goto error;
> > -	}
> > -	cmsg = CMSG_FIRSTHDR(&msg);
> > -	if (cmsg == NULL) {
> > -		rte_errno = EINVAL;
> > -		DRV_LOG(WARNING, "port %u no file descriptor received",
> > -			dev->data->port_id);
> > -		goto error;
> > -	}
> > -	fd = (int *)CMSG_DATA(cmsg);
> > -	if (*fd < 0) {
> > -		DRV_LOG(WARNING, "port %u no file descriptor received:
> > %s",
> > -			dev->data->port_id, strerror(errno));
> > -		rte_errno = *fd;
> > -		goto error;
> > -	}
> > -	ret = *fd;
> > -	close(socket_fd);
> > -	return ret;
> > -error:
> > -	if (socket_fd != -1)
> > -		close(socket_fd);
> > -	return -rte_errno;
> > -}
> > --
> > 2.11.0
> 


More information about the dev mailing list