[dpdk-dev] [PATCH v2 2/3] rte_ctrl_if: add control interface library

Ananyev, Konstantin konstantin.ananyev at intel.com
Wed Feb 17 20:58:16 CET 2016



> -----Original Message-----
> From: dev [mailto:dev-bounces at dpdk.org] On Behalf Of Ferruh Yigit
> Sent: Friday, February 12, 2016 1:46 PM
> To: dev at dpdk.org
> Subject: [dpdk-dev] [PATCH v2 2/3] rte_ctrl_if: add control interface library
> 
> This library gets control messages form kernelspace and forwards them to
> librte_ether and returns response back to the kernelspace.
> 
> Library does:
> 1) Trigger Linux virtual interface creation
> 2) Initialize the netlink socket communication
> 3) Provides process() API to the application that does processing the
> received messages
> 
> This library requires corresponding kernel module to be inserted.
> 
> Signed-off-by: Ferruh Yigit <ferruh.yigit at intel.com>
> 
> ---
> 
> v2:
> * User rtnetlink to create interfaces.
> * Add more ethtool support: get/set ringparam, set pauseparam.
> * return defined error instead of hardcoded value
> ---
>  MAINTAINERS                                |   1 +
>  config/common_linuxapp                     |   3 +-
>  doc/api/doxy-api-index.md                  |   3 +-
>  doc/api/doxy-api.conf                      |   1 +
>  doc/guides/rel_notes/release_16_04.rst     |   9 +
>  lib/Makefile                               |   3 +-
>  lib/librte_ctrl_if/Makefile                |  58 ++++
>  lib/librte_ctrl_if/rte_ctrl_if.c           | 385 ++++++++++++++++++++++++
>  lib/librte_ctrl_if/rte_ctrl_if.h           | 115 ++++++++
>  lib/librte_ctrl_if/rte_ctrl_if_version.map |   9 +
>  lib/librte_ctrl_if/rte_ethtool.c           | 450 +++++++++++++++++++++++++++++
>  lib/librte_ctrl_if/rte_ethtool.h           |  54 ++++
>  lib/librte_ctrl_if/rte_nl.c                | 274 ++++++++++++++++++
>  lib/librte_ctrl_if/rte_nl.h                |  49 ++++
>  lib/librte_eal/common/include/rte_log.h    |   3 +-
>  mk/rte.app.mk                              |   3 +-
>  16 files changed, 1415 insertions(+), 5 deletions(-)
>  create mode 100644 lib/librte_ctrl_if/Makefile
>  create mode 100644 lib/librte_ctrl_if/rte_ctrl_if.c
>  create mode 100644 lib/librte_ctrl_if/rte_ctrl_if.h
>  create mode 100644 lib/librte_ctrl_if/rte_ctrl_if_version.map
>  create mode 100644 lib/librte_ctrl_if/rte_ethtool.c
>  create mode 100644 lib/librte_ctrl_if/rte_ethtool.h
>  create mode 100644 lib/librte_ctrl_if/rte_nl.c
>  create mode 100644 lib/librte_ctrl_if/rte_nl.h
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 09c56c7..91c98bc 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -256,6 +256,7 @@ F: doc/guides/sample_app_ug/kernel_nic_interface.rst
>  Linux KCP
>  M: Ferruh Yigit <ferruh.yigit at intel.com>
>  F: lib/librte_eal/linuxapp/kcp/
> +F: lib/librte_ctrl_if/
> 
>  Linux AF_PACKET
>  M: John W. Linville <linville at tuxdriver.com>
> diff --git a/config/common_linuxapp b/config/common_linuxapp
> index 2f4eb1d..4bcd508 100644
> --- a/config/common_linuxapp
> +++ b/config/common_linuxapp
> @@ -1,6 +1,6 @@
>  #   BSD LICENSE
>  #
> -#   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
> +#   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
>  #   All rights reserved.
>  #
>  #   Redistribution and use in source and binary forms, with or without
> @@ -501,6 +501,7 @@ CONFIG_RTE_KNI_VHOST_DEBUG_TX=n
>  #
>  CONFIG_RTE_KCP_KMOD=y
>  CONFIG_RTE_KCP_KO_DEBUG=n
> +CONFIG_RTE_LIBRTE_CTRL_IF=y
> 
>  #
>  # Compile vhost library
> diff --git a/doc/api/doxy-api-index.md b/doc/api/doxy-api-index.md
> index 7a91001..214d16e 100644
> --- a/doc/api/doxy-api-index.md
> +++ b/doc/api/doxy-api-index.md
> @@ -149,4 +149,5 @@ There are many libraries, so their headers may be grouped by topics:
>    [common]             (@ref rte_common.h),
>    [ABI compat]         (@ref rte_compat.h),
>    [keepalive]          (@ref rte_keepalive.h),
> -  [version]            (@ref rte_version.h)
> +  [version]            (@ref rte_version.h),
> +  [control interface]  (@ref rte_ctrl_if.h)
> diff --git a/doc/api/doxy-api.conf b/doc/api/doxy-api.conf
> index 57e8b5d..fd69bf1 100644
> --- a/doc/api/doxy-api.conf
> +++ b/doc/api/doxy-api.conf
> @@ -39,6 +39,7 @@ INPUT                   = doc/api/doxy-api-index.md \
>                            lib/librte_cmdline \
>                            lib/librte_compat \
>                            lib/librte_cryptodev \
> +                          lib/librte_ctrl_if \
>                            lib/librte_distributor \
>                            lib/librte_ether \
>                            lib/librte_hash \
> diff --git a/doc/guides/rel_notes/release_16_04.rst b/doc/guides/rel_notes/release_16_04.rst
> index 27fc624..1b1d34c 100644
> --- a/doc/guides/rel_notes/release_16_04.rst
> +++ b/doc/guides/rel_notes/release_16_04.rst
> @@ -39,6 +39,14 @@ This section should contain new features added in this release. Sample format:
> 
>    Enabled virtio 1.0 support for virtio pmd driver.
> 
> +* **Control interface support added.**
> +
> +  To enable controlling DPDK ports by common Linux tools.
> +  Following modules added to DPDK:
> +
> +  * librte_ctrl_if library
> +  * librte_eal/linuxapp/kcp kernel module
> +
> 
>  Resolved Issues
>  ---------------
> @@ -113,6 +121,7 @@ The libraries prepended with a plus sign were incremented in this version.
>       librte_acl.so.2
>       librte_cfgfile.so.2
>       librte_cmdline.so.1
> +   + librte_ctrl_if.so.1
>       librte_distributor.so.1
>       librte_eal.so.2
>       librte_hash.so.2
> diff --git a/lib/Makefile b/lib/Makefile
> index ef172ea..a50bc1e 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -1,6 +1,6 @@
>  #   BSD LICENSE
>  #
> -#   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
> +#   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
>  #   All rights reserved.
>  #
>  #   Redistribution and use in source and binary forms, with or without
> @@ -58,6 +58,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PORT) += librte_port
>  DIRS-$(CONFIG_RTE_LIBRTE_TABLE) += librte_table
>  DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += librte_pipeline
>  DIRS-$(CONFIG_RTE_LIBRTE_REORDER) += librte_reorder
> +DIRS-$(CONFIG_RTE_LIBRTE_CTRL_IF) += librte_ctrl_if
> 
>  ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
>  DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni
> diff --git a/lib/librte_ctrl_if/Makefile b/lib/librte_ctrl_if/Makefile
> new file mode 100644
> index 0000000..4e82966
> --- /dev/null
> +++ b/lib/librte_ctrl_if/Makefile
> @@ -0,0 +1,58 @@
> +#   BSD LICENSE
> +#
> +#   Copyright(c) 2016 Intel Corporation. All rights reserved.
> +#   All rights reserved.
> +#
> +#   Redistribution and use in source and binary forms, with or without
> +#   modification, are permitted provided that the following conditions
> +#   are met:
> +#
> +#     * Redistributions of source code must retain the above copyright
> +#       notice, this list of conditions and the following disclaimer.
> +#     * Redistributions in binary form must reproduce the above copyright
> +#       notice, this list of conditions and the following disclaimer in
> +#       the documentation and/or other materials provided with the
> +#       distribution.
> +#     * Neither the name of Intel Corporation nor the names of its
> +#       contributors may be used to endorse or promote products derived
> +#       from this software without specific prior written permission.
> +#
> +#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> +#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> +#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> +#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> +#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> +#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> +#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> +#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> +#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> +#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> +#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> +
> +include $(RTE_SDK)/mk/rte.vars.mk
> +
> +#
> +# library name
> +#
> +LIB = librte_ctrl_if.a
> +
> +CFLAGS += -O3
> +CFLAGS += $(WERROR_FLAGS)
> +
> +EXPORT_MAP := rte_ctrl_if_version.map
> +
> +LIBABIVER := 1
> +
> +SRCS-y += rte_ctrl_if.c
> +SRCS-y += rte_nl.c
> +SRCS-y += rte_ethtool.c
> +
> +#
> +# Export include files
> +#
> +SYMLINK-y-include += rte_ctrl_if.h
> +
> +# this lib depends upon:
> +DEPDIRS-y += lib/librte_ether
> +
> +include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_ctrl_if/rte_ctrl_if.c b/lib/librte_ctrl_if/rte_ctrl_if.c
> new file mode 100644
> index 0000000..d16398f
> --- /dev/null
> +++ b/lib/librte_ctrl_if/rte_ctrl_if.c
> @@ -0,0 +1,385 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2016 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <fcntl.h>
> +#include <unistd.h>
> +#include <time.h>
> +
> +#include <sys/ioctl.h>
> +#include <sys/socket.h>
> +#include <linux/netlink.h>
> +#include <linux/rtnetlink.h>
> +
> +#include <rte_ethdev.h>
> +#include "rte_ctrl_if.h"
> +#include "rte_nl.h"
> +
> +#define NAMESZ 32
> +#define IFNAME "dpdk"
> +#define BUFSZ 1024
> +
> +static int kcp_rtnl_fd = -1;
> +static int kcp_fd_ref;
> +
> +struct kcp_request {
> +	struct nlmsghdr nlmsg;
> +	char buf[BUFSZ];
> +};
> +
> +static int
> +conrol_interface_rtnl_init(void)
> +{
> +	struct sockaddr_nl src;
> +	int ret;
> +
> +	kcp_rtnl_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
> +	if (kcp_rtnl_fd < 0) {
> +		RTE_LOG(ERR, CTRL_IF, "socket for create failed.\n");
> +		return -1;
> +	}
> +
> +	memset(&src, 0, sizeof(struct sockaddr_nl));
> +
> +	src.nl_family = AF_NETLINK;
> +	src.nl_pid = getpid();
> +
> +	ret = bind(kcp_rtnl_fd, (struct sockaddr *)&src,
> +			sizeof(struct sockaddr_nl));
> +	if (ret < 0) {
> +		RTE_LOG(ERR, CTRL_IF, "Bind for create failed.\n");
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +static int
> +control_interface_init(void)
> +{
> +	int ret;
> +
> +	ret = conrol_interface_rtnl_init();
> +	if (ret < 0) {
> +		RTE_LOG(ERR, CTRL_IF, "Failed to initialize rtnetlink.\n");
> +		return -1;
> +	}
> +
> +	ret = control_interface_nl_init();
> +	if (ret < 0) {
> +		RTE_LOG(ERR, CTRL_IF, "Failed to initialize netlink.\n");
> +		close(kcp_rtnl_fd);
> +		kcp_rtnl_fd = -1;
> +	}
> +
> +	return ret;
> +}
> +
> +static int
> +control_interface_ref_get(void)
> +{
> +	int ret = 0;
> +
> +	if (kcp_fd_ref == 0)
> +		ret = control_interface_init();
> +
> +	if (ret == 0)
> +		kcp_fd_ref++;
> +	else
> +		RTE_LOG(ERR, CTRL_IF,
> +				"Failed to initialize control interface.\n");
> +
> +	return kcp_fd_ref;
> +}
> +
> +static void
> +control_interface_release(void)
> +{
> +	close(kcp_rtnl_fd);
> +	control_interface_nl_release();
> +}
> +
> +static int
> +control_interface_ref_put(void)
> +{
> +	if (kcp_fd_ref == 0)
> +		return 0;
> +
> +	kcp_fd_ref--;
> +
> +	if (kcp_fd_ref == 0)
> +		control_interface_release();
> +
> +	return kcp_fd_ref;
> +}
> +
> +static int
> +add_attr(struct kcp_request *req, unsigned short type, void *buf, size_t len)
> +{
> +	struct rtattr *rta;
> +	int nlmsg_len;
> +
> +	nlmsg_len = NLMSG_ALIGN(req->nlmsg.nlmsg_len);
> +	rta = (struct rtattr *)((char *)&req->nlmsg + nlmsg_len);
> +	if (nlmsg_len + RTA_LENGTH(len) > sizeof(struct kcp_request))
> +		return -1;
> +	rta->rta_type = type;
> +	rta->rta_len = RTA_LENGTH(len);
> +	memcpy(RTA_DATA(rta), buf, len);
> +	req->nlmsg.nlmsg_len = nlmsg_len + RTA_LENGTH(len);
> +
> +	return 0;
> +}
> +
> +static struct
> +rtattr *add_attr_nested(struct kcp_request *req, unsigned short type)
> +{
> +	struct rtattr *rta;
> +	int nlmsg_len;
> +
> +	nlmsg_len = NLMSG_ALIGN(req->nlmsg.nlmsg_len);
> +	rta = (struct rtattr *)((char *)&req->nlmsg + nlmsg_len);
> +	if (nlmsg_len + RTA_LENGTH(0) > sizeof(struct kcp_request))
> +		return NULL;
> +	rta->rta_type = type;
> +	rta->rta_len = nlmsg_len;
> +	req->nlmsg.nlmsg_len = nlmsg_len + RTA_LENGTH(0);
> +
> +	return rta;
> +}
> +
> +static void
> +end_attr_nested(struct kcp_request *req, struct rtattr *rta)
> +{
> +	rta->rta_len = req->nlmsg.nlmsg_len - rta->rta_len;
> +}
> +
> +static int
> +rte_eth_rtnl_create(uint8_t port_id)
> +{
> +	struct kcp_request req;
> +	struct ifinfomsg *info;
> +	struct rtattr *rta1;
> +	struct rtattr *rta2;
> +	unsigned int pid = getpid();
> +	char name[NAMESZ];
> +	char type[NAMESZ];
> +	struct iovec iov;
> +	struct msghdr msg;
> +	struct sockaddr_nl nladdr;
> +	int ret;
> +	char buf[BUFSZ];
> +
> +	memset(&req, 0, sizeof(struct kcp_request));
> +
> +	req.nlmsg.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
> +	req.nlmsg.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
> +	req.nlmsg.nlmsg_flags |= NLM_F_ACK;
> +	req.nlmsg.nlmsg_type = RTM_NEWLINK;
> +
> +	info = NLMSG_DATA(&req.nlmsg);
> +
> +	info->ifi_family = AF_UNSPEC;
> +	info->ifi_index = 0;
> +
> +	snprintf(name, NAMESZ, IFNAME"%u", port_id);
> +	ret = add_attr(&req, IFLA_IFNAME, name, strlen(name) + 1);
> +	if (ret < 0)
> +		return -1;
> +
> +	rta1 = add_attr_nested(&req, IFLA_LINKINFO);
> +	if (rta1 == NULL)
> +		return -1;
> +
> +	snprintf(type, NAMESZ, KCP_DEVICE);
> +	ret = add_attr(&req, IFLA_INFO_KIND, type, strlen(type) + 1);
> +	if (ret < 0)
> +		return -1;
> +
> +	rta2 = add_attr_nested(&req, IFLA_INFO_DATA);
> +	if (rta2 == NULL)
> +		return -1;
> +
> +	ret = add_attr(&req, IFLA_KCP_PORTID, &port_id, sizeof(uint8_t));
> +	if (ret < 0)
> +		return -1;
> +
> +	ret = add_attr(&req, IFLA_KCP_PID, &pid, sizeof(unsigned int));
> +	if (ret < 0)
> +		return -1;
> +
> +	end_attr_nested(&req, rta2);
> +	end_attr_nested(&req, rta1);
> +
> +	memset(&nladdr, 0, sizeof(nladdr));
> +	nladdr.nl_family = AF_NETLINK;
> +
> +	iov.iov_base = (void *)&req.nlmsg;
> +	iov.iov_len = req.nlmsg.nlmsg_len;
> +
> +	memset(&msg, 0, sizeof(struct msghdr));
> +	msg.msg_name = &nladdr;
> +	msg.msg_namelen = sizeof(nladdr);
> +	msg.msg_iov = &iov;
> +	msg.msg_iovlen = 1;
> +
> +	ret = sendmsg(kcp_rtnl_fd, &msg, 0);
> +	if (ret < 0) {
> +		RTE_LOG(ERR, CTRL_IF, "Send for create failed %d.\n", errno);
> +		return -1;
> +	}
> +
> +	memset(buf, 0, sizeof(buf));
> +	iov.iov_base = buf;
> +	iov.iov_len = sizeof(buf);
> +
> +	ret = recvmsg(kcp_rtnl_fd, &msg, 0);
> +	if (ret < 0) {
> +		RTE_LOG(ERR, CTRL_IF, "Recv for create failed.\n");
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +

It is probably would be good to make
rte_eth_control_interface_create_one()/rte_eth_control_interface_destroy_one()
a public API, so the user can decide which ports to expose to KCP, which not.
If that is not possible by some reason - then seems no reason to keep 
control_interface_ref_get/ control_interface_ref_put.

> +static int
> +rte_eth_control_interface_create_one(uint8_t port_id)
> +{
> +	int ret;
> +
> +	if (control_interface_ref_get() != 0) {
> +		ret = rte_eth_rtnl_create(port_id);
> +		RTE_LOG(DEBUG, CTRL_IF,
> +			"Control interface %s for port:%u\n",
> +			ret < 0 ? "failed" : "created", port_id);
> +	}
> +
> +	return 0;
> +}
> +
> +int
> +rte_eth_control_interface_create(void)
> +{
> +	int i;
> +	int ret = 0;
> +
> +	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
> +		if (rte_eth_dev_is_valid_port(i)) {
> +			ret = rte_eth_control_interface_create_one(i);
> +			if (ret < 0)
> +				return ret;
> +		}
> +	}
> +
> +	return ret;
> +}
> +
> +static int
> +rte_eth_rtnl_destroy(uint8_t port_id)
> +{
> +	struct kcp_request req;
> +	struct ifinfomsg *info;
> +	char name[NAMESZ];
> +	struct iovec iov;
> +	struct msghdr msg;
> +	struct sockaddr_nl nladdr;
> +	int ret;
> +
> +	memset(&req, 0, sizeof(struct kcp_request));
> +
> +	req.nlmsg.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
> +	req.nlmsg.nlmsg_flags = NLM_F_REQUEST;
> +	req.nlmsg.nlmsg_type = RTM_DELLINK;
> +
> +	info = NLMSG_DATA(&req.nlmsg);
> +
> +	info->ifi_family = AF_UNSPEC;
> +	info->ifi_index = 0;
> +
> +	snprintf(name, NAMESZ, IFNAME"%u", port_id);
> +	ret = add_attr(&req, IFLA_IFNAME, name, strlen(name) + 1);
> +	if (ret < 0)
> +		return -1;
> +
> +	memset(&nladdr, 0, sizeof(nladdr));
> +	nladdr.nl_family = AF_NETLINK;
> +
> +	iov.iov_base = (void *)&req.nlmsg;
> +	iov.iov_len = req.nlmsg.nlmsg_len;
> +
> +	memset(&msg, 0, sizeof(struct msghdr));
> +	msg.msg_name = &nladdr;
> +	msg.msg_namelen = sizeof(nladdr);
> +	msg.msg_iov = &iov;
> +	msg.msg_iovlen = 1;
> +
> +	ret = sendmsg(kcp_rtnl_fd, &msg, 0);
> +	if (ret < 0) {
> +		RTE_LOG(ERR, CTRL_IF, "Send for destroy failed.\n");
> +		return -1;
> +	}
> +	return 0;
> +}
> +
> +static int
> +rte_eth_control_interface_destroy_one(uint8_t port_id)
> +{
> +	rte_eth_rtnl_destroy(port_id);
> +	control_interface_ref_put();
> +	RTE_LOG(DEBUG, CTRL_IF, "Control interface destroyed for port:%u\n",
> +			port_id);
> +
> +	return 0;
> +}
> +
> +int
> +rte_eth_control_interface_destroy(void)
> +{
> +	int i;
> +	int ret = 0;
> +
> +	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
> +		if (rte_eth_dev_is_valid_port(i)) {
> +			ret = rte_eth_control_interface_destroy_one(i);
> +			if (ret < 0)
> +				return ret;
> +		}
> +	}
> +
> +	return ret;
> +}
> +
> +int
> +rte_eth_control_interface_process_msg(int flag, unsigned int timeout_sec)
> +{
> +	return control_interface_process_msg(flag, timeout_sec);
> +}
> +};
....

> diff --git a/lib/librte_ctrl_if/rte_nl.c b/lib/librte_ctrl_if/rte_nl.c
> new file mode 100644
> index 0000000..adc5fa8
> --- /dev/null
> +++ b/lib/librte_ctrl_if/rte_nl.c
> @@ -0,0 +1,274 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2016 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include <unistd.h>
> +
> +#include <sys/socket.h>
> +#include <linux/netlink.h>
> +
> +#include <rte_spinlock.h>
> +#include <rte_ethdev.h>
> +#include "rte_ethtool.h"
> +#include "rte_nl.h"
> +#include "rte_ctrl_if.h"
> +
> +#define MAX_PAYLOAD sizeof(struct kcp_ethtool_msg)
> +
> +struct ctrl_if_nl {
> +	union {
> +		struct nlmsghdr nlh;
> +		uint8_t nlmsg[NLMSG_SPACE(MAX_PAYLOAD)];
> +	};
> +	struct msghdr msg;
> +	struct iovec iov;
> +};
> +
> +static int sock_fd = -1;
> +pthread_t thread_id;
> +
> +struct sockaddr_nl dest_addr;
> +
> +pthread_cond_t cond  = PTHREAD_COND_INITIALIZER;
> +pthread_mutex_t msg_lock = PTHREAD_MUTEX_INITIALIZER;

Could you group related global variables into some struct.
Then at least people would realise what lock and cond are supposed to synchronise.
Another good thing to do - make them static, if possible. 

> +
> +static struct ctrl_if_nl nl_s;
> +static struct ctrl_if_nl nl_r;
> +
> +static int kcp_ethtool_msg_count;
> +static struct kcp_ethtool_msg msg_storage;
> +
> +static int
> +nl_send(void *buf, size_t len)
> +{
> +	int ret;
> +
> +	if (nl_s.nlh.nlmsg_len < len) {
> +		RTE_LOG(ERR, CTRL_IF, "Message is too big, len:%zu\n", len);
> +		return -1;
> +	}
> +
> +	if (!NLMSG_OK(&nl_s.nlh, NLMSG_SPACE(MAX_PAYLOAD))) {
> +		RTE_LOG(ERR, CTRL_IF, "Message is not OK\n");
> +		return -1;
> +	}
> +
> +	/* Fill in the netlink message payload */
> +	memcpy(NLMSG_DATA(nl_s.nlmsg), buf, len);
> +
> +	ret = sendmsg(sock_fd, &nl_s.msg, 0);
> +
> +	if (ret < 0)
> +		RTE_LOG(ERR, CTRL_IF, "Failed nl msg send. ret:%d, err:%d\n",
> +				ret, errno);
> +	return ret;
> +}
> +
> +static int
> +nl_ethtool_msg_send(struct kcp_ethtool_msg *msg)
> +{
> +	return nl_send((void *)msg, sizeof(struct kcp_ethtool_msg));
> +}
> +
> +static void
> +process_msg(struct kcp_ethtool_msg *msg)
> +{
> +	if (msg->cmd_id > RTE_KCP_REQ_UNKNOWN) {
> +		msg->err = rte_eth_dev_control_process(msg->cmd_id,
> +				msg->port_id, msg->input_buffer,
> +				msg->output_buffer, &msg->output_buffer_len);
> +	} else {
> +		msg->err = rte_eth_dev_ethtool_process(msg->cmd_id,
> +				msg->port_id, msg->input_buffer,
> +				msg->output_buffer, &msg->output_buffer_len);
> +	}
> +
> +	if (msg->err)
> +		memset(msg->output_buffer, 0, msg->output_buffer_len);
> +
> +	nl_ethtool_msg_send(msg);
> +}
> +
> +int
> +control_interface_process_msg(int flag, unsigned int timeout_sec)
> +{
> +	int ret = 0;
> +	struct timespec ts;
> +
> +	pthread_mutex_lock(&msg_lock);
> +
> +	clock_gettime(CLOCK_REALTIME, &ts);
> +	ts.tv_sec += timeout_sec;
> +	while (timeout_sec && !kcp_ethtool_msg_count && !ret)
> +		ret = pthread_cond_timedwait(&cond, &msg_lock, &ts);


Better to avoid holding lock while calling process_msg().
That is a good programming practice, as msg_lock protects only contents of the msg_storage.
Again if by some reason process() would take a while, wouldn't stop your control thread. 
lock(); copy_message_into_local_buffer; unlock(); process();

> +
> +	switch (flag) {
> +	case RTE_ETHTOOL_CTRL_IF_PROCESS_MSG:
> +		if (kcp_ethtool_msg_count) {
> +			process_msg(&msg_storage);
> +			kcp_ethtool_msg_count = 0;
> +		}
> +		ret = 0;
> +		break;
> +
> +	case RTE_ETHTOOL_CTRL_IF_DISCARD_MSG:
> +		if (kcp_ethtool_msg_count) {
> +			msg_storage.err = -1;
> +			nl_ethtool_msg_send(&msg_storage);
> +			kcp_ethtool_msg_count = 0;
> +		}
> +		ret = 0;
> +		break;
> +
> +	default:
> +		ret = -1;
> +		break;
> +	}
> +	pthread_mutex_unlock(&msg_lock);
> +
> +	return ret;
> +}
> +


More information about the dev mailing list