[dpdk-dev] [PATCH v1] net/mlx: control netdevices through ioctl only

Shahaf Shuler shahafs at mellanox.com
Wed Feb 28 09:07:41 CET 2018


Friday, February 9, 2018 10:34 PM, Marcelo Ricardo Leitner:
> 
> On Thu, Feb 08, 2018 at 05:37:06PM +0100, Adrien Mazarguil wrote:
> > Several control operations implemented by these PMDs affect netdevices
> > through sysfs, itself subject to file system permission checks
> > enforced by the kernel, which limits their use for most purposes to
> > applications running with root privileges.
> >
> > Since performing the same operations through ioctl() requires fewer
> > capabilities (only CAP_NET_ADMIN) and given the remaining operations
> > are already implemented this way, this patch standardizes on ioctl()
> > and gets rid of redundant code.
> >
> > Signed-off-by: Adrien Mazarguil <adrien.mazarguil at 6wind.com>
> 
> Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner at gmail.com>

Applied to next-net-mlx, thanks. 

> 
> > ---
> >  drivers/net/mlx4/mlx4_ethdev.c | 192 ++-------------------------
> >  drivers/net/mlx5/mlx5.h        |   2 -
> >  drivers/net/mlx5/mlx5_ethdev.c | 255
> > ++++--------------------------------
> >  drivers/net/mlx5/mlx5_stats.c  |  28 +++-
> >  4 files changed, 63 insertions(+), 414 deletions(-)
> >
> > diff --git a/drivers/net/mlx4/mlx4_ethdev.c
> > b/drivers/net/mlx4/mlx4_ethdev.c index 3bc692731..fbeef16c8 100644
> > --- a/drivers/net/mlx4/mlx4_ethdev.c
> > +++ b/drivers/net/mlx4/mlx4_ethdev.c
> > @@ -132,167 +132,6 @@ mlx4_get_ifname(const struct priv *priv, char
> > (*ifname)[IF_NAMESIZE])  }
> >
> >  /**
> > - * Read from sysfs entry.
> > - *
> > - * @param[in] priv
> > - *   Pointer to private structure.
> > - * @param[in] entry
> > - *   Entry name relative to sysfs path.
> > - * @param[out] buf
> > - *   Data output buffer.
> > - * @param size
> > - *   Buffer size.
> > - *
> > - * @return
> > - *   Number of bytes read on success, negative errno value otherwise and
> > - *   rte_errno is set.
> > - */
> > -static int
> > -mlx4_sysfs_read(const struct priv *priv, const char *entry,
> > -		char *buf, size_t size)
> > -{
> > -	char ifname[IF_NAMESIZE];
> > -	FILE *file;
> > -	int ret;
> > -
> > -	ret = mlx4_get_ifname(priv, &ifname);
> > -	if (ret)
> > -		return ret;
> > -
> > -	MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device-
> >ibdev_path,
> > -	      ifname, entry);
> > -
> > -	file = fopen(path, "rb");
> > -	if (file == NULL) {
> > -		rte_errno = errno;
> > -		return -rte_errno;
> > -	}
> > -	ret = fread(buf, 1, size, file);
> > -	if ((size_t)ret < size && ferror(file)) {
> > -		rte_errno = EIO;
> > -		ret = -rte_errno;
> > -	} else {
> > -		ret = size;
> > -	}
> > -	fclose(file);
> > -	return ret;
> > -}
> > -
> > -/**
> > - * Write to sysfs entry.
> > - *
> > - * @param[in] priv
> > - *   Pointer to private structure.
> > - * @param[in] entry
> > - *   Entry name relative to sysfs path.
> > - * @param[in] buf
> > - *   Data buffer.
> > - * @param size
> > - *   Buffer size.
> > - *
> > - * @return
> > - *   Number of bytes written on success, negative errno value otherwise
> and
> > - *   rte_errno is set.
> > - */
> > -static int
> > -mlx4_sysfs_write(const struct priv *priv, const char *entry,
> > -		 char *buf, size_t size)
> > -{
> > -	char ifname[IF_NAMESIZE];
> > -	FILE *file;
> > -	int ret;
> > -
> > -	ret = mlx4_get_ifname(priv, &ifname);
> > -	if (ret)
> > -		return ret;
> > -
> > -	MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device-
> >ibdev_path,
> > -	      ifname, entry);
> > -
> > -	file = fopen(path, "wb");
> > -	if (file == NULL) {
> > -		rte_errno = errno;
> > -		return -rte_errno;
> > -	}
> > -	ret = fwrite(buf, 1, size, file);
> > -	if ((size_t)ret < size || ferror(file)) {
> > -		rte_errno = EIO;
> > -		ret = -rte_errno;
> > -	} else {
> > -		ret = size;
> > -	}
> > -	fclose(file);
> > -	return ret;
> > -}
> > -
> > -/**
> > - * Get unsigned long sysfs property.
> > - *
> > - * @param priv
> > - *   Pointer to private structure.
> > - * @param[in] name
> > - *   Entry name relative to sysfs path.
> > - * @param[out] value
> > - *   Value output buffer.
> > - *
> > - * @return
> > - *   0 on success, negative errno value otherwise and rte_errno is set.
> > - */
> > -static int
> > -mlx4_get_sysfs_ulong(struct priv *priv, const char *name, unsigned
> > long *value) -{
> > -	int ret;
> > -	unsigned long value_ret;
> > -	char value_str[32];
> > -
> > -	ret = mlx4_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1));
> > -	if (ret < 0) {
> > -		DEBUG("cannot read %s value from sysfs: %s",
> > -		      name, strerror(rte_errno));
> > -		return ret;
> > -	}
> > -	value_str[ret] = '\0';
> > -	errno = 0;
> > -	value_ret = strtoul(value_str, NULL, 0);
> > -	if (errno) {
> > -		rte_errno = errno;
> > -		DEBUG("invalid %s value `%s': %s", name, value_str,
> > -		      strerror(rte_errno));
> > -		return -rte_errno;
> > -	}
> > -	*value = value_ret;
> > -	return 0;
> > -}
> > -
> > -/**
> > - * Set unsigned long sysfs property.
> > - *
> > - * @param priv
> > - *   Pointer to private structure.
> > - * @param[in] name
> > - *   Entry name relative to sysfs path.
> > - * @param value
> > - *   Value to set.
> > - *
> > - * @return
> > - *   0 on success, negative errno value otherwise and rte_errno is set.
> > - */
> > -static int
> > -mlx4_set_sysfs_ulong(struct priv *priv, const char *name, unsigned
> > long value) -{
> > -	int ret;
> > -	MKSTR(value_str, "%lu", value);
> > -
> > -	ret = mlx4_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
> > -	if (ret < 0) {
> > -		DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
> > -		      name, value_str, value, strerror(rte_errno));
> > -		return ret;
> > -	}
> > -	return 0;
> > -}
> > -
> > -/**
> >   * Perform ifreq ioctl() on associated Ethernet device.
> >   *
> >   * @param[in] priv
> > @@ -361,12 +200,12 @@ mlx4_get_mac(struct priv *priv, uint8_t
> > (*mac)[ETHER_ADDR_LEN])  int  mlx4_mtu_get(struct priv *priv, uint16_t
> > *mtu)  {
> > -	unsigned long ulong_mtu = 0;
> > -	int ret = mlx4_get_sysfs_ulong(priv, "mtu", &ulong_mtu);
> > +	struct ifreq request;
> > +	int ret = mlx4_ifreq(priv, SIOCGIFMTU, &request);
> >
> >  	if (ret)
> >  		return ret;
> > -	*mtu = ulong_mtu;
> > +	*mtu = request.ifr_mtu;
> >  	return 0;
> >  }
> >
> > @@ -385,20 +224,13 @@ int
> >  mlx4_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)  {
> >  	struct priv *priv = dev->data->dev_private;
> > -	uint16_t new_mtu;
> > -	int ret = mlx4_set_sysfs_ulong(priv, "mtu", mtu);
> > +	struct ifreq request = { .ifr_mtu = mtu, };
> > +	int ret = mlx4_ifreq(priv, SIOCSIFMTU, &request);
> >
> >  	if (ret)
> >  		return ret;
> > -	ret = mlx4_mtu_get(priv, &new_mtu);
> > -	if (ret)
> > -		return ret;
> > -	if (new_mtu == mtu) {
> > -		priv->mtu = mtu;
> > -		return 0;
> > -	}
> > -	rte_errno = EINVAL;
> > -	return -rte_errno;
> > +	priv->mtu = mtu;
> > +	return 0;
> >  }
> >
> >  /**
> > @@ -417,14 +249,14 @@ mlx4_mtu_set(struct rte_eth_dev *dev, uint16_t
> > mtu)  static int  mlx4_set_flags(struct priv *priv, unsigned int keep,
> > unsigned int flags)  {
> > -	unsigned long tmp = 0;
> > -	int ret = mlx4_get_sysfs_ulong(priv, "flags", &tmp);
> > +	struct ifreq request;
> > +	int ret = mlx4_ifreq(priv, SIOCGIFFLAGS, &request);
> >
> >  	if (ret)
> >  		return ret;
> > -	tmp &= keep;
> > -	tmp |= (flags & (~keep));
> > -	return mlx4_set_sysfs_ulong(priv, "flags", tmp);
> > +	request.ifr_flags &= keep;
> > +	request.ifr_flags |= flags & ~keep;
> > +	return mlx4_ifreq(priv, SIOCSIFFLAGS, &request);
> >  }
> >
> >  /**
> > diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> > 965c19f21..da44faaf4 100644
> > --- a/drivers/net/mlx5/mlx5.h
> > +++ b/drivers/net/mlx5/mlx5.h
> > @@ -209,8 +209,6 @@ struct priv *mlx5_get_priv(struct rte_eth_dev
> > *dev);  int mlx5_is_secondary(void);  int priv_get_ifname(const struct
> > priv *, char (*)[IF_NAMESIZE]);  int priv_ifreq(const struct priv *,
> > int req, struct ifreq *); -int priv_is_ib_cntr(const char *); -int
> > priv_get_cntr_sysfs(struct priv *, const char *, uint64_t *);  int
> > priv_get_num_vfs(struct priv *, uint16_t *);  int priv_get_mtu(struct
> > priv *, uint16_t *);  int priv_set_flags(struct priv *, unsigned int,
> > unsigned int); diff --git a/drivers/net/mlx5/mlx5_ethdev.c
> > b/drivers/net/mlx5/mlx5_ethdev.c index 666507691..b73cb53df 100644
> > --- a/drivers/net/mlx5/mlx5_ethdev.c
> > +++ b/drivers/net/mlx5/mlx5_ethdev.c
> > @@ -7,6 +7,7 @@
> >
> >  #include <stddef.h>
> >  #include <assert.h>
> > +#include <inttypes.h>
> >  #include <unistd.h>
> >  #include <stdint.h>
> >  #include <stdio.h>
> > @@ -173,181 +174,6 @@ priv_get_ifname(const struct priv *priv, char
> > (*ifname)[IF_NAMESIZE])  }
> >
> >  /**
> > - * Check if the counter is located on ib counters file.
> > - *
> > - * @param[in] cntr
> > - *   Counter name.
> > - *
> > - * @return
> > - *   1 if counter is located on ib counters file , 0 otherwise.
> > - */
> > -int
> > -priv_is_ib_cntr(const char *cntr)
> > -{
> > -	if (!strcmp(cntr, "out_of_buffer"))
> > -		return 1;
> > -	return 0;
> > -}
> > -
> > -/**
> > - * Read from sysfs entry.
> > - *
> > - * @param[in] priv
> > - *   Pointer to private structure.
> > - * @param[in] entry
> > - *   Entry name relative to sysfs path.
> > - * @param[out] buf
> > - *   Data output buffer.
> > - * @param size
> > - *   Buffer size.
> > - *
> > - * @return
> > - *   0 on success, -1 on failure and errno is set.
> > - */
> > -static int
> > -priv_sysfs_read(const struct priv *priv, const char *entry,
> > -		char *buf, size_t size)
> > -{
> > -	char ifname[IF_NAMESIZE];
> > -	FILE *file;
> > -	int ret;
> > -	int err;
> > -
> > -	if (priv_get_ifname(priv, &ifname))
> > -		return -1;
> > -
> > -	if (priv_is_ib_cntr(entry)) {
> > -		MKSTR(path, "%s/ports/1/hw_counters/%s",
> > -		      priv->ibdev_path, entry);
> > -		file = fopen(path, "rb");
> > -	} else {
> > -		MKSTR(path, "%s/device/net/%s/%s",
> > -		      priv->ibdev_path, ifname, entry);
> > -		file = fopen(path, "rb");
> > -	}
> > -	if (file == NULL)
> > -		return -1;
> > -	ret = fread(buf, 1, size, file);
> > -	err = errno;
> > -	if (((size_t)ret < size) && (ferror(file)))
> > -		ret = -1;
> > -	else
> > -		ret = size;
> > -	fclose(file);
> > -	errno = err;
> > -	return ret;
> > -}
> > -
> > -/**
> > - * Write to sysfs entry.
> > - *
> > - * @param[in] priv
> > - *   Pointer to private structure.
> > - * @param[in] entry
> > - *   Entry name relative to sysfs path.
> > - * @param[in] buf
> > - *   Data buffer.
> > - * @param size
> > - *   Buffer size.
> > - *
> > - * @return
> > - *   0 on success, -1 on failure and errno is set.
> > - */
> > -static int
> > -priv_sysfs_write(const struct priv *priv, const char *entry,
> > -		 char *buf, size_t size)
> > -{
> > -	char ifname[IF_NAMESIZE];
> > -	FILE *file;
> > -	int ret;
> > -	int err;
> > -
> > -	if (priv_get_ifname(priv, &ifname))
> > -		return -1;
> > -
> > -	MKSTR(path, "%s/device/net/%s/%s", priv->ibdev_path, ifname,
> entry);
> > -
> > -	file = fopen(path, "wb");
> > -	if (file == NULL)
> > -		return -1;
> > -	ret = fwrite(buf, 1, size, file);
> > -	err = errno;
> > -	if (((size_t)ret < size) || (ferror(file)))
> > -		ret = -1;
> > -	else
> > -		ret = size;
> > -	fclose(file);
> > -	errno = err;
> > -	return ret;
> > -}
> > -
> > -/**
> > - * Get unsigned long sysfs property.
> > - *
> > - * @param priv
> > - *   Pointer to private structure.
> > - * @param[in] name
> > - *   Entry name relative to sysfs path.
> > - * @param[out] value
> > - *   Value output buffer.
> > - *
> > - * @return
> > - *   0 on success, -1 on failure and errno is set.
> > - */
> > -static int
> > -priv_get_sysfs_ulong(struct priv *priv, const char *name, unsigned
> > long *value) -{
> > -	int ret;
> > -	unsigned long value_ret;
> > -	char value_str[32];
> > -
> > -	ret = priv_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1));
> > -	if (ret == -1) {
> > -		DEBUG("cannot read %s value from sysfs: %s",
> > -		      name, strerror(errno));
> > -		return -1;
> > -	}
> > -	value_str[ret] = '\0';
> > -	errno = 0;
> > -	value_ret = strtoul(value_str, NULL, 0);
> > -	if (errno) {
> > -		DEBUG("invalid %s value `%s': %s", name, value_str,
> > -		      strerror(errno));
> > -		return -1;
> > -	}
> > -	*value = value_ret;
> > -	return 0;
> > -}
> > -
> > -/**
> > - * Set unsigned long sysfs property.
> > - *
> > - * @param priv
> > - *   Pointer to private structure.
> > - * @param[in] name
> > - *   Entry name relative to sysfs path.
> > - * @param value
> > - *   Value to set.
> > - *
> > - * @return
> > - *   0 on success, -1 on failure and errno is set.
> > - */
> > -static int
> > -priv_set_sysfs_ulong(struct priv *priv, const char *name, unsigned
> > long value) -{
> > -	int ret;
> > -	MKSTR(value_str, "%lu", value);
> > -
> > -	ret = priv_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
> > -	if (ret == -1) {
> > -		DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
> > -		      name, value_str, value, strerror(errno));
> > -		return -1;
> > -	}
> > -	return 0;
> > -}
> > -
> > -/**
> >   * Perform ifreq ioctl() on associated Ethernet device.
> >   *
> >   * @param[in] priv
> > @@ -390,20 +216,25 @@ priv_get_num_vfs(struct priv *priv, uint16_t
> > *num_vfs)  {
> >  	/* The sysfs entry name depends on the operating system. */
> >  	const char **name = (const char *[]){
> > -		"device/sriov_numvfs",
> > -		"device/mlx5_num_vfs",
> > +		"sriov_numvfs",
> > +		"mlx5_num_vfs",
> >  		NULL,
> >  	};
> > -	int ret;
> >
> >  	do {
> > -		unsigned long ulong_num_vfs;
> > +		int n;
> > +		FILE *file;
> > +		MKSTR(path, "%s/device/%s", priv->ibdev_path, *name);
> >
> > -		ret = priv_get_sysfs_ulong(priv, *name, &ulong_num_vfs);
> > -		if (!ret)
> > -			*num_vfs = ulong_num_vfs;
> > -	} while (*(++name) && ret);
> > -	return ret;
> > +		file = fopen(path, "rb");
> > +		if (!file)
> > +			continue;
> > +		n = fscanf(file, "%" SCNu16, num_vfs);
> > +		fclose(file);
> > +		if (n == 1)
> > +			return 0;
> > +	} while (*(++name));
> > +	return -1;
> >  }
> >
> >  /**
> > @@ -420,35 +251,12 @@ priv_get_num_vfs(struct priv *priv, uint16_t
> > *num_vfs)  int  priv_get_mtu(struct priv *priv, uint16_t *mtu)  {
> > -	unsigned long ulong_mtu;
> > +	struct ifreq request;
> > +	int ret = priv_ifreq(priv, SIOCGIFMTU, &request);
> >
> > -	if (priv_get_sysfs_ulong(priv, "mtu", &ulong_mtu) == -1)
> > -		return -1;
> > -	*mtu = ulong_mtu;
> > -	return 0;
> > -}
> > -
> > -/**
> > - * Read device counter from sysfs.
> > - *
> > - * @param priv
> > - *   Pointer to private structure.
> > - * @param name
> > - *   Counter name.
> > - * @param[out] cntr
> > - *   Counter output buffer.
> > - *
> > - * @return
> > - *   0 on success, -1 on failure and errno is set.
> > - */
> > -int
> > -priv_get_cntr_sysfs(struct priv *priv, const char *name, uint64_t
> > *cntr) -{
> > -	unsigned long ulong_ctr;
> > -
> > -	if (priv_get_sysfs_ulong(priv, name, &ulong_ctr) == -1)
> > -		return -1;
> > -	*cntr = ulong_ctr;
> > +	if (ret)
> > +		return ret;
> > +	*mtu = request.ifr_mtu;
> >  	return 0;
> >  }
> >
> > @@ -466,15 +274,9 @@ priv_get_cntr_sysfs(struct priv *priv, const char
> > *name, uint64_t *cntr)  static int  priv_set_mtu(struct priv *priv,
> > uint16_t mtu)  {
> > -	uint16_t new_mtu;
> > +	struct ifreq request = { .ifr_mtu = mtu, };
> >
> > -	if (priv_set_sysfs_ulong(priv, "mtu", mtu) ||
> > -	    priv_get_mtu(priv, &new_mtu))
> > -		return -1;
> > -	if (new_mtu == mtu)
> > -		return 0;
> > -	errno = EINVAL;
> > -	return -1;
> > +	return priv_ifreq(priv, SIOCSIFMTU, &request);
> >  }
> >
> >  /**
> > @@ -493,13 +295,14 @@ priv_set_mtu(struct priv *priv, uint16_t mtu)
> > int  priv_set_flags(struct priv *priv, unsigned int keep, unsigned int
> > flags)  {
> > -	unsigned long tmp;
> > +	struct ifreq request;
> > +	int ret = priv_ifreq(priv, SIOCGIFFLAGS, &request);
> >
> > -	if (priv_get_sysfs_ulong(priv, "flags", &tmp) == -1)
> > -		return -1;
> > -	tmp &= keep;
> > -	tmp |= (flags & (~keep));
> > -	return priv_set_sysfs_ulong(priv, "flags", tmp);
> > +	if (ret)
> > +		return ret;
> > +	request.ifr_flags &= keep;
> > +	request.ifr_flags |= flags & ~keep;
> > +	return priv_ifreq(priv, SIOCSIFFLAGS, &request);
> >  }
> >
> >  /**
> > diff --git a/drivers/net/mlx5/mlx5_stats.c
> > b/drivers/net/mlx5/mlx5_stats.c index 378472a70..eb9c65dcc 100644
> > --- a/drivers/net/mlx5/mlx5_stats.c
> > +++ b/drivers/net/mlx5/mlx5_stats.c
> > @@ -3,8 +3,11 @@
> >   * Copyright 2015 Mellanox.
> >   */
> >
> > +#include <inttypes.h>
> >  #include <linux/sockios.h>
> >  #include <linux/ethtool.h>
> > +#include <stdint.h>
> > +#include <stdio.h>
> >
> >  #include <rte_ethdev_driver.h>
> >  #include <rte_common.h>
> > @@ -19,6 +22,7 @@ struct mlx5_counter_ctrl {
> >  	char dpdk_name[RTE_ETH_XSTATS_NAME_SIZE];
> >  	/* Name of the counter on the device table. */
> >  	char ctr_name[RTE_ETH_XSTATS_NAME_SIZE];
> > +	uint32_t ib:1; /**< Nonzero for IB counters. */
> >  };
> >
> >  static const struct mlx5_counter_ctrl mlx5_counters_init[] = { @@
> > -93,6 +97,7 @@ static const struct mlx5_counter_ctrl mlx5_counters_init[]
> = {
> >  	{
> >  		.dpdk_name = "rx_out_of_buffer",
> >  		.ctr_name = "out_of_buffer",
> > +		.ib = 1,
> >  	},
> >  	{
> >  		.dpdk_name = "tx_packets_phy",
> > @@ -143,13 +148,24 @@ priv_read_dev_counters(struct priv *priv,
> uint64_t *stats)
> >  		return -1;
> >  	}
> >  	for (i = 0; i != xstats_n; ++i) {
> > -		if (priv_is_ib_cntr(mlx5_counters_init[i].ctr_name))
> > -			priv_get_cntr_sysfs(priv,
> > -					    mlx5_counters_init[i].ctr_name,
> > -					    &stats[i]);
> > -		else
> > +		if (mlx5_counters_init[i].ib) {
> > +			FILE *file;
> > +			MKSTR(path, "%s/ports/1/hw_counters/%s",
> > +			      priv->ibdev_path,
> > +			      mlx5_counters_init[i].ctr_name);
> > +
> > +			file = fopen(path, "rb");
> > +			if (file) {
> > +				int n = fscanf(file, "%" SCNu64, &stats[i]);
> > +
> > +				fclose(file);
> > +				if (n != 1)
> > +					stats[i] = 0;
> > +			}
> > +		} else {
> >  			stats[i] = (uint64_t)
> >  				et_stats->data[xstats_ctrl-
> >dev_table_idx[i]];
> > +		}
> >  	}
> >  	return 0;
> >  }
> > @@ -232,7 +248,7 @@ priv_xstats_init(struct priv *priv)
> >  		}
> >  	}
> >  	for (j = 0; j != xstats_n; ++j) {
> > -		if (priv_is_ib_cntr(mlx5_counters_init[j].ctr_name))
> > +		if (mlx5_counters_init[j].ib)
> >  			continue;
> >  		if (xstats_ctrl->dev_table_idx[j] >= dev_stats_n) {
> >  			WARN("counter \"%s\" is not recognized",
> > --
> > 2.11.0


More information about the dev mailing list