[dpdk-dev] [PATCH v1] net/mlx: control netdevices through ioctl only

Shahaf Shuler shahafs at mellanox.com
Sun Feb 11 12:55:44 CET 2018


Hi Adrien,

Small doc issues.

Thursday, February 8, 2018 6:37 PM, Adrien Mazarguil:
> Subject: [PATCH v1] net/mlx: control netdevices through ioctl only
> 
> Several control operations implemented by these PMDs affect netdevices
> through sysfs, itself subject to file system permission checks enforced by the
> kernel, which limits their use for most purposes to applications running with
> root privileges.
> 
> Since performing the same operations through ioctl() requires fewer
> capabilities (only CAP_NET_ADMIN) and given the remaining operations are
> already implemented this way, this patch standardizes on ioctl() and gets rid
> of redundant code.
> 
> Signed-off-by: Adrien Mazarguil <adrien.mazarguil at 6wind.com>
> ---
>  drivers/net/mlx4/mlx4_ethdev.c | 192 ++-------------------------
>  drivers/net/mlx5/mlx5.h        |   2 -
>  drivers/net/mlx5/mlx5_ethdev.c | 255 ++++--------------------------------
>  drivers/net/mlx5/mlx5_stats.c  |  28 +++-
>  4 files changed, 63 insertions(+), 414 deletions(-)
> 
> diff --git a/drivers/net/mlx4/mlx4_ethdev.c
> b/drivers/net/mlx4/mlx4_ethdev.c index 3bc692731..fbeef16c8 100644
> --- a/drivers/net/mlx4/mlx4_ethdev.c
> +++ b/drivers/net/mlx4/mlx4_ethdev.c
> @@ -132,167 +132,6 @@ mlx4_get_ifname(const struct priv *priv, char
> (*ifname)[IF_NAMESIZE])  }
> 
>  /**
> - * Read from sysfs entry.
> - *
> - * @param[in] priv
> - *   Pointer to private structure.
> - * @param[in] entry
> - *   Entry name relative to sysfs path.
> - * @param[out] buf
> - *   Data output buffer.
> - * @param size
> - *   Buffer size.
> - *
> - * @return
> - *   Number of bytes read on success, negative errno value otherwise and
> - *   rte_errno is set.
> - */
> -static int
> -mlx4_sysfs_read(const struct priv *priv, const char *entry,
> -		char *buf, size_t size)
> -{
> -	char ifname[IF_NAMESIZE];
> -	FILE *file;
> -	int ret;
> -
> -	ret = mlx4_get_ifname(priv, &ifname);
> -	if (ret)
> -		return ret;
> -
> -	MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device-
> >ibdev_path,
> -	      ifname, entry);
> -
> -	file = fopen(path, "rb");
> -	if (file == NULL) {
> -		rte_errno = errno;
> -		return -rte_errno;
> -	}
> -	ret = fread(buf, 1, size, file);
> -	if ((size_t)ret < size && ferror(file)) {
> -		rte_errno = EIO;
> -		ret = -rte_errno;
> -	} else {
> -		ret = size;
> -	}
> -	fclose(file);
> -	return ret;
> -}
> -
> -/**
> - * Write to sysfs entry.
> - *
> - * @param[in] priv
> - *   Pointer to private structure.
> - * @param[in] entry
> - *   Entry name relative to sysfs path.
> - * @param[in] buf
> - *   Data buffer.
> - * @param size
> - *   Buffer size.
> - *
> - * @return
> - *   Number of bytes written on success, negative errno value otherwise and
> - *   rte_errno is set.
> - */
> -static int
> -mlx4_sysfs_write(const struct priv *priv, const char *entry,
> -		 char *buf, size_t size)
> -{
> -	char ifname[IF_NAMESIZE];
> -	FILE *file;
> -	int ret;
> -
> -	ret = mlx4_get_ifname(priv, &ifname);
> -	if (ret)
> -		return ret;
> -
> -	MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device-
> >ibdev_path,
> -	      ifname, entry);
> -
> -	file = fopen(path, "wb");
> -	if (file == NULL) {
> -		rte_errno = errno;
> -		return -rte_errno;
> -	}
> -	ret = fwrite(buf, 1, size, file);
> -	if ((size_t)ret < size || ferror(file)) {
> -		rte_errno = EIO;
> -		ret = -rte_errno;
> -	} else {
> -		ret = size;
> -	}
> -	fclose(file);
> -	return ret;
> -}
> -
> -/**
> - * Get unsigned long sysfs property.
> - *
> - * @param priv
> - *   Pointer to private structure.
> - * @param[in] name
> - *   Entry name relative to sysfs path.
> - * @param[out] value
> - *   Value output buffer.
> - *
> - * @return
> - *   0 on success, negative errno value otherwise and rte_errno is set.
> - */
> -static int
> -mlx4_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long
> *value) -{
> -	int ret;
> -	unsigned long value_ret;
> -	char value_str[32];
> -
> -	ret = mlx4_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1));
> -	if (ret < 0) {
> -		DEBUG("cannot read %s value from sysfs: %s",
> -		      name, strerror(rte_errno));
> -		return ret;
> -	}
> -	value_str[ret] = '\0';
> -	errno = 0;
> -	value_ret = strtoul(value_str, NULL, 0);
> -	if (errno) {
> -		rte_errno = errno;
> -		DEBUG("invalid %s value `%s': %s", name, value_str,
> -		      strerror(rte_errno));
> -		return -rte_errno;
> -	}
> -	*value = value_ret;
> -	return 0;
> -}
> -
> -/**
> - * Set unsigned long sysfs property.
> - *
> - * @param priv
> - *   Pointer to private structure.
> - * @param[in] name
> - *   Entry name relative to sysfs path.
> - * @param value
> - *   Value to set.
> - *
> - * @return
> - *   0 on success, negative errno value otherwise and rte_errno is set.
> - */
> -static int
> -mlx4_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long
> value) -{
> -	int ret;
> -	MKSTR(value_str, "%lu", value);
> -
> -	ret = mlx4_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
> -	if (ret < 0) {
> -		DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
> -		      name, value_str, value, strerror(rte_errno));
> -		return ret;
> -	}
> -	return 0;
> -}
> -
> -/**
>   * Perform ifreq ioctl() on associated Ethernet device.
>   *
>   * @param[in] priv
> @@ -361,12 +200,12 @@ mlx4_get_mac(struct priv *priv, uint8_t
> (*mac)[ETHER_ADDR_LEN])  int  mlx4_mtu_get(struct priv *priv, uint16_t
> *mtu)  {
> -	unsigned long ulong_mtu = 0;
> -	int ret = mlx4_get_sysfs_ulong(priv, "mtu", &ulong_mtu);
> +	struct ifreq request;
> +	int ret = mlx4_ifreq(priv, SIOCGIFMTU, &request);
> 
>  	if (ret)
>  		return ret;

Function documentation is : "0 on success, negative errno value otherwise and rte_errno is set."
Per my understating ioctl returns -1 on error with errno set. 


> -	*mtu = ulong_mtu;
> +	*mtu = request.ifr_mtu;
>  	return 0;
>  }
> 
> @@ -385,20 +224,13 @@ int
>  mlx4_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)  {
>  	struct priv *priv = dev->data->dev_private;
> -	uint16_t new_mtu;
> -	int ret = mlx4_set_sysfs_ulong(priv, "mtu", mtu);
> +	struct ifreq request = { .ifr_mtu = mtu, };
> +	int ret = mlx4_ifreq(priv, SIOCSIFMTU, &request);
> 
>  	if (ret)
>  		return ret;

Also here.

> -	ret = mlx4_mtu_get(priv, &new_mtu);
> -	if (ret)
> -		return ret;
> -	if (new_mtu == mtu) {
> -		priv->mtu = mtu;
> -		return 0;
> -	}
> -	rte_errno = EINVAL;
> -	return -rte_errno;
> +	priv->mtu = mtu;
> +	return 0;
>  }
> 
>  /**
> @@ -417,14 +249,14 @@ mlx4_mtu_set(struct rte_eth_dev *dev, uint16_t
> mtu)  static int  mlx4_set_flags(struct priv *priv, unsigned int keep, unsigned
> int flags)  {
> -	unsigned long tmp = 0;
> -	int ret = mlx4_get_sysfs_ulong(priv, "flags", &tmp);
> +	struct ifreq request;
> +	int ret = mlx4_ifreq(priv, SIOCGIFFLAGS, &request);
> 
>  	if (ret)
>  		return ret;

And here.

> -	tmp &= keep;
> -	tmp |= (flags & (~keep));
> -	return mlx4_set_sysfs_ulong(priv, "flags", tmp);
> +	request.ifr_flags &= keep;
> +	request.ifr_flags |= flags & ~keep;
> +	return mlx4_ifreq(priv, SIOCSIFFLAGS, &request);
>  }
> 
>  /**
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index
> 965c19f21..da44faaf4 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -209,8 +209,6 @@ struct priv *mlx5_get_priv(struct rte_eth_dev *dev);
> int mlx5_is_secondary(void);  int priv_get_ifname(const struct priv *, char
> (*)[IF_NAMESIZE]);  int priv_ifreq(const struct priv *, int req, struct ifreq *); -
> int priv_is_ib_cntr(const char *); -int priv_get_cntr_sysfs(struct priv *, const
> char *, uint64_t *);  int priv_get_num_vfs(struct priv *, uint16_t *);  int
> priv_get_mtu(struct priv *, uint16_t *);  int priv_set_flags(struct priv *,
> unsigned int, unsigned int); diff --git a/drivers/net/mlx5/mlx5_ethdev.c
> b/drivers/net/mlx5/mlx5_ethdev.c index 666507691..b73cb53df 100644
> --- a/drivers/net/mlx5/mlx5_ethdev.c
> +++ b/drivers/net/mlx5/mlx5_ethdev.c
> @@ -7,6 +7,7 @@
> 
>  #include <stddef.h>
>  #include <assert.h>
> +#include <inttypes.h>
>  #include <unistd.h>
>  #include <stdint.h>
>  #include <stdio.h>
> @@ -173,181 +174,6 @@ priv_get_ifname(const struct priv *priv, char
> (*ifname)[IF_NAMESIZE])  }
> 
>  /**
> - * Check if the counter is located on ib counters file.
> - *
> - * @param[in] cntr
> - *   Counter name.
> - *
> - * @return
> - *   1 if counter is located on ib counters file , 0 otherwise.
> - */
> -int
> -priv_is_ib_cntr(const char *cntr)
> -{
> -	if (!strcmp(cntr, "out_of_buffer"))
> -		return 1;
> -	return 0;
> -}
> -
> -/**
> - * Read from sysfs entry.
> - *
> - * @param[in] priv
> - *   Pointer to private structure.
> - * @param[in] entry
> - *   Entry name relative to sysfs path.
> - * @param[out] buf
> - *   Data output buffer.
> - * @param size
> - *   Buffer size.
> - *
> - * @return
> - *   0 on success, -1 on failure and errno is set.
> - */
> -static int
> -priv_sysfs_read(const struct priv *priv, const char *entry,
> -		char *buf, size_t size)
> -{
> -	char ifname[IF_NAMESIZE];
> -	FILE *file;
> -	int ret;
> -	int err;
> -
> -	if (priv_get_ifname(priv, &ifname))
> -		return -1;
> -
> -	if (priv_is_ib_cntr(entry)) {
> -		MKSTR(path, "%s/ports/1/hw_counters/%s",
> -		      priv->ibdev_path, entry);
> -		file = fopen(path, "rb");
> -	} else {
> -		MKSTR(path, "%s/device/net/%s/%s",
> -		      priv->ibdev_path, ifname, entry);
> -		file = fopen(path, "rb");
> -	}
> -	if (file == NULL)
> -		return -1;
> -	ret = fread(buf, 1, size, file);
> -	err = errno;
> -	if (((size_t)ret < size) && (ferror(file)))
> -		ret = -1;
> -	else
> -		ret = size;
> -	fclose(file);
> -	errno = err;
> -	return ret;
> -}
> -
> -/**
> - * Write to sysfs entry.
> - *
> - * @param[in] priv
> - *   Pointer to private structure.
> - * @param[in] entry
> - *   Entry name relative to sysfs path.
> - * @param[in] buf
> - *   Data buffer.
> - * @param size
> - *   Buffer size.
> - *
> - * @return
> - *   0 on success, -1 on failure and errno is set.
> - */
> -static int
> -priv_sysfs_write(const struct priv *priv, const char *entry,
> -		 char *buf, size_t size)
> -{
> -	char ifname[IF_NAMESIZE];
> -	FILE *file;
> -	int ret;
> -	int err;
> -
> -	if (priv_get_ifname(priv, &ifname))
> -		return -1;
> -
> -	MKSTR(path, "%s/device/net/%s/%s", priv->ibdev_path, ifname,
> entry);
> -
> -	file = fopen(path, "wb");
> -	if (file == NULL)
> -		return -1;
> -	ret = fwrite(buf, 1, size, file);
> -	err = errno;
> -	if (((size_t)ret < size) || (ferror(file)))
> -		ret = -1;
> -	else
> -		ret = size;
> -	fclose(file);
> -	errno = err;
> -	return ret;
> -}
> -
> -/**
> - * Get unsigned long sysfs property.
> - *
> - * @param priv
> - *   Pointer to private structure.
> - * @param[in] name
> - *   Entry name relative to sysfs path.
> - * @param[out] value
> - *   Value output buffer.
> - *
> - * @return
> - *   0 on success, -1 on failure and errno is set.
> - */
> -static int
> -priv_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long
> *value) -{
> -	int ret;
> -	unsigned long value_ret;
> -	char value_str[32];
> -
> -	ret = priv_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1));
> -	if (ret == -1) {
> -		DEBUG("cannot read %s value from sysfs: %s",
> -		      name, strerror(errno));
> -		return -1;
> -	}
> -	value_str[ret] = '\0';
> -	errno = 0;
> -	value_ret = strtoul(value_str, NULL, 0);
> -	if (errno) {
> -		DEBUG("invalid %s value `%s': %s", name, value_str,
> -		      strerror(errno));
> -		return -1;
> -	}
> -	*value = value_ret;
> -	return 0;
> -}
> -
> -/**
> - * Set unsigned long sysfs property.
> - *
> - * @param priv
> - *   Pointer to private structure.
> - * @param[in] name
> - *   Entry name relative to sysfs path.
> - * @param value
> - *   Value to set.
> - *
> - * @return
> - *   0 on success, -1 on failure and errno is set.
> - */
> -static int
> -priv_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long
> value) -{
> -	int ret;
> -	MKSTR(value_str, "%lu", value);
> -
> -	ret = priv_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1));
> -	if (ret == -1) {
> -		DEBUG("cannot write %s `%s' (%lu) to sysfs: %s",
> -		      name, value_str, value, strerror(errno));
> -		return -1;
> -	}
> -	return 0;
> -}
> -
> -/**
>   * Perform ifreq ioctl() on associated Ethernet device.
>   *
>   * @param[in] priv
> @@ -390,20 +216,25 @@ priv_get_num_vfs(struct priv *priv, uint16_t
> *num_vfs)  {
>  	/* The sysfs entry name depends on the operating system. */
>  	const char **name = (const char *[]){
> -		"device/sriov_numvfs",
> -		"device/mlx5_num_vfs",
> +		"sriov_numvfs",
> +		"mlx5_num_vfs",
>  		NULL,
>  	};
> -	int ret;
> 
>  	do {
> -		unsigned long ulong_num_vfs;
> +		int n;
> +		FILE *file;
> +		MKSTR(path, "%s/device/%s", priv->ibdev_path, *name);
> 
> -		ret = priv_get_sysfs_ulong(priv, *name, &ulong_num_vfs);
> -		if (!ret)
> -			*num_vfs = ulong_num_vfs;
> -	} while (*(++name) && ret);
> -	return ret;
> +		file = fopen(path, "rb");
> +		if (!file)
> +			continue;
> +		n = fscanf(file, "%" SCNu16, num_vfs);
> +		fclose(file);
> +		if (n == 1)
> +			return 0;
> +	} while (*(++name));
> +	return -1;
>  }
> 
>  /**
> @@ -420,35 +251,12 @@ priv_get_num_vfs(struct priv *priv, uint16_t
> *num_vfs)  int  priv_get_mtu(struct priv *priv, uint16_t *mtu)  {
> -	unsigned long ulong_mtu;
> +	struct ifreq request;
> +	int ret = priv_ifreq(priv, SIOCGIFMTU, &request);
> 
> -	if (priv_get_sysfs_ulong(priv, "mtu", &ulong_mtu) == -1)
> -		return -1;
> -	*mtu = ulong_mtu;
> -	return 0;
> -}
> -
> -/**
> - * Read device counter from sysfs.
> - *
> - * @param priv
> - *   Pointer to private structure.
> - * @param name
> - *   Counter name.
> - * @param[out] cntr
> - *   Counter output buffer.
> - *
> - * @return
> - *   0 on success, -1 on failure and errno is set.
> - */
> -int
> -priv_get_cntr_sysfs(struct priv *priv, const char *name, uint64_t *cntr) -{
> -	unsigned long ulong_ctr;
> -
> -	if (priv_get_sysfs_ulong(priv, name, &ulong_ctr) == -1)
> -		return -1;
> -	*cntr = ulong_ctr;
> +	if (ret)
> +		return ret;
> +	*mtu = request.ifr_mtu;
>  	return 0;
>  }
> 
> @@ -466,15 +274,9 @@ priv_get_cntr_sysfs(struct priv *priv, const char
> *name, uint64_t *cntr)  static int  priv_set_mtu(struct priv *priv, uint16_t
> mtu)  {
> -	uint16_t new_mtu;
> +	struct ifreq request = { .ifr_mtu = mtu, };
> 
> -	if (priv_set_sysfs_ulong(priv, "mtu", mtu) ||
> -	    priv_get_mtu(priv, &new_mtu))
> -		return -1;
> -	if (new_mtu == mtu)
> -		return 0;
> -	errno = EINVAL;
> -	return -1;
> +	return priv_ifreq(priv, SIOCSIFMTU, &request);
>  }
> 
>  /**
> @@ -493,13 +295,14 @@ priv_set_mtu(struct priv *priv, uint16_t mtu)  int
> priv_set_flags(struct priv *priv, unsigned int keep, unsigned int flags)  {
> -	unsigned long tmp;
> +	struct ifreq request;
> +	int ret = priv_ifreq(priv, SIOCGIFFLAGS, &request);
> 
> -	if (priv_get_sysfs_ulong(priv, "flags", &tmp) == -1)
> -		return -1;
> -	tmp &= keep;
> -	tmp |= (flags & (~keep));
> -	return priv_set_sysfs_ulong(priv, "flags", tmp);
> +	if (ret)
> +		return ret;
> +	request.ifr_flags &= keep;
> +	request.ifr_flags |= flags & ~keep;
> +	return priv_ifreq(priv, SIOCSIFFLAGS, &request);
>  }
> 
>  /**
> diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
> index 378472a70..eb9c65dcc 100644
> --- a/drivers/net/mlx5/mlx5_stats.c
> +++ b/drivers/net/mlx5/mlx5_stats.c
> @@ -3,8 +3,11 @@
>   * Copyright 2015 Mellanox.
>   */
> 
> +#include <inttypes.h>
>  #include <linux/sockios.h>
>  #include <linux/ethtool.h>
> +#include <stdint.h>
> +#include <stdio.h>
> 
>  #include <rte_ethdev_driver.h>
>  #include <rte_common.h>
> @@ -19,6 +22,7 @@ struct mlx5_counter_ctrl {
>  	char dpdk_name[RTE_ETH_XSTATS_NAME_SIZE];
>  	/* Name of the counter on the device table. */
>  	char ctr_name[RTE_ETH_XSTATS_NAME_SIZE];
> +	uint32_t ib:1; /**< Nonzero for IB counters. */
>  };
> 
>  static const struct mlx5_counter_ctrl mlx5_counters_init[] = { @@ -93,6
> +97,7 @@ static const struct mlx5_counter_ctrl mlx5_counters_init[] = {
>  	{
>  		.dpdk_name = "rx_out_of_buffer",
>  		.ctr_name = "out_of_buffer",
> +		.ib = 1,
>  	},
>  	{
>  		.dpdk_name = "tx_packets_phy",
> @@ -143,13 +148,24 @@ priv_read_dev_counters(struct priv *priv, uint64_t
> *stats)
>  		return -1;
>  	}
>  	for (i = 0; i != xstats_n; ++i) {
> -		if (priv_is_ib_cntr(mlx5_counters_init[i].ctr_name))
> -			priv_get_cntr_sysfs(priv,
> -					    mlx5_counters_init[i].ctr_name,
> -					    &stats[i]);
> -		else
> +		if (mlx5_counters_init[i].ib) {
> +			FILE *file;
> +			MKSTR(path, "%s/ports/1/hw_counters/%s",
> +			      priv->ibdev_path,
> +			      mlx5_counters_init[i].ctr_name);
> +
> +			file = fopen(path, "rb");
> +			if (file) {
> +				int n = fscanf(file, "%" SCNu64, &stats[i]);
> +
> +				fclose(file);
> +				if (n != 1)
> +					stats[i] = 0;
> +			}
> +		} else {
>  			stats[i] = (uint64_t)
>  				et_stats->data[xstats_ctrl-
> >dev_table_idx[i]];
> +		}
>  	}
>  	return 0;
>  }
> @@ -232,7 +248,7 @@ priv_xstats_init(struct priv *priv)
>  		}
>  	}
>  	for (j = 0; j != xstats_n; ++j) {
> -		if (priv_is_ib_cntr(mlx5_counters_init[j].ctr_name))
> +		if (mlx5_counters_init[j].ib)
>  			continue;
>  		if (xstats_ctrl->dev_table_idx[j] >= dev_stats_n) {
>  			WARN("counter \"%s\" is not recognized",
> --
> 2.11.0


More information about the dev mailing list