[v2,2/2] net/mlx5: make vectorized Tx threshold configurable
Checks
Commit Message
Add txqs_max_vec parameter to configure the maximum number of Tx queues to
enable vectorized Tx. And its default value is set according to the
architecture and device type.
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
doc/guides/nics/mlx5.rst | 16 +++++++++++++++-
drivers/net/mlx5/mlx5.c | 16 ++++++++++++++++
drivers/net/mlx5/mlx5.h | 1 +
drivers/net/mlx5/mlx5_defs.h | 6 ++++--
drivers/net/mlx5/mlx5_rxtx_vec.c | 2 +-
5 files changed, 37 insertions(+), 4 deletions(-)
Comments
Tuesday, October 30, 2018 9:49 AM, Yongseok Koh:
> Subject: [PATCH v2 2/2] net/mlx5: make vectorized Tx threshold configurable
>
> Add txqs_max_vec parameter to configure the maximum number of Tx
> queues to enable vectorized Tx. And its default value is set according to the
> architecture and device type.
>
> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
> ---
> doc/guides/nics/mlx5.rst | 16 +++++++++++++++-
> drivers/net/mlx5/mlx5.c | 16 ++++++++++++++++
> drivers/net/mlx5/mlx5.h | 1 +
> drivers/net/mlx5/mlx5_defs.h | 6 ++++--
> drivers/net/mlx5/mlx5_rxtx_vec.c | 2 +-
> 5 files changed, 37 insertions(+), 4 deletions(-)
>
> diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
> index 1dc32829ff..7379cf39b0 100644
> --- a/doc/guides/nics/mlx5.rst
> +++ b/doc/guides/nics/mlx5.rst
> @@ -338,6 +338,20 @@ Run-time configuration
>
> - Set to 8 by default.
>
> +- ``txqs_max_vec`` parameter [int]
> +
> + Enable vectorized Tx only when the number of TX queues is less than
> + or equal to this value. Effective only when ``tx_vec_en`` is enabled.
> +
> + On ConnectX-5:
> +
> + - Set to 8 by default on ARMv8.
> + - Set to 4 by default otherwise.
> +
> + On Bluefield
> +
> + - Set to 16 by default.
> +
> - ``txq_mpw_en`` parameter [int]
>
> A nonzero value enables multi-packet send (MPS) for ConnectX-4 Lx and
> @@ -383,7 +397,7 @@ Run-time configuration
> - ``tx_vec_en`` parameter [int]
>
>  A nonzero value enables Tx vector on ConnectX-5 and Bluefield NICs if the number of
> -global Tx queues on the port is lesser than MLX5_VPMD_MIN_TXQS.
> +global Tx queues on the port is less than ``txqs_max_vec``.
>
> This option cannot be used with certain offloads such as
> ``DEV_TX_OFFLOAD_TCP_TSO,
> DEV_TX_OFFLOAD_VXLAN_TNL_TSO, DEV_TX_OFFLOAD_GRE_TNL_TSO,
> DEV_TX_OFFLOAD_VLAN_INSERT``.
> diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
> index 6fa50ba1b1..d575469f9b 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -75,6 +75,12 @@
> */
> #define MLX5_TXQS_MIN_INLINE "txqs_min_inline"
>
> +/*
> + * Device parameter to configure the number of TX queues threshold for
> + * enabling vectorized Tx.
> + */
> +#define MLX5_TXQS_MAX_VEC "txqs_max_vec"
> +
> /* Device parameter to enable multi-packet send WQEs. */
> #define MLX5_TXQ_MPW_EN "txq_mpw_en"
>
> @@ -496,6 +502,8 @@ mlx5_args_check(const char *key, const char *val,
> void *opaque)
> config->txq_inline = tmp;
> } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) {
> config->txqs_inline = tmp;
> + } else if (strcmp(MLX5_TXQS_MAX_VEC, key) == 0) {
> + config->txqs_vec = tmp;
> } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) {
> config->mps = !!tmp;
> 	} else if (strcmp(MLX5_TXQ_MPW_HDR_DSEG_EN, key) == 0) {
> @@ -543,6 +551,7 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
> MLX5_RXQS_MIN_MPRQ,
> MLX5_TXQ_INLINE,
> MLX5_TXQS_MIN_INLINE,
> + MLX5_TXQS_MAX_VEC,
> MLX5_TXQ_MPW_EN,
> MLX5_TXQ_MPW_HDR_DSEG_EN,
> MLX5_TXQ_MAX_INLINE_LEN,
> @@ -1443,6 +1452,8 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
> 	};
> /* Device speicific configuration. */
> switch (pci_dev->id.device_id) {
> +	case PCI_DEVICE_ID_MELLANOX_CONNECTX5BF:
> +		dev_config.txqs_vec = MLX5_VPMD_MAX_TXQS_BLUEFIELD;
Missing break? As written, the Bluefield case falls through into the VF cases below, which would also set dev_config.vf = 1 for the Bluefield PF.
> case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
> case PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF:
> case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF:
What about all the VFs? They don't have a default value; will that lead to the vectorized Tx code not being used for VFs at all?
I think the decision for VFs should be the same as for the PF: 8 if the architecture is ARMv8, otherwise 4.
Note that PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF is missing, and its default value should follow the same rule as above, because the VF port is not on the SoC but on the host side.
> @@ -1450,6 +1461,11 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
> dev_config.vf = 1;
> break;
> default:
> +#if defined(RTE_ARCH_ARM64)
> +		dev_config.txqs_vec = MLX5_VPMD_MAX_TXQS_ARM64;
> +#else
> +		dev_config.txqs_vec = MLX5_VPMD_MAX_TXQS;
> +#endif
> break;
> }
> for (i = 0; i != n; ++i) {
> diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
> index 24a3415c8d..0b4418b80b 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -140,6 +140,7 @@ struct mlx5_dev_config {
> 	unsigned int ind_table_max_size; /* Maximum indirection table size. */
> int txq_inline; /* Maximum packet size for inlining. */
> int txqs_inline; /* Queue number threshold for inlining. */
> + int txqs_vec; /* Queue number threshold for vectorized Tx. */
> 	int inline_max_packet_sz; /* Max packet size for inlining. */
>  };
>
> diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
> index f2a1679511..221ca188bb 100644
> --- a/drivers/net/mlx5/mlx5_defs.h
> +++ b/drivers/net/mlx5/mlx5_defs.h
> @@ -60,8 +60,10 @@
> /* Maximum Packet headers size (L2+L3+L4) for TSO. */ #define
> MLX5_MAX_TSO_HEADER 192
>
> -/* Default minimum number of Tx queues for vectorized Tx. */
> -#define MLX5_VPMD_MIN_TXQS 4
> +/* Default maximum number of Tx queues for vectorized Tx. */
> +#define MLX5_VPMD_MAX_TXQS 4
> +#define MLX5_VPMD_MAX_TXQS_ARM64 8
> +#define MLX5_VPMD_MAX_TXQS_BLUEFIELD 16
>
>  /* Threshold of buffer replenishment for vectorized Rx. */
>  #define MLX5_VPMD_RXQ_RPLNSH_THRESH(n) \
> diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
> index 1453f4ff63..340292addf 100644
> --- a/drivers/net/mlx5/mlx5_rxtx_vec.c
> +++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
> @@ -277,7 +277,7 @@ mlx5_check_vec_tx_support(struct rte_eth_dev *dev)
> uint64_t offloads = dev->data->dev_conf.txmode.offloads;
>
> if (!priv->config.tx_vec_en ||
> - priv->txqs_n > MLX5_VPMD_MIN_TXQS ||
> + priv->txqs_n > (unsigned int)priv->config.txqs_vec ||
> priv->config.mps != MLX5_MPW_ENHANCED ||
> offloads & ~MLX5_VEC_TX_OFFLOAD_CAP)
> return -ENOTSUP;
> --
> 2.11.0
@@ -338,6 +338,20 @@ Run-time configuration
- Set to 8 by default.
+- ``txqs_max_vec`` parameter [int]
+
+ Enable vectorized Tx only when the number of TX queues is less than or
+ equal to this value. Effective only when ``tx_vec_en`` is enabled.
+
+ On ConnectX-5:
+
+ - Set to 8 by default on ARMv8.
+ - Set to 4 by default otherwise.
+
+ On Bluefield
+
+ - Set to 16 by default.
+
- ``txq_mpw_en`` parameter [int]
A nonzero value enables multi-packet send (MPS) for ConnectX-4 Lx and
@@ -383,7 +397,7 @@ Run-time configuration
- ``tx_vec_en`` parameter [int]
A nonzero value enables Tx vector on ConnectX-5 and Bluefield NICs if the number of
- global Tx queues on the port is lesser than MLX5_VPMD_MIN_TXQS.
+ global Tx queues on the port is less than ``txqs_max_vec``.
This option cannot be used with certain offloads such as ``DEV_TX_OFFLOAD_TCP_TSO,
DEV_TX_OFFLOAD_VXLAN_TNL_TSO, DEV_TX_OFFLOAD_GRE_TNL_TSO, DEV_TX_OFFLOAD_VLAN_INSERT``.
@@ -75,6 +75,12 @@
*/
#define MLX5_TXQS_MIN_INLINE "txqs_min_inline"
+/*
+ * Device parameter to configure the number of TX queues threshold for
+ * enabling vectorized Tx.
+ */
+#define MLX5_TXQS_MAX_VEC "txqs_max_vec"
+
/* Device parameter to enable multi-packet send WQEs. */
#define MLX5_TXQ_MPW_EN "txq_mpw_en"
@@ -496,6 +502,8 @@ mlx5_args_check(const char *key, const char *val, void *opaque)
config->txq_inline = tmp;
} else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) {
config->txqs_inline = tmp;
+ } else if (strcmp(MLX5_TXQS_MAX_VEC, key) == 0) {
+ config->txqs_vec = tmp;
} else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) {
config->mps = !!tmp;
} else if (strcmp(MLX5_TXQ_MPW_HDR_DSEG_EN, key) == 0) {
@@ -543,6 +551,7 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
MLX5_RXQS_MIN_MPRQ,
MLX5_TXQ_INLINE,
MLX5_TXQS_MIN_INLINE,
+ MLX5_TXQS_MAX_VEC,
MLX5_TXQ_MPW_EN,
MLX5_TXQ_MPW_HDR_DSEG_EN,
MLX5_TXQ_MAX_INLINE_LEN,
@@ -1443,6 +1452,8 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
};
/* Device speicific configuration. */
switch (pci_dev->id.device_id) {
+ case PCI_DEVICE_ID_MELLANOX_CONNECTX5BF:
+ dev_config.txqs_vec = MLX5_VPMD_MAX_TXQS_BLUEFIELD;
case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
case PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF:
case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF:
@@ -1450,6 +1461,11 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
dev_config.vf = 1;
break;
default:
+#if defined(RTE_ARCH_ARM64)
+ dev_config.txqs_vec = MLX5_VPMD_MAX_TXQS_ARM64;
+#else
+ dev_config.txqs_vec = MLX5_VPMD_MAX_TXQS;
+#endif
break;
}
for (i = 0; i != n; ++i) {
@@ -140,6 +140,7 @@ struct mlx5_dev_config {
unsigned int ind_table_max_size; /* Maximum indirection table size. */
int txq_inline; /* Maximum packet size for inlining. */
int txqs_inline; /* Queue number threshold for inlining. */
+ int txqs_vec; /* Queue number threshold for vectorized Tx. */
int inline_max_packet_sz; /* Max packet size for inlining. */
};
@@ -60,8 +60,10 @@
/* Maximum Packet headers size (L2+L3+L4) for TSO. */
#define MLX5_MAX_TSO_HEADER 192
-/* Default minimum number of Tx queues for vectorized Tx. */
-#define MLX5_VPMD_MIN_TXQS 4
+/* Default maximum number of Tx queues for vectorized Tx. */
+#define MLX5_VPMD_MAX_TXQS 4
+#define MLX5_VPMD_MAX_TXQS_ARM64 8
+#define MLX5_VPMD_MAX_TXQS_BLUEFIELD 16
/* Threshold of buffer replenishment for vectorized Rx. */
#define MLX5_VPMD_RXQ_RPLNSH_THRESH(n) \
@@ -277,7 +277,7 @@ mlx5_check_vec_tx_support(struct rte_eth_dev *dev)
uint64_t offloads = dev->data->dev_conf.txmode.offloads;
if (!priv->config.tx_vec_en ||
- priv->txqs_n > MLX5_VPMD_MIN_TXQS ||
+ priv->txqs_n > (unsigned int)priv->config.txqs_vec ||
priv->config.mps != MLX5_MPW_ENHANCED ||
offloads & ~MLX5_VEC_TX_OFFLOAD_CAP)
return -ENOTSUP;