[dpdk-dev] [PATCH] net/mlx5: restrict workaround of gcc AVX512F bug

Stephen Hemminger stephen at networkplumber.org
Tue Nov 13 01:14:30 CET 2018


The additional annotations clutter the code.
How big a performance hit is it to disable for whole driver? Or just use
memcpy instead of rte_memcpy?

On Mon, Nov 12, 2018, 4:01 PM Thomas Monjalon <thomas at monjalon.net wrote:

> A bug was found when the inline function mlx5_tx_complete()
> is optimized with AVX512F instructions. It corrupts an offset
> in the instructions vmovdqu8 of the AVX2 version of rte_mov128(),
> used in rte_memcpy(), which is called in rte_mempool_put_bulk().
>
> All the above functions are inline. So the workaround is
> to disable AVX512F optimization for the functions calling the
> top-level function of this call stack, i.e. mlx5_tx_complete().
> All GCC versions supporting AVX512 are supposed to be affected.
>
> The root cause is not identified yet. It may be thought that
> more related bugs may happen in other functions.
> That's why the initial workaround was to disable AVX512F globally.
> This patch takes the risk of applying the workaround only for the
> functions known to be affected, in order to preserve the optimization
> everywhere else.
>
> Bugzilla ID: 97
> Fixes: 8d07c82b239f ("mk: disable gcc AVX512F support")
>
> Signed-off-by: Thomas Monjalon <thomas at monjalon.net>
> ---
>  drivers/net/mlx5/mlx5_rxtx.c         | 10 +++++-----
>  drivers/net/mlx5/mlx5_rxtx_vec.c     |  4 ++--
>  drivers/net/mlx5/mlx5_rxtx_vec_sse.h |  2 +-
>  drivers/net/mlx5/mlx5_utils.h        | 11 +++++++++++
>  mk/rte.cpuflags.mk                   |  5 -----
>  5 files changed, 19 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
> index 6eceea5fe..c08f63299 100644
> --- a/drivers/net/mlx5/mlx5_rxtx.c
> +++ b/drivers/net/mlx5/mlx5_rxtx.c
> @@ -403,7 +403,7 @@ inline_tso(struct mlx5_txq_data *txq, struct rte_mbuf
> *buf,
>   * @return
>   *   The status of the tx descriptor.
>   */
> -int
> +int MLX5_WORKAROUND_GCC_BUG_AVX512F
>  mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)
>  {
>         struct mlx5_txq_data *txq = tx_queue;
> @@ -537,7 +537,7 @@ mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t
> rx_queue_id)
>   * @return
>   *   Number of packets successfully transmitted (<= pkts_n).
>   */
> -uint16_t
> +uint16_t MLX5_WORKAROUND_GCC_BUG_AVX512F
>  mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
>  {
>         struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
> @@ -978,7 +978,7 @@ mlx5_mpw_close(struct mlx5_txq_data *txq, struct
> mlx5_mpw *mpw)
>   * @return
>   *   Number of packets successfully transmitted (<= pkts_n).
>   */
> -uint16_t
> +uint16_t MLX5_WORKAROUND_GCC_BUG_AVX512F
>  mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
>  {
>         struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
> @@ -1196,7 +1196,7 @@ mlx5_mpw_inline_close(struct mlx5_txq_data *txq,
> struct mlx5_mpw *mpw)
>   * @return
>   *   Number of packets successfully transmitted (<= pkts_n).
>   */
> -uint16_t
> +uint16_t MLX5_WORKAROUND_GCC_BUG_AVX512F
>  mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
>                          uint16_t pkts_n)
>  {
> @@ -1725,7 +1725,7 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct
> rte_mbuf **pkts,
>   * @return
>   *   Number of packets successfully transmitted (<= pkts_n).
>   */
> -uint16_t
> +uint16_t MLX5_WORKAROUND_GCC_BUG_AVX512F
>  mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t
> pkts_n)
>  {
>         struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
> diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c
> b/drivers/net/mlx5/mlx5_rxtx_vec.c
> index 340292add..da9f30f16 100644
> --- a/drivers/net/mlx5/mlx5_rxtx_vec.c
> +++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
> @@ -104,7 +104,7 @@ txq_calc_offload(struct rte_mbuf **pkts, uint16_t
> pkts_n, uint8_t *cs_flags,
>   * @return
>   *   Number of packets successfully transmitted (<= pkts_n).
>   */
> -uint16_t
> +uint16_t MLX5_WORKAROUND_GCC_BUG_AVX512F
>  mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,
>                       uint16_t pkts_n)
>  {
> @@ -137,7 +137,7 @@ mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf
> **pkts,
>   * @return
>   *   Number of packets successfully transmitted (<= pkts_n).
>   */
> -uint16_t
> +uint16_t MLX5_WORKAROUND_GCC_BUG_AVX512F
>  mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
>  {
>         struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
> diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
> b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
> index e0f95f923..399fd39c5 100644
> --- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
> +++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
> @@ -89,7 +89,7 @@ txq_wr_dseg_v(struct mlx5_txq_data *txq, __m128i *dseg,
>   * @return
>   *   Number of packets successfully transmitted (<= pkts_n).
>   */
> -static uint16_t
> +static uint16_t MLX5_WORKAROUND_GCC_BUG_AVX512F
>  txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
>               uint16_t pkts_n)
>  {
> diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h
> index 886f60e61..10503f3f0 100644
> --- a/drivers/net/mlx5/mlx5_utils.h
> +++ b/drivers/net/mlx5/mlx5_utils.h
> @@ -15,6 +15,17 @@
>
>  #include "mlx5_defs.h"
>
> +/*
> + * GCC bug workaround for rte_memcpy broken when optimized for AVX512.
> + * Details are in https://bugs.dpdk.org/show_bug.cgi?id=97
> + * AVX512F is disabled for affected functions (calling mlx5_tx_complete).
> + */
> +#if defined(__clang__) || defined(__INTEL_COMPILER)
> +#define MLX5_WORKAROUND_GCC_BUG_AVX512F
> +#else
> +#define MLX5_WORKAROUND_GCC_BUG_AVX512F
> __attribute__((target("no-avx512f")))
> +#endif
> +
>  /* Bit-field manipulation. */
>  #define BITFIELD_DECLARE(bf, type, size) \
>         type bf[(((size_t)(size) / (sizeof(type) * CHAR_BIT)) + \
> diff --git a/mk/rte.cpuflags.mk b/mk/rte.cpuflags.mk
> index c3291b17a..43ed84155 100644
> --- a/mk/rte.cpuflags.mk
> +++ b/mk/rte.cpuflags.mk
> @@ -68,11 +68,6 @@ endif
>  ifneq ($(filter $(AUTO_CPUFLAGS),__AVX512F__),)
>  ifeq ($(CONFIG_RTE_ENABLE_AVX512),y)
>  CPUFLAGS += AVX512F
> -else
> -# disable AVX512F support of gcc as a workaround for Bug 97
> -ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
> -MACHINE_CFLAGS += -mno-avx512f
> -endif
>  endif
>  endif
>
> --
> 2.19.0
>
>


More information about the dev mailing list