[v1,3/3] net/mlx5: support flow counters using devx

Message ID 1545748697-3385-4-git-send-email-motih@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Shahaf Shuler
Headers
Series support flow counters using devx |

Checks

Context Check Description
ci/Intel-compilation success Compilation OK

Commit Message

Moti Haimovsky Dec. 25, 2018, 2:38 p.m. UTC
  This commit adds counter support when creating flows via direct
verbs. The implementation uses the devx interface to create,
query and delete the counters.
This support requires MLNX_OFED_LINUX-4.5-0.1.0.1 installation.

Signed-off-by: Moti Haimovsky <motih@mellanox.com>
---
 drivers/net/mlx5/Makefile         |   6 +
 drivers/net/mlx5/meson.build      |   5 +
 drivers/net/mlx5/mlx5.c           |  17 ++-
 drivers/net/mlx5/mlx5.h           |   1 +
 drivers/net/mlx5/mlx5_devx_cmds.c | 117 +++++++++++++++++++
 drivers/net/mlx5/mlx5_flow.h      |  12 +-
 drivers/net/mlx5/mlx5_flow_dv.c   | 232 ++++++++++++++++++++++++++++++++++++--
 drivers/net/mlx5/mlx5_glue.c      |   1 +
 drivers/net/mlx5/mlx5_prm.h       |  86 ++++++++++++++
 9 files changed, 461 insertions(+), 16 deletions(-)
 create mode 100644 drivers/net/mlx5/mlx5_devx_cmds.c
  

Comments

Shahaf Shuler Dec. 27, 2018, 8:15 a.m. UTC | #1
Tuesday, December 25, 2018 4:39 PM, Mordechay Haimovsky:
> Subject: [dpdk-dev] [PATCH v1 3/3] net/mlx5: support flow counters using devx
> 
> This commit adds counters support when creating flows via direct verbs. The
> implementation uses devx interface in order to create query and delete the
> counters.
> This support requires MLNX_OFED_LINUX-4.5-0.1.0.1 installation.
> 
> Signed-off-by: Moti Haimovsky <motih@mellanox.com>
> ---
>  drivers/net/mlx5/Makefile         |   6 +
>  drivers/net/mlx5/meson.build      |   5 +
>  drivers/net/mlx5/mlx5.c           |  17 ++-
>  drivers/net/mlx5/mlx5.h           |   1 +
>  drivers/net/mlx5/mlx5_devx_cmds.c | 117 +++++++++++++++++++
>  drivers/net/mlx5/mlx5_flow.h      |  12 +-
>  drivers/net/mlx5/mlx5_flow_dv.c   | 232
> ++++++++++++++++++++++++++++++++++++--
>  drivers/net/mlx5/mlx5_glue.c      |   1 +
>  drivers/net/mlx5/mlx5_prm.h       |  86 ++++++++++++++
>  9 files changed, 461 insertions(+), 16 deletions(-)  create mode 100644
> drivers/net/mlx5/mlx5_devx_cmds.c
> 
> diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile index
> 58e2d15..bd96706 100644
> --- a/drivers/net/mlx5/Makefile
> +++ b/drivers/net/mlx5/Makefile
> @@ -36,6 +36,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) +=
> mlx5_flow_tcf.c
>  SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_verbs.c
>  SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c
>  SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_nl.c
> +SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_devx_cmds.c
> 
>  ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y)
>  INSTALL-$(CONFIG_RTE_LIBRTE_MLX5_PMD)-lib += $(LIB_GLUE) @@ -153,6
> +154,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
>  		func mlx5dv_devx_obj_create \
>  		$(AUTOCONF_OUTPUT)
>  	$Q sh -- '$<' '$@' \
> +		HAVE_IBV_FLOW_DEVX_COUNTERS \
> +		infiniband/mlx5dv.h \
> +		enum MLX5DV_FLOW_ACTION_COUNTER_DEVX \
> +		$(AUTOCONF_OUTPUT)
> +	$Q sh -- '$<' '$@' \
>  		HAVE_ETHTOOL_LINK_MODE_25G \
>  		/usr/include/linux/ethtool.h \
>  		enum ETHTOOL_LINK_MODE_25000baseCR_Full_BIT \ diff --
> git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build index
> e323c3a..9a5077d 100644
> --- a/drivers/net/mlx5/meson.build
> +++ b/drivers/net/mlx5/meson.build
> @@ -46,6 +46,7 @@ if build
>  		'mlx5_trigger.c',
>  		'mlx5_txq.c',
>  		'mlx5_vlan.c',
> +		'mlx5_devx_cmds.c',
>  	)
>  	if dpdk_conf.has('RTE_ARCH_X86_64') or
> dpdk_conf.has('RTE_ARCH_ARM64')
>  		sources += files('mlx5_rxtx_vec.c')
> @@ -100,6 +101,10 @@ if build
>  		'MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD' ],
>  		[ 'HAVE_IBV_FLOW_DV_SUPPORT', 'infiniband/mlx5dv.h',
>  		'mlx5dv_create_flow_action_packet_reformat' ],
> +		[ 'HAVE_IBV_FLOW_DEVX_COUNTERS', 'infiniband/mlx5dv.h',
> +		'MLX5DV_FLOW_ACTION_COUNTER_DEVX' ],
> +		[ 'HAVE_IBV_DEVX_OBJ', 'infiniband/mlx5dv.h',
> +		'MLX5DV_CONTEXT_FLAGS_DEVX' ],

I don't understand.
In the second patch of this series you detect HAVE_IBV_DEVX_OBJ according to:
+		[ 'HAVE_IBV_DEVX_OBJ', 'infiniband/mlx5dv.h',
+		'mlx5dv_devx_obj_create' ],

Typo?

>  		[ 'HAVE_IBV_DEVICE_MPLS_SUPPORT', 'infiniband/verbs.h',
>  		'IBV_FLOW_SPEC_MPLS' ],
>  		[ 'HAVE_IBV_WQ_FLAG_RX_END_PADDING',
> 'infiniband/verbs.h', diff --git a/drivers/net/mlx5/mlx5.c
> b/drivers/net/mlx5/mlx5.c index 9e5cab1..1e00b8b 100644
> --- a/drivers/net/mlx5/mlx5.c
> +++ b/drivers/net/mlx5/mlx5.c
> @@ -727,7 +727,7 @@
>  	       struct mlx5_dev_config config,
>  	       const struct mlx5_switch_info *switch_info)  {
> -	struct ibv_context *ctx;
> +	struct ibv_context *ctx = NULL;
>  	struct ibv_device_attr_ex attr;
>  	struct ibv_port_attr port_attr;
>  	struct ibv_pd *pd = NULL;
> @@ -786,10 +786,17 @@
>  	/* Prepare shared data between primary and secondary process. */
>  	mlx5_prepare_shared_data();
>  	errno = 0;
> -	ctx = mlx5_glue->open_device(ibv_dev);
> -	if (!ctx) {
> -		rte_errno = errno ? errno : ENODEV;
> -		return NULL;
> +	ctx = mlx5_glue->dv_open_device(ibv_dev);
> +	if (ctx) {
> +		config.devx = 1;
> +		DRV_LOG(DEBUG, "DEVX is %ssupported",
> +			config.devx ? "" : "not ");

You can just print "DEVX is supported"; config.devx is set right above.

> +	} else {
> +		ctx = mlx5_glue->open_device(ibv_dev);
> +		if (!ctx) {
> +			rte_errno = errno ? errno : ENODEV;
> +			return NULL;
> +		}
>  	}
>  #ifdef HAVE_IBV_MLX5_MOD_SWP
>  	dv_attr.comp_mask |= MLX5DV_CONTEXT_MASK_SWP; diff --git
> a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 75aeeb2..1fcdb71
> 100644
> --- a/drivers/net/mlx5/mlx5.h
> +++ b/drivers/net/mlx5/mlx5.h
> @@ -129,6 +129,7 @@ struct mlx5_dev_config {
>  	unsigned int vf_nl_en:1; /* Enable Netlink requests in VF mode. */
>  	unsigned int dv_flow_en:1; /* Enable DV flow. */
>  	unsigned int swp:1; /* Tx generic tunnel checksum and TSO offload. */
> +	unsigned int devx:1; /* Whether devx interface is available or not. */
>  	struct {
>  		unsigned int enabled:1; /* Whether MPRQ is enabled. */
>  		unsigned int stride_num_n; /* Number of strides. */ diff --git
> a/drivers/net/mlx5/mlx5_devx_cmds.c b/drivers/net/mlx5/mlx5_devx_cmds.c
> new file mode 100644
> index 0000000..a3f421e
> --- /dev/null
> +++ b/drivers/net/mlx5/mlx5_devx_cmds.c
> @@ -0,0 +1,117 @@
> +// SPDX-License-Identifier: BSD-3-Clause
> +/* Copyright 2018 Mellanox Technologies, Ltd */
> +
> +#include <rte_flow_driver.h>
> +
> +#include "mlx5.h"
> +#include "mlx5_glue.h"
> +#include "mlx5_prm.h"
> +
> +/*
> + * Dummy struct to prevent compilation errors when
> + * mlx5dv_devx_obj is not defined in mlx5dv.h  */ #ifndef
> +HAVE_IBV_DEVX_OBJ struct mlx5dv_devx_obj {
> +	void *ctx;
> +};
> +#endif /* HAVE_IBV_DEVX_OBJ */

Since this is the devx commands file, why not ifdef the entire file and avoid such a dummy declaration?

> +
> +/**
> + * Allocate flow counters via devx interface.
> + *
> + * @param[in] ctx
> + *   ibv contexts returned from mlx5dv_open_device.
> + * @param dcs
> + *   Pointer to counters properties structure to be filled by the routine.
> + *
> + * @return
> + *   0 on success, a negative value otherwise.
> + */
> +int mlx5_devx_cmd_flow_counter_alloc(struct ibv_context *ctx,
> +				     struct mlx5_devx_counter_set *dcs) {
> +	uint32_t in[MLX5_ST_SZ_DW(alloc_flow_counter_in)]   = {0};
> +	uint32_t out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0};
> +	int status, syndrome;
> +
> +	MLX5_SET(alloc_flow_counter_in, in, opcode,
> +		 MLX5_CMD_OP_ALLOC_FLOW_COUNTER);
> +	dcs->obj = mlx5_glue->devx_obj_create(ctx, in,
> +					      sizeof(in), out, sizeof(out));
> +	if (!dcs->obj)
> +		return -errno;
> +	status = MLX5_GET(query_flow_counter_out, out, status);
> +	syndrome = MLX5_GET(query_flow_counter_out, out, syndrome);
> +	if (status) {
> +		DRV_LOG(DEBUG, "Failed to create devx counters, "
> +			"status %x, syndrome %x", status, syndrome);
> +		return -1;
> +	}
> +	dcs->id = MLX5_GET(alloc_flow_counter_out,
> +			   out, flow_counter_id);
> +	return 0;
> +}
> +
> +/**
> + * Free flow counters obtained via devx interface.
> + *
> + * @param[in] obj
> + *   devx object that was obtained from mlx5_devx_cmd_fc_alloc.
> + *
> + * @return
> + *   0 on success, a negative value otherwise.
> + */
> +int mlx5_devx_cmd_flow_counter_free(struct mlx5dv_devx_obj *obj) {
> +	return mlx5_glue->devx_obj_destroy(obj); }
> +
> +/**
> + * Query flow counters values.
> + *
> + * @param[in] dcs
> + *   devx object that was obtained from mlx5_devx_cmd_fc_alloc.
> + * @param[in] clear
> + *   Whether hardware should clear the counters after the query or not.
> + *  @param pkts
> + *   The number of packets that matched the flow.
> + *  @param bytes
> + *    The number of bytes that matched the flow.
> + *
> + * @return
> + *   0 on success, a negative value otherwise.
> + */
> +int
> +mlx5_devx_cmd_flow_counter_query(struct mlx5_devx_counter_set *dcs,
> +				 int clear __rte_unused,
> +				 uint64_t *pkts, uint64_t *bytes)
> +{
> +	uint32_t out[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
> +		MLX5_ST_SZ_BYTES(traffic_counter)]   = {0};
> +	uint32_t in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0};
> +	void *stats;
> +	int status, syndrome, rc;
> +
> +	MLX5_SET(query_flow_counter_in, in, opcode,
> +		 MLX5_CMD_OP_QUERY_FLOW_COUNTER);
> +	MLX5_SET(query_flow_counter_in, in, op_mod, 0);
> +	MLX5_SET(query_flow_counter_in, in, flow_counter_id, dcs->id);
> +	rc = mlx5_glue->devx_obj_query(dcs->obj,
> +				       in, sizeof(in), out, sizeof(out));
> +	if (rc)
> +		return rc;
> +	status = MLX5_GET(query_flow_counter_out, out, status);
> +	syndrome = MLX5_GET(query_flow_counter_out, out, syndrome);
> +	if (status) {
> +		DRV_LOG(DEBUG, "Failed to query devx counters, "
> +			"id %d, status %x, syndrome = %x",
> +			status, syndrome, dcs->id);
> +		return -1;
> +	}
> +	stats = MLX5_ADDR_OF(query_flow_counter_out,
> +			     out, flow_statistics);
> +	*pkts = MLX5_GET64(traffic_counter, stats, packets);
> +	*bytes = MLX5_GET64(traffic_counter, stats, octets);
> +	return 0;
> +}
> diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
> index 4a7c052..838c85a 100644
> --- a/drivers/net/mlx5/mlx5_flow.h
> +++ b/drivers/net/mlx5/mlx5_flow.h
> @@ -21,6 +21,9 @@
>  #pragma GCC diagnostic error "-Wpedantic"
>  #endif
> 
> +#include "mlx5.h"
> +#include "mlx5_prm.h"
> +
>  /* Pattern outer Layer bits. */
>  #define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)  #define
> MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1) @@ -273,13 +276,16 @@
> struct mlx5_flow {  struct mlx5_flow_counter {
>  	LIST_ENTRY(mlx5_flow_counter) next; /**< Pointer to the next counter.
> */
>  	uint32_t shared:1; /**< Share counter ID with other flow rules. */
> -	uint32_t ref_cnt:31; /**< Reference counter. */
> +	uint32_t ref_cnt:30; /**< Reference counter. */

Why do you take 1 bit out of ref_cnt?

>  	uint32_t id; /**< Counter ID. */
> +	union {  /**< Holds the counters for the rule. */
>  #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
> -	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
> +		struct ibv_counter_set *cs;
>  #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
> -	struct ibv_counters *cs; /**< Holds the counters for the rule. */
> +		struct ibv_counters *cs;
>  #endif
> +		struct mlx5_devx_counter_set *dcs;
> +	};
>  	uint64_t hits; /**< Number of packets matched by the rule. */
>  	uint64_t bytes; /**< Number of bytes matched by the rule. */  }; diff --
> git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
> index 1f31874..14dadce 100644
> --- a/drivers/net/mlx5/mlx5_flow_dv.c
> +++ b/drivers/net/mlx5/mlx5_flow_dv.c
> @@ -97,6 +97,36 @@
>  }
> 
>  /**
> + * Validate count action.
> + *
> + * @param[in] dev
> + *   device otr.
> + * @param[out] error
> + *   Pointer to error structure.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +flow_dv_validate_action_count(struct rte_eth_dev *dev,
> +			      struct rte_flow_error *error)
> +{
> +	struct priv *priv = dev->data->dev_private;
> +
> +	if (!priv->config.devx)
> +		goto notsup_err;
> +#ifdef HAVE_IBV_FLOW_DEVX_COUNTERS
> +	return 0;
> +#endif
> +notsup_err:
> +	return rte_flow_error_set
> +		      (error, ENOTSUP,
> +		       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> +		       NULL,
> +		       "count action not supported"); }
> +
> +/**
>   * Validate the L2 encap action.
>   *
>   * @param[in] action_flags
> @@ -704,6 +734,87 @@
>  }
> 
>  /**
> + * Get or create a flow counter.
> + *
> + * @param[in] dev
> + *   Pointer to the Ethernet device structure.
> + * @param[in] shared
> + *   Indicate if this counter is shared with other flows.
> + * @param[in] id
> + *   Counter identifier.
> + *
> + * @return
> + *   pointer to flow counter on success, NULL otherwise and rte_errno is set.
> + */
> +static struct mlx5_flow_counter *
> +flow_dv_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t
> +id) {
> +	struct priv *priv = dev->data->dev_private;
> +	struct mlx5_flow_counter *cnt = NULL;
> +	struct mlx5_devx_counter_set *dcs = NULL;
> +	int ret;
> +
> +	if (!priv->config.devx) {
> +		ret = -ENOTSUP;
> +		goto error_exit;
> +	}
> +	if (shared) {
> +		LIST_FOREACH(cnt, &priv->flow_counters, next) {
> +			if (cnt->shared && cnt->id == id) {
> +				cnt->ref_cnt++;
> +				return cnt;
> +			}
> +		}
> +	}
> +	cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
> +	dcs = rte_calloc(__func__, 1, sizeof(*dcs), 0);
> +	if (!dcs || !cnt) {
> +		ret = -ENOMEM;
> +		goto error_exit;
> +	}
> +	ret = mlx5_devx_cmd_flow_counter_alloc(priv->ctx, dcs);
> +	if (ret)
> +		goto error_exit;
> +	struct mlx5_flow_counter tmpl = {
> +		.shared = shared,
> +		.ref_cnt = 1,
> +		.id = id,
> +		.dcs = dcs,
> +	};
> +	*cnt = tmpl;
> +	LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
> +	return cnt;
> +error_exit:
> +	rte_free(cnt);
> +	rte_free(dcs);
> +	rte_errno = -ret;
> +	return NULL;
> +}
> +
> +/**
> + * Release a flow counter.
> + *
> + * @param[in] counter
> + *   Pointer to the counter handler.
> + */
> +static void
> +flow_dv_counter_release(struct mlx5_flow_counter *counter) {
> +	int ret;
> +
> +	if (!counter)
> +		return;
> +	if (--counter->ref_cnt == 0) {
> +		ret = mlx5_devx_cmd_flow_counter_free(counter->dcs->obj);
> +		if (ret)
> +			DRV_LOG(ERR, "Failed to free devx counters, %d", ret);
> +		LIST_REMOVE(counter, next);
> +		rte_free(counter->dcs);
> +		rte_free(counter);
> +	}
> +}
> +
> +/**
>   * Verify the @p attributes will be correctly understood by the NIC and store
>   * them in the @p flow if everything is correct.
>   *
> @@ -965,7 +1076,7 @@
>  			++actions_n;
>  			break;
>  		case RTE_FLOW_ACTION_TYPE_COUNT:
> -			ret = mlx5_flow_validate_action_count(dev, attr,
> error);
> +			ret = flow_dv_validate_action_count(dev, error);
>  			if (ret < 0)
>  				return ret;
>  			action_flags |= MLX5_FLOW_ACTION_COUNT; @@ -
> 1902,6 +2013,9 @@
>  		const struct rte_flow_action_queue *queue;
>  		const struct rte_flow_action_rss *rss;
>  		const struct rte_flow_action *action = actions;
> +#ifdef HAVE_IBV_FLOW_DEVX_COUNTERS
> +		const struct rte_flow_action_count *count = action->conf;
> #endif
>  		const uint8_t *rss_key;
> 
>  		switch (actions->type) {
> @@ -1950,6 +2064,37 @@
>  			flow->rss.level = rss->level;
>  			action_flags |= MLX5_FLOW_ACTION_RSS;
>  			break;
> +		case RTE_FLOW_ACTION_TYPE_COUNT:
> +			if (!priv->config.devx) {
> +				rte_errno = ENOTSUP;
> +				goto cnt_err;
> +			}
> +			flow->counter =
> +				flow_dv_counter_new(dev,
> +						    count->shared, count->id);
> +			if (flow->counter == NULL)
> +				goto cnt_err;
> +			dev_flow->dv.actions[actions_n].type =
> +
> 	MLX5DV_FLOW_ACTION_COUNTER_DEVX;
> +			dev_flow->dv.actions[actions_n].obj =
> +						flow->counter->dcs->obj;
> +			action_flags |= MLX5_FLOW_ACTION_COUNT;
> +			++actions_n;
> +			break;
> +cnt_err:
> +			if (rte_errno == ENOTSUP)
> +				return rte_flow_error_set
> +					      (error, ENOTSUP,
> +
> RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> +					       NULL,
> +					       "count action not supported");
> +			else
> +				return rte_flow_error_set
> +						(error, rte_errno,
> +
> RTE_FLOW_ERROR_TYPE_ACTION,
> +						 action,
> +						 "cannot create counter"
> +						  " object.");
>  		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
>  		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
>  			if (flow_dv_create_action_l2_encap(dev, actions, @@
> -2338,8 +2483,6 @@
>  			dv->hrxq = NULL;
>  		}
>  	}
> -	if (flow->counter)
> -		flow->counter = NULL;
>  }
> 
>  /**
> @@ -2358,6 +2501,10 @@
>  	if (!flow)
>  		return;
>  	flow_dv_remove(dev, flow);
> +	if (flow->counter) {
> +		flow_dv_counter_release(flow->counter);
> +		flow->counter = NULL;
> +	}
>  	while (!LIST_EMPTY(&flow->dev_flows)) {
>  		dev_flow = LIST_FIRST(&flow->dev_flows);
>  		LIST_REMOVE(dev_flow, next);
> @@ -2370,22 +2517,91 @@
>  }
> 
>  /**
> + * Query a dv flow  rule for its statistics via devx.
> + *
> + * @param[in] dev
> + *   Pointer to Ethernet device.
> + * @param[in] flow
> + *   Pointer to the sub flow.
> + * @param[out] data
> + *   data retrieved by the query.
> + * @param[out] error
> + *   Perform verbose error reporting if not NULL.
> + *
> + * @return
> + *   0 on success, a negative errno value otherwise and rte_errno is set.
> + */
> +static int
> +flow_dv_query_count(struct rte_eth_dev *dev, struct rte_flow *flow,
> +		    void *data, struct rte_flow_error *error) {
> +	struct priv *priv = dev->data->dev_private;
> +	struct rte_flow_query_count *qc = data;
> +	uint64_t pkts = 0;
> +	uint64_t bytes = 0;
> +	int err;
> +
> +	if (!priv->config.devx)
> +		return rte_flow_error_set(error, ENOTSUP,
> +
> RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> +					  NULL,
> +					  "counters are not supported");
> +	if (flow->counter) {
> +		err = mlx5_devx_cmd_flow_counter_query
> +						(flow->counter->dcs,
> +						 qc->reset, &pkts, &bytes);
> +		if (err)
> +			return rte_flow_error_set
> +				(error, err,
> +				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> +				 NULL,
> +				 "cannot read counters");
> +		qc->hits_set = 1;
> +		qc->bytes_set = 1;
> +		qc->hits = pkts - flow->counter->hits;
> +		qc->bytes = bytes - flow->counter->bytes;
> +		if (qc->reset) {
> +			flow->counter->hits = pkts;
> +			flow->counter->bytes = bytes;
> +		}
> +		return 0;
> +	}
> +	return rte_flow_error_set(error, EINVAL,
> +				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> +				  NULL,
> +				  "counters are not available");
> +}
> +
> +/**
>   * Query a flow.
>   *
>   * @see rte_flow_query()
>   * @see rte_flow_ops
>   */
>  static int
> -flow_dv_query(struct rte_eth_dev *dev __rte_unused,
> +flow_dv_query(struct rte_eth_dev *dev,
>  	      struct rte_flow *flow __rte_unused,
>  	      const struct rte_flow_action *actions __rte_unused,
>  	      void *data __rte_unused,
>  	      struct rte_flow_error *error __rte_unused)  {
> -	return rte_flow_error_set(error, ENOTSUP,
> -				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
> -				  NULL,
> -				  "flow query with DV is not supported");
> +	int ret = -EINVAL;
> +
> +	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
> +		switch (actions->type) {
> +		case RTE_FLOW_ACTION_TYPE_VOID:
> +			break;
> +		case RTE_FLOW_ACTION_TYPE_COUNT:
> +			ret = flow_dv_query_count(dev, flow, data, error);
> +			break;
> +		default:
> +			return rte_flow_error_set(error, ENOTSUP,
> +
> RTE_FLOW_ERROR_TYPE_ACTION,
> +						  actions,
> +						  "action not supported");
> +		}
> +	}
> +	return ret;
>  }
> 
> 
> diff --git a/drivers/net/mlx5/mlx5_glue.c b/drivers/net/mlx5/mlx5_glue.c index
> 7d3d9d3..4078b54 100644
> --- a/drivers/net/mlx5/mlx5_glue.c
> +++ b/drivers/net/mlx5/mlx5_glue.c
> @@ -65,6 +65,7 @@
>  	return ibv_open_device(device);
>  }
> 
> +

Remove blank line. 

>  static int
>  mlx5_glue_close_device(struct ibv_context *context)  { diff --git
> a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h index
> 29742b1..812b4bf 100644
> --- a/drivers/net/mlx5/mlx5_prm.h
> +++ b/drivers/net/mlx5/mlx5_prm.h
> @@ -368,6 +368,7 @@ struct mlx5_modification_cmd {  #define
> __mlx5_dw_bit_off(typ, fld) (32 - __mlx5_bit_sz(typ, fld) - \
>  				    (__mlx5_bit_off(typ, fld) & 0x1f))  #define
> __mlx5_dw_off(typ, fld) (__mlx5_bit_off(typ, fld) / 32)
> +#define __mlx5_64_off(typ, fld) (__mlx5_bit_off(typ, fld) / 64)
>  #define __mlx5_dw_mask(typ, fld) (__mlx5_mask(typ, fld) << \
>  				  __mlx5_dw_bit_off(typ, fld))
>  #define __mlx5_mask(typ, fld) ((u32)((1ull << __mlx5_bit_sz(typ, fld)) - 1)) @@
> -375,6 +376,7 @@ struct mlx5_modification_cmd {  #define
> __mlx5_16_bit_off(typ, fld) (16 - __mlx5_bit_sz(typ, fld) - \
>  				    (__mlx5_bit_off(typ, fld) & 0xf))  #define
> __mlx5_mask16(typ, fld) ((u16)((1ull << __mlx5_bit_sz(typ, fld)) - 1))
> +#define MLX5_ST_SZ_BYTES(typ) (sizeof(struct mlx5_ifc_##typ##_bits) /
> +8)
>  #define MLX5_ST_SZ_DW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 32)
> #define MLX5_ST_SZ_DB(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 8)
> #define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8) @@ -391,10
> +393,16 @@ struct mlx5_modification_cmd {
>  				 (((_v) & __mlx5_mask(typ, fld)) << \
>  				   __mlx5_dw_bit_off(typ, fld))); \
>  	} while (0)
> +#define MLX5_GET(typ, p, fld) \
> +	((rte_be_to_cpu_32(*((__be32 *)(p) +\
> +	__mlx5_dw_off(typ, fld))) >> __mlx5_dw_bit_off(typ, fld)) & \
> +	__mlx5_mask(typ, fld))
>  #define MLX5_GET16(typ, p, fld) \
>  	((rte_be_to_cpu_16(*((__be16 *)(p) + \
>  	  __mlx5_16_off(typ, fld))) >> __mlx5_16_bit_off(typ, fld)) & \
>  	 __mlx5_mask16(typ, fld))
> +#define MLX5_GET64(typ, p, fld) rte_be_to_cpu_64(*((__be64 *)(p) + \
> +						   __mlx5_64_off(typ, fld)))
>  #define MLX5_FLD_SZ_BYTES(typ, fld) (__mlx5_bit_sz(typ, fld) / 8)
> 
>  struct mlx5_ifc_fte_match_set_misc_bits { @@ -500,6 +508,69 @@ enum {
>  	MLX5_MATCH_CRITERIA_ENABLE_MISC2_BIT
>  };
> 
> +enum {
> +	MLX5_CMD_OP_ALLOC_FLOW_COUNTER = 0x939,
> +	MLX5_CMD_OP_QUERY_FLOW_COUNTER = 0x93b, };
> +
> +/* Flow counters. */
> +struct mlx5_ifc_alloc_flow_counter_out_bits {
> +	u8         status[0x8];
> +	u8         reserved_at_8[0x18];
> +	u8         syndrome[0x20];
> +	u8         flow_counter_id[0x20];
> +	u8         reserved_at_60[0x20];
> +};
> +
> +struct mlx5_ifc_alloc_flow_counter_in_bits {
> +	u8         opcode[0x10];
> +	u8         reserved_at_10[0x10];
> +	u8         reserved_at_20[0x10];
> +	u8         op_mod[0x10];
> +	u8         reserved_at_40[0x40];
> +};
> +
> +struct mlx5_ifc_dealloc_flow_counter_out_bits {
> +	u8         status[0x8];
> +	u8         reserved_at_8[0x18];
> +	u8         syndrome[0x20];
> +	u8         reserved_at_40[0x40];
> +};
> +
> +struct mlx5_ifc_dealloc_flow_counter_in_bits {
> +	u8         opcode[0x10];
> +	u8         reserved_at_10[0x10];
> +	u8         reserved_at_20[0x10];
> +	u8         op_mod[0x10];
> +	u8         flow_counter_id[0x20];
> +	u8         reserved_at_60[0x20];
> +};
> +
> +struct mlx5_ifc_traffic_counter_bits {
> +	u8         packets[0x40];
> +	u8         octets[0x40];
> +};
> +
> +struct mlx5_ifc_query_flow_counter_out_bits {
> +	u8         status[0x8];
> +	u8         reserved_at_8[0x18];
> +	u8         syndrome[0x20];
> +	u8         reserved_at_40[0x40];
> +	struct mlx5_ifc_traffic_counter_bits flow_statistics[]; };
> +
> +struct mlx5_ifc_query_flow_counter_in_bits {
> +	u8         opcode[0x10];
> +	u8         reserved_at_10[0x10];
> +	u8         reserved_at_20[0x10];
> +	u8         op_mod[0x10];
> +	u8         reserved_at_40[0x80];
> +	u8         clear[0x1];
> +	u8         reserved_at_c1[0xf];
> +	u8         num_of_counters[0x10];
> +	u8         flow_counter_id[0x20];
> +};
> +
>  /* CQE format mask. */
>  #define MLX5E_CQE_FORMAT_MASK 0xc
> 
> @@ -581,4 +652,19 @@ struct mlx5_mini_cqe8 {  #endif  }
> 
> +

The declarations below do not belong in mlx5_prm.h; they should go either in mlx5.h or in a new mlx5_devx.h.
They also need to be ifdef'ed accordingly.

> +/* devx counyter object */
> +struct mlx5_devx_counter_set {
> +	struct mlx5dv_devx_obj *obj;
> +	int id; /* Flow counter ID */
> +};
> +
> +/* mlx5_devx_cmds.c */
> +
> +int mlx5_devx_cmd_flow_counter_alloc(struct ibv_context *ctx,
> +				     struct mlx5_devx_counter_set *dcx); int
> +mlx5_devx_cmd_flow_counter_free(struct mlx5dv_devx_obj *obj); int
> +mlx5_devx_cmd_flow_counter_query(struct mlx5_devx_counter_set *dcx,
> +				     int clear,
> +				     uint64_t *pkts, uint64_t *bytes);
>  #endif /* RTE_PMD_MLX5_PRM_H_ */
> --
> 1.8.3.1
  

Patch

diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 58e2d15..bd96706 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -36,6 +36,7 @@  SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_tcf.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_verbs.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_nl.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_devx_cmds.c
 
 ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y)
 INSTALL-$(CONFIG_RTE_LIBRTE_MLX5_PMD)-lib += $(LIB_GLUE)
@@ -153,6 +154,11 @@  mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
 		func mlx5dv_devx_obj_create \
 		$(AUTOCONF_OUTPUT)
 	$Q sh -- '$<' '$@' \
+		HAVE_IBV_FLOW_DEVX_COUNTERS \
+		infiniband/mlx5dv.h \
+		enum MLX5DV_FLOW_ACTION_COUNTER_DEVX \
+		$(AUTOCONF_OUTPUT)
+	$Q sh -- '$<' '$@' \
 		HAVE_ETHTOOL_LINK_MODE_25G \
 		/usr/include/linux/ethtool.h \
 		enum ETHTOOL_LINK_MODE_25000baseCR_Full_BIT \
diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
index e323c3a..9a5077d 100644
--- a/drivers/net/mlx5/meson.build
+++ b/drivers/net/mlx5/meson.build
@@ -46,6 +46,7 @@  if build
 		'mlx5_trigger.c',
 		'mlx5_txq.c',
 		'mlx5_vlan.c',
+		'mlx5_devx_cmds.c',
 	)
 	if dpdk_conf.has('RTE_ARCH_X86_64') or dpdk_conf.has('RTE_ARCH_ARM64')
 		sources += files('mlx5_rxtx_vec.c')
@@ -100,6 +101,10 @@  if build
 		'MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD' ],
 		[ 'HAVE_IBV_FLOW_DV_SUPPORT', 'infiniband/mlx5dv.h',
 		'mlx5dv_create_flow_action_packet_reformat' ],
+		[ 'HAVE_IBV_FLOW_DEVX_COUNTERS', 'infiniband/mlx5dv.h',
+		'MLX5DV_FLOW_ACTION_COUNTER_DEVX' ],
+		[ 'HAVE_IBV_DEVX_OBJ', 'infiniband/mlx5dv.h',
+		'MLX5DV_CONTEXT_FLAGS_DEVX' ],
 		[ 'HAVE_IBV_DEVICE_MPLS_SUPPORT', 'infiniband/verbs.h',
 		'IBV_FLOW_SPEC_MPLS' ],
 		[ 'HAVE_IBV_WQ_FLAG_RX_END_PADDING', 'infiniband/verbs.h',
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 9e5cab1..1e00b8b 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -727,7 +727,7 @@ 
 	       struct mlx5_dev_config config,
 	       const struct mlx5_switch_info *switch_info)
 {
-	struct ibv_context *ctx;
+	struct ibv_context *ctx = NULL;
 	struct ibv_device_attr_ex attr;
 	struct ibv_port_attr port_attr;
 	struct ibv_pd *pd = NULL;
@@ -786,10 +786,17 @@ 
 	/* Prepare shared data between primary and secondary process. */
 	mlx5_prepare_shared_data();
 	errno = 0;
-	ctx = mlx5_glue->open_device(ibv_dev);
-	if (!ctx) {
-		rte_errno = errno ? errno : ENODEV;
-		return NULL;
+	ctx = mlx5_glue->dv_open_device(ibv_dev);
+	if (ctx) {
+		config.devx = 1;
+		DRV_LOG(DEBUG, "DEVX is %ssupported",
+			config.devx ? "" : "not ");
+	} else {
+		ctx = mlx5_glue->open_device(ibv_dev);
+		if (!ctx) {
+			rte_errno = errno ? errno : ENODEV;
+			return NULL;
+		}
 	}
 #ifdef HAVE_IBV_MLX5_MOD_SWP
 	dv_attr.comp_mask |= MLX5DV_CONTEXT_MASK_SWP;
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 75aeeb2..1fcdb71 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -129,6 +129,7 @@  struct mlx5_dev_config {
 	unsigned int vf_nl_en:1; /* Enable Netlink requests in VF mode. */
 	unsigned int dv_flow_en:1; /* Enable DV flow. */
 	unsigned int swp:1; /* Tx generic tunnel checksum and TSO offload. */
+	unsigned int devx:1; /* Whether devx interface is available or not. */
 	struct {
 		unsigned int enabled:1; /* Whether MPRQ is enabled. */
 		unsigned int stride_num_n; /* Number of strides. */
diff --git a/drivers/net/mlx5/mlx5_devx_cmds.c b/drivers/net/mlx5/mlx5_devx_cmds.c
new file mode 100644
index 0000000..a3f421e
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_devx_cmds.c
@@ -0,0 +1,117 @@ 
+// SPDX-License-Identifier: BSD-3-Clause
+/* Copyright 2018 Mellanox Technologies, Ltd */
+
+#include <rte_flow_driver.h>
+
+#include "mlx5.h"
+#include "mlx5_glue.h"
+#include "mlx5_prm.h"
+
+/*
+ * Dummy struct to prevent compilation errors when
+ * mlx5dv_devx_obj is not defined in mlx5dv.h
+ */
+#ifndef HAVE_IBV_DEVX_OBJ
+struct mlx5dv_devx_obj {
+	void *ctx;
+};
+#endif /* HAVE_IBV_DEVX_OBJ */
+
+/**
+ * Allocate flow counters via devx interface.
+ *
+ * @param[in] ctx
+ *   ibv contexts returned from mlx5dv_open_device.
+ * @param dcs
+ *   Pointer to counters properties structure to be filled by the routine.
+ *
+ * @return
+ *   0 on success, a negative value otherwise.
+ */
+int mlx5_devx_cmd_flow_counter_alloc(struct ibv_context *ctx,
+				     struct mlx5_devx_counter_set *dcs)
+{
+	uint32_t in[MLX5_ST_SZ_DW(alloc_flow_counter_in)]   = {0};
+	uint32_t out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0};
+	int status, syndrome;
+
+	MLX5_SET(alloc_flow_counter_in, in, opcode,
+		 MLX5_CMD_OP_ALLOC_FLOW_COUNTER);
+	dcs->obj = mlx5_glue->devx_obj_create(ctx, in,
+					      sizeof(in), out, sizeof(out));
+	if (!dcs->obj)
+		return -errno;
+	status = MLX5_GET(query_flow_counter_out, out, status);
+	syndrome = MLX5_GET(query_flow_counter_out, out, syndrome);
+	if (status) {
+		DRV_LOG(DEBUG, "Failed to create devx counters, "
+			"status %x, syndrome %x", status, syndrome);
+		return -1;
+	}
+	dcs->id = MLX5_GET(alloc_flow_counter_out,
+			   out, flow_counter_id);
+	return 0;
+}
+
+/**
+ * Free flow counters obtained via devx interface.
+ *
+ * @param[in] obj
+ *   devx object that was obtained from mlx5_devx_cmd_fc_alloc.
+ *
+ * @return
+ *   0 on success, a negative value otherwise.
+ */
+int mlx5_devx_cmd_flow_counter_free(struct mlx5dv_devx_obj *obj)
+{
+	return mlx5_glue->devx_obj_destroy(obj);
+}
+
+/**
+ * Query flow counters values.
+ *
+ * @param[in] dcs
+ *   devx object that was obtained from mlx5_devx_cmd_fc_alloc.
+ * @param[in] clear
+ *   Whether hardware should clear the counters after the query or not.
+ *  @param pkts
+ *   The number of packets that matched the flow.
+ *  @param bytes
+ *    The number of bytes that matched the flow.
+ *
+ * @return
+ *   0 on success, a negative value otherwise.
+ */
+int
+mlx5_devx_cmd_flow_counter_query(struct mlx5_devx_counter_set *dcs,
+				 int clear __rte_unused,
+				 uint64_t *pkts, uint64_t *bytes)
+{
+	uint32_t out[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
+		MLX5_ST_SZ_BYTES(traffic_counter)]   = {0};
+	uint32_t in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0};
+	void *stats;
+	int status, syndrome, rc;
+
+	MLX5_SET(query_flow_counter_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_FLOW_COUNTER);
+	MLX5_SET(query_flow_counter_in, in, op_mod, 0);
+	MLX5_SET(query_flow_counter_in, in, flow_counter_id, dcs->id);
+	rc = mlx5_glue->devx_obj_query(dcs->obj,
+				       in, sizeof(in), out, sizeof(out));
+	if (rc)
+		return rc;
+	status = MLX5_GET(query_flow_counter_out, out, status);
+	syndrome = MLX5_GET(query_flow_counter_out, out, syndrome);
+	if (status) {
+		DRV_LOG(DEBUG, "Failed to query devx counters, "
+			"id %d, status %x, syndrome = %x",
+			status, syndrome, dcs->id);
+		return -1;
+	}
+	stats = MLX5_ADDR_OF(query_flow_counter_out,
+			     out, flow_statistics);
+	*pkts = MLX5_GET64(traffic_counter, stats, packets);
+	*bytes = MLX5_GET64(traffic_counter, stats, octets);
+	return 0;
+}
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
index 4a7c052..838c85a 100644
--- a/drivers/net/mlx5/mlx5_flow.h
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -21,6 +21,9 @@ 
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
 
+#include "mlx5.h"
+#include "mlx5_prm.h"
+
 /* Pattern outer Layer bits. */
 #define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
 #define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
@@ -273,13 +276,16 @@  struct mlx5_flow {
 struct mlx5_flow_counter {
 	LIST_ENTRY(mlx5_flow_counter) next; /**< Pointer to the next counter. */
 	uint32_t shared:1; /**< Share counter ID with other flow rules. */
-	uint32_t ref_cnt:31; /**< Reference counter. */
+	uint32_t ref_cnt:30; /**< Reference counter. */
 	uint32_t id; /**< Counter ID. */
+	union {  /**< Holds the counters for the rule. */
 #if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
-	struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
+		struct ibv_counter_set *cs;
 #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
-	struct ibv_counters *cs; /**< Holds the counters for the rule. */
+		struct ibv_counters *cs;
 #endif
+		struct mlx5_devx_counter_set *dcs;
+	};
 	uint64_t hits; /**< Number of packets matched by the rule. */
 	uint64_t bytes; /**< Number of bytes matched by the rule. */
 };
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
index 1f31874..14dadce 100644
--- a/drivers/net/mlx5/mlx5_flow_dv.c
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -97,6 +97,36 @@ 
 }
 
 /**
+ * Check whether the count action can be used on this device.
+ *
+ * The action is accepted only when the driver was built with
+ * HAVE_IBV_FLOW_DEVX_COUNTERS and devx is enabled in the device configuration.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_action_count(struct rte_eth_dev *dev,
+			      struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+	int supported = !!priv->config.devx;
+
+#ifndef HAVE_IBV_FLOW_DEVX_COUNTERS
+	/* No devx counters at build time: never supported. */
+	supported = 0;
+#endif
+	if (supported)
+		return 0;
+	return rte_flow_error_set(error, ENOTSUP,
+				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				  NULL,
+				  "count action not supported");
+}
+
+/**
  * Validate the L2 encap action.
  *
  * @param[in] action_flags
@@ -704,6 +734,87 @@ 
 }
 
 /**
+ * Look up a shared flow counter or allocate a new one.
+ *
+ * When @p shared is set and a shared counter with the same @p id is already
+ * on the device counter list, its reference count is incremented and it is
+ * returned. Otherwise a new counter backed by a devx counter set is
+ * allocated and inserted at the head of the list.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] shared
+ *   Indicate if this counter is shared with other flows.
+ * @param[in] id
+ *   Counter identifier.
+ *
+ * @return
+ *   pointer to flow counter on success, NULL otherwise and rte_errno is set.
+ */
+static struct mlx5_flow_counter *
+flow_dv_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
+{
+	struct priv *priv = dev->data->dev_private;
+	struct mlx5_flow_counter *counter = NULL;
+	struct mlx5_devx_counter_set *devx_cs = NULL;
+	int err = -ENOTSUP;
+
+	if (!priv->config.devx)
+		goto fail;
+	if (shared) {
+		/* Reuse an existing shared counter carrying the same ID. */
+		LIST_FOREACH(counter, &priv->flow_counters, next) {
+			if (!counter->shared || counter->id != id)
+				continue;
+			counter->ref_cnt++;
+			return counter;
+		}
+	}
+	counter = rte_calloc(__func__, 1, sizeof(*counter), 0);
+	devx_cs = rte_calloc(__func__, 1, sizeof(*devx_cs), 0);
+	err = -ENOMEM;
+	if (counter == NULL || devx_cs == NULL)
+		goto fail;
+	err = mlx5_devx_cmd_flow_counter_alloc(priv->ctx, devx_cs);
+	if (err != 0)
+		goto fail;
+	/* The counter is zero-allocated, only the non-zero fields are set. */
+	counter->shared = shared;
+	counter->ref_cnt = 1;
+	counter->id = id;
+	counter->dcs = devx_cs;
+	LIST_INSERT_HEAD(&priv->flow_counters, counter, next);
+	return counter;
+fail:
+	rte_free(counter);
+	rte_free(devx_cs);
+	rte_errno = -err;
+	return NULL;
+}
+
+/**
+ * Drop one reference to a flow counter.
+ *
+ * When the last reference goes away the devx counter object is destroyed,
+ * the counter is unlinked from its list and its memory is freed. A failure
+ * to destroy the devx object is only logged.
+ *
+ * @param[in] counter
+ *   Pointer to the counter handler, may be NULL.
+ */
+static void
+flow_dv_counter_release(struct mlx5_flow_counter *counter)
+{
+	int err;
+
+	if (counter == NULL)
+		return;
+	counter->ref_cnt--;
+	if (counter->ref_cnt != 0)
+		return;
+	err = mlx5_devx_cmd_flow_counter_free(counter->dcs->obj);
+	if (err != 0)
+		DRV_LOG(ERR, "Failed to free devx counters, %d", err);
+	LIST_REMOVE(counter, next);
+	rte_free(counter->dcs);
+	rte_free(counter);
+}
+
+/**
  * Verify the @p attributes will be correctly understood by the NIC and store
  * them in the @p flow if everything is correct.
  *
@@ -965,7 +1076,7 @@ 
 			++actions_n;
 			break;
 		case RTE_FLOW_ACTION_TYPE_COUNT:
-			ret = mlx5_flow_validate_action_count(dev, attr, error);
+			ret = flow_dv_validate_action_count(dev, error);
 			if (ret < 0)
 				return ret;
 			action_flags |= MLX5_FLOW_ACTION_COUNT;
@@ -1902,6 +2013,9 @@ 
 		const struct rte_flow_action_queue *queue;
 		const struct rte_flow_action_rss *rss;
 		const struct rte_flow_action *action = actions;
+#ifdef HAVE_IBV_FLOW_DEVX_COUNTERS
+		const struct rte_flow_action_count *count = action->conf;
+#endif
 		const uint8_t *rss_key;
 
 		switch (actions->type) {
@@ -1950,6 +2064,37 @@ 
 			flow->rss.level = rss->level;
 			action_flags |= MLX5_FLOW_ACTION_RSS;
 			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			if (!priv->config.devx) {
+				rte_errno = ENOTSUP;
+				goto cnt_err;
+			}
+			flow->counter =
+				flow_dv_counter_new(dev,
+						    count->shared, count->id);
+			if (flow->counter == NULL)
+				goto cnt_err;
+			dev_flow->dv.actions[actions_n].type =
+					MLX5DV_FLOW_ACTION_COUNTER_DEVX;
+			dev_flow->dv.actions[actions_n].obj =
+						flow->counter->dcs->obj;
+			action_flags |= MLX5_FLOW_ACTION_COUNT;
+			++actions_n;
+			break;
+cnt_err:
+			if (rte_errno == ENOTSUP)
+				return rte_flow_error_set
+					      (error, ENOTSUP,
+					       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					       NULL,
+					       "count action not supported");
+			else
+				return rte_flow_error_set
+						(error, rte_errno,
+						 RTE_FLOW_ERROR_TYPE_ACTION,
+						 action,
+						 "cannot create counter"
+						  " object.");
 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
 			if (flow_dv_create_action_l2_encap(dev, actions,
@@ -2338,8 +2483,6 @@ 
 			dv->hrxq = NULL;
 		}
 	}
-	if (flow->counter)
-		flow->counter = NULL;
 }
 
 /**
@@ -2358,6 +2501,10 @@ 
 	if (!flow)
 		return;
 	flow_dv_remove(dev, flow);
+	if (flow->counter) {
+		flow_dv_counter_release(flow->counter);
+		flow->counter = NULL;
+	}
 	while (!LIST_EMPTY(&flow->dev_flows)) {
 		dev_flow = LIST_FIRST(&flow->dev_flows);
 		LIST_REMOVE(dev_flow, next);
@@ -2370,22 +2517,91 @@ 
 }
 
 /**
+ * Query a dv flow rule for its statistics via devx.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in] flow
+ *   Pointer to the flow whose counter is read.
+ * @param[in, out] data
+ *   Pointer to a struct rte_flow_query_count: its reset flag is read and
+ *   the hits/bytes values and their "set" flags are written.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_query_count(struct rte_eth_dev *dev, struct rte_flow *flow,
+		    void *data, struct rte_flow_error *error)
+{
+	struct priv *priv = dev->data->dev_private;
+	struct rte_flow_query_count *qc = data;
+	uint64_t pkts = 0;
+	uint64_t bytes = 0;
+	int err;
+
+	if (!priv->config.devx)
+		return rte_flow_error_set(error, ENOTSUP,
+					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+					  NULL,
+					  "counters are not supported");
+	if (flow->counter) {
+		err = mlx5_devx_cmd_flow_counter_query
+						(flow->counter->dcs,
+						 qc->reset, &pkts, &bytes);
+		/*
+		 * The query may report failure with a negative value, while
+		 * rte_flow_error_set() expects a positive error code (it
+		 * stores it in rte_errno and returns its negation).
+		 */
+		if (err)
+			return rte_flow_error_set
+				(error, err < 0 ? -err : err,
+				 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				 NULL,
+				 "cannot read counters");
+		qc->hits_set = 1;
+		qc->bytes_set = 1;
+		/* Report values relative to the baseline kept in the counter. */
+		qc->hits = pkts - flow->counter->hits;
+		qc->bytes = bytes - flow->counter->bytes;
+		if (qc->reset) {
+			/* A reset only moves the software baseline. */
+			flow->counter->hits = pkts;
+			flow->counter->bytes = bytes;
+		}
+		return 0;
+	}
+	return rte_flow_error_set(error, EINVAL,
+				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+				  NULL,
+				  "counters are not available");
+}
+
+/**
  * Query a flow.
  *
  * @see rte_flow_query()
  * @see rte_flow_ops
  */
 static int
-flow_dv_query(struct rte_eth_dev *dev __rte_unused,
-	      struct rte_flow *flow __rte_unused,
-	      const struct rte_flow_action *actions __rte_unused,
-	      void *data __rte_unused,
-	      struct rte_flow_error *error __rte_unused)
+flow_dv_query(struct rte_eth_dev *dev,
+	      struct rte_flow *flow,
+	      const struct rte_flow_action *actions,
+	      void *data,
+	      struct rte_flow_error *error)
 {
-	return rte_flow_error_set(error, ENOTSUP,
-				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-				  NULL,
-				  "flow query with DV is not supported");
+	/* Stays -EINVAL when the action list holds no COUNT action. */
+	int ret = -EINVAL;
+
+	/* Walk the list: VOID is skipped, COUNT is queried, the rest rejected. */
+	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+		switch (actions->type) {
+		case RTE_FLOW_ACTION_TYPE_VOID:
+			break;
+		case RTE_FLOW_ACTION_TYPE_COUNT:
+			ret = flow_dv_query_count(dev, flow, data, error);
+			break;
+		default:
+			return rte_flow_error_set(error, ENOTSUP,
+						  RTE_FLOW_ERROR_TYPE_ACTION,
+						  actions,
+						  "action not supported");
+		}
+	}
+	return ret;
 }
 
 
diff --git a/drivers/net/mlx5/mlx5_glue.c b/drivers/net/mlx5/mlx5_glue.c
index 7d3d9d3..4078b54 100644
--- a/drivers/net/mlx5/mlx5_glue.c
+++ b/drivers/net/mlx5/mlx5_glue.c
@@ -65,6 +65,7 @@ 
 	return ibv_open_device(device);
 }
 
+
 static int
 mlx5_glue_close_device(struct ibv_context *context)
 {
diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 29742b1..812b4bf 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -368,6 +368,7 @@  struct mlx5_modification_cmd {
 #define __mlx5_dw_bit_off(typ, fld) (32 - __mlx5_bit_sz(typ, fld) - \
 				    (__mlx5_bit_off(typ, fld) & 0x1f))
 #define __mlx5_dw_off(typ, fld) (__mlx5_bit_off(typ, fld) / 32)
+#define __mlx5_64_off(typ, fld) (__mlx5_bit_off(typ, fld) / 64)
 #define __mlx5_dw_mask(typ, fld) (__mlx5_mask(typ, fld) << \
 				  __mlx5_dw_bit_off(typ, fld))
 #define __mlx5_mask(typ, fld) ((u32)((1ull << __mlx5_bit_sz(typ, fld)) - 1))
@@ -375,6 +376,7 @@  struct mlx5_modification_cmd {
 #define __mlx5_16_bit_off(typ, fld) (16 - __mlx5_bit_sz(typ, fld) - \
 				    (__mlx5_bit_off(typ, fld) & 0xf))
 #define __mlx5_mask16(typ, fld) ((u16)((1ull << __mlx5_bit_sz(typ, fld)) - 1))
+#define MLX5_ST_SZ_BYTES(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 8)
 #define MLX5_ST_SZ_DW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 32)
 #define MLX5_ST_SZ_DB(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 8)
 #define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8)
@@ -391,10 +393,16 @@  struct mlx5_modification_cmd {
 				 (((_v) & __mlx5_mask(typ, fld)) << \
 				   __mlx5_dw_bit_off(typ, fld))); \
 	} while (0)
+#define MLX5_GET(typ, p, fld) \
+	((rte_be_to_cpu_32(*((__be32 *)(p) +\
+	__mlx5_dw_off(typ, fld))) >> __mlx5_dw_bit_off(typ, fld)) & \
+	__mlx5_mask(typ, fld))
 #define MLX5_GET16(typ, p, fld) \
 	((rte_be_to_cpu_16(*((__be16 *)(p) + \
 	  __mlx5_16_off(typ, fld))) >> __mlx5_16_bit_off(typ, fld)) & \
 	 __mlx5_mask16(typ, fld))
+#define MLX5_GET64(typ, p, fld) rte_be_to_cpu_64(*((__be64 *)(p) + \
+						   __mlx5_64_off(typ, fld)))
 #define MLX5_FLD_SZ_BYTES(typ, fld) (__mlx5_bit_sz(typ, fld) / 8)
 
 struct mlx5_ifc_fte_match_set_misc_bits {
@@ -500,6 +508,69 @@  enum {
 	MLX5_MATCH_CRITERIA_ENABLE_MISC2_BIT
 };
 
+/* Command opcodes of the flow counter commands (written to the opcode field). */
+enum {
+	MLX5_CMD_OP_ALLOC_FLOW_COUNTER = 0x939,
+	MLX5_CMD_OP_QUERY_FLOW_COUNTER = 0x93b,
+};
+
+/* Flow counters. */
+/*
+ * Output layout of the flow counter allocation command.
+ * NOTE(review): flow_counter_id presumably carries the ID of the new
+ * counter - confirm against the PRM.
+ */
+struct mlx5_ifc_alloc_flow_counter_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+	u8         syndrome[0x20];
+	u8         flow_counter_id[0x20];
+	u8         reserved_at_60[0x20];
+};
+
+/* Input layout of the flow counter allocation command. */
+struct mlx5_ifc_alloc_flow_counter_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+	u8         reserved_at_40[0x40];
+};
+
+/* Output layout of the flow counter deallocation command. */
+struct mlx5_ifc_dealloc_flow_counter_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+	u8         syndrome[0x20];
+	u8         reserved_at_40[0x40];
+};
+
+/* Input layout of the flow counter deallocation command. */
+struct mlx5_ifc_dealloc_flow_counter_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+	u8         flow_counter_id[0x20];
+	u8         reserved_at_60[0x20];
+};
+
+/* One counter record: packet and octet counts, read back with MLX5_GET64. */
+struct mlx5_ifc_traffic_counter_bits {
+	u8         packets[0x40];
+	u8         octets[0x40];
+};
+
+/*
+ * Output layout of the flow counter query command: a status/syndrome header
+ * followed by the counter records. flow_statistics is a flexible array, so
+ * the size of this struct covers the header only.
+ */
+struct mlx5_ifc_query_flow_counter_out_bits {
+	u8         status[0x8];
+	u8         reserved_at_8[0x18];
+	u8         syndrome[0x20];
+	u8         reserved_at_40[0x40];
+	struct mlx5_ifc_traffic_counter_bits flow_statistics[];
+};
+
+/*
+ * Input layout of the flow counter query command. The query helper in
+ * mlx5_devx_cmds.c fills opcode, op_mod and flow_counter_id only.
+ */
+struct mlx5_ifc_query_flow_counter_in_bits {
+	u8         opcode[0x10];
+	u8         reserved_at_10[0x10];
+	u8         reserved_at_20[0x10];
+	u8         op_mod[0x10];
+	u8         reserved_at_40[0x80];
+	u8         clear[0x1];
+	u8         reserved_at_c1[0xf];
+	u8         num_of_counters[0x10];
+	u8         flow_counter_id[0x20];
+};
+
 /* CQE format mask. */
 #define MLX5E_CQE_FORMAT_MASK 0xc
 
@@ -581,4 +652,19 @@  struct mlx5_mini_cqe8 {
 #endif
 }
 
+
+/* devx counter object */
+struct mlx5_devx_counter_set {
+	struct mlx5dv_devx_obj *obj; /* devx object backing the counter */
+	int id; /* Flow counter ID */
+};
+
+/* mlx5_devx_cmds.c */
+
+int mlx5_devx_cmd_flow_counter_alloc(struct ibv_context *ctx,
+				     struct mlx5_devx_counter_set *dcx);
+int mlx5_devx_cmd_flow_counter_free(struct mlx5dv_devx_obj *obj);
+int mlx5_devx_cmd_flow_counter_query(struct mlx5_devx_counter_set *dcx,
+				     int clear,
+				     uint64_t *pkts, uint64_t *bytes);
 #endif /* RTE_PMD_MLX5_PRM_H_ */