[v1,5/5] net/iavf: query QoS cap and set queue TC mapping

Message ID 20210601014034.36100-6-ting.xu@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Qi Zhang
Headers
Series Enable ETS-based Tx QoS for VF in DCF |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation success Compilation OK
ci/iol-intel-Functional fail Functional Testing issues
ci/intel-Testing fail Testing issues
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-abi-testing success Testing PASS
ci/github-robot success github build: passed
ci/iol-testing fail Testing issues

Commit Message

Xu, Ting June 1, 2021, 1:40 a.m. UTC
  This patch adds support for the VF to configure ETS-based Tx QoS,
including querying the current QoS configuration from the PF and
configuring the queue-to-TC mapping. The PF QoS is configured in advance,
and the queried information is provided to the user for future use. VF
queues are mapped to different TCs in the PF through virtchnl.

Signed-off-by: Qiming Yang <qiming.yang@intel.com>
Signed-off-by: Ting Xu <ting.xu@intel.com>
---
 drivers/net/iavf/iavf.h        |  45 +++
 drivers/net/iavf/iavf_ethdev.c |  31 ++
 drivers/net/iavf/iavf_tm.c     | 675 +++++++++++++++++++++++++++++++++
 drivers/net/iavf/iavf_vchnl.c  |  56 ++-
 drivers/net/iavf/meson.build   |   1 +
 5 files changed, 807 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/iavf/iavf_tm.c
  

Patch

diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h
index 4f5811ae87..77ddf15f42 100644
--- a/drivers/net/iavf/iavf.h
+++ b/drivers/net/iavf/iavf.h
@@ -6,6 +6,8 @@ 
 #define _IAVF_ETHDEV_H_
 
 #include <rte_kvargs.h>
+#include <rte_tm_driver.h>
+
 #include <iavf_prototype.h>
 #include <iavf_adminq_cmd.h>
 #include <iavf_type.h>
@@ -82,6 +84,8 @@ 
 #define IAVF_RX_DESC_EXT_STATUS_FLEXBH_MASK  0x03
 #define IAVF_RX_DESC_EXT_STATUS_FLEXBH_FD_ID 0x01
 
+#define IAVF_BITS_PER_BYTE 8
+
 struct iavf_adapter;
 struct iavf_rx_queue;
 struct iavf_tx_queue;
@@ -129,6 +133,38 @@  enum iavf_aq_result {
 	IAVF_MSG_CMD,      /* Read async command result */
 };
 
+/* Struct to store Traffic Manager node configuration. */
+struct iavf_tm_node {
+	TAILQ_ENTRY(iavf_tm_node) node; /* linkage in the TC or queue list */
+	uint32_t id; /* node id given when the node was added */
+	uint32_t tc; /* TC index; a queue node copies it from its parent */
+	uint32_t priority;
+	uint32_t weight;
+	uint32_t reference_count; /* number of child nodes attached */
+	struct iavf_tm_node *parent; /* NULL for the root node */
+	struct rte_tm_node_params params; /* copy of the params given at add */
+};
+
+TAILQ_HEAD(iavf_tm_node_list, iavf_tm_node);
+
+/*
+ * node type of Traffic Manager
+ *
+ * The values are also compared against the level_id passed by the
+ * application, so the order port -> TC -> queue is the hierarchy depth.
+ */
+enum iavf_tm_node_type {
+	IAVF_TM_NODE_TYPE_PORT, /* root node */
+	IAVF_TM_NODE_TYPE_TC, /* child of the root node */
+	IAVF_TM_NODE_TYPE_QUEUE, /* child of a TC node, the leaf */
+	IAVF_TM_NODE_TYPE_MAX, /* number of levels; initial "unknown" value */
+};
+
+/* Struct to store all the Traffic Manager configuration. */
+struct iavf_tm_conf {
+	struct iavf_tm_node *root; /* root node - vf vsi */
+	struct iavf_tm_node_list tc_list; /* node list for all the TCs */
+	struct iavf_tm_node_list queue_list; /* node list for all the queues */
+	uint32_t nb_tc_node; /* number of nodes currently in tc_list */
+	uint32_t nb_queue_node; /* number of nodes currently in queue_list */
+	bool committed; /* when set, node add/delete requests are rejected */
+};
+
 /* Structure to store private data specific for VF instance. */
 struct iavf_info {
 	uint16_t num_queue_pairs;
@@ -175,6 +211,9 @@  struct iavf_info {
 	struct iavf_fdir_info fdir; /* flow director info */
 	/* indicate large VF support enabled or not */
 	bool lv_enabled;
+
+	struct virtchnl_qos_cap_list *qos_cap;
+	struct iavf_tm_conf tm_conf;
 };
 
 #define IAVF_MAX_PKT_TYPE 1024
@@ -344,4 +383,10 @@  int iavf_add_del_mc_addr_list(struct iavf_adapter *adapter,
 			uint32_t mc_addrs_num, bool add);
 int iavf_request_queues(struct iavf_adapter *adapter, uint16_t num);
 int iavf_get_max_rss_queue_region(struct iavf_adapter *adapter);
+int iavf_get_qos_cap(struct iavf_adapter *adapter);
+int iavf_set_q_tc_map(struct rte_eth_dev *dev,
+			struct virtchnl_queue_tc_mapping *q_tc_mapping,
+			uint16_t size);
+void iavf_tm_conf_init(struct rte_eth_dev *dev);
+extern const struct rte_tm_ops iavf_tm_ops;
 #endif /* _IAVF_ETHDEV_H_ */
diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c
index cb38fe81e1..e0a03a0bee 100644
--- a/drivers/net/iavf/iavf_ethdev.c
+++ b/drivers/net/iavf/iavf_ethdev.c
@@ -122,6 +122,7 @@  static int iavf_dev_flow_ops_get(struct rte_eth_dev *dev,
 static int iavf_set_mc_addr_list(struct rte_eth_dev *dev,
 			struct rte_ether_addr *mc_addrs,
 			uint32_t mc_addrs_num);
+static int iavf_tm_ops_get(struct rte_eth_dev *dev __rte_unused, void *arg);
 
 static const struct rte_pci_id pci_id_iavf_map[] = {
 	{ RTE_PCI_DEVICE(IAVF_INTEL_VENDOR_ID, IAVF_DEV_ID_ADAPTIVE_VF) },
@@ -200,8 +201,21 @@  static const struct eth_dev_ops iavf_eth_dev_ops = {
 	.flow_ops_get               = iavf_dev_flow_ops_get,
 	.tx_done_cleanup	    = iavf_dev_tx_done_cleanup,
 	.get_monitor_addr           = iavf_get_monitor_addr,
+	.tm_ops_get                 = iavf_tm_ops_get,
 };
 
+/*
+ * Store the address of the iavf_tm_ops table through arg.
+ * Returns 0 on success, -EINVAL when arg is NULL.
+ */
+static int
+iavf_tm_ops_get(struct rte_eth_dev *dev __rte_unused,
+			void *arg)
+{
+	const void **ops_out = arg;
+
+	if (ops_out == NULL)
+		return -EINVAL;
+
+	*ops_out = &iavf_tm_ops;
+	return 0;
+}
+
 static int
 iavf_set_mc_addr_list(struct rte_eth_dev *dev,
 			struct rte_ether_addr *mc_addrs,
@@ -806,6 +820,11 @@  iavf_dev_start(struct rte_eth_dev *dev)
 				      dev->data->nb_tx_queues);
 	num_queue_pairs = vf->num_queue_pairs;
 
+	if (iavf_get_qos_cap(adapter)) {
+		PMD_INIT_LOG(ERR, "Failed to get qos capability");
+		return -1;
+	}
+
 	if (iavf_init_queues(dev) != 0) {
 		PMD_DRV_LOG(ERR, "failed to do Queue init");
 		return -1;
@@ -2090,6 +2109,15 @@  iavf_init_vf(struct rte_eth_dev *dev)
 		PMD_INIT_LOG(ERR, "unable to allocate vf_res memory");
 		goto err_api;
 	}
+
+	bufsz = sizeof(struct virtchnl_qos_cap_list) +
+		IAVF_MAX_TRAFFIC_CLASS * sizeof(struct virtchnl_qos_cap_elem);
+	vf->qos_cap = rte_zmalloc("qos_cap", bufsz, 0);
+	if (!vf->qos_cap) {
+		PMD_INIT_LOG(ERR, "unable to allocate qos_cap memory");
+		/* vf_res is already allocated at this point, so unwind through
+		 * err_alloc, which frees it; freeing the NULL qos_cap there is
+		 * a no-op.
+		 */
+		goto err_alloc;
+	}
+
 	if (iavf_get_vf_resource(adapter) != 0) {
 		PMD_INIT_LOG(ERR, "iavf_get_vf_config failed");
 		goto err_alloc;
@@ -2131,6 +2159,7 @@  iavf_init_vf(struct rte_eth_dev *dev)
 	rte_free(vf->rss_key);
 	rte_free(vf->rss_lut);
 err_alloc:
+	rte_free(vf->qos_cap);
 	rte_free(vf->vf_res);
 	vf->vsi_res = NULL;
 err_api:
@@ -2299,6 +2328,8 @@  iavf_dev_init(struct rte_eth_dev *eth_dev)
 
 	iavf_default_rss_disable(adapter);
 
+	iavf_tm_conf_init(eth_dev);
+
 	return 0;
 }
 
diff --git a/drivers/net/iavf/iavf_tm.c b/drivers/net/iavf/iavf_tm.c
new file mode 100644
index 0000000000..b8e11cbe84
--- /dev/null
+++ b/drivers/net/iavf/iavf_tm.c
@@ -0,0 +1,675 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2017 Intel Corporation
+ */
+#include <rte_tm_driver.h>
+
+#include "iavf.h"
+
+static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
+				 __rte_unused int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error);
+static int iavf_tm_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error);
+static int iavf_tm_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+			    struct rte_tm_error *error);
+static int iavf_tm_capabilities_get(struct rte_eth_dev *dev,
+			 struct rte_tm_capabilities *cap,
+			 struct rte_tm_error *error);
+static int iavf_level_capabilities_get(struct rte_eth_dev *dev,
+			    uint32_t level_id,
+			    struct rte_tm_level_capabilities *cap,
+			    struct rte_tm_error *error);
+static int iavf_node_capabilities_get(struct rte_eth_dev *dev,
+				      uint32_t node_id,
+				      struct rte_tm_node_capabilities *cap,
+				      struct rte_tm_error *error);
+static int iavf_node_type_get(struct rte_eth_dev *dev, uint32_t node_id,
+		   int *is_leaf, struct rte_tm_error *error);
+
+/*
+ * Traffic Manager callbacks implemented by this driver. The table is handed
+ * out by iavf_tm_ops_get(); callbacks that are not listed stay NULL.
+ */
+const struct rte_tm_ops iavf_tm_ops = {
+	.node_add = iavf_tm_node_add,
+	.node_delete = iavf_tm_node_delete,
+	.capabilities_get = iavf_tm_capabilities_get,
+	.level_capabilities_get = iavf_level_capabilities_get,
+	.node_capabilities_get = iavf_node_capabilities_get,
+	.node_type_get = iavf_node_type_get,
+	.hierarchy_commit = iavf_hierarchy_commit,
+};
+
+/* Reset the VF's Traffic Manager state to an empty, uncommitted hierarchy. */
+void
+iavf_tm_conf_init(struct rte_eth_dev *dev)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct iavf_tm_conf *conf = &vf->tm_conf;
+
+	conf->committed = false;
+	conf->root = NULL;
+
+	/* no TC or queue nodes yet */
+	conf->nb_tc_node = 0;
+	conf->nb_queue_node = 0;
+	TAILQ_INIT(&conf->tc_list);
+	TAILQ_INIT(&conf->queue_list);
+}
+
+
+/*
+ * Look a node up by id: the root is checked first, then the TC list, then
+ * the queue list. On a hit *node_type is set to the kind of node found and
+ * the node is returned; on a miss NULL is returned and *node_type is left
+ * untouched.
+ */
+static inline struct iavf_tm_node *
+iavf_tm_node_search(struct rte_eth_dev *dev,
+		    uint32_t node_id, enum iavf_tm_node_type *node_type)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct iavf_tm_conf *conf = &vf->tm_conf;
+	struct iavf_tm_node *cur;
+
+	if (conf->root != NULL && conf->root->id == node_id) {
+		*node_type = IAVF_TM_NODE_TYPE_PORT;
+		return conf->root;
+	}
+
+	TAILQ_FOREACH(cur, &conf->tc_list, node) {
+		if (cur->id != node_id)
+			continue;
+		*node_type = IAVF_TM_NODE_TYPE_TC;
+		return cur;
+	}
+
+	TAILQ_FOREACH(cur, &conf->queue_list, node) {
+		if (cur->id != node_id)
+			continue;
+		*node_type = IAVF_TM_NODE_TYPE_QUEUE;
+		return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Validate the parameters of a node that is about to be added.
+ *
+ * Only priority 0 and weight 1 are accepted; shaper profiles, shared
+ * shapers, WFQ weight modes, congestion management and WRED are rejected.
+ * A node whose id is >= vf->num_queue_pairs is checked as a non-leaf node,
+ * any other id is checked as a leaf (queue) node.
+ *
+ * Returns 0 when the parameters are acceptable; otherwise fills in
+ * error->type and error->message and returns -EINVAL.
+ */
+static int
+iavf_node_param_check(struct iavf_info *vf, uint32_t node_id,
+		      uint32_t priority, uint32_t weight,
+		      struct rte_tm_node_params *params,
+		      struct rte_tm_error *error)
+{
+	/* checked all the unsupported parameter */
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	if (priority) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PRIORITY;
+		error->message = "priority should be 0";
+		return -EINVAL;
+	}
+
+	if (weight != 1) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_WEIGHT;
+		error->message = "weight must be 1";
+		return -EINVAL;
+	}
+
+	/* not support shaper profile: RTE_TM_SHAPER_PROFILE_ID_NONE is the
+	 * API's "no private shaper" value and must pass; 0 stays accepted so
+	 * existing callers keep working.
+	 */
+	if (params->shaper_profile_id &&
+	    params->shaper_profile_id != RTE_TM_SHAPER_PROFILE_ID_NONE) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID;
+		error->message = "shaper profile not supported";
+		return -EINVAL;
+	}
+
+	/* not support shared shaper */
+	if (params->shared_shaper_id) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_SHAPER_ID;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+	if (params->n_shared_shapers) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS;
+		error->message = "shared shaper not supported";
+		return -EINVAL;
+	}
+
+	/* for non-leaf node */
+	if (node_id >= vf->num_queue_pairs) {
+		/* any WFQ weight mode array is rejected, so no later check of
+		 * its contents is needed
+		 */
+		if (params->nonleaf.wfq_weight_mode) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE;
+			error->message = "WFQ not supported";
+			return -EINVAL;
+		}
+		if (params->nonleaf.n_sp_priorities != 1) {
+			error->type =
+				RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SP_PRIORITIES;
+			error->message = "SP priority not supported";
+			return -EINVAL;
+		}
+
+		return 0;
+	}
+
+	/* for leaf node */
+	if (params->leaf.cman) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS_CMAN;
+		error->message = "Congestion management not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.wred_profile_id !=
+	    RTE_TM_WRED_PROFILE_ID_NONE) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_WRED_PROFILE_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.shared_wred_context_id) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_WRED_CONTEXT_ID;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+	if (params->leaf.wred.n_shared_wred_contexts) {
+		error->type =
+			RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_WRED_CONTEXTS;
+		error->message = "WRED not supported";
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Report whether a node is a leaf: *is_leaf is set to true for a queue node
+ * and to false for the root or a TC node.
+ *
+ * Returns 0 on success, -EINVAL when an argument is NULL, the id is
+ * RTE_TM_NODE_ID_NULL, or no node with that id exists.
+ */
+static int
+iavf_node_type_get(struct rte_eth_dev *dev, uint32_t node_id,
+		   int *is_leaf, struct rte_tm_error *error)
+{
+	enum iavf_tm_node_type kind = IAVF_TM_NODE_TYPE_MAX;
+
+	if (is_leaf == NULL || error == NULL)
+		return -EINVAL;
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* the node must already be part of the hierarchy */
+	if (iavf_tm_node_search(dev, node_id, &kind) == NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	*is_leaf = (kind == IAVF_TM_NODE_TYPE_QUEUE);
+
+	return 0;
+}
+
+/*
+ * Add a node to the (not yet committed) Traffic Manager hierarchy.
+ *
+ * A node without a parent (parent_node_id == RTE_TM_NODE_ID_NULL) becomes the
+ * root; a node whose parent is the root becomes a TC node; a node whose
+ * parent is a TC node becomes a queue node and inherits the parent's TC.
+ * level_id may be RTE_TM_NODE_LEVEL_ID_ANY to skip the level check.
+ *
+ * Returns 0 on success, -EINVAL on invalid arguments or hierarchy rules
+ * (error is filled in, except when params or error is NULL), and -ENOMEM
+ * when the node cannot be allocated.
+ */
+static int
+iavf_tm_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+	      uint32_t parent_node_id, uint32_t priority,
+	      uint32_t weight, uint32_t level_id,
+	      struct rte_tm_node_params *params,
+	      struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	enum iavf_tm_node_type node_type = IAVF_TM_NODE_TYPE_MAX;
+	enum iavf_tm_node_type parent_node_type = IAVF_TM_NODE_TYPE_MAX;
+	struct iavf_tm_node *tm_node;
+	struct iavf_tm_node *parent_node;
+	uint16_t tc_nb;
+	int ret;
+
+	if (!params || !error)
+		return -EINVAL;
+
+	/* if already committed */
+	if (vf->tm_conf.committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	ret = iavf_node_param_check(vf, node_id, priority, weight,
+				    params, error);
+	if (ret)
+		return ret;
+
+	/* check if the node is already existed */
+	if (iavf_tm_node_search(dev, node_id, &node_type)) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "node id already used";
+		return -EINVAL;
+	}
+
+	/* root node if not have a parent */
+	if (parent_node_id == RTE_TM_NODE_ID_NULL) {
+		/* check level, unless the caller asked to skip the check */
+		if (level_id != RTE_TM_NODE_LEVEL_ID_ANY &&
+		    level_id != IAVF_TM_NODE_TYPE_PORT) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+			error->message = "Wrong level";
+			return -EINVAL;
+		}
+
+		/* obviously no more than one root */
+		if (vf->tm_conf.root) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+			error->message = "already have a root";
+			return -EINVAL;
+		}
+
+		/* add the root node */
+		tm_node = rte_zmalloc("iavf_tm_node",
+				      sizeof(struct iavf_tm_node),
+				      0);
+		if (!tm_node) {
+			error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+			error->message = "failed to allocate node";
+			return -ENOMEM;
+		}
+		tm_node->id = node_id;
+		tm_node->parent = NULL;
+		tm_node->reference_count = 0;
+		rte_memcpy(&tm_node->params, params,
+				 sizeof(struct rte_tm_node_params));
+		vf->tm_conf.root = tm_node;
+		return 0;
+	}
+
+	/* TC or queue node */
+	/* check the parent node */
+	parent_node = iavf_tm_node_search(dev, parent_node_id,
+					  &parent_node_type);
+	if (!parent_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent not exist";
+		return -EINVAL;
+	}
+	if (parent_node_type != IAVF_TM_NODE_TYPE_PORT &&
+	    parent_node_type != IAVF_TM_NODE_TYPE_TC) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID;
+		error->message = "parent is not root or TC";
+		return -EINVAL;
+	}
+	/* check level */
+	if (level_id != RTE_TM_NODE_LEVEL_ID_ANY &&
+	    level_id != parent_node_type + 1) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "Wrong level";
+		return -EINVAL;
+	}
+
+	/* check the node number */
+	if (parent_node_type == IAVF_TM_NODE_TYPE_PORT) {
+		/* check the TC number against what the PF reported */
+		tc_nb = vf->qos_cap->num_elem;
+		if (vf->tm_conf.nb_tc_node >= tc_nb) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many TCs";
+			return -EINVAL;
+		}
+	} else {
+		/* check the queue number */
+		if (parent_node->reference_count >= vf->num_queue_pairs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too many queues";
+			return -EINVAL;
+		}
+		if (node_id >= vf->num_queue_pairs) {
+			error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+			error->message = "too large queue id";
+			return -EINVAL;
+		}
+	}
+
+	/* add the TC or queue node */
+	tm_node = rte_zmalloc("iavf_tm_node",
+			      sizeof(struct iavf_tm_node),
+			      0);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "failed to allocate node";
+		return -ENOMEM;
+	}
+	tm_node->id = node_id;
+	tm_node->reference_count = 0;
+	tm_node->parent = parent_node;
+	rte_memcpy(&tm_node->params, params,
+			 sizeof(struct rte_tm_node_params));
+	if (parent_node_type == IAVF_TM_NODE_TYPE_PORT) {
+		TAILQ_INSERT_TAIL(&vf->tm_conf.tc_list,
+				  tm_node, node);
+		/* the TC index is the number of TC nodes present so far */
+		tm_node->tc = vf->tm_conf.nb_tc_node;
+		vf->tm_conf.nb_tc_node++;
+	} else {
+		TAILQ_INSERT_TAIL(&vf->tm_conf.queue_list,
+				  tm_node, node);
+		tm_node->tc = parent_node->tc;
+		vf->tm_conf.nb_queue_node++;
+	}
+	tm_node->parent->reference_count++;
+
+	return 0;
+}
+
+/*
+ * Remove a childless node from the (not yet committed) hierarchy and free it.
+ *
+ * Returns 0 on success and -EINVAL when error is NULL, the hierarchy is
+ * already committed, the id is RTE_TM_NODE_ID_NULL or unknown, or the node
+ * still has children.
+ */
+static int
+iavf_tm_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+		 struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct iavf_tm_conf *conf = &vf->tm_conf;
+	enum iavf_tm_node_type kind = IAVF_TM_NODE_TYPE_MAX;
+	struct iavf_tm_node *victim;
+
+	if (error == NULL)
+		return -EINVAL;
+
+	/* no changes once the hierarchy is committed */
+	if (conf->committed) {
+		error->type = RTE_TM_ERROR_TYPE_UNSPECIFIED;
+		error->message = "already committed";
+		return -EINVAL;
+	}
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* the node must exist */
+	victim = iavf_tm_node_search(dev, node_id, &kind);
+	if (victim == NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	/* and it must not have any child left */
+	if (victim->reference_count != 0) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message =
+			"cannot delete a node which has children";
+		return -EINVAL;
+	}
+
+	switch (kind) {
+	case IAVF_TM_NODE_TYPE_PORT:
+		/* root node: not linked in any list, no parent to update */
+		conf->root = NULL;
+		break;
+	case IAVF_TM_NODE_TYPE_TC:
+		victim->parent->reference_count--;
+		TAILQ_REMOVE(&conf->tc_list, victim, node);
+		conf->nb_tc_node--;
+		break;
+	default:
+		/* queue node */
+		victim->parent->reference_count--;
+		TAILQ_REMOVE(&conf->queue_list, victim, node);
+		conf->nb_queue_node--;
+		break;
+	}
+
+	rte_free(victim);
+
+	return 0;
+}
+
+/*
+ * Fill in the port-wide Traffic Manager capabilities.
+ *
+ * The values describe the maximum the driver accepts (port + TCs + queues),
+ * not the hierarchy currently configured. Returns 0 on success and -EINVAL
+ * when an argument is NULL or the PF reported more TCs than the VF has
+ * queue pairs.
+ */
+static int
+iavf_tm_capabilities_get(struct rte_eth_dev *dev,
+			 struct rte_tm_capabilities *cap,
+			 struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	uint16_t nb_tc = vf->qos_cap->num_elem;
+
+	if (cap == NULL || error == NULL)
+		return -EINVAL;
+
+	if (nb_tc > vf->vf_res->num_queue_pairs)
+		return -EINVAL;
+
+	error->type = RTE_TM_ERROR_TYPE_NONE;
+
+	/* Everything not listed below is reported as 0 / unsupported: dual
+	 * rate and shared shapers, packet mode, WFQ groups, head drop, WRED
+	 * contexts, dynamic updates and statistics.
+	 */
+	memset(cap, 0, sizeof(struct rte_tm_capabilities));
+
+	/* hierarchy shape: port, TC, queue */
+	cap->n_nodes_max = 1 + IAVF_MAX_TRAFFIC_CLASS
+		+ vf->num_queue_pairs;
+	cap->n_levels_max = 3;
+	cap->non_leaf_nodes_identical = 1;
+	cap->leaf_nodes_identical = 1;
+
+	/* private shapers, byte mode only */
+	cap->shaper_n_max = cap->n_nodes_max;
+	cap->shaper_private_n_max = cap->n_nodes_max;
+	/* NOTE(review): the original comment labelled this value "GBps";
+	 * confirm the unit of vf->link_speed against the unit rte_tm expects.
+	 */
+	cap->shaper_private_rate_max =
+		vf->link_speed * 1000 / IAVF_BITS_PER_BYTE;
+	cap->shaper_private_byte_mode_supported = 1;
+	cap->shaper_pkt_length_adjust_min = RTE_TM_ETH_FRAMING_OVERHEAD;
+	cap->shaper_pkt_length_adjust_max = RTE_TM_ETH_FRAMING_OVERHEAD_FCS;
+
+	/* scheduling: one SP priority, weight fixed to 1 */
+	cap->sched_n_children_max = vf->num_queue_pairs;
+	cap->sched_sp_n_priorities_max = 1;
+	cap->sched_wfq_weight_max = 1;
+
+	return 0;
+}
+
+/*
+ * Fill in the capabilities of one hierarchy level (port, TC or queue).
+ *
+ * cap is zeroed first so that fields this function does not set are
+ * reported as 0 rather than left uninitialised. Returns 0 on success and
+ * -EINVAL when an argument is NULL or level_id is not a valid level.
+ */
+static int
+iavf_level_capabilities_get(struct rte_eth_dev *dev,
+			    uint32_t level_id,
+			    struct rte_tm_level_capabilities *cap,
+			    struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+
+	if (!cap || !error)
+		return -EINVAL;
+
+	if (level_id >= IAVF_TM_NODE_TYPE_MAX) {
+		error->type = RTE_TM_ERROR_TYPE_LEVEL_ID;
+		error->message = "too deep level";
+		return -EINVAL;
+	}
+
+	memset(cap, 0, sizeof(*cap));
+
+	/* root node */
+	if (level_id == IAVF_TM_NODE_TYPE_PORT) {
+		cap->n_nodes_max = 1;
+		cap->n_nodes_nonleaf_max = 1;
+		cap->n_nodes_leaf_max = 0;
+	} else if (level_id == IAVF_TM_NODE_TYPE_TC) {
+		/* TC */
+		cap->n_nodes_max = IAVF_MAX_TRAFFIC_CLASS;
+		cap->n_nodes_nonleaf_max = IAVF_MAX_TRAFFIC_CLASS;
+		cap->n_nodes_leaf_max = 0;
+	} else {
+		/* queue */
+		cap->n_nodes_max = vf->num_queue_pairs;
+		cap->n_nodes_nonleaf_max = 0;
+		cap->n_nodes_leaf_max = vf->num_queue_pairs;
+	}
+
+	cap->non_leaf_nodes_identical = true;
+	cap->leaf_nodes_identical = true;
+
+	if (level_id != IAVF_TM_NODE_TYPE_QUEUE) {
+		cap->nonleaf.shaper_private_supported = false;
+		cap->nonleaf.shaper_private_dual_rate_supported = false;
+		cap->nonleaf.shaper_private_rate_min = 0;
+		/* NOTE(review): labelled "GBps" originally; confirm the unit
+		 * of vf->link_speed against the unit rte_tm expects.
+		 */
+		cap->nonleaf.shaper_private_rate_max =
+			vf->link_speed * 1000 / IAVF_BITS_PER_BYTE;
+		cap->nonleaf.shaper_private_packet_mode_supported = 0;
+		cap->nonleaf.shaper_private_byte_mode_supported = 1;
+		cap->nonleaf.shaper_shared_n_max = 0;
+		cap->nonleaf.shaper_shared_packet_mode_supported = 0;
+		cap->nonleaf.shaper_shared_byte_mode_supported = 0;
+		if (level_id == IAVF_TM_NODE_TYPE_PORT)
+			cap->nonleaf.sched_n_children_max =
+				IAVF_MAX_TRAFFIC_CLASS;
+		else
+			cap->nonleaf.sched_n_children_max =
+				vf->num_queue_pairs;
+		cap->nonleaf.sched_sp_n_priorities_max = 1;
+		cap->nonleaf.sched_wfq_n_children_per_group_max = 0;
+		cap->nonleaf.sched_wfq_n_groups_max = 0;
+		cap->nonleaf.sched_wfq_weight_max = 1;
+		cap->nonleaf.sched_wfq_packet_mode_supported = 0;
+		cap->nonleaf.sched_wfq_byte_mode_supported = 0;
+		cap->nonleaf.stats_mask = 0;
+
+		return 0;
+	}
+
+	/* queue node */
+	cap->leaf.shaper_private_supported = false;
+	cap->leaf.shaper_private_dual_rate_supported = false;
+	cap->leaf.shaper_private_rate_min = 0;
+	/* NOTE(review): labelled "GBps" originally; confirm the unit of
+	 * vf->link_speed against the unit rte_tm expects.
+	 */
+	cap->leaf.shaper_private_rate_max =
+		vf->link_speed * 1000 / IAVF_BITS_PER_BYTE;
+	cap->leaf.shaper_private_packet_mode_supported = 0;
+	cap->leaf.shaper_private_byte_mode_supported = 1;
+	cap->leaf.shaper_shared_n_max = 0;
+	cap->leaf.shaper_shared_packet_mode_supported = 0;
+	cap->leaf.shaper_shared_byte_mode_supported = 0;
+	cap->leaf.cman_head_drop_supported = false;
+	/* NOTE(review): reported as supported although node add rejects any
+	 * WRED profile - confirm whether this should be false.
+	 */
+	cap->leaf.cman_wred_context_private_supported = true;
+	cap->leaf.cman_wred_context_shared_n_max = 0;
+	cap->leaf.stats_mask = 0;
+
+	return 0;
+}
+
+/*
+ * Fill in the capabilities of a TC node from the QoS capabilities that were
+ * queried from the PF. Only TC nodes are supported; the root and queue
+ * nodes are rejected.
+ *
+ * cap is zeroed first so that fields not set here (including the nonleaf
+ * fields when the arbiter is neither ETS nor strict) are reported as 0.
+ * Returns 0 on success and -EINVAL on any invalid argument or node.
+ */
+static int
+iavf_node_capabilities_get(struct rte_eth_dev *dev,
+			   uint32_t node_id,
+			   struct rte_tm_node_capabilities *cap,
+			   struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	enum iavf_tm_node_type node_type = IAVF_TM_NODE_TYPE_MAX;
+	const struct virtchnl_qos_cap_elem *tc_cap;
+	struct iavf_tm_node *tm_node;
+
+	if (!cap || !error)
+		return -EINVAL;
+
+	if (node_id == RTE_TM_NODE_ID_NULL) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "invalid node id";
+		return -EINVAL;
+	}
+
+	/* check if the node id exists */
+	tm_node = iavf_tm_node_search(dev, node_id, &node_type);
+	if (!tm_node) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_ID;
+		error->message = "no such node";
+		return -EINVAL;
+	}
+
+	if (node_type != IAVF_TM_NODE_TYPE_TC) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "not support capability get";
+		return -EINVAL;
+	}
+
+	/* never index past the entries reported by the PF or past the
+	 * IAVF_MAX_TRAFFIC_CLASS entries the qos_cap buffer was sized for
+	 */
+	if (tm_node->tc >= vf->qos_cap->num_elem ||
+	    tm_node->tc >= IAVF_MAX_TRAFFIC_CLASS) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "tc out of range";
+		return -EINVAL;
+	}
+
+	tc_cap = &vf->qos_cap->cap[tm_node->tc];
+	if (tc_cap->tc_id != tm_node->tc) {
+		error->type = RTE_TM_ERROR_TYPE_NODE_PARAMS;
+		error->message = "tc not match";
+		return -EINVAL;
+	}
+
+	memset(cap, 0, sizeof(*cap));
+
+	cap->shaper_private_supported = true;
+	cap->shaper_private_dual_rate_supported = false;
+	cap->shaper_private_rate_min = tc_cap->shaper.committed;
+	cap->shaper_private_rate_max = tc_cap->shaper.peak;
+	cap->shaper_shared_n_max = 0;
+	cap->nonleaf.sched_n_children_max = vf->num_queue_pairs;
+
+	if (tc_cap->arbiter == VIRTCHNL_ABITER_ETS) {
+		cap->nonleaf.sched_sp_n_priorities_max = 1;
+		cap->nonleaf.sched_wfq_n_children_per_group_max =
+			vf->num_queue_pairs;
+		cap->nonleaf.sched_wfq_n_groups_max = 1;
+		cap->nonleaf.sched_wfq_weight_max = tc_cap->weight;
+	}
+
+	if (tc_cap->arbiter == VIRTCHNL_ABITER_STRICT) {
+		cap->nonleaf.sched_sp_n_priorities_max = 1;
+		cap->nonleaf.sched_wfq_n_children_per_group_max = 0;
+		cap->nonleaf.sched_wfq_n_groups_max = 0;
+		cap->nonleaf.sched_wfq_weight_max = 1;
+	}
+
+	cap->stats_mask = 0;
+
+	return 0;
+}
+
+/*
+ * Commit the Traffic Manager hierarchy: count the queue nodes of every TC,
+ * derive a contiguous queue range per TC and send the mapping to the PF
+ * through iavf_set_q_tc_map().
+ *
+ * Every queue pair of the VF must have a queue node, otherwise the commit is
+ * rejected. On success the hierarchy is marked committed, after which node
+ * add/delete requests are refused. clear_on_fail and error are not used.
+ *
+ * Returns IAVF_SUCCESS on success, IAVF_ERR_PARAM on an invalid hierarchy or
+ * TC count, -ENOMEM when the message cannot be allocated, or the error
+ * returned by iavf_set_q_tc_map().
+ */
+static int iavf_hierarchy_commit(struct rte_eth_dev *dev,
+				 __rte_unused int clear_on_fail,
+				 __rte_unused struct rte_tm_error *error)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct virtchnl_queue_tc_mapping *q_tc_mapping;
+	struct iavf_tm_node_list *queue_list = &vf->tm_conf.queue_list;
+	struct iavf_tm_node *tm_node;
+	uint16_t num_tc = vf->qos_cap->num_elem;
+	uint16_t size;
+	int index = 0, node_committed = 0;
+	int ret, i;
+
+	/* num_tc sizes the message below: 0 would underflow the size and a
+	 * value above IAVF_MAX_TRAFFIC_CLASS exceeds what qos_cap can hold
+	 */
+	if (num_tc == 0 || num_tc > IAVF_MAX_TRAFFIC_CLASS) {
+		PMD_DRV_LOG(ERR, "invalid number of TCs");
+		return IAVF_ERR_PARAM;
+	}
+
+	size = sizeof(*q_tc_mapping) + sizeof(q_tc_mapping->tc[0]) *
+		(num_tc - 1);
+	q_tc_mapping = rte_zmalloc("q_tc", size, 0);
+	if (!q_tc_mapping) {
+		PMD_DRV_LOG(ERR, "unable to allocate queue TC mapping");
+		return -ENOMEM;
+	}
+
+	q_tc_mapping->vsi_id = vf->vsi.vsi_id;
+	q_tc_mapping->num_tc = num_tc;
+	q_tc_mapping->num_queue_pairs = vf->num_queue_pairs;
+	TAILQ_FOREACH(tm_node, queue_list, node) {
+		/* tc[] only has num_tc entries */
+		if (tm_node->tc >= num_tc) {
+			PMD_DRV_LOG(ERR, "queue node has an invalid TC");
+			ret = IAVF_ERR_PARAM;
+			goto out;
+		}
+		q_tc_mapping->tc[tm_node->tc].req.queue_count++;
+		node_committed++;
+	}
+
+	if (node_committed < vf->num_queue_pairs) {
+		PMD_DRV_LOG(ERR, "queue node is less than allocated queue pairs");
+		ret = IAVF_ERR_PARAM;
+		goto out;
+	}
+
+	/* each TC gets the queue range that follows the previous TC's */
+	for (i = 0; i < num_tc; i++) {
+		q_tc_mapping->tc[i].req.start_queue_id = index;
+		index += q_tc_mapping->tc[i].req.queue_count;
+	}
+
+	ret = iavf_set_q_tc_map(dev, q_tc_mapping, size);
+	if (ret)
+		goto out;
+
+	vf->tm_conf.committed = true;
+	ret = IAVF_SUCCESS;
+out:
+	/* NOTE(review): assumes the virtchnl command path does not keep a
+	 * reference to the message once iavf_set_q_tc_map() has returned.
+	 */
+	rte_free(q_tc_mapping);
+	return ret;
+}
diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c
index 5d57e8b541..daa1b3755c 100644
--- a/drivers/net/iavf/iavf_vchnl.c
+++ b/drivers/net/iavf/iavf_vchnl.c
@@ -467,7 +467,8 @@  iavf_get_vf_resource(struct iavf_adapter *adapter)
 		VIRTCHNL_VF_OFFLOAD_REQ_QUEUES |
 		VIRTCHNL_VF_OFFLOAD_CRC |
 		VIRTCHNL_VF_OFFLOAD_VLAN_V2 |
-		VIRTCHNL_VF_LARGE_NUM_QPAIRS;
+		VIRTCHNL_VF_LARGE_NUM_QPAIRS |
+		VIRTCHNL_VF_OFFLOAD_TC;
 
 	args.in_args = (uint8_t *)&caps;
 	args.in_args_size = sizeof(caps);
@@ -1550,6 +1551,59 @@  iavf_set_hena(struct iavf_adapter *adapter, uint64_t hena)
 	return err;
 }
 
+/*
+ * Query the QoS capabilities from the PF with VIRTCHNL_OP_GET_QOS_CAPS and
+ * copy the reply into vf->qos_cap.
+ *
+ * At most the header plus IAVF_MAX_TRAFFIC_CLASS capability elements are
+ * copied. Returns 0 on success and -1 when the command fails.
+ */
+int
+iavf_get_qos_cap(struct iavf_adapter *adapter)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter);
+	struct iavf_cmd_info args;
+	uint32_t len;
+	int err;
+
+	/* do not hand uninitialised fields to the command path */
+	memset(&args, 0, sizeof(args));
+	args.ops = VIRTCHNL_OP_GET_QOS_CAPS;
+	args.in_args = NULL;
+	args.in_args_size = 0;
+	args.out_buffer = vf->aq_resp;
+	args.out_size = IAVF_AQ_BUF_SZ;
+	err = iavf_execute_vf_cmd(adapter, &args);
+
+	if (err) {
+		PMD_DRV_LOG(ERR,
+			    "Failed to execute command of VIRTCHNL_OP_GET_QOS_CAPS");
+		return -1;
+	}
+
+	len =  sizeof(struct virtchnl_qos_cap_list) +
+		IAVF_MAX_TRAFFIC_CLASS * sizeof(struct virtchnl_qos_cap_elem);
+
+	/* NOTE(review): num_elem is copied as reported by the PF and is not
+	 * clamped to the number of elements that fit in len - confirm the PF
+	 * never reports more than IAVF_MAX_TRAFFIC_CLASS.
+	 */
+	rte_memcpy(vf->qos_cap, args.out_buffer,
+		   RTE_MIN(args.out_size, len));
+
+	return 0;
+}
+
+/*
+ * Send a queue-to-TC mapping of `size` bytes to the PF with
+ * VIRTCHNL_OP_CONFIG_TC_MAP. Returns 0 on success, otherwise the error
+ * returned by iavf_execute_vf_cmd() after logging it.
+ */
+int iavf_set_q_tc_map(struct rte_eth_dev *dev,
+		struct virtchnl_queue_tc_mapping *q_tc_mapping, uint16_t size)
+{
+	struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
+	struct iavf_adapter *adapter =
+			IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+	struct iavf_cmd_info cmd;
+	int rc;
+
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.ops = VIRTCHNL_OP_CONFIG_TC_MAP;
+	/* request payload */
+	cmd.in_args = (uint8_t *)q_tc_mapping;
+	cmd.in_args_size = size;
+	/* reply buffer */
+	cmd.out_buffer = vf->aq_resp;
+	cmd.out_size = IAVF_AQ_BUF_SZ;
+
+	rc = iavf_execute_vf_cmd(adapter, &cmd);
+	if (rc == 0)
+		return 0;
+
+	PMD_DRV_LOG(ERR, "Failed to execute command of"
+		    " VIRTCHNL_OP_CONFIG_TC_MAP");
+	return rc;
+}
+
 int
 iavf_add_del_mc_addr_list(struct iavf_adapter *adapter,
 			struct rte_ether_addr *mc_addrs,
diff --git a/drivers/net/iavf/meson.build b/drivers/net/iavf/meson.build
index 6f222a9e87..f2010a8337 100644
--- a/drivers/net/iavf/meson.build
+++ b/drivers/net/iavf/meson.build
@@ -19,6 +19,7 @@  sources = files(
         'iavf_generic_flow.c',
         'iavf_fdir.c',
         'iavf_hash.c',
+        'iavf_tm.c',
 )
 
 if arch_subdir == 'x86'