[dpdk-dev,v3,1/2] net/i40e: queue region set and flush

Message ID 1505445188-70251-2-git-send-email-wei.zhao1@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Zhao1, Wei Sept. 15, 2017, 3:13 a.m. UTC
  This feature enable queue regions configuration for RSS in PF/VF,
so that different traffic classes or different packet
classification types can be separated to different queues in
different queue regions.This patch can set queue region range,
it include queue number in a region and the index of first queue.
This patch enable mapping between different priorities (UP) and
different traffic classes.It also enable mapping between a region
index and a sepcific flowtype(PCTYPE).It also provide the solution
of flush all configuration about queue region the above described.

Signed-off-by: Wei Zhao <wei.zhao1@intel.com>
---
 drivers/net/i40e/i40e_ethdev.c            |  19 +-
 drivers/net/i40e/i40e_ethdev.h            |  30 ++
 drivers/net/i40e/rte_pmd_i40e.c           | 482 ++++++++++++++++++++++++++++++
 drivers/net/i40e/rte_pmd_i40e.h           |  38 +++
 drivers/net/i40e/rte_pmd_i40e_version.map |   1 +
 5 files changed, 566 insertions(+), 4 deletions(-)
  

Comments

Ferruh Yigit Sept. 20, 2017, 10:36 a.m. UTC | #1
On 9/15/2017 4:13 AM, Wei Zhao wrote:
> This feature enable queue regions configuration for RSS in PF/VF,
> so that different traffic classes or different packet
> classification types can be separated to different queues in
> different queue regions.This patch can set queue region range,
> it include queue number in a region and the index of first queue.

Is following correct:
So instead of distributing packets to the multiple queues, this will
distribute packets into queue reqions which may consists of multiple queues.

If so, is there a way to control how packets distributed within same
queue region to multiple queues?

And is this feature only supported with RSS? Can it be part of RSS
configuration instead of PMD specific API?

> This patch enable mapping between different priorities (UP) and

User priorities (UP)

> different traffic classes.It also enable mapping between a region
> index and a sepcific flowtype(PCTYPE).It also provide the solution
> of flush all configuration about queue region the above described.
> 
> Signed-off-by: Wei Zhao <wei.zhao1@intel.com>
> ---
>  drivers/net/i40e/i40e_ethdev.c            |  19 +-
>  drivers/net/i40e/i40e_ethdev.h            |  30 ++
>  drivers/net/i40e/rte_pmd_i40e.c           | 482 ++++++++++++++++++++++++++++++
>  drivers/net/i40e/rte_pmd_i40e.h           |  38 +++
>  drivers/net/i40e/rte_pmd_i40e_version.map |   1 +
>  5 files changed, 566 insertions(+), 4 deletions(-)
> 

<...>

> +static int
> +i40e_vsi_update_queue_region_mapping(struct i40e_hw *hw,
> +			      struct i40e_pf *pf)
> +{
> +	uint16_t i;
> +	struct i40e_vsi *vsi = pf->main_vsi;
> +	uint16_t queue_offset, bsf, tc_index;
> +	struct i40e_vsi_context ctxt;
> +	struct i40e_aqc_vsi_properties_data *vsi_info;
> +	struct i40e_queue_region_info *region_info =
> +				&pf->queue_region;
> +	uint32_t ret = -EINVAL;
> +
> +	if (!region_info->queue_region_number) {
> +		PMD_INIT_LOG(ERR, "there is no that region id been set before");

Can you please re-check the log message.

> +		return ret;
> +	}
> +
> +	memset(&ctxt, 0, sizeof(struct i40e_vsi_context));
> +
> +	/* Update Queue Pairs Mapping for currently enabled UPs */
> +	ctxt.seid = vsi->seid;
> +	ctxt.pf_num = hw->pf_id;
> +	ctxt.vf_num = 0;
> +	ctxt.uplink_seid = vsi->uplink_seid;
> +	ctxt.info = vsi->info;
> +	vsi_info = &ctxt.info;
> +
> +	memset(vsi_info->tc_mapping, 0, sizeof(uint16_t) * 8);
> +	memset(vsi_info->queue_mapping, 0, sizeof(uint16_t) * 16);
> +
> +	/**
> +	 * Configure queue region and queue mapping parameters,
> +	 * for enabled queue region, allocate queues to this region.
> +	 */
> +
> +	for (i = 0; i < region_info->queue_region_number; i++) {
> +		tc_index = region_info->region[i].region_id;
> +		bsf = rte_bsf32(region_info->region[i].queue_num);
> +		queue_offset = region_info->region[i].queue_start_index;
> +		vsi_info->tc_mapping[tc_index] = rte_cpu_to_le_16(
> +			(queue_offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
> +				(bsf << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT));
> +	}
> +
> +	/* Associate queue number with VSI, Keep vsi->nb_qps unchanged */
> +	if (vsi->type == I40E_VSI_SRIOV) {
> +		vsi_info->mapping_flags |=
> +			rte_cpu_to_le_16(I40E_AQ_VSI_QUE_MAP_NONCONTIG);
> +		for (i = 0; i < vsi->nb_qps; i++)
> +			vsi_info->queue_mapping[i] =
> +				rte_cpu_to_le_16(vsi->base_queue + i);
> +	} else {
> +		vsi_info->mapping_flags |=
> +			rte_cpu_to_le_16(I40E_AQ_VSI_QUE_MAP_CONTIG);
> +		vsi_info->queue_mapping[0] = rte_cpu_to_le_16(vsi->base_queue);
> +	}
> +	vsi_info->valid_sections |=
> +		rte_cpu_to_le_16(I40E_AQ_VSI_PROP_QUEUE_MAP_VALID);
> +
> +	/* Update the VSI after updating the VSI queue-mapping information */
> +	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
> +	if (ret) {
> +		PMD_INIT_LOG(ERR, "Failed to configure queue region "
> +			"mapping = %d ", hw->aq.asq_last_status);

Please don't split log message.

> +		return ret;
> +	}
> +	/* update the local VSI info with updated queue map */
> +	(void)rte_memcpy(&vsi->info.tc_mapping, &ctxt.info.tc_mapping,
> +					sizeof(vsi->info.tc_mapping));
> +	(void)rte_memcpy(&vsi->info.queue_mapping,
> +			&ctxt.info.queue_mapping,
> +		sizeof(vsi->info.queue_mapping));

Please keep line allignment same with above line.

> +	vsi->info.mapping_flags = ctxt.info.mapping_flags;
> +	vsi->info.valid_sections = 0;
> +
> +	return 0;
> +}
> +
> +
> +static int
> +i40e_set_queue_region(struct i40e_pf *pf,
> +				struct rte_i40e_rss_region_conf *conf_ptr)
> +{
> +	uint16_t tc_size_tb[7] = {1, 2, 4, 8, 16, 32, 64};
> +	uint16_t i;
> +	struct i40e_vsi *main_vsi = pf->main_vsi;
> +	struct i40e_queue_region_info *info = &pf->queue_region;
> +	uint32_t ret = -EINVAL;
> +
> +	for (i = 0; i < I40E_REGION_MAX_INDEX; i++)
> +		if (conf_ptr->queue_num == tc_size_tb[i])
> +			break;
> +
> +	if (i == I40E_REGION_MAX_INDEX) {
> +		printf("The region sizes should be any of the following "
> +		"values: 1, 2, 4, 8, 16, 32, 64 as long as the "
> +		"total number of queues do not exceed the VSI allocation");
> +		return ret;
> +	}
> +
> +	if (conf_ptr->region_id >= I40E_REGION_MAX_INDEX) {
> +		PMD_INIT_LOG(ERR, "the queue region max index is 7");
> +		return ret;
> +	}
> +
> +	if ((conf_ptr->queue_start_index + conf_ptr->queue_num)
> +					> main_vsi->nb_used_qps) {
> +		PMD_INIT_LOG(ERR, "the queue index exceeds the VSI range");
> +		return ret;
> +	}
> +
> +	if (info->queue_region_number) {
> +		for (i = 0; i < info->queue_region_number; i++)
> +			if (conf_ptr->region_id == info->region[i].region_id)
> +				break;
> +
> +		if ((i == info->queue_region_number) &&
> +			(i <= I40E_REGION_MAX_INDEX)) {


Please use one more level indentaion here, and pharantesis looks extra
can you please double check?

> +			info->region[i].region_id = conf_ptr->region_id;
> +			info->region[i].queue_num = conf_ptr->queue_num;
> +			info->region[i].queue_start_index =
> +				conf_ptr->queue_start_index;
> +			info->queue_region_number++;
> +		} else {
> +			PMD_INIT_LOG(ERR, "queue region number "
> +				"exceeds maxnum 8 or the "
> +				"queue region id has "
> +				"been set before");

please don't split log message.

> +			return ret;
> +		}
> +	} else {
> +		info->region[0].region_id = conf_ptr->region_id;
> +		info->region[0].queue_num = conf_ptr->queue_num;
> +		info->region[0].queue_start_index =
> +			conf_ptr->queue_start_index;
> +		info->queue_region_number++;
> +	}
> +
> +	return 0;
> +}
> +
> +static int
> +i40e_set_region_flowtype_pf(struct i40e_hw *hw,
> +		struct i40e_pf *pf, struct rte_i40e_rss_region_conf *conf_ptr)

What do you think starting all functions, even they are static, with
"i40e_queue_region_" prefix?

> +{
> +	uint32_t pfqf_hregion;
> +	uint32_t ret = -EINVAL;
> +	struct i40e_queue_region_info *info = &pf->queue_region;
> +	uint16_t i, j, index, flowtype_set = 0;
> +	uint16_t region_index, flowtype_index;
> +
> +	if (conf_ptr->region_id > I40E_PFQF_HREGION_MAX_INDEX) {
> +		PMD_INIT_LOG(ERR, "the queue region max index is 7");
> +		return ret;
> +	}
> +
> +	if (conf_ptr->hw_flowtype >= I40E_FILTER_PCTYPE_MAX) {
> +		PMD_INIT_LOG(ERR, "the hw_flowtype or PCTYPE max index is 63");
> +		return ret;
> +	}
> +
> +	if (info->queue_region_number) {
> +		for (i = 0; i < info->queue_region_number; i++)
> +			if (conf_ptr->region_id == info->region[i].region_id)
> +				break;
> +
> +		if (i == info->queue_region_number) {
> +			PMD_INIT_LOG(ERR, "that region id has not "
> +				"been set before");
> +			return ret;
> +		}
> +		region_index = i;
> +
> +		for (i = 0; i < info->queue_region_number; i++) {
> +			for (j = 0; j < info->region[i].flowtype_num; j++) {
> +				if (conf_ptr->hw_flowtype ==
> +					info->region[i].hw_flowtype[j]) {
> +					flowtype_set = 1;
> +					break;
> +				}
> +			}
> +		}
> +
> +		if (flowtype_set) {
> +			PMD_INIT_LOG(ERR, "that hw_flowtype "
> +				"has been set before");

Please don't split log messages.

> +			return ret;
> +		}
> +		flowtype_index = info->region[region_index].flowtype_num;
> +		info->region[region_index].hw_flowtype[flowtype_index] =
> +						conf_ptr->hw_flowtype;
> +		info->region[region_index].flowtype_num++;
> +	} else  {
> +		PMD_INIT_LOG(ERR, "there is no that region id been set before");
> +		return ret;
> +	}
> +
> +	index = conf_ptr->hw_flowtype >> 3;
> +	pfqf_hregion = i40e_read_rx_ctl(hw, I40E_PFQF_HREGION(index));
> +
> +	if ((conf_ptr->hw_flowtype & 0x7) == 0) {
> +		pfqf_hregion |= conf_ptr->region_id <<
> +				I40E_PFQF_HREGION_REGION_0_SHIFT;
> +		pfqf_hregion |= 1 << I40E_PFQF_HREGION_OVERRIDE_ENA_0_SHIFT;
> +	} else if ((conf_ptr->hw_flowtype & 0x7) == 1) {
> +		pfqf_hregion |= conf_ptr->region_id <<
> +				I40E_PFQF_HREGION_REGION_1_SHIFT;
> +		pfqf_hregion |= 1 << I40E_PFQF_HREGION_OVERRIDE_ENA_1_SHIFT;
> +	} else if ((conf_ptr->hw_flowtype & 0x7) == 2) {
> +		pfqf_hregion |= conf_ptr->region_id <<
> +				I40E_PFQF_HREGION_REGION_2_SHIFT;
> +		pfqf_hregion |= 1 << I40E_PFQF_HREGION_OVERRIDE_ENA_2_SHIFT;
> +	} else if ((conf_ptr->hw_flowtype & 0x7) == 3) {
> +		pfqf_hregion |= conf_ptr->region_id <<
> +				I40E_PFQF_HREGION_REGION_3_SHIFT;
> +		pfqf_hregion |= 1 << I40E_PFQF_HREGION_OVERRIDE_ENA_3_SHIFT;
> +	} else if ((conf_ptr->hw_flowtype & 0x7) == 4) {
> +		pfqf_hregion |= conf_ptr->region_id <<
> +				I40E_PFQF_HREGION_REGION_4_SHIFT;
> +		pfqf_hregion |= 1 << I40E_PFQF_HREGION_OVERRIDE_ENA_4_SHIFT;
> +	} else if ((conf_ptr->hw_flowtype & 0x7) == 5) {
> +		pfqf_hregion |= conf_ptr->region_id <<
> +				I40E_PFQF_HREGION_REGION_5_SHIFT;
> +		pfqf_hregion |= 1 << I40E_PFQF_HREGION_OVERRIDE_ENA_5_SHIFT;
> +	} else if ((conf_ptr->hw_flowtype & 0x7) == 6) {
> +		pfqf_hregion |= conf_ptr->region_id <<
> +				I40E_PFQF_HREGION_REGION_6_SHIFT;
> +		pfqf_hregion |= 1 << I40E_PFQF_HREGION_OVERRIDE_ENA_6_SHIFT;
> +	} else {
> +		pfqf_hregion |= conf_ptr->region_id <<
> +				I40E_PFQF_HREGION_REGION_7_SHIFT;
> +		pfqf_hregion |= 1 << I40E_PFQF_HREGION_OVERRIDE_ENA_7_SHIFT;
> +	}
> +
> +	i40e_write_rx_ctl(hw, I40E_PFQF_HREGION(index), pfqf_hregion);
> +
> +	return 0;
> +}
> +
> +static int
> +i40e_set_up_region(struct i40e_pf *pf,
> +				struct rte_i40e_rss_region_conf *conf_ptr)

The API name looks like "setup" but that up part is user_priority, can
you please long form against "up"?

> +{
> +	struct i40e_queue_region_info *info = &pf->queue_region;
> +	uint32_t ret = -EINVAL;
> +	uint16_t i, j, region_index, up_set = 0;

Same abrevation comment for "upset", please use long form.

> +
> +	if (conf_ptr->user_priority > I40E_REGION_USERPRIORITY_MAX_INDEX) {
> +		PMD_INIT_LOG(ERR, "the queue region max index is 7");
> +		return ret;
> +	}
> +
> +	if (conf_ptr->region_id >= I40E_REGION_MAX_INDEX) {
> +		PMD_INIT_LOG(ERR, "the region_id max index is 7");
> +		return ret;
> +	}
> +
> +	if (info->queue_region_number) {
> +		for (i = 0; i < info->queue_region_number; i++)
> +			if (conf_ptr->region_id == info->region[i].region_id)
> +				break;
> +
> +		if (i == info->queue_region_number) {
> +			PMD_INIT_LOG(ERR, "that region id "
> +				"has not been set before");
> +			return ret;
> +		}
> +		region_index = i;
> +
> +		for (i = 0; i < info->queue_region_number; i++) {
> +			for (j = 0; j <
> +				info->region[i].user_priority_num; j++) {

Please break line after ";"

> +				if (info->region[i].user_priority[j] ==
> +					conf_ptr->user_priority) {
> +					up_set = 1;
> +					break;
> +				}
> +			}
> +		}
> +
> +		if (up_set) {
> +			PMD_INIT_LOG(ERR, "that user priority "
> +				"has been set before");
> +			return ret;
> +		}
> +
> +		j = info->region[region_index].user_priority_num;
> +		info->region[region_index].user_priority[j] =
> +						conf_ptr->user_priority;
> +		info->region[region_index].user_priority_num++;
> +	} else {
> +		PMD_INIT_LOG(ERR, "there is no that region id been set before");
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static int
> +i40e_queue_region_dcb_configure(struct i40e_hw *hw,
> +				struct i40e_pf *pf)
> +{
> +	struct i40e_dcbx_config dcb_cfg_local;
> +	struct i40e_dcbx_config *dcb_cfg;
> +	struct i40e_queue_region_info *info = &pf->queue_region;
> +	struct i40e_dcbx_config *old_cfg = &hw->local_dcbx_config;
> +	uint32_t ret = -EINVAL;

Please use signed variable, there are more above.

> +	uint16_t i, j, prio_index, region_index;
> +	uint8_t tc_map, tc_bw, bw_lf;
> +
> +	if (!info->queue_region_number) {
> +		PMD_INIT_LOG(ERR, "there is no that region id been set before");
> +		return ret;
> +	}
> +
> +	dcb_cfg = &dcb_cfg_local;
> +	memset(dcb_cfg, 0, sizeof(struct i40e_dcbx_config));
> +
> +	/* assume each tc has the same bw */
> +	tc_bw = I40E_MAX_PERCENT / info->queue_region_number;
> +	for (i = 0; i < info->queue_region_number; i++)
> +		dcb_cfg->etscfg.tcbwtable[i] = tc_bw;
> +	/* to ensure the sum of tcbw is equal to 100 */
> +	bw_lf = I40E_MAX_PERCENT %  info->queue_region_number;
> +	for (i = 0; i < bw_lf; i++)
> +		dcb_cfg->etscfg.tcbwtable[i]++;
> +
> +	/* assume each tc has the same Transmission Selection Algorithm */
> +	for (i = 0; i < info->queue_region_number; i++)
> +		dcb_cfg->etscfg.tsatable[i] = I40E_IEEE_TSA_ETS;
> +
> +	for (i = 0; i < info->queue_region_number; i++) {
> +		for (j = 0; j < info->region[i].user_priority_num; j++) {
> +			prio_index = info->region[i].user_priority[j];
> +			region_index = info->region[i].region_id;
> +			dcb_cfg->etscfg.prioritytable[prio_index] =
> +						region_index;
> +		}
> +	}
> +
> +	/* FW needs one App to configure HW */
> +	dcb_cfg->numapps = I40E_DEFAULT_DCB_APP_NUM;
> +	dcb_cfg->app[0].selector = I40E_APP_SEL_ETHTYPE;
> +	dcb_cfg->app[0].priority = I40E_DEFAULT_DCB_APP_PRIO;
> +	dcb_cfg->app[0].protocolid = I40E_APP_PROTOID_FCOE;
> +
> +	tc_map = RTE_LEN2MASK(info->queue_region_number, uint8_t);
> +
> +	dcb_cfg->pfc.willing = 0;
> +	dcb_cfg->pfc.pfccap = I40E_MAX_TRAFFIC_CLASS;
> +	dcb_cfg->pfc.pfcenable = tc_map;
> +
> +	/* Copy the new config to the current config */
> +	*old_cfg = *dcb_cfg;
> +	old_cfg->etsrec = old_cfg->etscfg;
> +	ret = i40e_set_dcb_config(hw);
> +
> +	if (ret) {
> +		PMD_INIT_LOG(ERR, "Set queue region DCB Config failed, err"
> +			" %s aq_err %s",

Please don't split the log message.

> +			 i40e_stat_str(hw, ret),
> +			 i40e_aq_str(hw, hw->aq.asq_last_status));
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static int
> +i40e_queue_region_dcb_configure_default(struct i40e_hw *hw)
> +{
> +	uint16_t i;
> +	uint32_t ret = -EINVAL;

Please use signed variable here.

> +
> +	memset(&hw->local_dcbx_config, 0,
> +	sizeof(struct i40e_dcbx_config));

Wrong indentation.

> +	/* set dcb default configuration */
> +	hw->local_dcbx_config.etscfg.willing = 0;
> +	hw->local_dcbx_config.etscfg.maxtcs = 0;
> +	hw->local_dcbx_config.etscfg.tcbwtable[0] = 100;
> +	hw->local_dcbx_config.etscfg.tsatable[0] =
> +			I40E_IEEE_TSA_ETS;
> +	/* all UPs mapping to region 0 */
> +	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++)
> +		hw->local_dcbx_config.etscfg.prioritytable[i] = 0;
> +	hw->local_dcbx_config.etsrec =
> +		hw->local_dcbx_config.etscfg;
> +	hw->local_dcbx_config.pfc.willing = 0;
> +	hw->local_dcbx_config.pfc.pfccap =
> +			I40E_MAX_TRAFFIC_CLASS;
> +	/* FW needs one App to configure HW */
> +	hw->local_dcbx_config.numapps = 1;
> +	hw->local_dcbx_config.app[0].selector =
> +			I40E_APP_SEL_ETHTYPE;
> +	hw->local_dcbx_config.app[0].priority = 3;
> +	hw->local_dcbx_config.app[0].protocolid =
> +			I40E_APP_PROTOID_FCOE;
> +	ret = i40e_set_dcb_config(hw);
> +	if (ret) {
> +		PMD_INIT_LOG(ERR,
> +		"default dcb config fails. err = %d, aq_err = %d.",
> +			ret, hw->aq.asq_last_status);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +int
> +i40e_flush_region_all_conf(struct i40e_hw *hw, struct i40e_pf *pf,
> +				uint16_t on)
> +{
> +	uint16_t i;
> +	struct i40e_queue_region_info *info = &pf->queue_region;
> +
> +	if (on) {
> +		i40e_vsi_update_queue_region_mapping(hw, pf);
> +		i40e_queue_region_dcb_configure(hw, pf);
> +		return 0;
> +	}
> +
> +	info->queue_region_number = 1;
> +	info->region[0].queue_num = 64;
> +	info->region[0].queue_start_index = 0;
> +
> +	i40e_vsi_update_queue_region_mapping(hw, pf);
> +	i40e_queue_region_dcb_configure_default(hw);
> +
> +	for (i = 0; i < I40E_PFQF_HREGION_MAX_INDEX; i++)
> +		i40e_write_rx_ctl(hw, I40E_PFQF_HREGION(i), 0);
> +
> +	memset(info, 0, sizeof(struct i40e_queue_region_info));
> +
> +	return 0;
> +}
> +
> +int rte_pmd_i40e_queue_region_conf(uint8_t port,
> +			struct rte_i40e_rss_region_conf *conf_ptr)
> +{
> +	struct rte_eth_dev *dev = &rte_eth_devices[port];

you need to verify port_id, since this is public API now. Please check
other APIs in this file.

> +	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
> +	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> +	enum rte_pmd_i40e_queue_region_op op_type = conf_ptr->op;
> +	uint32_t ret;

This should be signed variable, since you are using it for return and
assigning negative values.

> +
> +	if (!is_i40e_supported(dev))
> +		return -ENOTSUP;
> +
> +	switch (op_type) {
> +	case RTE_PMD_I40E_QUEUE_REGION_SET:
> +		ret = i40e_set_queue_region(pf, conf_ptr);
> +		break;

Does it make sense to add another type to get the current queue region
config?

> +	case RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET:
> +		ret = i40e_set_region_flowtype_pf(hw, pf, conf_ptr);
> +		break;
> +	case RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET:
> +		ret = -EINVAL;

Will this be implemented later, or not a valid case at all?

> +		break;
> +	case RTE_PMD_I40E_UP_REGION_SET:> +		ret = i40e_set_up_region(pf, conf_ptr);
> +		break;
> +	case RTE_PMD_I40E_REGION_ALL_FLUSH_ON:
> +		ret = i40e_flush_region_all_conf(hw, pf, 1);
> +		break;
> +	case RTE_PMD_I40E_REGION_ALL_FLUSH_OFF:
> +		ret = i40e_flush_region_all_conf(hw, pf, 0);

Can you please describe what flush_on and flush_off are, you can comment
to code if also.

> +		break;
> +
> +	default:
> +		PMD_DRV_LOG(WARNING, "op type (%d) not supported",
> +			    op_type);

Line break not required.

> +		ret = -EINVAL;
> +		break;
> +	}
> +
> +	I40E_WRITE_FLUSH(hw);
> +
> +	return ret;
> +}
> diff --git a/drivers/net/i40e/rte_pmd_i40e.h b/drivers/net/i40e/rte_pmd_i40e.h
> index 155b7e8..a1329f4 100644
> --- a/drivers/net/i40e/rte_pmd_i40e.h
> +++ b/drivers/net/i40e/rte_pmd_i40e.h
> @@ -91,6 +91,20 @@ enum rte_pmd_i40e_package_info {
>  	RTE_PMD_I40E_PKG_INFO_MAX = 0xFFFFFFFF
>  };
>  
> +/**
> + *  Option types of queue region.
> + */
> +enum rte_pmd_i40e_queue_region_op {
> +	RTE_PMD_I40E_REGION_UNDEFINED = 0,

No need to set to zero, it is default value.

> +	RTE_PMD_I40E_QUEUE_REGION_SET,      /**< add queue region set*/

I would suggest using same namespace for the enum ites, there are
"RTE_PMD_I40E_REGION" and "RTE_PMD_I40E_QUEUE_REGION".
"RTE_PMD_I40E_QUEUE_REGION" looks better. Or if it makes sense, perhaps
"RTE_PMD_I40E_RSS_QUEUE_REGION"? But please stick with one.

> +	RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET,   /**< add pf region pctype set */
> +	RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET,   /**< add vf region pctype set */
> +	RTE_PMD_I40E_UP_REGION_SET,   /**< add queue region pctype set */

is this comment correct, this type should be for user priority

> +	RTE_PMD_I40E_REGION_ALL_FLUSH_ON,   /**< flush on all configuration */
> +	RTE_PMD_I40E_REGION_ALL_FLUSH_OFF,   /**< flush off all configuration */
> +	RTE_PMD_I40E_QUEUE_REGION_OP_MAX
> +};
> +
>  #define RTE_PMD_I40E_DDP_NAME_SIZE 32
>  
>  /**
> @@ -146,6 +160,18 @@ struct rte_pmd_i40e_ptype_mapping {
>  };
>  
>  /**
> + * Queue region information get from CLI.

It doesn't need to be from CLI, can drop that part

> + */
> +struct rte_i40e_rss_region_conf {

This is now public struct, can you please comment items.

> +	uint8_t region_id;
> +	uint8_t hw_flowtype;
> +	uint8_t queue_start_index;
> +	uint8_t queue_num;
> +	uint8_t user_priority;
> +	enum rte_pmd_i40e_queue_region_op  op;

Extra space before "op"

> +};
> +
> +/**
>   * Notify VF when PF link status changes.
>   *
>   * @param port
> @@ -657,4 +683,16 @@ int rte_pmd_i40e_ptype_mapping_replace(uint8_t port,
>  int rte_pmd_i40e_add_vf_mac_addr(uint8_t port, uint16_t vf_id,
>  				 struct ether_addr *mac_addr);
>  
> +/**
> + * Get RSS queue region info from CLI and do configuration for

Again now this is an API, not just for testpmd, please drop CLI

> + * that port as the command otion type

s/otion/options

> + *
> + * @param port
> + *    pointer to port identifier of the device

Not pointer.
Also this will conflict with patch that updates port storage to
uint16_t. Can you please follow the status of the patch, and if this
merged after that one, please ping to remind to update the storage.

> + * @param conf_ptr
> + *    pointer to the struct that contain all the
> + *    region configuration parameters
> + */
> +int rte_pmd_i40e_queue_region_conf(uint8_t port,

Can you please use port_id as variable name to be consistent.

> +		struct rte_i40e_rss_region_conf *conf_ptr);

conf_ptr variable name doesn't say much, although it is OK in this
context. I would suggest something like rss_region_conf

>  #endif /* _PMD_I40E_H_ */
> diff --git a/drivers/net/i40e/rte_pmd_i40e_version.map b/drivers/net/i40e/rte_pmd_i40e_version.map
> index ef8882b..c3ee2da 100644
> --- a/drivers/net/i40e/rte_pmd_i40e_version.map
> +++ b/drivers/net/i40e/rte_pmd_i40e_version.map
> @@ -50,5 +50,6 @@ DPDK_17.11 {
>  	global:
>  
>  	rte_pmd_i40e_add_vf_mac_addr;
> +	rte_pmd_i40e_queue_region_conf;

The feature has been mention as "rss queue region" above a few places,
do you think rte_pmd_i40e_rss_queue_region_conf() or
rte_pmd_i40e_queue_region_conf() is better for API name?

>  
>  } DPDK_17.08;
>
  
Zhao1, Wei Sept. 21, 2017, 6:48 a.m. UTC | #2
Hi,  Ferruh

> -----Original Message-----

> From: Yigit, Ferruh

> Sent: Wednesday, September 20, 2017 6:36 PM

> To: Zhao1, Wei <wei.zhao1@intel.com>; dev@dpdk.org

> Subject: Re: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and flush

> 

> On 9/15/2017 4:13 AM, Wei Zhao wrote:

> > This feature enable queue regions configuration for RSS in PF/VF, so

> > that different traffic classes or different packet classification

> > types can be separated to different queues in different queue

> > regions.This patch can set queue region range, it include queue number

> > in a region and the index of first queue.

> 

> Is following correct:

> So instead of distributing packets to the multiple queues, this will distribute

> packets into queue reqions which may consists of multiple queues.

> 

> If so, is there a way to control how packets distributed within same queue

> region to multiple queues?

> 

> And is this feature only supported with RSS? Can it be part of RSS

> configuration instead of PMD specific API?

> 

> > This patch enable mapping between different priorities (UP) and

> 

> User priorities (UP)

> 

> > different traffic classes.It also enable mapping between a region

> > index and a sepcific flowtype(PCTYPE).It also provide the solution of

> > flush all configuration about queue region the above described.

> >

> > Signed-off-by: Wei Zhao <wei.zhao1@intel.com>

> > ---

> >  drivers/net/i40e/i40e_ethdev.c            |  19 +-

> >  drivers/net/i40e/i40e_ethdev.h            |  30 ++

> >  drivers/net/i40e/rte_pmd_i40e.c           | 482

> ++++++++++++++++++++++++++++++

> >  drivers/net/i40e/rte_pmd_i40e.h           |  38 +++

> >  drivers/net/i40e/rte_pmd_i40e_version.map |   1 +

> >  5 files changed, 566 insertions(+), 4 deletions(-)

> >

> 

> <...>

> 

> > +static int

> > +i40e_vsi_update_queue_region_mapping(struct i40e_hw *hw,

> > +			      struct i40e_pf *pf)

> > +{

> > +	uint16_t i;

> > +	struct i40e_vsi *vsi = pf->main_vsi;

> > +	uint16_t queue_offset, bsf, tc_index;

> > +	struct i40e_vsi_context ctxt;

> > +	struct i40e_aqc_vsi_properties_data *vsi_info;

> > +	struct i40e_queue_region_info *region_info =

> > +				&pf->queue_region;

> > +	uint32_t ret = -EINVAL;

> > +

> > +	if (!region_info->queue_region_number) {

> > +		PMD_INIT_LOG(ERR, "there is no that region id been set

> before");

> 

> Can you please re-check the log message.

> 

> > +		return ret;

> > +	}

> > +

> > +	memset(&ctxt, 0, sizeof(struct i40e_vsi_context));

> > +

> > +	/* Update Queue Pairs Mapping for currently enabled UPs */

> > +	ctxt.seid = vsi->seid;

> > +	ctxt.pf_num = hw->pf_id;

> > +	ctxt.vf_num = 0;

> > +	ctxt.uplink_seid = vsi->uplink_seid;

> > +	ctxt.info = vsi->info;

> > +	vsi_info = &ctxt.info;

> > +

> > +	memset(vsi_info->tc_mapping, 0, sizeof(uint16_t) * 8);

> > +	memset(vsi_info->queue_mapping, 0, sizeof(uint16_t) * 16);

> > +

> > +	/**

> > +	 * Configure queue region and queue mapping parameters,

> > +	 * for enabled queue region, allocate queues to this region.

> > +	 */

> > +

> > +	for (i = 0; i < region_info->queue_region_number; i++) {

> > +		tc_index = region_info->region[i].region_id;

> > +		bsf = rte_bsf32(region_info->region[i].queue_num);

> > +		queue_offset = region_info->region[i].queue_start_index;

> > +		vsi_info->tc_mapping[tc_index] = rte_cpu_to_le_16(

> > +			(queue_offset <<

> I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |

> > +				(bsf <<

> I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT));

> > +	}

> > +

> > +	/* Associate queue number with VSI, Keep vsi->nb_qps unchanged

> */

> > +	if (vsi->type == I40E_VSI_SRIOV) {

> > +		vsi_info->mapping_flags |=

> > +

> 	rte_cpu_to_le_16(I40E_AQ_VSI_QUE_MAP_NONCONTIG);

> > +		for (i = 0; i < vsi->nb_qps; i++)

> > +			vsi_info->queue_mapping[i] =

> > +				rte_cpu_to_le_16(vsi->base_queue + i);

> > +	} else {

> > +		vsi_info->mapping_flags |=

> > +

> 	rte_cpu_to_le_16(I40E_AQ_VSI_QUE_MAP_CONTIG);

> > +		vsi_info->queue_mapping[0] = rte_cpu_to_le_16(vsi-

> >base_queue);

> > +	}

> > +	vsi_info->valid_sections |=

> > +

> 	rte_cpu_to_le_16(I40E_AQ_VSI_PROP_QUEUE_MAP_VALID);

> > +

> > +	/* Update the VSI after updating the VSI queue-mapping

> information */

> > +	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);

> > +	if (ret) {

> > +		PMD_INIT_LOG(ERR, "Failed to configure queue region "

> > +			"mapping = %d ", hw->aq.asq_last_status);

> 

> Please don't split log message.


If I do not spli it , the line will has more 80 character, there will be warning in patch check log!
So, which way do you think will be better? Split or warning log?


> 

> > +		return ret;

> > +	}

> > +	/* update the local VSI info with updated queue map */

> > +	(void)rte_memcpy(&vsi->info.tc_mapping, &ctxt.info.tc_mapping,

> > +					sizeof(vsi->info.tc_mapping));

> > +	(void)rte_memcpy(&vsi->info.queue_mapping,

> > +			&ctxt.info.queue_mapping,

> > +		sizeof(vsi->info.queue_mapping));

> 

> Please keep line allignment same with above line.

> 

> > +	vsi->info.mapping_flags = ctxt.info.mapping_flags;

> > +	vsi->info.valid_sections = 0;

> > +

> > +	return 0;

> > +}

> > +

> > +

> > +static int

> > +i40e_set_queue_region(struct i40e_pf *pf,

> > +				struct rte_i40e_rss_region_conf *conf_ptr) {

> > +	uint16_t tc_size_tb[7] = {1, 2, 4, 8, 16, 32, 64};

> > +	uint16_t i;

> > +	struct i40e_vsi *main_vsi = pf->main_vsi;

> > +	struct i40e_queue_region_info *info = &pf->queue_region;

> > +	uint32_t ret = -EINVAL;

> > +

> > +	for (i = 0; i < I40E_REGION_MAX_INDEX; i++)

> > +		if (conf_ptr->queue_num == tc_size_tb[i])

> > +			break;

> > +

> > +	if (i == I40E_REGION_MAX_INDEX) {

> > +		printf("The region sizes should be any of the following "

> > +		"values: 1, 2, 4, 8, 16, 32, 64 as long as the "

> > +		"total number of queues do not exceed the VSI allocation");

> > +		return ret;

> > +	}

> > +

> > +	if (conf_ptr->region_id >= I40E_REGION_MAX_INDEX) {

> > +		PMD_INIT_LOG(ERR, "the queue region max index is 7");

> > +		return ret;

> > +	}

> > +

> > +	if ((conf_ptr->queue_start_index + conf_ptr->queue_num)

> > +					> main_vsi->nb_used_qps) {

> > +		PMD_INIT_LOG(ERR, "the queue index exceeds the VSI

> range");

> > +		return ret;

> > +	}

> > +

> > +	if (info->queue_region_number) {

> > +		for (i = 0; i < info->queue_region_number; i++)

> > +			if (conf_ptr->region_id == info->region[i].region_id)

> > +				break;

> > +

> > +		if ((i == info->queue_region_number) &&

> > +			(i <= I40E_REGION_MAX_INDEX)) {

> 

> 

> Please use one more level indentaion here, and pharantesis looks extra can

> you please double check?

> 

> > +			info->region[i].region_id = conf_ptr->region_id;

> > +			info->region[i].queue_num = conf_ptr->queue_num;

> > +			info->region[i].queue_start_index =

> > +				conf_ptr->queue_start_index;

> > +			info->queue_region_number++;

> > +		} else {

> > +			PMD_INIT_LOG(ERR, "queue region number "

> > +				"exceeds maxnum 8 or the "

> > +				"queue region id has "

> > +				"been set before");

> 

> please don't split log message.


If I do not spli it , the line will has more 80 character, there will be warning in patch check log!

> 

> > +			return ret;

> > +		}

> > +	} else {

> > +		info->region[0].region_id = conf_ptr->region_id;

> > +		info->region[0].queue_num = conf_ptr->queue_num;

> > +		info->region[0].queue_start_index =

> > +			conf_ptr->queue_start_index;

> > +		info->queue_region_number++;

> > +	}

> > +

> > +	return 0;

> > +}

> > +

> > +static int

> > +i40e_set_region_flowtype_pf(struct i40e_hw *hw,

> > +		struct i40e_pf *pf, struct rte_i40e_rss_region_conf

> *conf_ptr)

> 

> What do you think starting all functions, even they are static, with

> "i40e_queue_region_" prefix?

> 

> > +{

> > +	uint32_t pfqf_hregion;

> > +	uint32_t ret = -EINVAL;

> > +	struct i40e_queue_region_info *info = &pf->queue_region;

> > +	uint16_t i, j, index, flowtype_set = 0;

> > +	uint16_t region_index, flowtype_index;

> > +

> > +	if (conf_ptr->region_id > I40E_PFQF_HREGION_MAX_INDEX) {

> > +		PMD_INIT_LOG(ERR, "the queue region max index is 7");

> > +		return ret;

> > +	}

> > +

> > +	if (conf_ptr->hw_flowtype >= I40E_FILTER_PCTYPE_MAX) {

> > +		PMD_INIT_LOG(ERR, "the hw_flowtype or PCTYPE max index

> is 63");

> > +		return ret;

> > +	}

> > +

> > +	if (info->queue_region_number) {

> > +		for (i = 0; i < info->queue_region_number; i++)

> > +			if (conf_ptr->region_id == info->region[i].region_id)

> > +				break;

> > +

> > +		if (i == info->queue_region_number) {

> > +			PMD_INIT_LOG(ERR, "that region id has not "

> > +				"been set before");

> > +			return ret;

> > +		}

> > +		region_index = i;

> > +

> > +		for (i = 0; i < info->queue_region_number; i++) {

> > +			for (j = 0; j < info->region[i].flowtype_num; j++) {

> > +				if (conf_ptr->hw_flowtype ==

> > +					info->region[i].hw_flowtype[j]) {

> > +					flowtype_set = 1;

> > +					break;

> > +				}

> > +			}

> > +		}

> > +

> > +		if (flowtype_set) {

> > +			PMD_INIT_LOG(ERR, "that hw_flowtype "

> > +				"has been set before");

> 

> Please don't split log messages.

> 

> > +			return ret;

> > +		}

> > +		flowtype_index = info->region[region_index].flowtype_num;

> > +		info->region[region_index].hw_flowtype[flowtype_index] =

> > +						conf_ptr->hw_flowtype;

> > +		info->region[region_index].flowtype_num++;

> > +	} else  {

> > +		PMD_INIT_LOG(ERR, "there is no that region id been set

> before");

> > +		return ret;

> > +	}

> > +

> > +	index = conf_ptr->hw_flowtype >> 3;

> > +	pfqf_hregion = i40e_read_rx_ctl(hw, I40E_PFQF_HREGION(index));

> > +

> > +	if ((conf_ptr->hw_flowtype & 0x7) == 0) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_0_SHIFT;

> > +		pfqf_hregion |= 1 <<

> I40E_PFQF_HREGION_OVERRIDE_ENA_0_SHIFT;

> > +	} else if ((conf_ptr->hw_flowtype & 0x7) == 1) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_1_SHIFT;

> > +		pfqf_hregion |= 1 <<

> I40E_PFQF_HREGION_OVERRIDE_ENA_1_SHIFT;

> > +	} else if ((conf_ptr->hw_flowtype & 0x7) == 2) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_2_SHIFT;

> > +		pfqf_hregion |= 1 <<

> I40E_PFQF_HREGION_OVERRIDE_ENA_2_SHIFT;

> > +	} else if ((conf_ptr->hw_flowtype & 0x7) == 3) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_3_SHIFT;

> > +		pfqf_hregion |= 1 <<

> I40E_PFQF_HREGION_OVERRIDE_ENA_3_SHIFT;

> > +	} else if ((conf_ptr->hw_flowtype & 0x7) == 4) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_4_SHIFT;

> > +		pfqf_hregion |= 1 <<

> I40E_PFQF_HREGION_OVERRIDE_ENA_4_SHIFT;

> > +	} else if ((conf_ptr->hw_flowtype & 0x7) == 5) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_5_SHIFT;

> > +		pfqf_hregion |= 1 <<

> I40E_PFQF_HREGION_OVERRIDE_ENA_5_SHIFT;

> > +	} else if ((conf_ptr->hw_flowtype & 0x7) == 6) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_6_SHIFT;

> > +		pfqf_hregion |= 1 <<

> I40E_PFQF_HREGION_OVERRIDE_ENA_6_SHIFT;

> > +	} else {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_7_SHIFT;

> > +		pfqf_hregion |= 1 <<

> I40E_PFQF_HREGION_OVERRIDE_ENA_7_SHIFT;

> > +	}

> > +

> > +	i40e_write_rx_ctl(hw, I40E_PFQF_HREGION(index), pfqf_hregion);

> > +

> > +	return 0;

> > +}

> > +

> > +static int

> > +i40e_set_up_region(struct i40e_pf *pf,

> > +				struct rte_i40e_rss_region_conf *conf_ptr)

> 

> The API name looks like "setup" but that up part is user_priority, can you

> please long form against "up"?

> 

> > +{

> > +	struct i40e_queue_region_info *info = &pf->queue_region;

> > +	uint32_t ret = -EINVAL;

> > +	uint16_t i, j, region_index, up_set = 0;

> 

> Same abrevation comment for "upset", please use long form.

> 

> > +

> > +	if (conf_ptr->user_priority >

> I40E_REGION_USERPRIORITY_MAX_INDEX) {

> > +		PMD_INIT_LOG(ERR, "the queue region max index is 7");

> > +		return ret;

> > +	}

> > +

> > +	if (conf_ptr->region_id >= I40E_REGION_MAX_INDEX) {

> > +		PMD_INIT_LOG(ERR, "the region_id max index is 7");

> > +		return ret;

> > +	}

> > +

> > +	if (info->queue_region_number) {

> > +		for (i = 0; i < info->queue_region_number; i++)

> > +			if (conf_ptr->region_id == info->region[i].region_id)

> > +				break;

> > +

> > +		if (i == info->queue_region_number) {

> > +			PMD_INIT_LOG(ERR, "that region id "

> > +				"has not been set before");

> > +			return ret;

> > +		}

> > +		region_index = i;

> > +

> > +		for (i = 0; i < info->queue_region_number; i++) {

> > +			for (j = 0; j <

> > +				info->region[i].user_priority_num; j++) {

> 

> Please break line after ";"

> 

> > +				if (info->region[i].user_priority[j] ==

> > +					conf_ptr->user_priority) {

> > +					up_set = 1;

> > +					break;

> > +				}

> > +			}

> > +		}

> > +

> > +		if (up_set) {

> > +			PMD_INIT_LOG(ERR, "that user priority "

> > +				"has been set before");

> > +			return ret;

> > +		}

> > +

> > +		j = info->region[region_index].user_priority_num;

> > +		info->region[region_index].user_priority[j] =

> > +						conf_ptr->user_priority;

> > +		info->region[region_index].user_priority_num++;

> > +	} else {

> > +		PMD_INIT_LOG(ERR, "there is no that region id been set

> before");

> > +		return ret;

> > +	}

> > +

> > +	return 0;

> > +}

> > +

> > +static int

> > +i40e_queue_region_dcb_configure(struct i40e_hw *hw,

> > +				struct i40e_pf *pf)

> > +{

> > +	struct i40e_dcbx_config dcb_cfg_local;

> > +	struct i40e_dcbx_config *dcb_cfg;

> > +	struct i40e_queue_region_info *info = &pf->queue_region;

> > +	struct i40e_dcbx_config *old_cfg = &hw->local_dcbx_config;

> > +	uint32_t ret = -EINVAL;

> 

> Please use signed variable, there are more above.

> 

> > +	uint16_t i, j, prio_index, region_index;

> > +	uint8_t tc_map, tc_bw, bw_lf;

> > +

> > +	if (!info->queue_region_number) {

> > +		PMD_INIT_LOG(ERR, "there is no that region id been set

> before");

> > +		return ret;

> > +	}

> > +

> > +	dcb_cfg = &dcb_cfg_local;

> > +	memset(dcb_cfg, 0, sizeof(struct i40e_dcbx_config));

> > +

> > +	/* assume each tc has the same bw */

> > +	tc_bw = I40E_MAX_PERCENT / info->queue_region_number;

> > +	for (i = 0; i < info->queue_region_number; i++)

> > +		dcb_cfg->etscfg.tcbwtable[i] = tc_bw;

> > +	/* to ensure the sum of tcbw is equal to 100 */

> > +	bw_lf = I40E_MAX_PERCENT %  info->queue_region_number;

> > +	for (i = 0; i < bw_lf; i++)

> > +		dcb_cfg->etscfg.tcbwtable[i]++;

> > +

> > +	/* assume each tc has the same Transmission Selection Algorithm */

> > +	for (i = 0; i < info->queue_region_number; i++)

> > +		dcb_cfg->etscfg.tsatable[i] = I40E_IEEE_TSA_ETS;

> > +

> > +	for (i = 0; i < info->queue_region_number; i++) {

> > +		for (j = 0; j < info->region[i].user_priority_num; j++) {

> > +			prio_index = info->region[i].user_priority[j];

> > +			region_index = info->region[i].region_id;

> > +			dcb_cfg->etscfg.prioritytable[prio_index] =

> > +						region_index;

> > +		}

> > +	}

> > +

> > +	/* FW needs one App to configure HW */

> > +	dcb_cfg->numapps = I40E_DEFAULT_DCB_APP_NUM;

> > +	dcb_cfg->app[0].selector = I40E_APP_SEL_ETHTYPE;

> > +	dcb_cfg->app[0].priority = I40E_DEFAULT_DCB_APP_PRIO;

> > +	dcb_cfg->app[0].protocolid = I40E_APP_PROTOID_FCOE;

> > +

> > +	tc_map = RTE_LEN2MASK(info->queue_region_number, uint8_t);

> > +

> > +	dcb_cfg->pfc.willing = 0;

> > +	dcb_cfg->pfc.pfccap = I40E_MAX_TRAFFIC_CLASS;

> > +	dcb_cfg->pfc.pfcenable = tc_map;

> > +

> > +	/* Copy the new config to the current config */

> > +	*old_cfg = *dcb_cfg;

> > +	old_cfg->etsrec = old_cfg->etscfg;

> > +	ret = i40e_set_dcb_config(hw);

> > +

> > +	if (ret) {

> > +		PMD_INIT_LOG(ERR, "Set queue region DCB Config failed,

> err"

> > +			" %s aq_err %s",

> 

> Please don't split the log message.

> 

> > +			 i40e_stat_str(hw, ret),

> > +			 i40e_aq_str(hw, hw->aq.asq_last_status));

> > +		return ret;

> > +	}

> > +

> > +	return 0;

> > +}

> > +

> > +static int

> > +i40e_queue_region_dcb_configure_default(struct i40e_hw *hw) {

> > +	uint16_t i;

> > +	uint32_t ret = -EINVAL;

> 

> Please use signed variable here.

> 

> > +

> > +	memset(&hw->local_dcbx_config, 0,

> > +	sizeof(struct i40e_dcbx_config));

> 

> Wrong indentation.

> 

> > +	/* set dcb default configuration */

> > +	hw->local_dcbx_config.etscfg.willing = 0;

> > +	hw->local_dcbx_config.etscfg.maxtcs = 0;

> > +	hw->local_dcbx_config.etscfg.tcbwtable[0] = 100;

> > +	hw->local_dcbx_config.etscfg.tsatable[0] =

> > +			I40E_IEEE_TSA_ETS;

> > +	/* all UPs mapping to region 0 */

> > +	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++)

> > +		hw->local_dcbx_config.etscfg.prioritytable[i] = 0;

> > +	hw->local_dcbx_config.etsrec =

> > +		hw->local_dcbx_config.etscfg;

> > +	hw->local_dcbx_config.pfc.willing = 0;

> > +	hw->local_dcbx_config.pfc.pfccap =

> > +			I40E_MAX_TRAFFIC_CLASS;

> > +	/* FW needs one App to configure HW */

> > +	hw->local_dcbx_config.numapps = 1;

> > +	hw->local_dcbx_config.app[0].selector =

> > +			I40E_APP_SEL_ETHTYPE;

> > +	hw->local_dcbx_config.app[0].priority = 3;

> > +	hw->local_dcbx_config.app[0].protocolid =

> > +			I40E_APP_PROTOID_FCOE;

> > +	ret = i40e_set_dcb_config(hw);

> > +	if (ret) {

> > +		PMD_INIT_LOG(ERR,

> > +		"default dcb config fails. err = %d, aq_err = %d.",

> > +			ret, hw->aq.asq_last_status);

> > +		return ret;

> > +	}

> > +

> > +	return 0;

> > +}

> > +

> > +int

> > +i40e_flush_region_all_conf(struct i40e_hw *hw, struct i40e_pf *pf,

> > +				uint16_t on)

> > +{

> > +	uint16_t i;

> > +	struct i40e_queue_region_info *info = &pf->queue_region;

> > +

> > +	if (on) {

> > +		i40e_vsi_update_queue_region_mapping(hw, pf);

> > +		i40e_queue_region_dcb_configure(hw, pf);

> > +		return 0;

> > +	}

> > +

> > +	info->queue_region_number = 1;

> > +	info->region[0].queue_num = 64;

> > +	info->region[0].queue_start_index = 0;

> > +

> > +	i40e_vsi_update_queue_region_mapping(hw, pf);

> > +	i40e_queue_region_dcb_configure_default(hw);

> > +

> > +	for (i = 0; i < I40E_PFQF_HREGION_MAX_INDEX; i++)

> > +		i40e_write_rx_ctl(hw, I40E_PFQF_HREGION(i), 0);

> > +

> > +	memset(info, 0, sizeof(struct i40e_queue_region_info));

> > +

> > +	return 0;

> > +}

> > +

> > +int rte_pmd_i40e_queue_region_conf(uint8_t port,

> > +			struct rte_i40e_rss_region_conf *conf_ptr) {

> > +	struct rte_eth_dev *dev = &rte_eth_devices[port];

> 

> you need to verify port_id, since this is public API now. Please check other

> APIs in this file.

> 

> > +	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data-

> >dev_private);

> > +	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data-

> >dev_private);

> > +	enum rte_pmd_i40e_queue_region_op op_type = conf_ptr->op;

> > +	uint32_t ret;

> 

> This should be signed variable, since you are using it for return and assigning

> negative values.

> 

> > +

> > +	if (!is_i40e_supported(dev))

> > +		return -ENOTSUP;

> > +

> > +	switch (op_type) {

> > +	case RTE_PMD_I40E_QUEUE_REGION_SET:

> > +		ret = i40e_set_queue_region(pf, conf_ptr);

> > +		break;

> 

> Does it make sense to add another type to get the current queue region

> config?

> 

> > +	case RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET:

> > +		ret = i40e_set_region_flowtype_pf(hw, pf, conf_ptr);

> > +		break;

> > +	case RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET:

> > +		ret = -EINVAL;

> 

> Will this be implemented later, or not a valid case at all?

> 

> > +		break;

> > +	case RTE_PMD_I40E_UP_REGION_SET:> +		ret =

> i40e_set_up_region(pf, conf_ptr);

> > +		break;

> > +	case RTE_PMD_I40E_REGION_ALL_FLUSH_ON:

> > +		ret = i40e_flush_region_all_conf(hw, pf, 1);

> > +		break;

> > +	case RTE_PMD_I40E_REGION_ALL_FLUSH_OFF:

> > +		ret = i40e_flush_region_all_conf(hw, pf, 0);

> 

> Can you please describe what flush_on and flush_off are, you can comment

> to code if also.

> 

> > +		break;

> > +

> > +	default:

> > +		PMD_DRV_LOG(WARNING, "op type (%d) not supported",

> > +			    op_type);

> 

> Line break not required.

> 

> > +		ret = -EINVAL;

> > +		break;

> > +	}

> > +

> > +	I40E_WRITE_FLUSH(hw);

> > +

> > +	return ret;

> > +}

> > diff --git a/drivers/net/i40e/rte_pmd_i40e.h

> > b/drivers/net/i40e/rte_pmd_i40e.h index 155b7e8..a1329f4 100644

> > --- a/drivers/net/i40e/rte_pmd_i40e.h

> > +++ b/drivers/net/i40e/rte_pmd_i40e.h

> > @@ -91,6 +91,20 @@ enum rte_pmd_i40e_package_info {

> >  	RTE_PMD_I40E_PKG_INFO_MAX = 0xFFFFFFFF  };

> >

> > +/**

> > + *  Option types of queue region.

> > + */

> > +enum rte_pmd_i40e_queue_region_op {

> > +	RTE_PMD_I40E_REGION_UNDEFINED = 0,

> 

> No need to set to zero, it is default value.

> 

> > +	RTE_PMD_I40E_QUEUE_REGION_SET,      /**< add queue region

> set*/

> 

> I would suggest using same namespace for the enum ites, there are

> "RTE_PMD_I40E_REGION" and "RTE_PMD_I40E_QUEUE_REGION".

> "RTE_PMD_I40E_QUEUE_REGION" looks better. Or if it makes sense,

> perhaps "RTE_PMD_I40E_RSS_QUEUE_REGION"? But please stick with one.

> 

> > +	RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET,   /**< add pf region

> pctype set */

> > +	RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET,   /**< add vf region

> pctype set */

> > +	RTE_PMD_I40E_UP_REGION_SET,   /**< add queue region pctype

> set */

> 

> is this comment correct, this type should be for user priority

> 

> > +	RTE_PMD_I40E_REGION_ALL_FLUSH_ON,   /**< flush on all

> configuration */

> > +	RTE_PMD_I40E_REGION_ALL_FLUSH_OFF,   /**< flush off all

> configuration */

> > +	RTE_PMD_I40E_QUEUE_REGION_OP_MAX

> > +};

> > +

> >  #define RTE_PMD_I40E_DDP_NAME_SIZE 32

> >

> >  /**

> > @@ -146,6 +160,18 @@ struct rte_pmd_i40e_ptype_mapping {  };

> >

> >  /**

> > + * Queue region information get from CLI.

> 

> It doesn't need to be from CLI, can drop that part

> 

> > + */

> > +struct rte_i40e_rss_region_conf {

> 

> This is now public struct, can you please comment items.

> 

> > +	uint8_t region_id;

> > +	uint8_t hw_flowtype;

> > +	uint8_t queue_start_index;

> > +	uint8_t queue_num;

> > +	uint8_t user_priority;

> > +	enum rte_pmd_i40e_queue_region_op  op;

> 

> Extra space before "op"

> 

> > +};

> > +

> > +/**

> >   * Notify VF when PF link status changes.

> >   *

> >   * @param port

> > @@ -657,4 +683,16 @@ int

> rte_pmd_i40e_ptype_mapping_replace(uint8_t

> > port,  int rte_pmd_i40e_add_vf_mac_addr(uint8_t port, uint16_t vf_id,

> >  				 struct ether_addr *mac_addr);

> >

> > +/**

> > + * Get RSS queue region info from CLI and do configuration for

> 

> Again now this is an API, not just for testpmd, please drop CLI

> 

> > + * that port as the command otion type

> 

> s/otion/options

> 

> > + *

> > + * @param port

> > + *    pointer to port identifier of the device

> 

> Not pointer.

> Also this will conflict with patch that updates port storage to uint16_t. Can

> you please follow the status of the patch, and if this merged after that one,

> please ping to remind to update the storage.

> 

> > + * @param conf_ptr

> > + *    pointer to the struct that contain all the

> > + *    region configuration parameters

> > + */

> > +int rte_pmd_i40e_queue_region_conf(uint8_t port,

> 

> Can you please use port_id as variable name to be consistent.

> 

> > +		struct rte_i40e_rss_region_conf *conf_ptr);

> 

> conf_ptr variable name doesn't say much, although it is OK in this context. I

> would suggest something like rss_region_conf

> 

> >  #endif /* _PMD_I40E_H_ */

> > diff --git a/drivers/net/i40e/rte_pmd_i40e_version.map

> > b/drivers/net/i40e/rte_pmd_i40e_version.map

> > index ef8882b..c3ee2da 100644

> > --- a/drivers/net/i40e/rte_pmd_i40e_version.map

> > +++ b/drivers/net/i40e/rte_pmd_i40e_version.map

> > @@ -50,5 +50,6 @@ DPDK_17.11 {

> >  	global:

> >

> >  	rte_pmd_i40e_add_vf_mac_addr;

> > +	rte_pmd_i40e_queue_region_conf;

> 

> The feature has been mention as "rss queue region" above a few places, do

> you think rte_pmd_i40e_rss_queue_region_conf() or

> rte_pmd_i40e_queue_region_conf() is better for API name?

> 

> >

> >  } DPDK_17.08;

> >
  
Ferruh Yigit Sept. 21, 2017, 7:10 a.m. UTC | #3
On 9/21/2017 7:48 AM, Zhao1, Wei wrote:
> Hi,  Ferruh
> 
>> -----Original Message-----
>> From: Yigit, Ferruh
>> Sent: Wednesday, September 20, 2017 6:36 PM
>> To: Zhao1, Wei <wei.zhao1@intel.com>; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and flush
>>
>> On 9/15/2017 4:13 AM, Wei Zhao wrote:
>>> This feature enable queue regions configuration for RSS in PF/VF, so
>>> that different traffic classes or different packet classification
>>> types can be separated to different queues in different queue
>>> regions.This patch can set queue region range, it include queue number
>>> in a region and the index of first queue.
>>
>> Is following correct:
>> So instead of distributing packets to the multiple queues, this will distribute
>> packets into queue reqions which may consists of multiple queues.
>>
>> If so, is there a way to control how packets distributed within same queue
>> region to multiple queues?
>>
>> And is this feature only supported with RSS? Can it be part of RSS
>> configuration instead of PMD specific API?
>>
>>> This patch enable mapping between different priorities (UP) and
>>
>> User priorities (UP)
>>
>>> different traffic classes.It also enable mapping between a region
>>> index and a sepcific flowtype(PCTYPE).It also provide the solution of
>>> flush all configuration about queue region the above described.
>>>
>>> Signed-off-by: Wei Zhao <wei.zhao1@intel.com>

<...>

>>> +	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
>>> +	if (ret) {
>>> +		PMD_INIT_LOG(ERR, "Failed to configure queue region "
>>> +			"mapping = %d ", hw->aq.asq_last_status);
>>
>> Please don't split log message.
> 
> If I do not spli it , the line will has more 80 character, there will be warning in patch check log!
> So, which way do you think will be better? Split or warning log?

Checkpatch detects log lines and shouldn't complain about them.

Even it does I thinks checkpatch warning is better, assuming reasonable
length.

And you can do as following to reduce the line length:

PMD_INIT_LOG(ERR,
	"Failed to configure queue region mapping = %d ", 	
	hw->aq.asq_last_status);

<...>
  
Zhao1, Wei Sept. 21, 2017, 7:26 a.m. UTC | #4
> -----Original Message-----

> From: Yigit, Ferruh

> Sent: Thursday, September 21, 2017 3:10 PM

> To: Zhao1, Wei <wei.zhao1@intel.com>; dev@dpdk.org

> Subject: Re: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and flush

> 

> On 9/21/2017 7:48 AM, Zhao1, Wei wrote:

> > Hi,  Ferruh

> >

> >> -----Original Message-----

> >> From: Yigit, Ferruh

> >> Sent: Wednesday, September 20, 2017 6:36 PM

> >> To: Zhao1, Wei <wei.zhao1@intel.com>; dev@dpdk.org

> >> Subject: Re: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and

> >> flush

> >>

> >> On 9/15/2017 4:13 AM, Wei Zhao wrote:

> >>> This feature enable queue regions configuration for RSS in PF/VF, so

> >>> that different traffic classes or different packet classification

> >>> types can be separated to different queues in different queue

> >>> regions.This patch can set queue region range, it include queue

> >>> number in a region and the index of first queue.

> >>

> >> Is following correct:

> >> So instead of distributing packets to the multiple queues, this will

> >> distribute packets into queue reqions which may consists of multiple

> queues.

> >>

> >> If so, is there a way to control how packets distributed within same

> >> queue region to multiple queues?

> >>

> >> And is this feature only supported with RSS? Can it be part of RSS

> >> configuration instead of PMD specific API?

> >>

> >>> This patch enable mapping between different priorities (UP) and

> >>

> >> User priorities (UP)

> >>

> >>> different traffic classes.It also enable mapping between a region

> >>> index and a sepcific flowtype(PCTYPE).It also provide the solution

> >>> of flush all configuration about queue region the above described.

> >>>

> >>> Signed-off-by: Wei Zhao <wei.zhao1@intel.com>

> 

> <...>

> 

> >>> +	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);

> >>> +	if (ret) {

> >>> +		PMD_INIT_LOG(ERR, "Failed to configure queue region "

> >>> +			"mapping = %d ", hw->aq.asq_last_status);

> >>

> >> Please don't split log message.

> >

> > If I do not spli it , the line will has more 80 character, there will be warning in

> patch check log!

> > So, which way do you think will be better? Split or warning log?

> 

> Checkpatch detects log lines and shouldn't complain about them.

> 

> Even it does I thinks checkpatch warning is better, assuming reasonable

> length.

> 

> And you can do as following to reduce the line length:

> 

> PMD_INIT_LOG(ERR,

> 	"Failed to configure queue region mapping = %d ",

> 	hw->aq.asq_last_status);

> 

> <...>


Ok, I will change to that way, but there will be warning in patch check.
Because at first I do not split them, then after patch check, I find that I must eliminate these over 80 warning.
  
Ferruh Yigit Sept. 21, 2017, 3:45 p.m. UTC | #5
On 9/21/2017 8:26 AM, Zhao1, Wei wrote:
> 
> 
>> -----Original Message-----
>> From: Yigit, Ferruh
>> Sent: Thursday, September 21, 2017 3:10 PM
>> To: Zhao1, Wei <wei.zhao1@intel.com>; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and flush
>>
>> On 9/21/2017 7:48 AM, Zhao1, Wei wrote:
>>> Hi,  Ferruh
>>>
>>>> -----Original Message-----
>>>> From: Yigit, Ferruh
>>>> Sent: Wednesday, September 20, 2017 6:36 PM
>>>> To: Zhao1, Wei <wei.zhao1@intel.com>; dev@dpdk.org
>>>> Subject: Re: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and
>>>> flush
>>>>
>>>> On 9/15/2017 4:13 AM, Wei Zhao wrote:
>>>>> This feature enable queue regions configuration for RSS in PF/VF, so
>>>>> that different traffic classes or different packet classification
>>>>> types can be separated to different queues in different queue
>>>>> regions.This patch can set queue region range, it include queue
>>>>> number in a region and the index of first queue.
>>>>
>>>> Is following correct:
>>>> So instead of distributing packets to the multiple queues, this will
>>>> distribute packets into queue reqions which may consists of multiple
>> queues.
>>>>
>>>> If so, is there a way to control how packets distributed within same
>>>> queue region to multiple queues?
>>>>
>>>> And is this feature only supported with RSS? Can it be part of RSS
>>>> configuration instead of PMD specific API?
>>>>
>>>>> This patch enable mapping between different priorities (UP) and
>>>>
>>>> User priorities (UP)
>>>>
>>>>> different traffic classes.It also enable mapping between a region
>>>>> index and a sepcific flowtype(PCTYPE).It also provide the solution
>>>>> of flush all configuration about queue region the above described.
>>>>>
>>>>> Signed-off-by: Wei Zhao <wei.zhao1@intel.com>
>>
>> <...>
>>
>>>>> +	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
>>>>> +	if (ret) {
>>>>> +		PMD_INIT_LOG(ERR, "Failed to configure queue region "
>>>>> +			"mapping = %d ", hw->aq.asq_last_status);
>>>>
>>>> Please don't split log message.
>>>
>>> If I do not spli it , the line will has more 80 character, there will be warning in
>> patch check log!
>>> So, which way do you think will be better? Split or warning log?
>>
>> Checkpatch detects log lines and shouldn't complain about them.
>>
>> Even it does I thinks checkpatch warning is better, assuming reasonable
>> length.
>>
>> And you can do as following to reduce the line length:
>>
>> PMD_INIT_LOG(ERR,
>> 	"Failed to configure queue region mapping = %d ",
>> 	hw->aq.asq_last_status);
>>
>> <...>
> 
> Ok, I will change to that way, but there will be warning in patch check.
> Because at first I do not split them, then after patch check, I find that I must eliminate these over 80 warning.

If you are using devtools/checkpatches.sh script, it ignores the
LONG_LINE_STRING warnings, so long log messages shouldn't be generating
warning. I just confirmed it is working as expected.
  
Chilikin, Andrey Sept. 21, 2017, 7:53 p.m. UTC | #6
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Wei Zhao
> Sent: Friday, September 15, 2017 4:13 AM
> To: dev@dpdk.org
> Cc: Zhao1, Wei <wei.zhao1@intel.com>
> Subject: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and flush
> 
> This feature enable queue regions configuration for RSS in PF/VF,
> so that different traffic classes or different packet
> classification types can be separated to different queues in
> different queue regions.This patch can set queue region range,
> it include queue number in a region and the index of first queue.
> This patch enable mapping between different priorities (UP) and
> different traffic classes.It also enable mapping between a region
> index and a sepcific flowtype(PCTYPE).It also provide the solution
> of flush all configuration about queue region the above described.
> 
> Signed-off-by: Wei Zhao <wei.zhao1@intel.com>
> ---
>  drivers/net/i40e/i40e_ethdev.c            |  19 +-
>  drivers/net/i40e/i40e_ethdev.h            |  30 ++
>  drivers/net/i40e/rte_pmd_i40e.c           | 482
> ++++++++++++++++++++++++++++++
>  drivers/net/i40e/rte_pmd_i40e.h           |  38 +++
>  drivers/net/i40e/rte_pmd_i40e_version.map |   1 +
>  5 files changed, 566 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
> index 8e0580c..1663fc0 100644
> --- a/drivers/net/i40e/i40e_ethdev.c
> +++ b/drivers/net/i40e/i40e_ethdev.c
> @@ -137,10 +137,6 @@
>  #define I40E_PRTTSYN_TSYNTYPE    0x0e000000
>  #define I40E_CYCLECOUNTER_MASK   0xffffffffffffffffULL
> 
> -#define I40E_MAX_PERCENT            100
> -#define I40E_DEFAULT_DCB_APP_NUM    1
> -#define I40E_DEFAULT_DCB_APP_PRIO   3
> -
>  /**
>   * Below are values for writing un-exposed registers suggested
>   * by silicon experts
> @@ -1034,6 +1030,15 @@ i40e_init_fdir_filter_list(struct rte_eth_dev *dev)
>  	return ret;
>  }
> 
> +static void
> +i40e_init_queue_region_conf(struct rte_eth_dev *dev)
> +{
> +	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data-
> >dev_private);
> +	struct i40e_queue_region_info *info = &pf->queue_region;
> +
> +	memset(info, 0, sizeof(struct i40e_queue_region_info));
> +}
> +
>  static int
>  eth_i40e_dev_init(struct rte_eth_dev *dev)
>  {
> @@ -1309,6 +1314,9 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
>  	if (ret < 0)
>  		goto err_init_fdir_filter_list;
> 
> +	/* initialize queue region configuration */
> +	i40e_init_queue_region_conf(dev);
> +
>  	return 0;
> 
>  err_init_fdir_filter_list:
> @@ -1464,6 +1472,9 @@ eth_i40e_dev_uninit(struct rte_eth_dev *dev)
>  	/* Remove all Traffic Manager configuration */
>  	i40e_tm_conf_uninit(dev);
> 
> +	/* Remove all the queue region configuration */
> +	i40e_flush_region_all_conf(hw, pf, 0);
> +
>  	return 0;
>  }
> 
> diff --git a/drivers/net/i40e/i40e_ethdev.h
> b/drivers/net/i40e/i40e_ethdev.h
> index ad80f0f..d612886 100644
> --- a/drivers/net/i40e/i40e_ethdev.h
> +++ b/drivers/net/i40e/i40e_ethdev.h
> @@ -260,6 +260,16 @@ enum i40e_flxpld_layer_idx {
>  #define I40E_QOS_BW_WEIGHT_MIN 1
>  /* The max bandwidth weight is 127. */
>  #define I40E_QOS_BW_WEIGHT_MAX 127
> +/* The max queue region index is 7. */
> +#define I40E_REGION_MAX_INDEX 7
> +/* The max queue region userpriority is 7. */
> +#define I40E_REGION_USERPRIORITY_MAX_INDEX 7
> +/* The max pctype index is 63. */
> +#define I40E_REGION_PCTYPE_MAX_INDEX 63
> +
> +#define I40E_MAX_PERCENT            100
> +#define I40E_DEFAULT_DCB_APP_NUM    1
> +#define I40E_DEFAULT_DCB_APP_PRIO   3
> 
>  /**
>   * The overhead from MTU to max frame size.
> @@ -541,6 +551,22 @@ struct i40e_ethertype_rule {
>  	struct rte_hash *hash_table;
>  };
> 
> +/* queue region info */
> +struct i40e_region_info {
> +	uint8_t region_id;
> +	uint8_t queue_start_index;
> +	uint8_t queue_num;
> +	uint8_t user_priority_num;
> +	uint8_t user_priority[I40E_MAX_USER_PRIORITY];
> +	uint8_t flowtype_num;
> +	uint8_t hw_flowtype[I40E_FILTER_PCTYPE_MAX];
> +};
> +
> +struct i40e_queue_region_info {
> +	uint16_t queue_region_number;
> +	struct i40e_region_info region[I40E_REGION_MAX_INDEX + 1];
> +};
> +
>  /* Tunnel filter number HW supports */
>  #define I40E_MAX_TUNNEL_FILTER_NUM 400
> 
> @@ -776,6 +802,7 @@ struct i40e_pf {
>  	struct i40e_fdir_info fdir; /* flow director info */
>  	struct i40e_ethertype_rule ethertype; /* Ethertype filter rule */
>  	struct i40e_tunnel_rule tunnel; /* Tunnel filter rule */
> +	struct i40e_queue_region_info queue_region; /* queue region info
> */
>  	struct i40e_fc_conf fc_conf; /* Flow control conf */
>  	struct i40e_mirror_rule_list mirror_list;
>  	uint16_t nb_mirror_rule;   /* The number of mirror rules */
> @@ -1003,6 +1030,9 @@ void i40e_check_write_reg(struct i40e_hw *hw,
> uint32_t addr, uint32_t val);
>  int i40e_tm_ops_get(struct rte_eth_dev *dev, void *ops);
>  void i40e_tm_conf_init(struct rte_eth_dev *dev);
>  void i40e_tm_conf_uninit(struct rte_eth_dev *dev);
> +int i40e_flush_region_all_conf(struct i40e_hw *hw,
> +				struct i40e_pf *pf, uint16_t on);
> +
> 
>  #define I40E_DEV_TO_PCI(eth_dev) \
>  	RTE_DEV_TO_PCI((eth_dev)->device)
> diff --git a/drivers/net/i40e/rte_pmd_i40e.c
> b/drivers/net/i40e/rte_pmd_i40e.c
> index d69a472..9a75f21 100644
> --- a/drivers/net/i40e/rte_pmd_i40e.c
> +++ b/drivers/net/i40e/rte_pmd_i40e.c
> @@ -35,6 +35,7 @@
>  #include <rte_tailq.h>
> 
>  #include "base/i40e_prototype.h"
> +#include "base/i40e_dcb.h"
>  #include "i40e_ethdev.h"
>  #include "i40e_pf.h"
>  #include "i40e_rxtx.h"
> @@ -2161,3 +2162,484 @@ rte_pmd_i40e_add_vf_mac_addr(uint8_t port,
> uint16_t vf_id,
> 
>  	return 0;
>  }
> +
> +static int
> +i40e_vsi_update_queue_region_mapping(struct i40e_hw *hw,
> +			      struct i40e_pf *pf)
> +{
> +	uint16_t i;
> +	struct i40e_vsi *vsi = pf->main_vsi;
> +	uint16_t queue_offset, bsf, tc_index;
> +	struct i40e_vsi_context ctxt;
> +	struct i40e_aqc_vsi_properties_data *vsi_info;
> +	struct i40e_queue_region_info *region_info =
> +				&pf->queue_region;
> +	uint32_t ret = -EINVAL;
> +
> +	if (!region_info->queue_region_number) {
> +		PMD_INIT_LOG(ERR, "there is no that region id been set
> before");
> +		return ret;
> +	}
> +
> +	memset(&ctxt, 0, sizeof(struct i40e_vsi_context));
> +
> +	/* Update Queue Pairs Mapping for currently enabled UPs */
> +	ctxt.seid = vsi->seid;
> +	ctxt.pf_num = hw->pf_id;
> +	ctxt.vf_num = 0;
> +	ctxt.uplink_seid = vsi->uplink_seid;
> +	ctxt.info = vsi->info;
> +	vsi_info = &ctxt.info;
> +
> +	memset(vsi_info->tc_mapping, 0, sizeof(uint16_t) * 8);
> +	memset(vsi_info->queue_mapping, 0, sizeof(uint16_t) * 16);
> +
> +	/**
> +	 * Configure queue region and queue mapping parameters,
> +	 * for enabled queue region, allocate queues to this region.
> +	 */
> +
> +	for (i = 0; i < region_info->queue_region_number; i++) {
> +		tc_index = region_info->region[i].region_id;
> +		bsf = rte_bsf32(region_info->region[i].queue_num);
> +		queue_offset = region_info->region[i].queue_start_index;
> +		vsi_info->tc_mapping[tc_index] = rte_cpu_to_le_16(
> +			(queue_offset <<
> I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
> +				(bsf <<
> I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT));
> +	}
> +
> +	/* Associate queue number with VSI, Keep vsi->nb_qps unchanged
> */
> +	if (vsi->type == I40E_VSI_SRIOV) {
> +		vsi_info->mapping_flags |=
> +
> 	rte_cpu_to_le_16(I40E_AQ_VSI_QUE_MAP_NONCONTIG);
> +		for (i = 0; i < vsi->nb_qps; i++)
> +			vsi_info->queue_mapping[i] =
> +				rte_cpu_to_le_16(vsi->base_queue + i);
> +	} else {
> +		vsi_info->mapping_flags |=
> +
> 	rte_cpu_to_le_16(I40E_AQ_VSI_QUE_MAP_CONTIG);
> +		vsi_info->queue_mapping[0] = rte_cpu_to_le_16(vsi-
> >base_queue);
> +	}
> +	vsi_info->valid_sections |=
> +
> 	rte_cpu_to_le_16(I40E_AQ_VSI_PROP_QUEUE_MAP_VALID);
> +
> +	/* Update the VSI after updating the VSI queue-mapping
> information */
> +	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
> +	if (ret) {
> +		PMD_INIT_LOG(ERR, "Failed to configure queue region "
> +			"mapping = %d ", hw->aq.asq_last_status);
> +		return ret;
> +	}
> +	/* update the local VSI info with updated queue map */
> +	(void)rte_memcpy(&vsi->info.tc_mapping, &ctxt.info.tc_mapping,
> +					sizeof(vsi->info.tc_mapping));
> +	(void)rte_memcpy(&vsi->info.queue_mapping,
> +			&ctxt.info.queue_mapping,
> +		sizeof(vsi->info.queue_mapping));
> +	vsi->info.mapping_flags = ctxt.info.mapping_flags;
> +	vsi->info.valid_sections = 0;
> +
> +	return 0;
> +}
> +
> +
> +static int
> +i40e_set_queue_region(struct i40e_pf *pf,
> +				struct rte_i40e_rss_region_conf *conf_ptr)
> +{
> +	uint16_t tc_size_tb[7] = {1, 2, 4, 8, 16, 32, 64};
> +	uint16_t i;
> +	struct i40e_vsi *main_vsi = pf->main_vsi;
> +	struct i40e_queue_region_info *info = &pf->queue_region;
> +	uint32_t ret = -EINVAL;
> +
> +	for (i = 0; i < I40E_REGION_MAX_INDEX; i++)
> +		if (conf_ptr->queue_num == tc_size_tb[i])
> +			break;
> +
> +	if (i == I40E_REGION_MAX_INDEX) {
> +		printf("The region sizes should be any of the following "
> +		"values: 1, 2, 4, 8, 16, 32, 64 as long as the "
> +		"total number of queues do not exceed the VSI allocation");

printf?

> +		return ret;
> +	}
> +
> +	if (conf_ptr->region_id >= I40E_REGION_MAX_INDEX) {
> +		PMD_INIT_LOG(ERR, "the queue region max index is 7");
> +		return ret;
> +	}
> +
> +	if ((conf_ptr->queue_start_index + conf_ptr->queue_num)
> +					> main_vsi->nb_used_qps) {
> +		PMD_INIT_LOG(ERR, "the queue index exceeds the VSI
> range");
> +		return ret;
> +	}
> +
> +	if (info->queue_region_number) {
> +		for (i = 0; i < info->queue_region_number; i++)
> +			if (conf_ptr->region_id == info->region[i].region_id)
> +				break;
> +
> +		if ((i == info->queue_region_number) &&
> +			(i <= I40E_REGION_MAX_INDEX)) {
> +			info->region[i].region_id = conf_ptr->region_id;
> +			info->region[i].queue_num = conf_ptr-
> >queue_num;
> +			info->region[i].queue_start_index =
> +				conf_ptr->queue_start_index;
> +			info->queue_region_number++;
> +		} else {
> +			PMD_INIT_LOG(ERR, "queue region number "
> +				"exceeds maxnum 8 or the "
> +				"queue region id has "
> +				"been set before");
> +			return ret;
> +		}
> +	} else {
> +		info->region[0].region_id = conf_ptr->region_id;
> +		info->region[0].queue_num = conf_ptr->queue_num;
> +		info->region[0].queue_start_index =
> +			conf_ptr->queue_start_index;
> +		info->queue_region_number++;
> +	}
> +
> +	return 0;
> +}
> +
> +static int
> +i40e_set_region_flowtype_pf(struct i40e_hw *hw,
> +		struct i40e_pf *pf, struct rte_i40e_rss_region_conf
> *conf_ptr)
> +{
> +	uint32_t pfqf_hregion;
> +	uint32_t ret = -EINVAL;
> +	struct i40e_queue_region_info *info = &pf->queue_region;
> +	uint16_t i, j, index, flowtype_set = 0;
> +	uint16_t region_index, flowtype_index;
> +
> +	if (conf_ptr->region_id > I40E_PFQF_HREGION_MAX_INDEX) {
> +		PMD_INIT_LOG(ERR, "the queue region max index is 7");
> +		return ret;
> +	}
> +
> +	if (conf_ptr->hw_flowtype >= I40E_FILTER_PCTYPE_MAX) {
> +		PMD_INIT_LOG(ERR, "the hw_flowtype or PCTYPE max index
> is 63");
> +		return ret;
> +	}
> +
> +	if (info->queue_region_number) {
> +		for (i = 0; i < info->queue_region_number; i++)
> +			if (conf_ptr->region_id == info->region[i].region_id)
> +				break;
> +
> +		if (i == info->queue_region_number) {
> +			PMD_INIT_LOG(ERR, "that region id has not "
> +				"been set before");
> +			return ret;
> +		}
> +		region_index = i;
> +
> +		for (i = 0; i < info->queue_region_number; i++) {
> +			for (j = 0; j < info->region[i].flowtype_num; j++) {
> +				if (conf_ptr->hw_flowtype ==
> +					info->region[i].hw_flowtype[j]) {
> +					flowtype_set = 1;
> +					break;
> +				}
> +			}
> +		}
> +
> +		if (flowtype_set) {
> +			PMD_INIT_LOG(ERR, "that hw_flowtype "
> +				"has been set before");
> +			return ret;
> +		}
> +		flowtype_index = info-
> >region[region_index].flowtype_num;
> +		info->region[region_index].hw_flowtype[flowtype_index] =
> +						conf_ptr->hw_flowtype;
> +		info->region[region_index].flowtype_num++;
> +	} else  {
> +		PMD_INIT_LOG(ERR, "there is no that region id been set
> before");
> +		return ret;
> +	}
> +
> +	index = conf_ptr->hw_flowtype >> 3;
> +	pfqf_hregion = i40e_read_rx_ctl(hw, I40E_PFQF_HREGION(index));
> +
> +	if ((conf_ptr->hw_flowtype & 0x7) == 0) {
> +		pfqf_hregion |= conf_ptr->region_id <<
> +				I40E_PFQF_HREGION_REGION_0_SHIFT;
> +		pfqf_hregion |= 1 <<
> I40E_PFQF_HREGION_OVERRIDE_ENA_0_SHIFT;
> +	} else if ((conf_ptr->hw_flowtype & 0x7) == 1) {
> +		pfqf_hregion |= conf_ptr->region_id <<
> +				I40E_PFQF_HREGION_REGION_1_SHIFT;
> +		pfqf_hregion |= 1 <<
> I40E_PFQF_HREGION_OVERRIDE_ENA_1_SHIFT;
> +	} else if ((conf_ptr->hw_flowtype & 0x7) == 2) {
> +		pfqf_hregion |= conf_ptr->region_id <<
> +				I40E_PFQF_HREGION_REGION_2_SHIFT;
> +		pfqf_hregion |= 1 <<
> I40E_PFQF_HREGION_OVERRIDE_ENA_2_SHIFT;
> +	} else if ((conf_ptr->hw_flowtype & 0x7) == 3) {
> +		pfqf_hregion |= conf_ptr->region_id <<
> +				I40E_PFQF_HREGION_REGION_3_SHIFT;
> +		pfqf_hregion |= 1 <<
> I40E_PFQF_HREGION_OVERRIDE_ENA_3_SHIFT;
> +	} else if ((conf_ptr->hw_flowtype & 0x7) == 4) {
> +		pfqf_hregion |= conf_ptr->region_id <<
> +				I40E_PFQF_HREGION_REGION_4_SHIFT;
> +		pfqf_hregion |= 1 <<
> I40E_PFQF_HREGION_OVERRIDE_ENA_4_SHIFT;
> +	} else if ((conf_ptr->hw_flowtype & 0x7) == 5) {
> +		pfqf_hregion |= conf_ptr->region_id <<
> +				I40E_PFQF_HREGION_REGION_5_SHIFT;
> +		pfqf_hregion |= 1 <<
> I40E_PFQF_HREGION_OVERRIDE_ENA_5_SHIFT;
> +	} else if ((conf_ptr->hw_flowtype & 0x7) == 6) {
> +		pfqf_hregion |= conf_ptr->region_id <<
> +				I40E_PFQF_HREGION_REGION_6_SHIFT;
> +		pfqf_hregion |= 1 <<
> I40E_PFQF_HREGION_OVERRIDE_ENA_6_SHIFT;
> +	} else {
> +		pfqf_hregion |= conf_ptr->region_id <<
> +				I40E_PFQF_HREGION_REGION_7_SHIFT;
> +		pfqf_hregion |= 1 <<
> I40E_PFQF_HREGION_OVERRIDE_ENA_7_SHIFT;
> +	}
> +
> +	i40e_write_rx_ctl(hw, I40E_PFQF_HREGION(index), pfqf_hregion);
> +
> +	return 0;
> +}
> +
> +static int
> +i40e_set_up_region(struct i40e_pf *pf,
> +				struct rte_i40e_rss_region_conf *conf_ptr)
> +{
> +	struct i40e_queue_region_info *info = &pf->queue_region;
> +	uint32_t ret = -EINVAL;
> +	uint16_t i, j, region_index, up_set = 0;
> +
> +	if (conf_ptr->user_priority >
> I40E_REGION_USERPRIORITY_MAX_INDEX) {
> +		PMD_INIT_LOG(ERR, "the queue region max index is 7");
> +		return ret;
> +	}
> +
> +	if (conf_ptr->region_id >= I40E_REGION_MAX_INDEX) {
> +		PMD_INIT_LOG(ERR, "the region_id max index is 7");
> +		return ret;
> +	}
> +
> +	if (info->queue_region_number) {
> +		for (i = 0; i < info->queue_region_number; i++)
> +			if (conf_ptr->region_id == info->region[i].region_id)
> +				break;
> +
> +		if (i == info->queue_region_number) {
> +			PMD_INIT_LOG(ERR, "that region id "
> +				"has not been set before");
> +			return ret;
> +		}
> +		region_index = i;
> +
> +		for (i = 0; i < info->queue_region_number; i++) {
> +			for (j = 0; j <
> +				info->region[i].user_priority_num; j++) {
> +				if (info->region[i].user_priority[j] ==
> +					conf_ptr->user_priority) {
> +					up_set = 1;
> +					break;
> +				}
> +			}
> +		}
> +
> +		if (up_set) {
> +			PMD_INIT_LOG(ERR, "that user priority "
> +				"has been set before");
> +			return ret;
> +		}
> +
> +		j = info->region[region_index].user_priority_num;
> +		info->region[region_index].user_priority[j] =
> +						conf_ptr->user_priority;
> +		info->region[region_index].user_priority_num++;
> +	} else {
> +		PMD_INIT_LOG(ERR, "there is no that region id been set
> before");
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static int
> +i40e_queue_region_dcb_configure(struct i40e_hw *hw,
> +				struct i40e_pf *pf)
> +{
> +	struct i40e_dcbx_config dcb_cfg_local;
> +	struct i40e_dcbx_config *dcb_cfg;
> +	struct i40e_queue_region_info *info = &pf->queue_region;
> +	struct i40e_dcbx_config *old_cfg = &hw->local_dcbx_config;
> +	uint32_t ret = -EINVAL;
> +	uint16_t i, j, prio_index, region_index;
> +	uint8_t tc_map, tc_bw, bw_lf;
> +
> +	if (!info->queue_region_number) {
> +		PMD_INIT_LOG(ERR, "there is no that region id been set
> before");
> +		return ret;
> +	}
> +
> +	dcb_cfg = &dcb_cfg_local;
> +	memset(dcb_cfg, 0, sizeof(struct i40e_dcbx_config));
> +
> +	/* assume each tc has the same bw */
> +	tc_bw = I40E_MAX_PERCENT / info->queue_region_number;
> +	for (i = 0; i < info->queue_region_number; i++)
> +		dcb_cfg->etscfg.tcbwtable[i] = tc_bw;
> +	/* to ensure the sum of tcbw is equal to 100 */
> +	bw_lf = I40E_MAX_PERCENT %  info->queue_region_number;
> +	for (i = 0; i < bw_lf; i++)
> +		dcb_cfg->etscfg.tcbwtable[i]++;
> +
> +	/* assume each tc has the same Transmission Selection Algorithm */
> +	for (i = 0; i < info->queue_region_number; i++)
> +		dcb_cfg->etscfg.tsatable[i] = I40E_IEEE_TSA_ETS;
> +
> +	for (i = 0; i < info->queue_region_number; i++) {
> +		for (j = 0; j < info->region[i].user_priority_num; j++) {
> +			prio_index = info->region[i].user_priority[j];
> +			region_index = info->region[i].region_id;
> +			dcb_cfg->etscfg.prioritytable[prio_index] =
> +						region_index;
> +		}
> +	}
> +
> +	/* FW needs one App to configure HW */
> +	dcb_cfg->numapps = I40E_DEFAULT_DCB_APP_NUM;
> +	dcb_cfg->app[0].selector = I40E_APP_SEL_ETHTYPE;
> +	dcb_cfg->app[0].priority = I40E_DEFAULT_DCB_APP_PRIO;
> +	dcb_cfg->app[0].protocolid = I40E_APP_PROTOID_FCOE;
> +
> +	tc_map = RTE_LEN2MASK(info->queue_region_number, uint8_t);
> +
> +	dcb_cfg->pfc.willing = 0;
> +	dcb_cfg->pfc.pfccap = I40E_MAX_TRAFFIC_CLASS;
> +	dcb_cfg->pfc.pfcenable = tc_map;
> +
> +	/* Copy the new config to the current config */
> +	*old_cfg = *dcb_cfg;
> +	old_cfg->etsrec = old_cfg->etscfg;
> +	ret = i40e_set_dcb_config(hw);
> +
> +	if (ret) {
> +		PMD_INIT_LOG(ERR, "Set queue region DCB Config failed,
> err"
> +			" %s aq_err %s",
> +			 i40e_stat_str(hw, ret),
> +			 i40e_aq_str(hw, hw->aq.asq_last_status));
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static int
> +i40e_queue_region_dcb_configure_default(struct i40e_hw *hw)
> +{
> +	uint16_t i;
> +	uint32_t ret = -EINVAL;
> +
> +	memset(&hw->local_dcbx_config, 0,
> +	sizeof(struct i40e_dcbx_config));
> +	/* set dcb default configuration */
> +	hw->local_dcbx_config.etscfg.willing = 0;
> +	hw->local_dcbx_config.etscfg.maxtcs = 0;
> +	hw->local_dcbx_config.etscfg.tcbwtable[0] = 100;
> +	hw->local_dcbx_config.etscfg.tsatable[0] =
> +			I40E_IEEE_TSA_ETS;
> +	/* all UPs mapping to region 0 */
> +	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++)
> +		hw->local_dcbx_config.etscfg.prioritytable[i] = 0;
> +	hw->local_dcbx_config.etsrec =
> +		hw->local_dcbx_config.etscfg;
> +	hw->local_dcbx_config.pfc.willing = 0;
> +	hw->local_dcbx_config.pfc.pfccap =
> +			I40E_MAX_TRAFFIC_CLASS;
> +	/* FW needs one App to configure HW */
> +	hw->local_dcbx_config.numapps = 1;
> +	hw->local_dcbx_config.app[0].selector =
> +			I40E_APP_SEL_ETHTYPE;
> +	hw->local_dcbx_config.app[0].priority = 3;
> +	hw->local_dcbx_config.app[0].protocolid =
> +			I40E_APP_PROTOID_FCOE;
> +	ret = i40e_set_dcb_config(hw);
> +	if (ret) {
> +		PMD_INIT_LOG(ERR,
> +		"default dcb config fails. err = %d, aq_err = %d.",
> +			ret, hw->aq.asq_last_status);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +int
> +i40e_flush_region_all_conf(struct i40e_hw *hw, struct i40e_pf *pf,
> +				uint16_t on)
> +{
> +	uint16_t i;
> +	struct i40e_queue_region_info *info = &pf->queue_region;
> +
> +	if (on) {
> +		i40e_vsi_update_queue_region_mapping(hw, pf);
> +		i40e_queue_region_dcb_configure(hw, pf);
> +		return 0;
> +	}
> +
> +	info->queue_region_number = 1;
> +	info->region[0].queue_num = 64;
> +	info->region[0].queue_start_index = 0;
> +
> +	i40e_vsi_update_queue_region_mapping(hw, pf);
> +	i40e_queue_region_dcb_configure_default(hw);
> +
> +	for (i = 0; i < I40E_PFQF_HREGION_MAX_INDEX; i++)
> +		i40e_write_rx_ctl(hw, I40E_PFQF_HREGION(i), 0);
> +
> +	memset(info, 0, sizeof(struct i40e_queue_region_info));
> +
> +	return 0;
> +}
> +
> +int rte_pmd_i40e_queue_region_conf(uint8_t port,
> +			struct rte_i40e_rss_region_conf *conf_ptr)
> +{
> +	struct rte_eth_dev *dev = &rte_eth_devices[port];
> +	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data-
> >dev_private);
> +	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data-
> >dev_private);
> +	enum rte_pmd_i40e_queue_region_op op_type = conf_ptr->op;
> +	uint32_t ret;
> +
> +	if (!is_i40e_supported(dev))
> +		return -ENOTSUP;
> +
> +	switch (op_type) {
> +	case RTE_PMD_I40E_QUEUE_REGION_SET:
> +		ret = i40e_set_queue_region(pf, conf_ptr);
> +		break;
> +	case RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET:
> +		ret = i40e_set_region_flowtype_pf(hw, pf, conf_ptr);
> +		break;
> +	case RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET:
> +		ret = -EINVAL;
> +		break;

Why setting of flowtype mapping is separated to two different option?
Does it mean that application will need to detect which driver it uses before
calling this API? Application should not care which driver is in use, it tracks the
number of queues used for RSS and just configures these queues to
different regions, regardless if it runs on top of PF or VF. Difference
in configuration for PF/VF should be hidden from application, so
only single RTE_PMD_I40E_REGION_FLOWTYPE_SET should be used.

All other options do not have separate check.
Will they work on both PF and VF?

> +	case RTE_PMD_I40E_UP_REGION_SET:
> +		ret = i40e_set_up_region(pf, conf_ptr);
> +		break;
> +	case RTE_PMD_I40E_REGION_ALL_FLUSH_ON:
> +		ret = i40e_flush_region_all_conf(hw, pf, 1);
> +		break;
> +	case RTE_PMD_I40E_REGION_ALL_FLUSH_OFF:
> +		ret = i40e_flush_region_all_conf(hw, pf, 0);
> +		break;
> +
> +	default:
> +		PMD_DRV_LOG(WARNING, "op type (%d) not supported",
> +			    op_type);
> +		ret = -EINVAL;
> +		break;
> +	}
> +
> +	I40E_WRITE_FLUSH(hw);
> +
> +	return ret;
> +}

How this API can be tested, is there a separate testpmd patch?

> diff --git a/drivers/net/i40e/rte_pmd_i40e.h
> b/drivers/net/i40e/rte_pmd_i40e.h
> index 155b7e8..a1329f4 100644
> --- a/drivers/net/i40e/rte_pmd_i40e.h
> +++ b/drivers/net/i40e/rte_pmd_i40e.h
> @@ -91,6 +91,20 @@ enum rte_pmd_i40e_package_info {
>  	RTE_PMD_I40E_PKG_INFO_MAX = 0xFFFFFFFF
>  };
> 
> +/**
> + *  Option types of queue region.
> + */
> +enum rte_pmd_i40e_queue_region_op {
> +	RTE_PMD_I40E_REGION_UNDEFINED = 0,
> +	RTE_PMD_I40E_QUEUE_REGION_SET,      /**< add queue region
> set*/
> +	RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET,   /**< add pf region
> pctype set */
> +	RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET,   /**< add vf region
> pctype set */
> +	RTE_PMD_I40E_UP_REGION_SET,   /**< add queue region pctype
> set */
> +	RTE_PMD_I40E_REGION_ALL_FLUSH_ON,   /**< flush on all
> configuration */
> +	RTE_PMD_I40E_REGION_ALL_FLUSH_OFF,   /**< flush off all
> configuration */
> +	RTE_PMD_I40E_QUEUE_REGION_OP_MAX
> +};
> +
>  #define RTE_PMD_I40E_DDP_NAME_SIZE 32
> 
>  /**
> @@ -146,6 +160,18 @@ struct rte_pmd_i40e_ptype_mapping {
>  };
> 
>  /**
> + * Queue region information get from CLI.
> + */
> +struct rte_i40e_rss_region_conf {
> +	uint8_t region_id;
> +	uint8_t hw_flowtype;
> +	uint8_t queue_start_index;
> +	uint8_t queue_num;
> +	uint8_t user_priority;
> +	enum rte_pmd_i40e_queue_region_op  op;
> +};
> +
> +/**
>   * Notify VF when PF link status changes.
>   *
>   * @param port
> @@ -657,4 +683,16 @@ int rte_pmd_i40e_ptype_mapping_replace(uint8_t
> port,
>  int rte_pmd_i40e_add_vf_mac_addr(uint8_t port, uint16_t vf_id,
>  				 struct ether_addr *mac_addr);
> 
> +/**
> + * Get RSS queue region info from CLI and do configuration for
> + * that port as the command otion type
> + *
> + * @param port
> + *    pointer to port identifier of the device
> + * @param conf_ptr
> + *    pointer to the struct that contain all the
> + *    region configuration parameters
> + */
> +int rte_pmd_i40e_queue_region_conf(uint8_t port,
> +		struct rte_i40e_rss_region_conf *conf_ptr);
>  #endif /* _PMD_I40E_H_ */
> diff --git a/drivers/net/i40e/rte_pmd_i40e_version.map
> b/drivers/net/i40e/rte_pmd_i40e_version.map
> index ef8882b..c3ee2da 100644
> --- a/drivers/net/i40e/rte_pmd_i40e_version.map
> +++ b/drivers/net/i40e/rte_pmd_i40e_version.map
> @@ -50,5 +50,6 @@ DPDK_17.11 {
>  	global:
> 
>  	rte_pmd_i40e_add_vf_mac_addr;
> +	rte_pmd_i40e_queue_region_conf;
> 
>  } DPDK_17.08;
> --
> 2.9.3
  
Zhao1, Wei Sept. 22, 2017, 8:49 a.m. UTC | #7
Hi,Andrey

> -----Original Message-----

> From: Chilikin, Andrey

> Sent: Friday, September 22, 2017 3:53 AM

> To: Zhao1, Wei <wei.zhao1@intel.com>; dev@dpdk.org

> Cc: Zhao1, Wei <wei.zhao1@intel.com>; Yigit, Ferruh

> <ferruh.yigit@intel.com>; Xing, Beilei <beilei.xing@intel.com>; Wu, Jingjing

> <jingjing.wu@intel.com>

> Subject: RE: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and flush

> 

> > -----Original Message-----

> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Wei Zhao

> > Sent: Friday, September 15, 2017 4:13 AM

> > To: dev@dpdk.org

> > Cc: Zhao1, Wei <wei.zhao1@intel.com>

> > Subject: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and

> > flush

> >

> > This feature enable queue regions configuration for RSS in PF/VF, so

> > that different traffic classes or different packet classification

> > types can be separated to different queues in different queue

> > regions.This patch can set queue region range, it include queue number

> > in a region and the index of first queue.

> > This patch enable mapping between different priorities (UP) and

> > different traffic classes.It also enable mapping between a region

> > index and a sepcific flowtype(PCTYPE).It also provide the solution of

> > flush all configuration about queue region the above described.

> >

> > Signed-off-by: Wei Zhao <wei.zhao1@intel.com>

> > ---

> >  drivers/net/i40e/i40e_ethdev.c            |  19 +-

> >  drivers/net/i40e/i40e_ethdev.h            |  30 ++

> >  drivers/net/i40e/rte_pmd_i40e.c           | 482

> > ++++++++++++++++++++++++++++++

> >  drivers/net/i40e/rte_pmd_i40e.h           |  38 +++

> >  drivers/net/i40e/rte_pmd_i40e_version.map |   1 +

> >  5 files changed, 566 insertions(+), 4 deletions(-)

> >

> > diff --git a/drivers/net/i40e/i40e_ethdev.c

> > b/drivers/net/i40e/i40e_ethdev.c index 8e0580c..1663fc0 100644

> > --- a/drivers/net/i40e/i40e_ethdev.c

> > +++ b/drivers/net/i40e/i40e_ethdev.c

> > @@ -137,10 +137,6 @@

> >  #define I40E_PRTTSYN_TSYNTYPE    0x0e000000

> >  #define I40E_CYCLECOUNTER_MASK   0xffffffffffffffffULL

> >

> > -#define I40E_MAX_PERCENT            100

> > -#define I40E_DEFAULT_DCB_APP_NUM    1

> > -#define I40E_DEFAULT_DCB_APP_PRIO   3

> > -

> >  /**

> >   * Below are values for writing un-exposed registers suggested

> >   * by silicon experts

> > @@ -1034,6 +1030,15 @@ i40e_init_fdir_filter_list(struct rte_eth_dev

> *dev)

> >  	return ret;

> >  }

> >

> > +static void

> > +i40e_init_queue_region_conf(struct rte_eth_dev *dev) {

> > +	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data-

> > >dev_private);

> > +	struct i40e_queue_region_info *info = &pf->queue_region;

> > +

> > +	memset(info, 0, sizeof(struct i40e_queue_region_info)); }

> > +

> >  static int

> >  eth_i40e_dev_init(struct rte_eth_dev *dev)  { @@ -1309,6 +1314,9 @@

> > eth_i40e_dev_init(struct rte_eth_dev *dev)

> >  	if (ret < 0)

> >  		goto err_init_fdir_filter_list;

> >

> > +	/* initialize queue region configuration */

> > +	i40e_init_queue_region_conf(dev);

> > +

> >  	return 0;

> >

> >  err_init_fdir_filter_list:

> > @@ -1464,6 +1472,9 @@ eth_i40e_dev_uninit(struct rte_eth_dev *dev)

> >  	/* Remove all Traffic Manager configuration */

> >  	i40e_tm_conf_uninit(dev);

> >

> > +	/* Remove all the queue region configuration */

> > +	i40e_flush_region_all_conf(hw, pf, 0);

> > +

> >  	return 0;

> >  }

> >

> > diff --git a/drivers/net/i40e/i40e_ethdev.h

> > b/drivers/net/i40e/i40e_ethdev.h index ad80f0f..d612886 100644

> > --- a/drivers/net/i40e/i40e_ethdev.h

> > +++ b/drivers/net/i40e/i40e_ethdev.h

> > @@ -260,6 +260,16 @@ enum i40e_flxpld_layer_idx {  #define

> > I40E_QOS_BW_WEIGHT_MIN 1

> >  /* The max bandwidth weight is 127. */  #define

> > I40E_QOS_BW_WEIGHT_MAX 127

> > +/* The max queue region index is 7. */ #define

> I40E_REGION_MAX_INDEX

> > +7

> > +/* The max queue region userpriority is 7. */ #define

> > +I40E_REGION_USERPRIORITY_MAX_INDEX 7

> > +/* The max pctype index is 63. */

> > +#define I40E_REGION_PCTYPE_MAX_INDEX 63

> > +

> > +#define I40E_MAX_PERCENT            100

> > +#define I40E_DEFAULT_DCB_APP_NUM    1

> > +#define I40E_DEFAULT_DCB_APP_PRIO   3

> >

> >  /**

> >   * The overhead from MTU to max frame size.

> > @@ -541,6 +551,22 @@ struct i40e_ethertype_rule {

> >  	struct rte_hash *hash_table;

> >  };

> >

> > +/* queue region info */

> > +struct i40e_region_info {

> > +	uint8_t region_id;

> > +	uint8_t queue_start_index;

> > +	uint8_t queue_num;

> > +	uint8_t user_priority_num;

> > +	uint8_t user_priority[I40E_MAX_USER_PRIORITY];

> > +	uint8_t flowtype_num;

> > +	uint8_t hw_flowtype[I40E_FILTER_PCTYPE_MAX];

> > +};

> > +

> > +struct i40e_queue_region_info {

> > +	uint16_t queue_region_number;

> > +	struct i40e_region_info region[I40E_REGION_MAX_INDEX + 1]; };

> > +

> >  /* Tunnel filter number HW supports */  #define

> > I40E_MAX_TUNNEL_FILTER_NUM 400

> >

> > @@ -776,6 +802,7 @@ struct i40e_pf {

> >  	struct i40e_fdir_info fdir; /* flow director info */

> >  	struct i40e_ethertype_rule ethertype; /* Ethertype filter rule */

> >  	struct i40e_tunnel_rule tunnel; /* Tunnel filter rule */

> > +	struct i40e_queue_region_info queue_region; /* queue region info

> > */

> >  	struct i40e_fc_conf fc_conf; /* Flow control conf */

> >  	struct i40e_mirror_rule_list mirror_list;

> >  	uint16_t nb_mirror_rule;   /* The number of mirror rules */

> > @@ -1003,6 +1030,9 @@ void i40e_check_write_reg(struct i40e_hw *hw,

> > uint32_t addr, uint32_t val);  int i40e_tm_ops_get(struct rte_eth_dev

> > *dev, void *ops);  void i40e_tm_conf_init(struct rte_eth_dev *dev);

> > void i40e_tm_conf_uninit(struct rte_eth_dev *dev);

> > +int i40e_flush_region_all_conf(struct i40e_hw *hw,

> > +				struct i40e_pf *pf, uint16_t on);

> > +

> >

> >  #define I40E_DEV_TO_PCI(eth_dev) \

> >  	RTE_DEV_TO_PCI((eth_dev)->device)

> > diff --git a/drivers/net/i40e/rte_pmd_i40e.c

> > b/drivers/net/i40e/rte_pmd_i40e.c index d69a472..9a75f21 100644

> > --- a/drivers/net/i40e/rte_pmd_i40e.c

> > +++ b/drivers/net/i40e/rte_pmd_i40e.c

> > @@ -35,6 +35,7 @@

> >  #include <rte_tailq.h>

> >

> >  #include "base/i40e_prototype.h"

> > +#include "base/i40e_dcb.h"

> >  #include "i40e_ethdev.h"

> >  #include "i40e_pf.h"

> >  #include "i40e_rxtx.h"

> > @@ -2161,3 +2162,484 @@ rte_pmd_i40e_add_vf_mac_addr(uint8_t port,

> > uint16_t vf_id,

> >

> >  	return 0;

> >  }

> > +

> > +static int

> > +i40e_vsi_update_queue_region_mapping(struct i40e_hw *hw,

> > +			      struct i40e_pf *pf)

> > +{

> > +	uint16_t i;

> > +	struct i40e_vsi *vsi = pf->main_vsi;

> > +	uint16_t queue_offset, bsf, tc_index;

> > +	struct i40e_vsi_context ctxt;

> > +	struct i40e_aqc_vsi_properties_data *vsi_info;

> > +	struct i40e_queue_region_info *region_info =

> > +				&pf->queue_region;

> > +	uint32_t ret = -EINVAL;

> > +

> > +	if (!region_info->queue_region_number) {

> > +		PMD_INIT_LOG(ERR, "there is no that region id been set

> > before");

> > +		return ret;

> > +	}

> > +

> > +	memset(&ctxt, 0, sizeof(struct i40e_vsi_context));

> > +

> > +	/* Update Queue Pairs Mapping for currently enabled UPs */

> > +	ctxt.seid = vsi->seid;

> > +	ctxt.pf_num = hw->pf_id;

> > +	ctxt.vf_num = 0;

> > +	ctxt.uplink_seid = vsi->uplink_seid;

> > +	ctxt.info = vsi->info;

> > +	vsi_info = &ctxt.info;

> > +

> > +	memset(vsi_info->tc_mapping, 0, sizeof(uint16_t) * 8);

> > +	memset(vsi_info->queue_mapping, 0, sizeof(uint16_t) * 16);

> > +

> > +	/**

> > +	 * Configure queue region and queue mapping parameters,

> > +	 * for enabled queue region, allocate queues to this region.

> > +	 */

> > +

> > +	for (i = 0; i < region_info->queue_region_number; i++) {

> > +		tc_index = region_info->region[i].region_id;

> > +		bsf = rte_bsf32(region_info->region[i].queue_num);

> > +		queue_offset = region_info->region[i].queue_start_index;

> > +		vsi_info->tc_mapping[tc_index] = rte_cpu_to_le_16(

> > +			(queue_offset <<

> > I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |

> > +				(bsf <<

> > I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT));

> > +	}

> > +

> > +	/* Associate queue number with VSI, Keep vsi->nb_qps unchanged

> > */

> > +	if (vsi->type == I40E_VSI_SRIOV) {

> > +		vsi_info->mapping_flags |=

> > +

> > 	rte_cpu_to_le_16(I40E_AQ_VSI_QUE_MAP_NONCONTIG);

> > +		for (i = 0; i < vsi->nb_qps; i++)

> > +			vsi_info->queue_mapping[i] =

> > +				rte_cpu_to_le_16(vsi->base_queue + i);

> > +	} else {

> > +		vsi_info->mapping_flags |=

> > +

> > 	rte_cpu_to_le_16(I40E_AQ_VSI_QUE_MAP_CONTIG);

> > +		vsi_info->queue_mapping[0] = rte_cpu_to_le_16(vsi-

> > >base_queue);

> > +	}

> > +	vsi_info->valid_sections |=

> > +

> > 	rte_cpu_to_le_16(I40E_AQ_VSI_PROP_QUEUE_MAP_VALID);

> > +

> > +	/* Update the VSI after updating the VSI queue-mapping

> > information */

> > +	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);

> > +	if (ret) {

> > +		PMD_INIT_LOG(ERR, "Failed to configure queue region "

> > +			"mapping = %d ", hw->aq.asq_last_status);

> > +		return ret;

> > +	}

> > +	/* update the local VSI info with updated queue map */

> > +	(void)rte_memcpy(&vsi->info.tc_mapping, &ctxt.info.tc_mapping,

> > +					sizeof(vsi->info.tc_mapping));

> > +	(void)rte_memcpy(&vsi->info.queue_mapping,

> > +			&ctxt.info.queue_mapping,

> > +		sizeof(vsi->info.queue_mapping));

> > +	vsi->info.mapping_flags = ctxt.info.mapping_flags;

> > +	vsi->info.valid_sections = 0;

> > +

> > +	return 0;

> > +}

> > +

> > +

> > +static int

> > +i40e_set_queue_region(struct i40e_pf *pf,

> > +				struct rte_i40e_rss_region_conf *conf_ptr) {

> > +	uint16_t tc_size_tb[7] = {1, 2, 4, 8, 16, 32, 64};

> > +	uint16_t i;

> > +	struct i40e_vsi *main_vsi = pf->main_vsi;

> > +	struct i40e_queue_region_info *info = &pf->queue_region;

> > +	uint32_t ret = -EINVAL;

> > +

> > +	for (i = 0; i < I40E_REGION_MAX_INDEX; i++)

> > +		if (conf_ptr->queue_num == tc_size_tb[i])

> > +			break;

> > +

> > +	if (i == I40E_REGION_MAX_INDEX) {

> > +		printf("The region sizes should be any of the following "

> > +		"values: 1, 2, 4, 8, 16, 32, 64 as long as the "

> > +		"total number of queues do not exceed the VSI allocation");

> 

> printf?

> 

> > +		return ret;

> > +	}

> > +

> > +	if (conf_ptr->region_id >= I40E_REGION_MAX_INDEX) {

> > +		PMD_INIT_LOG(ERR, "the queue region max index is 7");

> > +		return ret;

> > +	}

> > +

> > +	if ((conf_ptr->queue_start_index + conf_ptr->queue_num)

> > +					> main_vsi->nb_used_qps) {

> > +		PMD_INIT_LOG(ERR, "the queue index exceeds the VSI

> > range");

> > +		return ret;

> > +	}

> > +

> > +	if (info->queue_region_number) {

> > +		for (i = 0; i < info->queue_region_number; i++)

> > +			if (conf_ptr->region_id == info->region[i].region_id)

> > +				break;

> > +

> > +		if ((i == info->queue_region_number) &&

> > +			(i <= I40E_REGION_MAX_INDEX)) {

> > +			info->region[i].region_id = conf_ptr->region_id;

> > +			info->region[i].queue_num = conf_ptr-

> > >queue_num;

> > +			info->region[i].queue_start_index =

> > +				conf_ptr->queue_start_index;

> > +			info->queue_region_number++;

> > +		} else {

> > +			PMD_INIT_LOG(ERR, "queue region number "

> > +				"exceeds maxnum 8 or the "

> > +				"queue region id has "

> > +				"been set before");

> > +			return ret;

> > +		}

> > +	} else {

> > +		info->region[0].region_id = conf_ptr->region_id;

> > +		info->region[0].queue_num = conf_ptr->queue_num;

> > +		info->region[0].queue_start_index =

> > +			conf_ptr->queue_start_index;

> > +		info->queue_region_number++;

> > +	}

> > +

> > +	return 0;

> > +}

> > +

> > +static int

> > +i40e_set_region_flowtype_pf(struct i40e_hw *hw,

> > +		struct i40e_pf *pf, struct rte_i40e_rss_region_conf

> > *conf_ptr)

> > +{

> > +	uint32_t pfqf_hregion;

> > +	uint32_t ret = -EINVAL;

> > +	struct i40e_queue_region_info *info = &pf->queue_region;

> > +	uint16_t i, j, index, flowtype_set = 0;

> > +	uint16_t region_index, flowtype_index;

> > +

> > +	if (conf_ptr->region_id > I40E_PFQF_HREGION_MAX_INDEX) {

> > +		PMD_INIT_LOG(ERR, "the queue region max index is 7");

> > +		return ret;

> > +	}

> > +

> > +	if (conf_ptr->hw_flowtype >= I40E_FILTER_PCTYPE_MAX) {

> > +		PMD_INIT_LOG(ERR, "the hw_flowtype or PCTYPE max index

> > is 63");

> > +		return ret;

> > +	}

> > +

> > +	if (info->queue_region_number) {

> > +		for (i = 0; i < info->queue_region_number; i++)

> > +			if (conf_ptr->region_id == info->region[i].region_id)

> > +				break;

> > +

> > +		if (i == info->queue_region_number) {

> > +			PMD_INIT_LOG(ERR, "that region id has not "

> > +				"been set before");

> > +			return ret;

> > +		}

> > +		region_index = i;

> > +

> > +		for (i = 0; i < info->queue_region_number; i++) {

> > +			for (j = 0; j < info->region[i].flowtype_num; j++) {

> > +				if (conf_ptr->hw_flowtype ==

> > +					info->region[i].hw_flowtype[j]) {

> > +					flowtype_set = 1;

> > +					break;

> > +				}

> > +			}

> > +		}

> > +

> > +		if (flowtype_set) {

> > +			PMD_INIT_LOG(ERR, "that hw_flowtype "

> > +				"has been set before");

> > +			return ret;

> > +		}

> > +		flowtype_index = info-

> > >region[region_index].flowtype_num;

> > +		info->region[region_index].hw_flowtype[flowtype_index] =

> > +						conf_ptr->hw_flowtype;

> > +		info->region[region_index].flowtype_num++;

> > +	} else  {

> > +		PMD_INIT_LOG(ERR, "there is no that region id been set

> > before");

> > +		return ret;

> > +	}

> > +

> > +	index = conf_ptr->hw_flowtype >> 3;

> > +	pfqf_hregion = i40e_read_rx_ctl(hw, I40E_PFQF_HREGION(index));

> > +

> > +	if ((conf_ptr->hw_flowtype & 0x7) == 0) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_0_SHIFT;

> > +		pfqf_hregion |= 1 <<

> > I40E_PFQF_HREGION_OVERRIDE_ENA_0_SHIFT;

> > +	} else if ((conf_ptr->hw_flowtype & 0x7) == 1) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_1_SHIFT;

> > +		pfqf_hregion |= 1 <<

> > I40E_PFQF_HREGION_OVERRIDE_ENA_1_SHIFT;

> > +	} else if ((conf_ptr->hw_flowtype & 0x7) == 2) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_2_SHIFT;

> > +		pfqf_hregion |= 1 <<

> > I40E_PFQF_HREGION_OVERRIDE_ENA_2_SHIFT;

> > +	} else if ((conf_ptr->hw_flowtype & 0x7) == 3) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_3_SHIFT;

> > +		pfqf_hregion |= 1 <<

> > I40E_PFQF_HREGION_OVERRIDE_ENA_3_SHIFT;

> > +	} else if ((conf_ptr->hw_flowtype & 0x7) == 4) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_4_SHIFT;

> > +		pfqf_hregion |= 1 <<

> > I40E_PFQF_HREGION_OVERRIDE_ENA_4_SHIFT;

> > +	} else if ((conf_ptr->hw_flowtype & 0x7) == 5) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_5_SHIFT;

> > +		pfqf_hregion |= 1 <<

> > I40E_PFQF_HREGION_OVERRIDE_ENA_5_SHIFT;

> > +	} else if ((conf_ptr->hw_flowtype & 0x7) == 6) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_6_SHIFT;

> > +		pfqf_hregion |= 1 <<

> > I40E_PFQF_HREGION_OVERRIDE_ENA_6_SHIFT;

> > +	} else {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_7_SHIFT;

> > +		pfqf_hregion |= 1 <<

> > I40E_PFQF_HREGION_OVERRIDE_ENA_7_SHIFT;

> > +	}

> > +

> > +	i40e_write_rx_ctl(hw, I40E_PFQF_HREGION(index), pfqf_hregion);

> > +

> > +	return 0;

> > +}

> > +

> > +static int

> > +i40e_set_up_region(struct i40e_pf *pf,

> > +				struct rte_i40e_rss_region_conf *conf_ptr) {

> > +	struct i40e_queue_region_info *info = &pf->queue_region;

> > +	uint32_t ret = -EINVAL;

> > +	uint16_t i, j, region_index, up_set = 0;

> > +

> > +	if (conf_ptr->user_priority >

> > I40E_REGION_USERPRIORITY_MAX_INDEX) {

> > +		PMD_INIT_LOG(ERR, "the queue region max index is 7");

> > +		return ret;

> > +	}

> > +

> > +	if (conf_ptr->region_id >= I40E_REGION_MAX_INDEX) {

> > +		PMD_INIT_LOG(ERR, "the region_id max index is 7");

> > +		return ret;

> > +	}

> > +

> > +	if (info->queue_region_number) {

> > +		for (i = 0; i < info->queue_region_number; i++)

> > +			if (conf_ptr->region_id == info->region[i].region_id)

> > +				break;

> > +

> > +		if (i == info->queue_region_number) {

> > +			PMD_INIT_LOG(ERR, "that region id "

> > +				"has not been set before");

> > +			return ret;

> > +		}

> > +		region_index = i;

> > +

> > +		for (i = 0; i < info->queue_region_number; i++) {

> > +			for (j = 0; j <

> > +				info->region[i].user_priority_num; j++) {

> > +				if (info->region[i].user_priority[j] ==

> > +					conf_ptr->user_priority) {

> > +					up_set = 1;

> > +					break;

> > +				}

> > +			}

> > +		}

> > +

> > +		if (up_set) {

> > +			PMD_INIT_LOG(ERR, "that user priority "

> > +				"has been set before");

> > +			return ret;

> > +		}

> > +

> > +		j = info->region[region_index].user_priority_num;

> > +		info->region[region_index].user_priority[j] =

> > +						conf_ptr->user_priority;

> > +		info->region[region_index].user_priority_num++;

> > +	} else {

> > +		PMD_INIT_LOG(ERR, "there is no that region id been set

> > before");

> > +		return ret;

> > +	}

> > +

> > +	return 0;

> > +}

> > +

> > +static int

> > +i40e_queue_region_dcb_configure(struct i40e_hw *hw,

> > +				struct i40e_pf *pf)

> > +{

> > +	struct i40e_dcbx_config dcb_cfg_local;

> > +	struct i40e_dcbx_config *dcb_cfg;

> > +	struct i40e_queue_region_info *info = &pf->queue_region;

> > +	struct i40e_dcbx_config *old_cfg = &hw->local_dcbx_config;

> > +	uint32_t ret = -EINVAL;

> > +	uint16_t i, j, prio_index, region_index;

> > +	uint8_t tc_map, tc_bw, bw_lf;

> > +

> > +	if (!info->queue_region_number) {

> > +		PMD_INIT_LOG(ERR, "there is no that region id been set

> > before");

> > +		return ret;

> > +	}

> > +

> > +	dcb_cfg = &dcb_cfg_local;

> > +	memset(dcb_cfg, 0, sizeof(struct i40e_dcbx_config));

> > +

> > +	/* assume each tc has the same bw */

> > +	tc_bw = I40E_MAX_PERCENT / info->queue_region_number;

> > +	for (i = 0; i < info->queue_region_number; i++)

> > +		dcb_cfg->etscfg.tcbwtable[i] = tc_bw;

> > +	/* to ensure the sum of tcbw is equal to 100 */

> > +	bw_lf = I40E_MAX_PERCENT %  info->queue_region_number;

> > +	for (i = 0; i < bw_lf; i++)

> > +		dcb_cfg->etscfg.tcbwtable[i]++;

> > +

> > +	/* assume each tc has the same Transmission Selection Algorithm */

> > +	for (i = 0; i < info->queue_region_number; i++)

> > +		dcb_cfg->etscfg.tsatable[i] = I40E_IEEE_TSA_ETS;

> > +

> > +	for (i = 0; i < info->queue_region_number; i++) {

> > +		for (j = 0; j < info->region[i].user_priority_num; j++) {

> > +			prio_index = info->region[i].user_priority[j];

> > +			region_index = info->region[i].region_id;

> > +			dcb_cfg->etscfg.prioritytable[prio_index] =

> > +						region_index;

> > +		}

> > +	}

> > +

> > +	/* FW needs one App to configure HW */

> > +	dcb_cfg->numapps = I40E_DEFAULT_DCB_APP_NUM;

> > +	dcb_cfg->app[0].selector = I40E_APP_SEL_ETHTYPE;

> > +	dcb_cfg->app[0].priority = I40E_DEFAULT_DCB_APP_PRIO;

> > +	dcb_cfg->app[0].protocolid = I40E_APP_PROTOID_FCOE;

> > +

> > +	tc_map = RTE_LEN2MASK(info->queue_region_number, uint8_t);

> > +

> > +	dcb_cfg->pfc.willing = 0;

> > +	dcb_cfg->pfc.pfccap = I40E_MAX_TRAFFIC_CLASS;

> > +	dcb_cfg->pfc.pfcenable = tc_map;

> > +

> > +	/* Copy the new config to the current config */

> > +	*old_cfg = *dcb_cfg;

> > +	old_cfg->etsrec = old_cfg->etscfg;

> > +	ret = i40e_set_dcb_config(hw);

> > +

> > +	if (ret) {

> > +		PMD_INIT_LOG(ERR, "Set queue region DCB Config failed,

> > err"

> > +			" %s aq_err %s",

> > +			 i40e_stat_str(hw, ret),

> > +			 i40e_aq_str(hw, hw->aq.asq_last_status));

> > +		return ret;

> > +	}

> > +

> > +	return 0;

> > +}

> > +

> > +static int

> > +i40e_queue_region_dcb_configure_default(struct i40e_hw *hw) {

> > +	uint16_t i;

> > +	uint32_t ret = -EINVAL;

> > +

> > +	memset(&hw->local_dcbx_config, 0,

> > +	sizeof(struct i40e_dcbx_config));

> > +	/* set dcb default configuration */

> > +	hw->local_dcbx_config.etscfg.willing = 0;

> > +	hw->local_dcbx_config.etscfg.maxtcs = 0;

> > +	hw->local_dcbx_config.etscfg.tcbwtable[0] = 100;

> > +	hw->local_dcbx_config.etscfg.tsatable[0] =

> > +			I40E_IEEE_TSA_ETS;

> > +	/* all UPs mapping to region 0 */

> > +	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++)

> > +		hw->local_dcbx_config.etscfg.prioritytable[i] = 0;

> > +	hw->local_dcbx_config.etsrec =

> > +		hw->local_dcbx_config.etscfg;

> > +	hw->local_dcbx_config.pfc.willing = 0;

> > +	hw->local_dcbx_config.pfc.pfccap =

> > +			I40E_MAX_TRAFFIC_CLASS;

> > +	/* FW needs one App to configure HW */

> > +	hw->local_dcbx_config.numapps = 1;

> > +	hw->local_dcbx_config.app[0].selector =

> > +			I40E_APP_SEL_ETHTYPE;

> > +	hw->local_dcbx_config.app[0].priority = 3;

> > +	hw->local_dcbx_config.app[0].protocolid =

> > +			I40E_APP_PROTOID_FCOE;

> > +	ret = i40e_set_dcb_config(hw);

> > +	if (ret) {

> > +		PMD_INIT_LOG(ERR,

> > +		"default dcb config fails. err = %d, aq_err = %d.",

> > +			ret, hw->aq.asq_last_status);

> > +		return ret;

> > +	}

> > +

> > +	return 0;

> > +}

> > +

> > +int

> > +i40e_flush_region_all_conf(struct i40e_hw *hw, struct i40e_pf *pf,

> > +				uint16_t on)

> > +{

> > +	uint16_t i;

> > +	struct i40e_queue_region_info *info = &pf->queue_region;

> > +

> > +	if (on) {

> > +		i40e_vsi_update_queue_region_mapping(hw, pf);

> > +		i40e_queue_region_dcb_configure(hw, pf);

> > +		return 0;

> > +	}

> > +

> > +	info->queue_region_number = 1;

> > +	info->region[0].queue_num = 64;

> > +	info->region[0].queue_start_index = 0;

> > +

> > +	i40e_vsi_update_queue_region_mapping(hw, pf);

> > +	i40e_queue_region_dcb_configure_default(hw);

> > +

> > +	for (i = 0; i < I40E_PFQF_HREGION_MAX_INDEX; i++)

> > +		i40e_write_rx_ctl(hw, I40E_PFQF_HREGION(i), 0);

> > +

> > +	memset(info, 0, sizeof(struct i40e_queue_region_info));

> > +

> > +	return 0;

> > +}

> > +

> > +int rte_pmd_i40e_queue_region_conf(uint8_t port,

> > +			struct rte_i40e_rss_region_conf *conf_ptr) {

> > +	struct rte_eth_dev *dev = &rte_eth_devices[port];

> > +	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data-

> > >dev_private);

> > +	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data-

> > >dev_private);

> > +	enum rte_pmd_i40e_queue_region_op op_type = conf_ptr->op;

> > +	uint32_t ret;

> > +

> > +	if (!is_i40e_supported(dev))

> > +		return -ENOTSUP;

> > +

> > +	switch (op_type) {

> > +	case RTE_PMD_I40E_QUEUE_REGION_SET:

> > +		ret = i40e_set_queue_region(pf, conf_ptr);

> > +		break;

> > +	case RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET:

> > +		ret = i40e_set_region_flowtype_pf(hw, pf, conf_ptr);

> > +		break;

> > +	case RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET:

> > +		ret = -EINVAL;

> > +		break;

> 

> Why setting of flowtype mapping is separated to two different option?

> Does it mean that application will need to detect which driver it uses before

> calling this API? Application should not care which driver is in use, it tracks the

> number of queues used for RSS and just configures these queues to

> different regions, regardless if it runs on top of PF or VF. Difference in

> configuration for PF/VF should be hidden from application, so only single

> RTE_PMD_I40E_REGION_FLOWTYPE_SET should be used.


By now , we only support pf for this config. 
But I think what you said is very reasonable. I can change code to a new scheme:
DPDK i40e driver use   rte_eth_dev ->device->name to detect  whether his name is 
" net_i40e_vf " or " net_i40e"  itself, then  we  can PMD driver decide to config PFQF_HREGION or VFQF_HREGION.
I will make change in v4 later. 

> 

> All other options do not have separate check.

> Will they work on both PF and VF?


No, only WORK on PF. The reason we have talk in other mail before.

> 

> > +	case RTE_PMD_I40E_UP_REGION_SET:

> > +		ret = i40e_set_up_region(pf, conf_ptr);

> > +		break;

> > +	case RTE_PMD_I40E_REGION_ALL_FLUSH_ON:

> > +		ret = i40e_flush_region_all_conf(hw, pf, 1);

> > +		break;

> > +	case RTE_PMD_I40E_REGION_ALL_FLUSH_OFF:

> > +		ret = i40e_flush_region_all_conf(hw, pf, 0);

> > +		break;

> > +

> > +	default:

> > +		PMD_DRV_LOG(WARNING, "op type (%d) not supported",

> > +			    op_type);

> > +		ret = -EINVAL;

> > +		break;

> > +	}

> > +

> > +	I40E_WRITE_FLUSH(hw);

> > +

> > +	return ret;

> > +}

> 


> How this API can be tested, is there a separate testpmd patch?


Yes, testpmd has add new CLI commands to support it, also can be use by customer.

> 

> > diff --git a/drivers/net/i40e/rte_pmd_i40e.h

> > b/drivers/net/i40e/rte_pmd_i40e.h index 155b7e8..a1329f4 100644

> > --- a/drivers/net/i40e/rte_pmd_i40e.h

> > +++ b/drivers/net/i40e/rte_pmd_i40e.h

> > @@ -91,6 +91,20 @@ enum rte_pmd_i40e_package_info {

> >  	RTE_PMD_I40E_PKG_INFO_MAX = 0xFFFFFFFF  };

> >

> > +/**

> > + *  Option types of queue region.

> > + */

> > +enum rte_pmd_i40e_queue_region_op {

> > +	RTE_PMD_I40E_REGION_UNDEFINED = 0,

> > +	RTE_PMD_I40E_QUEUE_REGION_SET,      /**< add queue region

> > set*/

> > +	RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET,   /**< add pf region

> > pctype set */

> > +	RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET,   /**< add vf region

> > pctype set */

> > +	RTE_PMD_I40E_UP_REGION_SET,   /**< add queue region pctype

> > set */

> > +	RTE_PMD_I40E_REGION_ALL_FLUSH_ON,   /**< flush on all

> > configuration */

> > +	RTE_PMD_I40E_REGION_ALL_FLUSH_OFF,   /**< flush off all

> > configuration */

> > +	RTE_PMD_I40E_QUEUE_REGION_OP_MAX

> > +};

> > +

> >  #define RTE_PMD_I40E_DDP_NAME_SIZE 32

> >

> >  /**

> > @@ -146,6 +160,18 @@ struct rte_pmd_i40e_ptype_mapping {  };

> >

> >  /**

> > + * Queue region information get from CLI.

> > + */

> > +struct rte_i40e_rss_region_conf {

> > +	uint8_t region_id;

> > +	uint8_t hw_flowtype;

> > +	uint8_t queue_start_index;

> > +	uint8_t queue_num;

> > +	uint8_t user_priority;

> > +	enum rte_pmd_i40e_queue_region_op  op; };

> > +

> > +/**

> >   * Notify VF when PF link status changes.

> >   *

> >   * @param port

> > @@ -657,4 +683,16 @@ int

> rte_pmd_i40e_ptype_mapping_replace(uint8_t

> > port,

> >  int rte_pmd_i40e_add_vf_mac_addr(uint8_t port, uint16_t vf_id,

> >  				 struct ether_addr *mac_addr);

> >

> > +/**

> > + * Get RSS queue region info from CLI and do configuration for

> > + * that port as the command otion type

> > + *

> > + * @param port

> > + *    pointer to port identifier of the device

> > + * @param conf_ptr

> > + *    pointer to the struct that contain all the

> > + *    region configuration parameters

> > + */

> > +int rte_pmd_i40e_queue_region_conf(uint8_t port,

> > +		struct rte_i40e_rss_region_conf *conf_ptr);

> >  #endif /* _PMD_I40E_H_ */

> > diff --git a/drivers/net/i40e/rte_pmd_i40e_version.map

> > b/drivers/net/i40e/rte_pmd_i40e_version.map

> > index ef8882b..c3ee2da 100644

> > --- a/drivers/net/i40e/rte_pmd_i40e_version.map

> > +++ b/drivers/net/i40e/rte_pmd_i40e_version.map

> > @@ -50,5 +50,6 @@ DPDK_17.11 {

> >  	global:

> >

> >  	rte_pmd_i40e_add_vf_mac_addr;

> > +	rte_pmd_i40e_queue_region_conf;

> >

> >  } DPDK_17.08;

> > --

> > 2.9.3

>
  
Chilikin, Andrey Sept. 22, 2017, 8:13 p.m. UTC | #8
Hi Wei

> -----Original Message-----
> From: Zhao1, Wei
> Sent: Friday, September 22, 2017 9:49 AM
> To: Chilikin, Andrey <andrey.chilikin@intel.com>; dev@dpdk.org
> Cc: Yigit, Ferruh <ferruh.yigit@intel.com>; Xing, Beilei
> <beilei.xing@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>
> Subject: RE: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and flush
> 
> Hi,Andrey
> 
> > -----Original Message-----
> > From: Chilikin, Andrey
> > Sent: Friday, September 22, 2017 3:53 AM
> > To: Zhao1, Wei <wei.zhao1@intel.com>; dev@dpdk.org
> > Cc: Zhao1, Wei <wei.zhao1@intel.com>; Yigit, Ferruh
> > <ferruh.yigit@intel.com>; Xing, Beilei <beilei.xing@intel.com>; Wu, Jingjing
> > <jingjing.wu@intel.com>
> > Subject: RE: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and
> flush

<snip>

> > > +	if (i == I40E_REGION_MAX_INDEX) {
> > > +		printf("The region sizes should be any of the following "
> > > +		"values: 1, 2, 4, 8, 16, 32, 64 as long as the "
> > > +		"total number of queues do not exceed the VSI allocation");

 printf?

> > > +		return ret;
> > > +	}
> > > +
> > > +	if (conf_ptr->region_id >= I40E_REGION_MAX_INDEX) {
> > > +		PMD_INIT_LOG(ERR, "the queue region max index is 7");
> > > +		return ret;
> > > +	}
> > > +

<snip>
> > > +	switch (op_type) {
> > > +	case RTE_PMD_I40E_QUEUE_REGION_SET:
> > > +		ret = i40e_set_queue_region(pf, conf_ptr);
> > > +		break;
> > > +	case RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET:
> > > +		ret = i40e_set_region_flowtype_pf(hw, pf, conf_ptr);
> > > +		break;
> > > +	case RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET:
> > > +		ret = -EINVAL;
> > > +		break;
> >
> > Why setting of flowtype mapping is separated to two different option?
> > Does it mean that application will need to detect which driver it uses before
> > calling this API? Application should not care which driver is in use, it tracks
> the
> > number of queues used for RSS and just configures these queues to
> > different regions, regardless if it runs on top of PF or VF. Difference in
> > configuration for PF/VF should be hidden from application, so only single
> > RTE_PMD_I40E_REGION_FLOWTYPE_SET should be used.
> 
> By now , we only support pf for this config.
> But I think what you said is very reasonable. I can change code to a new
> scheme:
> DPDK i40e driver use   rte_eth_dev ->device->name to detect  whether his
> name is
> " net_i40e_vf " or " net_i40e"  itself, then  we  can PMD driver decide to
> config PFQF_HREGION or VFQF_HREGION.
> I will make change in v4 later.
> 
> >
> > All other options do not have separate check.
> > Will they work on both PF and VF?
> 
> No, only WORK on PF. The reason we have talk in other mail before.
> 

Yes, it is why I was confused with separating RTE_PMD_I40E_REGION_FLOWTYPE to PF and VF. All options work on PF only, by only FLOWTYPE was split to PF/VF.

> >
> > > +	case RTE_PMD_I40E_UP_REGION_SET:
> > > +		ret = i40e_set_up_region(pf, conf_ptr);
> > > +		break;
> > > +	case RTE_PMD_I40E_REGION_ALL_FLUSH_ON:
> > > +		ret = i40e_flush_region_all_conf(hw, pf, 1);
> > > +		break;
> > > +	case RTE_PMD_I40E_REGION_ALL_FLUSH_OFF:
> > > +		ret = i40e_flush_region_all_conf(hw, pf, 0);
> > > +		break;
> > > +

Could you explain what is the difference between FLUSH_OFF and FLUSH_ON and how it can be used by an application?

> > > +	default:
> > > +		PMD_DRV_LOG(WARNING, "op type (%d) not supported",
> > > +			    op_type);
> > > +		ret = -EINVAL;
> > > +		break;
> > > +	}
> > > +
> > > +	I40E_WRITE_FLUSH(hw);
> > > +
> > > +	return ret;
> > > +}
> >
> 
> > How this API can be tested, is there a separate testpmd patch?
> 
> Yes, testpmd has add new CLI commands to support it, also can be use by
> customer.

Sorry, missed second part of the patchset.

Regards,
Andrey
  
Jingjing Wu Sept. 24, 2017, 4:01 p.m. UTC | #9
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Wei Zhao
> Sent: Friday, September 15, 2017 11:13 AM
> To: dev@dpdk.org
> Cc: Zhao1, Wei <wei.zhao1@intel.com>
> Subject: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and flush
> 
> This feature enable queue regions configuration for RSS in PF/VF,
> so that different traffic classes or different packet
> classification types can be separated to different queues in
> different queue regions.This patch can set queue region range,
> it include queue number in a region and the index of first queue.
> This patch enable mapping between different priorities (UP) and
> different traffic classes.It also enable mapping between a region
> index and a sepcific flowtype(PCTYPE).It also provide the solution
> of flush all configuration about queue region the above described.
> 
> Signed-off-by: Wei Zhao <wei.zhao1@intel.com>
> ---
>  drivers/net/i40e/i40e_ethdev.c            |  19 +-
>  drivers/net/i40e/i40e_ethdev.h            |  30 ++
>  drivers/net/i40e/rte_pmd_i40e.c           | 482
> ++++++++++++++++++++++++++++++
>  drivers/net/i40e/rte_pmd_i40e.h           |  38 +++
>  drivers/net/i40e/rte_pmd_i40e_version.map |   1 +
>  5 files changed, 566 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
> index 8e0580c..1663fc0 100644
> --- a/drivers/net/i40e/i40e_ethdev.c
> +++ b/drivers/net/i40e/i40e_ethdev.c
> @@ -137,10 +137,6 @@
>  #define I40E_PRTTSYN_TSYNTYPE    0x0e000000
>  #define I40E_CYCLECOUNTER_MASK   0xffffffffffffffffULL
> 
> -#define I40E_MAX_PERCENT            100
> -#define I40E_DEFAULT_DCB_APP_NUM    1
> -#define I40E_DEFAULT_DCB_APP_PRIO   3
> -
>  /**
>   * Below are values for writing un-exposed registers suggested
>   * by silicon experts
> @@ -1034,6 +1030,15 @@ i40e_init_fdir_filter_list(struct rte_eth_dev *dev)
>  	return ret;
>  }
> 
> +static void
> +i40e_init_queue_region_conf(struct rte_eth_dev *dev)
> +{
> +	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
> +	struct i40e_queue_region_info *info = &pf->queue_region;
> +
> +	memset(info, 0, sizeof(struct i40e_queue_region_info));
> +}
> +
>  static int
>  eth_i40e_dev_init(struct rte_eth_dev *dev)
>  {
> @@ -1309,6 +1314,9 @@ eth_i40e_dev_init(struct rte_eth_dev *dev)
>  	if (ret < 0)
>  		goto err_init_fdir_filter_list;
> 
> +	/* initialize queue region configuration */
> +	i40e_init_queue_region_conf(dev);
> +
If this func i40e_init_queue_region_conf is only called here, then it is
Not necessary? Because all the pf struct has been zeroed at probe time.

>  	return 0;
> 
>  err_init_fdir_filter_list:
> @@ -1464,6 +1472,9 @@ eth_i40e_dev_uninit(struct rte_eth_dev *dev)
>  	/* Remove all Traffic Manager configuration */
>  	i40e_tm_conf_uninit(dev);
> 
> +	/* Remove all the queue region configuration */
> +	i40e_flush_region_all_conf(hw, pf, 0);
> +
>  	return 0;
>  }
> 
> diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
> index ad80f0f..d612886 100644
> --- a/drivers/net/i40e/i40e_ethdev.h
> +++ b/drivers/net/i40e/i40e_ethdev.h
> @@ -260,6 +260,16 @@ enum i40e_flxpld_layer_idx {
>  #define I40E_QOS_BW_WEIGHT_MIN 1
>  /* The max bandwidth weight is 127. */
>  #define I40E_QOS_BW_WEIGHT_MAX 127
> +/* The max queue region index is 7. */
> +#define I40E_REGION_MAX_INDEX 7
> +/* The max queue region userpriority is 7. */
> +#define I40E_REGION_USERPRIORITY_MAX_INDEX 7
You can reuse I40E_MAX_USER_PRIORITY.

> +/* The max pctype index is 63. */
> +#define I40E_REGION_PCTYPE_MAX_INDEX 63
> +
You can use I40E_FILTER_PCTYPE_MAX

> +#define I40E_MAX_PERCENT            100
> +#define I40E_DEFAULT_DCB_APP_NUM    1
> +#define I40E_DEFAULT_DCB_APP_PRIO   3
> 
>  /**
>   * The overhead from MTU to max frame size.
> @@ -541,6 +551,22 @@ struct i40e_ethertype_rule {
>  	struct rte_hash *hash_table;
>  };
> 
> +/* queue region info */
> +struct i40e_region_info {
> +	uint8_t region_id;
> +	uint8_t queue_start_index;
> +	uint8_t queue_num;
> +	uint8_t user_priority_num;
> +	uint8_t user_priority[I40E_MAX_USER_PRIORITY];
Please comment what is the meaning of this array?

> +	uint8_t flowtype_num;
> +	uint8_t hw_flowtype[I40E_FILTER_PCTYPE_MAX];
The same, please comment it.

> +};
> +
> +struct i40e_queue_region_info {
> +	uint16_t queue_region_number;
> +	struct i40e_region_info region[I40E_REGION_MAX_INDEX + 1];
> +};
> +
>  /* Tunnel filter number HW supports */
>  #define I40E_MAX_TUNNEL_FILTER_NUM 400
> 
> @@ -776,6 +802,7 @@ struct i40e_pf {
>  	struct i40e_fdir_info fdir; /* flow director info */
>  	struct i40e_ethertype_rule ethertype; /* Ethertype filter rule */
>  	struct i40e_tunnel_rule tunnel; /* Tunnel filter rule */
> +	struct i40e_queue_region_info queue_region; /* queue region info */
>  	struct i40e_fc_conf fc_conf; /* Flow control conf */
>  	struct i40e_mirror_rule_list mirror_list;
>  	uint16_t nb_mirror_rule;   /* The number of mirror rules */
> @@ -1003,6 +1030,9 @@ void i40e_check_write_reg(struct i40e_hw *hw, uint32_t addr,
> uint32_t val);
>  int i40e_tm_ops_get(struct rte_eth_dev *dev, void *ops);
>  void i40e_tm_conf_init(struct rte_eth_dev *dev);
>  void i40e_tm_conf_uninit(struct rte_eth_dev *dev);
> +int i40e_flush_region_all_conf(struct i40e_hw *hw,
> +				struct i40e_pf *pf, uint16_t on);
> +
> 
>  #define I40E_DEV_TO_PCI(eth_dev) \
>  	RTE_DEV_TO_PCI((eth_dev)->device)
> diff --git a/drivers/net/i40e/rte_pmd_i40e.c b/drivers/net/i40e/rte_pmd_i40e.c
> index d69a472..9a75f21 100644
> --- a/drivers/net/i40e/rte_pmd_i40e.c
> +++ b/drivers/net/i40e/rte_pmd_i40e.c
> @@ -35,6 +35,7 @@
>  #include <rte_tailq.h>
> 
>  #include "base/i40e_prototype.h"
> +#include "base/i40e_dcb.h"
>  #include "i40e_ethdev.h"
>  #include "i40e_pf.h"
>  #include "i40e_rxtx.h"
> @@ -2161,3 +2162,484 @@ rte_pmd_i40e_add_vf_mac_addr(uint8_t port, uint16_t
> vf_id,
> 
>  	return 0;
>  }
> +
> +static int
> +i40e_vsi_update_queue_region_mapping(struct i40e_hw *hw,
> +			      struct i40e_pf *pf)
> +{
> +	uint16_t i;
> +	struct i40e_vsi *vsi = pf->main_vsi;
> +	uint16_t queue_offset, bsf, tc_index;
> +	struct i40e_vsi_context ctxt;
> +	struct i40e_aqc_vsi_properties_data *vsi_info;
> +	struct i40e_queue_region_info *region_info =
> +				&pf->queue_region;
> +	uint32_t ret = -EINVAL;
> +
> +	if (!region_info->queue_region_number) {
> +		PMD_INIT_LOG(ERR, "there is no that region id been set before");
> +		return ret;
> +	}
> +
> +	memset(&ctxt, 0, sizeof(struct i40e_vsi_context));
> +
> +	/* Update Queue Pairs Mapping for currently enabled UPs */
> +	ctxt.seid = vsi->seid;
> +	ctxt.pf_num = hw->pf_id;
> +	ctxt.vf_num = 0;
> +	ctxt.uplink_seid = vsi->uplink_seid;
> +	ctxt.info = vsi->info;
> +	vsi_info = &ctxt.info;
> +
Please use the vsi_info stored in i40e_vsi to avoid missing parameters.

> +	memset(vsi_info->tc_mapping, 0, sizeof(uint16_t) * 8);
> +	memset(vsi_info->queue_mapping, 0, sizeof(uint16_t) * 16);
> +
Ctxt has been zeroed, no need to memset tc_mapping and queue_mapping again.
> +	/**
> +	 * Configure queue region and queue mapping parameters,
> +	 * for enabled queue region, allocate queues to this region.
> +	 */
> +
> +	for (i = 0; i < region_info->queue_region_number; i++) {
> +		tc_index = region_info->region[i].region_id;
> +		bsf = rte_bsf32(region_info->region[i].queue_num);
> +		queue_offset = region_info->region[i].queue_start_index;
> +		vsi_info->tc_mapping[tc_index] = rte_cpu_to_le_16(
> +			(queue_offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
> +				(bsf << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT));
> +	}
> +
> +	/* Associate queue number with VSI, Keep vsi->nb_qps unchanged */
> +	if (vsi->type == I40E_VSI_SRIOV) {
Vsi is main vsi, it is impossible that the type to be SRIOV. If you want to configure
Queue mapping on sriov VSI, you need to scan all the SRIOV VSIs.

> +		vsi_info->mapping_flags |=
> +			rte_cpu_to_le_16(I40E_AQ_VSI_QUE_MAP_NONCONTIG);
> +		for (i = 0; i < vsi->nb_qps; i++)
> +			vsi_info->queue_mapping[i] =
> +				rte_cpu_to_le_16(vsi->base_queue + i);
> +	} else {
> +		vsi_info->mapping_flags |=
> +			rte_cpu_to_le_16(I40E_AQ_VSI_QUE_MAP_CONTIG);
> +		vsi_info->queue_mapping[0] = rte_cpu_to_le_16(vsi->base_queue);
> +	}
> +	vsi_info->valid_sections |=
> +		rte_cpu_to_le_16(I40E_AQ_VSI_PROP_QUEUE_MAP_VALID);
> +
> +	/* Update the VSI after updating the VSI queue-mapping information */
> +	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
> +	if (ret) {
> +		PMD_INIT_LOG(ERR, "Failed to configure queue region "
PMD_INIT_LOG  -> PMD_DRV_LOG
> +			"mapping = %d ", hw->aq.asq_last_status);
> +		return ret;
> +	}
> +	/* update the local VSI info with updated queue map */
> +	(void)rte_memcpy(&vsi->info.tc_mapping, &ctxt.info.tc_mapping,
> +					sizeof(vsi->info.tc_mapping));
> +	(void)rte_memcpy(&vsi->info.queue_mapping,
> +			&ctxt.info.queue_mapping,
> +		sizeof(vsi->info.queue_mapping));
> +	vsi->info.mapping_flags = ctxt.info.mapping_flags;
> +	vsi->info.valid_sections = 0;
> +
> +	return 0;
> +}
> +
> +
> +static int
> +i40e_set_queue_region(struct i40e_pf *pf,
> +				struct rte_i40e_rss_region_conf *conf_ptr)
> +{
> +	uint16_t tc_size_tb[7] = {1, 2, 4, 8, 16, 32, 64};
> +	uint16_t i;
> +	struct i40e_vsi *main_vsi = pf->main_vsi;
> +	struct i40e_queue_region_info *info = &pf->queue_region;
> +	uint32_t ret = -EINVAL;
> +
> +	for (i = 0; i < I40E_REGION_MAX_INDEX; i++)
> +		if (conf_ptr->queue_num == tc_size_tb[i])
> +			break;
> +
> +	if (i == I40E_REGION_MAX_INDEX) {
> +		printf("The region sizes should be any of the following "
> +		"values: 1, 2, 4, 8, 16, 32, 64 as long as the "
> +		"total number of queues do not exceed the VSI allocation");
> +		return ret;
> +	}
> +
If you just want to check if the queue_nume is one of {1, 2,4,8,16,32,64}
You can use 

If (rte_is_power_of_2(queue_num) && queue_num <= 64)


> +	if (conf_ptr->region_id >= I40E_REGION_MAX_INDEX) {
> +		PMD_INIT_LOG(ERR, "the queue region max index is 7");
> +		return ret;
> +	}
> +
> +	if ((conf_ptr->queue_start_index + conf_ptr->queue_num)
> +					> main_vsi->nb_used_qps) {
> +		PMD_INIT_LOG(ERR, "the queue index exceeds the VSI range");
> +		return ret;
> +	}
> +
> +	if (info->queue_region_number) {
> +		for (i = 0; i < info->queue_region_number; i++)
> +			if (conf_ptr->region_id == info->region[i].region_id)
> +				break;
> +
> +		if ((i == info->queue_region_number) &&
> +			(i <= I40E_REGION_MAX_INDEX)) {
It is impossible that i > I40E_REGION_MAX_INDEX, right?

> +			info->region[i].region_id = conf_ptr->region_id;
> +			info->region[i].queue_num = conf_ptr->queue_num;
> +			info->region[i].queue_start_index =
> +				conf_ptr->queue_start_index;
> +			info->queue_region_number++;
> +		} else {
> +			PMD_INIT_LOG(ERR, "queue region number "
> +				"exceeds maxnum 8 or the "
> +				"queue region id has "
> +				"been set before");
Does it mean we can not change the set of one queue region whose id is set before?

> +			return ret;
> +		}
> +	} else {
> +		info->region[0].region_id = conf_ptr->region_id;
> +		info->region[0].queue_num = conf_ptr->queue_num;
> +		info->region[0].queue_start_index =
> +			conf_ptr->queue_start_index;
> +		info->queue_region_number++;
> +	}
I think this part of code can be refined. 
> +
> +	return 0;
> +}
> +
> +static int
> +i40e_set_region_flowtype_pf(struct i40e_hw *hw,
> +		struct i40e_pf *pf, struct rte_i40e_rss_region_conf *conf_ptr)
> +{
> +	uint32_t pfqf_hregion;
> +	uint32_t ret = -EINVAL;
> +	struct i40e_queue_region_info *info = &pf->queue_region;
> +	uint16_t i, j, index, flowtype_set = 0;
> +	uint16_t region_index, flowtype_index;
> +
> +	if (conf_ptr->region_id > I40E_PFQF_HREGION_MAX_INDEX) {
> +		PMD_INIT_LOG(ERR, "the queue region max index is 7");
> +		return ret;
> +	}
> +
> +	if (conf_ptr->hw_flowtype >= I40E_FILTER_PCTYPE_MAX) {
> +		PMD_INIT_LOG(ERR, "the hw_flowtype or PCTYPE max index is 63");
> +		return ret;
> +	}
> +
> +	if (info->queue_region_number) {
> +		for (i = 0; i < info->queue_region_number; i++)
> +			if (conf_ptr->region_id == info->region[i].region_id)
> +				break;
> +
> +		if (i == info->queue_region_number) {
> +			PMD_INIT_LOG(ERR, "that region id has not "
> +				"been set before");
> +			return ret;
> +		}
If info->queue_region_number==0, then i would be 0. So, the outer wrap
if (info->queue_region_number) {} can be omit. The same at above func,
I think this part of code can be refined.
> +		region_index = i;
> +	
> +		for (i = 0; i < info->queue_region_number; i++) {
> +			for (j = 0; j < info->region[i].flowtype_num; j++) {
> +				if (conf_ptr->hw_flowtype ==
> +					info->region[i].hw_flowtype[j]) {
> +					flowtype_set = 1;
> +					break;
> +				}
> +			}
> +		}
> +
> +		if (flowtype_set) {
> +			PMD_INIT_LOG(ERR, "that hw_flowtype "
> +				"has been set before");
> +			return ret;
Is that right to return -EINVAL when it has been set. And it seems
In this function, the error code is only -EINVAL, if so, why we define
ret?
> +		}
> +		flowtype_index = info->region[region_index].flowtype_num;
> +		info->region[region_index].hw_flowtype[flowtype_index] =
> +						conf_ptr->hw_flowtype;
> +		info->region[region_index].flowtype_num++;
> +	} else  {
> +		PMD_INIT_LOG(ERR, "there is no that region id been set before");

Again, please use PMD_DRV_LOG. Because the func is not called at initialization time.
Please replace it in all the patch.
> +		return ret;
> +	}
> +
> +	index = conf_ptr->hw_flowtype >> 3;
> +	pfqf_hregion = i40e_read_rx_ctl(hw, I40E_PFQF_HREGION(index));
> +
> +	if ((conf_ptr->hw_flowtype & 0x7) == 0) {
> +		pfqf_hregion |= conf_ptr->region_id <<
> +				I40E_PFQF_HREGION_REGION_0_SHIFT;
> +		pfqf_hregion |= 1 << I40E_PFQF_HREGION_OVERRIDE_ENA_0_SHIFT;
> +	} else if ((conf_ptr->hw_flowtype & 0x7) == 1) {
> +		pfqf_hregion |= conf_ptr->region_id <<
> +				I40E_PFQF_HREGION_REGION_1_SHIFT;
> +		pfqf_hregion |= 1 << I40E_PFQF_HREGION_OVERRIDE_ENA_1_SHIFT;
> +	} else if ((conf_ptr->hw_flowtype & 0x7) == 2) {
> +		pfqf_hregion |= conf_ptr->region_id <<
> +				I40E_PFQF_HREGION_REGION_2_SHIFT;
> +		pfqf_hregion |= 1 << I40E_PFQF_HREGION_OVERRIDE_ENA_2_SHIFT;
> +	} else if ((conf_ptr->hw_flowtype & 0x7) == 3) {
> +		pfqf_hregion |= conf_ptr->region_id <<
> +				I40E_PFQF_HREGION_REGION_3_SHIFT;
> +		pfqf_hregion |= 1 << I40E_PFQF_HREGION_OVERRIDE_ENA_3_SHIFT;
> +	} else if ((conf_ptr->hw_flowtype & 0x7) == 4) {
> +		pfqf_hregion |= conf_ptr->region_id <<
> +				I40E_PFQF_HREGION_REGION_4_SHIFT;
> +		pfqf_hregion |= 1 << I40E_PFQF_HREGION_OVERRIDE_ENA_4_SHIFT;
> +	} else if ((conf_ptr->hw_flowtype & 0x7) == 5) {
> +		pfqf_hregion |= conf_ptr->region_id <<
> +				I40E_PFQF_HREGION_REGION_5_SHIFT;
> +		pfqf_hregion |= 1 << I40E_PFQF_HREGION_OVERRIDE_ENA_5_SHIFT;
> +	} else if ((conf_ptr->hw_flowtype & 0x7) == 6) {
> +		pfqf_hregion |= conf_ptr->region_id <<
> +				I40E_PFQF_HREGION_REGION_6_SHIFT;
> +		pfqf_hregion |= 1 << I40E_PFQF_HREGION_OVERRIDE_ENA_6_SHIFT;
What is the (conf_ptr->hw_flowtype & 0x7) == 1/2/3/4/5/6 meanings? Please avoid
coding with integer number. Can not understand the relationship with flow type.
And the hw_flowtype is API to users, you need to make it clear enough.

> +	} else {
> +		pfqf_hregion |= conf_ptr->region_id <<
> +				I40E_PFQF_HREGION_REGION_7_SHIFT;
> +		pfqf_hregion |= 1 << I40E_PFQF_HREGION_OVERRIDE_ENA_7_SHIFT;
> +	}
> +
> +	i40e_write_rx_ctl(hw, I40E_PFQF_HREGION(index), pfqf_hregion);
> +
> +	return 0;
> +}
> +
> +static int
> +i40e_set_up_region(struct i40e_pf *pf,
> +				struct rte_i40e_rss_region_conf *conf_ptr)
> +{
> +	struct i40e_queue_region_info *info = &pf->queue_region;
> +	uint32_t ret = -EINVAL;
> +	uint16_t i, j, region_index, up_set = 0;
> +
> +	if (conf_ptr->user_priority > I40E_REGION_USERPRIORITY_MAX_INDEX) {
> +		PMD_INIT_LOG(ERR, "the queue region max index is 7");
> +		return ret;
> +	}
> +
> +	if (conf_ptr->region_id >= I40E_REGION_MAX_INDEX) {
> +		PMD_INIT_LOG(ERR, "the region_id max index is 7");
> +		return ret;
> +	}
> +
> +	if (info->queue_region_number) {
> +		for (i = 0; i < info->queue_region_number; i++)
> +			if (conf_ptr->region_id == info->region[i].region_id)
> +				break;
> +
> +		if (i == info->queue_region_number) {
> +			PMD_INIT_LOG(ERR, "that region id "
> +				"has not been set before");
> +			return ret;
> +		}
> +		region_index = i;
> +
> +		for (i = 0; i < info->queue_region_number; i++) {
> +			for (j = 0; j <
> +				info->region[i].user_priority_num; j++) {
> +				if (info->region[i].user_priority[j] ==
> +					conf_ptr->user_priority) {
> +					up_set = 1;
> +					break;
> +				}
> +			}
> +		}
> +
> +		if (up_set) {
> +			PMD_INIT_LOG(ERR, "that user priority "
> +				"has been set before");
> +			return ret;
> +		}
> +
> +		j = info->region[region_index].user_priority_num;
> +		info->region[region_index].user_priority[j] =
> +						conf_ptr->user_priority;
> +		info->region[region_index].user_priority_num++;
> +	} else {
> +		PMD_INIT_LOG(ERR, "there is no that region id been set before");
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static int
> +i40e_queue_region_dcb_configure(struct i40e_hw *hw,
> +				struct i40e_pf *pf)
> +{
> +	struct i40e_dcbx_config dcb_cfg_local;
> +	struct i40e_dcbx_config *dcb_cfg;
> +	struct i40e_queue_region_info *info = &pf->queue_region;
> +	struct i40e_dcbx_config *old_cfg = &hw->local_dcbx_config;
> +	uint32_t ret = -EINVAL;
> +	uint16_t i, j, prio_index, region_index;
> +	uint8_t tc_map, tc_bw, bw_lf;
> +
> +	if (!info->queue_region_number) {
> +		PMD_INIT_LOG(ERR, "there is no that region id been set before");
> +		return ret;
> +	}
> +
> +	dcb_cfg = &dcb_cfg_local;
> +	memset(dcb_cfg, 0, sizeof(struct i40e_dcbx_config));
> +
> +	/* assume each tc has the same bw */
> +	tc_bw = I40E_MAX_PERCENT / info->queue_region_number;
> +	for (i = 0; i < info->queue_region_number; i++)
> +		dcb_cfg->etscfg.tcbwtable[i] = tc_bw;
> +	/* to ensure the sum of tcbw is equal to 100 */
> +	bw_lf = I40E_MAX_PERCENT %  info->queue_region_number;
> +	for (i = 0; i < bw_lf; i++)
> +		dcb_cfg->etscfg.tcbwtable[i]++;
> +
> +	/* assume each tc has the same Transmission Selection Algorithm */
> +	for (i = 0; i < info->queue_region_number; i++)
> +		dcb_cfg->etscfg.tsatable[i] = I40E_IEEE_TSA_ETS;
> +
> +	for (i = 0; i < info->queue_region_number; i++) {
> +		for (j = 0; j < info->region[i].user_priority_num; j++) {
> +			prio_index = info->region[i].user_priority[j];
> +			region_index = info->region[i].region_id;
> +			dcb_cfg->etscfg.prioritytable[prio_index] =
> +						region_index;
> +		}
> +	}
> +
> +	/* FW needs one App to configure HW */
> +	dcb_cfg->numapps = I40E_DEFAULT_DCB_APP_NUM;
> +	dcb_cfg->app[0].selector = I40E_APP_SEL_ETHTYPE;
> +	dcb_cfg->app[0].priority = I40E_DEFAULT_DCB_APP_PRIO;
> +	dcb_cfg->app[0].protocolid = I40E_APP_PROTOID_FCOE;
> +
> +	tc_map = RTE_LEN2MASK(info->queue_region_number, uint8_t);
> +
> +	dcb_cfg->pfc.willing = 0;
> +	dcb_cfg->pfc.pfccap = I40E_MAX_TRAFFIC_CLASS;
> +	dcb_cfg->pfc.pfcenable = tc_map;
> +
> +	/* Copy the new config to the current config */
> +	*old_cfg = *dcb_cfg;
> +	old_cfg->etsrec = old_cfg->etscfg;
> +	ret = i40e_set_dcb_config(hw);
> +
> +	if (ret) {
> +		PMD_INIT_LOG(ERR, "Set queue region DCB Config failed, err"
> +			" %s aq_err %s",
> +			 i40e_stat_str(hw, ret),
> +			 i40e_aq_str(hw, hw->aq.asq_last_status));
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +static int
> +i40e_queue_region_dcb_configure_default(struct i40e_hw *hw)

Can i40e_dcb_init_configure do what this func did here?

> +{
> +	uint16_t i;
> +	uint32_t ret = -EINVAL;
> +
> +	memset(&hw->local_dcbx_config, 0,
> +	sizeof(struct i40e_dcbx_config));
> +	/* set dcb default configuration */
> +	hw->local_dcbx_config.etscfg.willing = 0;
> +	hw->local_dcbx_config.etscfg.maxtcs = 0;
> +	hw->local_dcbx_config.etscfg.tcbwtable[0] = 100;
> +	hw->local_dcbx_config.etscfg.tsatable[0] =
> +			I40E_IEEE_TSA_ETS;
> +	/* all UPs mapping to region 0 */
> +	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++)
> +		hw->local_dcbx_config.etscfg.prioritytable[i] = 0;
> +	hw->local_dcbx_config.etsrec =
> +		hw->local_dcbx_config.etscfg;
> +	hw->local_dcbx_config.pfc.willing = 0;
> +	hw->local_dcbx_config.pfc.pfccap =
> +			I40E_MAX_TRAFFIC_CLASS;
> +	/* FW needs one App to configure HW */
> +	hw->local_dcbx_config.numapps = 1;
> +	hw->local_dcbx_config.app[0].selector =
> +			I40E_APP_SEL_ETHTYPE;
> +	hw->local_dcbx_config.app[0].priority = 3;
> +	hw->local_dcbx_config.app[0].protocolid =
> +			I40E_APP_PROTOID_FCOE;
> +	ret = i40e_set_dcb_config(hw);
> +	if (ret) {
> +		PMD_INIT_LOG(ERR,
> +		"default dcb config fails. err = %d, aq_err = %d.",
> +			ret, hw->aq.asq_last_status);
> +		return ret;
> +	}
> +
> +	return 0;
> +}
> +
> +int
> +i40e_flush_region_all_conf(struct i40e_hw *hw, struct i40e_pf *pf,
> +				uint16_t on)
> +{
> +	uint16_t i;
> +	struct i40e_queue_region_info *info = &pf->queue_region;
> +
> +	if (on) {
> +		i40e_vsi_update_queue_region_mapping(hw, pf);
> +		i40e_queue_region_dcb_configure(hw, pf);
Don't check all the return code?

> +		return 0;
> +	}
> +
> +	info->queue_region_number = 1;
> +	info->region[0].queue_num = 64;
> +	info->region[0].queue_start_index = 0;
> +
> +	i40e_vsi_update_queue_region_mapping(hw, pf);
> +	i40e_queue_region_dcb_configure_default(hw);
> +
> +	for (i = 0; i < I40E_PFQF_HREGION_MAX_INDEX; i++)
> +		i40e_write_rx_ctl(hw, I40E_PFQF_HREGION(i), 0);
> +	memset(info, 0, sizeof(struct i40e_queue_region_info));
> +
> +	return 0;
> +}
> +
> +int rte_pmd_i40e_queue_region_conf(uint8_t port,
> +			struct rte_i40e_rss_region_conf *conf_ptr)
> +{
> +	struct rte_eth_dev *dev = &rte_eth_devices[port];
> +	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
> +	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> +	enum rte_pmd_i40e_queue_region_op op_type = conf_ptr->op;
> +	uint32_t ret;
> +
> +	if (!is_i40e_supported(dev))
> +		return -ENOTSUP;
> +
> +	switch (op_type) {
> +	case RTE_PMD_I40E_QUEUE_REGION_SET:
> +		ret = i40e_set_queue_region(pf, conf_ptr);
> +		break;
> +	case RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET:
> +		ret = i40e_set_region_flowtype_pf(hw, pf, conf_ptr);
> +		break;
> +	case RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET:
> +		ret = -EINVAL;
> +		break;
If it is not supported, why to define this op?

> +	case RTE_PMD_I40E_UP_REGION_SET:
> +		ret = i40e_set_up_region(pf, conf_ptr);
> +		break;
> +	case RTE_PMD_I40E_REGION_ALL_FLUSH_ON:
> +		ret = i40e_flush_region_all_conf(hw, pf, 1);
> +		break;
What is the FLUSH on meaning? Commit all configure to HW?
If so, what makes me confused that:
RTE_PMD_I40E_QUEUE_REGION_SET only touch the software stored in driver.
But RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET set the I40E_PFQF_HREGION register.
Those behaviors are not consist.

And FLUSH OFF means delete all?

You need to add more comments.

> +	case RTE_PMD_I40E_REGION_ALL_FLUSH_OFF:
> +		ret = i40e_flush_region_all_conf(hw, pf, 0);
> +		break;
> +
> +	default:
> +		PMD_DRV_LOG(WARNING, "op type (%d) not supported",
> +			    op_type);
> +		ret = -EINVAL;
> +		break;
> +	}
> +
> +	I40E_WRITE_FLUSH(hw);
> +
> +	return ret;
> +}
> diff --git a/drivers/net/i40e/rte_pmd_i40e.h b/drivers/net/i40e/rte_pmd_i40e.h
> index 155b7e8..a1329f4 100644
> --- a/drivers/net/i40e/rte_pmd_i40e.h
> +++ b/drivers/net/i40e/rte_pmd_i40e.h
> @@ -91,6 +91,20 @@ enum rte_pmd_i40e_package_info {
>  	RTE_PMD_I40E_PKG_INFO_MAX = 0xFFFFFFFF
>  };
> 
> +/**
> + *  Option types of queue region.
> + */
> +enum rte_pmd_i40e_queue_region_op {
> +	RTE_PMD_I40E_REGION_UNDEFINED = 0,
> +	RTE_PMD_I40E_QUEUE_REGION_SET,      /**< add queue region set*/
> +	RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET,   /**< add pf region pctype set */
> +	RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET,   /**< add vf region pctype set */
> +	RTE_PMD_I40E_UP_REGION_SET,   /**< add queue region pctype set */
> +	RTE_PMD_I40E_REGION_ALL_FLUSH_ON,   /**< flush on all configuration */
> +	RTE_PMD_I40E_REGION_ALL_FLUSH_OFF,   /**< flush off all configuration */
> +	RTE_PMD_I40E_QUEUE_REGION_OP_MAX
> +};
> +
>  #define RTE_PMD_I40E_DDP_NAME_SIZE 32
> 
>  /**
> @@ -146,6 +160,18 @@ struct rte_pmd_i40e_ptype_mapping {
>  };
> 
>  /**
> + * Queue region information get from CLI.
> + */
> +struct rte_i40e_rss_region_conf {
> +	uint8_t region_id;
> +	uint8_t hw_flowtype;
> +	uint8_t queue_start_index;
> +	uint8_t queue_num;
> +	uint8_t user_priority;
> +	enum rte_pmd_i40e_queue_region_op  op;
> +};
This is API struct, you need to comment them in detail

Thanks
Jingjing
  
Zhao1, Wei Sept. 25, 2017, 2:55 a.m. UTC | #10
Hi, Andrey

> -----Original Message-----

> From: Chilikin, Andrey

> Sent: Saturday, September 23, 2017 4:14 AM

> To: Zhao1, Wei <wei.zhao1@intel.com>; dev@dpdk.org

> Cc: Yigit, Ferruh <ferruh.yigit@intel.com>; Xing, Beilei

> <beilei.xing@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>

> Subject: RE: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and flush

> 

> Hi Wei

> 

> > -----Original Message-----

> > From: Zhao1, Wei

> > Sent: Friday, September 22, 2017 9:49 AM

> > To: Chilikin, Andrey <andrey.chilikin@intel.com>; dev@dpdk.org

> > Cc: Yigit, Ferruh <ferruh.yigit@intel.com>; Xing, Beilei

> > <beilei.xing@intel.com>; Wu, Jingjing <jingjing.wu@intel.com>

> > Subject: RE: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and

> > flush

> >

> > Hi,Andrey

> >

> > > -----Original Message-----

> > > From: Chilikin, Andrey

> > > Sent: Friday, September 22, 2017 3:53 AM

> > > To: Zhao1, Wei <wei.zhao1@intel.com>; dev@dpdk.org

> > > Cc: Zhao1, Wei <wei.zhao1@intel.com>; Yigit, Ferruh

> > > <ferruh.yigit@intel.com>; Xing, Beilei <beilei.xing@intel.com>; Wu,

> > > Jingjing <jingjing.wu@intel.com>

> > > Subject: RE: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set

> > > and

> > flush

> 

> <snip>

> 

> > > > +	if (i == I40E_REGION_MAX_INDEX) {

> > > > +		printf("The region sizes should be any of the following "

> > > > +		"values: 1, 2, 4, 8, 16, 32, 64 as long as the "

> > > > +		"total number of queues do not exceed the VSI allocation");

> 

>  printf?


Yes, just a reminder for user, you think I should use " PMD_INIT_LOG( )" instead?
I will change to that.

> 

> > > > +		return ret;

> > > > +	}

> > > > +

> > > > +	if (conf_ptr->region_id >= I40E_REGION_MAX_INDEX) {

> > > > +		PMD_INIT_LOG(ERR, "the queue region max index is 7");

> > > > +		return ret;

> > > > +	}

> > > > +

> 

> <snip>

> > > > +	switch (op_type) {

> > > > +	case RTE_PMD_I40E_QUEUE_REGION_SET:

> > > > +		ret = i40e_set_queue_region(pf, conf_ptr);

> > > > +		break;

> > > > +	case RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET:

> > > > +		ret = i40e_set_region_flowtype_pf(hw, pf, conf_ptr);

> > > > +		break;

> > > > +	case RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET:

> > > > +		ret = -EINVAL;

> > > > +		break;

> > >

> > > Why setting of flowtype mapping is separated to two different option?

> > > Does it mean that application will need to detect which driver it

> > > uses before calling this API? Application should not care which

> > > driver is in use, it tracks

> > the

> > > number of queues used for RSS and just configures these queues to

> > > different regions, regardless if it runs on top of PF or VF.

> > > Difference in configuration for PF/VF should be hidden from

> > > application, so only single RTE_PMD_I40E_REGION_FLOWTYPE_SET

> should be used.

> >

> > By now , we only support pf for this config.

> > But I think what you said is very reasonable. I can change code to a

> > new

> > scheme:

> > DPDK i40e driver use   rte_eth_dev ->device->name to detect  whether his

> > name is

> > " net_i40e_vf " or " net_i40e"  itself, then  we  can PMD driver

> > decide to config PFQF_HREGION or VFQF_HREGION.

> > I will make change in v4 later.

> >

> > >

> > > All other options do not have separate check.

> > > Will they work on both PF and VF?

> >

> > No, only WORK on PF. The reason we have talk in other mail before.

> >

> 

> Yes, it is why I was confused with separating

> RTE_PMD_I40E_REGION_FLOWTYPE to PF and VF. All options work on PF

> only, by only FLOWTYPE was split to PF/VF.


Yes,   I will point out that only pf support in commit log in v4.

> 

> > >

> > > > +	case RTE_PMD_I40E_UP_REGION_SET:

> > > > +		ret = i40e_set_up_region(pf, conf_ptr);

> > > > +		break;

> > > > +	case RTE_PMD_I40E_REGION_ALL_FLUSH_ON:

> > > > +		ret = i40e_flush_region_all_conf(hw, pf, 1);

> > > > +		break;

> > > > +	case RTE_PMD_I40E_REGION_ALL_FLUSH_OFF:

> > > > +		ret = i40e_flush_region_all_conf(hw, pf, 0);

> > > > +		break;

> > > > +

> 

> Could you explain what is the difference between FLUSH_OFF and

> FLUSH_ON and how it can be used by an application?


ALL config from CLI at first will only keep in DPDK software stored in driver, only after " FLUSH_ON ", it commit all configure to HW.
Because I have to set hardware config at a time, so I have to record all CLI command at first.
" FLUSH_OFF " is just clean all configuration about queue region  just now, and restore all to DPDK i40e driver default config when start up.
The following is my test process in CLI: 

./x86_64-native-linuxapp-gcc/app/testpmd -c 1ffff -n 4 -- -i --nb-cores=8  --rxq=8 --txq=8 --port-topology=chained
set fwd rxonly
port config all rss all
queue-region set port 0 region_id 0 queue_start_index 2 queue_num 2 
queue-region set port 0 region_id 1 queue_start_index 4 queue_num 2 
queue-region set pf port 0 region_id 0 flowtype 31
queue-region set pf port 0 region_id 1 flowtype 33
queue-region set pf port 0 region_id 1 flowtype 34
queue-region set port 0 UP 1 region_id 0
queue-region set port 0 UP 3 region_id 1
queue-region flush on  port 0
start
set verbose 1
sendp([Ether(dst="3C:FD:FE:9F:70:B8", src="52:00:00:00:00:00")/Dot1Q(prio=3)/IP(src="10.0.0.1",dst="192.168.0.2")/TCP(dport=80, sport=80 )/("X"*48)], iface="ens5f2", count=10)
sendp([Ether(dst="3C:FD:FE:9F:70:B8", src="52:00:00:00:00:00")/IP(src="10.0.0.1",dst="192.168.0.2")/SCTP(dport=80, sport=80 )/("X"*48)], iface="ens5f2", count=10)
sendp([Ether(dst="3C:FD:FE:9F:70:B8", src="52:00:00:00:00:00")/IP(src="10.0.0.1",dst="192.168.0.2")/UDP(dport=80, sport=80 )/("X"*48)], iface="ens5f2", count=10)

> 

> > > > +	default:

> > > > +		PMD_DRV_LOG(WARNING, "op type (%d) not supported",

> > > > +			    op_type);

> > > > +		ret = -EINVAL;

> > > > +		break;

> > > > +	}

> > > > +

> > > > +	I40E_WRITE_FLUSH(hw);

> > > > +

> > > > +	return ret;

> > > > +}

> > >

> >

> > > How this API can be tested, is there a separate testpmd patch?

> >

> > Yes, testpmd has add new CLI commands to support it, also can be use

> > by customer.

> 

> Sorry, missed second part of the patchset.

> 

> Regards,

> Andrey
  
Zhao1, Wei Sept. 25, 2017, 3:26 a.m. UTC | #11
Hi, jingjing

> -----Original Message-----

> From: Wu, Jingjing

> Sent: Monday, September 25, 2017 12:02 AM

> To: Zhao1, Wei <wei.zhao1@intel.com>; dev@dpdk.org

> Cc: Zhao1, Wei <wei.zhao1@intel.com>

> Subject: RE: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and flush

> 

> 

> 

> > -----Original Message-----

> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Wei Zhao

> > Sent: Friday, September 15, 2017 11:13 AM

> > To: dev@dpdk.org

> > Cc: Zhao1, Wei <wei.zhao1@intel.com>

> > Subject: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and

> > flush

> >

> > This feature enable queue regions configuration for RSS in PF/VF, so

> > that different traffic classes or different packet classification

> > types can be separated to different queues in different queue

> > regions.This patch can set queue region range, it include queue number

> > in a region and the index of first queue.

> > This patch enable mapping between different priorities (UP) and

> > different traffic classes.It also enable mapping between a region

> > index and a sepcific flowtype(PCTYPE).It also provide the solution of

> > flush all configuration about queue region the above described.

> >

> > Signed-off-by: Wei Zhao <wei.zhao1@intel.com>

> > ---

> >  drivers/net/i40e/i40e_ethdev.c            |  19 +-

> >  drivers/net/i40e/i40e_ethdev.h            |  30 ++

> >  drivers/net/i40e/rte_pmd_i40e.c           | 482

> > ++++++++++++++++++++++++++++++

> >  drivers/net/i40e/rte_pmd_i40e.h           |  38 +++

> >  drivers/net/i40e/rte_pmd_i40e_version.map |   1 +

> >  5 files changed, 566 insertions(+), 4 deletions(-)

> >

> > diff --git a/drivers/net/i40e/i40e_ethdev.c

> > b/drivers/net/i40e/i40e_ethdev.c index 8e0580c..1663fc0 100644

> > --- a/drivers/net/i40e/i40e_ethdev.c

> > +++ b/drivers/net/i40e/i40e_ethdev.c

> > @@ -137,10 +137,6 @@

> >  #define I40E_PRTTSYN_TSYNTYPE    0x0e000000

> >  #define I40E_CYCLECOUNTER_MASK   0xffffffffffffffffULL

> >

> > -#define I40E_MAX_PERCENT            100

> > -#define I40E_DEFAULT_DCB_APP_NUM    1

> > -#define I40E_DEFAULT_DCB_APP_PRIO   3

> > -

> >  /**

> >   * Below are values for writing un-exposed registers suggested

> >   * by silicon experts

> > @@ -1034,6 +1030,15 @@ i40e_init_fdir_filter_list(struct rte_eth_dev

> *dev)

> >  	return ret;

> >  }

> >

> > +static void

> > +i40e_init_queue_region_conf(struct rte_eth_dev *dev) {

> > +	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data-

> >dev_private);

> > +	struct i40e_queue_region_info *info = &pf->queue_region;

> > +

> > +	memset(info, 0, sizeof(struct i40e_queue_region_info)); }

> > +

> >  static int

> >  eth_i40e_dev_init(struct rte_eth_dev *dev)  { @@ -1309,6 +1314,9 @@

> > eth_i40e_dev_init(struct rte_eth_dev *dev)

> >  	if (ret < 0)

> >  		goto err_init_fdir_filter_list;

> >

> > +	/* initialize queue region configuration */

> > +	i40e_init_queue_region_conf(dev);

> > +

> If this func i40e_init_queue_region_conf is only called here, then it is Not

> necessary? Because all the pf struct has been zeroed at probe time.

> 


I find that t should also be called in i40e_flush_region_all_conf() to replace the code " memset(info, 0, sizeof(struct i40e_queue_region_info)); "
So, in v4 I will change to that. Another reason is to set zero after restart process from CLI command " start -stop -start...."

> >  	return 0;

> >

> >  err_init_fdir_filter_list:

> > @@ -1464,6 +1472,9 @@ eth_i40e_dev_uninit(struct rte_eth_dev *dev)

> >  	/* Remove all Traffic Manager configuration */

> >  	i40e_tm_conf_uninit(dev);

> >

> > +	/* Remove all the queue region configuration */

> > +	i40e_flush_region_all_conf(hw, pf, 0);

> > +

> >  	return 0;

> >  }

> >

> > diff --git a/drivers/net/i40e/i40e_ethdev.h

> > b/drivers/net/i40e/i40e_ethdev.h index ad80f0f..d612886 100644

> > --- a/drivers/net/i40e/i40e_ethdev.h

> > +++ b/drivers/net/i40e/i40e_ethdev.h

> > @@ -260,6 +260,16 @@ enum i40e_flxpld_layer_idx {  #define

> > I40E_QOS_BW_WEIGHT_MIN 1

> >  /* The max bandwidth weight is 127. */  #define

> > I40E_QOS_BW_WEIGHT_MAX 127

> > +/* The max queue region index is 7. */ #define

> I40E_REGION_MAX_INDEX

> > +7

> > +/* The max queue region userpriority is 7. */ #define

> > +I40E_REGION_USERPRIORITY_MAX_INDEX 7

> You can reuse I40E_MAX_USER_PRIORITY.


Ok, change in v4

> 

> > +/* The max pctype index is 63. */

> > +#define I40E_REGION_PCTYPE_MAX_INDEX 63

> > +

> You can use I40E_FILTER_PCTYPE_MAX


Ok, change in v4

> 

> > +#define I40E_MAX_PERCENT            100

> > +#define I40E_DEFAULT_DCB_APP_NUM    1

> > +#define I40E_DEFAULT_DCB_APP_PRIO   3

> >

> >  /**

> >   * The overhead from MTU to max frame size.

> > @@ -541,6 +551,22 @@ struct i40e_ethertype_rule {

> >  	struct rte_hash *hash_table;

> >  };

> >

> > +/* queue region info */

> > +struct i40e_region_info {

> > +	uint8_t region_id;

> > +	uint8_t queue_start_index;

> > +	uint8_t queue_num;

> > +	uint8_t user_priority_num;

> > +	uint8_t user_priority[I40E_MAX_USER_PRIORITY];

> Please comment what is the meaning of this array?


Ok, change in v4

> 

> > +	uint8_t flowtype_num;

> > +	uint8_t hw_flowtype[I40E_FILTER_PCTYPE_MAX];

> The same, please comment it.


Ok, change in v4

> 

> > +};

> > +

> > +struct i40e_queue_region_info {

> > +	uint16_t queue_region_number;

> > +	struct i40e_region_info region[I40E_REGION_MAX_INDEX + 1]; };

> > +

> >  /* Tunnel filter number HW supports */  #define

> > I40E_MAX_TUNNEL_FILTER_NUM 400

> >

> > @@ -776,6 +802,7 @@ struct i40e_pf {

> >  	struct i40e_fdir_info fdir; /* flow director info */

> >  	struct i40e_ethertype_rule ethertype; /* Ethertype filter rule */

> >  	struct i40e_tunnel_rule tunnel; /* Tunnel filter rule */

> > +	struct i40e_queue_region_info queue_region; /* queue region info

> */

> >  	struct i40e_fc_conf fc_conf; /* Flow control conf */

> >  	struct i40e_mirror_rule_list mirror_list;

> >  	uint16_t nb_mirror_rule;   /* The number of mirror rules */

> > @@ -1003,6 +1030,9 @@ void i40e_check_write_reg(struct i40e_hw *hw,

> > uint32_t addr, uint32_t val);  int i40e_tm_ops_get(struct rte_eth_dev

> > *dev, void *ops);  void i40e_tm_conf_init(struct rte_eth_dev *dev);

> > void i40e_tm_conf_uninit(struct rte_eth_dev *dev);

> > +int i40e_flush_region_all_conf(struct i40e_hw *hw,

> > +				struct i40e_pf *pf, uint16_t on);

> > +

> >

> >  #define I40E_DEV_TO_PCI(eth_dev) \

> >  	RTE_DEV_TO_PCI((eth_dev)->device)

> > diff --git a/drivers/net/i40e/rte_pmd_i40e.c

> > b/drivers/net/i40e/rte_pmd_i40e.c index d69a472..9a75f21 100644

> > --- a/drivers/net/i40e/rte_pmd_i40e.c

> > +++ b/drivers/net/i40e/rte_pmd_i40e.c

> > @@ -35,6 +35,7 @@

> >  #include <rte_tailq.h>

> >

> >  #include "base/i40e_prototype.h"

> > +#include "base/i40e_dcb.h"

> >  #include "i40e_ethdev.h"

> >  #include "i40e_pf.h"

> >  #include "i40e_rxtx.h"

> > @@ -2161,3 +2162,484 @@ rte_pmd_i40e_add_vf_mac_addr(uint8_t port,

> > uint16_t vf_id,

> >

> >  	return 0;

> >  }

> > +

> > +static int

> > +i40e_vsi_update_queue_region_mapping(struct i40e_hw *hw,

> > +			      struct i40e_pf *pf)

> > +{

> > +	uint16_t i;

> > +	struct i40e_vsi *vsi = pf->main_vsi;

> > +	uint16_t queue_offset, bsf, tc_index;

> > +	struct i40e_vsi_context ctxt;

> > +	struct i40e_aqc_vsi_properties_data *vsi_info;

> > +	struct i40e_queue_region_info *region_info =

> > +				&pf->queue_region;

> > +	uint32_t ret = -EINVAL;

> > +

> > +	if (!region_info->queue_region_number) {

> > +		PMD_INIT_LOG(ERR, "there is no that region id been set

> before");

> > +		return ret;

> > +	}

> > +

> > +	memset(&ctxt, 0, sizeof(struct i40e_vsi_context));

> > +

> > +	/* Update Queue Pairs Mapping for currently enabled UPs */

> > +	ctxt.seid = vsi->seid;

> > +	ctxt.pf_num = hw->pf_id;

> > +	ctxt.vf_num = 0;

> > +	ctxt.uplink_seid = vsi->uplink_seid;

> > +	ctxt.info = vsi->info;

> > +	vsi_info = &ctxt.info;

> > +

> Please use the vsi_info stored in i40e_vsi to avoid missing parameters.

> 

> > +	memset(vsi_info->tc_mapping, 0, sizeof(uint16_t) * 8);

> > +	memset(vsi_info->queue_mapping, 0, sizeof(uint16_t) * 16);

> > +

> Ctxt has been zeroed, no need to memset tc_mapping and queue_mapping

> again.


Ok, change in v4

> > +	/**

> > +	 * Configure queue region and queue mapping parameters,

> > +	 * for enabled queue region, allocate queues to this region.

> > +	 */

> > +

> > +	for (i = 0; i < region_info->queue_region_number; i++) {

> > +		tc_index = region_info->region[i].region_id;

> > +		bsf = rte_bsf32(region_info->region[i].queue_num);

> > +		queue_offset = region_info->region[i].queue_start_index;

> > +		vsi_info->tc_mapping[tc_index] = rte_cpu_to_le_16(

> > +			(queue_offset <<

> I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |

> > +				(bsf <<

> I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT));

> > +	}

> > +

> > +	/* Associate queue number with VSI, Keep vsi->nb_qps unchanged

> */

> > +	if (vsi->type == I40E_VSI_SRIOV) {

> Vsi is main vsi, it is impossible that the type to be SRIOV. If you want to

> configure Queue mapping on sriov VSI, you need to scan all the SRIOV VSIs.


Ok.

> 

> > +		vsi_info->mapping_flags |=

> > +

> 	rte_cpu_to_le_16(I40E_AQ_VSI_QUE_MAP_NONCONTIG);

> > +		for (i = 0; i < vsi->nb_qps; i++)

> > +			vsi_info->queue_mapping[i] =

> > +				rte_cpu_to_le_16(vsi->base_queue + i);

> > +	} else {

> > +		vsi_info->mapping_flags |=

> > +

> 	rte_cpu_to_le_16(I40E_AQ_VSI_QUE_MAP_CONTIG);

> > +		vsi_info->queue_mapping[0] = rte_cpu_to_le_16(vsi-

> >base_queue);

> > +	}

> > +	vsi_info->valid_sections |=

> > +

> 	rte_cpu_to_le_16(I40E_AQ_VSI_PROP_QUEUE_MAP_VALID);

> > +

> > +	/* Update the VSI after updating the VSI queue-mapping

> information */

> > +	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);

> > +	if (ret) {

> > +		PMD_INIT_LOG(ERR, "Failed to configure queue region "

> PMD_INIT_LOG  -> PMD_DRV_LOG

> > +			"mapping = %d ", hw->aq.asq_last_status);

> > +		return ret;

> > +	}

> > +	/* update the local VSI info with updated queue map */

> > +	(void)rte_memcpy(&vsi->info.tc_mapping, &ctxt.info.tc_mapping,

> > +					sizeof(vsi->info.tc_mapping));

> > +	(void)rte_memcpy(&vsi->info.queue_mapping,

> > +			&ctxt.info.queue_mapping,

> > +		sizeof(vsi->info.queue_mapping));

> > +	vsi->info.mapping_flags = ctxt.info.mapping_flags;

> > +	vsi->info.valid_sections = 0;

> > +

> > +	return 0;

> > +}

> > +

> > +

> > +static int

> > +i40e_set_queue_region(struct i40e_pf *pf,

> > +				struct rte_i40e_rss_region_conf *conf_ptr) {

> > +	uint16_t tc_size_tb[7] = {1, 2, 4, 8, 16, 32, 64};

> > +	uint16_t i;

> > +	struct i40e_vsi *main_vsi = pf->main_vsi;

> > +	struct i40e_queue_region_info *info = &pf->queue_region;

> > +	uint32_t ret = -EINVAL;

> > +

> > +	for (i = 0; i < I40E_REGION_MAX_INDEX; i++)

> > +		if (conf_ptr->queue_num == tc_size_tb[i])

> > +			break;

> > +

> > +	if (i == I40E_REGION_MAX_INDEX) {

> > +		printf("The region sizes should be any of the following "

> > +		"values: 1, 2, 4, 8, 16, 32, 64 as long as the "

> > +		"total number of queues do not exceed the VSI allocation");

> > +		return ret;

> > +	}

> > +

> If you just want to check if the queue_nume is one of {1, 2,4,8,16,32,64} You

> can use

> 

> If (rte_is_power_of_2(queue_num) && queue_num <= 64)

> 

> 

> > +	if (conf_ptr->region_id >= I40E_REGION_MAX_INDEX) {

> > +		PMD_INIT_LOG(ERR, "the queue region max index is 7");

> > +		return ret;

> > +	}

> > +

> > +	if ((conf_ptr->queue_start_index + conf_ptr->queue_num)

> > +					> main_vsi->nb_used_qps) {

> > +		PMD_INIT_LOG(ERR, "the queue index exceeds the VSI

> range");

> > +		return ret;

> > +	}

> > +

> > +	if (info->queue_region_number) {

> > +		for (i = 0; i < info->queue_region_number; i++)

> > +			if (conf_ptr->region_id == info->region[i].region_id)

> > +				break;

> > +

> > +		if ((i == info->queue_region_number) &&

> > +			(i <= I40E_REGION_MAX_INDEX)) {

> It is impossible that i > I40E_REGION_MAX_INDEX, right?

> 

> > +			info->region[i].region_id = conf_ptr->region_id;

> > +			info->region[i].queue_num = conf_ptr->queue_num;

> > +			info->region[i].queue_start_index =

> > +				conf_ptr->queue_start_index;

> > +			info->queue_region_number++;

> > +		} else {

> > +			PMD_INIT_LOG(ERR, "queue region number "

> > +				"exceeds maxnum 8 or the "

> > +				"queue region id has "

> > +				"been set before");

> Does it mean we can not change the set of one queue region whose id is set

> before?

> 

> > +			return ret;

> > +		}

> > +	} else {

> > +		info->region[0].region_id = conf_ptr->region_id;

> > +		info->region[0].queue_num = conf_ptr->queue_num;

> > +		info->region[0].queue_start_index =

> > +			conf_ptr->queue_start_index;

> > +		info->queue_region_number++;

> > +	}

> I think this part of code can be refined.

> > +

> > +	return 0;

> > +}

> > +

> > +static int

> > +i40e_set_region_flowtype_pf(struct i40e_hw *hw,

> > +		struct i40e_pf *pf, struct rte_i40e_rss_region_conf

> *conf_ptr) {

> > +	uint32_t pfqf_hregion;

> > +	uint32_t ret = -EINVAL;

> > +	struct i40e_queue_region_info *info = &pf->queue_region;

> > +	uint16_t i, j, index, flowtype_set = 0;

> > +	uint16_t region_index, flowtype_index;

> > +

> > +	if (conf_ptr->region_id > I40E_PFQF_HREGION_MAX_INDEX) {

> > +		PMD_INIT_LOG(ERR, "the queue region max index is 7");

> > +		return ret;

> > +	}

> > +

> > +	if (conf_ptr->hw_flowtype >= I40E_FILTER_PCTYPE_MAX) {

> > +		PMD_INIT_LOG(ERR, "the hw_flowtype or PCTYPE max index

> is 63");

> > +		return ret;

> > +	}

> > +

> > +	if (info->queue_region_number) {

> > +		for (i = 0; i < info->queue_region_number; i++)

> > +			if (conf_ptr->region_id == info->region[i].region_id)

> > +				break;

> > +

> > +		if (i == info->queue_region_number) {

> > +			PMD_INIT_LOG(ERR, "that region id has not "

> > +				"been set before");

> > +			return ret;

> > +		}

> If info->queue_region_number==0, then i would be 0. So, the outer wrap if

> (info->queue_region_number) {} can be omit. The same at above func, I

> think this part of code can be refined.


Let me have a little think about it.

> > +		region_index = i;

> > +

> > +		for (i = 0; i < info->queue_region_number; i++) {

> > +			for (j = 0; j < info->region[i].flowtype_num; j++) {

> > +				if (conf_ptr->hw_flowtype ==

> > +					info->region[i].hw_flowtype[j]) {

> > +					flowtype_set = 1;

> > +					break;

> > +				}

> > +			}

> > +		}

> > +

> > +		if (flowtype_set) {

> > +			PMD_INIT_LOG(ERR, "that hw_flowtype "

> > +				"has been set before");

> > +			return ret;

> Is that right to return -EINVAL when it has been set. And it seems In this

> function, the error code is only -EINVAL, if so, why we define ret?


Yes, if it has been set , I will not do a second time. 
Other code also use rte as this way.
 

> > +		}

> > +		flowtype_index = info->region[region_index].flowtype_num;

> > +		info->region[region_index].hw_flowtype[flowtype_index] =

> > +						conf_ptr->hw_flowtype;

> > +		info->region[region_index].flowtype_num++;

> > +	} else  {

> > +		PMD_INIT_LOG(ERR, "there is no that region id been set

> before");

> 

> Again, please use PMD_DRV_LOG. Because the func is not called at

> initialization time.



Ok, change in v4

> Please replace it in all the patch.

> > +		return ret;

> > +	}

> > +

> > +	index = conf_ptr->hw_flowtype >> 3;

> > +	pfqf_hregion = i40e_read_rx_ctl(hw, I40E_PFQF_HREGION(index));

> > +

> > +	if ((conf_ptr->hw_flowtype & 0x7) == 0) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_0_SHIFT;

> > +		pfqf_hregion |= 1 <<

> I40E_PFQF_HREGION_OVERRIDE_ENA_0_SHIFT;

> > +	} else if ((conf_ptr->hw_flowtype & 0x7) == 1) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_1_SHIFT;

> > +		pfqf_hregion |= 1 <<

> I40E_PFQF_HREGION_OVERRIDE_ENA_1_SHIFT;

> > +	} else if ((conf_ptr->hw_flowtype & 0x7) == 2) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_2_SHIFT;

> > +		pfqf_hregion |= 1 <<

> I40E_PFQF_HREGION_OVERRIDE_ENA_2_SHIFT;

> > +	} else if ((conf_ptr->hw_flowtype & 0x7) == 3) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_3_SHIFT;

> > +		pfqf_hregion |= 1 <<

> I40E_PFQF_HREGION_OVERRIDE_ENA_3_SHIFT;

> > +	} else if ((conf_ptr->hw_flowtype & 0x7) == 4) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_4_SHIFT;

> > +		pfqf_hregion |= 1 <<

> I40E_PFQF_HREGION_OVERRIDE_ENA_4_SHIFT;

> > +	} else if ((conf_ptr->hw_flowtype & 0x7) == 5) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_5_SHIFT;

> > +		pfqf_hregion |= 1 <<

> I40E_PFQF_HREGION_OVERRIDE_ENA_5_SHIFT;

> > +	} else if ((conf_ptr->hw_flowtype & 0x7) == 6) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_6_SHIFT;

> > +		pfqf_hregion |= 1 <<

> I40E_PFQF_HREGION_OVERRIDE_ENA_6_SHIFT;

> What is the (conf_ptr->hw_flowtype & 0x7) == 1/2/3/4/5/6 meanings?

> Please avoid coding with integer number. Can not understand the

> relationship with flow type.

> And the hw_flowtype is API to users, you need to make it clear enough.


No practical meaning, just find position for that pctype, because 8 type pctype reuse one register.
We support 64 types of them.

> 

> > +	} else {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_7_SHIFT;

> > +		pfqf_hregion |= 1 <<

> I40E_PFQF_HREGION_OVERRIDE_ENA_7_SHIFT;

> > +	}

> > +

> > +	i40e_write_rx_ctl(hw, I40E_PFQF_HREGION(index), pfqf_hregion);

> > +

> > +	return 0;

> > +}

> > +

> > +static int

> > +i40e_set_up_region(struct i40e_pf *pf,

> > +				struct rte_i40e_rss_region_conf *conf_ptr) {

> > +	struct i40e_queue_region_info *info = &pf->queue_region;

> > +	uint32_t ret = -EINVAL;

> > +	uint16_t i, j, region_index, up_set = 0;

> > +

> > +	if (conf_ptr->user_priority >

> I40E_REGION_USERPRIORITY_MAX_INDEX) {

> > +		PMD_INIT_LOG(ERR, "the queue region max index is 7");

> > +		return ret;

> > +	}

> > +

> > +	if (conf_ptr->region_id >= I40E_REGION_MAX_INDEX) {

> > +		PMD_INIT_LOG(ERR, "the region_id max index is 7");

> > +		return ret;

> > +	}

> > +

> > +	if (info->queue_region_number) {

> > +		for (i = 0; i < info->queue_region_number; i++)

> > +			if (conf_ptr->region_id == info->region[i].region_id)

> > +				break;

> > +

> > +		if (i == info->queue_region_number) {

> > +			PMD_INIT_LOG(ERR, "that region id "

> > +				"has not been set before");

> > +			return ret;

> > +		}

> > +		region_index = i;

> > +

> > +		for (i = 0; i < info->queue_region_number; i++) {

> > +			for (j = 0; j <

> > +				info->region[i].user_priority_num; j++) {

> > +				if (info->region[i].user_priority[j] ==

> > +					conf_ptr->user_priority) {

> > +					up_set = 1;

> > +					break;

> > +				}

> > +			}

> > +		}

> > +

> > +		if (up_set) {

> > +			PMD_INIT_LOG(ERR, "that user priority "

> > +				"has been set before");

> > +			return ret;

> > +		}

> > +

> > +		j = info->region[region_index].user_priority_num;

> > +		info->region[region_index].user_priority[j] =

> > +						conf_ptr->user_priority;

> > +		info->region[region_index].user_priority_num++;

> > +	} else {

> > +		PMD_INIT_LOG(ERR, "there is no that region id been set

> before");

> > +		return ret;

> > +	}

> > +

> > +	return 0;

> > +}

> > +

> > +static int

> > +i40e_queue_region_dcb_configure(struct i40e_hw *hw,

> > +				struct i40e_pf *pf)

> > +{

> > +	struct i40e_dcbx_config dcb_cfg_local;

> > +	struct i40e_dcbx_config *dcb_cfg;

> > +	struct i40e_queue_region_info *info = &pf->queue_region;

> > +	struct i40e_dcbx_config *old_cfg = &hw->local_dcbx_config;

> > +	uint32_t ret = -EINVAL;

> > +	uint16_t i, j, prio_index, region_index;

> > +	uint8_t tc_map, tc_bw, bw_lf;

> > +

> > +	if (!info->queue_region_number) {

> > +		PMD_INIT_LOG(ERR, "there is no that region id been set

> before");

> > +		return ret;

> > +	}

> > +

> > +	dcb_cfg = &dcb_cfg_local;

> > +	memset(dcb_cfg, 0, sizeof(struct i40e_dcbx_config));

> > +

> > +	/* assume each tc has the same bw */

> > +	tc_bw = I40E_MAX_PERCENT / info->queue_region_number;

> > +	for (i = 0; i < info->queue_region_number; i++)

> > +		dcb_cfg->etscfg.tcbwtable[i] = tc_bw;

> > +	/* to ensure the sum of tcbw is equal to 100 */

> > +	bw_lf = I40E_MAX_PERCENT %  info->queue_region_number;

> > +	for (i = 0; i < bw_lf; i++)

> > +		dcb_cfg->etscfg.tcbwtable[i]++;

> > +

> > +	/* assume each tc has the same Transmission Selection Algorithm */

> > +	for (i = 0; i < info->queue_region_number; i++)

> > +		dcb_cfg->etscfg.tsatable[i] = I40E_IEEE_TSA_ETS;

> > +

> > +	for (i = 0; i < info->queue_region_number; i++) {

> > +		for (j = 0; j < info->region[i].user_priority_num; j++) {

> > +			prio_index = info->region[i].user_priority[j];

> > +			region_index = info->region[i].region_id;

> > +			dcb_cfg->etscfg.prioritytable[prio_index] =

> > +						region_index;

> > +		}

> > +	}

> > +

> > +	/* FW needs one App to configure HW */

> > +	dcb_cfg->numapps = I40E_DEFAULT_DCB_APP_NUM;

> > +	dcb_cfg->app[0].selector = I40E_APP_SEL_ETHTYPE;

> > +	dcb_cfg->app[0].priority = I40E_DEFAULT_DCB_APP_PRIO;

> > +	dcb_cfg->app[0].protocolid = I40E_APP_PROTOID_FCOE;

> > +

> > +	tc_map = RTE_LEN2MASK(info->queue_region_number, uint8_t);

> > +

> > +	dcb_cfg->pfc.willing = 0;

> > +	dcb_cfg->pfc.pfccap = I40E_MAX_TRAFFIC_CLASS;

> > +	dcb_cfg->pfc.pfcenable = tc_map;

> > +

> > +	/* Copy the new config to the current config */

> > +	*old_cfg = *dcb_cfg;

> > +	old_cfg->etsrec = old_cfg->etscfg;

> > +	ret = i40e_set_dcb_config(hw);

> > +

> > +	if (ret) {

> > +		PMD_INIT_LOG(ERR, "Set queue region DCB Config failed,

> err"

> > +			" %s aq_err %s",

> > +			 i40e_stat_str(hw, ret),

> > +			 i40e_aq_str(hw, hw->aq.asq_last_status));

> > +		return ret;

> > +	}

> > +

> > +	return 0;

> > +}

> > +

> > +static int

> > +i40e_queue_region_dcb_configure_default(struct i40e_hw *hw)

> 

> Can i40e_dcb_init_configure do what this func did here?


It seems ok, change in v4.
 
> 

> > +{

> > +	uint16_t i;

> > +	uint32_t ret = -EINVAL;

> > +

> > +	memset(&hw->local_dcbx_config, 0,

> > +	sizeof(struct i40e_dcbx_config));

> > +	/* set dcb default configuration */

> > +	hw->local_dcbx_config.etscfg.willing = 0;

> > +	hw->local_dcbx_config.etscfg.maxtcs = 0;

> > +	hw->local_dcbx_config.etscfg.tcbwtable[0] = 100;

> > +	hw->local_dcbx_config.etscfg.tsatable[0] =

> > +			I40E_IEEE_TSA_ETS;

> > +	/* all UPs mapping to region 0 */

> > +	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++)

> > +		hw->local_dcbx_config.etscfg.prioritytable[i] = 0;

> > +	hw->local_dcbx_config.etsrec =

> > +		hw->local_dcbx_config.etscfg;

> > +	hw->local_dcbx_config.pfc.willing = 0;

> > +	hw->local_dcbx_config.pfc.pfccap =

> > +			I40E_MAX_TRAFFIC_CLASS;

> > +	/* FW needs one App to configure HW */

> > +	hw->local_dcbx_config.numapps = 1;

> > +	hw->local_dcbx_config.app[0].selector =

> > +			I40E_APP_SEL_ETHTYPE;

> > +	hw->local_dcbx_config.app[0].priority = 3;

> > +	hw->local_dcbx_config.app[0].protocolid =

> > +			I40E_APP_PROTOID_FCOE;

> > +	ret = i40e_set_dcb_config(hw);

> > +	if (ret) {

> > +		PMD_INIT_LOG(ERR,

> > +		"default dcb config fails. err = %d, aq_err = %d.",

> > +			ret, hw->aq.asq_last_status);

> > +		return ret;

> > +	}

> > +

> > +	return 0;

> > +}

> > +

> > +int

> > +i40e_flush_region_all_conf(struct i40e_hw *hw, struct i40e_pf *pf,

> > +				uint16_t on)

> > +{

> > +	uint16_t i;

> > +	struct i40e_queue_region_info *info = &pf->queue_region;

> > +

> > +	if (on) {

> > +		i40e_vsi_update_queue_region_mapping(hw, pf);

> > +		i40e_queue_region_dcb_configure(hw, pf);

> Don't check all the return code?


> 

> > +		return 0;

> > +	}

> > +

> > +	info->queue_region_number = 1;

> > +	info->region[0].queue_num = 64;

> > +	info->region[0].queue_start_index = 0;

> > +

> > +	i40e_vsi_update_queue_region_mapping(hw, pf);

> > +	i40e_queue_region_dcb_configure_default(hw);

> > +

> > +	for (i = 0; i < I40E_PFQF_HREGION_MAX_INDEX; i++)

> > +		i40e_write_rx_ctl(hw, I40E_PFQF_HREGION(i), 0);

> > +	memset(info, 0, sizeof(struct i40e_queue_region_info));

> > +

> > +	return 0;

> > +}

> > +

> > +int rte_pmd_i40e_queue_region_conf(uint8_t port,

> > +			struct rte_i40e_rss_region_conf *conf_ptr) {

> > +	struct rte_eth_dev *dev = &rte_eth_devices[port];

> > +	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data-

> >dev_private);

> > +	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data-

> >dev_private);

> > +	enum rte_pmd_i40e_queue_region_op op_type = conf_ptr->op;

> > +	uint32_t ret;

> > +

> > +	if (!is_i40e_supported(dev))

> > +		return -ENOTSUP;

> > +

> > +	switch (op_type) {

> > +	case RTE_PMD_I40E_QUEUE_REGION_SET:

> > +		ret = i40e_set_queue_region(pf, conf_ptr);

> > +		break;

> > +	case RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET:

> > +		ret = i40e_set_region_flowtype_pf(hw, pf, conf_ptr);

> > +		break;

> > +	case RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET:

> > +		ret = -EINVAL;

> > +		break;

> If it is not supported, why to define this op?

> 

> > +	case RTE_PMD_I40E_UP_REGION_SET:

> > +		ret = i40e_set_up_region(pf, conf_ptr);

> > +		break;

> > +	case RTE_PMD_I40E_REGION_ALL_FLUSH_ON:

> > +		ret = i40e_flush_region_all_conf(hw, pf, 1);

> > +		break;

> What is the FLUSH on meaning? Commit all configure to HW?

> If so, what makes me confused that:

> RTE_PMD_I40E_QUEUE_REGION_SET only touch the software stored in

> driver.

> But RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET set the

> I40E_PFQF_HREGION register.

> Those behaviors are not consist.

> 

> And FLUSH OFF means delete all?

> 

> You need to add more comments.


I will add more comment in function comment.
ALL config from CLI at first will only keep in DPDK software stored in driver, only after " FLUSH_ON ", it commit all configure to HW.
Because I have to set hardware config at a time, so I have to record all CLI command at first.
" FLUSH_OFF " is just clean all configuration about queue region  just now, and restore all to DPDK i40e driver default config when start up.
The following is my test process in CLI: 

./x86_64-native-linuxapp-gcc/app/testpmd -c 1ffff -n 4 -- -i --nb-cores=8  --rxq=8 --txq=8 --port-topology=chained set fwd rxonly port config all rss all queue-region set port 0 region_id 0 queue_start_index 2 queue_num 2 queue-region set port 0 region_id 1 queue_start_index 4 queue_num 2 queue-region set pf port 0 region_id 0 flowtype 31 queue-region set pf port 0 region_id 1 flowtype 33 queue-region set pf port 0 region_id 1 flowtype 34 queue-region set port 0 UP 1 region_id 0 queue-region set port 0 UP 3 region_id 1 queue-region flush on  port 0 start set verbose 1 sendp([Ether(dst="3C:FD:FE:9F:70:B8", src="52:00:00:00:00:00")/Dot1Q(prio=3)/IP(src="10.0.0.1",dst="192.168.0.2")/TCP(dport=80, sport=80 )/("X"*48)], iface="ens5f2", count=10) sendp([Ether(dst="3C:FD:FE:9F:70:B8", src="52:00:00:00:00:00")/IP(src="10.0.0.1",dst="192.168.0.2")/SCTP(dport=80, sport=80 )/("X"*48)], iface="ens5f2", count=10) sendp([Ether(dst="3C:FD:FE:9F:70:B8", src="52:00:00:00:00:00")/IP(src="10.0.0.1",dst="192.168.0.2")/UDP(dport=80, sport=80 )/("X"*48)], iface="ens5f2", count=10)


> 

> > +	case RTE_PMD_I40E_REGION_ALL_FLUSH_OFF:

> > +		ret = i40e_flush_region_all_conf(hw, pf, 0);

> > +		break;

> > +

> > +	default:

> > +		PMD_DRV_LOG(WARNING, "op type (%d) not supported",

> > +			    op_type);

> > +		ret = -EINVAL;

> > +		break;

> > +	}

> > +

> > +	I40E_WRITE_FLUSH(hw);

> > +

> > +	return ret;

> > +}

> > diff --git a/drivers/net/i40e/rte_pmd_i40e.h

> > b/drivers/net/i40e/rte_pmd_i40e.h index 155b7e8..a1329f4 100644

> > --- a/drivers/net/i40e/rte_pmd_i40e.h

> > +++ b/drivers/net/i40e/rte_pmd_i40e.h

> > @@ -91,6 +91,20 @@ enum rte_pmd_i40e_package_info {

> >  	RTE_PMD_I40E_PKG_INFO_MAX = 0xFFFFFFFF  };

> >

> > +/**

> > + *  Option types of queue region.

> > + */

> > +enum rte_pmd_i40e_queue_region_op {

> > +	RTE_PMD_I40E_REGION_UNDEFINED = 0,

> > +	RTE_PMD_I40E_QUEUE_REGION_SET,      /**< add queue region

> set*/

> > +	RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET,   /**< add pf region

> pctype set */

> > +	RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET,   /**< add vf region

> pctype set */

> > +	RTE_PMD_I40E_UP_REGION_SET,   /**< add queue region pctype

> set */

> > +	RTE_PMD_I40E_REGION_ALL_FLUSH_ON,   /**< flush on all

> configuration */

> > +	RTE_PMD_I40E_REGION_ALL_FLUSH_OFF,   /**< flush off all

> configuration */

> > +	RTE_PMD_I40E_QUEUE_REGION_OP_MAX

> > +};

> > +

> >  #define RTE_PMD_I40E_DDP_NAME_SIZE 32

> >

> >  /**

> > @@ -146,6 +160,18 @@ struct rte_pmd_i40e_ptype_mapping {  };

> >

> >  /**

> > + * Queue region information get from CLI.

> > + */

> > +struct rte_i40e_rss_region_conf {

> > +	uint8_t region_id;

> > +	uint8_t hw_flowtype;

> > +	uint8_t queue_start_index;

> > +	uint8_t queue_num;

> > +	uint8_t user_priority;

> > +	enum rte_pmd_i40e_queue_region_op  op; };

> This is API struct, you need to comment them in detail


Ok, change in v4

> 

> Thanks

> Jingjing
  
Zhao1, Wei Sept. 25, 2017, 5:55 a.m. UTC | #12
Hi, jingjing

> -----Original Message-----

> From: Wu, Jingjing

> Sent: Monday, September 25, 2017 12:02 AM

> To: Zhao1, Wei <wei.zhao1@intel.com>; dev@dpdk.org

> Cc: Zhao1, Wei <wei.zhao1@intel.com>

> Subject: RE: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and flush

> 

> 

> 

> > -----Original Message-----

> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Wei Zhao

> > Sent: Friday, September 15, 2017 11:13 AM

> > To: dev@dpdk.org

> > Cc: Zhao1, Wei <wei.zhao1@intel.com>

> > Subject: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and

> > flush

> >

> > This feature enable queue regions configuration for RSS in PF/VF, so

> > that different traffic classes or different packet classification

> > types can be separated to different queues in different queue

> > regions.This patch can set queue region range, it include queue number

> > in a region and the index of first queue.

> > This patch enable mapping between different priorities (UP) and

> > different traffic classes.It also enable mapping between a region

> > index and a sepcific flowtype(PCTYPE).It also provide the solution of

> > flush all configuration about queue region the above described.

> >

> > Signed-off-by: Wei Zhao <wei.zhao1@intel.com>

> > ---

> >  drivers/net/i40e/i40e_ethdev.c            |  19 +-

> >  drivers/net/i40e/i40e_ethdev.h            |  30 ++

> >  drivers/net/i40e/rte_pmd_i40e.c           | 482

> > ++++++++++++++++++++++++++++++

> >  drivers/net/i40e/rte_pmd_i40e.h           |  38 +++

> >  drivers/net/i40e/rte_pmd_i40e_version.map |   1 +

> >  5 files changed, 566 insertions(+), 4 deletions(-)

> >

> > diff --git a/drivers/net/i40e/i40e_ethdev.c

> > b/drivers/net/i40e/i40e_ethdev.c index 8e0580c..1663fc0 100644

> > --- a/drivers/net/i40e/i40e_ethdev.c

> > +++ b/drivers/net/i40e/i40e_ethdev.c

> > @@ -137,10 +137,6 @@

> >  #define I40E_PRTTSYN_TSYNTYPE    0x0e000000

> >  #define I40E_CYCLECOUNTER_MASK   0xffffffffffffffffULL

> >

> > -#define I40E_MAX_PERCENT            100

> > -#define I40E_DEFAULT_DCB_APP_NUM    1

> > -#define I40E_DEFAULT_DCB_APP_PRIO   3

> > -

> >  /**

> >   * Below are values for writing un-exposed registers suggested

> >   * by silicon experts

> > @@ -1034,6 +1030,15 @@ i40e_init_fdir_filter_list(struct rte_eth_dev

> *dev)

> >  	return ret;

> >  }

> >

> > +static void

> > +i40e_init_queue_region_conf(struct rte_eth_dev *dev) {

> > +	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data-

> >dev_private);

> > +	struct i40e_queue_region_info *info = &pf->queue_region;

> > +

> > +	memset(info, 0, sizeof(struct i40e_queue_region_info)); }

> > +

> >  static int

> >  eth_i40e_dev_init(struct rte_eth_dev *dev)  { @@ -1309,6 +1314,9 @@

> > eth_i40e_dev_init(struct rte_eth_dev *dev)

> >  	if (ret < 0)

> >  		goto err_init_fdir_filter_list;

> >

> > +	/* initialize queue region configuration */

> > +	i40e_init_queue_region_conf(dev);

> > +

> If this func i40e_init_queue_region_conf is only called here, then it is Not

> necessary? Because all the pf struct has been zeroed at probe time.

> 

> >  	return 0;

> >

> >  err_init_fdir_filter_list:

> > @@ -1464,6 +1472,9 @@ eth_i40e_dev_uninit(struct rte_eth_dev *dev)

> >  	/* Remove all Traffic Manager configuration */

> >  	i40e_tm_conf_uninit(dev);

> >

> > +	/* Remove all the queue region configuration */

> > +	i40e_flush_region_all_conf(hw, pf, 0);

> > +

> >  	return 0;

> >  }

> >

> > diff --git a/drivers/net/i40e/i40e_ethdev.h

> > b/drivers/net/i40e/i40e_ethdev.h index ad80f0f..d612886 100644

> > --- a/drivers/net/i40e/i40e_ethdev.h

> > +++ b/drivers/net/i40e/i40e_ethdev.h

> > @@ -260,6 +260,16 @@ enum i40e_flxpld_layer_idx {  #define

> > I40E_QOS_BW_WEIGHT_MIN 1

> >  /* The max bandwidth weight is 127. */  #define

> > I40E_QOS_BW_WEIGHT_MAX 127

> > +/* The max queue region index is 7. */ #define

> I40E_REGION_MAX_INDEX

> > +7

> > +/* The max queue region userpriority is 7. */ #define

> > +I40E_REGION_USERPRIORITY_MAX_INDEX 7

> You can reuse I40E_MAX_USER_PRIORITY.

> 

> > +/* The max pctype index is 63. */

> > +#define I40E_REGION_PCTYPE_MAX_INDEX 63

> > +

> You can use I40E_FILTER_PCTYPE_MAX

> 

> > +#define I40E_MAX_PERCENT            100

> > +#define I40E_DEFAULT_DCB_APP_NUM    1

> > +#define I40E_DEFAULT_DCB_APP_PRIO   3

> >

> >  /**

> >   * The overhead from MTU to max frame size.

> > @@ -541,6 +551,22 @@ struct i40e_ethertype_rule {

> >  	struct rte_hash *hash_table;

> >  };

> >

> > +/* queue region info */

> > +struct i40e_region_info {

> > +	uint8_t region_id;

> > +	uint8_t queue_start_index;

> > +	uint8_t queue_num;

> > +	uint8_t user_priority_num;

> > +	uint8_t user_priority[I40E_MAX_USER_PRIORITY];

> Please comment what is the meaning of this array?

> 

> > +	uint8_t flowtype_num;

> > +	uint8_t hw_flowtype[I40E_FILTER_PCTYPE_MAX];

> The same, please comment it.

> 

> > +};

> > +

> > +struct i40e_queue_region_info {

> > +	uint16_t queue_region_number;

> > +	struct i40e_region_info region[I40E_REGION_MAX_INDEX + 1]; };

> > +

> >  /* Tunnel filter number HW supports */  #define

> > I40E_MAX_TUNNEL_FILTER_NUM 400

> >

> > @@ -776,6 +802,7 @@ struct i40e_pf {

> >  	struct i40e_fdir_info fdir; /* flow director info */

> >  	struct i40e_ethertype_rule ethertype; /* Ethertype filter rule */

> >  	struct i40e_tunnel_rule tunnel; /* Tunnel filter rule */

> > +	struct i40e_queue_region_info queue_region; /* queue region info

> */

> >  	struct i40e_fc_conf fc_conf; /* Flow control conf */

> >  	struct i40e_mirror_rule_list mirror_list;

> >  	uint16_t nb_mirror_rule;   /* The number of mirror rules */

> > @@ -1003,6 +1030,9 @@ void i40e_check_write_reg(struct i40e_hw *hw,

> > uint32_t addr, uint32_t val);  int i40e_tm_ops_get(struct rte_eth_dev

> > *dev, void *ops);  void i40e_tm_conf_init(struct rte_eth_dev *dev);

> > void i40e_tm_conf_uninit(struct rte_eth_dev *dev);

> > +int i40e_flush_region_all_conf(struct i40e_hw *hw,

> > +				struct i40e_pf *pf, uint16_t on);

> > +

> >

> >  #define I40E_DEV_TO_PCI(eth_dev) \

> >  	RTE_DEV_TO_PCI((eth_dev)->device)

> > diff --git a/drivers/net/i40e/rte_pmd_i40e.c

> > b/drivers/net/i40e/rte_pmd_i40e.c index d69a472..9a75f21 100644

> > --- a/drivers/net/i40e/rte_pmd_i40e.c

> > +++ b/drivers/net/i40e/rte_pmd_i40e.c

> > @@ -35,6 +35,7 @@

> >  #include <rte_tailq.h>

> >

> >  #include "base/i40e_prototype.h"

> > +#include "base/i40e_dcb.h"

> >  #include "i40e_ethdev.h"

> >  #include "i40e_pf.h"

> >  #include "i40e_rxtx.h"

> > @@ -2161,3 +2162,484 @@ rte_pmd_i40e_add_vf_mac_addr(uint8_t port,

> > uint16_t vf_id,

> >

> >  	return 0;

> >  }

> > +

> > +static int

> > +i40e_vsi_update_queue_region_mapping(struct i40e_hw *hw,

> > +			      struct i40e_pf *pf)

> > +{

> > +	uint16_t i;

> > +	struct i40e_vsi *vsi = pf->main_vsi;

> > +	uint16_t queue_offset, bsf, tc_index;

> > +	struct i40e_vsi_context ctxt;

> > +	struct i40e_aqc_vsi_properties_data *vsi_info;

> > +	struct i40e_queue_region_info *region_info =

> > +				&pf->queue_region;

> > +	uint32_t ret = -EINVAL;

> > +

> > +	if (!region_info->queue_region_number) {

> > +		PMD_INIT_LOG(ERR, "there is no that region id been set

> before");

> > +		return ret;

> > +	}

> > +

> > +	memset(&ctxt, 0, sizeof(struct i40e_vsi_context));

> > +

> > +	/* Update Queue Pairs Mapping for currently enabled UPs */

> > +	ctxt.seid = vsi->seid;

> > +	ctxt.pf_num = hw->pf_id;

> > +	ctxt.vf_num = 0;

> > +	ctxt.uplink_seid = vsi->uplink_seid;

> > +	ctxt.info = vsi->info;

> > +	vsi_info = &ctxt.info;

> > +

> Please use the vsi_info stored in i40e_vsi to avoid missing parameters.

> 

> > +	memset(vsi_info->tc_mapping, 0, sizeof(uint16_t) * 8);

> > +	memset(vsi_info->queue_mapping, 0, sizeof(uint16_t) * 16);

> > +

> Ctxt has been zeroed, no need to memset tc_mapping and queue_mapping

> again.

> > +	/**

> > +	 * Configure queue region and queue mapping parameters,

> > +	 * for enabled queue region, allocate queues to this region.

> > +	 */

> > +

> > +	for (i = 0; i < region_info->queue_region_number; i++) {

> > +		tc_index = region_info->region[i].region_id;

> > +		bsf = rte_bsf32(region_info->region[i].queue_num);

> > +		queue_offset = region_info->region[i].queue_start_index;

> > +		vsi_info->tc_mapping[tc_index] = rte_cpu_to_le_16(

> > +			(queue_offset <<

> I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |

> > +				(bsf <<

> I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT));

> > +	}

> > +

> > +	/* Associate queue number with VSI, Keep vsi->nb_qps unchanged

> */

> > +	if (vsi->type == I40E_VSI_SRIOV) {

> Vsi is main vsi, it is impossible that the type to be SRIOV. If you want to

> configure Queue mapping on sriov VSI, you need to scan all the SRIOV VSIs.

> 

> > +		vsi_info->mapping_flags |=

> > +

> 	rte_cpu_to_le_16(I40E_AQ_VSI_QUE_MAP_NONCONTIG);

> > +		for (i = 0; i < vsi->nb_qps; i++)

> > +			vsi_info->queue_mapping[i] =

> > +				rte_cpu_to_le_16(vsi->base_queue + i);

> > +	} else {

> > +		vsi_info->mapping_flags |=

> > +

> 	rte_cpu_to_le_16(I40E_AQ_VSI_QUE_MAP_CONTIG);

> > +		vsi_info->queue_mapping[0] = rte_cpu_to_le_16(vsi-

> >base_queue);

> > +	}

> > +	vsi_info->valid_sections |=

> > +

> 	rte_cpu_to_le_16(I40E_AQ_VSI_PROP_QUEUE_MAP_VALID);

> > +

> > +	/* Update the VSI after updating the VSI queue-mapping

> information */

> > +	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);

> > +	if (ret) {

> > +		PMD_INIT_LOG(ERR, "Failed to configure queue region "

> PMD_INIT_LOG  -> PMD_DRV_LOG

> > +			"mapping = %d ", hw->aq.asq_last_status);

> > +		return ret;

> > +	}

> > +	/* update the local VSI info with updated queue map */

> > +	(void)rte_memcpy(&vsi->info.tc_mapping, &ctxt.info.tc_mapping,

> > +					sizeof(vsi->info.tc_mapping));

> > +	(void)rte_memcpy(&vsi->info.queue_mapping,

> > +			&ctxt.info.queue_mapping,

> > +		sizeof(vsi->info.queue_mapping));

> > +	vsi->info.mapping_flags = ctxt.info.mapping_flags;

> > +	vsi->info.valid_sections = 0;

> > +

> > +	return 0;

> > +}

> > +

> > +

> > +static int

> > +i40e_set_queue_region(struct i40e_pf *pf,

> > +				struct rte_i40e_rss_region_conf *conf_ptr) {

> > +	uint16_t tc_size_tb[7] = {1, 2, 4, 8, 16, 32, 64};

> > +	uint16_t i;

> > +	struct i40e_vsi *main_vsi = pf->main_vsi;

> > +	struct i40e_queue_region_info *info = &pf->queue_region;

> > +	uint32_t ret = -EINVAL;

> > +

> > +	for (i = 0; i < I40E_REGION_MAX_INDEX; i++)

> > +		if (conf_ptr->queue_num == tc_size_tb[i])

> > +			break;

> > +

> > +	if (i == I40E_REGION_MAX_INDEX) {

> > +		printf("The region sizes should be any of the following "

> > +		"values: 1, 2, 4, 8, 16, 32, 64 as long as the "

> > +		"total number of queues do not exceed the VSI allocation");

> > +		return ret;

> > +	}

> > +

> If you just want to check if the queue_nume is one of {1, 2,4,8,16,32,64} You

> can use

> 

> If (rte_is_power_of_2(queue_num) && queue_num <= 64)

> 

> 

> > +	if (conf_ptr->region_id >= I40E_REGION_MAX_INDEX) {

> > +		PMD_INIT_LOG(ERR, "the queue region max index is 7");

> > +		return ret;

> > +	}

> > +

> > +	if ((conf_ptr->queue_start_index + conf_ptr->queue_num)

> > +					> main_vsi->nb_used_qps) {

> > +		PMD_INIT_LOG(ERR, "the queue index exceeds the VSI

> range");

> > +		return ret;

> > +	}

> > +

> > +	if (info->queue_region_number) {

> > +		for (i = 0; i < info->queue_region_number; i++)

> > +			if (conf_ptr->region_id == info->region[i].region_id)

> > +				break;

> > +

> > +		if ((i == info->queue_region_number) &&

> > +			(i <= I40E_REGION_MAX_INDEX)) {

> It is impossible that i > I40E_REGION_MAX_INDEX, right?


Yes, only support 8 queue region max.

> 

> > +			info->region[i].region_id = conf_ptr->region_id;

> > +			info->region[i].queue_num = conf_ptr->queue_num;

> > +			info->region[i].queue_start_index =

> > +				conf_ptr->queue_start_index;

> > +			info->queue_region_number++;

> > +		} else {

> > +			PMD_INIT_LOG(ERR, "queue region number "

> > +				"exceeds maxnum 8 or the "

> > +				"queue region id has "

> > +				"been set before");

> Does it mean we can not change the set of one queue region whose id is set

> before?


No, FLUSH_OFF can clear all the config
> 

> > +			return ret;

> > +		}

> > +	} else {

> > +		info->region[0].region_id = conf_ptr->region_id;

> > +		info->region[0].queue_num = conf_ptr->queue_num;

> > +		info->region[0].queue_start_index =

> > +			conf_ptr->queue_start_index;

> > +		info->queue_region_number++;

> > +	}

> I think this part of code can be refined.

> > +

> > +	return 0;

> > +}

> > +

> > +static int

> > +i40e_set_region_flowtype_pf(struct i40e_hw *hw,

> > +		struct i40e_pf *pf, struct rte_i40e_rss_region_conf

> *conf_ptr) {

> > +	uint32_t pfqf_hregion;

> > +	uint32_t ret = -EINVAL;

> > +	struct i40e_queue_region_info *info = &pf->queue_region;

> > +	uint16_t i, j, index, flowtype_set = 0;

> > +	uint16_t region_index, flowtype_index;

> > +

> > +	if (conf_ptr->region_id > I40E_PFQF_HREGION_MAX_INDEX) {

> > +		PMD_INIT_LOG(ERR, "the queue region max index is 7");

> > +		return ret;

> > +	}

> > +

> > +	if (conf_ptr->hw_flowtype >= I40E_FILTER_PCTYPE_MAX) {

> > +		PMD_INIT_LOG(ERR, "the hw_flowtype or PCTYPE max index

> is 63");

> > +		return ret;

> > +	}

> > +

> > +	if (info->queue_region_number) {

> > +		for (i = 0; i < info->queue_region_number; i++)

> > +			if (conf_ptr->region_id == info->region[i].region_id)

> > +				break;

> > +

> > +		if (i == info->queue_region_number) {

> > +			PMD_INIT_LOG(ERR, "that region id has not "

> > +				"been set before");

> > +			return ret;

> > +		}

> If info->queue_region_number==0, then i would be 0. So, the outer wrap if

> (info->queue_region_number) {} can be omit. The same at above func, I

> think this part of code can be refined.


Ok,change in v4

> > +		region_index = i;

> > +

> > +		for (i = 0; i < info->queue_region_number; i++) {

> > +			for (j = 0; j < info->region[i].flowtype_num; j++) {

> > +				if (conf_ptr->hw_flowtype ==

> > +					info->region[i].hw_flowtype[j]) {

> > +					flowtype_set = 1;

> > +					break;

> > +				}

> > +			}

> > +		}

> > +

> > +		if (flowtype_set) {

> > +			PMD_INIT_LOG(ERR, "that hw_flowtype "

> > +				"has been set before");

> > +			return ret;

> Is that right to return -EINVAL when it has been set. And it seems In this

> function, the error code is only -EINVAL, if so, why we define ret?

> > +		}

> > +		flowtype_index = info->region[region_index].flowtype_num;

> > +		info->region[region_index].hw_flowtype[flowtype_index] =

> > +						conf_ptr->hw_flowtype;

> > +		info->region[region_index].flowtype_num++;

> > +	} else  {

> > +		PMD_INIT_LOG(ERR, "there is no that region id been set

> before");

> 

> Again, please use PMD_DRV_LOG. Because the func is not called at

> initialization time.

> Please replace it in all the patch.

> > +		return ret;

> > +	}

> > +

> > +	index = conf_ptr->hw_flowtype >> 3;

> > +	pfqf_hregion = i40e_read_rx_ctl(hw, I40E_PFQF_HREGION(index));

> > +

> > +	if ((conf_ptr->hw_flowtype & 0x7) == 0) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_0_SHIFT;

> > +		pfqf_hregion |= 1 <<

> I40E_PFQF_HREGION_OVERRIDE_ENA_0_SHIFT;

> > +	} else if ((conf_ptr->hw_flowtype & 0x7) == 1) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_1_SHIFT;

> > +		pfqf_hregion |= 1 <<

> I40E_PFQF_HREGION_OVERRIDE_ENA_1_SHIFT;

> > +	} else if ((conf_ptr->hw_flowtype & 0x7) == 2) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_2_SHIFT;

> > +		pfqf_hregion |= 1 <<

> I40E_PFQF_HREGION_OVERRIDE_ENA_2_SHIFT;

> > +	} else if ((conf_ptr->hw_flowtype & 0x7) == 3) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_3_SHIFT;

> > +		pfqf_hregion |= 1 <<

> I40E_PFQF_HREGION_OVERRIDE_ENA_3_SHIFT;

> > +	} else if ((conf_ptr->hw_flowtype & 0x7) == 4) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_4_SHIFT;

> > +		pfqf_hregion |= 1 <<

> I40E_PFQF_HREGION_OVERRIDE_ENA_4_SHIFT;

> > +	} else if ((conf_ptr->hw_flowtype & 0x7) == 5) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_5_SHIFT;

> > +		pfqf_hregion |= 1 <<

> I40E_PFQF_HREGION_OVERRIDE_ENA_5_SHIFT;

> > +	} else if ((conf_ptr->hw_flowtype & 0x7) == 6) {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_6_SHIFT;

> > +		pfqf_hregion |= 1 <<

> I40E_PFQF_HREGION_OVERRIDE_ENA_6_SHIFT;

> What is the (conf_ptr->hw_flowtype & 0x7) == 1/2/3/4/5/6 meanings?

> Please avoid coding with integer number. Can not understand the

> relationship with flow type.

> And the hw_flowtype is API to users, you need to make it clear enough.

> 

> > +	} else {

> > +		pfqf_hregion |= conf_ptr->region_id <<

> > +				I40E_PFQF_HREGION_REGION_7_SHIFT;

> > +		pfqf_hregion |= 1 <<

> I40E_PFQF_HREGION_OVERRIDE_ENA_7_SHIFT;

> > +	}

> > +

> > +	i40e_write_rx_ctl(hw, I40E_PFQF_HREGION(index), pfqf_hregion);

> > +

> > +	return 0;

> > +}

> > +

> > +static int

> > +i40e_set_up_region(struct i40e_pf *pf,

> > +				struct rte_i40e_rss_region_conf *conf_ptr) {

> > +	struct i40e_queue_region_info *info = &pf->queue_region;

> > +	uint32_t ret = -EINVAL;

> > +	uint16_t i, j, region_index, up_set = 0;

> > +

> > +	if (conf_ptr->user_priority >

> I40E_REGION_USERPRIORITY_MAX_INDEX) {

> > +		PMD_INIT_LOG(ERR, "the queue region max index is 7");

> > +		return ret;

> > +	}

> > +

> > +	if (conf_ptr->region_id >= I40E_REGION_MAX_INDEX) {

> > +		PMD_INIT_LOG(ERR, "the region_id max index is 7");

> > +		return ret;

> > +	}

> > +

> > +	if (info->queue_region_number) {

> > +		for (i = 0; i < info->queue_region_number; i++)

> > +			if (conf_ptr->region_id == info->region[i].region_id)

> > +				break;

> > +

> > +		if (i == info->queue_region_number) {

> > +			PMD_INIT_LOG(ERR, "that region id "

> > +				"has not been set before");

> > +			return ret;

> > +		}

> > +		region_index = i;

> > +

> > +		for (i = 0; i < info->queue_region_number; i++) {

> > +			for (j = 0; j <

> > +				info->region[i].user_priority_num; j++) {

> > +				if (info->region[i].user_priority[j] ==

> > +					conf_ptr->user_priority) {

> > +					up_set = 1;

> > +					break;

> > +				}

> > +			}

> > +		}

> > +

> > +		if (up_set) {

> > +			PMD_INIT_LOG(ERR, "that user priority "

> > +				"has been set before");

> > +			return ret;

> > +		}

> > +

> > +		j = info->region[region_index].user_priority_num;

> > +		info->region[region_index].user_priority[j] =

> > +						conf_ptr->user_priority;

> > +		info->region[region_index].user_priority_num++;

> > +	} else {

> > +		PMD_INIT_LOG(ERR, "there is no that region id been set

> before");

> > +		return ret;

> > +	}

> > +

> > +	return 0;

> > +}

> > +

> > +static int

> > +i40e_queue_region_dcb_configure(struct i40e_hw *hw,

> > +				struct i40e_pf *pf)

> > +{

> > +	struct i40e_dcbx_config dcb_cfg_local;

> > +	struct i40e_dcbx_config *dcb_cfg;

> > +	struct i40e_queue_region_info *info = &pf->queue_region;

> > +	struct i40e_dcbx_config *old_cfg = &hw->local_dcbx_config;

> > +	uint32_t ret = -EINVAL;

> > +	uint16_t i, j, prio_index, region_index;

> > +	uint8_t tc_map, tc_bw, bw_lf;

> > +

> > +	if (!info->queue_region_number) {

> > +		PMD_INIT_LOG(ERR, "there is no that region id been set

> before");

> > +		return ret;

> > +	}

> > +

> > +	dcb_cfg = &dcb_cfg_local;

> > +	memset(dcb_cfg, 0, sizeof(struct i40e_dcbx_config));

> > +

> > +	/* assume each tc has the same bw */

> > +	tc_bw = I40E_MAX_PERCENT / info->queue_region_number;

> > +	for (i = 0; i < info->queue_region_number; i++)

> > +		dcb_cfg->etscfg.tcbwtable[i] = tc_bw;

> > +	/* to ensure the sum of tcbw is equal to 100 */

> > +	bw_lf = I40E_MAX_PERCENT %  info->queue_region_number;

> > +	for (i = 0; i < bw_lf; i++)

> > +		dcb_cfg->etscfg.tcbwtable[i]++;

> > +

> > +	/* assume each tc has the same Transmission Selection Algorithm */

> > +	for (i = 0; i < info->queue_region_number; i++)

> > +		dcb_cfg->etscfg.tsatable[i] = I40E_IEEE_TSA_ETS;

> > +

> > +	for (i = 0; i < info->queue_region_number; i++) {

> > +		for (j = 0; j < info->region[i].user_priority_num; j++) {

> > +			prio_index = info->region[i].user_priority[j];

> > +			region_index = info->region[i].region_id;

> > +			dcb_cfg->etscfg.prioritytable[prio_index] =

> > +						region_index;

> > +		}

> > +	}

> > +

> > +	/* FW needs one App to configure HW */

> > +	dcb_cfg->numapps = I40E_DEFAULT_DCB_APP_NUM;

> > +	dcb_cfg->app[0].selector = I40E_APP_SEL_ETHTYPE;

> > +	dcb_cfg->app[0].priority = I40E_DEFAULT_DCB_APP_PRIO;

> > +	dcb_cfg->app[0].protocolid = I40E_APP_PROTOID_FCOE;

> > +

> > +	tc_map = RTE_LEN2MASK(info->queue_region_number, uint8_t);

> > +

> > +	dcb_cfg->pfc.willing = 0;

> > +	dcb_cfg->pfc.pfccap = I40E_MAX_TRAFFIC_CLASS;

> > +	dcb_cfg->pfc.pfcenable = tc_map;

> > +

> > +	/* Copy the new config to the current config */

> > +	*old_cfg = *dcb_cfg;

> > +	old_cfg->etsrec = old_cfg->etscfg;

> > +	ret = i40e_set_dcb_config(hw);

> > +

> > +	if (ret) {

> > +		PMD_INIT_LOG(ERR, "Set queue region DCB Config failed,

> err"

> > +			" %s aq_err %s",

> > +			 i40e_stat_str(hw, ret),

> > +			 i40e_aq_str(hw, hw->aq.asq_last_status));

> > +		return ret;

> > +	}

> > +

> > +	return 0;

> > +}

> > +

> > +static int

> > +i40e_queue_region_dcb_configure_default(struct i40e_hw *hw)

> 

> Can i40e_dcb_init_configure do what this func did here?

> 

> > +{

> > +	uint16_t i;

> > +	uint32_t ret = -EINVAL;

> > +

> > +	memset(&hw->local_dcbx_config, 0,

> > +	sizeof(struct i40e_dcbx_config));

> > +	/* set dcb default configuration */

> > +	hw->local_dcbx_config.etscfg.willing = 0;

> > +	hw->local_dcbx_config.etscfg.maxtcs = 0;

> > +	hw->local_dcbx_config.etscfg.tcbwtable[0] = 100;

> > +	hw->local_dcbx_config.etscfg.tsatable[0] =

> > +			I40E_IEEE_TSA_ETS;

> > +	/* all UPs mapping to region 0 */

> > +	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++)

> > +		hw->local_dcbx_config.etscfg.prioritytable[i] = 0;

> > +	hw->local_dcbx_config.etsrec =

> > +		hw->local_dcbx_config.etscfg;

> > +	hw->local_dcbx_config.pfc.willing = 0;

> > +	hw->local_dcbx_config.pfc.pfccap =

> > +			I40E_MAX_TRAFFIC_CLASS;

> > +	/* FW needs one App to configure HW */

> > +	hw->local_dcbx_config.numapps = 1;

> > +	hw->local_dcbx_config.app[0].selector =

> > +			I40E_APP_SEL_ETHTYPE;

> > +	hw->local_dcbx_config.app[0].priority = 3;

> > +	hw->local_dcbx_config.app[0].protocolid =

> > +			I40E_APP_PROTOID_FCOE;

> > +	ret = i40e_set_dcb_config(hw);

> > +	if (ret) {

> > +		PMD_INIT_LOG(ERR,

> > +		"default dcb config fails. err = %d, aq_err = %d.",

> > +			ret, hw->aq.asq_last_status);

> > +		return ret;

> > +	}

> > +

> > +	return 0;

> > +}

> > +

> > +int

> > +i40e_flush_region_all_conf(struct i40e_hw *hw, struct i40e_pf *pf,

> > +				uint16_t on)

> > +{

> > +	uint16_t i;

> > +	struct i40e_queue_region_info *info = &pf->queue_region;

> > +

> > +	if (on) {

> > +		i40e_vsi_update_queue_region_mapping(hw, pf);

> > +		i40e_queue_region_dcb_configure(hw, pf);

> Don't check all the return code?

> 

> > +		return 0;

> > +	}

> > +

> > +	info->queue_region_number = 1;

> > +	info->region[0].queue_num = 64;

> > +	info->region[0].queue_start_index = 0;

> > +

> > +	i40e_vsi_update_queue_region_mapping(hw, pf);

> > +	i40e_queue_region_dcb_configure_default(hw);

> > +

> > +	for (i = 0; i < I40E_PFQF_HREGION_MAX_INDEX; i++)

> > +		i40e_write_rx_ctl(hw, I40E_PFQF_HREGION(i), 0);

> > +	memset(info, 0, sizeof(struct i40e_queue_region_info));

> > +

> > +	return 0;

> > +}

> > +

> > +int rte_pmd_i40e_queue_region_conf(uint8_t port,

> > +			struct rte_i40e_rss_region_conf *conf_ptr) {

> > +	struct rte_eth_dev *dev = &rte_eth_devices[port];

> > +	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data-

> >dev_private);

> > +	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data-

> >dev_private);

> > +	enum rte_pmd_i40e_queue_region_op op_type = conf_ptr->op;

> > +	uint32_t ret;

> > +

> > +	if (!is_i40e_supported(dev))

> > +		return -ENOTSUP;

> > +

> > +	switch (op_type) {

> > +	case RTE_PMD_I40E_QUEUE_REGION_SET:

> > +		ret = i40e_set_queue_region(pf, conf_ptr);

> > +		break;

> > +	case RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET:

> > +		ret = i40e_set_region_flowtype_pf(hw, pf, conf_ptr);

> > +		break;

> > +	case RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET:

> > +		ret = -EINVAL;

> > +		break;

> If it is not supported, why to define this op?

> 

> > +	case RTE_PMD_I40E_UP_REGION_SET:

> > +		ret = i40e_set_up_region(pf, conf_ptr);

> > +		break;

> > +	case RTE_PMD_I40E_REGION_ALL_FLUSH_ON:

> > +		ret = i40e_flush_region_all_conf(hw, pf, 1);

> > +		break;

> What is the FLUSH on meaning? Commit all configure to HW?

> If so, what makes me confused that:

> RTE_PMD_I40E_QUEUE_REGION_SET only touch the software stored in

> driver.

> But RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET set the

> I40E_PFQF_HREGION register.

> Those behaviors are not consist.

> 

> And FLUSH OFF means delete all?

> 

> You need to add more comments.

> 

> > +	case RTE_PMD_I40E_REGION_ALL_FLUSH_OFF:

> > +		ret = i40e_flush_region_all_conf(hw, pf, 0);

> > +		break;

> > +

> > +	default:

> > +		PMD_DRV_LOG(WARNING, "op type (%d) not supported",

> > +			    op_type);

> > +		ret = -EINVAL;

> > +		break;

> > +	}

> > +

> > +	I40E_WRITE_FLUSH(hw);

> > +

> > +	return ret;

> > +}

> > diff --git a/drivers/net/i40e/rte_pmd_i40e.h

> > b/drivers/net/i40e/rte_pmd_i40e.h index 155b7e8..a1329f4 100644

> > --- a/drivers/net/i40e/rte_pmd_i40e.h

> > +++ b/drivers/net/i40e/rte_pmd_i40e.h

> > @@ -91,6 +91,20 @@ enum rte_pmd_i40e_package_info {

> >  	RTE_PMD_I40E_PKG_INFO_MAX = 0xFFFFFFFF  };

> >

> > +/**

> > + *  Option types of queue region.

> > + */

> > +enum rte_pmd_i40e_queue_region_op {

> > +	RTE_PMD_I40E_REGION_UNDEFINED = 0,

> > +	RTE_PMD_I40E_QUEUE_REGION_SET,      /**< add queue region

> set*/

> > +	RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET,   /**< add pf region

> pctype set */

> > +	RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET,   /**< add vf region

> pctype set */

> > +	RTE_PMD_I40E_UP_REGION_SET,   /**< add queue region pctype

> set */

> > +	RTE_PMD_I40E_REGION_ALL_FLUSH_ON,   /**< flush on all

> configuration */

> > +	RTE_PMD_I40E_REGION_ALL_FLUSH_OFF,   /**< flush off all

> configuration */

> > +	RTE_PMD_I40E_QUEUE_REGION_OP_MAX

> > +};

> > +

> >  #define RTE_PMD_I40E_DDP_NAME_SIZE 32

> >

> >  /**

> > @@ -146,6 +160,18 @@ struct rte_pmd_i40e_ptype_mapping {  };

> >

> >  /**

> > + * Queue region information get from CLI.

> > + */

> > +struct rte_i40e_rss_region_conf {

> > +	uint8_t region_id;

> > +	uint8_t hw_flowtype;

> > +	uint8_t queue_start_index;

> > +	uint8_t queue_num;

> > +	uint8_t user_priority;

> > +	enum rte_pmd_i40e_queue_region_op  op; };

> This is API struct, you need to comment them in detail

> 

> Thanks

> Jingjing
  
Zhao1, Wei Sept. 25, 2017, 7:40 a.m. UTC | #13
Hi, Ferruh



> -----Original Message-----


> From: Yigit, Ferruh


> Sent: Wednesday, September 20, 2017 6:36 PM


> To: Zhao1, Wei <wei.zhao1@intel.com>; dev@dpdk.org


> Subject: Re: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and flush


>


> On 9/15/2017 4:13 AM, Wei Zhao wrote:


> > This feature enable queue regions configuration for RSS in PF/VF, so


> > that different traffic classes or different packet classification


> > types can be separated to different queues in different queue


> > regions.This patch can set queue region range, it include queue number


> > in a region and the index of first queue.


>


> Is following correct:


> So instead of distributing packets to the multiple queues, this will distribute


> packets into queue reqions which may consists of multiple queues.


>


> If so, is there a way to control how packets distributed within same queue


> region to multiple queues?


>




distributed within same queue region is based on a rss algorithm, it is implemented by NIC self, software can not control it.





> And is this feature only supported with RSS? Can it be part of RSS


> configuration instead of PMD specific API?


>




Yes, only valid after rss enable.May be in the feature, in rte_flow mode, we can combine them together.



> > This patch enable mapping between different priorities (UP) and


>


> User priorities (UP)




oK, change in v4



>


> > different traffic classes.It also enable mapping between a region


> > index and a sepcific flowtype(PCTYPE).It also provide the solution of


> > flush all configuration about queue region the above described.


> >


> > Signed-off-by: Wei Zhao <wei.zhao1@intel.com<mailto:wei.zhao1@intel.com>>


> > ---


> >  drivers/net/i40e/i40e_ethdev.c            |  19 +-


> >  drivers/net/i40e/i40e_ethdev.h            |  30 ++


> >  drivers/net/i40e/rte_pmd_i40e.c           | 482


> ++++++++++++++++++++++++++++++


> >  drivers/net/i40e/rte_pmd_i40e.h           |  38 +++


> >  drivers/net/i40e/rte_pmd_i40e_version.map |   1 +


> >  5 files changed, 566 insertions(+), 4 deletions(-)


> >


>


> <...>


>


> > +static int


> > +i40e_vsi_update_queue_region_mapping(struct i40e_hw *hw,


> > +                                            struct i40e_pf *pf)


> > +{


> > +      uint16_t i;


> > +      struct i40e_vsi *vsi = pf->main_vsi;


> > +      uint16_t queue_offset, bsf, tc_index;


> > +      struct i40e_vsi_context ctxt;


> > +      struct i40e_aqc_vsi_properties_data *vsi_info;


> > +      struct i40e_queue_region_info *region_info =


> > +                                                      &pf->queue_region;


> > +      uint32_t ret = -EINVAL;


> > +


> > +      if (!region_info->queue_region_number) {


> > +                      PMD_INIT_LOG(ERR, "there is no that region id been set


> before");


>


> Can you please re-check the log message.




oK, change in v4



>


> > +                      return ret;


> > +      }


> > +


> > +      memset(&ctxt, 0, sizeof(struct i40e_vsi_context));


> > +


> > +      /* Update Queue Pairs Mapping for currently enabled UPs */


> > +      ctxt.seid = vsi->seid;


> > +      ctxt.pf_num = hw->pf_id;


> > +      ctxt.vf_num = 0;


> > +      ctxt.uplink_seid = vsi->uplink_seid;


> > +      ctxt.info = vsi->info;


> > +      vsi_info = &ctxt.info;


> > +


> > +      memset(vsi_info->tc_mapping, 0, sizeof(uint16_t) * 8);


> > +      memset(vsi_info->queue_mapping, 0, sizeof(uint16_t) * 16);


> > +


> > +      /**


> > +      * Configure queue region and queue mapping parameters,


> > +      * for enabled queue region, allocate queues to this region.


> > +      */


> > +


> > +      for (i = 0; i < region_info->queue_region_number; i++) {


> > +                      tc_index = region_info->region[i].region_id;


> > +                      bsf = rte_bsf32(region_info->region[i].queue_num);


> > +                      queue_offset = region_info->region[i].queue_start_index;


> > +                      vsi_info->tc_mapping[tc_index] = rte_cpu_to_le_16(


> > +                                      (queue_offset <<


> I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |


> > +                                                      (bsf <<


> I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT));


> > +      }


> > +


> > +      /* Associate queue number with VSI, Keep vsi->nb_qps unchanged


> */


> > +      if (vsi->type == I40E_VSI_SRIOV) {


> > +                      vsi_info->mapping_flags |=


> > +


>             rte_cpu_to_le_16(I40E_AQ_VSI_QUE_MAP_NONCONTIG);


> > +                      for (i = 0; i < vsi->nb_qps; i++)


> > +                                      vsi_info->queue_mapping[i] =


> > +                                                      rte_cpu_to_le_16(vsi->base_queue + i);


> > +      } else {


> > +                      vsi_info->mapping_flags |=


> > +


>             rte_cpu_to_le_16(I40E_AQ_VSI_QUE_MAP_CONTIG);


> > +                      vsi_info->queue_mapping[0] = rte_cpu_to_le_16(vsi-


> >base_queue);


> > +      }


> > +      vsi_info->valid_sections |=


> > +


>             rte_cpu_to_le_16(I40E_AQ_VSI_PROP_QUEUE_MAP_VALID);


> > +


> > +      /* Update the VSI after updating the VSI queue-mapping


> information */


> > +      ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);


> > +      if (ret) {


> > +                      PMD_INIT_LOG(ERR, "Failed to configure queue region "


> > +                                      "mapping = %d ", hw->aq.asq_last_status);


>


> Please don't split log message.




oK, change in v4





>


> > +                      return ret;


> > +      }


> > +      /* update the local VSI info with updated queue map */


> > +      (void)rte_memcpy(&vsi->info.tc_mapping, &ctxt.info.tc_mapping,


> > +                                                                      sizeof(vsi->info.tc_mapping));


> > +      (void)rte_memcpy(&vsi->info.queue_mapping,


> > +                                      &ctxt.info.queue_mapping,


> > +                      sizeof(vsi->info.queue_mapping));


>


> Please keep line allignment same with above line.




oK, change in v4, but can not the same.

(void)rte_memcpy(&vsi->info.queue_mapping, &ctxt.info.queue_mapping,

                                                sizeof(vsi->info.queue_mapping));

will over 80 maxnumber.



>


> > +      vsi->info.mapping_flags = ctxt.info.mapping_flags;


> > +      vsi->info.valid_sections = 0;


> > +


> > +      return 0;


> > +}


> > +


> > +


> > +static int


> > +i40e_set_queue_region(struct i40e_pf *pf,


> > +                                                      struct rte_i40e_rss_region_conf *conf_ptr) {


> > +      uint16_t tc_size_tb[7] = {1, 2, 4, 8, 16, 32, 64};


> > +      uint16_t i;


> > +      struct i40e_vsi *main_vsi = pf->main_vsi;


> > +      struct i40e_queue_region_info *info = &pf->queue_region;


> > +      uint32_t ret = -EINVAL;


> > +


> > +      for (i = 0; i < I40E_REGION_MAX_INDEX; i++)


> > +                      if (conf_ptr->queue_num == tc_size_tb[i])


> > +                                      break;


> > +


> > +      if (i == I40E_REGION_MAX_INDEX) {


> > +                      printf("The region sizes should be any of the following "


> > +                      "values: 1, 2, 4, 8, 16, 32, 64 as long as the "


> > +                      "total number of queues do not exceed the VSI allocation");


> > +                      return ret;


> > +      }


> > +


> > +      if (conf_ptr->region_id >= I40E_REGION_MAX_INDEX) {


> > +                      PMD_INIT_LOG(ERR, "the queue region max index is 7");


> > +                      return ret;


> > +      }


> > +


> > +      if ((conf_ptr->queue_start_index + conf_ptr->queue_num)


> > +                                                                      > main_vsi->nb_used_qps) {


> > +                      PMD_INIT_LOG(ERR, "the queue index exceeds the VSI


> range");


> > +                      return ret;


> > +      }


> > +


> > +      if (info->queue_region_number) {


> > +                      for (i = 0; i < info->queue_region_number; i++)


> > +                                      if (conf_ptr->region_id == info->region[i].region_id)


> > +                                                      break;


> > +


> > +                      if ((i == info->queue_region_number) &&


> > +                                      (i <= I40E_REGION_MAX_INDEX)) {


>


>


> Please use one more level indentaion here, and pharantesis looks extra can


> you please double check?




You mean,you want the following moede ?

                                if (i == info->queue_region_number) {

                                                if(i <= I40E_REGION_MAX_INDEX) {

                                                                ...........................

                                                }

                                }

>


> > +                                      info->region[i].region_id = conf_ptr->region_id;


> > +                                      info->region[i].queue_num = conf_ptr->queue_num;


> > +                                      info->region[i].queue_start_index =


> > +                                                      conf_ptr->queue_start_index;


> > +                                      info->queue_region_number++;


> > +                      } else {


> > +                                      PMD_INIT_LOG(ERR, "queue region number "


> > +                                                      "exceeds maxnum 8 or the "


> > +                                                      "queue region id has "


> > +                                                      "been set before");


>


> please don't split log message.




oK, change in v4



>


> > +                                      return ret;


> > +                      }


> > +      } else {


> > +                      info->region[0].region_id = conf_ptr->region_id;


> > +                      info->region[0].queue_num = conf_ptr->queue_num;


> > +                      info->region[0].queue_start_index =


> > +                                      conf_ptr->queue_start_index;


> > +                      info->queue_region_number++;


> > +      }


> > +


> > +      return 0;


> > +}


> > +


> > +static int


> > +i40e_set_region_flowtype_pf(struct i40e_hw *hw,


> > +                      struct i40e_pf *pf, struct rte_i40e_rss_region_conf


> *conf_ptr)


>


> What do you think starting all functions, even they are static, with


> "i40e_queue_region_" prefix?




                                ret = i40e_set_queue_region(pf, conf_ptr);

                                break;

                case RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET:

                                ret = i40e_set_region_flowtype_pf(hw, pf, conf_ptr);

                                break;

                case RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET:

                                ret = -EINVAL;

                                break;

                case RTE_PMD_I40E_UP_REGION_SET:

                                ret = i40e_set_up_region(pf, conf_ptr);



I have use the format of i40e_set_  as prefix?

Because many set is not related directly to queue region.



>


> > +{


> > +      uint32_t pfqf_hregion;


> > +      uint32_t ret = -EINVAL;


> > +      struct i40e_queue_region_info *info = &pf->queue_region;


> > +      uint16_t i, j, index, flowtype_set = 0;


> > +      uint16_t region_index, flowtype_index;


> > +


> > +      if (conf_ptr->region_id > I40E_PFQF_HREGION_MAX_INDEX) {


> > +                      PMD_INIT_LOG(ERR, "the queue region max index is 7");


> > +                      return ret;


> > +      }


> > +


> > +      if (conf_ptr->hw_flowtype >= I40E_FILTER_PCTYPE_MAX) {


> > +                      PMD_INIT_LOG(ERR, "the hw_flowtype or PCTYPE max index


> is 63");


> > +                      return ret;


> > +      }


> > +


> > +      if (info->queue_region_number) {


> > +                      for (i = 0; i < info->queue_region_number; i++)


> > +                                      if (conf_ptr->region_id == info->region[i].region_id)


> > +                                                      break;


> > +


> > +                      if (i == info->queue_region_number) {


> > +                                      PMD_INIT_LOG(ERR, "that region id has not "


> > +                                                      "been set before");


> > +                                      return ret;


> > +                      }


> > +                      region_index = i;


> > +


> > +                      for (i = 0; i < info->queue_region_number; i++) {


> > +                                      for (j = 0; j < info->region[i].flowtype_num; j++) {


> > +                                                      if (conf_ptr->hw_flowtype ==


> > +                                                                      info->region[i].hw_flowtype[j]) {


> > +                                                                      flowtype_set = 1;


> > +                                                                      break;


> > +                                                      }


> > +                                      }


> > +                      }


> > +


> > +                      if (flowtype_set) {


> > +                                      PMD_INIT_LOG(ERR, "that hw_flowtype "


> > +                                                      "has been set before");


>


> Please don't split log messages.




Ok.





>


> > +                                      return ret;


> > +                      }


> > +                      flowtype_index = info->region[region_index].flowtype_num;


> > +                      info->region[region_index].hw_flowtype[flowtype_index] =


> > +                                                                                      conf_ptr->hw_flowtype;


> > +                      info->region[region_index].flowtype_num++;


> > +      } else  {


> > +                      PMD_INIT_LOG(ERR, "there is no that region id been set


> before");


> > +                      return ret;


> > +      }


> > +


> > +      index = conf_ptr->hw_flowtype >> 3;


> > +      pfqf_hregion = i40e_read_rx_ctl(hw, I40E_PFQF_HREGION(index));


> > +


> > +      if ((conf_ptr->hw_flowtype & 0x7) == 0) {


> > +                      pfqf_hregion |= conf_ptr->region_id <<


> > +                                                      I40E_PFQF_HREGION_REGION_0_SHIFT;


> > +                      pfqf_hregion |= 1 <<


> I40E_PFQF_HREGION_OVERRIDE_ENA_0_SHIFT;


> > +      } else if ((conf_ptr->hw_flowtype & 0x7) == 1) {


> > +                      pfqf_hregion |= conf_ptr->region_id <<


> > +                                                      I40E_PFQF_HREGION_REGION_1_SHIFT;


> > +                      pfqf_hregion |= 1 <<


> I40E_PFQF_HREGION_OVERRIDE_ENA_1_SHIFT;


> > +      } else if ((conf_ptr->hw_flowtype & 0x7) == 2) {


> > +                      pfqf_hregion |= conf_ptr->region_id <<


> > +                                                      I40E_PFQF_HREGION_REGION_2_SHIFT;


> > +                      pfqf_hregion |= 1 <<


> I40E_PFQF_HREGION_OVERRIDE_ENA_2_SHIFT;


> > +      } else if ((conf_ptr->hw_flowtype & 0x7) == 3) {


> > +                      pfqf_hregion |= conf_ptr->region_id <<


> > +                                                      I40E_PFQF_HREGION_REGION_3_SHIFT;


> > +                      pfqf_hregion |= 1 <<


> I40E_PFQF_HREGION_OVERRIDE_ENA_3_SHIFT;


> > +      } else if ((conf_ptr->hw_flowtype & 0x7) == 4) {


> > +                      pfqf_hregion |= conf_ptr->region_id <<


> > +                                                      I40E_PFQF_HREGION_REGION_4_SHIFT;


> > +                      pfqf_hregion |= 1 <<


> I40E_PFQF_HREGION_OVERRIDE_ENA_4_SHIFT;


> > +      } else if ((conf_ptr->hw_flowtype & 0x7) == 5) {


> > +                      pfqf_hregion |= conf_ptr->region_id <<


> > +                                                      I40E_PFQF_HREGION_REGION_5_SHIFT;


> > +                      pfqf_hregion |= 1 <<


> I40E_PFQF_HREGION_OVERRIDE_ENA_5_SHIFT;


> > +      } else if ((conf_ptr->hw_flowtype & 0x7) == 6) {


> > +                      pfqf_hregion |= conf_ptr->region_id <<


> > +                                                      I40E_PFQF_HREGION_REGION_6_SHIFT;


> > +                      pfqf_hregion |= 1 <<


> I40E_PFQF_HREGION_OVERRIDE_ENA_6_SHIFT;


> > +      } else {


> > +                      pfqf_hregion |= conf_ptr->region_id <<


> > +                                                      I40E_PFQF_HREGION_REGION_7_SHIFT;


> > +                      pfqf_hregion |= 1 <<


> I40E_PFQF_HREGION_OVERRIDE_ENA_7_SHIFT;


> > +      }


> > +


> > +      i40e_write_rx_ctl(hw, I40E_PFQF_HREGION(index), pfqf_hregion);


> > +


> > +      return 0;


> > +}


> > +


> > +static int


> > +i40e_set_up_region(struct i40e_pf *pf,


> > +                                                      struct rte_i40e_rss_region_conf *conf_ptr)


>


> The API name looks like "setup" but that up part is user_priority, can you


> please long form against "up"?




Ok, I will change to “user_priority” instead of “up”



>


> > +{


> > +      struct i40e_queue_region_info *info = &pf->queue_region;


> > +      uint32_t ret = -EINVAL;


> > +      uint16_t i, j, region_index, up_set = 0;


>


> Same abrevation comment for "upset", please use long form.


>


> > +


> > +      if (conf_ptr->user_priority >


> I40E_REGION_USERPRIORITY_MAX_INDEX) {


> > +                      PMD_INIT_LOG(ERR, "the queue region max index is 7");


> > +                      return ret;


> > +      }


> > +


> > +      if (conf_ptr->region_id >= I40E_REGION_MAX_INDEX) {


> > +                      PMD_INIT_LOG(ERR, "the region_id max index is 7");


> > +                      return ret;


> > +      }


> > +


> > +      if (info->queue_region_number) {


> > +                      for (i = 0; i < info->queue_region_number; i++)


> > +                                      if (conf_ptr->region_id == info->region[i].region_id)


> > +                                                      break;


> > +


> > +                      if (i == info->queue_region_number) {


> > +                                      PMD_INIT_LOG(ERR, "that region id "


> > +                                                      "has not been set before");


> > +                                      return ret;


> > +                      }


> > +                      region_index = i;


> > +


> > +                      for (i = 0; i < info->queue_region_number; i++) {


> > +                                      for (j = 0; j <


> > +                                                      info->region[i].user_priority_num; j++) {


>


> Please break line after ";"




Ok.





>


> > +                                                      if (info->region[i].user_priority[j] ==


> > +                                                                      conf_ptr->user_priority) {


> > +                                                                      up_set = 1;


> > +                                                                      break;


> > +                                                      }


> > +                                      }


> > +                      }


> > +


> > +                      if (up_set) {


> > +                                      PMD_INIT_LOG(ERR, "that user priority "


> > +                                                      "has been set before");


> > +                                      return ret;


> > +                      }


> > +


> > +                      j = info->region[region_index].user_priority_num;


> > +                      info->region[region_index].user_priority[j] =


> > +                                                                                      conf_ptr->user_priority;


> > +                      info->region[region_index].user_priority_num++;


> > +      } else {


> > +                      PMD_INIT_LOG(ERR, "there is no that region id been set


> before");


> > +                      return ret;


> > +      }


> > +


> > +      return 0;


> > +}


> > +


> > +static int


> > +i40e_queue_region_dcb_configure(struct i40e_hw *hw,


> > +                                                      struct i40e_pf *pf)


> > +{


> > +      struct i40e_dcbx_config dcb_cfg_local;


> > +      struct i40e_dcbx_config *dcb_cfg;


> > +      struct i40e_queue_region_info *info = &pf->queue_region;


> > +      struct i40e_dcbx_config *old_cfg = &hw->local_dcbx_config;


> > +      uint32_t ret = -EINVAL;


>


> Please use signed variable, there are more above.




Ok

>


> > +      uint16_t i, j, prio_index, region_index;


> > +      uint8_t tc_map, tc_bw, bw_lf;


> > +


> > +      if (!info->queue_region_number) {


> > +                      PMD_INIT_LOG(ERR, "there is no that region id been set


> before");


> > +                      return ret;


> > +      }


> > +


> > +      dcb_cfg = &dcb_cfg_local;


> > +      memset(dcb_cfg, 0, sizeof(struct i40e_dcbx_config));


> > +


> > +      /* assume each tc has the same bw */


> > +      tc_bw = I40E_MAX_PERCENT / info->queue_region_number;


> > +      for (i = 0; i < info->queue_region_number; i++)


> > +                      dcb_cfg->etscfg.tcbwtable[i] = tc_bw;


> > +      /* to ensure the sum of tcbw is equal to 100 */


> > +      bw_lf = I40E_MAX_PERCENT %  info->queue_region_number;


> > +      for (i = 0; i < bw_lf; i++)


> > +                      dcb_cfg->etscfg.tcbwtable[i]++;


> > +


> > +      /* assume each tc has the same Transmission Selection Algorithm */


> > +      for (i = 0; i < info->queue_region_number; i++)


> > +                      dcb_cfg->etscfg.tsatable[i] = I40E_IEEE_TSA_ETS;


> > +


> > +      for (i = 0; i < info->queue_region_number; i++) {


> > +                      for (j = 0; j < info->region[i].user_priority_num; j++) {


> > +                                      prio_index = info->region[i].user_priority[j];


> > +                                      region_index = info->region[i].region_id;


> > +                                      dcb_cfg->etscfg.prioritytable[prio_index] =


> > +                                                                                      region_index;


> > +                      }


> > +      }


> > +


> > +      /* FW needs one App to configure HW */


> > +      dcb_cfg->numapps = I40E_DEFAULT_DCB_APP_NUM;


> > +      dcb_cfg->app[0].selector = I40E_APP_SEL_ETHTYPE;


> > +      dcb_cfg->app[0].priority = I40E_DEFAULT_DCB_APP_PRIO;


> > +      dcb_cfg->app[0].protocolid = I40E_APP_PROTOID_FCOE;


> > +


> > +      tc_map = RTE_LEN2MASK(info->queue_region_number, uint8_t);


> > +


> > +      dcb_cfg->pfc.willing = 0;


> > +      dcb_cfg->pfc.pfccap = I40E_MAX_TRAFFIC_CLASS;


> > +      dcb_cfg->pfc.pfcenable = tc_map;


> > +


> > +      /* Copy the new config to the current config */


> > +      *old_cfg = *dcb_cfg;


> > +      old_cfg->etsrec = old_cfg->etscfg;


> > +      ret = i40e_set_dcb_config(hw);


> > +


> > +      if (ret) {


> > +                      PMD_INIT_LOG(ERR, "Set queue region DCB Config failed,


> err"


> > +                                      " %s aq_err %s",


>


> Please don't split the log message.




Ok



>


> > +                                      i40e_stat_str(hw, ret),


> > +                                      i40e_aq_str(hw, hw->aq.asq_last_status));


> > +                      return ret;


> > +      }


> > +


> > +      return 0;


> > +}


> > +


> > +static int


> > +i40e_queue_region_dcb_configure_default(struct i40e_hw *hw) {


> > +      uint16_t i;


> > +      uint32_t ret = -EINVAL;


>


> Please use signed variable here.




Ok



>


> > +


> > +      memset(&hw->local_dcbx_config, 0,


> > +      sizeof(struct i40e_dcbx_config));


>


> Wrong indentation.


>


> > +      /* set dcb default configuration */


> > +      hw->local_dcbx_config.etscfg.willing = 0;


> > +      hw->local_dcbx_config.etscfg.maxtcs = 0;


> > +      hw->local_dcbx_config.etscfg.tcbwtable[0] = 100;


> > +      hw->local_dcbx_config.etscfg.tsatable[0] =


> > +                                      I40E_IEEE_TSA_ETS;


> > +      /* all UPs mapping to region 0 */


> > +      for (i = 0; i < I40E_MAX_USER_PRIORITY; i++)


> > +                      hw->local_dcbx_config.etscfg.prioritytable[i] = 0;


> > +      hw->local_dcbx_config.etsrec =


> > +                      hw->local_dcbx_config.etscfg;


> > +      hw->local_dcbx_config.pfc.willing = 0;


> > +      hw->local_dcbx_config.pfc.pfccap =


> > +                                      I40E_MAX_TRAFFIC_CLASS;


> > +      /* FW needs one App to configure HW */


> > +      hw->local_dcbx_config.numapps = 1;


> > +      hw->local_dcbx_config.app[0].selector =


> > +                                      I40E_APP_SEL_ETHTYPE;


> > +      hw->local_dcbx_config.app[0].priority = 3;


> > +      hw->local_dcbx_config.app[0].protocolid =


> > +                                      I40E_APP_PROTOID_FCOE;


> > +      ret = i40e_set_dcb_config(hw);


> > +      if (ret) {


> > +                      PMD_INIT_LOG(ERR,


> > +                      "default dcb config fails. err = %d, aq_err = %d.",


> > +                                      ret, hw->aq.asq_last_status);


> > +                      return ret;


> > +      }


> > +


> > +      return 0;


> > +}


> > +


> > +int


> > +i40e_flush_region_all_conf(struct i40e_hw *hw, struct i40e_pf *pf,


> > +                                                      uint16_t on)


> > +{


> > +      uint16_t i;


> > +      struct i40e_queue_region_info *info = &pf->queue_region;


> > +


> > +      if (on) {


> > +                      i40e_vsi_update_queue_region_mapping(hw, pf);


> > +                      i40e_queue_region_dcb_configure(hw, pf);


> > +                      return 0;


> > +      }


> > +


> > +      info->queue_region_number = 1;


> > +      info->region[0].queue_num = 64;


> > +      info->region[0].queue_start_index = 0;


> > +


> > +      i40e_vsi_update_queue_region_mapping(hw, pf);


> > +      i40e_queue_region_dcb_configure_default(hw);


> > +


> > +      for (i = 0; i < I40E_PFQF_HREGION_MAX_INDEX; i++)


> > +                      i40e_write_rx_ctl(hw, I40E_PFQF_HREGION(i), 0);


> > +


> > +      memset(info, 0, sizeof(struct i40e_queue_region_info));


> > +


> > +      return 0;


> > +}


> > +


> > +int rte_pmd_i40e_queue_region_conf(uint8_t port,


> > +                                      struct rte_i40e_rss_region_conf *conf_ptr) {


> > +      struct rte_eth_dev *dev = &rte_eth_devices[port];


>


> you need to verify port_id, since this is public API now. Please check other


> APIs in this file.


>


> > +      struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data-


> >dev_private);


> > +      struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data-


> >dev_private);


> > +      enum rte_pmd_i40e_queue_region_op op_type = conf_ptr->op;


> > +      uint32_t ret;


>


> This should be signed variable, since you are using it for return and assigning


> negative values.


>


> > +


> > +      if (!is_i40e_supported(dev))


> > +                      return -ENOTSUP;


> > +


> > +      switch (op_type) {


> > +      case RTE_PMD_I40E_QUEUE_REGION_SET:


> > +                      ret = i40e_set_queue_region(pf, conf_ptr);


> > +                      break;


>


> Does it make sense to add another type to get the current queue region


> config?


>


> > +      case RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET:


> > +                      ret = i40e_set_region_flowtype_pf(hw, pf, conf_ptr);


> > +                      break;


> > +      case RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET:


> > +                      ret = -EINVAL;


>


> Will this be implemented later, or not a valid case at all?


>


> > +                      break;


> > +      case RTE_PMD_I40E_UP_REGION_SET:> +                          ret =


> i40e_set_up_region(pf, conf_ptr);


> > +                      break;


> > +      case RTE_PMD_I40E_REGION_ALL_FLUSH_ON:


> > +                      ret = i40e_flush_region_all_conf(hw, pf, 1);


> > +                      break;


> > +      case RTE_PMD_I40E_REGION_ALL_FLUSH_OFF:


> > +                      ret = i40e_flush_region_all_conf(hw, pf, 0);


>


> Can you please describe what flush_on and flush_off are, you can comment


> to code if also.


>


> > +                      break;


> > +


> > +      default:


> > +                      PMD_DRV_LOG(WARNING, "op type (%d) not supported",


> > +                                          op_type);


>


> Line break not required.




Ok, delete in v4

>


> > +                      ret = -EINVAL;


> > +                      break;


> > +      }


> > +


> > +      I40E_WRITE_FLUSH(hw);


> > +


> > +      return ret;


> > +}


> > diff --git a/drivers/net/i40e/rte_pmd_i40e.h


> > b/drivers/net/i40e/rte_pmd_i40e.h index 155b7e8..a1329f4 100644


> > --- a/drivers/net/i40e/rte_pmd_i40e.h


> > +++ b/drivers/net/i40e/rte_pmd_i40e.h


> > @@ -91,6 +91,20 @@ enum rte_pmd_i40e_package_info {


> >          RTE_PMD_I40E_PKG_INFO_MAX = 0xFFFFFFFF  };


> >


> > +/**


> > + *  Option types of queue region.


> > + */


> > +enum rte_pmd_i40e_queue_region_op {


> > +      RTE_PMD_I40E_REGION_UNDEFINED = 0,


>


> No need to set to zero, it is default value.






Ok



>


> > +      RTE_PMD_I40E_QUEUE_REGION_SET,      /**< add queue region


> set*/


>


> I would suggest using same namespace for the enum ites, there are


> "RTE_PMD_I40E_REGION" and "RTE_PMD_I40E_QUEUE_REGION".


> "RTE_PMD_I40E_QUEUE_REGION" looks better. Or if it makes sense,


> perhaps "RTE_PMD_I40E_RSS_QUEUE_REGION"? But please stick with one.


>


> > +      RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET,   /**< add pf region


> pctype set */


> > +      RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET,   /**< add vf region


> pctype set */


> > +      RTE_PMD_I40E_UP_REGION_SET,   /**< add queue region pctype


> set */


>


> is this comment correct, this type should be for user priority




Ok, fix in v4



>


> > +      RTE_PMD_I40E_REGION_ALL_FLUSH_ON,   /**< flush on all


> configuration */


> > +      RTE_PMD_I40E_REGION_ALL_FLUSH_OFF,   /**< flush off all


> configuration */


> > +      RTE_PMD_I40E_QUEUE_REGION_OP_MAX


> > +};


> > +


> >  #define RTE_PMD_I40E_DDP_NAME_SIZE 32


> >


> >  /**


> > @@ -146,6 +160,18 @@ struct rte_pmd_i40e_ptype_mapping {  };


> >


> >  /**


> > + * Queue region information get from CLI.


>


> It doesn't need to be from CLI, can drop that part


>


> > + */


> > +struct rte_i40e_rss_region_conf {


>


> This is now public struct, can you please comment items.




Ok. Add in v4



>


> > +      uint8_t region_id;


> > +      uint8_t hw_flowtype;


> > +      uint8_t queue_start_index;


> > +      uint8_t queue_num;


> > +      uint8_t user_priority;


> > +      enum rte_pmd_i40e_queue_region_op  op;


>


> Extra space before "op"




Maybe, I SHOULD add uint8_t raw[3] before “op”





>


> > +};


> > +


> > +/**


> >   * Notify VF when PF link status changes.


> >   *


> >   * @param port


> > @@ -657,4 +683,16 @@ int


> rte_pmd_i40e_ptype_mapping_replace(uint8_t


> > port,  int rte_pmd_i40e_add_vf_mac_addr(uint8_t port, uint16_t vf_id,


> >                                                           struct ether_addr *mac_addr);


> >


> > +/**


> > + * Get RSS queue region info from CLI and do configuration for


>


> Again now this is an API, not just for testpmd, please drop CLI


>


> > + * that port as the command otion type


>


> s/otion/options


>


> > + *


> > + * @param port


> > + *    pointer to port identifier of the device


>


> Not pointer.


> Also this will conflict with patch that updates port storage to uint16_t. Can




Ok, change in v4

> you please follow the status of the patch, and if this merged after that one,


> please ping to remind to update the storage.




Ok ,I will commit a new v4 to fix these , then remind you of the new version



>


> > + * @param conf_ptr


> > + *    pointer to the struct that contain all the


> > + *    region configuration parameters


> > + */


> > +int rte_pmd_i40e_queue_region_conf(uint8_t port,


>


> Can you please use port_id as variable name to be consistent.


>


> > +                      struct rte_i40e_rss_region_conf *conf_ptr);


>


> conf_ptr variable name doesn't say much, although it is OK in this context. I


> would suggest something like rss_region_conf




Ok.

>


> >  #endif /* _PMD_I40E_H_ */


> > diff --git a/drivers/net/i40e/rte_pmd_i40e_version.map


> > b/drivers/net/i40e/rte_pmd_i40e_version.map


> > index ef8882b..c3ee2da 100644


> > --- a/drivers/net/i40e/rte_pmd_i40e_version.map


> > +++ b/drivers/net/i40e/rte_pmd_i40e_version.map


> > @@ -50,5 +50,6 @@ DPDK_17.11 {


> >          global:


> >


> >          rte_pmd_i40e_add_vf_mac_addr;


> > +      rte_pmd_i40e_queue_region_conf;


>


> The feature has been mention as "rss queue region" above a few places, do


> you think rte_pmd_i40e_rss_queue_region_conf() or


> rte_pmd_i40e_queue_region_conf() is better for API name?




Ok, I will change to rte_pmd_i40e_rss_queue_region_conf



>


> >


> >  } DPDK_17.08;


> >
  
Ferruh Yigit Sept. 25, 2017, 9:31 a.m. UTC | #14
On 9/25/2017 8:40 AM, Zhao1, Wei wrote:
> Hi, Ferruh
> 
>> -----Original Message-----
>> From: Yigit, Ferruh
>> Sent: Wednesday, September 20, 2017 6:36 PM
>> To: Zhao1, Wei <wei.zhao1@intel.com>; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and
> flush
>>
>> On 9/15/2017 4:13 AM, Wei Zhao wrote:
>> > This feature enable queue regions configuration for RSS in PF/VF, so
>> > that different traffic classes or different packet classification
>> > types can be separated to different queues in different queue
>> > regions.This patch can set queue region range, it include queue number
>> > in a region and the index of first queue.
>>
>> Is following correct:
>> So instead of distributing packets to the multiple queues, this will
> distribute
>> packets into queue reqions which may consists of multiple queues.
>>
>> If so, is there a way to control how packets distributed within same queue
>> region to multiple queues?
>>
> 
> distributed within same queue region is based on a rss algorithm, it is
> implemented by NIC self, software can not control it.

I was thinking RSS used to select queue region, but no first queue
region selected, later RSS used to select queue within the region.
Thanks for clarification.

<...>
                                                               
> sizeof(vsi->info.tc_mapping));
>> > +      (void)rte_memcpy(&vsi->info.queue_mapping,
>> > +                                      &ctxt.info.queue_mapping,
>> > +                      sizeof(vsi->info.queue_mapping));
>>
>> Please keep line allignment same with above line.
> 
> oK, change in v4, but can not the same.
> (void)rte_memcpy(&vsi->info.queue_mapping, &ctxt.info.queue_mapping,
>                                                
> sizeof(vsi->info.queue_mapping));
> will over 80 maxnumber.

Someting like following, new lines aligned:

	(void)rte_memcpy(&vsi->info.queue_mapping,
		&ctxt.info.queue_mapping,
		sizeof(vsi->info.queue_mapping));

<...>

>> > +                      if ((i == info->queue_region_number) &&
>> > +                                      (i <= I40E_REGION_MAX_INDEX)) {
>>
>>
>> Please use one more level indentaion here, and pharantesis looks extra can
>> you please double check?
> 
> You mean,you want the following moede ?
>         if (i == info->queue_region_number) {
>         	if(i <= I40E_REGION_MAX_INDEX) {
> 			...........................
>         	}
>        }
> 

No, continuation of the "if" should be easy to recognized from body of
the "if", something like:

	if ((i == info->queue_region_number) &&
			(i <= I40E_REGION_MAX_INDEX)) {
		info->region[i].region_id = conf_ptr->region_id;
		info->region[i].queue_num = conf_ptr->queue_num;

<...>


>> > +static int
>> > +i40e_set_region_flowtype_pf(struct i40e_hw *hw,
>> > +                      struct i40e_pf *pf, struct
> rte_i40e_rss_region_conf
>> *conf_ptr)
>>
>> What do you think starting all functions, even they are static, with
>> "i40e_queue_region_" prefix? 
>                                 ret = i40e_set_queue_region(pf, conf_ptr);
>                                 break;
>                 case RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET:
>                                 ret = i40e_set_region_flowtype_pf(hw,
> pf, conf_ptr);
>                                 break;
>                 case RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET:
>                                 ret = -EINVAL;
>                                 break;
>                 case RTE_PMD_I40E_UP_REGION_SET:
>                                 ret = i40e_set_up_region(pf, conf_ptr);
> 
> I have use the format ofi40e_set_  as prefix?
> Because many set is not related directly to queue region.

When looking to this patch what functions are doing is clear, but it
also should be clear when looking the source code some time later what
that function is about.

That is why I believe a namespace is useful, like
"i40e_queue_region_xx", so you can immediately say this function is
about "RSS queue region" feature.

This function is "rte_pmd_i40e_queue_region_conf", it is applying
provided queue region configuration based of region op type, so I
assumed all these functions are directly related to the fature.

If you believe function name will be wrong with that prefix, of course
just don't use it, but please try to stick with a name space, that will
make easy to understand these features in all source code.

<...>


>> > +
>> > +      memset(&hw->local_dcbx_config, 0,
>> > +      sizeof(struct i40e_dcbx_config));
>>
>> Wrong indentation.

Reminder of this one incase missed.

<...>

>> > +int rte_pmd_i40e_queue_region_conf(uint8_t port,
>> > +                                      struct
> rte_i40e_rss_region_conf *conf_ptr) {
>> > +      struct rte_eth_dev *dev = &rte_eth_devices[port];
>>
>> you need to verify port_id, since this is public API now. Please check
> other
>> APIs in this file.

Reminder of this one incase missed.

I mean is_i40e_supported() call that other APIs have.

<...>

>> > +
>> > +      if (!is_i40e_supported(dev))
>> > +                      return -ENOTSUP;
>> > +
>> > +      switch (op_type) {
>> > +      case RTE_PMD_I40E_QUEUE_REGION_SET:
>> > +                      ret = i40e_set_queue_region(pf, conf_ptr);
>> > +                      break;
>>
>> Does it make sense to add another type to get the current queue region
>> config?

Reminder of this one incase missed.

> 
>> > +      case RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET:
>> > +                      ret = i40e_set_region_flowtype_pf(hw, pf,
> conf_ptr);
>> > +                      break;
>> > +      case RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET:
>> > +                      ret = -EINVAL;
>>
>> Will this be implemented later, or not a valid case at all?

Reminder of this one incase missed.

>> > +                      break;
>> > +      case RTE_PMD_I40E_UP_REGION_SET:> +                         
> ret =
>> i40e_set_up_region(pf, conf_ptr);
>> > +                      break;
>> > +      case RTE_PMD_I40E_REGION_ALL_FLUSH_ON:
>> > +                      ret = i40e_flush_region_all_conf(hw, pf, 1);
>> > +                      break;
>> > +      case RTE_PMD_I40E_REGION_ALL_FLUSH_OFF:
>> > +                      ret = i40e_flush_region_all_conf(hw, pf, 0);
>>
>> Can you please describe what flush_on and flush_off are, you can comment
>> to code if also.

Reminder of this one incase missed.

<...>

>> > @@ -146,6 +160,18 @@ struct rte_pmd_i40e_ptype_mapping {  };
>> >
>> >  /**
>> > + * Queue region information get from CLI.
>>
>> It doesn't need to be from CLI, can drop that part

Reminder of this one incase missed.

<...>

>> > +      uint8_t user_priority;
>> > +      enum rte_pmd_i40e_queue_region_op  op;
>>
>> Extra space before "op"
> 
> Maybe, I SHOULD add uint8_t raw[3] before “op”

Sorry, I didn't get this one.

<...>

>> rte_pmd_i40e_ptype_mapping_replace(uint8_t
>> > port,  int rte_pmd_i40e_add_vf_mac_addr(uint8_t port, uint16_t vf_id,
>> >                                                           struct
> ether_addr *mac_addr);
>> >
>> > +/**
>> > + * Get RSS queue region info from CLI and do configuration for
>>
>> Again now this is an API, not just for testpmd, please drop CLI

Reminder of this one incase missed.

>>
>> > + * that port as the command otion type
>>
>> s/otion/options
>>

Reminder of this one incase missed.

<...>

>> > +int rte_pmd_i40e_queue_region_conf(uint8_t port,
>>
>> Can you please use port_id as variable name to be consistent.

Reminder of this one incase missed.

<...>
  
Zhao1, Wei Sept. 26, 2017, 7:46 a.m. UTC | #15
Hi,  Ferruh

> -----Original Message-----

> From: Yigit, Ferruh

> Sent: Monday, September 25, 2017 5:32 PM

> To: Zhao1, Wei <wei.zhao1@intel.com>

> Cc: dev@dpdk.org

> Subject: Re: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and flush

> 

> On 9/25/2017 8:40 AM, Zhao1, Wei wrote:

> > Hi, Ferruh

> >

> >> -----Original Message-----

> >> From: Yigit, Ferruh

> >> Sent: Wednesday, September 20, 2017 6:36 PM

> >> To: Zhao1, Wei <wei.zhao1@intel.com>; dev@dpdk.org

> >> Subject: Re: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and

> > flush

> >>

> >> On 9/15/2017 4:13 AM, Wei Zhao wrote:

> >> > This feature enable queue regions configuration for RSS in PF/VF,

> >> > so that different traffic classes or different packet

> >> > classification types can be separated to different queues in

> >> > different queue regions.This patch can set queue region range, it

> >> > include queue number in a region and the index of first queue.

> >>

> >> Is following correct:

> >> So instead of distributing packets to the multiple queues, this will

> > distribute

> >> packets into queue reqions which may consists of multiple queues.

> >>

> >> If so, is there a way to control how packets distributed within same

> >> queue region to multiple queues?

> >>

> >

> > distributed within same queue region is based on a rss algorithm, it

> > is implemented by NIC self, software can not control it.

> 

> I was thinking RSS used to select queue region, but no first queue region

> selected, later RSS used to select queue within the region.

> Thanks for clarification.


OK

> 

> <...>

> 

> > sizeof(vsi->info.tc_mapping));

> >> > +      (void)rte_memcpy(&vsi->info.queue_mapping,

> >> > +                                      &ctxt.info.queue_mapping,

> >> > +                      sizeof(vsi->info.queue_mapping));

> >>

> >> Please keep line allignment same with above line.

> >

> > oK, change in v4, but can not the same.

> > (void)rte_memcpy(&vsi->info.queue_mapping,

> &ctxt.info.queue_mapping,

> >

> > sizeof(vsi->info.queue_mapping));

> > will over 80 maxnumber.

> 

> Someting like following, new lines aligned:

> 

> 	(void)rte_memcpy(&vsi->info.queue_mapping,

> 		&ctxt.info.queue_mapping,

> 		sizeof(vsi->info.queue_mapping));



Ok, change in v4

> 

> <...>

> 

> >> > +                      if ((i == info->queue_region_number) &&

> >> > +                                      (i <=

> >> > +I40E_REGION_MAX_INDEX)) {

> >>

> >>

> >> Please use one more level indentaion here, and pharantesis looks

> >> extra can you please double check?

> >

> > You mean,you want the following moede ?

> >         if (i == info->queue_region_number) {

> >         	if(i <= I40E_REGION_MAX_INDEX) {

> > 			...........................

> >         	}

> >        }

> >

> 

> No, continuation of the "if" should be easy to recognized from body of the

> "if", something like:

> 

> 	if ((i == info->queue_region_number) &&

> 			(i <= I40E_REGION_MAX_INDEX)) {

> 		info->region[i].region_id = conf_ptr->region_id;

> 		info->region[i].queue_num = conf_ptr->queue_num;

> 

> <...>

> 


Ok, change in v4

> 

> >> > +static int

> >> > +i40e_set_region_flowtype_pf(struct i40e_hw *hw,

> >> > +                      struct i40e_pf *pf, struct

> > rte_i40e_rss_region_conf

> >> *conf_ptr)

> >>

> >> What do you think starting all functions, even they are static, with

> >> "i40e_queue_region_" prefix?

> >                                 ret = i40e_set_queue_region(pf,

> > conf_ptr);

> >                                 break;

> >                 case RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET:

> >                                 ret = i40e_set_region_flowtype_pf(hw,

> > pf, conf_ptr);

> >                                 break;

> >                 case RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET:

> >                                 ret = -EINVAL;

> >                                 break;

> >                 case RTE_PMD_I40E_UP_REGION_SET:

> >                                 ret = i40e_set_up_region(pf,

> > conf_ptr);

> >

> > I have use the format ofi40e_set_  as prefix?

> > Because many set is not related directly to queue region.

> 

> When looking to this patch what functions are doing is clear, but it also should

> be clear when looking the source code some time later what that function is

> about.

> 

> That is why I believe a namespace is useful, like "i40e_queue_region_xx", so

> you can immediately say this function is about "RSS queue region" feature.

> 

> This function is "rte_pmd_i40e_queue_region_conf", it is applying provided

> queue region configuration based of region op type, so I assumed all these

> functions are directly related to the fature.

> 

> If you believe function name will be wrong with that prefix, of course just

> don't use it, but please try to stick with a name space, that will make easy to

> understand these features in all source code.

> 

> <...>


Ok, fix in v4


> 

> 

> >> > +

> >> > +      memset(&hw->local_dcbx_config, 0,

> >> > +      sizeof(struct i40e_dcbx_config));

> >>

> >> Wrong indentation.

> 

> Reminder of this one incase missed.


Ok, change in v4

> 

> <...>

> 

> >> > +int rte_pmd_i40e_queue_region_conf(uint8_t port,

> >> > +                                      struct

> > rte_i40e_rss_region_conf *conf_ptr) {

> >> > +      struct rte_eth_dev *dev = &rte_eth_devices[port];

> >>

> >> you need to verify port_id, since this is public API now. Please

> >> check

> > other

> >> APIs in this file.

> 

> Reminder of this one incase missed.


Ok, change in v4

> 

> I mean is_i40e_supported() call that other APIs have.

> 

> <...>

> 

> >> > +

> >> > +      if (!is_i40e_supported(dev))

> >> > +                      return -ENOTSUP;

> >> > +

> >> > +      switch (op_type) {

> >> > +      case RTE_PMD_I40E_QUEUE_REGION_SET:

> >> > +                      ret = i40e_set_queue_region(pf, conf_ptr);

> >> > +                      break;

> >>

> >> Does it make sense to add another type to get the current queue

> >> region config?

> 

> Reminder of this one incase missed.


I will add a get command.

> 

> >

> >> > +      case RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET:

> >> > +                      ret = i40e_set_region_flowtype_pf(hw, pf,

> > conf_ptr);

> >> > +                      break;

> >> > +      case RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET:

> >> > +                      ret = -EINVAL;

> >>

> >> Will this be implemented later, or not a valid case at all?

> 

> Reminder of this one incase missed.


Ok, change in v4

> 

> >> > +                      break;

> >> > +      case RTE_PMD_I40E_UP_REGION_SET:> +

> > ret =

> >> i40e_set_up_region(pf, conf_ptr);

> >> > +                      break;

> >> > +      case RTE_PMD_I40E_REGION_ALL_FLUSH_ON:

> >> > +                      ret = i40e_flush_region_all_conf(hw, pf, 1);

> >> > +                      break;

> >> > +      case RTE_PMD_I40E_REGION_ALL_FLUSH_OFF:

> >> > +                      ret = i40e_flush_region_all_conf(hw, pf, 0);

> >>

> >> Can you please describe what flush_on and flush_off are, you can

> >> comment to code if also.

> 

> Reminder of this one incase missed.


I will comment to code in v4.
ALL config from CLI at first will only keep in DPDK software stored in driver, only after " FLUSH_ON ", it commit all configure to HW.
Because I have to set hardware config at a time, so I have to record all CLI command at first.
" FLUSH_OFF " is just clean all configuration about queue region  just now, and restore all to DPDK i40e driver default config when start up.
The following is my test process in CLI: 

./x86_64-native-linuxapp-gcc/app/testpmd -c 1ffff -n 4 -- -i --nb-cores=8  --rxq=8 --txq=8 --port-topology=chained set fwd rxonly port config all rss all queue-region set port 0 region_id 0 queue_start_index 2 queue_num 2 queue-region set port 0 region_id 1 queue_start_index 4 queue_num 2 
queue-region set pf port 0 region_id 0 flowtype 31 
queue-region set pf port 0 region_id 1 flowtype 33 
queue-region set pf port 0 region_id 1 flowtype 34 
queue-region set port 0 UP 1 region_id 0 
queue-region set port 0 UP 3 region_id 1 
queue-region flush on  port 0 
start set verbose 1 sendp([Ether(dst="3C:FD:FE:9F:70:B8", src="52:00:00:00:00:00")/Dot1Q(prio=3)/IP(src="10.0.0.1",dst="192.168.0.2")/TCP(dport=80, sport=80 )/("X"*48)], iface="ens5f2", count=10) sendp([Ether(dst="3C:FD:FE:9F:70:B8", src="52:00:00:00:00:00")/IP(src="10.0.0.1",dst="192.168.0.2")/SCTP(dport=80, sport=80 )/("X"*48)], iface="ens5f2", count=10) sendp([Ether(dst="3C:FD:FE:9F:70:B8", src="52:00:00:00:00:00")/IP(src="10.0.0.1",dst="192.168.0.2")/UDP(dport=80, sport=80 )/("X"*48)], iface="ens5f2", count=10)

> 

> <...>

> 

> >> > @@ -146,6 +160,18 @@ struct rte_pmd_i40e_ptype_mapping {  };

> >> >

> >> >  /**

> >> > + * Queue region information get from CLI.

> >>

> >> It doesn't need to be from CLI, can drop that part

> 

> Reminder of this one incase missed.

> 


Ok, change in v4

> <...>

> 

> >> > +      uint8_t user_priority;

> >> > +      enum rte_pmd_i40e_queue_region_op  op;

> >>

> >> Extra space before "op"

> >

> > Maybe, I SHOULD add uint8_t raw[3] before “op”

> 

> Sorry, I didn't get this one.


Ok

> 

> <...>

> 

> >> rte_pmd_i40e_ptype_mapping_replace(uint8_t

> >> > port,  int rte_pmd_i40e_add_vf_mac_addr(uint8_t port, uint16_t

> >> >vf_id,

> >> >                                                           struct

> > ether_addr *mac_addr);

> >> >

> >> > +/**

> >> > + * Get RSS queue region info from CLI and do configuration for

> >>

> >> Again now this is an API, not just for testpmd, please drop CLI

> 

> Reminder of this one incase missed.


Ok, change in v4

> 

> >>

> >> > + * that port as the command otion type

> >>

> >> s/otion/options

> >>

> 

> Reminder of this one incase missed.


Ok, change in v4

> 

> <...>

> 

> >> > +int rte_pmd_i40e_queue_region_conf(uint8_t port,

> >>

> >> Can you please use port_id as variable name to be consistent.

> 

> Reminder of this one incase missed.


Ok, change in v4

> 

> <...>
  
Zhao1, Wei Sept. 26, 2017, 8:54 a.m. UTC | #16
Hi,  Ferruh



> -----Original Message-----


> From: Yigit, Ferruh


> Sent: Wednesday, September 20, 2017 6:36 PM


> To: Zhao1, Wei <wei.zhao1@intel.com>; dev@dpdk.org


> Subject: Re: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and flush


>


> On 9/15/2017 4:13 AM, Wei Zhao wrote:


> > This feature enable queue regions configuration for RSS in PF/VF, so


> > that different traffic classes or different packet classification


> > types can be separated to different queues in different queue


> > regions.This patch can set queue region range, it include queue number


> > in a region and the index of first queue.


>


> Is following correct:


> So instead of distributing packets to the multiple queues, this will distribute


> packets into queue reqions which may consists of multiple queues.


>


> If so, is there a way to control how packets distributed within same queue


> region to multiple queues?


>


> And is this feature only supported with RSS? Can it be part of RSS


> configuration instead of PMD specific API?


>


> > This patch enable mapping between different priorities (UP) and


>


> User priorities (UP)


>


> > different traffic classes.It also enable mapping between a region


> > index and a sepcific flowtype(PCTYPE).It also provide the solution of


> > flush all configuration about queue region the above described.


> >


> > Signed-off-by: Wei Zhao <wei.zhao1@intel.com<mailto:wei.zhao1@intel.com>>


> > ---


> >  drivers/net/i40e/i40e_ethdev.c            |  19 +-


> >  drivers/net/i40e/i40e_ethdev.h            |  30 ++


> >  drivers/net/i40e/rte_pmd_i40e.c           | 482


> ++++++++++++++++++++++++++++++


> >  drivers/net/i40e/rte_pmd_i40e.h           |  38 +++


> >  drivers/net/i40e/rte_pmd_i40e_version.map |   1 +


> >  5 files changed, 566 insertions(+), 4 deletions(-)


> >


>


> <...>


>


> > +static int


> > +i40e_vsi_update_queue_region_mapping(struct i40e_hw *hw,


> > +                                            struct i40e_pf *pf)


> > +{


> > +      uint16_t i;


> > +      struct i40e_vsi *vsi = pf->main_vsi;


> > +      uint16_t queue_offset, bsf, tc_index;


> > +      struct i40e_vsi_context ctxt;


> > +      struct i40e_aqc_vsi_properties_data *vsi_info;


> > +      struct i40e_queue_region_info *region_info =


> > +                                                      &pf->queue_region;


> > +      uint32_t ret = -EINVAL;


> > +


> > +      if (!region_info->queue_region_number) {




....................



> > +int rte_pmd_i40e_queue_region_conf(uint8_t port,


> > +                                      struct rte_i40e_rss_region_conf *conf_ptr) {


> > +      struct rte_eth_dev *dev = &rte_eth_devices[port];


>


> you need to verify port_id, since this is public API now. Please check other


> APIs in this file.




I have already "if (!is_i40e_supported(dev))" code in v3 in function  rte_pmd_i40e_queue_region_conf.

So, I do not know what is your meaning.







>


> > +      struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data-


> >dev_private);


> > +      struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data-


> >dev_private);


> > +      enum rte_pmd_i40e_queue_region_op op_type = conf_ptr->op;


> > +      uint32_t ret;


>


> This should be signed variable, since you are using it for return and assigning


> negative values.


>


> > +


> > +      if (!is_i40e_supported(dev))


> > +                      return -ENOTSUP;


> > +


> > +      switch (op_type) {


> > +      case RTE_PMD_I40E_QUEUE_REGION_SET:


> > +                      ret = i40e_set_queue_region(pf, conf_ptr);


> > +                      break;


>
  
Ferruh Yigit Sept. 27, 2017, 7:13 p.m. UTC | #17
On 9/26/2017 9:54 AM, Zhao1, Wei wrote:
> Hi,  Ferruh

Hi Wei,

>> -----Original Message-----
>> From: Yigit, Ferruh
>> Sent: Wednesday, September 20, 2017 6:36 PM
>> To: Zhao1, Wei <wei.zhao1@intel.com>; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and
> flush
>>
>> On 9/15/2017 4:13 AM, Wei Zhao wrote:
>> > This feature enable queue regions configuration for RSS in PF/VF, so
>> > that different traffic classes or different packet classification
>> > types can be separated to different queues in different queue
>> > regions.This patch can set queue region range, it include queue number
>> > in a region and the index of first queue.
>>
>> Is following correct:
>> So instead of distributing packets to the multiple queues, this will
> distribute
>> packets into queue reqions which may consists of multiple queues.
>>
>> If so, is there a way to control how packets distributed within same queue
>> region to multiple queues?
>>
>> And is this feature only supported with RSS? Can it be part of RSS
>> configuration instead of PMD specific API?
>>
>> > This patch enable mapping between different priorities (UP) and
>>
>> User priorities (UP)
>>
>> > different traffic classes.It also enable mapping between a region
>> > index and a sepcific flowtype(PCTYPE).It also provide the solution of
>> > flush all configuration about queue region the above described.
>> >
>> > Signed-off-by: Wei Zhao <wei.zhao1@intel.com
> <mailto:wei.zhao1@intel.com>>
>> > ---
>> >  drivers/net/i40e/i40e_ethdev.c            |  19 +-
>> >  drivers/net/i40e/i40e_ethdev.h            |  30 ++
>> >  drivers/net/i40e/rte_pmd_i40e.c           | 482
>> ++++++++++++++++++++++++++++++
>> >  drivers/net/i40e/rte_pmd_i40e.h           |  38 +++
>> >  drivers/net/i40e/rte_pmd_i40e_version.map |   1 +
>> >  5 files changed, 566 insertions(+), 4 deletions(-)
>> >
>>
>> <...>
>>
>> > +static int
>> > +i40e_vsi_update_queue_region_mapping(struct i40e_hw *hw,
>> > +                                            struct i40e_pf *pf)
>> > +{
>> > +      uint16_t i;
>> > +      struct i40e_vsi *vsi = pf->main_vsi;
>> > +      uint16_t queue_offset, bsf, tc_index;
>> > +      struct i40e_vsi_context ctxt;
>> > +      struct i40e_aqc_vsi_properties_data *vsi_info;
>> > +      struct i40e_queue_region_info *region_info =
>> > +                                                     
> &pf->queue_region;
>> > +      uint32_t ret = -EINVAL;
>> > +
>> > +      if (!region_info->queue_region_number) {
>  
> ....................
> 
>> > +int rte_pmd_i40e_queue_region_conf(uint8_t port,
>> > +                                      struct
> rte_i40e_rss_region_conf *conf_ptr) {
>> > +      struct rte_eth_dev *dev = &rte_eth_devices[port];
>>
>> you need to verify port_id, since this is public API now. Please check
> other
>> APIs in this file.
> 
> I have already "if (!is_i40e_supported(dev))" code in v3 in function
>  rte_pmd_i40e_queue_region_conf.
> 
> So, I do not know what is your meaning.

RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);

Normally we don't need to worry about this in PMD because abstraction
layer APIs do this check already. But since this is public API, user can
give any value as port_id parameter, better to verify it.

> 
>>
>> > +      struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data-
>> >dev_private);
>> > +      struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data-
>> >dev_private);
>> > +      enum rte_pmd_i40e_queue_region_op op_type = conf_ptr->op;
>> > +      uint32_t ret;
>>
>> This should be signed variable, since you are using it for return and
> assigning
>> negative values.
>>
>> > +
>> > +      if (!is_i40e_supported(dev))
>> > +                      return -ENOTSUP;
>> > +
>> > +      switch (op_type) {
>> > +      case RTE_PMD_I40E_QUEUE_REGION_SET:
>> > +                      ret = i40e_set_queue_region(pf, conf_ptr);
>> > +                      break;
>>
  
Zhao1, Wei Sept. 28, 2017, 2:40 a.m. UTC | #18
Thank you.

> -----Original Message-----

> From: Yigit, Ferruh

> Sent: Thursday, September 28, 2017 3:13 AM

> To: Zhao1, Wei <wei.zhao1@intel.com>; dev@dpdk.org

> Subject: Re: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and flush

> 

> On 9/26/2017 9:54 AM, Zhao1, Wei wrote:

> > Hi,  Ferruh

> 

> Hi Wei,

> 

> >> -----Original Message-----

> >> From: Yigit, Ferruh

> >> Sent: Wednesday, September 20, 2017 6:36 PM

> >> To: Zhao1, Wei <wei.zhao1@intel.com>; dev@dpdk.org

> >> Subject: Re: [dpdk-dev] [PATCH v3 1/2] net/i40e: queue region set and

> > flush

> >>

> >> On 9/15/2017 4:13 AM, Wei Zhao wrote:

> >> > This feature enable queue regions configuration for RSS in PF/VF,

> >> > so that different traffic classes or different packet

> >> > classification types can be separated to different queues in

> >> > different queue regions.This patch can set queue region range, it

> >> > include queue number in a region and the index of first queue.

> >>

> >> Is following correct:

> >> So instead of distributing packets to the multiple queues, this will

> > distribute

> >> packets into queue reqions which may consists of multiple queues.

> >>

> >> If so, is there a way to control how packets distributed within same

> >> queue region to multiple queues?

> >>

> >> And is this feature only supported with RSS? Can it be part of RSS

> >> configuration instead of PMD specific API?

> >>

> >> > This patch enable mapping between different priorities (UP) and

> >>

> >> User priorities (UP)

> >>

> >> > different traffic classes.It also enable mapping between a region

> >> > index and a sepcific flowtype(PCTYPE).It also provide the solution

> >> > of flush all configuration about queue region the above described.

> >> >

> >> > Signed-off-by: Wei Zhao <wei.zhao1@intel.com

> > <mailto:wei.zhao1@intel.com>>

> >> > ---

> >> >  drivers/net/i40e/i40e_ethdev.c            |  19 +-

> >> >  drivers/net/i40e/i40e_ethdev.h            |  30 ++

> >> >  drivers/net/i40e/rte_pmd_i40e.c           | 482

> >> ++++++++++++++++++++++++++++++

> >> >  drivers/net/i40e/rte_pmd_i40e.h           |  38 +++

> >> >  drivers/net/i40e/rte_pmd_i40e_version.map |   1 +

> >> >  5 files changed, 566 insertions(+), 4 deletions(-)

> >> >

> >>

> >> <...>

> >>

> >> > +static int

> >> > +i40e_vsi_update_queue_region_mapping(struct i40e_hw *hw,

> >> > +                                            struct i40e_pf *pf) {

> >> > +      uint16_t i;

> >> > +      struct i40e_vsi *vsi = pf->main_vsi;

> >> > +      uint16_t queue_offset, bsf, tc_index;

> >> > +      struct i40e_vsi_context ctxt;

> >> > +      struct i40e_aqc_vsi_properties_data *vsi_info;

> >> > +      struct i40e_queue_region_info *region_info =

> >> > +

> > &pf->queue_region;

> >> > +      uint32_t ret = -EINVAL;

> >> > +

> >> > +      if (!region_info->queue_region_number) {

> >

> > ....................

> >

> >> > +int rte_pmd_i40e_queue_region_conf(uint8_t port,

> >> > +                                      struct

> > rte_i40e_rss_region_conf *conf_ptr) {

> >> > +      struct rte_eth_dev *dev = &rte_eth_devices[port];

> >>

> >> you need to verify port_id, since this is public API now. Please

> >> check

> > other

> >> APIs in this file.

> >

> > I have already "if (!is_i40e_supported(dev))" code in v3 in function

> >  rte_pmd_i40e_queue_region_conf.

> >

> > So, I do not know what is your meaning.

> 

> RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV);

> 

> Normally we don't need to worry about this in PMD because abstraction

> layer APIs do this check already. But since this is public API, user can give any

> value as port_id parameter, better to verify it.

> 

> >

> >>

> >> > +      struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data-

> >> >dev_private);

> >> > +      struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data-

> >> >dev_private);

> >> > +      enum rte_pmd_i40e_queue_region_op op_type = conf_ptr->op;

> >> > +      uint32_t ret;

> >>

> >> This should be signed variable, since you are using it for return and

> > assigning

> >> negative values.

> >>

> >> > +

> >> > +      if (!is_i40e_supported(dev))

> >> > +                      return -ENOTSUP;

> >> > +

> >> > +      switch (op_type) {

> >> > +      case RTE_PMD_I40E_QUEUE_REGION_SET:

> >> > +                      ret = i40e_set_queue_region(pf, conf_ptr);

> >> > +                      break;

> >>
  

Patch

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 8e0580c..1663fc0 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -137,10 +137,6 @@ 
 #define I40E_PRTTSYN_TSYNTYPE    0x0e000000
 #define I40E_CYCLECOUNTER_MASK   0xffffffffffffffffULL
 
-#define I40E_MAX_PERCENT            100
-#define I40E_DEFAULT_DCB_APP_NUM    1
-#define I40E_DEFAULT_DCB_APP_PRIO   3
-
 /**
  * Below are values for writing un-exposed registers suggested
  * by silicon experts
@@ -1034,6 +1030,15 @@  i40e_init_fdir_filter_list(struct rte_eth_dev *dev)
 	return ret;
 }
 
+static void
+i40e_init_queue_region_conf(struct rte_eth_dev *dev)
+{
+	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+	struct i40e_queue_region_info *info = &pf->queue_region;
+
+	memset(info, 0, sizeof(struct i40e_queue_region_info));
+}
+
 static int
 eth_i40e_dev_init(struct rte_eth_dev *dev)
 {
@@ -1309,6 +1314,9 @@  eth_i40e_dev_init(struct rte_eth_dev *dev)
 	if (ret < 0)
 		goto err_init_fdir_filter_list;
 
+	/* initialize queue region configuration */
+	i40e_init_queue_region_conf(dev);
+
 	return 0;
 
 err_init_fdir_filter_list:
@@ -1464,6 +1472,9 @@  eth_i40e_dev_uninit(struct rte_eth_dev *dev)
 	/* Remove all Traffic Manager configuration */
 	i40e_tm_conf_uninit(dev);
 
+	/* Remove all the queue region configuration */
+	i40e_flush_region_all_conf(hw, pf, 0);
+
 	return 0;
 }
 
diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index ad80f0f..d612886 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -260,6 +260,16 @@  enum i40e_flxpld_layer_idx {
 #define I40E_QOS_BW_WEIGHT_MIN 1
 /* The max bandwidth weight is 127. */
 #define I40E_QOS_BW_WEIGHT_MAX 127
+/* The max queue region index is 7. */
+#define I40E_REGION_MAX_INDEX 7
+/* The max queue region userpriority is 7. */
+#define I40E_REGION_USERPRIORITY_MAX_INDEX 7
+/* The max pctype index is 63. */
+#define I40E_REGION_PCTYPE_MAX_INDEX 63
+
+#define I40E_MAX_PERCENT            100
+#define I40E_DEFAULT_DCB_APP_NUM    1
+#define I40E_DEFAULT_DCB_APP_PRIO   3
 
 /**
  * The overhead from MTU to max frame size.
@@ -541,6 +551,22 @@  struct i40e_ethertype_rule {
 	struct rte_hash *hash_table;
 };
 
+/* queue region info */
+struct i40e_region_info {
+	uint8_t region_id;
+	uint8_t queue_start_index;
+	uint8_t queue_num;
+	uint8_t user_priority_num;
+	uint8_t user_priority[I40E_MAX_USER_PRIORITY];
+	uint8_t flowtype_num;
+	uint8_t hw_flowtype[I40E_FILTER_PCTYPE_MAX];
+};
+
+struct i40e_queue_region_info {
+	uint16_t queue_region_number;
+	struct i40e_region_info region[I40E_REGION_MAX_INDEX + 1];
+};
+
 /* Tunnel filter number HW supports */
 #define I40E_MAX_TUNNEL_FILTER_NUM 400
 
@@ -776,6 +802,7 @@  struct i40e_pf {
 	struct i40e_fdir_info fdir; /* flow director info */
 	struct i40e_ethertype_rule ethertype; /* Ethertype filter rule */
 	struct i40e_tunnel_rule tunnel; /* Tunnel filter rule */
+	struct i40e_queue_region_info queue_region; /* queue region info */
 	struct i40e_fc_conf fc_conf; /* Flow control conf */
 	struct i40e_mirror_rule_list mirror_list;
 	uint16_t nb_mirror_rule;   /* The number of mirror rules */
@@ -1003,6 +1030,9 @@  void i40e_check_write_reg(struct i40e_hw *hw, uint32_t addr, uint32_t val);
 int i40e_tm_ops_get(struct rte_eth_dev *dev, void *ops);
 void i40e_tm_conf_init(struct rte_eth_dev *dev);
 void i40e_tm_conf_uninit(struct rte_eth_dev *dev);
+int i40e_flush_region_all_conf(struct i40e_hw *hw,
+				struct i40e_pf *pf, uint16_t on);
+
 
 #define I40E_DEV_TO_PCI(eth_dev) \
 	RTE_DEV_TO_PCI((eth_dev)->device)
diff --git a/drivers/net/i40e/rte_pmd_i40e.c b/drivers/net/i40e/rte_pmd_i40e.c
index d69a472..9a75f21 100644
--- a/drivers/net/i40e/rte_pmd_i40e.c
+++ b/drivers/net/i40e/rte_pmd_i40e.c
@@ -35,6 +35,7 @@ 
 #include <rte_tailq.h>
 
 #include "base/i40e_prototype.h"
+#include "base/i40e_dcb.h"
 #include "i40e_ethdev.h"
 #include "i40e_pf.h"
 #include "i40e_rxtx.h"
@@ -2161,3 +2162,484 @@  rte_pmd_i40e_add_vf_mac_addr(uint8_t port, uint16_t vf_id,
 
 	return 0;
 }
+
+static int
+i40e_vsi_update_queue_region_mapping(struct i40e_hw *hw,
+			      struct i40e_pf *pf)
+{
+	uint16_t i;
+	struct i40e_vsi *vsi = pf->main_vsi;
+	uint16_t queue_offset, bsf, tc_index;
+	struct i40e_vsi_context ctxt;
+	struct i40e_aqc_vsi_properties_data *vsi_info;
+	struct i40e_queue_region_info *region_info =
+				&pf->queue_region;
+	uint32_t ret = -EINVAL;
+
+	if (!region_info->queue_region_number) {
+		PMD_INIT_LOG(ERR, "there is no that region id been set before");
+		return ret;
+	}
+
+	memset(&ctxt, 0, sizeof(struct i40e_vsi_context));
+
+	/* Update Queue Pairs Mapping for currently enabled UPs */
+	ctxt.seid = vsi->seid;
+	ctxt.pf_num = hw->pf_id;
+	ctxt.vf_num = 0;
+	ctxt.uplink_seid = vsi->uplink_seid;
+	ctxt.info = vsi->info;
+	vsi_info = &ctxt.info;
+
+	memset(vsi_info->tc_mapping, 0, sizeof(uint16_t) * 8);
+	memset(vsi_info->queue_mapping, 0, sizeof(uint16_t) * 16);
+
+	/**
+	 * Configure queue region and queue mapping parameters,
+	 * for enabled queue region, allocate queues to this region.
+	 */
+
+	for (i = 0; i < region_info->queue_region_number; i++) {
+		tc_index = region_info->region[i].region_id;
+		bsf = rte_bsf32(region_info->region[i].queue_num);
+		queue_offset = region_info->region[i].queue_start_index;
+		vsi_info->tc_mapping[tc_index] = rte_cpu_to_le_16(
+			(queue_offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
+				(bsf << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT));
+	}
+
+	/* Associate queue number with VSI, Keep vsi->nb_qps unchanged */
+	if (vsi->type == I40E_VSI_SRIOV) {
+		vsi_info->mapping_flags |=
+			rte_cpu_to_le_16(I40E_AQ_VSI_QUE_MAP_NONCONTIG);
+		for (i = 0; i < vsi->nb_qps; i++)
+			vsi_info->queue_mapping[i] =
+				rte_cpu_to_le_16(vsi->base_queue + i);
+	} else {
+		vsi_info->mapping_flags |=
+			rte_cpu_to_le_16(I40E_AQ_VSI_QUE_MAP_CONTIG);
+		vsi_info->queue_mapping[0] = rte_cpu_to_le_16(vsi->base_queue);
+	}
+	vsi_info->valid_sections |=
+		rte_cpu_to_le_16(I40E_AQ_VSI_PROP_QUEUE_MAP_VALID);
+
+	/* Update the VSI after updating the VSI queue-mapping information */
+	ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+	if (ret) {
+		PMD_INIT_LOG(ERR, "Failed to configure queue region "
+			"mapping = %d ", hw->aq.asq_last_status);
+		return ret;
+	}
+	/* update the local VSI info with updated queue map */
+	(void)rte_memcpy(&vsi->info.tc_mapping, &ctxt.info.tc_mapping,
+					sizeof(vsi->info.tc_mapping));
+	(void)rte_memcpy(&vsi->info.queue_mapping,
+			&ctxt.info.queue_mapping,
+		sizeof(vsi->info.queue_mapping));
+	vsi->info.mapping_flags = ctxt.info.mapping_flags;
+	vsi->info.valid_sections = 0;
+
+	return 0;
+}
+
+
+static int
+i40e_set_queue_region(struct i40e_pf *pf,
+				struct rte_i40e_rss_region_conf *conf_ptr)
+{
+	uint16_t tc_size_tb[7] = {1, 2, 4, 8, 16, 32, 64};
+	uint16_t i;
+	struct i40e_vsi *main_vsi = pf->main_vsi;
+	struct i40e_queue_region_info *info = &pf->queue_region;
+	uint32_t ret = -EINVAL;
+
+	for (i = 0; i < I40E_REGION_MAX_INDEX; i++)
+		if (conf_ptr->queue_num == tc_size_tb[i])
+			break;
+
+	if (i == I40E_REGION_MAX_INDEX) {
+		printf("The region sizes should be any of the following "
+		"values: 1, 2, 4, 8, 16, 32, 64 as long as the "
+		"total number of queues do not exceed the VSI allocation");
+		return ret;
+	}
+
+	if (conf_ptr->region_id >= I40E_REGION_MAX_INDEX) {
+		PMD_INIT_LOG(ERR, "the queue region max index is 7");
+		return ret;
+	}
+
+	if ((conf_ptr->queue_start_index + conf_ptr->queue_num)
+					> main_vsi->nb_used_qps) {
+		PMD_INIT_LOG(ERR, "the queue index exceeds the VSI range");
+		return ret;
+	}
+
+	if (info->queue_region_number) {
+		for (i = 0; i < info->queue_region_number; i++)
+			if (conf_ptr->region_id == info->region[i].region_id)
+				break;
+
+		if ((i == info->queue_region_number) &&
+			(i <= I40E_REGION_MAX_INDEX)) {
+			info->region[i].region_id = conf_ptr->region_id;
+			info->region[i].queue_num = conf_ptr->queue_num;
+			info->region[i].queue_start_index =
+				conf_ptr->queue_start_index;
+			info->queue_region_number++;
+		} else {
+			PMD_INIT_LOG(ERR, "queue region number "
+				"exceeds maxnum 8 or the "
+				"queue region id has "
+				"been set before");
+			return ret;
+		}
+	} else {
+		info->region[0].region_id = conf_ptr->region_id;
+		info->region[0].queue_num = conf_ptr->queue_num;
+		info->region[0].queue_start_index =
+			conf_ptr->queue_start_index;
+		info->queue_region_number++;
+	}
+
+	return 0;
+}
+
+static int
+i40e_set_region_flowtype_pf(struct i40e_hw *hw,
+		struct i40e_pf *pf, struct rte_i40e_rss_region_conf *conf_ptr)
+{
+	uint32_t pfqf_hregion;
+	uint32_t ret = -EINVAL;
+	struct i40e_queue_region_info *info = &pf->queue_region;
+	uint16_t i, j, index, flowtype_set = 0;
+	uint16_t region_index, flowtype_index;
+
+	if (conf_ptr->region_id > I40E_PFQF_HREGION_MAX_INDEX) {
+		PMD_INIT_LOG(ERR, "the queue region max index is 7");
+		return ret;
+	}
+
+	if (conf_ptr->hw_flowtype >= I40E_FILTER_PCTYPE_MAX) {
+		PMD_INIT_LOG(ERR, "the hw_flowtype or PCTYPE max index is 63");
+		return ret;
+	}
+
+	if (info->queue_region_number) {
+		for (i = 0; i < info->queue_region_number; i++)
+			if (conf_ptr->region_id == info->region[i].region_id)
+				break;
+
+		if (i == info->queue_region_number) {
+			PMD_INIT_LOG(ERR, "that region id has not "
+				"been set before");
+			return ret;
+		}
+		region_index = i;
+
+		for (i = 0; i < info->queue_region_number; i++) {
+			for (j = 0; j < info->region[i].flowtype_num; j++) {
+				if (conf_ptr->hw_flowtype ==
+					info->region[i].hw_flowtype[j]) {
+					flowtype_set = 1;
+					break;
+				}
+			}
+		}
+
+		if (flowtype_set) {
+			PMD_INIT_LOG(ERR, "that hw_flowtype "
+				"has been set before");
+			return ret;
+		}
+		flowtype_index = info->region[region_index].flowtype_num;
+		info->region[region_index].hw_flowtype[flowtype_index] =
+						conf_ptr->hw_flowtype;
+		info->region[region_index].flowtype_num++;
+	} else  {
+		PMD_INIT_LOG(ERR, "there is no that region id been set before");
+		return ret;
+	}
+
+	index = conf_ptr->hw_flowtype >> 3;
+	pfqf_hregion = i40e_read_rx_ctl(hw, I40E_PFQF_HREGION(index));
+
+	if ((conf_ptr->hw_flowtype & 0x7) == 0) {
+		pfqf_hregion |= conf_ptr->region_id <<
+				I40E_PFQF_HREGION_REGION_0_SHIFT;
+		pfqf_hregion |= 1 << I40E_PFQF_HREGION_OVERRIDE_ENA_0_SHIFT;
+	} else if ((conf_ptr->hw_flowtype & 0x7) == 1) {
+		pfqf_hregion |= conf_ptr->region_id <<
+				I40E_PFQF_HREGION_REGION_1_SHIFT;
+		pfqf_hregion |= 1 << I40E_PFQF_HREGION_OVERRIDE_ENA_1_SHIFT;
+	} else if ((conf_ptr->hw_flowtype & 0x7) == 2) {
+		pfqf_hregion |= conf_ptr->region_id <<
+				I40E_PFQF_HREGION_REGION_2_SHIFT;
+		pfqf_hregion |= 1 << I40E_PFQF_HREGION_OVERRIDE_ENA_2_SHIFT;
+	} else if ((conf_ptr->hw_flowtype & 0x7) == 3) {
+		pfqf_hregion |= conf_ptr->region_id <<
+				I40E_PFQF_HREGION_REGION_3_SHIFT;
+		pfqf_hregion |= 1 << I40E_PFQF_HREGION_OVERRIDE_ENA_3_SHIFT;
+	} else if ((conf_ptr->hw_flowtype & 0x7) == 4) {
+		pfqf_hregion |= conf_ptr->region_id <<
+				I40E_PFQF_HREGION_REGION_4_SHIFT;
+		pfqf_hregion |= 1 << I40E_PFQF_HREGION_OVERRIDE_ENA_4_SHIFT;
+	} else if ((conf_ptr->hw_flowtype & 0x7) == 5) {
+		pfqf_hregion |= conf_ptr->region_id <<
+				I40E_PFQF_HREGION_REGION_5_SHIFT;
+		pfqf_hregion |= 1 << I40E_PFQF_HREGION_OVERRIDE_ENA_5_SHIFT;
+	} else if ((conf_ptr->hw_flowtype & 0x7) == 6) {
+		pfqf_hregion |= conf_ptr->region_id <<
+				I40E_PFQF_HREGION_REGION_6_SHIFT;
+		pfqf_hregion |= 1 << I40E_PFQF_HREGION_OVERRIDE_ENA_6_SHIFT;
+	} else {
+		pfqf_hregion |= conf_ptr->region_id <<
+				I40E_PFQF_HREGION_REGION_7_SHIFT;
+		pfqf_hregion |= 1 << I40E_PFQF_HREGION_OVERRIDE_ENA_7_SHIFT;
+	}
+
+	i40e_write_rx_ctl(hw, I40E_PFQF_HREGION(index), pfqf_hregion);
+
+	return 0;
+}
+
+static int
+i40e_set_up_region(struct i40e_pf *pf,
+				struct rte_i40e_rss_region_conf *conf_ptr)
+{
+	struct i40e_queue_region_info *info = &pf->queue_region;
+	uint32_t ret = -EINVAL;
+	uint16_t i, j, region_index, up_set = 0;
+
+	if (conf_ptr->user_priority > I40E_REGION_USERPRIORITY_MAX_INDEX) {
+		PMD_INIT_LOG(ERR, "the queue region max index is 7");
+		return ret;
+	}
+
+	if (conf_ptr->region_id >= I40E_REGION_MAX_INDEX) {
+		PMD_INIT_LOG(ERR, "the region_id max index is 7");
+		return ret;
+	}
+
+	if (info->queue_region_number) {
+		for (i = 0; i < info->queue_region_number; i++)
+			if (conf_ptr->region_id == info->region[i].region_id)
+				break;
+
+		if (i == info->queue_region_number) {
+			PMD_INIT_LOG(ERR, "that region id "
+				"has not been set before");
+			return ret;
+		}
+		region_index = i;
+
+		for (i = 0; i < info->queue_region_number; i++) {
+			for (j = 0; j <
+				info->region[i].user_priority_num; j++) {
+				if (info->region[i].user_priority[j] ==
+					conf_ptr->user_priority) {
+					up_set = 1;
+					break;
+				}
+			}
+		}
+
+		if (up_set) {
+			PMD_INIT_LOG(ERR, "that user priority "
+				"has been set before");
+			return ret;
+		}
+
+		j = info->region[region_index].user_priority_num;
+		info->region[region_index].user_priority[j] =
+						conf_ptr->user_priority;
+		info->region[region_index].user_priority_num++;
+	} else {
+		PMD_INIT_LOG(ERR, "there is no that region id been set before");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int
+i40e_queue_region_dcb_configure(struct i40e_hw *hw,
+				struct i40e_pf *pf)
+{
+	struct i40e_dcbx_config dcb_cfg_local;
+	struct i40e_dcbx_config *dcb_cfg;
+	struct i40e_queue_region_info *info = &pf->queue_region;
+	struct i40e_dcbx_config *old_cfg = &hw->local_dcbx_config;
+	uint32_t ret = -EINVAL;
+	uint16_t i, j, prio_index, region_index;
+	uint8_t tc_map, tc_bw, bw_lf;
+
+	if (!info->queue_region_number) {
+		PMD_INIT_LOG(ERR, "there is no that region id been set before");
+		return ret;
+	}
+
+	dcb_cfg = &dcb_cfg_local;
+	memset(dcb_cfg, 0, sizeof(struct i40e_dcbx_config));
+
+	/* assume each tc has the same bw */
+	tc_bw = I40E_MAX_PERCENT / info->queue_region_number;
+	for (i = 0; i < info->queue_region_number; i++)
+		dcb_cfg->etscfg.tcbwtable[i] = tc_bw;
+	/* to ensure the sum of tcbw is equal to 100 */
+	bw_lf = I40E_MAX_PERCENT %  info->queue_region_number;
+	for (i = 0; i < bw_lf; i++)
+		dcb_cfg->etscfg.tcbwtable[i]++;
+
+	/* assume each tc has the same Transmission Selection Algorithm */
+	for (i = 0; i < info->queue_region_number; i++)
+		dcb_cfg->etscfg.tsatable[i] = I40E_IEEE_TSA_ETS;
+
+	for (i = 0; i < info->queue_region_number; i++) {
+		for (j = 0; j < info->region[i].user_priority_num; j++) {
+			prio_index = info->region[i].user_priority[j];
+			region_index = info->region[i].region_id;
+			dcb_cfg->etscfg.prioritytable[prio_index] =
+						region_index;
+		}
+	}
+
+	/* FW needs one App to configure HW */
+	dcb_cfg->numapps = I40E_DEFAULT_DCB_APP_NUM;
+	dcb_cfg->app[0].selector = I40E_APP_SEL_ETHTYPE;
+	dcb_cfg->app[0].priority = I40E_DEFAULT_DCB_APP_PRIO;
+	dcb_cfg->app[0].protocolid = I40E_APP_PROTOID_FCOE;
+
+	tc_map = RTE_LEN2MASK(info->queue_region_number, uint8_t);
+
+	dcb_cfg->pfc.willing = 0;
+	dcb_cfg->pfc.pfccap = I40E_MAX_TRAFFIC_CLASS;
+	dcb_cfg->pfc.pfcenable = tc_map;
+
+	/* Copy the new config to the current config */
+	*old_cfg = *dcb_cfg;
+	old_cfg->etsrec = old_cfg->etscfg;
+	ret = i40e_set_dcb_config(hw);
+
+	if (ret) {
+		PMD_INIT_LOG(ERR, "Set queue region DCB Config failed, err"
+			" %s aq_err %s",
+			 i40e_stat_str(hw, ret),
+			 i40e_aq_str(hw, hw->aq.asq_last_status));
+		return ret;
+	}
+
+	return 0;
+}
+
+static int
+i40e_queue_region_dcb_configure_default(struct i40e_hw *hw)
+{
+	uint16_t i;
+	uint32_t ret = -EINVAL;
+
+	memset(&hw->local_dcbx_config, 0,
+	sizeof(struct i40e_dcbx_config));
+	/* set dcb default configuration */
+	hw->local_dcbx_config.etscfg.willing = 0;
+	hw->local_dcbx_config.etscfg.maxtcs = 0;
+	hw->local_dcbx_config.etscfg.tcbwtable[0] = 100;
+	hw->local_dcbx_config.etscfg.tsatable[0] =
+			I40E_IEEE_TSA_ETS;
+	/* all UPs mapping to region 0 */
+	for (i = 0; i < I40E_MAX_USER_PRIORITY; i++)
+		hw->local_dcbx_config.etscfg.prioritytable[i] = 0;
+	hw->local_dcbx_config.etsrec =
+		hw->local_dcbx_config.etscfg;
+	hw->local_dcbx_config.pfc.willing = 0;
+	hw->local_dcbx_config.pfc.pfccap =
+			I40E_MAX_TRAFFIC_CLASS;
+	/* FW needs one App to configure HW */
+	hw->local_dcbx_config.numapps = 1;
+	hw->local_dcbx_config.app[0].selector =
+			I40E_APP_SEL_ETHTYPE;
+	hw->local_dcbx_config.app[0].priority = 3;
+	hw->local_dcbx_config.app[0].protocolid =
+			I40E_APP_PROTOID_FCOE;
+	ret = i40e_set_dcb_config(hw);
+	if (ret) {
+		PMD_INIT_LOG(ERR,
+		"default dcb config fails. err = %d, aq_err = %d.",
+			ret, hw->aq.asq_last_status);
+		return ret;
+	}
+
+	return 0;
+}
+
+int
+i40e_flush_region_all_conf(struct i40e_hw *hw, struct i40e_pf *pf,
+				uint16_t on)
+{
+	uint16_t i;
+	struct i40e_queue_region_info *info = &pf->queue_region;
+
+	if (on) {
+		i40e_vsi_update_queue_region_mapping(hw, pf);
+		i40e_queue_region_dcb_configure(hw, pf);
+		return 0;
+	}
+
+	info->queue_region_number = 1;
+	info->region[0].queue_num = 64;
+	info->region[0].queue_start_index = 0;
+
+	i40e_vsi_update_queue_region_mapping(hw, pf);
+	i40e_queue_region_dcb_configure_default(hw);
+
+	for (i = 0; i < I40E_PFQF_HREGION_MAX_INDEX; i++)
+		i40e_write_rx_ctl(hw, I40E_PFQF_HREGION(i), 0);
+
+	memset(info, 0, sizeof(struct i40e_queue_region_info));
+
+	return 0;
+}
+
+int rte_pmd_i40e_queue_region_conf(uint8_t port,
+			struct rte_i40e_rss_region_conf *conf_ptr)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port];
+	struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
+	struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+	enum rte_pmd_i40e_queue_region_op op_type = conf_ptr->op;
+	uint32_t ret;
+
+	if (!is_i40e_supported(dev))
+		return -ENOTSUP;
+
+	switch (op_type) {
+	case RTE_PMD_I40E_QUEUE_REGION_SET:
+		ret = i40e_set_queue_region(pf, conf_ptr);
+		break;
+	case RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET:
+		ret = i40e_set_region_flowtype_pf(hw, pf, conf_ptr);
+		break;
+	case RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET:
+		ret = -EINVAL;
+		break;
+	case RTE_PMD_I40E_UP_REGION_SET:
+		ret = i40e_set_up_region(pf, conf_ptr);
+		break;
+	case RTE_PMD_I40E_REGION_ALL_FLUSH_ON:
+		ret = i40e_flush_region_all_conf(hw, pf, 1);
+		break;
+	case RTE_PMD_I40E_REGION_ALL_FLUSH_OFF:
+		ret = i40e_flush_region_all_conf(hw, pf, 0);
+		break;
+
+	default:
+		PMD_DRV_LOG(WARNING, "op type (%d) not supported",
+			    op_type);
+		ret = -EINVAL;
+		break;
+	}
+
+	I40E_WRITE_FLUSH(hw);
+
+	return ret;
+}
diff --git a/drivers/net/i40e/rte_pmd_i40e.h b/drivers/net/i40e/rte_pmd_i40e.h
index 155b7e8..a1329f4 100644
--- a/drivers/net/i40e/rte_pmd_i40e.h
+++ b/drivers/net/i40e/rte_pmd_i40e.h
@@ -91,6 +91,20 @@  enum rte_pmd_i40e_package_info {
 	RTE_PMD_I40E_PKG_INFO_MAX = 0xFFFFFFFF
 };
 
+/**
+ *  Option types of queue region.
+ */
+enum rte_pmd_i40e_queue_region_op {
+	RTE_PMD_I40E_REGION_UNDEFINED = 0,
+	RTE_PMD_I40E_QUEUE_REGION_SET,      /**< add queue region set*/
+	RTE_PMD_I40E_REGION_FLOWTYPE_PF_SET,   /**< add pf region pctype set */
+	RTE_PMD_I40E_REGION_FLOWTYPE_VF_SET,   /**< add vf region pctype set */
+	RTE_PMD_I40E_UP_REGION_SET,   /**< add queue region pctype set */
+	RTE_PMD_I40E_REGION_ALL_FLUSH_ON,   /**< flush on all configuration */
+	RTE_PMD_I40E_REGION_ALL_FLUSH_OFF,   /**< flush off all configuration */
+	RTE_PMD_I40E_QUEUE_REGION_OP_MAX
+};
+
 #define RTE_PMD_I40E_DDP_NAME_SIZE 32
 
 /**
@@ -146,6 +160,18 @@  struct rte_pmd_i40e_ptype_mapping {
 };
 
 /**
+ * Queue region information get from CLI.
+ */
+struct rte_i40e_rss_region_conf {
+	uint8_t region_id;
+	uint8_t hw_flowtype;
+	uint8_t queue_start_index;
+	uint8_t queue_num;
+	uint8_t user_priority;
+	enum rte_pmd_i40e_queue_region_op  op;
+};
+
+/**
  * Notify VF when PF link status changes.
  *
  * @param port
@@ -657,4 +683,16 @@  int rte_pmd_i40e_ptype_mapping_replace(uint8_t port,
 int rte_pmd_i40e_add_vf_mac_addr(uint8_t port, uint16_t vf_id,
 				 struct ether_addr *mac_addr);
 
+/**
+ * Get RSS queue region info from CLI and do configuration for
+ * that port as the command otion type
+ *
+ * @param port
+ *    pointer to port identifier of the device
+ * @param conf_ptr
+ *    pointer to the struct that contain all the
+ *    region configuration parameters
+ */
+int rte_pmd_i40e_queue_region_conf(uint8_t port,
+		struct rte_i40e_rss_region_conf *conf_ptr);
 #endif /* _PMD_I40E_H_ */
diff --git a/drivers/net/i40e/rte_pmd_i40e_version.map b/drivers/net/i40e/rte_pmd_i40e_version.map
index ef8882b..c3ee2da 100644
--- a/drivers/net/i40e/rte_pmd_i40e_version.map
+++ b/drivers/net/i40e/rte_pmd_i40e_version.map
@@ -50,5 +50,6 @@  DPDK_17.11 {
 	global:
 
 	rte_pmd_i40e_add_vf_mac_addr;
+	rte_pmd_i40e_queue_region_conf;
 
 } DPDK_17.08;