[dpdk-dev,v3,7/7] app/testpmd: adjust ethdev port ownership

Message ID 1516293317-30748-8-git-send-email-matan@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail apply patch file failure

Commit Message

Matan Azrad Jan. 18, 2018, 4:35 p.m. UTC
  Testpmd should not use ethdev ports which are managed by other DPDK
entities.

Set Testpmd ownership to each port which is not used by other entity and
prevent any usage of ethdev ports which are not owned by Testpmd.

Signed-off-by: Matan Azrad <matan@mellanox.com>
---
 app/test-pmd/cmdline.c      | 89 +++++++++++++++++++--------------------------
 app/test-pmd/cmdline_flow.c |  2 +-
 app/test-pmd/config.c       | 37 ++++++++++---------
 app/test-pmd/parameters.c   |  4 +-
 app/test-pmd/testpmd.c      | 63 ++++++++++++++++++++------------
 app/test-pmd/testpmd.h      |  3 ++
 6 files changed, 103 insertions(+), 95 deletions(-)
  

Comments

Ananyev, Konstantin Jan. 19, 2018, 12:37 p.m. UTC | #1
Hi Matan,

> -----Original Message-----
> From: Matan Azrad [mailto:matan@mellanox.com]
> Sent: Thursday, January 18, 2018 4:35 PM
> To: Thomas Monjalon <thomas@monjalon.net>; Gaetan Rivet <gaetan.rivet@6wind.com>; Wu, Jingjing <jingjing.wu@intel.com>
> Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>; Richardson, Bruce <bruce.richardson@intel.com>; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>
> Subject: [PATCH v3 7/7] app/testpmd: adjust ethdev port ownership
> 
> Testpmd should not use ethdev ports which are managed by other DPDK
> entities.
> 
> Set Testpmd ownership to each port which is not used by other entity and
> prevent any usage of ethdev ports which are not owned by Testpmd.
> 
> Signed-off-by: Matan Azrad <matan@mellanox.com>
> ---
>  app/test-pmd/cmdline.c      | 89 +++++++++++++++++++--------------------------
>  app/test-pmd/cmdline_flow.c |  2 +-
>  app/test-pmd/config.c       | 37 ++++++++++---------
>  app/test-pmd/parameters.c   |  4 +-
>  app/test-pmd/testpmd.c      | 63 ++++++++++++++++++++------------
>  app/test-pmd/testpmd.h      |  3 ++
>  6 files changed, 103 insertions(+), 95 deletions(-)
> 
> diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
> index 31919ba..6199c64 100644
> --- a/app/test-pmd/cmdline.c
> +++ b/app/test-pmd/cmdline.c
> @@ -1394,7 +1394,7 @@ struct cmd_config_speed_all {
>  			&link_speed) < 0)
>  		return;
> 
> -	RTE_ETH_FOREACH_DEV(pid) {
> +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {

Why do we need all these changes?
As I understand you changed definition of RTE_ETH_FOREACH_DEV(),
so no testpmd should work ok default (no_owner case).
Am I missing something here? 
Konstantin

>  		ports[pid].dev_conf.link_speeds = link_speed;
>  	}
> 
> @@ -1902,7 +1902,7 @@ struct cmd_config_rss {
>  	struct cmd_config_rss *res = parsed_result;
>  	struct rte_eth_rss_conf rss_conf = { .rss_key_len = 0, };
>  	int diag;
> -	uint8_t i;
> +	uint16_t pid;
> 
>  	if (!strcmp(res->value, "all"))
>  		rss_conf.rss_hf = ETH_RSS_IP | ETH_RSS_TCP |
> @@ -1936,12 +1936,12 @@ struct cmd_config_rss {
>  		return;
>  	}
>  	rss_conf.rss_key = NULL;
> -	for (i = 0; i < rte_eth_dev_count(); i++) {
> -		diag = rte_eth_dev_rss_hash_update(i, &rss_conf);
> +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
> +		diag = rte_eth_dev_rss_hash_update(pid, &rss_conf);
>  		if (diag < 0)
>  			printf("Configuration of RSS hash at ethernet port %d "
>  				"failed with error (%d): %s.\n",
> -				i, -diag, strerror(-diag));
> +				pid, -diag, strerror(-diag));
>  	}
>  }
> 
> @@ -3686,10 +3686,9 @@ struct cmd_csum_result {
>  	uint64_t csum_offloads = 0;
>  	struct rte_eth_dev_info dev_info;
> 
> -	if (port_id_is_invalid(res->port_id, ENABLED_WARN)) {
> -		printf("invalid port %d\n", res->port_id);
> +	if (port_id_is_invalid(res->port_id, ENABLED_WARN))
>  		return;
> -	}
> +
>  	if (!port_is_stopped(res->port_id)) {
>  		printf("Please stop port %d first\n", res->port_id);
>  		return;
> @@ -4364,8 +4363,8 @@ struct cmd_gso_show_result {
>  {
>  	struct cmd_gso_show_result *res = parsed_result;
> 
> -	if (!rte_eth_dev_is_valid_port(res->cmd_pid)) {
> -		printf("invalid port id %u\n", res->cmd_pid);
> +	if (port_id_is_invalid(res->cmd_pid, ENABLED_WARN)) {
> +		printf("invalid/not owned port id %u\n", res->cmd_pid);
>  		return;
>  	}
>  	if (!strcmp(res->cmd_keyword, "gso")) {
> @@ -5375,7 +5374,12 @@ static void cmd_create_bonded_device_parsed(void *parsed_result,
>  				port_id);
> 
>  		/* Update number of ports */
> -		nb_ports = rte_eth_dev_count();
> +		if (rte_eth_dev_owner_set(port_id, &my_owner) != 0) {
> +			printf("Error: cannot own new attached port %d\n",
> +			       port_id);
> +			return;
> +		}
> +		nb_ports++;
>  		reconfig(port_id, res->socket);
>  		rte_eth_promiscuous_enable(port_id);
>  	}
> @@ -5484,10 +5488,8 @@ static void cmd_set_bond_mon_period_parsed(void *parsed_result,
>  	struct cmd_set_bond_mon_period_result *res = parsed_result;
>  	int ret;
> 
> -	if (res->port_num >= nb_ports) {
> -		printf("Port id %d must be less than %d\n", res->port_num, nb_ports);
> +	if (port_id_is_invalid(res->port_num, ENABLED_WARN))
>  		return;
> -	}
> 
>  	ret = rte_eth_bond_link_monitoring_set(res->port_num, res->period_ms);
> 
> @@ -5545,11 +5547,8 @@ struct cmd_set_bonding_agg_mode_policy_result {
>  	struct cmd_set_bonding_agg_mode_policy_result *res = parsed_result;
>  	uint8_t policy = AGG_BANDWIDTH;
> 
> -	if (res->port_num >= nb_ports) {
> -		printf("Port id %d must be less than %d\n",
> -				res->port_num, nb_ports);
> +	if (port_id_is_invalid(res->port_num, ENABLED_WARN))
>  		return;
> -	}
> 
>  	if (!strcmp(res->policy, "bandwidth"))
>  		policy = AGG_BANDWIDTH;
> @@ -5808,7 +5807,7 @@ static void cmd_set_promisc_mode_parsed(void *parsed_result,
> 
>  	/* all ports */
>  	if (allports) {
> -		RTE_ETH_FOREACH_DEV(i) {
> +		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id) {
>  			if (enable)
>  				rte_eth_promiscuous_enable(i);
>  			else
> @@ -5888,7 +5887,7 @@ static void cmd_set_allmulti_mode_parsed(void *parsed_result,
> 
>  	/* all ports */
>  	if (allports) {
> -		RTE_ETH_FOREACH_DEV(i) {
> +		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id) {
>  			if (enable)
>  				rte_eth_allmulticast_enable(i);
>  			else
> @@ -6622,31 +6621,31 @@ static void cmd_showportall_parsed(void *parsed_result,
>  	struct cmd_showportall_result *res = parsed_result;
>  	if (!strcmp(res->show, "clear")) {
>  		if (!strcmp(res->what, "stats"))
> -			RTE_ETH_FOREACH_DEV(i)
> +			RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
>  				nic_stats_clear(i);
>  		else if (!strcmp(res->what, "xstats"))
> -			RTE_ETH_FOREACH_DEV(i)
> +			RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
>  				nic_xstats_clear(i);
>  	} else if (!strcmp(res->what, "info"))
> -		RTE_ETH_FOREACH_DEV(i)
> +		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
>  			port_infos_display(i);
>  	else if (!strcmp(res->what, "stats"))
> -		RTE_ETH_FOREACH_DEV(i)
> +		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
>  			nic_stats_display(i);
>  	else if (!strcmp(res->what, "xstats"))
> -		RTE_ETH_FOREACH_DEV(i)
> +		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
>  			nic_xstats_display(i);
>  	else if (!strcmp(res->what, "fdir"))
> -		RTE_ETH_FOREACH_DEV(i)
> +		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
>  			fdir_get_infos(i);
>  	else if (!strcmp(res->what, "stat_qmap"))
> -		RTE_ETH_FOREACH_DEV(i)
> +		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
>  			nic_stats_mapping_display(i);
>  	else if (!strcmp(res->what, "dcb_tc"))
> -		RTE_ETH_FOREACH_DEV(i)
> +		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
>  			port_dcb_info_display(i);
>  	else if (!strcmp(res->what, "cap"))
> -		RTE_ETH_FOREACH_DEV(i)
> +		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
>  			port_offload_cap_display(i);
>  }
> 
> @@ -10698,10 +10697,8 @@ struct cmd_flow_director_mask_result {
>  	struct rte_eth_fdir_masks *mask;
>  	struct rte_port *port;
> 
> -	if (res->port_id > nb_ports) {
> -		printf("Invalid port, range is [0, %d]\n", nb_ports - 1);
> +	if (port_id_is_invalid(res->port_id, ENABLED_WARN))
>  		return;
> -	}
> 
>  	port = &ports[res->port_id];
>  	/** Check if the port is not started **/
> @@ -10899,10 +10896,8 @@ struct cmd_flow_director_flex_mask_result {
>  	uint16_t i;
>  	int ret;
> 
> -	if (res->port_id > nb_ports) {
> -		printf("Invalid port, range is [0, %d]\n", nb_ports - 1);
> +	if (port_id_is_invalid(res->port_id, ENABLED_WARN))
>  		return;
> -	}
> 
>  	port = &ports[res->port_id];
>  	/** Check if the port is not started **/
> @@ -11053,12 +11048,10 @@ struct cmd_flow_director_flexpayload_result {
>  	struct cmd_flow_director_flexpayload_result *res = parsed_result;
>  	struct rte_eth_flex_payload_cfg flex_cfg;
>  	struct rte_port *port;
> -	int ret = 0;
> +	int ret;
> 
> -	if (res->port_id > nb_ports) {
> -		printf("Invalid port, range is [0, %d]\n", nb_ports - 1);
> +	if (port_id_is_invalid(res->port_id, ENABLED_WARN))
>  		return;
> -	}
> 
>  	port = &ports[res->port_id];
>  	/** Check if the port is not started **/
> @@ -11774,7 +11767,7 @@ struct cmd_config_l2_tunnel_eth_type_result {
>  	entry.l2_tunnel_type = str2fdir_l2_tunnel_type(res->l2_tunnel_type);
>  	entry.ether_type = res->eth_type_val;
> 
> -	RTE_ETH_FOREACH_DEV(pid) {
> +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
>  		rte_eth_dev_l2_tunnel_eth_type_conf(pid, &entry);
>  	}
>  }
> @@ -11890,7 +11883,7 @@ struct cmd_config_l2_tunnel_en_dis_result {
>  	else
>  		en = 0;
> 
> -	RTE_ETH_FOREACH_DEV(pid) {
> +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
>  		rte_eth_dev_l2_tunnel_offload_set(pid,
>  						  &entry,
>  						  ETH_L2_TUNNEL_ENABLE_MASK,
> @@ -14440,10 +14433,8 @@ struct cmd_ddp_add_result {
>  	int file_num;
>  	int ret = -ENOTSUP;
> 
> -	if (res->port_id > nb_ports) {
> -		printf("Invalid port, range is [0, %d]\n", nb_ports - 1);
> +	if (port_id_is_invalid(res->port_id, ENABLED_WARN))
>  		return;
> -	}
> 
>  	if (!all_ports_stopped()) {
>  		printf("Please stop all ports first\n");
> @@ -14522,10 +14513,8 @@ struct cmd_ddp_del_result {
>  	uint32_t size;
>  	int ret = -ENOTSUP;
> 
> -	if (res->port_id > nb_ports) {
> -		printf("Invalid port, range is [0, %d]\n", nb_ports - 1);
> +	if (port_id_is_invalid(res->port_id, ENABLED_WARN))
>  		return;
> -	}
> 
>  	if (!all_ports_stopped()) {
>  		printf("Please stop all ports first\n");
> @@ -14837,10 +14826,8 @@ struct cmd_ddp_get_list_result {
>  #endif
>  	int ret = -ENOTSUP;
> 
> -	if (res->port_id > nb_ports) {
> -		printf("Invalid port, range is [0, %d]\n", nb_ports - 1);
> +	if (port_id_is_invalid(res->port_id, ENABLED_WARN))
>  		return;
> -	}
> 
>  #ifdef RTE_LIBRTE_I40E_PMD
>  	size = PROFILE_INFO_SIZE * MAX_PROFILE_NUM + 4;
> @@ -16296,7 +16283,7 @@ struct cmd_cmdfile_result {
>  	if (id == (portid_t)RTE_PORT_ALL) {
>  		portid_t pid;
> 
> -		RTE_ETH_FOREACH_DEV(pid) {
> +		RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
>  			/* check if need_reconfig has been set to 1 */
>  			if (ports[pid].need_reconfig == 0)
>  				ports[pid].need_reconfig = dev;
> diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
> index 561e057..e55490f 100644
> --- a/app/test-pmd/cmdline_flow.c
> +++ b/app/test-pmd/cmdline_flow.c
> @@ -2652,7 +2652,7 @@ static int comp_vc_action_rss_queue(struct context *, const struct token *,
> 
>  	(void)ctx;
>  	(void)token;
> -	RTE_ETH_FOREACH_DEV(p) {
> +	RTE_ETH_FOREACH_DEV_OWNED_BY(p, my_owner.id) {
>  		if (buf && i == ent)
>  			return snprintf(buf, size, "%u", p);
>  		++i;
> diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
> index 957b820..43b9a7d 100644
> --- a/app/test-pmd/config.c
> +++ b/app/test-pmd/config.c
> @@ -156,7 +156,7 @@ struct rss_type_info {
> 
>  	if (port_id_is_invalid(port_id, ENABLED_WARN)) {
>  		printf("Valid port range is [0");
> -		RTE_ETH_FOREACH_DEV(pid)
> +		RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id)
>  			printf(", %d", pid);
>  		printf("]\n");
>  		return;
> @@ -236,7 +236,7 @@ struct rss_type_info {
> 
>  	if (port_id_is_invalid(port_id, ENABLED_WARN)) {
>  		printf("Valid port range is [0");
> -		RTE_ETH_FOREACH_DEV(pid)
> +		RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id)
>  			printf(", %d", pid);
>  		printf("]\n");
>  		return;
> @@ -253,10 +253,9 @@ struct rss_type_info {
>  	struct rte_eth_xstat_name *xstats_names;
> 
>  	printf("###### NIC extended statistics for port %-2d\n", port_id);
> -	if (!rte_eth_dev_is_valid_port(port_id)) {
> -		printf("Error: Invalid port number %i\n", port_id);
> +
> +	if (port_id_is_invalid(port_id, ENABLED_WARN))
>  		return;
> -	}
> 
>  	/* Get count */
>  	cnt_xstats = rte_eth_xstats_get_names(port_id, NULL, 0);
> @@ -321,7 +320,7 @@ struct rss_type_info {
> 
>  	if (port_id_is_invalid(port_id, ENABLED_WARN)) {
>  		printf("Valid port range is [0");
> -		RTE_ETH_FOREACH_DEV(pid)
> +		RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id)
>  			printf(", %d", pid);
>  		printf("]\n");
>  		return;
> @@ -439,7 +438,7 @@ struct rss_type_info {
> 
>  	if (port_id_is_invalid(port_id, ENABLED_WARN)) {
>  		printf("Valid port range is [0");
> -		RTE_ETH_FOREACH_DEV(pid)
> +		RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id)
>  			printf(", %d", pid);
>  		printf("]\n");
>  		return;
> @@ -756,10 +755,15 @@ struct rss_type_info {
>  int
>  port_id_is_invalid(portid_t port_id, enum print_warning warning)
>  {
> +	struct rte_eth_dev_owner owner;
> +	int ret;
> +
>  	if (port_id == (portid_t)RTE_PORT_ALL)
>  		return 0;
> 
> -	if (rte_eth_dev_is_valid_port(port_id))
> +	ret = rte_eth_dev_owner_get(port_id, &owner);
> +
> +	if (ret == 0 && owner.id == my_owner.id)
>  		return 0;
> 
>  	if (warning == ENABLED_WARN)
> @@ -2373,7 +2377,7 @@ struct igb_ring_desc_16_bytes {
>  		return;
>  	}
>  	nb_pt = 0;
> -	RTE_ETH_FOREACH_DEV(i) {
> +	RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id) {
>  		if (! ((uint64_t)(1ULL << i) & portmask))
>  			continue;
>  		portlist[nb_pt++] = i;
> @@ -2512,10 +2516,9 @@ struct igb_ring_desc_16_bytes {
>  void
>  setup_gro(const char *onoff, portid_t port_id)
>  {
> -	if (!rte_eth_dev_is_valid_port(port_id)) {
> -		printf("invalid port id %u\n", port_id);
> +	if (port_id_is_invalid(port_id, ENABLED_WARN))
>  		return;
> -	}
> +
>  	if (test_done == 0) {
>  		printf("Before enable/disable GRO,"
>  				" please stop forwarding first\n");
> @@ -2574,10 +2577,9 @@ struct igb_ring_desc_16_bytes {
> 
>  	param = &gro_ports[port_id].param;
> 
> -	if (!rte_eth_dev_is_valid_port(port_id)) {
> -		printf("Invalid port id %u.\n", port_id);
> +	if (port_id_is_invalid(port_id, ENABLED_WARN))
>  		return;
> -	}
> +
>  	if (gro_ports[port_id].enable) {
>  		printf("GRO type: TCP/IPv4\n");
>  		if (gro_flush_cycles == GRO_DEFAULT_FLUSH_CYCLES) {
> @@ -2595,10 +2597,9 @@ struct igb_ring_desc_16_bytes {
>  void
>  setup_gso(const char *mode, portid_t port_id)
>  {
> -	if (!rte_eth_dev_is_valid_port(port_id)) {
> -		printf("invalid port id %u\n", port_id);
> +	if (port_id_is_invalid(port_id, ENABLED_WARN))
>  		return;
> -	}
> +
>  	if (strcmp(mode, "on") == 0) {
>  		if (test_done == 0) {
>  			printf("before enabling GSO,"
> diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
> index 878c112..0e57b46 100644
> --- a/app/test-pmd/parameters.c
> +++ b/app/test-pmd/parameters.c
> @@ -398,7 +398,7 @@
>  		if (port_id_is_invalid(port_id, ENABLED_WARN) ||
>  			port_id == (portid_t)RTE_PORT_ALL) {
>  			printf("Valid port range is [0");
> -			RTE_ETH_FOREACH_DEV(pid)
> +			RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id)
>  				printf(", %d", pid);
>  			printf("]\n");
>  			return -1;
> @@ -459,7 +459,7 @@
>  		if (port_id_is_invalid(port_id, ENABLED_WARN) ||
>  			port_id == (portid_t)RTE_PORT_ALL) {
>  			printf("Valid port range is [0");
> -			RTE_ETH_FOREACH_DEV(pid)
> +			RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id)
>  				printf(", %d", pid);
>  			printf("]\n");
>  			return -1;
> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> index c066cf9..83f5e84 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
> @@ -108,6 +108,11 @@
>  lcoreid_t nb_lcores;           /**< Number of probed logical cores. */
> 
>  /*
> + * My port owner structure used to own Ethernet ports.
> + */
> +struct rte_eth_dev_owner my_owner; /**< Unique owner. */
> +
> +/*
>   * Test Forwarding Configuration.
>   *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
>   *    nb_fwd_ports  <= nb_cfg_ports  <= nb_ports
> @@ -449,7 +454,7 @@ static int eth_event_callback(portid_t port_id,
>  	portid_t pt_id;
>  	int i = 0;
> 
> -	RTE_ETH_FOREACH_DEV(pt_id)
> +	RTE_ETH_FOREACH_DEV_OWNED_BY(pt_id, my_owner.id)
>  		fwd_ports_ids[i++] = pt_id;
> 
>  	nb_cfg_ports = nb_ports;
> @@ -573,7 +578,7 @@ static int eth_event_callback(portid_t port_id,
>  		fwd_lcores[lc_id]->cpuid_idx = lc_id;
>  	}
> 
> -	RTE_ETH_FOREACH_DEV(pid) {
> +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
>  		port = &ports[pid];
>  		/* Apply default Tx configuration for all ports */
>  		port->dev_conf.txmode = tx_mode;
> @@ -706,7 +711,7 @@ static int eth_event_callback(portid_t port_id,
>  	queueid_t q;
> 
>  	/* set socket id according to numa or not */
> -	RTE_ETH_FOREACH_DEV(pid) {
> +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
>  		port = &ports[pid];
>  		if (nb_rxq > port->dev_info.max_rx_queues) {
>  			printf("Fail: nb_rxq(%d) is greater than "
> @@ -1000,9 +1005,8 @@ static int eth_event_callback(portid_t port_id,
>  	uint64_t tics_per_1sec;
>  	uint64_t tics_datum;
>  	uint64_t tics_current;
> -	uint8_t idx_port, cnt_ports;
> +	uint16_t idx_port;
> 
> -	cnt_ports = rte_eth_dev_count();
>  	tics_datum = rte_rdtsc();
>  	tics_per_1sec = rte_get_timer_hz();
>  #endif
> @@ -1017,11 +1021,10 @@ static int eth_event_callback(portid_t port_id,
>  			tics_current = rte_rdtsc();
>  			if (tics_current - tics_datum >= tics_per_1sec) {
>  				/* Periodic bitrate calculation */
> -				for (idx_port = 0;
> -						idx_port < cnt_ports;
> -						idx_port++)
> +				RTE_ETH_FOREACH_DEV_OWNED_BY(idx_port,
> +							     my_owner.id)
>  					rte_stats_bitrate_calc(bitrate_data,
> -						idx_port);
> +							       idx_port);
>  				tics_datum = tics_current;
>  			}
>  		}
> @@ -1359,7 +1362,7 @@ static int eth_event_callback(portid_t port_id,
>  	portid_t pi;
>  	struct rte_port *port;
> 
> -	RTE_ETH_FOREACH_DEV(pi) {
> +	RTE_ETH_FOREACH_DEV_OWNED_BY(pi, my_owner.id) {
>  		port = &ports[pi];
>  		/* Check if there is a port which is not started */
>  		if ((port->port_status != RTE_PORT_STARTED) &&
> @@ -1387,7 +1390,7 @@ static int eth_event_callback(portid_t port_id,
>  {
>  	portid_t pi;
> 
> -	RTE_ETH_FOREACH_DEV(pi) {
> +	RTE_ETH_FOREACH_DEV_OWNED_BY(pi, my_owner.id) {
>  		if (!port_is_stopped(pi))
>  			return 0;
>  	}
> @@ -1434,7 +1437,7 @@ static int eth_event_callback(portid_t port_id,
> 
>  	if(dcb_config)
>  		dcb_test = 1;
> -	RTE_ETH_FOREACH_DEV(pi) {
> +	RTE_ETH_FOREACH_DEV_OWNED_BY(pi, my_owner.id) {
>  		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
>  			continue;
> 
> @@ -1620,7 +1623,7 @@ static int eth_event_callback(portid_t port_id,
> 
>  	printf("Stopping ports...\n");
> 
> -	RTE_ETH_FOREACH_DEV(pi) {
> +	RTE_ETH_FOREACH_DEV_OWNED_BY(pi, my_owner.id) {
>  		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
>  			continue;
> 
> @@ -1663,7 +1666,7 @@ static int eth_event_callback(portid_t port_id,
> 
>  	printf("Closing ports...\n");
> 
> -	RTE_ETH_FOREACH_DEV(pi) {
> +	RTE_ETH_FOREACH_DEV_OWNED_BY(pi, my_owner.id) {
>  		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
>  			continue;
> 
> @@ -1714,7 +1717,7 @@ static int eth_event_callback(portid_t port_id,
> 
>  	printf("Resetting ports...\n");
> 
> -	RTE_ETH_FOREACH_DEV(pi) {
> +	RTE_ETH_FOREACH_DEV_OWNED_BY(pi, my_owner.id) {
>  		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
>  			continue;
> 
> @@ -1759,6 +1762,12 @@ static int eth_event_callback(portid_t port_id,
>  	if (rte_eth_dev_attach(identifier, &pi))
>  		return;
> 
> +	if (rte_eth_dev_owner_set(pi, &my_owner) != 0) {
> +		printf("Error: cannot own new attached port %d\n", pi);
> +		return;
> +	}
> +	nb_ports++;
> +
>  	socket_id = (unsigned)rte_eth_dev_socket_id(pi);
>  	/* if socket_id is invalid, set to 0 */
>  	if (check_socket_id(socket_id) < 0)
> @@ -1766,8 +1775,6 @@ static int eth_event_callback(portid_t port_id,
>  	reconfig(pi, socket_id);
>  	rte_eth_promiscuous_enable(pi);
> 
> -	nb_ports = rte_eth_dev_count();
> -
>  	ports[pi].port_status = RTE_PORT_STOPPED;
> 
>  	printf("Port %d is attached. Now total ports is %d\n", pi, nb_ports);
> @@ -1781,6 +1788,9 @@ static int eth_event_callback(portid_t port_id,
> 
>  	printf("Detaching a port...\n");
> 
> +	if (port_id_is_invalid(port_id, ENABLED_WARN))
> +		return;
> +
>  	if (!port_is_closed(port_id)) {
>  		printf("Please close port first\n");
>  		return;
> @@ -1794,7 +1804,7 @@ static int eth_event_callback(portid_t port_id,
>  		return;
>  	}
> 
> -	nb_ports = rte_eth_dev_count();
> +	nb_ports--;
> 
>  	printf("Port '%s' is detached. Now total ports is %d\n",
>  			name, nb_ports);
> @@ -1812,7 +1822,7 @@ static int eth_event_callback(portid_t port_id,
> 
>  	if (ports != NULL) {
>  		no_link_check = 1;
> -		RTE_ETH_FOREACH_DEV(pt_id) {
> +		RTE_ETH_FOREACH_DEV_OWNED_BY(pt_id, my_owner.id) {
>  			printf("\nShutting down port %d...\n", pt_id);
>  			fflush(stdout);
>  			stop_port(pt_id);
> @@ -1844,7 +1854,7 @@ struct pmd_test_command {
>  	fflush(stdout);
>  	for (count = 0; count <= MAX_CHECK_TIME; count++) {
>  		all_ports_up = 1;
> -		RTE_ETH_FOREACH_DEV(portid) {
> +		RTE_ETH_FOREACH_DEV_OWNED_BY(portid, my_owner.id) {
>  			if ((port_mask & (1 << portid)) == 0)
>  				continue;
>  			memset(&link, 0, sizeof(link));
> @@ -1936,6 +1946,8 @@ struct pmd_test_command {
> 
>  	switch (type) {
>  	case RTE_ETH_EVENT_INTR_RMV:
> +		if (port_id_is_invalid(port_id, ENABLED_WARN))
> +			break;
>  		if (rte_eal_alarm_set(100000,
>  				rmv_event_callback, (void *)(intptr_t)port_id))
>  			fprintf(stderr, "Could not set up deferred device removal\n");
> @@ -2068,7 +2080,7 @@ struct pmd_test_command {
>  	portid_t pid;
>  	struct rte_port *port;
> 
> -	RTE_ETH_FOREACH_DEV(pid) {
> +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
>  		port = &ports[pid];
>  		port->dev_conf.fdir_conf = fdir_conf;
>  		if (nb_rxq > 1) {
> @@ -2383,7 +2395,12 @@ uint8_t port_is_bonding_slave(portid_t slave_pid)
>  	rte_pdump_init(NULL);
>  #endif
> 
> -	nb_ports = (portid_t) rte_eth_dev_count();
> +	if (rte_eth_dev_owner_new(&my_owner.id))
> +		rte_panic("Failed to get unique owner identifier\n");
> +	snprintf(my_owner.name, sizeof(my_owner.name), TESTPMD_OWNER_NAME);
> +	RTE_ETH_FOREACH_DEV(port_id)
> +		if (rte_eth_dev_owner_set(port_id, &my_owner) == 0)
> +			nb_ports++;
>  	if (nb_ports == 0)
>  		TESTPMD_LOG(WARNING, "No probed ethernet devices\n");
> 
> @@ -2431,7 +2448,7 @@ uint8_t port_is_bonding_slave(portid_t slave_pid)
>  		rte_exit(EXIT_FAILURE, "Start ports failed\n");
> 
>  	/* set all ports to promiscuous mode by default */
> -	RTE_ETH_FOREACH_DEV(port_id)
> +	RTE_ETH_FOREACH_DEV_OWNED_BY(port_id, my_owner.id)
>  		rte_eth_promiscuous_enable(port_id);
> 
>  	/* Init metrics library */
> diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
> index 9c739e5..2d253b9 100644
> --- a/app/test-pmd/testpmd.h
> +++ b/app/test-pmd/testpmd.h
> @@ -50,6 +50,8 @@
>  #define NUMA_NO_CONFIG 0xFF
>  #define UMA_NO_CONFIG  0xFF
> 
> +#define TESTPMD_OWNER_NAME "TestPMD"
> +
>  typedef uint8_t  lcoreid_t;
>  typedef uint16_t portid_t;
>  typedef uint16_t queueid_t;
> @@ -361,6 +363,7 @@ struct queue_stats_mappings {
>   * nb_fwd_ports <= nb_cfg_ports <= nb_ports
>   */
>  extern portid_t nb_ports; /**< Number of ethernet ports probed at init time. */
> +extern struct rte_eth_dev_owner my_owner; /**< Unique owner. */
>  extern portid_t nb_cfg_ports; /**< Number of configured ports. */
>  extern portid_t nb_fwd_ports; /**< Number of forwarding ports. */
>  extern portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];
> --
> 1.8.3.1
  
Matan Azrad Jan. 19, 2018, 12:51 p.m. UTC | #2
Hi Konstantin

From: Ananyev, Konstantin, Friday, January 19, 2018 2:38 PM
> To: Matan Azrad <matan@mellanox.com>; Thomas Monjalon
> <thomas@monjalon.net>; Gaetan Rivet <gaetan.rivet@6wind.com>; Wu,
> Jingjing <jingjing.wu@intel.com>
> Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>; Richardson,
> Bruce <bruce.richardson@intel.com>
> Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev port ownership
> 
> Hi Matan,
> 
> > -----Original Message-----
> > From: Matan Azrad [mailto:matan@mellanox.com]
> > Sent: Thursday, January 18, 2018 4:35 PM
> > To: Thomas Monjalon <thomas@monjalon.net>; Gaetan Rivet
> > <gaetan.rivet@6wind.com>; Wu, Jingjing <jingjing.wu@intel.com>
> > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>; Richardson,
> > Bruce <bruce.richardson@intel.com>; Ananyev, Konstantin
> > <konstantin.ananyev@intel.com>
> > Subject: [PATCH v3 7/7] app/testpmd: adjust ethdev port ownership
> >
> > Testpmd should not use ethdev ports which are managed by other DPDK
> > entities.
> >
> > Set Testpmd ownership to each port which is not used by other entity
> > and prevent any usage of ethdev ports which are not owned by Testpmd.
> >
> > Signed-off-by: Matan Azrad <matan@mellanox.com>
> > ---
> >  app/test-pmd/cmdline.c      | 89 +++++++++++++++++++---------------------
> -----
> >  app/test-pmd/cmdline_flow.c |  2 +-
> >  app/test-pmd/config.c       | 37 ++++++++++---------
> >  app/test-pmd/parameters.c   |  4 +-
> >  app/test-pmd/testpmd.c      | 63 ++++++++++++++++++++------------
> >  app/test-pmd/testpmd.h      |  3 ++
> >  6 files changed, 103 insertions(+), 95 deletions(-)
> >
> > diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index
> > 31919ba..6199c64 100644
> > --- a/app/test-pmd/cmdline.c
> > +++ b/app/test-pmd/cmdline.c
> > @@ -1394,7 +1394,7 @@ struct cmd_config_speed_all {
> >  			&link_speed) < 0)
> >  		return;
> >
> > -	RTE_ETH_FOREACH_DEV(pid) {
> > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
> 
> Why do we need all these changes?
> As I understand you changed definition of RTE_ETH_FOREACH_DEV(), so no
> testpmd should work ok default (no_owner case).
> Am I missing something here?

Now, After Gaetan suggestion RTE_ETH_FOREACH_DEV(pid) will iterate over all valid and ownerless ports.
Here Testpmd wants to iterate over its owned ports.

I added to Testpmd ability to take an ownership of ports as the new ownership and synchronization rules suggested,
Since Tespmd is a DPDK entity which wants that no one will touch its owned ports,
It must allocate an unique ID, set owner for its ports (see in main function) and recognizes them by its owner ID.    

> Konstantin
> 
> >  		ports[pid].dev_conf.link_speeds = link_speed;
> >  	}
> >
> > @@ -1902,7 +1902,7 @@ struct cmd_config_rss {
> >  	struct cmd_config_rss *res = parsed_result;
> >  	struct rte_eth_rss_conf rss_conf = { .rss_key_len = 0, };
> >  	int diag;
> > -	uint8_t i;
> > +	uint16_t pid;
> >
> >  	if (!strcmp(res->value, "all"))
> >  		rss_conf.rss_hf = ETH_RSS_IP | ETH_RSS_TCP | @@ -1936,12
> +1936,12
> > @@ struct cmd_config_rss {
> >  		return;
> >  	}
> >  	rss_conf.rss_key = NULL;
> > -	for (i = 0; i < rte_eth_dev_count(); i++) {
> > -		diag = rte_eth_dev_rss_hash_update(i, &rss_conf);
> > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
> > +		diag = rte_eth_dev_rss_hash_update(pid, &rss_conf);
> >  		if (diag < 0)
> >  			printf("Configuration of RSS hash at ethernet port %d
> "
> >  				"failed with error (%d): %s.\n",
> > -				i, -diag, strerror(-diag));
> > +				pid, -diag, strerror(-diag));
> >  	}
> >  }
> >
> > @@ -3686,10 +3686,9 @@ struct cmd_csum_result {
> >  	uint64_t csum_offloads = 0;
> >  	struct rte_eth_dev_info dev_info;
> >
> > -	if (port_id_is_invalid(res->port_id, ENABLED_WARN)) {
> > -		printf("invalid port %d\n", res->port_id);
> > +	if (port_id_is_invalid(res->port_id, ENABLED_WARN))
> >  		return;
> > -	}
> > +
> >  	if (!port_is_stopped(res->port_id)) {
> >  		printf("Please stop port %d first\n", res->port_id);
> >  		return;
> > @@ -4364,8 +4363,8 @@ struct cmd_gso_show_result {  {
> >  	struct cmd_gso_show_result *res = parsed_result;
> >
> > -	if (!rte_eth_dev_is_valid_port(res->cmd_pid)) {
> > -		printf("invalid port id %u\n", res->cmd_pid);
> > +	if (port_id_is_invalid(res->cmd_pid, ENABLED_WARN)) {
> > +		printf("invalid/not owned port id %u\n", res->cmd_pid);
> >  		return;
> >  	}
> >  	if (!strcmp(res->cmd_keyword, "gso")) { @@ -5375,7 +5374,12 @@
> > static void cmd_create_bonded_device_parsed(void *parsed_result,
> >  				port_id);
> >
> >  		/* Update number of ports */
> > -		nb_ports = rte_eth_dev_count();
> > +		if (rte_eth_dev_owner_set(port_id, &my_owner) != 0) {
> > +			printf("Error: cannot own new attached port %d\n",
> > +			       port_id);
> > +			return;
> > +		}
> > +		nb_ports++;
> >  		reconfig(port_id, res->socket);
> >  		rte_eth_promiscuous_enable(port_id);
> >  	}
> > @@ -5484,10 +5488,8 @@ static void
> cmd_set_bond_mon_period_parsed(void *parsed_result,
> >  	struct cmd_set_bond_mon_period_result *res = parsed_result;
> >  	int ret;
> >
> > -	if (res->port_num >= nb_ports) {
> > -		printf("Port id %d must be less than %d\n", res->port_num,
> nb_ports);
> > +	if (port_id_is_invalid(res->port_num, ENABLED_WARN))
> >  		return;
> > -	}
> >
> >  	ret = rte_eth_bond_link_monitoring_set(res->port_num,
> > res->period_ms);
> >
> > @@ -5545,11 +5547,8 @@ struct
> cmd_set_bonding_agg_mode_policy_result {
> >  	struct cmd_set_bonding_agg_mode_policy_result *res =
> parsed_result;
> >  	uint8_t policy = AGG_BANDWIDTH;
> >
> > -	if (res->port_num >= nb_ports) {
> > -		printf("Port id %d must be less than %d\n",
> > -				res->port_num, nb_ports);
> > +	if (port_id_is_invalid(res->port_num, ENABLED_WARN))
> >  		return;
> > -	}
> >
> >  	if (!strcmp(res->policy, "bandwidth"))
> >  		policy = AGG_BANDWIDTH;
> > @@ -5808,7 +5807,7 @@ static void cmd_set_promisc_mode_parsed(void
> > *parsed_result,
> >
> >  	/* all ports */
> >  	if (allports) {
> > -		RTE_ETH_FOREACH_DEV(i) {
> > +		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id) {
> >  			if (enable)
> >  				rte_eth_promiscuous_enable(i);
> >  			else
> > @@ -5888,7 +5887,7 @@ static void cmd_set_allmulti_mode_parsed(void
> > *parsed_result,
> >
> >  	/* all ports */
> >  	if (allports) {
> > -		RTE_ETH_FOREACH_DEV(i) {
> > +		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id) {
> >  			if (enable)
> >  				rte_eth_allmulticast_enable(i);
> >  			else
> > @@ -6622,31 +6621,31 @@ static void cmd_showportall_parsed(void
> *parsed_result,
> >  	struct cmd_showportall_result *res = parsed_result;
> >  	if (!strcmp(res->show, "clear")) {
> >  		if (!strcmp(res->what, "stats"))
> > -			RTE_ETH_FOREACH_DEV(i)
> > +			RTE_ETH_FOREACH_DEV_OWNED_BY(i,
> my_owner.id)
> >  				nic_stats_clear(i);
> >  		else if (!strcmp(res->what, "xstats"))
> > -			RTE_ETH_FOREACH_DEV(i)
> > +			RTE_ETH_FOREACH_DEV_OWNED_BY(i,
> my_owner.id)
> >  				nic_xstats_clear(i);
> >  	} else if (!strcmp(res->what, "info"))
> > -		RTE_ETH_FOREACH_DEV(i)
> > +		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
> >  			port_infos_display(i);
> >  	else if (!strcmp(res->what, "stats"))
> > -		RTE_ETH_FOREACH_DEV(i)
> > +		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
> >  			nic_stats_display(i);
> >  	else if (!strcmp(res->what, "xstats"))
> > -		RTE_ETH_FOREACH_DEV(i)
> > +		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
> >  			nic_xstats_display(i);
> >  	else if (!strcmp(res->what, "fdir"))
> > -		RTE_ETH_FOREACH_DEV(i)
> > +		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
> >  			fdir_get_infos(i);
> >  	else if (!strcmp(res->what, "stat_qmap"))
> > -		RTE_ETH_FOREACH_DEV(i)
> > +		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
> >  			nic_stats_mapping_display(i);
> >  	else if (!strcmp(res->what, "dcb_tc"))
> > -		RTE_ETH_FOREACH_DEV(i)
> > +		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
> >  			port_dcb_info_display(i);
> >  	else if (!strcmp(res->what, "cap"))
> > -		RTE_ETH_FOREACH_DEV(i)
> > +		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
> >  			port_offload_cap_display(i);
> >  }
> >
> > @@ -10698,10 +10697,8 @@ struct cmd_flow_director_mask_result {
> >  	struct rte_eth_fdir_masks *mask;
> >  	struct rte_port *port;
> >
> > -	if (res->port_id > nb_ports) {
> > -		printf("Invalid port, range is [0, %d]\n", nb_ports - 1);
> > +	if (port_id_is_invalid(res->port_id, ENABLED_WARN))
> >  		return;
> > -	}
> >
> >  	port = &ports[res->port_id];
> >  	/** Check if the port is not started **/ @@ -10899,10 +10896,8 @@
> > struct cmd_flow_director_flex_mask_result {
> >  	uint16_t i;
> >  	int ret;
> >
> > -	if (res->port_id > nb_ports) {
> > -		printf("Invalid port, range is [0, %d]\n", nb_ports - 1);
> > +	if (port_id_is_invalid(res->port_id, ENABLED_WARN))
> >  		return;
> > -	}
> >
> >  	port = &ports[res->port_id];
> >  	/** Check if the port is not started **/ @@ -11053,12 +11048,10 @@
> > struct cmd_flow_director_flexpayload_result {
> >  	struct cmd_flow_director_flexpayload_result *res = parsed_result;
> >  	struct rte_eth_flex_payload_cfg flex_cfg;
> >  	struct rte_port *port;
> > -	int ret = 0;
> > +	int ret;
> >
> > -	if (res->port_id > nb_ports) {
> > -		printf("Invalid port, range is [0, %d]\n", nb_ports - 1);
> > +	if (port_id_is_invalid(res->port_id, ENABLED_WARN))
> >  		return;
> > -	}
> >
> >  	port = &ports[res->port_id];
> >  	/** Check if the port is not started **/ @@ -11774,7 +11767,7 @@
> > struct cmd_config_l2_tunnel_eth_type_result {
> >  	entry.l2_tunnel_type = str2fdir_l2_tunnel_type(res-
> >l2_tunnel_type);
> >  	entry.ether_type = res->eth_type_val;
> >
> > -	RTE_ETH_FOREACH_DEV(pid) {
> > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
> >  		rte_eth_dev_l2_tunnel_eth_type_conf(pid, &entry);
> >  	}
> >  }
> > @@ -11890,7 +11883,7 @@ struct cmd_config_l2_tunnel_en_dis_result {
> >  	else
> >  		en = 0;
> >
> > -	RTE_ETH_FOREACH_DEV(pid) {
> > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
> >  		rte_eth_dev_l2_tunnel_offload_set(pid,
> >  						  &entry,
> >
> ETH_L2_TUNNEL_ENABLE_MASK,
> > @@ -14440,10 +14433,8 @@ struct cmd_ddp_add_result {
> >  	int file_num;
> >  	int ret = -ENOTSUP;
> >
> > -	if (res->port_id > nb_ports) {
> > -		printf("Invalid port, range is [0, %d]\n", nb_ports - 1);
> > +	if (port_id_is_invalid(res->port_id, ENABLED_WARN))
> >  		return;
> > -	}
> >
> >  	if (!all_ports_stopped()) {
> >  		printf("Please stop all ports first\n"); @@ -14522,10 +14513,8
> @@
> > struct cmd_ddp_del_result {
> >  	uint32_t size;
> >  	int ret = -ENOTSUP;
> >
> > -	if (res->port_id > nb_ports) {
> > -		printf("Invalid port, range is [0, %d]\n", nb_ports - 1);
> > +	if (port_id_is_invalid(res->port_id, ENABLED_WARN))
> >  		return;
> > -	}
> >
> >  	if (!all_ports_stopped()) {
> >  		printf("Please stop all ports first\n"); @@ -14837,10 +14826,8
> @@
> > struct cmd_ddp_get_list_result {  #endif
> >  	int ret = -ENOTSUP;
> >
> > -	if (res->port_id > nb_ports) {
> > -		printf("Invalid port, range is [0, %d]\n", nb_ports - 1);
> > +	if (port_id_is_invalid(res->port_id, ENABLED_WARN))
> >  		return;
> > -	}
> >
> >  #ifdef RTE_LIBRTE_I40E_PMD
> >  	size = PROFILE_INFO_SIZE * MAX_PROFILE_NUM + 4; @@ -16296,7
> +16283,7
> > @@ struct cmd_cmdfile_result {
> >  	if (id == (portid_t)RTE_PORT_ALL) {
> >  		portid_t pid;
> >
> > -		RTE_ETH_FOREACH_DEV(pid) {
> > +		RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
> >  			/* check if need_reconfig has been set to 1 */
> >  			if (ports[pid].need_reconfig == 0)
> >  				ports[pid].need_reconfig = dev;
> > diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
> > index 561e057..e55490f 100644
> > --- a/app/test-pmd/cmdline_flow.c
> > +++ b/app/test-pmd/cmdline_flow.c
> > @@ -2652,7 +2652,7 @@ static int comp_vc_action_rss_queue(struct
> > context *, const struct token *,
> >
> >  	(void)ctx;
> >  	(void)token;
> > -	RTE_ETH_FOREACH_DEV(p) {
> > +	RTE_ETH_FOREACH_DEV_OWNED_BY(p, my_owner.id) {
> >  		if (buf && i == ent)
> >  			return snprintf(buf, size, "%u", p);
> >  		++i;
> > diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c index
> > 957b820..43b9a7d 100644
> > --- a/app/test-pmd/config.c
> > +++ b/app/test-pmd/config.c
> > @@ -156,7 +156,7 @@ struct rss_type_info {
> >
> >  	if (port_id_is_invalid(port_id, ENABLED_WARN)) {
> >  		printf("Valid port range is [0");
> > -		RTE_ETH_FOREACH_DEV(pid)
> > +		RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id)
> >  			printf(", %d", pid);
> >  		printf("]\n");
> >  		return;
> > @@ -236,7 +236,7 @@ struct rss_type_info {
> >
> >  	if (port_id_is_invalid(port_id, ENABLED_WARN)) {
> >  		printf("Valid port range is [0");
> > -		RTE_ETH_FOREACH_DEV(pid)
> > +		RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id)
> >  			printf(", %d", pid);
> >  		printf("]\n");
> >  		return;
> > @@ -253,10 +253,9 @@ struct rss_type_info {
> >  	struct rte_eth_xstat_name *xstats_names;
> >
> >  	printf("###### NIC extended statistics for port %-2d\n", port_id);
> > -	if (!rte_eth_dev_is_valid_port(port_id)) {
> > -		printf("Error: Invalid port number %i\n", port_id);
> > +
> > +	if (port_id_is_invalid(port_id, ENABLED_WARN))
> >  		return;
> > -	}
> >
> >  	/* Get count */
> >  	cnt_xstats = rte_eth_xstats_get_names(port_id, NULL, 0); @@ -
> 321,7
> > +320,7 @@ struct rss_type_info {
> >
> >  	if (port_id_is_invalid(port_id, ENABLED_WARN)) {
> >  		printf("Valid port range is [0");
> > -		RTE_ETH_FOREACH_DEV(pid)
> > +		RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id)
> >  			printf(", %d", pid);
> >  		printf("]\n");
> >  		return;
> > @@ -439,7 +438,7 @@ struct rss_type_info {
> >
> >  	if (port_id_is_invalid(port_id, ENABLED_WARN)) {
> >  		printf("Valid port range is [0");
> > -		RTE_ETH_FOREACH_DEV(pid)
> > +		RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id)
> >  			printf(", %d", pid);
> >  		printf("]\n");
> >  		return;
> > @@ -756,10 +755,15 @@ struct rss_type_info {  int
> > port_id_is_invalid(portid_t port_id, enum print_warning warning)  {
> > +	struct rte_eth_dev_owner owner;
> > +	int ret;
> > +
> >  	if (port_id == (portid_t)RTE_PORT_ALL)
> >  		return 0;
> >
> > -	if (rte_eth_dev_is_valid_port(port_id))
> > +	ret = rte_eth_dev_owner_get(port_id, &owner);
> > +
> > +	if (ret == 0 && owner.id == my_owner.id)
> >  		return 0;
> >
> >  	if (warning == ENABLED_WARN)
> > @@ -2373,7 +2377,7 @@ struct igb_ring_desc_16_bytes {
> >  		return;
> >  	}
> >  	nb_pt = 0;
> > -	RTE_ETH_FOREACH_DEV(i) {
> > +	RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id) {
> >  		if (! ((uint64_t)(1ULL << i) & portmask))
> >  			continue;
> >  		portlist[nb_pt++] = i;
> > @@ -2512,10 +2516,9 @@ struct igb_ring_desc_16_bytes {  void
> > setup_gro(const char *onoff, portid_t port_id)  {
> > -	if (!rte_eth_dev_is_valid_port(port_id)) {
> > -		printf("invalid port id %u\n", port_id);
> > +	if (port_id_is_invalid(port_id, ENABLED_WARN))
> >  		return;
> > -	}
> > +
> >  	if (test_done == 0) {
> >  		printf("Before enable/disable GRO,"
> >  				" please stop forwarding first\n"); @@ -
> 2574,10 +2577,9 @@ struct
> > igb_ring_desc_16_bytes {
> >
> >  	param = &gro_ports[port_id].param;
> >
> > -	if (!rte_eth_dev_is_valid_port(port_id)) {
> > -		printf("Invalid port id %u.\n", port_id);
> > +	if (port_id_is_invalid(port_id, ENABLED_WARN))
> >  		return;
> > -	}
> > +
> >  	if (gro_ports[port_id].enable) {
> >  		printf("GRO type: TCP/IPv4\n");
> >  		if (gro_flush_cycles == GRO_DEFAULT_FLUSH_CYCLES) { @@
> -2595,10
> > +2597,9 @@ struct igb_ring_desc_16_bytes {  void  setup_gso(const char
> > *mode, portid_t port_id)  {
> > -	if (!rte_eth_dev_is_valid_port(port_id)) {
> > -		printf("invalid port id %u\n", port_id);
> > +	if (port_id_is_invalid(port_id, ENABLED_WARN))
> >  		return;
> > -	}
> > +
> >  	if (strcmp(mode, "on") == 0) {
> >  		if (test_done == 0) {
> >  			printf("before enabling GSO,"
> > diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
> > index 878c112..0e57b46 100644
> > --- a/app/test-pmd/parameters.c
> > +++ b/app/test-pmd/parameters.c
> > @@ -398,7 +398,7 @@
> >  		if (port_id_is_invalid(port_id, ENABLED_WARN) ||
> >  			port_id == (portid_t)RTE_PORT_ALL) {
> >  			printf("Valid port range is [0");
> > -			RTE_ETH_FOREACH_DEV(pid)
> > +			RTE_ETH_FOREACH_DEV_OWNED_BY(pid,
> my_owner.id)
> >  				printf(", %d", pid);
> >  			printf("]\n");
> >  			return -1;
> > @@ -459,7 +459,7 @@
> >  		if (port_id_is_invalid(port_id, ENABLED_WARN) ||
> >  			port_id == (portid_t)RTE_PORT_ALL) {
> >  			printf("Valid port range is [0");
> > -			RTE_ETH_FOREACH_DEV(pid)
> > +			RTE_ETH_FOREACH_DEV_OWNED_BY(pid,
> my_owner.id)
> >  				printf(", %d", pid);
> >  			printf("]\n");
> >  			return -1;
> > diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index
> > c066cf9..83f5e84 100644
> > --- a/app/test-pmd/testpmd.c
> > +++ b/app/test-pmd/testpmd.c
> > @@ -108,6 +108,11 @@
> >  lcoreid_t nb_lcores;           /**< Number of probed logical cores. */
> >
> >  /*
> > + * My port owner structure used to own Ethernet ports.
> > + */
> > +struct rte_eth_dev_owner my_owner; /**< Unique owner. */
> > +
> > +/*
> >   * Test Forwarding Configuration.
> >   *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
> >   *    nb_fwd_ports  <= nb_cfg_ports  <= nb_ports
> > @@ -449,7 +454,7 @@ static int eth_event_callback(portid_t port_id,
> >  	portid_t pt_id;
> >  	int i = 0;
> >
> > -	RTE_ETH_FOREACH_DEV(pt_id)
> > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pt_id, my_owner.id)
> >  		fwd_ports_ids[i++] = pt_id;
> >
> >  	nb_cfg_ports = nb_ports;
> > @@ -573,7 +578,7 @@ static int eth_event_callback(portid_t port_id,
> >  		fwd_lcores[lc_id]->cpuid_idx = lc_id;
> >  	}
> >
> > -	RTE_ETH_FOREACH_DEV(pid) {
> > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
> >  		port = &ports[pid];
> >  		/* Apply default Tx configuration for all ports */
> >  		port->dev_conf.txmode = tx_mode;
> > @@ -706,7 +711,7 @@ static int eth_event_callback(portid_t port_id,
> >  	queueid_t q;
> >
> >  	/* set socket id according to numa or not */
> > -	RTE_ETH_FOREACH_DEV(pid) {
> > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
> >  		port = &ports[pid];
> >  		if (nb_rxq > port->dev_info.max_rx_queues) {
> >  			printf("Fail: nb_rxq(%d) is greater than "
> > @@ -1000,9 +1005,8 @@ static int eth_event_callback(portid_t port_id,
> >  	uint64_t tics_per_1sec;
> >  	uint64_t tics_datum;
> >  	uint64_t tics_current;
> > -	uint8_t idx_port, cnt_ports;
> > +	uint16_t idx_port;
> >
> > -	cnt_ports = rte_eth_dev_count();
> >  	tics_datum = rte_rdtsc();
> >  	tics_per_1sec = rte_get_timer_hz();
> >  #endif
> > @@ -1017,11 +1021,10 @@ static int eth_event_callback(portid_t port_id,
> >  			tics_current = rte_rdtsc();
> >  			if (tics_current - tics_datum >= tics_per_1sec) {
> >  				/* Periodic bitrate calculation */
> > -				for (idx_port = 0;
> > -						idx_port < cnt_ports;
> > -						idx_port++)
> > +
> 	RTE_ETH_FOREACH_DEV_OWNED_BY(idx_port,
> > +							     my_owner.id)
> >  					rte_stats_bitrate_calc(bitrate_data,
> > -						idx_port);
> > +							       idx_port);
> >  				tics_datum = tics_current;
> >  			}
> >  		}
> > @@ -1359,7 +1362,7 @@ static int eth_event_callback(portid_t port_id,
> >  	portid_t pi;
> >  	struct rte_port *port;
> >
> > -	RTE_ETH_FOREACH_DEV(pi) {
> > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pi, my_owner.id) {
> >  		port = &ports[pi];
> >  		/* Check if there is a port which is not started */
> >  		if ((port->port_status != RTE_PORT_STARTED) && @@ -
> 1387,7 +1390,7
> > @@ static int eth_event_callback(portid_t port_id,  {
> >  	portid_t pi;
> >
> > -	RTE_ETH_FOREACH_DEV(pi) {
> > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pi, my_owner.id) {
> >  		if (!port_is_stopped(pi))
> >  			return 0;
> >  	}
> > @@ -1434,7 +1437,7 @@ static int eth_event_callback(portid_t port_id,
> >
> >  	if(dcb_config)
> >  		dcb_test = 1;
> > -	RTE_ETH_FOREACH_DEV(pi) {
> > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pi, my_owner.id) {
> >  		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
> >  			continue;
> >
> > @@ -1620,7 +1623,7 @@ static int eth_event_callback(portid_t port_id,
> >
> >  	printf("Stopping ports...\n");
> >
> > -	RTE_ETH_FOREACH_DEV(pi) {
> > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pi, my_owner.id) {
> >  		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
> >  			continue;
> >
> > @@ -1663,7 +1666,7 @@ static int eth_event_callback(portid_t port_id,
> >
> >  	printf("Closing ports...\n");
> >
> > -	RTE_ETH_FOREACH_DEV(pi) {
> > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pi, my_owner.id) {
> >  		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
> >  			continue;
> >
> > @@ -1714,7 +1717,7 @@ static int eth_event_callback(portid_t port_id,
> >
> >  	printf("Resetting ports...\n");
> >
> > -	RTE_ETH_FOREACH_DEV(pi) {
> > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pi, my_owner.id) {
> >  		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
> >  			continue;
> >
> > @@ -1759,6 +1762,12 @@ static int eth_event_callback(portid_t port_id,
> >  	if (rte_eth_dev_attach(identifier, &pi))
> >  		return;
> >
> > +	if (rte_eth_dev_owner_set(pi, &my_owner) != 0) {
> > +		printf("Error: cannot own new attached port %d\n", pi);
> > +		return;
> > +	}
> > +	nb_ports++;
> > +
> >  	socket_id = (unsigned)rte_eth_dev_socket_id(pi);
> >  	/* if socket_id is invalid, set to 0 */
> >  	if (check_socket_id(socket_id) < 0)
> > @@ -1766,8 +1775,6 @@ static int eth_event_callback(portid_t port_id,
> >  	reconfig(pi, socket_id);
> >  	rte_eth_promiscuous_enable(pi);
> >
> > -	nb_ports = rte_eth_dev_count();
> > -
> >  	ports[pi].port_status = RTE_PORT_STOPPED;
> >
> >  	printf("Port %d is attached. Now total ports is %d\n", pi,
> > nb_ports); @@ -1781,6 +1788,9 @@ static int
> > eth_event_callback(portid_t port_id,
> >
> >  	printf("Detaching a port...\n");
> >
> > +	if (port_id_is_invalid(port_id, ENABLED_WARN))
> > +		return;
> > +
> >  	if (!port_is_closed(port_id)) {
> >  		printf("Please close port first\n");
> >  		return;
> > @@ -1794,7 +1804,7 @@ static int eth_event_callback(portid_t port_id,
> >  		return;
> >  	}
> >
> > -	nb_ports = rte_eth_dev_count();
> > +	nb_ports--;
> >
> >  	printf("Port '%s' is detached. Now total ports is %d\n",
> >  			name, nb_ports);
> > @@ -1812,7 +1822,7 @@ static int eth_event_callback(portid_t port_id,
> >
> >  	if (ports != NULL) {
> >  		no_link_check = 1;
> > -		RTE_ETH_FOREACH_DEV(pt_id) {
> > +		RTE_ETH_FOREACH_DEV_OWNED_BY(pt_id, my_owner.id) {
> >  			printf("\nShutting down port %d...\n", pt_id);
> >  			fflush(stdout);
> >  			stop_port(pt_id);
> > @@ -1844,7 +1854,7 @@ struct pmd_test_command {
> >  	fflush(stdout);
> >  	for (count = 0; count <= MAX_CHECK_TIME; count++) {
> >  		all_ports_up = 1;
> > -		RTE_ETH_FOREACH_DEV(portid) {
> > +		RTE_ETH_FOREACH_DEV_OWNED_BY(portid, my_owner.id)
> {
> >  			if ((port_mask & (1 << portid)) == 0)
> >  				continue;
> >  			memset(&link, 0, sizeof(link));
> > @@ -1936,6 +1946,8 @@ struct pmd_test_command {
> >
> >  	switch (type) {
> >  	case RTE_ETH_EVENT_INTR_RMV:
> > +		if (port_id_is_invalid(port_id, ENABLED_WARN))
> > +			break;
> >  		if (rte_eal_alarm_set(100000,
> >  				rmv_event_callback, (void
> *)(intptr_t)port_id))
> >  			fprintf(stderr, "Could not set up deferred device
> removal\n"); @@
> > -2068,7 +2080,7 @@ struct pmd_test_command {
> >  	portid_t pid;
> >  	struct rte_port *port;
> >
> > -	RTE_ETH_FOREACH_DEV(pid) {
> > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
> >  		port = &ports[pid];
> >  		port->dev_conf.fdir_conf = fdir_conf;
> >  		if (nb_rxq > 1) {
> > @@ -2383,7 +2395,12 @@ uint8_t port_is_bonding_slave(portid_t
> slave_pid)
> >  	rte_pdump_init(NULL);
> >  #endif
> >
> > -	nb_ports = (portid_t) rte_eth_dev_count();
> > +	if (rte_eth_dev_owner_new(&my_owner.id))
> > +		rte_panic("Failed to get unique owner identifier\n");
> > +	snprintf(my_owner.name, sizeof(my_owner.name),
> TESTPMD_OWNER_NAME);
> > +	RTE_ETH_FOREACH_DEV(port_id)
> > +		if (rte_eth_dev_owner_set(port_id, &my_owner) == 0)
> > +			nb_ports++;
> >  	if (nb_ports == 0)
> >  		TESTPMD_LOG(WARNING, "No probed ethernet
> devices\n");
> >
> > @@ -2431,7 +2448,7 @@ uint8_t port_is_bonding_slave(portid_t
> slave_pid)
> >  		rte_exit(EXIT_FAILURE, "Start ports failed\n");
> >
> >  	/* set all ports to promiscuous mode by default */
> > -	RTE_ETH_FOREACH_DEV(port_id)
> > +	RTE_ETH_FOREACH_DEV_OWNED_BY(port_id, my_owner.id)
> >  		rte_eth_promiscuous_enable(port_id);
> >
> >  	/* Init metrics library */
> > diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h index
> > 9c739e5..2d253b9 100644
> > --- a/app/test-pmd/testpmd.h
> > +++ b/app/test-pmd/testpmd.h
> > @@ -50,6 +50,8 @@
> >  #define NUMA_NO_CONFIG 0xFF
> >  #define UMA_NO_CONFIG  0xFF
> >
> > +#define TESTPMD_OWNER_NAME "TestPMD"
> > +
> >  typedef uint8_t  lcoreid_t;
> >  typedef uint16_t portid_t;
> >  typedef uint16_t queueid_t;
> > @@ -361,6 +363,7 @@ struct queue_stats_mappings {
> >   * nb_fwd_ports <= nb_cfg_ports <= nb_ports
> >   */
> >  extern portid_t nb_ports; /**< Number of ethernet ports probed at
> > init time. */
> > +extern struct rte_eth_dev_owner my_owner; /**< Unique owner. */
> >  extern portid_t nb_cfg_ports; /**< Number of configured ports. */
> > extern portid_t nb_fwd_ports; /**< Number of forwarding ports. */
> > extern portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];
> > --
> > 1.8.3.1
  
Ananyev, Konstantin Jan. 19, 2018, 1:08 p.m. UTC | #3
> -----Original Message-----
> From: Matan Azrad [mailto:matan@mellanox.com]
> Sent: Friday, January 19, 2018 12:52 PM
> To: Ananyev, Konstantin <konstantin.ananyev@intel.com>; Thomas Monjalon <thomas@monjalon.net>; Gaetan Rivet
> <gaetan.rivet@6wind.com>; Wu, Jingjing <jingjing.wu@intel.com>
> Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>; Richardson, Bruce <bruce.richardson@intel.com>
> Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev port ownership
> 
> Hi Konstantin
> 
> From: Ananyev, Konstantin, Friday, January 19, 2018 2:38 PM
> > To: Matan Azrad <matan@mellanox.com>; Thomas Monjalon
> > <thomas@monjalon.net>; Gaetan Rivet <gaetan.rivet@6wind.com>; Wu,
> > Jingjing <jingjing.wu@intel.com>
> > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>; Richardson,
> > Bruce <bruce.richardson@intel.com>
> > Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev port ownership
> >
> > Hi Matan,
> >
> > > -----Original Message-----
> > > From: Matan Azrad [mailto:matan@mellanox.com]
> > > Sent: Thursday, January 18, 2018 4:35 PM
> > > To: Thomas Monjalon <thomas@monjalon.net>; Gaetan Rivet
> > > <gaetan.rivet@6wind.com>; Wu, Jingjing <jingjing.wu@intel.com>
> > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>; Richardson,
> > > Bruce <bruce.richardson@intel.com>; Ananyev, Konstantin
> > > <konstantin.ananyev@intel.com>
> > > Subject: [PATCH v3 7/7] app/testpmd: adjust ethdev port ownership
> > >
> > > Testpmd should not use ethdev ports which are managed by other DPDK
> > > entities.
> > >
> > > Set Testpmd ownership to each port which is not used by other entity
> > > and prevent any usage of ethdev ports which are not owned by Testpmd.
> > >
> > > Signed-off-by: Matan Azrad <matan@mellanox.com>
> > > ---
> > >  app/test-pmd/cmdline.c      | 89 +++++++++++++++++++---------------------
> > -----
> > >  app/test-pmd/cmdline_flow.c |  2 +-
> > >  app/test-pmd/config.c       | 37 ++++++++++---------
> > >  app/test-pmd/parameters.c   |  4 +-
> > >  app/test-pmd/testpmd.c      | 63 ++++++++++++++++++++------------
> > >  app/test-pmd/testpmd.h      |  3 ++
> > >  6 files changed, 103 insertions(+), 95 deletions(-)
> > >
> > > diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index
> > > 31919ba..6199c64 100644
> > > --- a/app/test-pmd/cmdline.c
> > > +++ b/app/test-pmd/cmdline.c
> > > @@ -1394,7 +1394,7 @@ struct cmd_config_speed_all {
> > >  			&link_speed) < 0)
> > >  		return;
> > >
> > > -	RTE_ETH_FOREACH_DEV(pid) {
> > > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
> >
> > Why do we need all these changes?
> > As I understand you changed definition of RTE_ETH_FOREACH_DEV(), so no
> > testpmd should work ok default (no_owner case).
> > Am I missing something here?
> 
> Now, After Gaetan suggestion RTE_ETH_FOREACH_DEV(pid) will iterate over all valid and ownerless ports.

Yes.

> Here Testpmd wants to iterate over its owned ports.

Why? Why it can't just iterate over all valid and ownerless ports?
As I understand it would be enough to fix current problems and would allow
us to avoid any changes in testmpd (which I think is a good thing).
Konstantin

> 
> I added to Testpmd ability to take an ownership of ports as the new ownership and synchronization rules suggested,
> Since Tespmd is a DPDK entity which wants that no one will touch its owned ports,
> It must allocate an unique ID, set owner for its ports (see in main function) and recognizes them by its owner ID.
> 
> > Konstantin
> >
  
Matan Azrad Jan. 19, 2018, 1:35 p.m. UTC | #4
Hi Konstantin

From: Ananyev, Konstantin, Friday, January 19, 2018 3:09 PM
> > -----Original Message-----
> > From: Matan Azrad [mailto:matan@mellanox.com]
> > Sent: Friday, January 19, 2018 12:52 PM
> > To: Ananyev, Konstantin <konstantin.ananyev@intel.com>; Thomas
> > Monjalon <thomas@monjalon.net>; Gaetan Rivet
> <gaetan.rivet@6wind.com>;
> > Wu, Jingjing <jingjing.wu@intel.com>
> > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>; Richardson,
> > Bruce <bruce.richardson@intel.com>
> > Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev port ownership
> >
> > Hi Konstantin
> >
> > From: Ananyev, Konstantin, Friday, January 19, 2018 2:38 PM
> > > To: Matan Azrad <matan@mellanox.com>; Thomas Monjalon
> > > <thomas@monjalon.net>; Gaetan Rivet <gaetan.rivet@6wind.com>;
> Wu,
> > > Jingjing <jingjing.wu@intel.com>
> > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>; Richardson,
> > > Bruce <bruce.richardson@intel.com>
> > > Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev port
> > > ownership
> > >
> > > Hi Matan,
> > >
> > > > -----Original Message-----
> > > > From: Matan Azrad [mailto:matan@mellanox.com]
> > > > Sent: Thursday, January 18, 2018 4:35 PM
> > > > To: Thomas Monjalon <thomas@monjalon.net>; Gaetan Rivet
> > > > <gaetan.rivet@6wind.com>; Wu, Jingjing <jingjing.wu@intel.com>
> > > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>;
> Richardson,
> > > > Bruce <bruce.richardson@intel.com>; Ananyev, Konstantin
> > > > <konstantin.ananyev@intel.com>
> > > > Subject: [PATCH v3 7/7] app/testpmd: adjust ethdev port ownership
> > > >
> > > > Testpmd should not use ethdev ports which are managed by other
> > > > DPDK entities.
> > > >
> > > > Set Testpmd ownership to each port which is not used by other
> > > > entity and prevent any usage of ethdev ports which are not owned by
> Testpmd.
> > > >
> > > > Signed-off-by: Matan Azrad <matan@mellanox.com>
> > > > ---
> > > >  app/test-pmd/cmdline.c      | 89 +++++++++++++++++++-----------------
> ----
> > > -----
> > > >  app/test-pmd/cmdline_flow.c |  2 +-
> > > >  app/test-pmd/config.c       | 37 ++++++++++---------
> > > >  app/test-pmd/parameters.c   |  4 +-
> > > >  app/test-pmd/testpmd.c      | 63 ++++++++++++++++++++------------
> > > >  app/test-pmd/testpmd.h      |  3 ++
> > > >  6 files changed, 103 insertions(+), 95 deletions(-)
> > > >
> > > > diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index
> > > > 31919ba..6199c64 100644
> > > > --- a/app/test-pmd/cmdline.c
> > > > +++ b/app/test-pmd/cmdline.c
> > > > @@ -1394,7 +1394,7 @@ struct cmd_config_speed_all {
> > > >  			&link_speed) < 0)
> > > >  		return;
> > > >
> > > > -	RTE_ETH_FOREACH_DEV(pid) {
> > > > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
> > >
> > > Why do we need all these changes?
> > > As I understand you changed definition of RTE_ETH_FOREACH_DEV(), so
> > > no testpmd should work ok default (no_owner case).
> > > Am I missing something here?
> >
> > Now, After Gaetan suggestion RTE_ETH_FOREACH_DEV(pid) will iterate
> over all valid and ownerless ports.
> 
> Yes.
> 
> > Here Testpmd wants to iterate over its owned ports.
> 
> Why? Why it can't just iterate over all valid and ownerless ports?
> As I understand it would be enough to fix current problems and would allow
> us to avoid any changes in testmpd (which I think is a good thing).

Yes, I understand that this big change is very daunted, But I think the current a lot of bugs in testpmd(regarding port ownership) even more daunted.

Look,
Testpmd initiates some of its internal databases depends on specific port iteration,
In some time someone may take ownership of Testpmd ports and testpmd will continue to touch them.

In addition
Using the old iterator in some places in testpmd will cause a race for run-time new ports(can be created by failsafe or any hotplug code):
- testpmd finds an ownerless port(just now created) by the old iterator and start traffic there,
- failsafe takes ownership of this new port and start traffic there.
Problem!
 
In addition
As a good example for well-done application (free from ownership bugs) I tried here to adjust Tespmd to the new rules and BTW to fix a lot of bugs.    


So actually applications which are not aware to the port ownership still are exposed to races, but if there use the old iterator(with the new change) the amount of races decreases. 

Thanks, Matan.
> Konstantin
> 
> >
> > I added to Testpmd ability to take an ownership of ports as the new
> > ownership and synchronization rules suggested, Since Tespmd is a DPDK
> > entity which wants that no one will touch its owned ports, It must allocate
> an unique ID, set owner for its ports (see in main function) and recognizes
> them by its owner ID.
> >
> > > Konstantin
> > >
  
Gaëtan Rivet Jan. 19, 2018, 3 p.m. UTC | #5
Hi Matan,

On Fri, Jan 19, 2018 at 01:35:10PM +0000, Matan Azrad wrote:
> Hi Konstantin
> 
> From: Ananyev, Konstantin, Friday, January 19, 2018 3:09 PM
> > > -----Original Message-----
> > > From: Matan Azrad [mailto:matan@mellanox.com]
> > > Sent: Friday, January 19, 2018 12:52 PM
> > > To: Ananyev, Konstantin <konstantin.ananyev@intel.com>; Thomas
> > > Monjalon <thomas@monjalon.net>; Gaetan Rivet
> > <gaetan.rivet@6wind.com>;
> > > Wu, Jingjing <jingjing.wu@intel.com>
> > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>; Richardson,
> > > Bruce <bruce.richardson@intel.com>
> > > Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev port ownership
> > >
> > > Hi Konstantin
> > >
> > > From: Ananyev, Konstantin, Friday, January 19, 2018 2:38 PM
> > > > To: Matan Azrad <matan@mellanox.com>; Thomas Monjalon
> > > > <thomas@monjalon.net>; Gaetan Rivet <gaetan.rivet@6wind.com>;
> > Wu,
> > > > Jingjing <jingjing.wu@intel.com>
> > > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>; Richardson,
> > > > Bruce <bruce.richardson@intel.com>
> > > > Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev port
> > > > ownership
> > > >
> > > > Hi Matan,
> > > >
> > > > > -----Original Message-----
> > > > > From: Matan Azrad [mailto:matan@mellanox.com]
> > > > > Sent: Thursday, January 18, 2018 4:35 PM
> > > > > To: Thomas Monjalon <thomas@monjalon.net>; Gaetan Rivet
> > > > > <gaetan.rivet@6wind.com>; Wu, Jingjing <jingjing.wu@intel.com>
> > > > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>;
> > Richardson,
> > > > > Bruce <bruce.richardson@intel.com>; Ananyev, Konstantin
> > > > > <konstantin.ananyev@intel.com>
> > > > > Subject: [PATCH v3 7/7] app/testpmd: adjust ethdev port ownership
> > > > >
> > > > > Testpmd should not use ethdev ports which are managed by other
> > > > > DPDK entities.
> > > > >
> > > > > Set Testpmd ownership to each port which is not used by other
> > > > > entity and prevent any usage of ethdev ports which are not owned by
> > Testpmd.
> > > > >
> > > > > Signed-off-by: Matan Azrad <matan@mellanox.com>
> > > > > ---
> > > > >  app/test-pmd/cmdline.c      | 89 +++++++++++++++++++-----------------
> > ----
> > > > -----
> > > > >  app/test-pmd/cmdline_flow.c |  2 +-
> > > > >  app/test-pmd/config.c       | 37 ++++++++++---------
> > > > >  app/test-pmd/parameters.c   |  4 +-
> > > > >  app/test-pmd/testpmd.c      | 63 ++++++++++++++++++++------------
> > > > >  app/test-pmd/testpmd.h      |  3 ++
> > > > >  6 files changed, 103 insertions(+), 95 deletions(-)
> > > > >
> > > > > diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index
> > > > > 31919ba..6199c64 100644
> > > > > --- a/app/test-pmd/cmdline.c
> > > > > +++ b/app/test-pmd/cmdline.c
> > > > > @@ -1394,7 +1394,7 @@ struct cmd_config_speed_all {
> > > > >  			&link_speed) < 0)
> > > > >  		return;
> > > > >
> > > > > -	RTE_ETH_FOREACH_DEV(pid) {
> > > > > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
> > > >
> > > > Why do we need all these changes?
> > > > As I understand you changed definition of RTE_ETH_FOREACH_DEV(), so
> > > > no testpmd should work ok default (no_owner case).
> > > > Am I missing something here?
> > >
> > > Now, After Gaetan suggestion RTE_ETH_FOREACH_DEV(pid) will iterate
> > over all valid and ownerless ports.
> > 
> > Yes.
> > 
> > > Here Testpmd wants to iterate over its owned ports.
> > 
> > Why? Why it can't just iterate over all valid and ownerless ports?
> > As I understand it would be enough to fix current problems and would allow
> > us to avoid any changes in testmpd (which I think is a good thing).
> 
> Yes, I understand that this big change is very daunted, But I think the current a lot of bugs in testpmd(regarding port ownership) even more daunted.
> 
> Look,
> Testpmd initiates some of its internal databases depends on specific port iteration,
> In some time someone may take ownership of Testpmd ports and testpmd will continue to touch them.
> 

If I look back on the fail-safe, its sole purpose is to have seamless
hotplug with existing applications.

Port ownership is a genericization of some functions introduced by the
fail-safe, that could structure DPDK further. It should allow
applications to have a seamless integration with subsystems using port
ownership. Without this, port ownership cannot be used.

Testpmd should be fixed, but follow the most common design patterns of
DPDK applications. Going with port ownership seems like a paradigm
shift.

> In addition
> Using the old iterator in some places in testpmd will cause a race for run-time new ports(can be created by failsafe or any hotplug code):
> - testpmd finds an ownerless port(just now created) by the old iterator and start traffic there,
> - failsafe takes ownership of this new port and start traffic there.
> Problem!

Testpmd does not handle detection of new port. If it did, testing
fail-safe with it would be wrong.

At startup, RTE_ETH_FOREACH_DEV already fixed the issue of registering
DEFERRED ports. There are still remaining issues regarding this, but I
think they should be fixed. The architecture does not need to be
completely moved to port ownership.

If anything, this should serve as a test for your API with common
applications. I think you'd prefer to know and debug with testpmd
instead of firing up VPP or something like that to determine what went
wrong with using the fail-safe.

>  
> In addition
> As a good example for well-done application (free from ownership bugs) I tried here to adjust Tespmd to the new rules and BTW to fix a lot of bugs.    

Testpmd has too much cruft, it won't ever be a good example of a
well-done application.

If you want to demonstrate ownership, I think you should start an
example application demonstrating race conditions and their mitigation.

I think that would be interesting for many DPDK users.

> 
> 
> So actually applications which are not aware to the port ownership still are exposed to races, but if there use the old iterator(with the new change) the amount of races decreases. 
> 
> Thanks, Matan.
> > Konstantin
> > 
> > >
> > > I added to Testpmd ability to take an ownership of ports as the new
> > > ownership and synchronization rules suggested, Since Tespmd is a DPDK
> > > entity which wants that no one will touch its owned ports, It must allocate
> > an unique ID, set owner for its ports (see in main function) and recognizes
> > them by its owner ID.
> > >
> > > > Konstantin
> > > >

Regards,
  
Matan Azrad Jan. 20, 2018, 6:14 p.m. UTC | #6
Hi Gaetan

From: Gaëtan Rivet, Friday, January 19, 2018 5:00 PM
> Hi Matan,
> 
> On Fri, Jan 19, 2018 at 01:35:10PM +0000, Matan Azrad wrote:
> > Hi Konstantin
> >
> > From: Ananyev, Konstantin, Friday, January 19, 2018 3:09 PM
> > > > -----Original Message-----
> > > > From: Matan Azrad [mailto:matan@mellanox.com]
> > > > Sent: Friday, January 19, 2018 12:52 PM
> > > > To: Ananyev, Konstantin <konstantin.ananyev@intel.com>; Thomas
> > > > Monjalon <thomas@monjalon.net>; Gaetan Rivet
> > > <gaetan.rivet@6wind.com>;
> > > > Wu, Jingjing <jingjing.wu@intel.com>
> > > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>;
> Richardson,
> > > > Bruce <bruce.richardson@intel.com>
> > > > Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev port
> > > > ownership
> > > >
> > > > Hi Konstantin
> > > >
> > > > From: Ananyev, Konstantin, Friday, January 19, 2018 2:38 PM
> > > > > To: Matan Azrad <matan@mellanox.com>; Thomas Monjalon
> > > > > <thomas@monjalon.net>; Gaetan Rivet <gaetan.rivet@6wind.com>;
> > > Wu,
> > > > > Jingjing <jingjing.wu@intel.com>
> > > > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>;
> > > > > Richardson, Bruce <bruce.richardson@intel.com>
> > > > > Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev port
> > > > > ownership
> > > > >
> > > > > Hi Matan,
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Matan Azrad [mailto:matan@mellanox.com]
> > > > > > Sent: Thursday, January 18, 2018 4:35 PM
> > > > > > To: Thomas Monjalon <thomas@monjalon.net>; Gaetan Rivet
> > > > > > <gaetan.rivet@6wind.com>; Wu, Jingjing <jingjing.wu@intel.com>
> > > > > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>;
> > > Richardson,
> > > > > > Bruce <bruce.richardson@intel.com>; Ananyev, Konstantin
> > > > > > <konstantin.ananyev@intel.com>
> > > > > > Subject: [PATCH v3 7/7] app/testpmd: adjust ethdev port
> > > > > > ownership
> > > > > >
> > > > > > Testpmd should not use ethdev ports which are managed by other
> > > > > > DPDK entities.
> > > > > >
> > > > > > Set Testpmd ownership to each port which is not used by other
> > > > > > entity and prevent any usage of ethdev ports which are not
> > > > > > owned by
> > > Testpmd.
> > > > > >
> > > > > > Signed-off-by: Matan Azrad <matan@mellanox.com>
> > > > > > ---
> > > > > >  app/test-pmd/cmdline.c      | 89 +++++++++++++++++++-------------
> ----
> > > ----
> > > > > -----
> > > > > >  app/test-pmd/cmdline_flow.c |  2 +-
> > > > > >  app/test-pmd/config.c       | 37 ++++++++++---------
> > > > > >  app/test-pmd/parameters.c   |  4 +-
> > > > > >  app/test-pmd/testpmd.c      | 63 ++++++++++++++++++++----------
> --
> > > > > >  app/test-pmd/testpmd.h      |  3 ++
> > > > > >  6 files changed, 103 insertions(+), 95 deletions(-)
> > > > > >
> > > > > > diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
> > > > > > index
> > > > > > 31919ba..6199c64 100644
> > > > > > --- a/app/test-pmd/cmdline.c
> > > > > > +++ b/app/test-pmd/cmdline.c
> > > > > > @@ -1394,7 +1394,7 @@ struct cmd_config_speed_all {
> > > > > >  			&link_speed) < 0)
> > > > > >  		return;
> > > > > >
> > > > > > -	RTE_ETH_FOREACH_DEV(pid) {
> > > > > > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
> > > > >
> > > > > Why do we need all these changes?
> > > > > As I understand you changed definition of RTE_ETH_FOREACH_DEV(),
> > > > > so no testpmd should work ok default (no_owner case).
> > > > > Am I missing something here?
> > > >
> > > > Now, After Gaetan suggestion RTE_ETH_FOREACH_DEV(pid) will iterate
> > > over all valid and ownerless ports.
> > >
> > > Yes.
> > >
> > > > Here Testpmd wants to iterate over its owned ports.
> > >
> > > Why? Why it can't just iterate over all valid and ownerless ports?
> > > As I understand it would be enough to fix current problems and would
> > > allow us to avoid any changes in testmpd (which I think is a good thing).
> >
> > Yes, I understand that this big change is very daunted, But I think the
> current a lot of bugs in testpmd(regarding port ownership) even more
> daunted.
> >
> > Look,
> > Testpmd initiates some of its internal databases depends on specific
> > port iteration, In some time someone may take ownership of Testpmd
> ports and testpmd will continue to touch them.
> >
> 
> If I look back on the fail-safe, its sole purpose is to have seamless hotplug
> with existing applications.
> 

Yes.

> Port ownership is a genericization of some functions introduced by the fail-
> safe, that could structure DPDK further.

Not only.
Port ownership is a new concept saying that not all the ports are only for the application
and defines well the new port usage synchronization rules.

It can be a solution for failsafe scenario, but it solves a big generic problem regardless fail-safe.  

> It should allow applications to have a seamless integration with subsystems using port ownership. Without this, port ownership cannot be used.

I do not think it is accurate.
We can use different solution to solve the fail-safe case (seamless) by using the DEFFERED state as you did.
Port ownership is not only for failsafe case - it is a generic new concept which BTW can fix the fail-safe case(full fix).
So, application should use port ownership regardless the failsafe using, just to be sure no one touch its ports.

> Testpmd should be fixed, but follow the most common design patterns of
> DPDK applications. Going with port ownership seems like a paradigm shift.

I think this patch is a classic fix for it and for the full generic issue.
Do you have simpler fix to the races?

> > In addition
> > Using the old iterator in some places in testpmd will cause a race for run-
> time new ports(can be created by failsafe or any hotplug code):
> > - testpmd finds an ownerless port(just now created) by the old
> > iterator and start traffic there,
> > - failsafe takes ownership of this new port and start traffic there.
> > Problem!
> 
> Testpmd does not handle detection of new port. If it did, testing fail-safe
> with it would be wrong.

It used the old iterator everywhere.
So it see(and uses) all the valid ports all the time.
As the new concept - it should be changed to use its owned ports,
It is a simple classic solution just to use the new iterator.

> At startup, RTE_ETH_FOREACH_DEV already fixed the issue of registering
> DEFERRED ports. There are still remaining issues regarding this, but I think
> they should be fixed. The architecture does not need to be completely
> moved to port ownership.

I think port ownership fixes the issues nicely, don't you think so?

> If anything, this should serve as a test for your API with common applications.
> I think you'd prefer to know and debug with testpmd instead of firing up VPP
> or something like that to determine what went wrong with using the fail-
> safe.
> >

Yes as your examples in docs.

> > In addition
> > As a good example for well-done application (free from ownership bugs) I
> tried here to adjust Tespmd to the new rules and BTW to fix a lot of bugs.
> 
> Testpmd has too much cruft, it won't ever be a good example of a well-done
> application.
> 
> If you want to demonstrate ownership, I think you should start an example
> application demonstrating race conditions and their mitigation.
> 
> I think that would be interesting for many DPDK users.

I will think about that regardless the testpmd adjustment (need to find time :))

> >
> > So actually applications which are not aware to the port ownership still are
> exposed to races, but if there use the old iterator(with the new change) the
> amount of races decreases.
> >
> > Thanks, Matan.
> > > Konstantin
> > >
> > > >
> > > > I added to Testpmd ability to take an ownership of ports as the
> > > > new ownership and synchronization rules suggested, Since Tespmd is
> > > > a DPDK entity which wants that no one will touch its owned ports,
> > > > It must allocate
> > > an unique ID, set owner for its ports (see in main function) and
> > > recognizes them by its owner ID.
> > > >
> > > > > Konstantin
> > > > >
> 
> Regards,
> --
> Gaëtan Rivet
> 6WIND
  
Gaëtan Rivet Jan. 22, 2018, 10:17 a.m. UTC | #7
Hi Matan,

On Sat, Jan 20, 2018 at 06:14:13PM +0000, Matan Azrad wrote:

<snip>

> > > > > > > @@ -1394,7 +1394,7 @@ struct cmd_config_speed_all {
> > > > > > >  			&link_speed) < 0)
> > > > > > >  		return;
> > > > > > >
> > > > > > > -	RTE_ETH_FOREACH_DEV(pid) {
> > > > > > > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
> > > > > >
> > > > > > Why do we need all these changes?
> > > > > > As I understand you changed definition of RTE_ETH_FOREACH_DEV(),
> > > > > > so no testpmd should work ok default (no_owner case).
> > > > > > Am I missing something here?
> > > > >
> > > > > Now, After Gaetan suggestion RTE_ETH_FOREACH_DEV(pid) will iterate
> > > > over all valid and ownerless ports.

To be clear: you did not implement what I suggested, but your own
interpretation of it. Please do not write as if I validated this
interpretation.

Essentially, the NO_OWNER semantic is completely different from a
default owner. A default owner would protect ports from race conditions
and force port ownership requests to go through proper channels
protected by critical sections.

NO_OWNER means that anyone is free to take any ownerless port at any
time. And as a result, your are thus forced here to fix this by
modifying an existing application for any entity using your ownership
API to function with it.

This is very different from what I suggested. What I said was that I
wanted the most common case to be taken care of, and for existing
applications to continue working. It entails having a more complicated
API, but I think this is a price we should pay.

You are implementing the most common case in testpmd (the app entity
creating an owner and putting its valid ports within). Your API should
ease that up as much as possible before considering forcing everyone to
work with it.

                           ~*~

You implemented a way for the failsafe to capture existing ports.
How does it work without the channels for requesting ports suggested above?

Regards,
  
Matan Azrad Jan. 22, 2018, 11:22 a.m. UTC | #8
Hi Gaetan

From: Gaëtan Rivet, Monday, January 22, 2018 12:17 PM
> Hi Matan,
> 
> On Sat, Jan 20, 2018 at 06:14:13PM +0000, Matan Azrad wrote:
> 
> <snip>
> 
> > > > > > > > @@ -1394,7 +1394,7 @@ struct cmd_config_speed_all {
> > > > > > > >  			&link_speed) < 0)
> > > > > > > >  		return;
> > > > > > > >
> > > > > > > > -	RTE_ETH_FOREACH_DEV(pid) {
> > > > > > > > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
> > > > > > >
> > > > > > > Why do we need all these changes?
> > > > > > > As I understand you changed definition of
> > > > > > > RTE_ETH_FOREACH_DEV(), so no testpmd should work ok default
> (no_owner case).
> > > > > > > Am I missing something here?
> > > > > >
> > > > > > Now, After Gaetan suggestion RTE_ETH_FOREACH_DEV(pid) will
> > > > > > iterate
> > > > > over all valid and ownerless ports.
> 
> To be clear: you did not implement what I suggested, but your own
> interpretation of it. Please do not write as if I validated this interpretation.
> 
> Essentially, the NO_OWNER semantic is completely different from a default
> owner. A default owner would protect ports from race conditions and force
> port ownership requests to go through proper channels protected by critical
> sections.
> 

Please explain it more.
Do you want any created port will be owned by default owner(app)?
So, how can other DPDK entity to take control on a port? 

> NO_OWNER means that anyone is free to take any ownerless port at any
> time. And as a result, your are thus forced here to fix this by modifying an
> existing application for any entity using your ownership API to function with
> it.
>

Yes, I think is should be explicit!
Because hotplug is in the game and  a port can be created\released any time,
Any dpdk entity should know about its ports and own them.

> This is very different from what I suggested. What I said was that I wanted
> the most common case to be taken care of, and for existing applications to
> continue working. It entails having a more complicated API, but I think this is
> a price we should pay.
> 

So, please define what is the common case you are talking about.
And if you have an idea how to adjust port ownership to take care of it, I will be happy to hear.

> You are implementing the most common case in testpmd (the app entity
> creating an owner and putting its valid ports within). Your API should ease
> that up as much as possible before considering forcing everyone to work
> with it.

I don't think it is complicated.

>                            ~*~
> 
> You implemented a way for the failsafe to capture existing ports.
> How does it work without the channels for requesting ports suggested
> above?

If the port is without an owner, it will just take ownership of it and will manage it, else will try to take ownership in the next hotplug alarm. 

> 
> Regards,
> --
> Gaëtan Rivet
> 6WIND
  
Ananyev, Konstantin Jan. 22, 2018, 12:28 p.m. UTC | #9
Hi lads,

> 
> Hi Matan,
> 
> On Fri, Jan 19, 2018 at 01:35:10PM +0000, Matan Azrad wrote:
> > Hi Konstantin
> >
> > From: Ananyev, Konstantin, Friday, January 19, 2018 3:09 PM
> > > > -----Original Message-----
> > > > From: Matan Azrad [mailto:matan@mellanox.com]
> > > > Sent: Friday, January 19, 2018 12:52 PM
> > > > To: Ananyev, Konstantin <konstantin.ananyev@intel.com>; Thomas
> > > > Monjalon <thomas@monjalon.net>; Gaetan Rivet
> > > <gaetan.rivet@6wind.com>;
> > > > Wu, Jingjing <jingjing.wu@intel.com>
> > > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>; Richardson,
> > > > Bruce <bruce.richardson@intel.com>
> > > > Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev port ownership
> > > >
> > > > Hi Konstantin
> > > >
> > > > From: Ananyev, Konstantin, Friday, January 19, 2018 2:38 PM
> > > > > To: Matan Azrad <matan@mellanox.com>; Thomas Monjalon
> > > > > <thomas@monjalon.net>; Gaetan Rivet <gaetan.rivet@6wind.com>;
> > > Wu,
> > > > > Jingjing <jingjing.wu@intel.com>
> > > > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>; Richardson,
> > > > > Bruce <bruce.richardson@intel.com>
> > > > > Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev port
> > > > > ownership
> > > > >
> > > > > Hi Matan,
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Matan Azrad [mailto:matan@mellanox.com]
> > > > > > Sent: Thursday, January 18, 2018 4:35 PM
> > > > > > To: Thomas Monjalon <thomas@monjalon.net>; Gaetan Rivet
> > > > > > <gaetan.rivet@6wind.com>; Wu, Jingjing <jingjing.wu@intel.com>
> > > > > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>;
> > > Richardson,
> > > > > > Bruce <bruce.richardson@intel.com>; Ananyev, Konstantin
> > > > > > <konstantin.ananyev@intel.com>
> > > > > > Subject: [PATCH v3 7/7] app/testpmd: adjust ethdev port ownership
> > > > > >
> > > > > > Testpmd should not use ethdev ports which are managed by other
> > > > > > DPDK entities.
> > > > > >
> > > > > > Set Testpmd ownership to each port which is not used by other
> > > > > > entity and prevent any usage of ethdev ports which are not owned by
> > > Testpmd.
> > > > > >
> > > > > > Signed-off-by: Matan Azrad <matan@mellanox.com>
> > > > > > ---
> > > > > >  app/test-pmd/cmdline.c      | 89 +++++++++++++++++++-----------------
> > > ----
> > > > > -----
> > > > > >  app/test-pmd/cmdline_flow.c |  2 +-
> > > > > >  app/test-pmd/config.c       | 37 ++++++++++---------
> > > > > >  app/test-pmd/parameters.c   |  4 +-
> > > > > >  app/test-pmd/testpmd.c      | 63 ++++++++++++++++++++------------
> > > > > >  app/test-pmd/testpmd.h      |  3 ++
> > > > > >  6 files changed, 103 insertions(+), 95 deletions(-)
> > > > > >
> > > > > > diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index
> > > > > > 31919ba..6199c64 100644
> > > > > > --- a/app/test-pmd/cmdline.c
> > > > > > +++ b/app/test-pmd/cmdline.c
> > > > > > @@ -1394,7 +1394,7 @@ struct cmd_config_speed_all {
> > > > > >  			&link_speed) < 0)
> > > > > >  		return;
> > > > > >
> > > > > > -	RTE_ETH_FOREACH_DEV(pid) {
> > > > > > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
> > > > >
> > > > > Why do we need all these changes?
> > > > > As I understand you changed definition of RTE_ETH_FOREACH_DEV(), so
> > > > > no testpmd should work ok default (no_owner case).
> > > > > Am I missing something here?
> > > >
> > > > Now, After Gaetan suggestion RTE_ETH_FOREACH_DEV(pid) will iterate
> > > over all valid and ownerless ports.
> > >
> > > Yes.
> > >
> > > > Here Testpmd wants to iterate over its owned ports.
> > >
> > > Why? Why it can't just iterate over all valid and ownerless ports?
> > > As I understand it would be enough to fix current problems and would allow
> > > us to avoid any changes in testmpd (which I think is a good thing).
> >
> > Yes, I understand that this big change is very daunted, But I think the current a lot of bugs in testpmd(regarding port ownership) even more
> daunted.
> >
> > Look,
> > Testpmd initiates some of its internal databases depends on specific port iteration,
> > In some time someone may take ownership of Testpmd ports and testpmd will continue to touch them.

But if someone will take the ownership (assign new owner_id) that port will not appear
in RTE_ETH_FOREACH_DEV() any more.

> >
> 
> If I look back on the fail-safe, its sole purpose is to have seamless
> hotplug with existing applications.
> 
> Port ownership is a genericization of some functions introduced by the
> fail-safe, that could structure DPDK further. It should allow
> applications to have a seamless integration with subsystems using port
> ownership. Without this, port ownership cannot be used.
> 
> Testpmd should be fixed, but follow the most common design patterns of
> DPDK applications. Going with port ownership seems like a paradigm
> shift.
> 
> > In addition
> > Using the old iterator in some places in testpmd will cause a race for run-time new ports(can be created by failsafe or any hotplug code):
> > - testpmd finds an ownerless port(just now created) by the old iterator and start traffic there,
> > - failsafe takes ownership of this new port and start traffic there.
> > Problem!

Could you shed a bit more light here - it would be race condition between whom and whom?
As I remember in testpmd all control ops are done within one thread (main lcore).
The only way to attach/detach port with it - invoke testpmd CLI "attach/detach" port.

Konstantin

> 
> Testpmd does not handle detection of new port. If it did, testing
> fail-safe with it would be wrong.
> 
> At startup, RTE_ETH_FOREACH_DEV already fixed the issue of registering
> DEFERRED ports. There are still remaining issues regarding this, but I
> think they should be fixed. The architecture does not need to be
> completely moved to port ownership.
> 
> If anything, this should serve as a test for your API with common
> applications. I think you'd prefer to know and debug with testpmd
> instead of firing up VPP or something like that to determine what went
> wrong with using the fail-safe.
> 
> >
> > In addition
> > As a good example for well-done application (free from ownership bugs) I tried here to adjust Tespmd to the new rules and BTW to fix a
> lot of bugs.
> 
> Testpmd has too much cruft, it won't ever be a good example of a
> well-done application.
> 
> If you want to demonstrate ownership, I think you should start an
> example application demonstrating race conditions and their mitigation.
> 
> I think that would be interesting for many DPDK users.
> 
> >
> >
> > So actually applications which are not aware to the port ownership still are exposed to races, but if there use the old iterator(with the new
> change) the amount of races decreases.
> >
> > Thanks, Matan.
> > > Konstantin
> > >
> > > >
> > > > I added to Testpmd ability to take an ownership of ports as the new
> > > > ownership and synchronization rules suggested, Since Tespmd is a DPDK
> > > > entity which wants that no one will touch its owned ports, It must allocate
> > > an unique ID, set owner for its ports (see in main function) and recognizes
> > > them by its owner ID.
> > > >
> > > > > Konstantin
> > > > >
> 
> Regards,
> --
> Gaëtan Rivet
> 6WIND
  
Matan Azrad Jan. 22, 2018, 1:22 p.m. UTC | #10
Hi 
From: Ananyev, Konstantin [mailto:konstantin.ananyev@intel.com]
> Hi lads,
> 
> >
> > Hi Matan,
> >
> > On Fri, Jan 19, 2018 at 01:35:10PM +0000, Matan Azrad wrote:
> > > Hi Konstantin
> > >
> > > From: Ananyev, Konstantin, Friday, January 19, 2018 3:09 PM
> > > > > -----Original Message-----
> > > > > From: Matan Azrad [mailto:matan@mellanox.com]
> > > > > Sent: Friday, January 19, 2018 12:52 PM
> > > > > To: Ananyev, Konstantin <konstantin.ananyev@intel.com>; Thomas
> > > > > Monjalon <thomas@monjalon.net>; Gaetan Rivet
> > > > <gaetan.rivet@6wind.com>;
> > > > > Wu, Jingjing <jingjing.wu@intel.com>
> > > > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>;
> > > > > Richardson, Bruce <bruce.richardson@intel.com>
> > > > > Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev port
> > > > > ownership
> > > > >
> > > > > Hi Konstantin
> > > > >
> > > > > From: Ananyev, Konstantin, Friday, January 19, 2018 2:38 PM
> > > > > > To: Matan Azrad <matan@mellanox.com>; Thomas Monjalon
> > > > > > <thomas@monjalon.net>; Gaetan Rivet
> <gaetan.rivet@6wind.com>;
> > > > Wu,
> > > > > > Jingjing <jingjing.wu@intel.com>
> > > > > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>;
> > > > > > Richardson, Bruce <bruce.richardson@intel.com>
> > > > > > Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev port
> > > > > > ownership
> > > > > >
> > > > > > Hi Matan,
> > > > > >
> > > > > > > -----Original Message-----
> > > > > > > From: Matan Azrad [mailto:matan@mellanox.com]
> > > > > > > Sent: Thursday, January 18, 2018 4:35 PM
> > > > > > > To: Thomas Monjalon <thomas@monjalon.net>; Gaetan Rivet
> > > > > > > <gaetan.rivet@6wind.com>; Wu, Jingjing
> > > > > > > <jingjing.wu@intel.com>
> > > > > > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>;
> > > > Richardson,
> > > > > > > Bruce <bruce.richardson@intel.com>; Ananyev, Konstantin
> > > > > > > <konstantin.ananyev@intel.com>
> > > > > > > Subject: [PATCH v3 7/7] app/testpmd: adjust ethdev port
> > > > > > > ownership
> > > > > > >
> > > > > > > Testpmd should not use ethdev ports which are managed by
> > > > > > > other DPDK entities.
> > > > > > >
> > > > > > > Set Testpmd ownership to each port which is not used by
> > > > > > > other entity and prevent any usage of ethdev ports which are
> > > > > > > not owned by
> > > > Testpmd.
> > > > > > >
> > > > > > > Signed-off-by: Matan Azrad <matan@mellanox.com>
> > > > > > > ---
> > > > > > >  app/test-pmd/cmdline.c      | 89 +++++++++++++++++++----------
> -------
> > > > ----
> > > > > > -----
> > > > > > >  app/test-pmd/cmdline_flow.c |  2 +-
> > > > > > >  app/test-pmd/config.c       | 37 ++++++++++---------
> > > > > > >  app/test-pmd/parameters.c   |  4 +-
> > > > > > >  app/test-pmd/testpmd.c      | 63 ++++++++++++++++++++--------
> ----
> > > > > > >  app/test-pmd/testpmd.h      |  3 ++
> > > > > > >  6 files changed, 103 insertions(+), 95 deletions(-)
> > > > > > >
> > > > > > > diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
> > > > > > > index
> > > > > > > 31919ba..6199c64 100644
> > > > > > > --- a/app/test-pmd/cmdline.c
> > > > > > > +++ b/app/test-pmd/cmdline.c
> > > > > > > @@ -1394,7 +1394,7 @@ struct cmd_config_speed_all {
> > > > > > >  			&link_speed) < 0)
> > > > > > >  		return;
> > > > > > >
> > > > > > > -	RTE_ETH_FOREACH_DEV(pid) {
> > > > > > > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
> > > > > >
> > > > > > Why do we need all these changes?
> > > > > > As I understand you changed definition of
> > > > > > RTE_ETH_FOREACH_DEV(), so no testpmd should work ok default
> (no_owner case).
> > > > > > Am I missing something here?
> > > > >
> > > > > Now, After Gaetan suggestion RTE_ETH_FOREACH_DEV(pid) will
> > > > > iterate
> > > > over all valid and ownerless ports.
> > > >
> > > > Yes.
> > > >
> > > > > Here Testpmd wants to iterate over its owned ports.
> > > >
> > > > Why? Why it can't just iterate over all valid and ownerless ports?
> > > > As I understand it would be enough to fix current problems and
> > > > would allow us to avoid any changes in testmpd (which I think is a good
> thing).
> > >
> > > Yes, I understand that this big change is very daunted, But I think
> > > the current a lot of bugs in testpmd(regarding port ownership) even
> > > more
> > daunted.
> > >
> > > Look,
> > > Testpmd initiates some of its internal databases depends on specific
> > > port iteration, In some time someone may take ownership of Testpmd
> ports and testpmd will continue to touch them.
> 
> But if someone will take the ownership (assign new owner_id) that port will
> not appear in RTE_ETH_FOREACH_DEV() any more.
> 

Yes, but testpmd sometimes depends on previous iteration using internal database.
So it uses internal database that was updated by old iteration.  

> > >
> >
> > If I look back on the fail-safe, its sole purpose is to have seamless
> > hotplug with existing applications.
> >
> > Port ownership is a genericization of some functions introduced by the
> > fail-safe, that could structure DPDK further. It should allow
> > applications to have a seamless integration with subsystems using port
> > ownership. Without this, port ownership cannot be used.
> >
> > Testpmd should be fixed, but follow the most common design patterns of
> > DPDK applications. Going with port ownership seems like a paradigm
> > shift.
> >
> > > In addition
> > > Using the old iterator in some places in testpmd will cause a race for run-
> time new ports(can be created by failsafe or any hotplug code):
> > > - testpmd finds an ownerless port(just now created) by the old
> > > iterator and start traffic there,
> > > - failsafe takes ownership of this new port and start traffic there.
> > > Problem!
> 
> Could you shed a bit more light here - it would be race condition between
> whom and whom?

Sure.

> As I remember in testpmd all control ops are done within one thread (main
> lcore).

But other dpdk entity can use another thread, for example:
Failsafe uses the host thread(using alarm callback) to create a new port and to take ownership of a port.

The race:
Testpmd iterates over all ports by the master thread.
Failsafe takes ownership of a port by the host thread and start using it. 
=> The two dpdk entities may use the device at same time!

Obeying the new ownership rules can prevent all these races.

> The only way to attach/detach port with it - invoke testpmd CLI
> "attach/detach" port.
> 
> Konstantin
> 
> >
> > Testpmd does not handle detection of new port. If it did, testing
> > fail-safe with it would be wrong.
> >
> > At startup, RTE_ETH_FOREACH_DEV already fixed the issue of registering
> > DEFERRED ports. There are still remaining issues regarding this, but I
> > think they should be fixed. The architecture does not need to be
> > completely moved to port ownership.
> >
> > If anything, this should serve as a test for your API with common
> > applications. I think you'd prefer to know and debug with testpmd
> > instead of firing up VPP or something like that to determine what went
> > wrong with using the fail-safe.
> >
> > >
> > > In addition
> > > As a good example for well-done application (free from ownership
> > > bugs) I tried here to adjust Tespmd to the new rules and BTW to fix
> > > a
> > lot of bugs.
> >
> > Testpmd has too much cruft, it won't ever be a good example of a
> > well-done application.
> >
> > If you want to demonstrate ownership, I think you should start an
> > example application demonstrating race conditions and their mitigation.
> >
> > I think that would be interesting for many DPDK users.
> >
> > >
> > >
> > > So actually applications which are not aware to the port ownership
> > > still are exposed to races, but if there use the old iterator(with
> > > the new
> > change) the amount of races decreases.
> > >
> > > Thanks, Matan.
> > > > Konstantin
> > > >
> > > > >
> > > > > I added to Testpmd ability to take an ownership of ports as the
> > > > > new ownership and synchronization rules suggested, Since Tespmd
> > > > > is a DPDK entity which wants that no one will touch its owned
> > > > > ports, It must allocate
> > > > an unique ID, set owner for its ports (see in main function) and
> > > > recognizes them by its owner ID.
> > > > >
> > > > > > Konstantin
> > > > > >
> >
> > Regards,
> > --
> > Gaëtan Rivet
> > 6WIND
  
Ananyev, Konstantin Jan. 22, 2018, 8:48 p.m. UTC | #11
Hi Matan,

> -----Original Message-----
> From: Matan Azrad [mailto:matan@mellanox.com]
> Sent: Monday, January 22, 2018 1:23 PM
> To: Ananyev, Konstantin <konstantin.ananyev@intel.com>; Gaëtan Rivet <gaetan.rivet@6wind.com>
> Cc: Thomas Monjalon <thomas@monjalon.net>; Wu, Jingjing <jingjing.wu@intel.com>; dev@dpdk.org; Neil Horman
> <nhorman@tuxdriver.com>; Richardson, Bruce <bruce.richardson@intel.com>
> Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev port ownership
> 
> 
> Hi
> From: Ananyev, Konstantin [mailto:konstantin.ananyev@intel.com]
> > Hi lads,
> >
> > >
> > > Hi Matan,
> > >
> > > On Fri, Jan 19, 2018 at 01:35:10PM +0000, Matan Azrad wrote:
> > > > Hi Konstantin
> > > >
> > > > From: Ananyev, Konstantin, Friday, January 19, 2018 3:09 PM
> > > > > > -----Original Message-----
> > > > > > From: Matan Azrad [mailto:matan@mellanox.com]
> > > > > > Sent: Friday, January 19, 2018 12:52 PM
> > > > > > To: Ananyev, Konstantin <konstantin.ananyev@intel.com>; Thomas
> > > > > > Monjalon <thomas@monjalon.net>; Gaetan Rivet
> > > > > <gaetan.rivet@6wind.com>;
> > > > > > Wu, Jingjing <jingjing.wu@intel.com>
> > > > > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>;
> > > > > > Richardson, Bruce <bruce.richardson@intel.com>
> > > > > > Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev port
> > > > > > ownership
> > > > > >
> > > > > > Hi Konstantin
> > > > > >
> > > > > > From: Ananyev, Konstantin, Friday, January 19, 2018 2:38 PM
> > > > > > > To: Matan Azrad <matan@mellanox.com>; Thomas Monjalon
> > > > > > > <thomas@monjalon.net>; Gaetan Rivet
> > <gaetan.rivet@6wind.com>;
> > > > > Wu,
> > > > > > > Jingjing <jingjing.wu@intel.com>
> > > > > > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>;
> > > > > > > Richardson, Bruce <bruce.richardson@intel.com>
> > > > > > > Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev port
> > > > > > > ownership
> > > > > > >
> > > > > > > Hi Matan,
> > > > > > >
> > > > > > > > -----Original Message-----
> > > > > > > > From: Matan Azrad [mailto:matan@mellanox.com]
> > > > > > > > Sent: Thursday, January 18, 2018 4:35 PM
> > > > > > > > To: Thomas Monjalon <thomas@monjalon.net>; Gaetan Rivet
> > > > > > > > <gaetan.rivet@6wind.com>; Wu, Jingjing
> > > > > > > > <jingjing.wu@intel.com>
> > > > > > > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>;
> > > > > Richardson,
> > > > > > > > Bruce <bruce.richardson@intel.com>; Ananyev, Konstantin
> > > > > > > > <konstantin.ananyev@intel.com>
> > > > > > > > Subject: [PATCH v3 7/7] app/testpmd: adjust ethdev port
> > > > > > > > ownership
> > > > > > > >
> > > > > > > > Testpmd should not use ethdev ports which are managed by
> > > > > > > > other DPDK entities.
> > > > > > > >
> > > > > > > > Set Testpmd ownership to each port which is not used by
> > > > > > > > other entity and prevent any usage of ethdev ports which are
> > > > > > > > not owned by
> > > > > Testpmd.
> > > > > > > >
> > > > > > > > Signed-off-by: Matan Azrad <matan@mellanox.com>
> > > > > > > > ---
> > > > > > > >  app/test-pmd/cmdline.c      | 89 +++++++++++++++++++----------
> > -------
> > > > > ----
> > > > > > > -----
> > > > > > > >  app/test-pmd/cmdline_flow.c |  2 +-
> > > > > > > >  app/test-pmd/config.c       | 37 ++++++++++---------
> > > > > > > >  app/test-pmd/parameters.c   |  4 +-
> > > > > > > >  app/test-pmd/testpmd.c      | 63 ++++++++++++++++++++--------
> > ----
> > > > > > > >  app/test-pmd/testpmd.h      |  3 ++
> > > > > > > >  6 files changed, 103 insertions(+), 95 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
> > > > > > > > index
> > > > > > > > 31919ba..6199c64 100644
> > > > > > > > --- a/app/test-pmd/cmdline.c
> > > > > > > > +++ b/app/test-pmd/cmdline.c
> > > > > > > > @@ -1394,7 +1394,7 @@ struct cmd_config_speed_all {
> > > > > > > >  			&link_speed) < 0)
> > > > > > > >  		return;
> > > > > > > >
> > > > > > > > -	RTE_ETH_FOREACH_DEV(pid) {
> > > > > > > > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
> > > > > > >
> > > > > > > Why do we need all these changes?
> > > > > > > As I understand you changed definition of
> > > > > > > RTE_ETH_FOREACH_DEV(), so no testpmd should work ok default
> > (no_owner case).
> > > > > > > Am I missing something here?
> > > > > >
> > > > > > Now, After Gaetan suggestion RTE_ETH_FOREACH_DEV(pid) will
> > > > > > iterate
> > > > > over all valid and ownerless ports.
> > > > >
> > > > > Yes.
> > > > >
> > > > > > Here Testpmd wants to iterate over its owned ports.
> > > > >
> > > > > Why? Why it can't just iterate over all valid and ownerless ports?
> > > > > As I understand it would be enough to fix current problems and
> > > > > would allow us to avoid any changes in testmpd (which I think is a good
> > thing).
> > > >
> > > > Yes, I understand that this big change is very daunted, But I think
> > > > the current a lot of bugs in testpmd(regarding port ownership) even
> > > > more
> > > daunted.
> > > >
> > > > Look,
> > > > Testpmd initiates some of its internal databases depends on specific
> > > > port iteration, In some time someone may take ownership of Testpmd
> > ports and testpmd will continue to touch them.
> >
> > But if someone will take the ownership (assign new owner_id) that port will
> > not appear in RTE_ETH_FOREACH_DEV() any more.
> >
> 
> Yes, but testpmd sometimes depends on previous iteration using internal database.
> So it uses internal database that was updated by old iteration.

That sounds like just a bug in testpmd that need to be fixed, no?
Any particular places where outdated device info is used? 

> 
> > > >
> > >
> > > If I look back on the fail-safe, its sole purpose is to have seamless
> > > hotplug with existing applications.
> > >
> > > Port ownership is a genericization of some functions introduced by the
> > > fail-safe, that could structure DPDK further. It should allow
> > > applications to have a seamless integration with subsystems using port
> > > ownership. Without this, port ownership cannot be used.
> > >
> > > Testpmd should be fixed, but follow the most common design patterns of
> > > DPDK applications. Going with port ownership seems like a paradigm
> > > shift.
> > >
> > > > In addition
> > > > Using the old iterator in some places in testpmd will cause a race for run-
> > time new ports(can be created by failsafe or any hotplug code):
> > > > - testpmd finds an ownerless port(just now created) by the old
> > > > iterator and start traffic there,
> > > > - failsafe takes ownership of this new port and start traffic there.
> > > > Problem!
> >
> > Could you shed a bit more light here - it would be race condition between
> > whom and whom?
> 
> Sure.
> 
> > As I remember in testpmd all control ops are done within one thread (main
> > lcore).
> 
> But other dpdk entity can use another thread, for example:
> Failsafe uses the host thread(using alarm callback) to create a new port and to take ownership of a port.

Hm, and you create new ports inside failsafe PMD, right and then set new owner_id for it?
And all this in alarm in interrupt thread?
If so I wonder how you can guarantee that no-one else will set different owner_id between
rte_eth_dev_allocate() and rte_eth_dev_owner_set()?
Could you point me to that place (I am not really familiar with familiar with failsafe code)?

> 
> The race:
> Testpmd iterates over all ports by the master thread.
> Failsafe takes ownership of a port by the host thread and start using it.
> => The two dpdk entities may use the device at same time!

Ok, if failsafe really assigns its owner_id(s) to ports that are already in use by the app,
then how such scheme supposed to work at all?
I.E. application has a port - it assigns some owner_id != 0 to it, then PMD tries to
set its owner_id tot the same port.
Obviously failsafe's set_owner() will always fail in such case.

From what I hear we need to introduce a concept of 'default owner id'.
I.E. when failsafe PMD is created - user assigns some owner_id to it (default).
Then failsafe PMD generates it's own owner_id and assigns it only to the ports
whose current owner_id is equal either 0 or 'default' owner_id. 

> 
> Obeying the new ownership rules can prevent all these races.
> 

When we discussed RFC of owner_id patch, I thought we all agreed that
owner_id  API shouldn't be mandatory - i.e. existing apps not required to change
to work normally with that.
Though right now it seems that application changes seems necessary,
at least to work ok with failsafe PMD.
Which makes we wonder was it some sort of misunderstanding or
we did we do something wrong here?
Konstantin

> > The only way to attach/detach port with it - invoke testpmd CLI
> > "attach/detach" port.
> >
> > Konstantin
> >
> > >
> > > Testpmd does not handle detection of new port. If it did, testing
> > > fail-safe with it would be wrong.
> > >
> > > At startup, RTE_ETH_FOREACH_DEV already fixed the issue of registering
> > > DEFERRED ports. There are still remaining issues regarding this, but I
> > > think they should be fixed. The architecture does not need to be
> > > completely moved to port ownership.
> > >
> > > If anything, this should serve as a test for your API with common
> > > applications. I think you'd prefer to know and debug with testpmd
> > > instead of firing up VPP or something like that to determine what went
> > > wrong with using the fail-safe.
> > >
> > > >
> > > > In addition
> > > > As a good example for well-done application (free from ownership
> > > > bugs) I tried here to adjust Tespmd to the new rules and BTW to fix
> > > > a
> > > lot of bugs.
> > >
> > > Testpmd has too much cruft, it won't ever be a good example of a
> > > well-done application.
> > >
> > > If you want to demonstrate ownership, I think you should start an
> > > example application demonstrating race conditions and their mitigation.
> > >
> > > I think that would be interesting for many DPDK users.
> > >
> > > >
> > > >
> > > > So actually applications which are not aware to the port ownership
> > > > still are exposed to races, but if there use the old iterator(with
> > > > the new
> > > change) the amount of races decreases.
> > > >
> > > > Thanks, Matan.
> > > > > Konstantin
> > > > >
> > > > > >
> > > > > > I added to Testpmd ability to take an ownership of ports as the
> > > > > > new ownership and synchronization rules suggested, Since Tespmd
> > > > > > is a DPDK entity which wants that no one will touch its owned
> > > > > > ports, It must allocate
> > > > > an unique ID, set owner for its ports (see in main function) and
> > > > > recognizes them by its owner ID.
> > > > > >
> > > > > > > Konstantin
> > > > > > >
> > >
> > > Regards,
> > > --
> > > Gaëtan Rivet
> > > 6WIND
  
Matan Azrad Jan. 23, 2018, 8:54 a.m. UTC | #12
Hi Konstantin
From: Ananyev, Konstantin, Monday, January 22, 2018 10:49 PM
> Hi Matan,
> 
> > -----Original Message-----
> > From: Matan Azrad [mailto:matan@mellanox.com]
> > Sent: Monday, January 22, 2018 1:23 PM
> > To: Ananyev, Konstantin <konstantin.ananyev@intel.com>; Gaëtan Rivet
> > <gaetan.rivet@6wind.com>
> > Cc: Thomas Monjalon <thomas@monjalon.net>; Wu, Jingjing
> > <jingjing.wu@intel.com>; dev@dpdk.org; Neil Horman
> > <nhorman@tuxdriver.com>; Richardson, Bruce
> > <bruce.richardson@intel.com>
> > Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev port ownership
> >
> >
> > Hi
> > From: Ananyev, Konstantin [mailto:konstantin.ananyev@intel.com]
> > > Hi lads,
> > >
> > > >
> > > > Hi Matan,
> > > >
> > > > On Fri, Jan 19, 2018 at 01:35:10PM +0000, Matan Azrad wrote:
> > > > > Hi Konstantin
> > > > >
> > > > > From: Ananyev, Konstantin, Friday, January 19, 2018 3:09 PM
> > > > > > > -----Original Message-----
> > > > > > > From: Matan Azrad [mailto:matan@mellanox.com]
> > > > > > > Sent: Friday, January 19, 2018 12:52 PM
> > > > > > > To: Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> > > > > > > Thomas Monjalon <thomas@monjalon.net>; Gaetan Rivet
> > > > > > <gaetan.rivet@6wind.com>;
> > > > > > > Wu, Jingjing <jingjing.wu@intel.com>
> > > > > > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>;
> > > > > > > Richardson, Bruce <bruce.richardson@intel.com>
> > > > > > > Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev port
> > > > > > > ownership
> > > > > > >
> > > > > > > Hi Konstantin
> > > > > > >
> > > > > > > From: Ananyev, Konstantin, Friday, January 19, 2018 2:38 PM
> > > > > > > > To: Matan Azrad <matan@mellanox.com>; Thomas Monjalon
> > > > > > > > <thomas@monjalon.net>; Gaetan Rivet
> > > <gaetan.rivet@6wind.com>;
> > > > > > Wu,
> > > > > > > > Jingjing <jingjing.wu@intel.com>
> > > > > > > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>;
> > > > > > > > Richardson, Bruce <bruce.richardson@intel.com>
> > > > > > > > Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev
> > > > > > > > port ownership
> > > > > > > >
> > > > > > > > Hi Matan,
> > > > > > > >
> > > > > > > > > -----Original Message-----
> > > > > > > > > From: Matan Azrad [mailto:matan@mellanox.com]
> > > > > > > > > Sent: Thursday, January 18, 2018 4:35 PM
> > > > > > > > > To: Thomas Monjalon <thomas@monjalon.net>; Gaetan Rivet
> > > > > > > > > <gaetan.rivet@6wind.com>; Wu, Jingjing
> > > > > > > > > <jingjing.wu@intel.com>
> > > > > > > > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>;
> > > > > > Richardson,
> > > > > > > > > Bruce <bruce.richardson@intel.com>; Ananyev, Konstantin
> > > > > > > > > <konstantin.ananyev@intel.com>
> > > > > > > > > Subject: [PATCH v3 7/7] app/testpmd: adjust ethdev port
> > > > > > > > > ownership
> > > > > > > > >
> > > > > > > > > Testpmd should not use ethdev ports which are managed by
> > > > > > > > > other DPDK entities.
> > > > > > > > >
> > > > > > > > > Set Testpmd ownership to each port which is not used by
> > > > > > > > > other entity and prevent any usage of ethdev ports which
> > > > > > > > > are not owned by
> > > > > > Testpmd.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Matan Azrad <matan@mellanox.com>
> > > > > > > > > ---
> > > > > > > > >  app/test-pmd/cmdline.c      | 89 +++++++++++++++++++------
> ----
> > > -------
> > > > > > ----
> > > > > > > > -----
> > > > > > > > >  app/test-pmd/cmdline_flow.c |  2 +-
> > > > > > > > >  app/test-pmd/config.c       | 37 ++++++++++---------
> > > > > > > > >  app/test-pmd/parameters.c   |  4 +-
> > > > > > > > >  app/test-pmd/testpmd.c      | 63 ++++++++++++++++++++----
> ----
> > > ----
> > > > > > > > >  app/test-pmd/testpmd.h      |  3 ++
> > > > > > > > >  6 files changed, 103 insertions(+), 95 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/app/test-pmd/cmdline.c
> > > > > > > > > b/app/test-pmd/cmdline.c index
> > > > > > > > > 31919ba..6199c64 100644
> > > > > > > > > --- a/app/test-pmd/cmdline.c
> > > > > > > > > +++ b/app/test-pmd/cmdline.c
> > > > > > > > > @@ -1394,7 +1394,7 @@ struct cmd_config_speed_all {
> > > > > > > > >  			&link_speed) < 0)
> > > > > > > > >  		return;
> > > > > > > > >
> > > > > > > > > -	RTE_ETH_FOREACH_DEV(pid) {
> > > > > > > > > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid,
> my_owner.id) {
> > > > > > > >
> > > > > > > > Why do we need all these changes?
> > > > > > > > As I understand you changed definition of
> > > > > > > > RTE_ETH_FOREACH_DEV(), so no testpmd should work ok
> > > > > > > > default
> > > (no_owner case).
> > > > > > > > Am I missing something here?
> > > > > > >
> > > > > > > Now, After Gaetan suggestion RTE_ETH_FOREACH_DEV(pid) will
> > > > > > > iterate
> > > > > > over all valid and ownerless ports.
> > > > > >
> > > > > > Yes.
> > > > > >
> > > > > > > Here Testpmd wants to iterate over its owned ports.
> > > > > >
> > > > > > Why? Why it can't just iterate over all valid and ownerless ports?
> > > > > > As I understand it would be enough to fix current problems and
> > > > > > would allow us to avoid any changes in testmpd (which I think
> > > > > > is a good
> > > thing).
> > > > >
> > > > > Yes, I understand that this big change is very daunted, But I
> > > > > think the current a lot of bugs in testpmd(regarding port
> > > > > ownership) even more
> > > > daunted.
> > > > >
> > > > > Look,
> > > > > Testpmd initiates some of its internal databases depends on
> > > > > specific port iteration, In some time someone may take ownership
> > > > > of Testpmd
> > > ports and testpmd will continue to touch them.
> > >
> > > But if someone will take the ownership (assign new owner_id) that
> > > port will not appear in RTE_ETH_FOREACH_DEV() any more.
> > >
> >
> > Yes, but testpmd sometimes depends on previous iteration using internal
> database.
> > So it uses internal database that was updated by old iteration.
> 
> That sounds like just a bug in testpmd that need to be fixed, no?

If Testpmd already took ownership for these ports(like I did), it is ok.

> Any particular places where outdated device info is used?

For example, look for the stream management in testpmd(I think I saw it there).

> > > > If I look back on the fail-safe, its sole purpose is to have
> > > > seamless hotplug with existing applications.
> > > >
> > > > Port ownership is a genericization of some functions introduced by
> > > > the fail-safe, that could structure DPDK further. It should allow
> > > > applications to have a seamless integration with subsystems using
> > > > port ownership. Without this, port ownership cannot be used.
> > > >
> > > > Testpmd should be fixed, but follow the most common design
> > > > patterns of DPDK applications. Going with port ownership seems
> > > > like a paradigm shift.
> > > >
> > > > > In addition
> > > > > Using the old iterator in some places in testpmd will cause a
> > > > > race for run-
> > > time new ports(can be created by failsafe or any hotplug code):
> > > > > - testpmd finds an ownerless port(just now created) by the old
> > > > > iterator and start traffic there,
> > > > > - failsafe takes ownership of this new port and start traffic there.
> > > > > Problem!
> > >
> > > Could you shed a bit more light here - it would be race condition
> > > between whom and whom?
> >
> > Sure.
> >
> > > As I remember in testpmd all control ops are done within one thread
> > > (main lcore).
> >
> > But other dpdk entity can use another thread, for example:
> > Failsafe uses the host thread(using alarm callback) to create a new port and
> to take ownership of a port.
> 
> Hm, and you create new ports inside failsafe PMD, right and then set new
> owner_id for it?

Yes.

> And all this in alarm in interrupt thread?

Yes.

> If so I wonder how you can guarantee that no-one else will set different
> owner_id between
> rte_eth_dev_allocate() and rte_eth_dev_owner_set()?

I check it (see failsafe patch to this series - V5).
Function: fs_bus_init.

> Could you point me to that place (I am not really familiar with familiar with
> failsafe code)?
> 
> >
> > The race:
> > Testpmd iterates over all ports by the master thread.
> > Failsafe takes ownership of a port by the host thread and start using it.
> > => The two dpdk entities may use the device at same time!
> 
> Ok, if failsafe really assigns its owner_id(s) to ports that are already in use by
> the app, then how such scheme supposed to work at all?

If the app works well (with the new rules) it already took ownership and failsafe will see it and will wait until the application release it.
Every dpdk entity should know which port it wants to manage,
If 2 entities want to manage the same device -  it can be ok and port ownership can synchronize the usage.

Probably, application which will run fail-safe wants to manage only the fail-safe port and therefor to take ownership only for it.

> I.E. application has a port - it assigns some owner_id != 0 to it, then PMD tries
> to set its owner_id tot the same port.
> Obviously failsafe's set_owner() will always fail in such case.
>
Yes, and will try again after some time.
 
> From what I hear we need to introduce a concept of 'default owner id'.
> I.E. when failsafe PMD is created - user assigns some owner_id to it (default).
> Then failsafe PMD generates it's own owner_id and assigns it only to the
> ports whose current owner_id is equal either 0 or 'default' owner_id.
> 

It is a suggestion and we need to think about it more (I'm talking about it with Gaetan in another thread).
Actually I think, if we want a generic solution to the generic problem the current solution is ok. 

> >
> > Obeying the new ownership rules can prevent all these races.
> >
> 
> When we discussed RFC of owner_id patch, I thought we all agreed that
> owner_id  API shouldn't be mandatory - i.e. existing apps not required to
> change to work normally with that.

Yes, it is not mandatory if app doesn't use hotplug.

I think with hotplug it is mandatory in the most cases.

And it can ease the secondary process model too.

Again, in the generic ownership problem as discussed in RFC:
Every entity, include app, should know which ports it wants to manage and to take ownership only for them.

> Though right now it seems that application changes seems necessary, at least
> to work ok with failsafe PMD.

And for solving the generic problem of ownership.(will defend from future issues by sure).

> Which makes we wonder was it some sort of misunderstanding or we did we
> do something wrong here?

Mistakes can be done all the time, but I think we are all understand the big issue of ownership and how the current solution solves it.
fail-safe it is just a current example for the problems which are possible because of the generic ownership issue.

Thanks,
Matan
> Konstantin
> 
> > > The only way to attach/detach port with it - invoke testpmd CLI
> > > "attach/detach" port.
> > >
> > > Konstantin
> > >
> > > >
> > > > Testpmd does not handle detection of new port. If it did, testing
> > > > fail-safe with it would be wrong.
> > > >
> > > > At startup, RTE_ETH_FOREACH_DEV already fixed the issue of
> > > > registering DEFERRED ports. There are still remaining issues
> > > > regarding this, but I think they should be fixed. The architecture
> > > > does not need to be completely moved to port ownership.
> > > >
> > > > If anything, this should serve as a test for your API with common
> > > > applications. I think you'd prefer to know and debug with testpmd
> > > > instead of firing up VPP or something like that to determine what
> > > > went wrong with using the fail-safe.
> > > >
> > > > >
> > > > > In addition
> > > > > As a good example for well-done application (free from ownership
> > > > > bugs) I tried here to adjust Tespmd to the new rules and BTW to
> > > > > fix a
> > > > lot of bugs.
> > > >
> > > > Testpmd has too much cruft, it won't ever be a good example of a
> > > > well-done application.
> > > >
> > > > If you want to demonstrate ownership, I think you should start an
> > > > example application demonstrating race conditions and their mitigation.
> > > >
> > > > I think that would be interesting for many DPDK users.
> > > >
> > > > >
> > > > >
> > > > > So actually applications which are not aware to the port
> > > > > ownership still are exposed to races, but if there use the old
> > > > > iterator(with the new
> > > > change) the amount of races decreases.
> > > > >
> > > > > Thanks, Matan.
> > > > > > Konstantin
> > > > > >
> > > > > > >
> > > > > > > I added to Testpmd ability to take an ownership of ports as
> > > > > > > the new ownership and synchronization rules suggested, Since
> > > > > > > Tespmd is a DPDK entity which wants that no one will touch
> > > > > > > its owned ports, It must allocate
> > > > > > an unique ID, set owner for its ports (see in main function)
> > > > > > and recognizes them by its owner ID.
> > > > > > >
> > > > > > > > Konstantin
> > > > > > > >
> > > >
> > > > Regards,
> > > > --
> > > > Gaëtan Rivet
> > > > 6WIND
  
Gaëtan Rivet Jan. 23, 2018, 12:56 p.m. UTC | #13
Hi all,

On Tue, Jan 23, 2018 at 08:54:27AM +0000, Matan Azrad wrote:

<snip>

> > > > > > > > > > Subject: [PATCH v3 7/7] app/testpmd: adjust ethdev port
> > > > > > > > > > ownership
> > > > > > > > > >
> > > > > > > > > > Testpmd should not use ethdev ports which are managed by
> > > > > > > > > > other DPDK entities.
> > > > > > > > > >
> > > > > > > > > > Set Testpmd ownership to each port which is not used by
> > > > > > > > > > other entity and prevent any usage of ethdev ports which
> > > > > > > > > > are not owned by
> > > > > > > Testpmd.
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Matan Azrad <matan@mellanox.com>
> > > > > > > > > > ---
> > > > > > > > > >  app/test-pmd/cmdline.c      | 89 +++++++++++++++++++------
> > ----
> > > > -------
> > > > > > > ----
> > > > > > > > > -----
> > > > > > > > > >  app/test-pmd/cmdline_flow.c |  2 +-
> > > > > > > > > >  app/test-pmd/config.c       | 37 ++++++++++---------
> > > > > > > > > >  app/test-pmd/parameters.c   |  4 +-
> > > > > > > > > >  app/test-pmd/testpmd.c      | 63 ++++++++++++++++++++----
> > ----
> > > > ----
> > > > > > > > > >  app/test-pmd/testpmd.h      |  3 ++
> > > > > > > > > >  6 files changed, 103 insertions(+), 95 deletions(-)
> > > > > > > > > >
> > > > > > > > > > diff --git a/app/test-pmd/cmdline.c
> > > > > > > > > > b/app/test-pmd/cmdline.c index
> > > > > > > > > > 31919ba..6199c64 100644
> > > > > > > > > > --- a/app/test-pmd/cmdline.c
> > > > > > > > > > +++ b/app/test-pmd/cmdline.c
> > > > > > > > > > @@ -1394,7 +1394,7 @@ struct cmd_config_speed_all {
> > > > > > > > > >  			&link_speed) < 0)
> > > > > > > > > >  		return;
> > > > > > > > > >
> > > > > > > > > > -	RTE_ETH_FOREACH_DEV(pid) {
> > > > > > > > > > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid,
> > my_owner.id) {
> > > > > > > > >
> > > > > > > > > Why do we need all these changes?
> > > > > > > > > As I understand you changed definition of
> > > > > > > > > RTE_ETH_FOREACH_DEV(), so no testpmd should work ok
> > > > > > > > > default
> > > > (no_owner case).
> > > > > > > > > Am I missing something here?
> > > > > > > >
> > > > > > > > Now, After Gaetan suggestion RTE_ETH_FOREACH_DEV(pid) will
> > > > > > > > iterate
> > > > > > > over all valid and ownerless ports.
> > > > > > >
> > > > > > > Yes.
> > > > > > >
> > > > > > > > Here Testpmd wants to iterate over its owned ports.
> > > > > > >
> > > > > > > Why? Why it can't just iterate over all valid and ownerless ports?
> > > > > > > As I understand it would be enough to fix current problems and
> > > > > > > would allow us to avoid any changes in testmpd (which I think
> > > > > > > is a good
> > > > thing).
> > > > > >
> > > > > > Yes, I understand that this big change is very daunted, But I
> > > > > > think the current a lot of bugs in testpmd(regarding port
> > > > > > ownership) even more
> > > > > daunted.
> > > > > >
> > > > > > Look,
> > > > > > Testpmd initiates some of its internal databases depends on
> > > > > > specific port iteration, In some time someone may take ownership
> > > > > > of Testpmd ports and testpmd will continue to touch them.
> > > >
> > > > But if someone will take the ownership (assign new owner_id) that
> > > > port will not appear in RTE_ETH_FOREACH_DEV() any more.
> > > >
> > >
> > > Yes, but testpmd sometimes depends on previous iteration using internal database.
> > > So it uses internal database that was updated by old iteration.
> > 
> > That sounds like just a bug in testpmd that need to be fixed, no?
> 
> If Testpmd already took ownership for these ports(like I did), it is ok.
> 

Have you tested using the default iterator (NO_OWNER)?
It worked until now with the bare minimal device tagging using
DEV_DEFERRED. Testpmd did not seem to mind having to skip this port.

I'm sure there were places where this was overlooked, but overall, I'd
think everything should be fixable using only the NO_OWNER iteration.

Can you point to a specific scenario (command line, chain of event) that
would lead to a problem?

> > Any particular places where outdated device info is used?
> 
> For example, look for the stream management in testpmd(I think I saw it there).
> 

The stream management is certainly shaky, but it happens after the EAL
initial port creation, and is not able to update itself for new
hotplugged ports (unless something changed).

> > > > > If I look back on the fail-safe, its sole purpose is to have
> > > > > seamless hotplug with existing applications.
> > > > >
> > > > > Port ownership is a genericization of some functions introduced by
> > > > > the fail-safe, that could structure DPDK further. It should allow
> > > > > applications to have a seamless integration with subsystems using
> > > > > port ownership. Without this, port ownership cannot be used.
> > > > >
> > > > > Testpmd should be fixed, but follow the most common design
> > > > > patterns of DPDK applications. Going with port ownership seems
> > > > > like a paradigm shift.
> > > > >
> > > > > > In addition
> > > > > > Using the old iterator in some places in testpmd will cause a
> > > > > > race for run-
> > > > time new ports(can be created by failsafe or any hotplug code):
> > > > > > - testpmd finds an ownerless port(just now created) by the old
> > > > > > iterator and start traffic there,

How does testpmd start traffic there? Testpmd has only a callback for
displaying that it received an event for a new port. It has no concept
of hotplugging beyond that.

Testpmd will not start using any new port probed using the hotplug API
on its own, again, unless something has drastically changed.

> > > > > > - failsafe takes ownership of this new port and start traffic there.
> > > > > > Problem!
> > > >
> > > > Could you shed a bit more light here - it would be race condition
> > > > between whom and whom?
> > >
> > > Sure.
> > >
> > > > As I remember in testpmd all control ops are done within one thread
> > > > (main lcore).
> > >
> > > But other dpdk entity can use another thread, for example:
> > > Failsafe uses the host thread(using alarm callback) to create a new port and
> > to take ownership of a port.
> > 
> > Hm, and you create new ports inside failsafe PMD, right and then set new
> > owner_id for it?
> 
> Yes.
> 
> > And all this in alarm in interrupt thread?
> 
> Yes.
> 
> > If so I wonder how you can guarantee that no-one else will set different
> > owner_id between
> > rte_eth_dev_allocate() and rte_eth_dev_owner_set()?
> 
> I check it (see failsafe patch to this series - V5).
> Function: fs_bus_init.
> 
> > Could you point me to that place (I am not really familiar with familiar with
> > failsafe code)?
> > 
> > >
> > > The race:
> > > Testpmd iterates over all ports by the master thread.
> > > Failsafe takes ownership of a port by the host thread and start using it.
> > > => The two dpdk entities may use the device at same time!
> > 

When can this happen? Fail-safe creates its initial pool of ports during
EAL init, before testpmd scans eth_dev ports and configure its streams.
At that point, it has taken ownership, from the master lcore context.

After this point, new ports could be detected and hotplugged by
fail-safe. However, even if testpmd had a callback to capture those new
ports and reconfigure its streams, it would be executed from within the
intr-thread, same as failsafe. If the thread was interrupted, by a
dataplane-lcore for example, streams would not have been reconfigured.
The fail-safe would execute its callback and set the owner-id before the
callback chains goes to the application.

And that would only be if testpmd had any callback for hotplugging ports
and reconfiguring its streams, which it hasn't, as far as I know.

> > Ok, if failsafe really assigns its owner_id(s) to ports that are already in use by
> > the app, then how such scheme supposed to work at all?
> 
> If the app works well (with the new rules) it already took ownership and failsafe will see it and will wait until the application release it.
> Every dpdk entity should know which port it wants to manage,
> If 2 entities want to manage the same device -  it can be ok and port ownership can synchronize the usage.
> 
> Probably, application which will run fail-safe wants to manage only the fail-safe port and therefor to take ownership only for it.
> 
> > I.E. application has a port - it assigns some owner_id != 0 to it, then PMD tries
> > to set its owner_id tot the same port.
> > Obviously failsafe's set_owner() will always fail in such case.
> >
> Yes, and will try again after some time.
>  
> > From what I hear we need to introduce a concept of 'default owner id'.
> > I.E. when failsafe PMD is created - user assigns some owner_id to it (default).
> > Then failsafe PMD generates it's own owner_id and assigns it only to the
> > ports whose current owner_id is equal either 0 or 'default' owner_id.
> > 
> 
> It is a suggestion and we need to think about it more (I'm talking about it with Gaetan in another thread).
> Actually I think, if we want a generic solution to the generic problem the current solution is ok. 
> 

We could as well conclude this other thread there.

The only solution would be to have a default relationship between
owners, something that goes beyond the scope assigned by Thomas to your
evolution, but would be necessary for this API to be properly used by
existing applications.

I think it's the only way to have a sane default behavior with your
API, but I also think this goes beyong the scope of the DPDK altogether.

But even with those considerations that could be ironed out later (API
is still experimental anyway), in the meantime, I think we should strive
not to break "userland" as much as possible. Meaning that unless you
have a specific situation creating a bug, you shouldn't have to modify
testpmd, and if an issues arises, you need to try to improve your API
before resorting to changing the resource management model of all
existing applications.

> > >
> > > Obeying the new ownership rules can prevent all these races.
> > >
> > 
> > When we discussed RFC of owner_id patch, I thought we all agreed that
> > owner_id  API shouldn't be mandatory - i.e. existing apps not required to
> > change to work normally with that.
> 
> Yes, it is not mandatory if app doesn't use hotplug.
> 
> I think with hotplug it is mandatory in the most cases.
> 
> And it can ease the secondary process model too.
> 
> Again, in the generic ownership problem as discussed in RFC:
> Every entity, include app, should know which ports it wants to manage and to take ownership only for them.
> 
> > Though right now it seems that application changes seems necessary, at least
> > to work ok with failsafe PMD.
> 
> And for solving the generic problem of ownership.(will defend from future issues by sure).
> 
> > Which makes we wonder was it some sort of misunderstanding or we did we
> > do something wrong here?
> 
> Mistakes can be done all the time, but I think we are all understand the big issue of ownership and how the current solution solves it.
> fail-safe it is just a current example for the problems which are possible because of the generic ownership issue.
> 

Regards,
  
Ananyev, Konstantin Jan. 23, 2018, 1:34 p.m. UTC | #14
Hi Matan,

> 
> 
> Hi Konstantin
> From: Ananyev, Konstantin, Monday, January 22, 2018 10:49 PM
> > Hi Matan,
> >
> > > -----Original Message-----
> > > From: Matan Azrad [mailto:matan@mellanox.com]
> > > Sent: Monday, January 22, 2018 1:23 PM
> > > To: Ananyev, Konstantin <konstantin.ananyev@intel.com>; Gaëtan Rivet
> > > <gaetan.rivet@6wind.com>
> > > Cc: Thomas Monjalon <thomas@monjalon.net>; Wu, Jingjing
> > > <jingjing.wu@intel.com>; dev@dpdk.org; Neil Horman
> > > <nhorman@tuxdriver.com>; Richardson, Bruce
> > > <bruce.richardson@intel.com>
> > > Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev port ownership
> > >
> > >
> > > Hi
> > > From: Ananyev, Konstantin [mailto:konstantin.ananyev@intel.com]
> > > > Hi lads,
> > > >
> > > > >
> > > > > Hi Matan,
> > > > >
> > > > > On Fri, Jan 19, 2018 at 01:35:10PM +0000, Matan Azrad wrote:
> > > > > > Hi Konstantin
> > > > > >
> > > > > > From: Ananyev, Konstantin, Friday, January 19, 2018 3:09 PM
> > > > > > > > -----Original Message-----
> > > > > > > > From: Matan Azrad [mailto:matan@mellanox.com]
> > > > > > > > Sent: Friday, January 19, 2018 12:52 PM
> > > > > > > > To: Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> > > > > > > > Thomas Monjalon <thomas@monjalon.net>; Gaetan Rivet
> > > > > > > <gaetan.rivet@6wind.com>;
> > > > > > > > Wu, Jingjing <jingjing.wu@intel.com>
> > > > > > > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>;
> > > > > > > > Richardson, Bruce <bruce.richardson@intel.com>
> > > > > > > > Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev port
> > > > > > > > ownership
> > > > > > > >
> > > > > > > > Hi Konstantin
> > > > > > > >
> > > > > > > > From: Ananyev, Konstantin, Friday, January 19, 2018 2:38 PM
> > > > > > > > > To: Matan Azrad <matan@mellanox.com>; Thomas Monjalon
> > > > > > > > > <thomas@monjalon.net>; Gaetan Rivet
> > > > <gaetan.rivet@6wind.com>;
> > > > > > > Wu,
> > > > > > > > > Jingjing <jingjing.wu@intel.com>
> > > > > > > > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>;
> > > > > > > > > Richardson, Bruce <bruce.richardson@intel.com>
> > > > > > > > > Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev
> > > > > > > > > port ownership
> > > > > > > > >
> > > > > > > > > Hi Matan,
> > > > > > > > >
> > > > > > > > > > -----Original Message-----
> > > > > > > > > > From: Matan Azrad [mailto:matan@mellanox.com]
> > > > > > > > > > Sent: Thursday, January 18, 2018 4:35 PM
> > > > > > > > > > To: Thomas Monjalon <thomas@monjalon.net>; Gaetan Rivet
> > > > > > > > > > <gaetan.rivet@6wind.com>; Wu, Jingjing
> > > > > > > > > > <jingjing.wu@intel.com>
> > > > > > > > > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>;
> > > > > > > Richardson,
> > > > > > > > > > Bruce <bruce.richardson@intel.com>; Ananyev, Konstantin
> > > > > > > > > > <konstantin.ananyev@intel.com>
> > > > > > > > > > Subject: [PATCH v3 7/7] app/testpmd: adjust ethdev port
> > > > > > > > > > ownership
> > > > > > > > > >
> > > > > > > > > > Testpmd should not use ethdev ports which are managed by
> > > > > > > > > > other DPDK entities.
> > > > > > > > > >
> > > > > > > > > > Set Testpmd ownership to each port which is not used by
> > > > > > > > > > other entity and prevent any usage of ethdev ports which
> > > > > > > > > > are not owned by
> > > > > > > Testpmd.
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Matan Azrad <matan@mellanox.com>
> > > > > > > > > > ---
> > > > > > > > > >  app/test-pmd/cmdline.c      | 89 +++++++++++++++++++------
> > ----
> > > > -------
> > > > > > > ----
> > > > > > > > > -----
> > > > > > > > > >  app/test-pmd/cmdline_flow.c |  2 +-
> > > > > > > > > >  app/test-pmd/config.c       | 37 ++++++++++---------
> > > > > > > > > >  app/test-pmd/parameters.c   |  4 +-
> > > > > > > > > >  app/test-pmd/testpmd.c      | 63 ++++++++++++++++++++----
> > ----
> > > > ----
> > > > > > > > > >  app/test-pmd/testpmd.h      |  3 ++
> > > > > > > > > >  6 files changed, 103 insertions(+), 95 deletions(-)
> > > > > > > > > >
> > > > > > > > > > diff --git a/app/test-pmd/cmdline.c
> > > > > > > > > > b/app/test-pmd/cmdline.c index
> > > > > > > > > > 31919ba..6199c64 100644
> > > > > > > > > > --- a/app/test-pmd/cmdline.c
> > > > > > > > > > +++ b/app/test-pmd/cmdline.c
> > > > > > > > > > @@ -1394,7 +1394,7 @@ struct cmd_config_speed_all {
> > > > > > > > > >  			&link_speed) < 0)
> > > > > > > > > >  		return;
> > > > > > > > > >
> > > > > > > > > > -	RTE_ETH_FOREACH_DEV(pid) {
> > > > > > > > > > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid,
> > my_owner.id) {
> > > > > > > > >
> > > > > > > > > Why do we need all these changes?
> > > > > > > > > As I understand you changed definition of
> > > > > > > > > RTE_ETH_FOREACH_DEV(), so no testpmd should work ok
> > > > > > > > > default
> > > > (no_owner case).
> > > > > > > > > Am I missing something here?
> > > > > > > >
> > > > > > > > Now, After Gaetan suggestion RTE_ETH_FOREACH_DEV(pid) will
> > > > > > > > iterate
> > > > > > > over all valid and ownerless ports.
> > > > > > >
> > > > > > > Yes.
> > > > > > >
> > > > > > > > Here Testpmd wants to iterate over its owned ports.
> > > > > > >
> > > > > > > Why? Why it can't just iterate over all valid and ownerless ports?
> > > > > > > As I understand it would be enough to fix current problems and
> > > > > > > would allow us to avoid any changes in testmpd (which I think
> > > > > > > is a good
> > > > thing).
> > > > > >
> > > > > > Yes, I understand that this big change is very daunted, But I
> > > > > > think the current a lot of bugs in testpmd(regarding port
> > > > > > ownership) even more
> > > > > daunted.
> > > > > >
> > > > > > Look,
> > > > > > Testpmd initiates some of its internal databases depends on
> > > > > > specific port iteration, In some time someone may take ownership
> > > > > > of Testpmd
> > > > ports and testpmd will continue to touch them.
> > > >
> > > > But if someone will take the ownership (assign new owner_id) that
> > > > port will not appear in RTE_ETH_FOREACH_DEV() any more.
> > > >
> > >
> > > Yes, but testpmd sometimes depends on previous iteration using internal
> > database.
> > > So it uses internal database that was updated by old iteration.
> >
> > That sounds like just a bug in testpmd that need to be fixed, no?
> 
> If Testpmd already took ownership for these ports(like I did), it is ok.
> 

Hmm, why not just to fix testpmd, if there is a bug?
As I said all control ops here are done by one thread, so it should be pretty easy.
Or are you talking about race conditions?

> > Any particular places where outdated device info is used?
> 
> For example, look for the stream management in testpmd(I think I saw it there).

Anything particular?

> 
> > > > > If I look back on the fail-safe, its sole purpose is to have
> > > > > seamless hotplug with existing applications.
> > > > >
> > > > > Port ownership is a genericization of some functions introduced by
> > > > > the fail-safe, that could structure DPDK further. It should allow
> > > > > applications to have a seamless integration with subsystems using
> > > > > port ownership. Without this, port ownership cannot be used.
> > > > >
> > > > > Testpmd should be fixed, but follow the most common design
> > > > > patterns of DPDK applications. Going with port ownership seems
> > > > > like a paradigm shift.
> > > > >
> > > > > > In addition
> > > > > > Using the old iterator in some places in testpmd will cause a
> > > > > > race for run-
> > > > time new ports(can be created by failsafe or any hotplug code):
> > > > > > - testpmd finds an ownerless port(just now created) by the old
> > > > > > iterator and start traffic there,
> > > > > > - failsafe takes ownership of this new port and start traffic there.
> > > > > > Problem!
> > > >
> > > > Could you shed a bit more light here - it would be race condition
> > > > between whom and whom?
> > >
> > > Sure.
> > >
> > > > As I remember in testpmd all control ops are done within one thread
> > > > (main lcore).
> > >
> > > But other dpdk entity can use another thread, for example:
> > > Failsafe uses the host thread(using alarm callback) to create a new port and
> > to take ownership of a port.
> >
> > Hm, and you create new ports inside failsafe PMD, right and then set new
> > owner_id for it?
> 
> Yes.
> 
> > And all this in alarm in interrupt thread?
> 
> Yes.
> 
> > If so I wonder how you can guarantee that no-one else will set different
> > owner_id between
> > rte_eth_dev_allocate() and rte_eth_dev_owner_set()?
> 
> I check it (see failsafe patch to this series - V5).
> Function: fs_bus_init.

You are talking about that peace of code:
+		ret = rte_eth_dev_owner_set(pid, &PRIV(dev)->my_owner);
+		if (ret) {
+			INFO("sub_device %d owner set failed (%s),"
+			     " will try again later", i, strerror(ret));
+			continue;

right?
So you just wouldn't include that device into your failsafe device.
But that probably not what user wanted, especially if he bothered to create
a special new low-level device for you.
If that' s the use case, then I think you need to set device ownership at creation time -
inside dev_allocate().
Again that would avoid such racing conditions inside testpmd.

> 
> > Could you point me to that place (I am not really familiar with familiar with
> > failsafe code)?
> >
> > >
> > > The race:
> > > Testpmd iterates over all ports by the master thread.
> > > Failsafe takes ownership of a port by the host thread and start using it.
> > > => The two dpdk entities may use the device at same time!
> >
> > Ok, if failsafe really assigns its owner_id(s) to ports that are already in use by
> > the app, then how such scheme supposed to work at all?
> 
> If the app works well (with the new rules) it already took ownership and failsafe will see it and will wait until the application release it.

Ok, and why application would need to release it?
How it would know that failsafe device wants to use it now?
Again where is a guarantee that after app released it some other entity wouldn't grab it for itself?

> Every dpdk entity should know which port it wants to manage,
> If 2 entities want to manage the same device -  it can be ok and port ownership can synchronize the usage.
> 
> Probably, application which will run fail-safe wants to manage only the fail-safe port and therefor to take ownership only for it.
> 
> > I.E. application has a port - it assigns some owner_id != 0 to it, then PMD tries
> > to set its owner_id tot the same port.
> > Obviously failsafe's set_owner() will always fail in such case.
> >
> Yes, and will try again after some time.

Same question again - how app will know that it has to release the port ownership?

> 
> > From what I hear we need to introduce a concept of 'default owner id'.
> > I.E. when failsafe PMD is created - user assigns some owner_id to it (default).
> > Then failsafe PMD generates it's own owner_id and assigns it only to the
> > ports whose current owner_id is equal either 0 or 'default' owner_id.
> >
> 
> It is a suggestion and we need to think about it more (I'm talking about it with Gaetan in another thread).
> Actually I think, if we want a generic solution to the generic problem the current solution is ok.

From what I heard - every app that wants to use failsafe PMD would require quite a lot of changes.
It doesn't look ok to me.

> 
> > >
> > > Obeying the new ownership rules can prevent all these races.
> > >
> >
> > When we discussed RFC of owner_id patch, I thought we all agreed that
> > owner_id  API shouldn't be mandatory - i.e. existing apps not required to
> > change to work normally with that.
> 
> Yes, it is not mandatory if app doesn't use hotplug.
> 
> I think with hotplug it is mandatory in the most cases.

Yes in failsafe you always install this alarm handler, so even
if the app would have its own  way to handle hotplug  devices -
it would suddenly need to use this new owner API - even if it doesn't need to.
Why it has to be?

> 
> And it can ease the secondary process model too.
> 
> Again, in the generic ownership problem as discussed in RFC:
> Every entity, include app, should know which ports it wants to manage and to take ownership only for them.
> 
> > Though right now it seems that application changes seems necessary, at least
> > to work ok with failsafe PMD.
> 
> And for solving the generic problem of ownership.(will defend from future issues by sure).
> 
> > Which makes we wonder was it some sort of misunderstanding or we did we
> > do something wrong here?
> 
> Mistakes can be done all the time, but I think we are all understand the big issue of ownership and how the current solution solves it.
> fail-safe it is just a current example for the problems which are possible because of the generic ownership issue.

Honestly that seems too much changes for the app just to make failsafe PMD work correctly.
IMO - It should be some way to support it without causing changes in each DPDK application  -
otherwise something is wrong with the PMD itself.
If let say that ownership model is required to make failsafe PMD to operate -
it should be done in a transparent way to the user.
Probably something like Gaetan suggested in another mail or so.
Konstantin

> 
> Thanks,
> Matan
> > Konstantin
> >
> > > > The only way to attach/detach port with it - invoke testpmd CLI
> > > > "attach/detach" port.
> > > >
> > > > Konstantin
> > > >
> > > > >
> > > > > Testpmd does not handle detection of new port. If it did, testing
> > > > > fail-safe with it would be wrong.
> > > > >
> > > > > At startup, RTE_ETH_FOREACH_DEV already fixed the issue of
> > > > > registering DEFERRED ports. There are still remaining issues
> > > > > regarding this, but I think they should be fixed. The architecture
> > > > > does not need to be completely moved to port ownership.
> > > > >
> > > > > If anything, this should serve as a test for your API with common
> > > > > applications. I think you'd prefer to know and debug with testpmd
> > > > > instead of firing up VPP or something like that to determine what
> > > > > went wrong with using the fail-safe.
> > > > >
> > > > > >
> > > > > > In addition
> > > > > > As a good example for well-done application (free from ownership
> > > > > > bugs) I tried here to adjust Tespmd to the new rules and BTW to
> > > > > > fix a
> > > > > lot of bugs.
> > > > >
> > > > > Testpmd has too much cruft, it won't ever be a good example of a
> > > > > well-done application.
> > > > >
> > > > > If you want to demonstrate ownership, I think you should start an
> > > > > example application demonstrating race conditions and their mitigation.
> > > > >
> > > > > I think that would be interesting for many DPDK users.
> > > > >
> > > > > >
> > > > > >
> > > > > > So actually applications which are not aware to the port
> > > > > > ownership still are exposed to races, but if there use the old
> > > > > > iterator(with the new
> > > > > change) the amount of races decreases.
> > > > > >
> > > > > > Thanks, Matan.
> > > > > > > Konstantin
> > > > > > >
> > > > > > > >
> > > > > > > > I added to Testpmd ability to take an ownership of ports as
> > > > > > > > the new ownership and synchronization rules suggested, Since
> > > > > > > > Tespmd is a DPDK entity which wants that no one will touch
> > > > > > > > its owned ports, It must allocate
> > > > > > > an unique ID, set owner for its ports (see in main function)
> > > > > > > and recognizes them by its owner ID.
> > > > > > > >
> > > > > > > > > Konstantin
> > > > > > > > >
> > > > >
> > > > > Regards,
> > > > > --
> > > > > Gaëtan Rivet
> > > > > 6WIND
  
Thomas Monjalon Jan. 23, 2018, 2:18 p.m. UTC | #15
23/01/2018 14:34, Ananyev, Konstantin:
> If that' s the use case, then I think you need to set device ownership at creation time -
> inside dev_allocate().
> Again that would avoid such racing conditions inside testpmd.

The devices must be allocated at a low level layer.
When a new device appears (hotplug), an ethdev port should be allocated
automatically if it passes the whitelist/blacklist policy test.
Then we must decide who will manage this device.
I suggest notifying the DPDK libs first.
So a DPDK lib or PMD like failsafe can have the priority to take the
ownership in its notification callback.
  
Matan Azrad Jan. 23, 2018, 2:30 p.m. UTC | #16
Hi

From: Gaëtan Rivet [mailto:gaetan.rivet@6wind.com]
<snip>
> > > > > > Look,
> > > > > > > Testpmd initiates some of its internal databases depends on
> > > > > > > specific port iteration, In some time someone may take
> > > > > > > ownership of Testpmd ports and testpmd will continue to touch
> them.
> > > > >
> > > > > But if someone will take the ownership (assign new owner_id)
> > > > > that port will not appear in RTE_ETH_FOREACH_DEV() any more.
> > > > >
> > > >
> > > > Yes, but testpmd sometimes depends on previous iteration using
> internal database.
> > > > So it uses internal database that was updated by old iteration.
> > >
> > > That sounds like just a bug in testpmd that need to be fixed, no?
> >
> > If Testpmd already took ownership for these ports(like I did), it is ok.
> >
> 
> Have you tested using the default iterator (NO_OWNER)?
> It worked until now with the bare minimal device tagging using
> DEV_DEFERRED. Testpmd did not seem to mind having to skip this port.
> 
> I'm sure there were places where this was overlooked, but overall, I'd think
> everything should be fixable using only the NO_OWNER iteration.

I don't think so.

> Can you point to a specific scenario (command line, chain of event) that
> would lead to a problem?
>

I didn't construct a race test to catch testpmd issue, but I think without this patch, there is a lot of issues.
Go to the testpmd code (before ownership) and find usage of the old iterator(after the first iteration in main),
Ask yourself what should happen if exactly in this time, a new port is created by fail-safe(plug in event).
 
> > > Any particular places where outdated device info is used?
> >
> > For example, look for the stream management in testpmd(I think I saw it
> there).
> >
> 
> The stream management is certainly shaky, but it happens after the EAL
> initial port creation, and is not able to update itself for new hotplugged ports
> (unless something changed).
> 

Yes, but conceptually someone in the future may take the port(because it ownerless).

> > > > > > If I look back on the fail-safe, its sole purpose is to have
> > > > > > seamless hotplug with existing applications.
> > > > > >
> > > > > > Port ownership is a genericization of some functions
> > > > > > introduced by the fail-safe, that could structure DPDK
> > > > > > further. It should allow applications to have a seamless
> > > > > > integration with subsystems using port ownership. Without this,
> port ownership cannot be used.
> > > > > >
> > > > > > Testpmd should be fixed, but follow the most common design
> > > > > > patterns of DPDK applications. Going with port ownership seems
> > > > > > like a paradigm shift.
> > > > > >
> > > > > > > In addition
> > > > > > > Using the old iterator in some places in testpmd will cause
> > > > > > > a race for run-
> > > > > time new ports(can be created by failsafe or any hotplug code):
> > > > > > > - testpmd finds an ownerless port(just now created) by the
> > > > > > > old iterator and start traffic there,
> 
> How does testpmd start traffic there? Testpmd has only a callback for
> displaying that it received an event for a new port. It has no concept of
> hotplugging beyond that.
> 

Yes, so no traffic just some control command.

> Testpmd will not start using any new port probed using the hotplug API on its
> own, again, unless something has drastically changed.
> 

Every iterator using in testpmd is exposed to race.

> > > > > > > - failsafe takes ownership of this new port and start traffic there.
> > > > > > > Problem!
> > > > >
> > > > > Could you shed a bit more light here - it would be race
> > > > > condition between whom and whom?
> > > >
> > > > Sure.
> > > >
> > > > > As I remember in testpmd all control ops are done within one
> > > > > thread (main lcore).
> > > >
> > > > But other dpdk entity can use another thread, for example:
> > > > Failsafe uses the host thread(using alarm callback) to create a
> > > > new port and
> > > to take ownership of a port.
> > >
> > > Hm, and you create new ports inside failsafe PMD, right and then set
> > > new owner_id for it?
> >
> > Yes.
> >
> > > And all this in alarm in interrupt thread?
> >
> > Yes.
> >
> > > If so I wonder how you can guarantee that no-one else will set
> > > different owner_id between
> > > rte_eth_dev_allocate() and rte_eth_dev_owner_set()?
> >
> > I check it (see failsafe patch to this series - V5).
> > Function: fs_bus_init.
> >
> > > Could you point me to that place (I am not really familiar with
> > > familiar with failsafe code)?
> > >
> > > >
> > > > The race:
> > > > Testpmd iterates over all ports by the master thread.
> > > > Failsafe takes ownership of a port by the host thread and start using it.
> > > > => The two dpdk entities may use the device at same time!
> > >
> 
> When can this happen? Fail-safe creates its initial pool of ports during EAL
> init, before testpmd scans eth_dev ports and configure its streams.
> At that point, it has taken ownership, from the master lcore context.
> 
> After this point, new ports could be detected and hotplugged by fail-safe.
> However, even if testpmd had a callback to capture those new ports and
> reconfigure its streams, it would be executed from within the intr-thread,
> same as failsafe. If the thread was interrupted, by a dataplane-lcore for
> example, streams would not have been reconfigured.
> The fail-safe would execute its callback and set the owner-id before the
> callback chains goes to the application.
>

Some iterator may be invoked in plug out process by other thread in testpmd and causes to control command 
 
> And that would only be if testpmd had any callback for hotplugging ports and
> reconfiguring its streams, which it hasn't, as far as I know.
>

We don't need to implement it in testpmd.
 
> > > Ok, if failsafe really assigns its owner_id(s) to ports that are
> > > already in use by the app, then how such scheme supposed to work at
> all?
> >
> > If the app works well (with the new rules) it already took ownership and
> failsafe will see it and will wait until the application release it.
> > Every dpdk entity should know which port it wants to manage, If 2
> > entities want to manage the same device -  it can be ok and port ownership
> can synchronize the usage.
> >
> > Probably, application which will run fail-safe wants to manage only the fail-
> safe port and therefor to take ownership only for it.
> >
> > > I.E. application has a port - it assigns some owner_id != 0 to it,
> > > then PMD tries to set its owner_id tot the same port.
> > > Obviously failsafe's set_owner() will always fail in such case.
> > >
> > Yes, and will try again after some time.
> >
> > > From what I hear we need to introduce a concept of 'default owner id'.
> > > I.E. when failsafe PMD is created - user assigns some owner_id to it
> (default).
> > > Then failsafe PMD generates it's own owner_id and assigns it only to
> > > the ports whose current owner_id is equal either 0 or 'default' owner_id.
> > >
> >
> > It is a suggestion and we need to think about it more (I'm talking about it
> with Gaetan in another thread).
> > Actually I think, if we want a generic solution to the generic problem the
> current solution is ok.
> >
> 
> We could as well conclude this other thread there.
> 
> The only solution would be to have a default relationship between owners,
> something that goes beyond the scope assigned by Thomas to your
> evolution, but would be necessary for this API to be properly used by
> existing applications.
> 
> I think it's the only way to have a sane default behavior with your API, but I
> also think this goes beyong the scope of the DPDK altogether.
> 
> But even with those considerations that could be ironed out later (API is still
> experimental anyway), in the meantime, I think we should strive not to
> break "userland" as much as possible. Meaning that unless you have a
> specific situation creating a bug, you shouldn't have to modify testpmd, and if
> an issues arises, you need to try to improve your API before resorting to
> changing the resource management model of all existing applications.
> 

I understand it.
Suggestion:

2 system owners.
APP_OWNER - 0.
NO_OWNER - 1.

And allowing for more owners as now.

1. Every port creation will set the owner for NO_OWNER (as now).
2. There is option for all dpdk entities to take owner of  NO_OWNER ports all the time(as now).
3. In some point in the end of EAL init: set all the NO_OWNER to APP_OWNER(for V6).
4. Change the old iterator to iterate over APP_OWNER ports(for V6).

What do you think?

<snip>
  
Matan Azrad Jan. 23, 2018, 2:43 p.m. UTC | #17
Hi Konstantin

Please move the second thread, I'm feeling you and Gaetan have the same questions.

From: Ananyev, Konstantin, Tuesday, January 23, 2018 3:35 PM
> Hi Matan,
> 
> >
> >
> > Hi Konstantin
> > From: Ananyev, Konstantin, Monday, January 22, 2018 10:49 PM
> > > Hi Matan,
> > >
> > > > -----Original Message-----
> > > > From: Matan Azrad [mailto:matan@mellanox.com]
> > > > Sent: Monday, January 22, 2018 1:23 PM
> > > > To: Ananyev, Konstantin <konstantin.ananyev@intel.com>; Gaëtan
> > > > Rivet <gaetan.rivet@6wind.com>
> > > > Cc: Thomas Monjalon <thomas@monjalon.net>; Wu, Jingjing
> > > > <jingjing.wu@intel.com>; dev@dpdk.org; Neil Horman
> > > > <nhorman@tuxdriver.com>; Richardson, Bruce
> > > > <bruce.richardson@intel.com>
> > > > Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev port
> > > > ownership
> > > >
> > > >
> > > > Hi
> > > > From: Ananyev, Konstantin [mailto:konstantin.ananyev@intel.com]
> > > > > Hi lads,
> > > > >
> > > > > >
> > > > > > Hi Matan,
> > > > > >
> > > > > > On Fri, Jan 19, 2018 at 01:35:10PM +0000, Matan Azrad wrote:
> > > > > > > Hi Konstantin
> > > > > > >
> > > > > > > From: Ananyev, Konstantin, Friday, January 19, 2018 3:09 PM
> > > > > > > > > -----Original Message-----
> > > > > > > > > From: Matan Azrad [mailto:matan@mellanox.com]
> > > > > > > > > Sent: Friday, January 19, 2018 12:52 PM
> > > > > > > > > To: Ananyev, Konstantin <konstantin.ananyev@intel.com>;
> > > > > > > > > Thomas Monjalon <thomas@monjalon.net>; Gaetan Rivet
> > > > > > > > <gaetan.rivet@6wind.com>;
> > > > > > > > > Wu, Jingjing <jingjing.wu@intel.com>
> > > > > > > > > Cc: dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>;
> > > > > > > > > Richardson, Bruce <bruce.richardson@intel.com>
> > > > > > > > > Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev
> > > > > > > > > port ownership
> > > > > > > > >
> > > > > > > > > Hi Konstantin
> > > > > > > > >
> > > > > > > > > From: Ananyev, Konstantin, Friday, January 19, 2018 2:38
> > > > > > > > > PM
> > > > > > > > > > To: Matan Azrad <matan@mellanox.com>; Thomas
> Monjalon
> > > > > > > > > > <thomas@monjalon.net>; Gaetan Rivet
> > > > > <gaetan.rivet@6wind.com>;
> > > > > > > > Wu,
> > > > > > > > > > Jingjing <jingjing.wu@intel.com>
> > > > > > > > > > Cc: dev@dpdk.org; Neil Horman
> <nhorman@tuxdriver.com>;
> > > > > > > > > > Richardson, Bruce <bruce.richardson@intel.com>
> > > > > > > > > > Subject: RE: [PATCH v3 7/7] app/testpmd: adjust ethdev
> > > > > > > > > > port ownership
> > > > > > > > > >
> > > > > > > > > > Hi Matan,
> > > > > > > > > >
> > > > > > > > > > > -----Original Message-----
> > > > > > > > > > > From: Matan Azrad [mailto:matan@mellanox.com]
> > > > > > > > > > > Sent: Thursday, January 18, 2018 4:35 PM
> > > > > > > > > > > To: Thomas Monjalon <thomas@monjalon.net>; Gaetan
> > > > > > > > > > > Rivet <gaetan.rivet@6wind.com>; Wu, Jingjing
> > > > > > > > > > > <jingjing.wu@intel.com>
> > > > > > > > > > > Cc: dev@dpdk.org; Neil Horman
> > > > > > > > > > > <nhorman@tuxdriver.com>;
> > > > > > > > Richardson,
> > > > > > > > > > > Bruce <bruce.richardson@intel.com>; Ananyev,
> > > > > > > > > > > Konstantin <konstantin.ananyev@intel.com>
> > > > > > > > > > > Subject: [PATCH v3 7/7] app/testpmd: adjust ethdev
> > > > > > > > > > > port ownership
> > > > > > > > > > >
> > > > > > > > > > > Testpmd should not use ethdev ports which are
> > > > > > > > > > > managed by other DPDK entities.
> > > > > > > > > > >
> > > > > > > > > > > Set Testpmd ownership to each port which is not used
> > > > > > > > > > > by other entity and prevent any usage of ethdev
> > > > > > > > > > > ports which are not owned by
> > > > > > > > Testpmd.
> > > > > > > > > > >
> > > > > > > > > > > Signed-off-by: Matan Azrad <matan@mellanox.com>
> > > > > > > > > > > ---
> > > > > > > > > > >  app/test-pmd/cmdline.c      | 89 +++++++++++++++++++--
> ----
> > > ----
> > > > > -------
> > > > > > > > ----
> > > > > > > > > > -----
> > > > > > > > > > >  app/test-pmd/cmdline_flow.c |  2 +-
> > > > > > > > > > >  app/test-pmd/config.c       | 37 ++++++++++---------
> > > > > > > > > > >  app/test-pmd/parameters.c   |  4 +-
> > > > > > > > > > >  app/test-pmd/testpmd.c      | 63
> ++++++++++++++++++++----
> > > ----
> > > > > ----
> > > > > > > > > > >  app/test-pmd/testpmd.h      |  3 ++
> > > > > > > > > > >  6 files changed, 103 insertions(+), 95 deletions(-)
> > > > > > > > > > >
> > > > > > > > > > > diff --git a/app/test-pmd/cmdline.c
> > > > > > > > > > > b/app/test-pmd/cmdline.c index
> > > > > > > > > > > 31919ba..6199c64 100644
> > > > > > > > > > > --- a/app/test-pmd/cmdline.c
> > > > > > > > > > > +++ b/app/test-pmd/cmdline.c
> > > > > > > > > > > @@ -1394,7 +1394,7 @@ struct cmd_config_speed_all {
> > > > > > > > > > >  			&link_speed) < 0)
> > > > > > > > > > >  		return;
> > > > > > > > > > >
> > > > > > > > > > > -	RTE_ETH_FOREACH_DEV(pid) {
> > > > > > > > > > > +	RTE_ETH_FOREACH_DEV_OWNED_BY(pid,
> > > my_owner.id) {
> > > > > > > > > >
> > > > > > > > > > Why do we need all these changes?
> > > > > > > > > > As I understand you changed definition of
> > > > > > > > > > RTE_ETH_FOREACH_DEV(), so no testpmd should work ok
> > > > > > > > > > default
> > > > > (no_owner case).
> > > > > > > > > > Am I missing something here?
> > > > > > > > >
> > > > > > > > > Now, After Gaetan suggestion RTE_ETH_FOREACH_DEV(pid)
> > > > > > > > > will iterate
> > > > > > > > over all valid and ownerless ports.
> > > > > > > >
> > > > > > > > Yes.
> > > > > > > >
> > > > > > > > > Here Testpmd wants to iterate over its owned ports.
> > > > > > > >
> > > > > > > > Why? Why it can't just iterate over all valid and ownerless ports?
> > > > > > > > As I understand it would be enough to fix current problems
> > > > > > > > and would allow us to avoid any changes in testmpd (which
> > > > > > > > I think is a good
> > > > > thing).
> > > > > > >
> > > > > > > Yes, I understand that this big change is very daunted, But
> > > > > > > I think the current a lot of bugs in testpmd(regarding port
> > > > > > > ownership) even more
> > > > > > daunted.
> > > > > > >
> > > > > > > Look,
> > > > > > > Testpmd initiates some of its internal databases depends on
> > > > > > > specific port iteration, In some time someone may take
> > > > > > > ownership of Testpmd
> > > > > ports and testpmd will continue to touch them.
> > > > >
> > > > > But if someone will take the ownership (assign new owner_id)
> > > > > that port will not appear in RTE_ETH_FOREACH_DEV() any more.
> > > > >
> > > >
> > > > Yes, but testpmd sometimes depends on previous iteration using
> > > > internal
> > > database.
> > > > So it uses internal database that was updated by old iteration.
> > >
> > > That sounds like just a bug in testpmd that need to be fixed, no?
> >
> > If Testpmd already took ownership for these ports(like I did), it is ok.
> >
> 
> Hmm, why not just to fix testpmd, if there is a bug?
> As I said all control ops here are done by one thread, so it should be pretty
> easy.
> Or are you talking about race conditions?
> 
> > > Any particular places where outdated device info is used?
> >
> > For example, look for the stream management in testpmd(I think I saw it
> there).
> 
> Anything particular?
> 
> >
> > > > > > If I look back on the fail-safe, its sole purpose is to have
> > > > > > seamless hotplug with existing applications.
> > > > > >
> > > > > > Port ownership is a genericization of some functions
> > > > > > introduced by the fail-safe, that could structure DPDK
> > > > > > further. It should allow applications to have a seamless
> > > > > > integration with subsystems using port ownership. Without this,
> port ownership cannot be used.
> > > > > >
> > > > > > Testpmd should be fixed, but follow the most common design
> > > > > > patterns of DPDK applications. Going with port ownership seems
> > > > > > like a paradigm shift.
> > > > > >
> > > > > > > In addition
> > > > > > > Using the old iterator in some places in testpmd will cause
> > > > > > > a race for run-
> > > > > time new ports(can be created by failsafe or any hotplug code):
> > > > > > > - testpmd finds an ownerless port(just now created) by the
> > > > > > > old iterator and start traffic there,
> > > > > > > - failsafe takes ownership of this new port and start traffic there.
> > > > > > > Problem!
> > > > >
> > > > > Could you shed a bit more light here - it would be race
> > > > > condition between whom and whom?
> > > >
> > > > Sure.
> > > >
> > > > > As I remember in testpmd all control ops are done within one
> > > > > thread (main lcore).
> > > >
> > > > But other dpdk entity can use another thread, for example:
> > > > Failsafe uses the host thread(using alarm callback) to create a
> > > > new port and
> > > to take ownership of a port.
> > >
> > > Hm, and you create new ports inside failsafe PMD, right and then set
> > > new owner_id for it?
> >
> > Yes.
> >
> > > And all this in alarm in interrupt thread?
> >
> > Yes.
> >
> > > If so I wonder how you can guarantee that no-one else will set
> > > different owner_id between
> > > rte_eth_dev_allocate() and rte_eth_dev_owner_set()?
> >
> > I check it (see failsafe patch to this series - V5).
> > Function: fs_bus_init.
> 
> You are talking about that peace of code:
> +		ret = rte_eth_dev_owner_set(pid, &PRIV(dev)-
> >my_owner);
> +		if (ret) {
> +			INFO("sub_device %d owner set failed (%s),"
> +			     " will try again later", i, strerror(ret));
> +			continue;
> 
> right?
> So you just wouldn't include that device into your failsafe device.
> But that probably not what user wanted, especially if he bothered to create a
> special new low-level device for you.
> If that' s the use case, then I think you need to set device ownership at
> creation time - inside dev_allocate().
> Again that would avoid such racing conditions inside testpmd.
> 
> >
> > > Could you point me to that place (I am not really familiar with
> > > familiar with failsafe code)?
> > >
> > > >
> > > > The race:
> > > > Testpmd iterates over all ports by the master thread.
> > > > Failsafe takes ownership of a port by the host thread and start using it.
> > > > => The two dpdk entities may use the device at same time!
> > >
> > > Ok, if failsafe really assigns its owner_id(s) to ports that are
> > > already in use by the app, then how such scheme supposed to work at
> all?
> >
> > If the app works well (with the new rules) it already took ownership and
> failsafe will see it and will wait until the application release it.
> 
> Ok, and why application would need to release it?
> How it would know that failsafe device wants to use it now?
> Again where is a guarantee that after app released it some other entity
> wouldn't grab it for itself?
> 
> > Every dpdk entity should know which port it wants to manage, If 2
> > entities want to manage the same device -  it can be ok and port ownership
> can synchronize the usage.
> >
> > Probably, application which will run fail-safe wants to manage only the fail-
> safe port and therefor to take ownership only for it.
> >
> > > I.E. application has a port - it assigns some owner_id != 0 to it,
> > > then PMD tries to set its owner_id tot the same port.
> > > Obviously failsafe's set_owner() will always fail in such case.
> > >
> > Yes, and will try again after some time.
> 
> Same question again - how app will know that it has to release the port
> ownership?
> 
> >
> > > From what I hear we need to introduce a concept of 'default owner id'.
> > > I.E. when failsafe PMD is created - user assigns some owner_id to it
> (default).
> > > Then failsafe PMD generates it's own owner_id and assigns it only to
> > > the ports whose current owner_id is equal either 0 or 'default' owner_id.
> > >
> >
> > It is a suggestion and we need to think about it more (I'm talking about it
> with Gaetan in another thread).
> > Actually I think, if we want a generic solution to the generic problem the
> current solution is ok.
> 
> From what I heard - every app that wants to use failsafe PMD would require
> quite a lot of changes.
> It doesn't look ok to me.
> 
> >
> > > >
> > > > Obeying the new ownership rules can prevent all these races.
> > > >
> > >
> > > When we discussed RFC of owner_id patch, I thought we all agreed
> > > that owner_id  API shouldn't be mandatory - i.e. existing apps not
> > > required to change to work normally with that.
> >
> > Yes, it is not mandatory if app doesn't use hotplug.
> >
> > I think with hotplug it is mandatory in the most cases.
> 
> Yes in failsafe you always install this alarm handler, so even if the app would
> have its own  way to handle hotplug  devices - it would suddenly need to use
> this new owner API - even if it doesn't need to.
> Why it has to be?
> 
> >
> > And it can ease the secondary process model too.
> >
> > Again, in the generic ownership problem as discussed in RFC:
> > Every entity, include app, should know which ports it wants to manage and
> to take ownership only for them.
> >
> > > Though right now it seems that application changes seems necessary,
> > > at least to work ok with failsafe PMD.
> >
> > And for solving the generic problem of ownership.(will defend from future
> issues by sure).
> >
> > > Which makes we wonder was it some sort of misunderstanding or we did
> > > we do something wrong here?
> >
> > Mistakes can be done all the time, but I think we are all understand the big
> issue of ownership and how the current solution solves it.
> > fail-safe it is just a current example for the problems which are possible
> because of the generic ownership issue.
> 
> Honestly that seems too much changes for the app just to make failsafe PMD
> work correctly.
> IMO - It should be some way to support it without causing changes in each
> DPDK application  - otherwise something is wrong with the PMD itself.
> If let say that ownership model is required to make failsafe PMD to operate -
> it should be done in a transparent way to the user.
> Probably something like Gaetan suggested in another mail or so.
> Konstantin
> 
> >
> > Thanks,
> > Matan
> > > Konstantin
> > >
> > > > > The only way to attach/detach port with it - invoke testpmd CLI
> > > > > "attach/detach" port.
> > > > >
> > > > > Konstantin
> > > > >
> > > > > >
> > > > > > Testpmd does not handle detection of new port. If it did,
> > > > > > testing fail-safe with it would be wrong.
> > > > > >
> > > > > > At startup, RTE_ETH_FOREACH_DEV already fixed the issue of
> > > > > > registering DEFERRED ports. There are still remaining issues
> > > > > > regarding this, but I think they should be fixed. The
> > > > > > architecture does not need to be completely moved to port
> ownership.
> > > > > >
> > > > > > If anything, this should serve as a test for your API with
> > > > > > common applications. I think you'd prefer to know and debug
> > > > > > with testpmd instead of firing up VPP or something like that
> > > > > > to determine what went wrong with using the fail-safe.
> > > > > >
> > > > > > >
> > > > > > > In addition
> > > > > > > As a good example for well-done application (free from
> > > > > > > ownership
> > > > > > > bugs) I tried here to adjust Tespmd to the new rules and BTW
> > > > > > > to fix a
> > > > > > lot of bugs.
> > > > > >
> > > > > > Testpmd has too much cruft, it won't ever be a good example of
> > > > > > a well-done application.
> > > > > >
> > > > > > If you want to demonstrate ownership, I think you should start
> > > > > > an example application demonstrating race conditions and their
> mitigation.
> > > > > >
> > > > > > I think that would be interesting for many DPDK users.
> > > > > >
> > > > > > >
> > > > > > >
> > > > > > > So actually applications which are not aware to the port
> > > > > > > ownership still are exposed to races, but if there use the
> > > > > > > old iterator(with the new
> > > > > > change) the amount of races decreases.
> > > > > > >
> > > > > > > Thanks, Matan.
> > > > > > > > Konstantin
> > > > > > > >
> > > > > > > > >
> > > > > > > > > I added to Testpmd ability to take an ownership of ports
> > > > > > > > > as the new ownership and synchronization rules
> > > > > > > > > suggested, Since Tespmd is a DPDK entity which wants
> > > > > > > > > that no one will touch its owned ports, It must allocate
> > > > > > > > an unique ID, set owner for its ports (see in main
> > > > > > > > function) and recognizes them by its owner ID.
> > > > > > > > >
> > > > > > > > > > Konstantin
> > > > > > > > > >
> > > > > >
> > > > > > Regards,
> > > > > > --
> > > > > > Gaëtan Rivet
> > > > > > 6WIND
  
Ananyev, Konstantin Jan. 23, 2018, 3:12 p.m. UTC | #18
> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> Sent: Tuesday, January 23, 2018 2:19 PM
> To: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> Cc: Matan Azrad <matan@mellanox.com>; Gaëtan Rivet <gaetan.rivet@6wind.com>; Wu, Jingjing <jingjing.wu@intel.com>;
> dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>; Richardson, Bruce <bruce.richardson@intel.com>
> Subject: Re: [PATCH v3 7/7] app/testpmd: adjust ethdev port ownership
> 
> 23/01/2018 14:34, Ananyev, Konstantin:
> > If that' s the use case, then I think you need to set device ownership at creation time -
> > inside dev_allocate().
> > Again that would avoid such racing conditions inside testpmd.
> 
> The devices must be allocated at a low level layer.

No one arguing about that.
But we can provide owner id information to the low level.

> When a new device appears (hotplug), an ethdev port should be allocated
> automatically if it passes the whitelist/blacklist policy test.
> Then we must decide who will manage this device.
> I suggest notifying the DPDK libs first.
> So a DPDK lib or PMD like failsafe can have the priority to take the
> ownership in its notification callback.

Possible, but seems a bit overcomplicated.
Why not just:

Have a global variable process_default_owner_id, that would be init once at startup.
Have an LTS variable default_owner_id.
It will be used by rte_eth_dev_allocate() caller can set dev->owner_id at creation time,
so port allocation and setting ownership - will be an atomic operation.
At the exit rte_eth_dev_allocate() will always reset default_owner_id=0:

rte_eth_dev_allocate(...)
{
   lock(owner_lock);
   <allocate_port>
   owner = RTE_PER_LCORE(default_owner_id);
   if (owner == 0)
       owner = process_default_owner_id;
  set_owner(port, ..., owner);
 unlock(owner_lock);
 RTE_PER_LCORE(default_owner_id) = 0;
}

So callers who don't need any special ownership - don't need to do anything.
Special callers (like failsafe) can set default_owenr_id just before calling hotplug
handling routine.  

Konstantin
  
Ananyev, Konstantin Jan. 23, 2018, 3:18 p.m. UTC | #19
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ananyev, Konstantin
> Sent: Tuesday, January 23, 2018 3:12 PM
> To: Thomas Monjalon <thomas@monjalon.net>
> Cc: Matan Azrad <matan@mellanox.com>; Gaëtan Rivet <gaetan.rivet@6wind.com>; Wu, Jingjing <jingjing.wu@intel.com>;
> dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>; Richardson, Bruce <bruce.richardson@intel.com>
> Subject: Re: [dpdk-dev] [PATCH v3 7/7] app/testpmd: adjust ethdev port ownership
> 
> 
> 
> > -----Original Message-----
> > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > Sent: Tuesday, January 23, 2018 2:19 PM
> > To: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> > Cc: Matan Azrad <matan@mellanox.com>; Gaëtan Rivet <gaetan.rivet@6wind.com>; Wu, Jingjing <jingjing.wu@intel.com>;
> > dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>; Richardson, Bruce <bruce.richardson@intel.com>
> > Subject: Re: [PATCH v3 7/7] app/testpmd: adjust ethdev port ownership
> >
> > 23/01/2018 14:34, Ananyev, Konstantin:
> > > If that' s the use case, then I think you need to set device ownership at creation time -
> > > inside dev_allocate().
> > > Again that would avoid such racing conditions inside testpmd.
> >
> > The devices must be allocated at a low level layer.
> 
> No one arguing about that.
> But we can provide owner id information to the low level.
> 
> > When a new device appears (hotplug), an ethdev port should be allocated
> > automatically if it passes the whitelist/blacklist policy test.
> > Then we must decide who will manage this device.
> > I suggest notifying the DPDK libs first.
> > So a DPDK lib or PMD like failsafe can have the priority to take the
> > ownership in its notification callback.
> 
> Possible, but seems a bit overcomplicated.
> Why not just:
> 
> Have a global variable process_default_owner_id, that would be init once at startup.
> Have an LTS variable default_owner_id.
> It will be used by rte_eth_dev_allocate() caller can set dev->owner_id at creation time,
> so port allocation and setting ownership - will be an atomic operation.
> At the exit rte_eth_dev_allocate() will always reset default_owner_id=0:
> 
> rte_eth_dev_allocate(...)
> {
>    lock(owner_lock);
>    <allocate_port>
>    owner = RTE_PER_LCORE(default_owner_id);
>    if (owner == 0)
>        owner = process_default_owner_id;
>   set_owner(port, ..., owner);
>  unlock(owner_lock);
>  RTE_PER_LCORE(default_owner_id) = 0;

Or probably better to leave default_owner_id reset to the caller.
Another thing - we can use same LTS variable in all control ops to
allow/disallow changing of port configuration based on ownership.
Konstantin

> }
> 
> So callers who don't need any special ownership - don't need to do anything.
> Special callers (like failsafe) can set default_owenr_id just before calling hotplug
> handling routine.
> 
> Konstantin
> 
> 
>
  
Thomas Monjalon Jan. 23, 2018, 5:33 p.m. UTC | #20
23/01/2018 16:18, Ananyev, Konstantin:
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ananyev, Konstantin
> > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > 23/01/2018 14:34, Ananyev, Konstantin:
> > > > If that' s the use case, then I think you need to set device ownership at creation time -
> > > > inside dev_allocate().
> > > > Again that would avoid such racing conditions inside testpmd.
> > >
> > > The devices must be allocated at a low level layer.
> > 
> > No one arguing about that.
> > But we can provide owner id information to the low level.

Sorry, you did not get it.
We cannot provide owner id at the low level
because it is not yet decided who will be the owner
before the port is allocated.

> > > When a new device appears (hotplug), an ethdev port should be allocated
> > > automatically if it passes the whitelist/blacklist policy test.
> > > Then we must decide who will manage this device.
> > > I suggest notifying the DPDK libs first.
> > > So a DPDK lib or PMD like failsafe can have the priority to take the
> > > ownership in its notification callback.
> > 
> > Possible, but seems a bit overcomplicated.
> > Why not just:
> > 
> > Have a global variable process_default_owner_id, that would be init once at startup.
> > Have an LTS variable default_owner_id.
> > It will be used by rte_eth_dev_allocate() caller can set dev->owner_id at creation time,
> > so port allocation and setting ownership - will be an atomic operation.
> > At the exit rte_eth_dev_allocate() will always reset default_owner_id=0:
> > 
> > rte_eth_dev_allocate(...)
> > {
> >    lock(owner_lock);
> >    <allocate_port>
> >    owner = RTE_PER_LCORE(default_owner_id);
> >    if (owner == 0)
> >        owner = process_default_owner_id;
> >   set_owner(port, ..., owner);
> >  unlock(owner_lock);
> >  RTE_PER_LCORE(default_owner_id) = 0;
> 
> Or probably better to leave default_owner_id reset to the caller.
> Another thing - we can use same LTS variable in all control ops to
> allow/disallow changing of port configuration based on ownership.
> Konstantin
> 
> > }
> > 
> > So callers who don't need any special ownership - don't need to do anything.
> > Special callers (like failsafe) can set default_owenr_id just before calling hotplug
> > handling routine.

No, hotplug will not be a routine.
I am talking about real hotplug, like a device which appears in the VM.
This new device must be handled by EAL and probed automatically if
comply with whitelist/blacklist policy given by the application or user.
Real hotplug is asynchronous.
We will just receive notifications that it appeared.

Note: there is some temporary code in failsafe to manage some hotplug.
This code must be removed when it will be properly handled in EAL.
  
Ananyev, Konstantin Jan. 23, 2018, 9:18 p.m. UTC | #21
> 
> 23/01/2018 16:18, Ananyev, Konstantin:
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ananyev, Konstantin
> > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > 23/01/2018 14:34, Ananyev, Konstantin:
> > > > > If that' s the use case, then I think you need to set device ownership at creation time -
> > > > > inside dev_allocate().
> > > > > Again that would avoid such racing conditions inside testpmd.
> > > >
> > > > The devices must be allocated at a low level layer.
> > >
> > > No one arguing about that.
> > > But we can provide owner id information to the low level.
> 
> Sorry, you did not get it.

Might be.

> We cannot provide owner id at the low level
> because it is not yet decided who will be the owner
> before the port is allocated.

Why is that?
What prevents us decide who will manage that device *before* allocating port of it?
IMO we do have all needed information at that stage.

> 
> > > > When a new device appears (hotplug), an ethdev port should be allocated
> > > > automatically if it passes the whitelist/blacklist policy test.
> > > > Then we must decide who will manage this device.
> > > > I suggest notifying the DPDK libs first.
> > > > So a DPDK lib or PMD like failsafe can have the priority to take the
> > > > ownership in its notification callback.
> > >
> > > Possible, but seems a bit overcomplicated.
> > > Why not just:
> > >
> > > Have a global variable process_default_owner_id, that would be init once at startup.
> > > Have an LTS variable default_owner_id.
> > > It will be used by rte_eth_dev_allocate() caller can set dev->owner_id at creation time,
> > > so port allocation and setting ownership - will be an atomic operation.
> > > At the exit rte_eth_dev_allocate() will always reset default_owner_id=0:
> > >
> > > rte_eth_dev_allocate(...)
> > > {
> > >    lock(owner_lock);
> > >    <allocate_port>
> > >    owner = RTE_PER_LCORE(default_owner_id);
> > >    if (owner == 0)
> > >        owner = process_default_owner_id;
> > >   set_owner(port, ..., owner);
> > >  unlock(owner_lock);
> > >  RTE_PER_LCORE(default_owner_id) = 0;
> >
> > Or probably better to leave default_owner_id reset to the caller.
> > Another thing - we can use same LTS variable in all control ops to
> > allow/disallow changing of port configuration based on ownership.
> > Konstantin
> >
> > > }
> > >
> > > So callers who don't need any special ownership - don't need to do anything.
> > > Special callers (like failsafe) can set default_owenr_id just before calling hotplug
> > > handling routine.
> 
> No, hotplug will not be a routine.
> I am talking about real hotplug, like a device which appears in the VM.
> This new device must be handled by EAL and probed automatically if
> comply with whitelist/blacklist policy given by the application or user.
> Real hotplug is asynchronous.

By 'asynchronous' here you mean it would be handled in the EAL interrupt thread
or something different?
Anyway, I suppose  you do need a function inside DPDK that will do the actual work in response
on hotplug event, right?
That's what I refer to as 'hotplug routine' above. 

> We will just receive notifications that it appeared.
> 
> Note: there is some temporary code in failsafe to manage some hotplug.
> This code must be removed when it will be properly handled in EAL.

Ok, if it is just a temporary code, that would be removed soon -
then it definitely seems wrong to modify tespmd (or any other user app)
to comply with that temporary solution.

Konstantin
  
Thomas Monjalon Jan. 24, 2018, 8:10 a.m. UTC | #22
23/01/2018 22:18, Ananyev, Konstantin:
> > 
> > 23/01/2018 16:18, Ananyev, Konstantin:
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ananyev, Konstantin
> > > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > > 23/01/2018 14:34, Ananyev, Konstantin:
> > > > > > If that' s the use case, then I think you need to set device ownership at creation time -
> > > > > > inside dev_allocate().
> > > > > > Again that would avoid such racing conditions inside testpmd.
> > > > >
> > > > > The devices must be allocated at a low level layer.
> > > >
> > > > No one arguing about that.
> > > > But we can provide owner id information to the low level.
> > 
> > Sorry, you did not get it.
> 
> Might be.
> 
> > We cannot provide owner id at the low level
> > because it is not yet decided who will be the owner
> > before the port is allocated.
> 
> Why is that?
> What prevents us decide who will manage that device *before* allocating port of it?
> IMO we do have all needed information at that stage.

We don't have the information.
It is a new device, it is matched by a driver which allocates a port.
I don't see where the higher level can interact here.
And even if you manage a trick, the higher level needs to read the port
informations to decide the ownership.

> > > > > When a new device appears (hotplug), an ethdev port should be allocated
> > > > > automatically if it passes the whitelist/blacklist policy test.
> > > > > Then we must decide who will manage this device.
> > > > > I suggest notifying the DPDK libs first.
> > > > > So a DPDK lib or PMD like failsafe can have the priority to take the
> > > > > ownership in its notification callback.
> > > >
> > > > Possible, but seems a bit overcomplicated.
> > > > Why not just:
> > > >
> > > > Have a global variable process_default_owner_id, that would be init once at startup.
> > > > Have an LTS variable default_owner_id.
> > > > It will be used by rte_eth_dev_allocate() caller can set dev->owner_id at creation time,
> > > > so port allocation and setting ownership - will be an atomic operation.
> > > > At the exit rte_eth_dev_allocate() will always reset default_owner_id=0:
> > > >
> > > > rte_eth_dev_allocate(...)
> > > > {
> > > >    lock(owner_lock);
> > > >    <allocate_port>
> > > >    owner = RTE_PER_LCORE(default_owner_id);
> > > >    if (owner == 0)
> > > >        owner = process_default_owner_id;
> > > >   set_owner(port, ..., owner);
> > > >  unlock(owner_lock);
> > > >  RTE_PER_LCORE(default_owner_id) = 0;
> > >
> > > Or probably better to leave default_owner_id reset to the caller.
> > > Another thing - we can use same LTS variable in all control ops to
> > > allow/disallow changing of port configuration based on ownership.
> > > Konstantin
> > >
> > > > }
> > > >
> > > > So callers who don't need any special ownership - don't need to do anything.
> > > > Special callers (like failsafe) can set default_owenr_id just before calling hotplug
> > > > handling routine.
> > 
> > No, hotplug will not be a routine.
> > I am talking about real hotplug, like a device which appears in the VM.
> > This new device must be handled by EAL and probed automatically if
> > comply with whitelist/blacklist policy given by the application or user.
> > Real hotplug is asynchronous.
> 
> By 'asynchronous' here you mean it would be handled in the EAL interrupt thread
> or something different?

Yes, we receive an hotplug event which is processed in the event thread.

> Anyway, I suppose  you do need a function inside DPDK that will do the actual work in response
> on hotplug event, right?

Yes

> That's what I refer to as 'hotplug routine' above.
> 
> > We will just receive notifications that it appeared.
> > 
> > Note: there is some temporary code in failsafe to manage some hotplug.
> > This code must be removed when it will be properly handled in EAL.
> 
> Ok, if it is just a temporary code, that would be removed soon -
> then it definitely seems wrong to modify tespmd (or any other user app)
> to comply with that temporary solution.

It will be modified when EAL hotplug will be implemented.

However, the ownership issue will be the same:
we don't know the owner when allocating a port.
  
Ananyev, Konstantin Jan. 24, 2018, 6:30 p.m. UTC | #23
> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> Sent: Wednesday, January 24, 2018 8:10 AM
> To: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> Cc: Matan Azrad <matan@mellanox.com>; Gaëtan Rivet <gaetan.rivet@6wind.com>; Wu, Jingjing <jingjing.wu@intel.com>;
> dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>; Richardson, Bruce <bruce.richardson@intel.com>
> Subject: Re: [dpdk-dev] [PATCH v3 7/7] app/testpmd: adjust ethdev port ownership
> 
> 23/01/2018 22:18, Ananyev, Konstantin:
> > >
> > > 23/01/2018 16:18, Ananyev, Konstantin:
> > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ananyev, Konstantin
> > > > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > > > 23/01/2018 14:34, Ananyev, Konstantin:
> > > > > > > If that' s the use case, then I think you need to set device ownership at creation time -
> > > > > > > inside dev_allocate().
> > > > > > > Again that would avoid such racing conditions inside testpmd.
> > > > > >
> > > > > > The devices must be allocated at a low level layer.
> > > > >
> > > > > No one arguing about that.
> > > > > But we can provide owner id information to the low level.
> > >
> > > Sorry, you did not get it.
> >
> > Might be.
> >
> > > We cannot provide owner id at the low level
> > > because it is not yet decided who will be the owner
> > > before the port is allocated.
> >
> > Why is that?
> > What prevents us decide who will manage that device *before* allocating port of it?
> > IMO we do have all needed information at that stage.
> 
> We don't have the information.

At that point we do have dev name and all parameters, right?
Plus we do have blacklist/whitelist, etc.
So what else are we missing to make the decision at that point? 

> It is a new device, it is matched by a driver which allocates a port.
> I don't see where the higher level can interact here.
> And even if you manage a trick, the higher level needs to read the port
> informations to decide the ownership.

Could you specify what particular port information it needs?

> 
> > > > > > When a new device appears (hotplug), an ethdev port should be allocated
> > > > > > automatically if it passes the whitelist/blacklist policy test.
> > > > > > Then we must decide who will manage this device.
> > > > > > I suggest notifying the DPDK libs first.
> > > > > > So a DPDK lib or PMD like failsafe can have the priority to take the
> > > > > > ownership in its notification callback.
> > > > >
> > > > > Possible, but seems a bit overcomplicated.
> > > > > Why not just:
> > > > >
> > > > > Have a global variable process_default_owner_id, that would be init once at startup.
> > > > > Have an LTS variable default_owner_id.
> > > > > It will be used by rte_eth_dev_allocate() caller can set dev->owner_id at creation time,
> > > > > so port allocation and setting ownership - will be an atomic operation.
> > > > > At the exit rte_eth_dev_allocate() will always reset default_owner_id=0:
> > > > >
> > > > > rte_eth_dev_allocate(...)
> > > > > {
> > > > >    lock(owner_lock);
> > > > >    <allocate_port>
> > > > >    owner = RTE_PER_LCORE(default_owner_id);
> > > > >    if (owner == 0)
> > > > >        owner = process_default_owner_id;
> > > > >   set_owner(port, ..., owner);
> > > > >  unlock(owner_lock);
> > > > >  RTE_PER_LCORE(default_owner_id) = 0;
> > > >
> > > > Or probably better to leave default_owner_id reset to the caller.
> > > > Another thing - we can use same LTS variable in all control ops to
> > > > allow/disallow changing of port configuration based on ownership.
> > > > Konstantin
> > > >
> > > > > }
> > > > >
> > > > > So callers who don't need any special ownership - don't need to do anything.
> > > > > Special callers (like failsafe) can set default_owenr_id just before calling hotplug
> > > > > handling routine.
> > >
> > > No, hotplug will not be a routine.
> > > I am talking about real hotplug, like a device which appears in the VM.
> > > This new device must be handled by EAL and probed automatically if
> > > comply with whitelist/blacklist policy given by the application or user.
> > > Real hotplug is asynchronous.
> >
> > By 'asynchronous' here you mean it would be handled in the EAL interrupt thread
> > or something different?
> 
> Yes, we receive an hotplug event which is processed in the event thread.
> 
> > Anyway, I suppose  you do need a function inside DPDK that will do the actual work in response
> > on hotplug event, right?
> 
> Yes

Ok, btw why that function has to be always called from interrupt thread?
Why not to allow user to decide?
Some apps have their own thread dedicated for control ops (like testpmd)
For them it might be plausible to call that function from their own control thread context.
Konstantin

> 
> > That's what I refer to as 'hotplug routine' above.
> >
> > > We will just receive notifications that it appeared.
> > >
> > > Note: there is some temporary code in failsafe to manage some hotplug.
> > > This code must be removed when it will be properly handled in EAL.
> >
> > Ok, if it is just a temporary code, that would be removed soon -
> > then it definitely seems wrong to modify tespmd (or any other user app)
> > to comply with that temporary solution.
> 
> It will be modified when EAL hotplug will be implemented.
> 
> However, the ownership issue will be the same:
> we don't know the owner when allocating a port.
  
Matan Azrad Jan. 25, 2018, 9:36 a.m. UTC | #24
Gaetan, Konstantin, Thomas

Any response to my suggestion below?

From: Matan Azrad
> Hi
> 
> From: Gaëtan Rivet [mailto:gaetan.rivet@6wind.com] <snip>
> > > > > > > Look,
> > > > > > > > Testpmd initiates some of its internal databases depends
> > > > > > > > on specific port iteration, In some time someone may take
> > > > > > > > ownership of Testpmd ports and testpmd will continue to
> > > > > > > > touch
> > them.
> > > > > >
> > > > > > But if someone will take the ownership (assign new owner_id)
> > > > > > that port will not appear in RTE_ETH_FOREACH_DEV() any more.
> > > > > >
> > > > >
> > > > > Yes, but testpmd sometimes depends on previous iteration using
> > internal database.
> > > > > So it uses internal database that was updated by old iteration.
> > > >
> > > > That sounds like just a bug in testpmd that need to be fixed, no?
> > >
> > > If Testpmd already took ownership for these ports(like I did), it is ok.
> > >
> >
> > Have you tested using the default iterator (NO_OWNER)?
> > It worked until now with the bare minimal device tagging using
> > DEV_DEFERRED. Testpmd did not seem to mind having to skip this port.
> >
> > I'm sure there were places where this was overlooked, but overall, I'd
> > think everything should be fixable using only the NO_OWNER iteration.
> 
> I don't think so.
> 
> > Can you point to a specific scenario (command line, chain of event)
> > that would lead to a problem?
> >
> 
> I didn't construct a race test to catch testpmd issue, but I think without this
> patch, there is a lot of issues.
> Go to the testpmd code (before ownership) and find usage of the old
> iterator(after the first iteration in main), Ask yourself what should happen if
> exactly in this time, a new port is created by fail-safe(plug in event).
> 
> > > > Any particular places where outdated device info is used?
> > >
> > > For example, look for the stream management in testpmd(I think I saw
> > > it
> > there).
> > >
> >
> > The stream management is certainly shaky, but it happens after the EAL
> > initial port creation, and is not able to update itself for new
> > hotplugged ports (unless something changed).
> >
> 
> Yes, but conceptually someone in the future may take the port(because it
> ownerless).
> 
> > > > > > > If I look back on the fail-safe, its sole purpose is to have
> > > > > > > seamless hotplug with existing applications.
> > > > > > >
> > > > > > > Port ownership is a genericization of some functions
> > > > > > > introduced by the fail-safe, that could structure DPDK
> > > > > > > further. It should allow applications to have a seamless
> > > > > > > integration with subsystems using port ownership. Without
> > > > > > > this,
> > port ownership cannot be used.
> > > > > > >
> > > > > > > Testpmd should be fixed, but follow the most common design
> > > > > > > patterns of DPDK applications. Going with port ownership
> > > > > > > seems like a paradigm shift.
> > > > > > >
> > > > > > > > In addition
> > > > > > > > Using the old iterator in some places in testpmd will
> > > > > > > > cause a race for run-
> > > > > > time new ports(can be created by failsafe or any hotplug code):
> > > > > > > > - testpmd finds an ownerless port(just now created) by the
> > > > > > > > old iterator and start traffic there,
> >
> > How does testpmd start traffic there? Testpmd has only a callback for
> > displaying that it received an event for a new port. It has no concept
> > of hotplugging beyond that.
> >
> 
> Yes, so no traffic just some control command.
> 
> > Testpmd will not start using any new port probed using the hotplug API
> > on its own, again, unless something has drastically changed.
> >
> 
> Every iterator using in testpmd is exposed to race.
> 
> > > > > > > > - failsafe takes ownership of this new port and start traffic there.
> > > > > > > > Problem!
> > > > > >
> > > > > > Could you shed a bit more light here - it would be race
> > > > > > condition between whom and whom?
> > > > >
> > > > > Sure.
> > > > >
> > > > > > As I remember in testpmd all control ops are done within one
> > > > > > thread (main lcore).
> > > > >
> > > > > But other dpdk entity can use another thread, for example:
> > > > > Failsafe uses the host thread(using alarm callback) to create a
> > > > > new port and
> > > > to take ownership of a port.
> > > >
> > > > Hm, and you create new ports inside failsafe PMD, right and then
> > > > set new owner_id for it?
> > >
> > > Yes.
> > >
> > > > And all this in alarm in interrupt thread?
> > >
> > > Yes.
> > >
> > > > If so I wonder how you can guarantee that no-one else will set
> > > > different owner_id between
> > > > rte_eth_dev_allocate() and rte_eth_dev_owner_set()?
> > >
> > > I check it (see failsafe patch to this series - V5).
> > > Function: fs_bus_init.
> > >
> > > > Could you point me to that place (I am not really familiar with
> > > > familiar with failsafe code)?
> > > >
> > > > >
> > > > > The race:
> > > > > Testpmd iterates over all ports by the master thread.
> > > > > Failsafe takes ownership of a port by the host thread and start using
> it.
> > > > > => The two dpdk entities may use the device at same time!
> > > >
> >
> > When can this happen? Fail-safe creates its initial pool of ports
> > during EAL init, before testpmd scans eth_dev ports and configure its
> streams.
> > At that point, it has taken ownership, from the master lcore context.
> >
> > After this point, new ports could be detected and hotplugged by fail-safe.
> > However, even if testpmd had a callback to capture those new ports and
> > reconfigure its streams, it would be executed from within the
> > intr-thread, same as failsafe. If the thread was interrupted, by a
> > dataplane-lcore for example, streams would not have been reconfigured.
> > The fail-safe would execute its callback and set the owner-id before
> > the callback chains goes to the application.
> >
> 
> Some iterator may be invoked in plug out process by other thread in testpmd
> and causes to control command
> 
> > And that would only be if testpmd had any callback for hotplugging
> > ports and reconfiguring its streams, which it hasn't, as far as I know.
> >
> 
> We don't need to implement it in testpmd.
> 
> > > > Ok, if failsafe really assigns its owner_id(s) to ports that are
> > > > already in use by the app, then how such scheme supposed to work
> > > > at
> > all?
> > >
> > > If the app works well (with the new rules) it already took ownership
> > > and
> > failsafe will see it and will wait until the application release it.
> > > Every dpdk entity should know which port it wants to manage, If 2
> > > entities want to manage the same device -  it can be ok and port
> > > ownership
> > can synchronize the usage.
> > >
> > > Probably, application which will run fail-safe wants to manage only
> > > the fail-
> > safe port and therefor to take ownership only for it.
> > >
> > > > I.E. application has a port - it assigns some owner_id != 0 to it,
> > > > then PMD tries to set its owner_id tot the same port.
> > > > Obviously failsafe's set_owner() will always fail in such case.
> > > >
> > > Yes, and will try again after some time.
> > >
> > > > From what I hear we need to introduce a concept of 'default owner id'.
> > > > I.E. when failsafe PMD is created - user assigns some owner_id to
> > > > it
> > (default).
> > > > Then failsafe PMD generates it's own owner_id and assigns it only
> > > > to the ports whose current owner_id is equal either 0 or 'default'
> owner_id.
> > > >
> > >
> > > It is a suggestion and we need to think about it more (I'm talking
> > > about it
> > with Gaetan in another thread).
> > > Actually I think, if we want a generic solution to the generic
> > > problem the
> > current solution is ok.
> > >
> >
> > We could as well conclude this other thread there.
> >
> > The only solution would be to have a default relationship between
> > owners, something that goes beyond the scope assigned by Thomas to
> > your evolution, but would be necessary for this API to be properly
> > used by existing applications.
> >
> > I think it's the only way to have a sane default behavior with your
> > API, but I also think this goes beyong the scope of the DPDK altogether.
> >
> > But even with those considerations that could be ironed out later (API
> > is still experimental anyway), in the meantime, I think we should
> > strive not to break "userland" as much as possible. Meaning that
> > unless you have a specific situation creating a bug, you shouldn't
> > have to modify testpmd, and if an issues arises, you need to try to
> > improve your API before resorting to changing the resource management
> model of all existing applications.
> >
> 
> I understand it.
> Suggestion:
> 
> 2 system owners.
> APP_OWNER - 1.
> NO_OWNER - 0.
> 
> And allowing for more owners as now.
> 
> 1. Every port creation will set the owner for NO_OWNER (as now).
> 2. There is option for all dpdk entities to take owner of  NO_OWNER ports all
> the time(as now).
> 3. In some point in the end of EAL init: set all the NO_OWNER to
> APP_OWNER(for V6).
> 4. Change the old iterator to iterate over APP_OWNER ports(for V6).
> 
> What do you think?
> 
> <snip>
  
Thomas Monjalon Jan. 25, 2018, 10:05 a.m. UTC | #25
25/01/2018 10:36, Matan Azrad:
> Gaetan, Konstantin, Thomas
> 
> Any response to my suggestion below?
> 
> From: Matan Azrad
> > Suggestion:
> > 
> > 2 system owners.
> > APP_OWNER - 1.
> > NO_OWNER - 0.
> > 
> > And allowing for more owners as now.
> > 
> > 1. Every port creation will set the owner for NO_OWNER (as now).
> > 2. There is option for all dpdk entities to take owner of  NO_OWNER ports all
> > the time(as now).
> > 3. In some point in the end of EAL init: set all the NO_OWNER to
> > APP_OWNER(for V6).
> > 4. Change the old iterator to iterate over APP_OWNER ports(for V6).
> > 
> > What do you think?

Reminder for everybody: there is no issue if no hotplug.
There is a race condition with hotplug.
Hotplug is not managed by EAL yet, but there is a temporary hotplug
management in failsafe.
So until now, the issue is seen only with hotplug in failsafe.

Your suggestion makes no change for applications,
and fix the ownership issue for failsafe.
And later, if an application wants to support generic hotplug properly
(when it will be generally available in DPDK),
the application should use the ownership API.
Right?

I think it is a good compromise.
  
Thomas Monjalon Jan. 25, 2018, 10:55 a.m. UTC | #26
24/01/2018 19:30, Ananyev, Konstantin:
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > 23/01/2018 22:18, Ananyev, Konstantin:
> > > >
> > > > 23/01/2018 16:18, Ananyev, Konstantin:
> > > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ananyev, Konstantin
> > > > > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > > > > 23/01/2018 14:34, Ananyev, Konstantin:
> > > > > > > > If that' s the use case, then I think you need to set device ownership at creation time -
> > > > > > > > inside dev_allocate().
> > > > > > > > Again that would avoid such racing conditions inside testpmd.
> > > > > > >
> > > > > > > The devices must be allocated at a low level layer.
> > > > > >
> > > > > > No one arguing about that.
> > > > > > But we can provide owner id information to the low level.
> > > >
> > > > Sorry, you did not get it.
> > >
> > > Might be.
> > >
> > > > We cannot provide owner id at the low level
> > > > because it is not yet decided who will be the owner
> > > > before the port is allocated.
> > >
> > > Why is that?
> > > What prevents us decide who will manage that device *before* allocating port of it?
> > > IMO we do have all needed information at that stage.
> > 
> > We don't have the information.
> 
> At that point we do have dev name and all parameters, right?

We just have the PCI id.

> Plus we do have blacklist/whitelist, etc.
> So what else are we missing to make the decision at that point? 

It depends on the ownership policy.
Example: we can decide to take ownership based on a MAC address.
Another example: it can be decided to take ownership of a given driver.
We don't have these informations with the PCI id.

> > It is a new device, it is matched by a driver which allocates a port.
> > I don't see where the higher level can interact here.
> > And even if you manage a trick, the higher level needs to read the port
> > informations to decide the ownership.
> 
> Could you specify what particular port information it needs?

Replied to the same question above :)


> > > > > > > When a new device appears (hotplug), an ethdev port should be allocated
> > > > > > > automatically if it passes the whitelist/blacklist policy test.
> > > > > > > Then we must decide who will manage this device.
> > > > > > > I suggest notifying the DPDK libs first.
> > > > > > > So a DPDK lib or PMD like failsafe can have the priority to take the
> > > > > > > ownership in its notification callback.
> > > > > >
> > > > > > Possible, but seems a bit overcomplicated.
> > > > > > Why not just:
> > > > > >
> > > > > > Have a global variable process_default_owner_id, that would be init once at startup.
> > > > > > Have an LTS variable default_owner_id.
> > > > > > It will be used by rte_eth_dev_allocate() caller can set dev->owner_id at creation time,
> > > > > > so port allocation and setting ownership - will be an atomic operation.
> > > > > > At the exit rte_eth_dev_allocate() will always reset default_owner_id=0:
> > > > > >
> > > > > > rte_eth_dev_allocate(...)
> > > > > > {
> > > > > >    lock(owner_lock);
> > > > > >    <allocate_port>
> > > > > >    owner = RTE_PER_LCORE(default_owner_id);
> > > > > >    if (owner == 0)
> > > > > >        owner = process_default_owner_id;
> > > > > >   set_owner(port, ..., owner);
> > > > > >  unlock(owner_lock);
> > > > > >  RTE_PER_LCORE(default_owner_id) = 0;
> > > > >
> > > > > Or probably better to leave default_owner_id reset to the caller.
> > > > > Another thing - we can use same LTS variable in all control ops to
> > > > > allow/disallow changing of port configuration based on ownership.
> > > > > Konstantin
> > > > >
> > > > > > }
> > > > > >
> > > > > > So callers who don't need any special ownership - don't need to do anything.
> > > > > > Special callers (like failsafe) can set default_owenr_id just before calling hotplug
> > > > > > handling routine.
> > > >
> > > > No, hotplug will not be a routine.
> > > > I am talking about real hotplug, like a device which appears in the VM.
> > > > This new device must be handled by EAL and probed automatically if
> > > > comply with whitelist/blacklist policy given by the application or user.
> > > > Real hotplug is asynchronous.
> > >
> > > By 'asynchronous' here you mean it would be handled in the EAL interrupt thread
> > > or something different?
> > 
> > Yes, we receive an hotplug event which is processed in the event thread.
> > 
> > > Anyway, I suppose  you do need a function inside DPDK that will do the actual work in response
> > > on hotplug event, right?
> > 
> > Yes
> 
> Ok, btw why that function has to be always called from interrupt thread?
> Why not to allow user to decide?

Absolutely, the user must decide.
In the example of failsafe, the user instructs a policy to decide
which devices will be owned, so failsafe takes the decision based
on user inputs.

> Some apps have their own thread dedicated for control ops (like testpmd)
> For them it might be plausible to call that function from their own control thread context.
> Konstantin
> 
> > 
> > > That's what I refer to as 'hotplug routine' above.
> > >
> > > > We will just receive notifications that it appeared.
> > > >
> > > > Note: there is some temporary code in failsafe to manage some hotplug.
> > > > This code must be removed when it will be properly handled in EAL.
> > >
> > > Ok, if it is just a temporary code, that would be removed soon -
> > > then it definitely seems wrong to modify tespmd (or any other user app)
> > > to comply with that temporary solution.
> > 
> > It will be modified when EAL hotplug will be implemented.
> > 
> > However, the ownership issue will be the same:
> > we don't know the owner when allocating a port.
  
Ananyev, Konstantin Jan. 25, 2018, 11:09 a.m. UTC | #27
> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> Sent: Thursday, January 25, 2018 10:55 AM
> To: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> Cc: Matan Azrad <matan@mellanox.com>; Gaëtan Rivet <gaetan.rivet@6wind.com>; Wu, Jingjing <jingjing.wu@intel.com>;
> dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>; Richardson, Bruce <bruce.richardson@intel.com>
> Subject: Re: [dpdk-dev] [PATCH v3 7/7] app/testpmd: adjust ethdev port ownership
> 
> 24/01/2018 19:30, Ananyev, Konstantin:
> > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > 23/01/2018 22:18, Ananyev, Konstantin:
> > > > >
> > > > > 23/01/2018 16:18, Ananyev, Konstantin:
> > > > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ananyev, Konstantin
> > > > > > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > > > > > 23/01/2018 14:34, Ananyev, Konstantin:
> > > > > > > > > If that' s the use case, then I think you need to set device ownership at creation time -
> > > > > > > > > inside dev_allocate().
> > > > > > > > > Again that would avoid such racing conditions inside testpmd.
> > > > > > > >
> > > > > > > > The devices must be allocated at a low level layer.
> > > > > > >
> > > > > > > No one arguing about that.
> > > > > > > But we can provide owner id information to the low level.
> > > > >
> > > > > Sorry, you did not get it.
> > > >
> > > > Might be.
> > > >
> > > > > We cannot provide owner id at the low level
> > > > > because it is not yet decided who will be the owner
> > > > > before the port is allocated.
> > > >
> > > > Why is that?
> > > > What prevents us decide who will manage that device *before* allocating port of it?
> > > > IMO we do have all needed information at that stage.
> > >
> > > We don't have the information.
> >
> > At that point we do have dev name and all parameters, right?
> 
> We just have the PCI id.
> 
> > Plus we do have blacklist/whitelist, etc.
> > So what else are we missing to make the decision at that point?
> 
> It depends on the ownership policy.
> Example: we can decide to take ownership based on a MAC address.

That's sounds a bit articificial (mac address can be changed on the fly), but ok -
for such devices user can decide to use default id first and change
it later after port is allocated and dev_init() is passed.
Though as I understand there situations (like in failsafe PMD) when we do 
know for sure owner_id before calling dev_allocate().

> Another example: it can be decided to take ownership of a given driver.
> We don't have these informations with the PCI id.
> 
> > > It is a new device, it is matched by a driver which allocates a port.
> > > I don't see where the higher level can interact here.
> > > And even if you manage a trick, the higher level needs to read the port
> > > informations to decide the ownership.
> >
> > Could you specify what particular port information it needs?
> 
> Replied to the same question above :)
> 
> 
> > > > > > > > When a new device appears (hotplug), an ethdev port should be allocated
> > > > > > > > automatically if it passes the whitelist/blacklist policy test.
> > > > > > > > Then we must decide who will manage this device.
> > > > > > > > I suggest notifying the DPDK libs first.
> > > > > > > > So a DPDK lib or PMD like failsafe can have the priority to take the
> > > > > > > > ownership in its notification callback.
> > > > > > >
> > > > > > > Possible, but seems a bit overcomplicated.
> > > > > > > Why not just:
> > > > > > >
> > > > > > > Have a global variable process_default_owner_id, that would be init once at startup.
> > > > > > > Have an LTS variable default_owner_id.
> > > > > > > It will be used by rte_eth_dev_allocate() caller can set dev->owner_id at creation time,
> > > > > > > so port allocation and setting ownership - will be an atomic operation.
> > > > > > > At the exit rte_eth_dev_allocate() will always reset default_owner_id=0:
> > > > > > >
> > > > > > > rte_eth_dev_allocate(...)
> > > > > > > {
> > > > > > >    lock(owner_lock);
> > > > > > >    <allocate_port>
> > > > > > >    owner = RTE_PER_LCORE(default_owner_id);
> > > > > > >    if (owner == 0)
> > > > > > >        owner = process_default_owner_id;
> > > > > > >   set_owner(port, ..., owner);
> > > > > > >  unlock(owner_lock);
> > > > > > >  RTE_PER_LCORE(default_owner_id) = 0;
> > > > > >
> > > > > > Or probably better to leave default_owner_id reset to the caller.
> > > > > > Another thing - we can use same LTS variable in all control ops to
> > > > > > allow/disallow changing of port configuration based on ownership.
> > > > > > Konstantin
> > > > > >
> > > > > > > }
> > > > > > >
> > > > > > > So callers who don't need any special ownership - don't need to do anything.
> > > > > > > Special callers (like failsafe) can set default_owenr_id just before calling hotplug
> > > > > > > handling routine.
> > > > >
> > > > > No, hotplug will not be a routine.
> > > > > I am talking about real hotplug, like a device which appears in the VM.
> > > > > This new device must be handled by EAL and probed automatically if
> > > > > comply with whitelist/blacklist policy given by the application or user.
> > > > > Real hotplug is asynchronous.
> > > >
> > > > By 'asynchronous' here you mean it would be handled in the EAL interrupt thread
> > > > or something different?
> > >
> > > Yes, we receive an hotplug event which is processed in the event thread.
> > >
> > > > Anyway, I suppose  you do need a function inside DPDK that will do the actual work in response
> > > > on hotplug event, right?
> > >
> > > Yes
> >
> > Ok, btw why that function has to be always called from interrupt thread?
> > Why not to allow user to decide?
> 
> Absolutely, the user must decide.
> In the example of failsafe, the user instructs a policy to decide
> which devices will be owned, so failsafe takes the decision based
> on user inputs.
> 
> > Some apps have their own thread dedicated for control ops (like testpmd)
> > For them it might be plausible to call that function from their own control thread context.
> > Konstantin
> >
> > >
> > > > That's what I refer to as 'hotplug routine' above.
> > > >
> > > > > We will just receive notifications that it appeared.
> > > > >
> > > > > Note: there is some temporary code in failsafe to manage some hotplug.
> > > > > This code must be removed when it will be properly handled in EAL.
> > > >
> > > > Ok, if it is just a temporary code, that would be removed soon -
> > > > then it definitely seems wrong to modify tespmd (or any other user app)
> > > > to comply with that temporary solution.
> > >
> > > It will be modified when EAL hotplug will be implemented.
> > >
> > > However, the ownership issue will be the same:
> > > we don't know the owner when allocating a port.
  
Ananyev, Konstantin Jan. 25, 2018, 11:15 a.m. UTC | #28
Hi everyone,

> 
> 25/01/2018 10:36, Matan Azrad:
> > Gaetan, Konstantin, Thomas
> >
> > Any response to my suggestion below?
> >
> > From: Matan Azrad
> > > Suggestion:
> > >
> > > 2 system owners.
> > > APP_OWNER - 1.
> > > NO_OWNER - 0.
> > >
> > > And allowing for more owners as now.
> > >
> > > 1. Every port creation will set the owner for NO_OWNER (as now).
> > > 2. There is option for all dpdk entities to take owner of  NO_OWNER ports all
> > > the time(as now).
> > > 3. In some point in the end of EAL init: set all the NO_OWNER to
> > > APP_OWNER(for V6).

What will happen if we have 2 (or more process) sharing the same device?
How we will distinguish what APP_OWNER we are talking about?
Shouldn't default_owner be unique per process?

> > > 4. Change the old iterator to iterate over APP_OWNER ports(for V6).

If I get it right it means no changes in tetpmd, correct?

> > >
> > > What do you think?
> 
> Reminder for everybody: there is no issue if no hotplug.
> There is a race condition with hotplug.
> Hotplug is not managed by EAL yet, but there is a temporary hotplug
> management in failsafe.
> So until now, the issue is seen only with hotplug in failsafe.
> 
> Your suggestion makes no change for applications,
> and fix the ownership issue for failsafe.
> And later, if an application wants to support generic hotplug properly
> (when it will be generally available in DPDK),
> the application should use the ownership API.
> Right?
> 
> I think it is a good compromise.

I still think it would be good if future hotplug support will be transparent
to existing apps (no/minimal changes).
But I suppose we can discuss it later, when will have hotplug patches.
Konstantin
  
Thomas Monjalon Jan. 25, 2018, 11:27 a.m. UTC | #29
25/01/2018 12:09, Ananyev, Konstantin:
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > 24/01/2018 19:30, Ananyev, Konstantin:
> > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > 23/01/2018 22:18, Ananyev, Konstantin:
> > > > > >
> > > > > > 23/01/2018 16:18, Ananyev, Konstantin:
> > > > > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ananyev, Konstantin
> > > > > > > > From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > > > > > > > > 23/01/2018 14:34, Ananyev, Konstantin:
> > > > > > > > > > If that' s the use case, then I think you need to set device ownership at creation time -
> > > > > > > > > > inside dev_allocate().
> > > > > > > > > > Again that would avoid such racing conditions inside testpmd.
> > > > > > > > >
> > > > > > > > > The devices must be allocated at a low level layer.
> > > > > > > >
> > > > > > > > No one arguing about that.
> > > > > > > > But we can provide owner id information to the low level.
> > > > > >
> > > > > > Sorry, you did not get it.
> > > > >
> > > > > Might be.
> > > > >
> > > > > > We cannot provide owner id at the low level
> > > > > > because it is not yet decided who will be the owner
> > > > > > before the port is allocated.
> > > > >
> > > > > Why is that?
> > > > > What prevents us decide who will manage that device *before* allocating port of it?
> > > > > IMO we do have all needed information at that stage.
> > > >
> > > > We don't have the information.
> > >
> > > At that point we do have dev name and all parameters, right?
> > 
> > We just have the PCI id.
> > 
> > > Plus we do have blacklist/whitelist, etc.
> > > So what else are we missing to make the decision at that point?
> > 
> > It depends on the ownership policy.
> > Example: we can decide to take ownership based on a MAC address.
> 
> That's sounds a bit articificial (mac address can be changed on the fly), but ok -
> for such devices user can decide to use default id first and change
> it later after port is allocated and dev_init() is passed.
> Though as I understand there situations (like in failsafe PMD) when we do 
> know for sure owner_id before calling dev_allocate().

In the general case, when hotplug will be managed by EAL in an
asynchronous way, the port allocation will be done without any knowledge
about the port owner.

> > Another example: it can be decided to take ownership of a given driver.
> > We don't have these informations with the PCI id.
> > 
> > > > It is a new device, it is matched by a driver which allocates a port.
> > > > I don't see where the higher level can interact here.
> > > > And even if you manage a trick, the higher level needs to read the port
> > > > informations to decide the ownership.
> > >
> > > Could you specify what particular port information it needs?
> > 
> > Replied to the same question above :)
> > 
> > 
> > > > > > > > > When a new device appears (hotplug), an ethdev port should be allocated
> > > > > > > > > automatically if it passes the whitelist/blacklist policy test.
> > > > > > > > > Then we must decide who will manage this device.
> > > > > > > > > I suggest notifying the DPDK libs first.
> > > > > > > > > So a DPDK lib or PMD like failsafe can have the priority to take the
> > > > > > > > > ownership in its notification callback.
> > > > > > > >
> > > > > > > > Possible, but seems a bit overcomplicated.
> > > > > > > > Why not just:
> > > > > > > >
> > > > > > > > Have a global variable process_default_owner_id, that would be init once at startup.
> > > > > > > > Have an LTS variable default_owner_id.
> > > > > > > > It will be used by rte_eth_dev_allocate() caller can set dev->owner_id at creation time,
> > > > > > > > so port allocation and setting ownership - will be an atomic operation.
> > > > > > > > At the exit rte_eth_dev_allocate() will always reset default_owner_id=0:
> > > > > > > >
> > > > > > > > rte_eth_dev_allocate(...)
> > > > > > > > {
> > > > > > > >    lock(owner_lock);
> > > > > > > >    <allocate_port>
> > > > > > > >    owner = RTE_PER_LCORE(default_owner_id);
> > > > > > > >    if (owner == 0)
> > > > > > > >        owner = process_default_owner_id;
> > > > > > > >   set_owner(port, ..., owner);
> > > > > > > >  unlock(owner_lock);
> > > > > > > >  RTE_PER_LCORE(default_owner_id) = 0;
> > > > > > >
> > > > > > > Or probably better to leave default_owner_id reset to the caller.
> > > > > > > Another thing - we can use same LTS variable in all control ops to
> > > > > > > allow/disallow changing of port configuration based on ownership.
> > > > > > > Konstantin
> > > > > > >
> > > > > > > > }
> > > > > > > >
> > > > > > > > So callers who don't need any special ownership - don't need to do anything.
> > > > > > > > Special callers (like failsafe) can set default_owenr_id just before calling hotplug
> > > > > > > > handling routine.
> > > > > >
> > > > > > No, hotplug will not be a routine.
> > > > > > I am talking about real hotplug, like a device which appears in the VM.
> > > > > > This new device must be handled by EAL and probed automatically if
> > > > > > comply with whitelist/blacklist policy given by the application or user.
> > > > > > Real hotplug is asynchronous.
> > > > >
> > > > > By 'asynchronous' here you mean it would be handled in the EAL interrupt thread
> > > > > or something different?
> > > >
> > > > Yes, we receive an hotplug event which is processed in the event thread.
> > > >
> > > > > Anyway, I suppose  you do need a function inside DPDK that will do the actual work in response
> > > > > on hotplug event, right?
> > > >
> > > > Yes
> > >
> > > Ok, btw why that function has to be always called from interrupt thread?
> > > Why not to allow user to decide?
> > 
> > Absolutely, the user must decide.
> > In the example of failsafe, the user instructs a policy to decide
> > which devices will be owned, so failsafe takes the decision based
> > on user inputs.
> > 
> > > Some apps have their own thread dedicated for control ops (like testpmd)
> > > For them it might be plausible to call that function from their own control thread context.
> > > Konstantin
> > >
> > > >
> > > > > That's what I refer to as 'hotplug routine' above.
> > > > >
> > > > > > We will just receive notifications that it appeared.
> > > > > >
> > > > > > Note: there is some temporary code in failsafe to manage some hotplug.
> > > > > > This code must be removed when it will be properly handled in EAL.
> > > > >
> > > > > Ok, if it is just a temporary code, that would be removed soon -
> > > > > then it definitely seems wrong to modify tespmd (or any other user app)
> > > > > to comply with that temporary solution.
> > > >
> > > > It will be modified when EAL hotplug will be implemented.
> > > >
> > > > However, the ownership issue will be the same:
> > > > we don't know the owner when allocating a port.
>
  
Thomas Monjalon Jan. 25, 2018, 11:33 a.m. UTC | #30
25/01/2018 12:15, Ananyev, Konstantin:
> Hi everyone,
> 
> > 
> > 25/01/2018 10:36, Matan Azrad:
> > > Gaetan, Konstantin, Thomas
> > >
> > > Any response to my suggestion below?
> > >
> > > From: Matan Azrad
> > > > Suggestion:
> > > >
> > > > 2 system owners.
> > > > APP_OWNER - 1.
> > > > NO_OWNER - 0.
> > > >
> > > > And allowing for more owners as now.
> > > >
> > > > 1. Every port creation will set the owner for NO_OWNER (as now).
> > > > 2. There is option for all dpdk entities to take owner of  NO_OWNER ports all
> > > > the time(as now).
> > > > 3. In some point in the end of EAL init: set all the NO_OWNER to
> > > > APP_OWNER(for V6).
> 
> What will happen if we have 2 (or more process) sharing the same device?
> How we will distinguish what APP_OWNER we are talking about?
> Shouldn't default_owner be unique per process?

Yes, good point!
Each application process should be considered a different owner
by default.

> > > > 4. Change the old iterator to iterate over APP_OWNER ports(for V6).
> 
> If I get it right it means no changes in tetpmd, correct?

It is my understanding yes.

> > > > What do you think?
> > 
> > Reminder for everybody: there is no issue if no hotplug.
> > There is a race condition with hotplug.
> > Hotplug is not managed by EAL yet, but there is a temporary hotplug
> > management in failsafe.
> > So until now, the issue is seen only with hotplug in failsafe.
> > 
> > Your suggestion makes no change for applications,
> > and fix the ownership issue for failsafe.
> > And later, if an application wants to support generic hotplug properly
> > (when it will be generally available in DPDK),
> > the application should use the ownership API.
> > Right?
> > 
> > I think it is a good compromise.
> 
> I still think it would be good if future hotplug support will be transparent
> to existing apps (no/minimal changes).
> But I suppose we can discuss it later, when will have hotplug patches.

It cannot be really transparent.
If an application is based on port detection at init, it has to be changed
to decide how to handle new ports.
That's why I introduced the new events for ethdev notification callback.
  
Ananyev, Konstantin Jan. 25, 2018, 11:55 a.m. UTC | #31
> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> Sent: Thursday, January 25, 2018 11:33 AM
> To: Ananyev, Konstantin <konstantin.ananyev@intel.com>
> Cc: Matan Azrad <matan@mellanox.com>; Gaëtan Rivet <gaetan.rivet@6wind.com>; Wu, Jingjing <jingjing.wu@intel.com>;
> dev@dpdk.org; Neil Horman <nhorman@tuxdriver.com>; Richardson, Bruce <bruce.richardson@intel.com>
> Subject: Re: [dpdk-dev] [PATCH v3 7/7] app/testpmd: adjust ethdev port ownership
> 
> 25/01/2018 12:15, Ananyev, Konstantin:
> > Hi everyone,
> >
> > >
> > > 25/01/2018 10:36, Matan Azrad:
> > > > Gaetan, Konstantin, Thomas
> > > >
> > > > Any response to my suggestion below?
> > > >
> > > > From: Matan Azrad
> > > > > Suggestion:
> > > > >
> > > > > 2 system owners.
> > > > > APP_OWNER - 1.
> > > > > NO_OWNER - 0.
> > > > >
> > > > > And allowing for more owners as now.
> > > > >
> > > > > 1. Every port creation will set the owner for NO_OWNER (as now).
> > > > > 2. There is option for all dpdk entities to take owner of  NO_OWNER ports all
> > > > > the time(as now).
> > > > > 3. In some point in the end of EAL init: set all the NO_OWNER to
> > > > > APP_OWNER(for V6).
> >
> > What will happen if we have 2 (or more process) sharing the same device?
> > How we will distinguish what APP_OWNER we are talking about?
> > Shouldn't default_owner be unique per process?
> 
> Yes, good point!
> Each application process should be considered a different owner
> by default.
> 
> > > > > 4. Change the old iterator to iterate over APP_OWNER ports(for V6).
> >
> > If I get it right it means no changes in tetpmd, correct?
> 
> It is my understanding yes.

Sounds reasonable to me then.

> 
> > > > > What do you think?
> > >
> > > Reminder for everybody: there is no issue if no hotplug.
> > > There is a race condition with hotplug.
> > > Hotplug is not managed by EAL yet, but there is a temporary hotplug
> > > management in failsafe.
> > > So until now, the issue is seen only with hotplug in failsafe.
> > >
> > > Your suggestion makes no change for applications,
> > > and fix the ownership issue for failsafe.
> > > And later, if an application wants to support generic hotplug properly
> > > (when it will be generally available in DPDK),
> > > the application should use the ownership API.
> > > Right?
> > >
> > > I think it is a good compromise.
> >
> > I still think it would be good if future hotplug support will be transparent
> > to existing apps (no/minimal changes).
> > But I suppose we can discuss it later, when will have hotplug patches.
> 
> It cannot be really transparent.
> If an application is based on port detection at init, it has to be changed
> to decide how to handle new ports.
> That's why I introduced the new events for ethdev notification callback.

Ok, but I think, most processes would just assign default_owner for newly plugged device. 
For that case I don't see why it can't be transparent.
Anyway, that's probably a topic for new mail thread.
Konstantin
  

Patch

diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c
index 31919ba..6199c64 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -1394,7 +1394,7 @@  struct cmd_config_speed_all {
 			&link_speed) < 0)
 		return;
 
-	RTE_ETH_FOREACH_DEV(pid) {
+	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
 		ports[pid].dev_conf.link_speeds = link_speed;
 	}
 
@@ -1902,7 +1902,7 @@  struct cmd_config_rss {
 	struct cmd_config_rss *res = parsed_result;
 	struct rte_eth_rss_conf rss_conf = { .rss_key_len = 0, };
 	int diag;
-	uint8_t i;
+	uint16_t pid;
 
 	if (!strcmp(res->value, "all"))
 		rss_conf.rss_hf = ETH_RSS_IP | ETH_RSS_TCP |
@@ -1936,12 +1936,12 @@  struct cmd_config_rss {
 		return;
 	}
 	rss_conf.rss_key = NULL;
-	for (i = 0; i < rte_eth_dev_count(); i++) {
-		diag = rte_eth_dev_rss_hash_update(i, &rss_conf);
+	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
+		diag = rte_eth_dev_rss_hash_update(pid, &rss_conf);
 		if (diag < 0)
 			printf("Configuration of RSS hash at ethernet port %d "
 				"failed with error (%d): %s.\n",
-				i, -diag, strerror(-diag));
+				pid, -diag, strerror(-diag));
 	}
 }
 
@@ -3686,10 +3686,9 @@  struct cmd_csum_result {
 	uint64_t csum_offloads = 0;
 	struct rte_eth_dev_info dev_info;
 
-	if (port_id_is_invalid(res->port_id, ENABLED_WARN)) {
-		printf("invalid port %d\n", res->port_id);
+	if (port_id_is_invalid(res->port_id, ENABLED_WARN))
 		return;
-	}
+
 	if (!port_is_stopped(res->port_id)) {
 		printf("Please stop port %d first\n", res->port_id);
 		return;
@@ -4364,8 +4363,8 @@  struct cmd_gso_show_result {
 {
 	struct cmd_gso_show_result *res = parsed_result;
 
-	if (!rte_eth_dev_is_valid_port(res->cmd_pid)) {
-		printf("invalid port id %u\n", res->cmd_pid);
+	if (port_id_is_invalid(res->cmd_pid, ENABLED_WARN)) {
+		printf("invalid/not owned port id %u\n", res->cmd_pid);
 		return;
 	}
 	if (!strcmp(res->cmd_keyword, "gso")) {
@@ -5375,7 +5374,12 @@  static void cmd_create_bonded_device_parsed(void *parsed_result,
 				port_id);
 
 		/* Update number of ports */
-		nb_ports = rte_eth_dev_count();
+		if (rte_eth_dev_owner_set(port_id, &my_owner) != 0) {
+			printf("Error: cannot own new attached port %d\n",
+			       port_id);
+			return;
+		}
+		nb_ports++;
 		reconfig(port_id, res->socket);
 		rte_eth_promiscuous_enable(port_id);
 	}
@@ -5484,10 +5488,8 @@  static void cmd_set_bond_mon_period_parsed(void *parsed_result,
 	struct cmd_set_bond_mon_period_result *res = parsed_result;
 	int ret;
 
-	if (res->port_num >= nb_ports) {
-		printf("Port id %d must be less than %d\n", res->port_num, nb_ports);
+	if (port_id_is_invalid(res->port_num, ENABLED_WARN))
 		return;
-	}
 
 	ret = rte_eth_bond_link_monitoring_set(res->port_num, res->period_ms);
 
@@ -5545,11 +5547,8 @@  struct cmd_set_bonding_agg_mode_policy_result {
 	struct cmd_set_bonding_agg_mode_policy_result *res = parsed_result;
 	uint8_t policy = AGG_BANDWIDTH;
 
-	if (res->port_num >= nb_ports) {
-		printf("Port id %d must be less than %d\n",
-				res->port_num, nb_ports);
+	if (port_id_is_invalid(res->port_num, ENABLED_WARN))
 		return;
-	}
 
 	if (!strcmp(res->policy, "bandwidth"))
 		policy = AGG_BANDWIDTH;
@@ -5808,7 +5807,7 @@  static void cmd_set_promisc_mode_parsed(void *parsed_result,
 
 	/* all ports */
 	if (allports) {
-		RTE_ETH_FOREACH_DEV(i) {
+		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id) {
 			if (enable)
 				rte_eth_promiscuous_enable(i);
 			else
@@ -5888,7 +5887,7 @@  static void cmd_set_allmulti_mode_parsed(void *parsed_result,
 
 	/* all ports */
 	if (allports) {
-		RTE_ETH_FOREACH_DEV(i) {
+		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id) {
 			if (enable)
 				rte_eth_allmulticast_enable(i);
 			else
@@ -6622,31 +6621,31 @@  static void cmd_showportall_parsed(void *parsed_result,
 	struct cmd_showportall_result *res = parsed_result;
 	if (!strcmp(res->show, "clear")) {
 		if (!strcmp(res->what, "stats"))
-			RTE_ETH_FOREACH_DEV(i)
+			RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
 				nic_stats_clear(i);
 		else if (!strcmp(res->what, "xstats"))
-			RTE_ETH_FOREACH_DEV(i)
+			RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
 				nic_xstats_clear(i);
 	} else if (!strcmp(res->what, "info"))
-		RTE_ETH_FOREACH_DEV(i)
+		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
 			port_infos_display(i);
 	else if (!strcmp(res->what, "stats"))
-		RTE_ETH_FOREACH_DEV(i)
+		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
 			nic_stats_display(i);
 	else if (!strcmp(res->what, "xstats"))
-		RTE_ETH_FOREACH_DEV(i)
+		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
 			nic_xstats_display(i);
 	else if (!strcmp(res->what, "fdir"))
-		RTE_ETH_FOREACH_DEV(i)
+		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
 			fdir_get_infos(i);
 	else if (!strcmp(res->what, "stat_qmap"))
-		RTE_ETH_FOREACH_DEV(i)
+		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
 			nic_stats_mapping_display(i);
 	else if (!strcmp(res->what, "dcb_tc"))
-		RTE_ETH_FOREACH_DEV(i)
+		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
 			port_dcb_info_display(i);
 	else if (!strcmp(res->what, "cap"))
-		RTE_ETH_FOREACH_DEV(i)
+		RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id)
 			port_offload_cap_display(i);
 }
 
@@ -10698,10 +10697,8 @@  struct cmd_flow_director_mask_result {
 	struct rte_eth_fdir_masks *mask;
 	struct rte_port *port;
 
-	if (res->port_id > nb_ports) {
-		printf("Invalid port, range is [0, %d]\n", nb_ports - 1);
+	if (port_id_is_invalid(res->port_id, ENABLED_WARN))
 		return;
-	}
 
 	port = &ports[res->port_id];
 	/** Check if the port is not started **/
@@ -10899,10 +10896,8 @@  struct cmd_flow_director_flex_mask_result {
 	uint16_t i;
 	int ret;
 
-	if (res->port_id > nb_ports) {
-		printf("Invalid port, range is [0, %d]\n", nb_ports - 1);
+	if (port_id_is_invalid(res->port_id, ENABLED_WARN))
 		return;
-	}
 
 	port = &ports[res->port_id];
 	/** Check if the port is not started **/
@@ -11053,12 +11048,10 @@  struct cmd_flow_director_flexpayload_result {
 	struct cmd_flow_director_flexpayload_result *res = parsed_result;
 	struct rte_eth_flex_payload_cfg flex_cfg;
 	struct rte_port *port;
-	int ret = 0;
+	int ret;
 
-	if (res->port_id > nb_ports) {
-		printf("Invalid port, range is [0, %d]\n", nb_ports - 1);
+	if (port_id_is_invalid(res->port_id, ENABLED_WARN))
 		return;
-	}
 
 	port = &ports[res->port_id];
 	/** Check if the port is not started **/
@@ -11774,7 +11767,7 @@  struct cmd_config_l2_tunnel_eth_type_result {
 	entry.l2_tunnel_type = str2fdir_l2_tunnel_type(res->l2_tunnel_type);
 	entry.ether_type = res->eth_type_val;
 
-	RTE_ETH_FOREACH_DEV(pid) {
+	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
 		rte_eth_dev_l2_tunnel_eth_type_conf(pid, &entry);
 	}
 }
@@ -11890,7 +11883,7 @@  struct cmd_config_l2_tunnel_en_dis_result {
 	else
 		en = 0;
 
-	RTE_ETH_FOREACH_DEV(pid) {
+	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
 		rte_eth_dev_l2_tunnel_offload_set(pid,
 						  &entry,
 						  ETH_L2_TUNNEL_ENABLE_MASK,
@@ -14440,10 +14433,8 @@  struct cmd_ddp_add_result {
 	int file_num;
 	int ret = -ENOTSUP;
 
-	if (res->port_id > nb_ports) {
-		printf("Invalid port, range is [0, %d]\n", nb_ports - 1);
+	if (port_id_is_invalid(res->port_id, ENABLED_WARN))
 		return;
-	}
 
 	if (!all_ports_stopped()) {
 		printf("Please stop all ports first\n");
@@ -14522,10 +14513,8 @@  struct cmd_ddp_del_result {
 	uint32_t size;
 	int ret = -ENOTSUP;
 
-	if (res->port_id > nb_ports) {
-		printf("Invalid port, range is [0, %d]\n", nb_ports - 1);
+	if (port_id_is_invalid(res->port_id, ENABLED_WARN))
 		return;
-	}
 
 	if (!all_ports_stopped()) {
 		printf("Please stop all ports first\n");
@@ -14837,10 +14826,8 @@  struct cmd_ddp_get_list_result {
 #endif
 	int ret = -ENOTSUP;
 
-	if (res->port_id > nb_ports) {
-		printf("Invalid port, range is [0, %d]\n", nb_ports - 1);
+	if (port_id_is_invalid(res->port_id, ENABLED_WARN))
 		return;
-	}
 
 #ifdef RTE_LIBRTE_I40E_PMD
 	size = PROFILE_INFO_SIZE * MAX_PROFILE_NUM + 4;
@@ -16296,7 +16283,7 @@  struct cmd_cmdfile_result {
 	if (id == (portid_t)RTE_PORT_ALL) {
 		portid_t pid;
 
-		RTE_ETH_FOREACH_DEV(pid) {
+		RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
 			/* check if need_reconfig has been set to 1 */
 			if (ports[pid].need_reconfig == 0)
 				ports[pid].need_reconfig = dev;
diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c
index 561e057..e55490f 100644
--- a/app/test-pmd/cmdline_flow.c
+++ b/app/test-pmd/cmdline_flow.c
@@ -2652,7 +2652,7 @@  static int comp_vc_action_rss_queue(struct context *, const struct token *,
 
 	(void)ctx;
 	(void)token;
-	RTE_ETH_FOREACH_DEV(p) {
+	RTE_ETH_FOREACH_DEV_OWNED_BY(p, my_owner.id) {
 		if (buf && i == ent)
 			return snprintf(buf, size, "%u", p);
 		++i;
diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index 957b820..43b9a7d 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -156,7 +156,7 @@  struct rss_type_info {
 
 	if (port_id_is_invalid(port_id, ENABLED_WARN)) {
 		printf("Valid port range is [0");
-		RTE_ETH_FOREACH_DEV(pid)
+		RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id)
 			printf(", %d", pid);
 		printf("]\n");
 		return;
@@ -236,7 +236,7 @@  struct rss_type_info {
 
 	if (port_id_is_invalid(port_id, ENABLED_WARN)) {
 		printf("Valid port range is [0");
-		RTE_ETH_FOREACH_DEV(pid)
+		RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id)
 			printf(", %d", pid);
 		printf("]\n");
 		return;
@@ -253,10 +253,9 @@  struct rss_type_info {
 	struct rte_eth_xstat_name *xstats_names;
 
 	printf("###### NIC extended statistics for port %-2d\n", port_id);
-	if (!rte_eth_dev_is_valid_port(port_id)) {
-		printf("Error: Invalid port number %i\n", port_id);
+
+	if (port_id_is_invalid(port_id, ENABLED_WARN))
 		return;
-	}
 
 	/* Get count */
 	cnt_xstats = rte_eth_xstats_get_names(port_id, NULL, 0);
@@ -321,7 +320,7 @@  struct rss_type_info {
 
 	if (port_id_is_invalid(port_id, ENABLED_WARN)) {
 		printf("Valid port range is [0");
-		RTE_ETH_FOREACH_DEV(pid)
+		RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id)
 			printf(", %d", pid);
 		printf("]\n");
 		return;
@@ -439,7 +438,7 @@  struct rss_type_info {
 
 	if (port_id_is_invalid(port_id, ENABLED_WARN)) {
 		printf("Valid port range is [0");
-		RTE_ETH_FOREACH_DEV(pid)
+		RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id)
 			printf(", %d", pid);
 		printf("]\n");
 		return;
@@ -756,10 +755,15 @@  struct rss_type_info {
 int
 port_id_is_invalid(portid_t port_id, enum print_warning warning)
 {
+	struct rte_eth_dev_owner owner;
+	int ret;
+
 	if (port_id == (portid_t)RTE_PORT_ALL)
 		return 0;
 
-	if (rte_eth_dev_is_valid_port(port_id))
+	ret = rte_eth_dev_owner_get(port_id, &owner);
+
+	if (ret == 0 && owner.id == my_owner.id)
 		return 0;
 
 	if (warning == ENABLED_WARN)
@@ -2373,7 +2377,7 @@  struct igb_ring_desc_16_bytes {
 		return;
 	}
 	nb_pt = 0;
-	RTE_ETH_FOREACH_DEV(i) {
+	RTE_ETH_FOREACH_DEV_OWNED_BY(i, my_owner.id) {
 		if (! ((uint64_t)(1ULL << i) & portmask))
 			continue;
 		portlist[nb_pt++] = i;
@@ -2512,10 +2516,9 @@  struct igb_ring_desc_16_bytes {
 void
 setup_gro(const char *onoff, portid_t port_id)
 {
-	if (!rte_eth_dev_is_valid_port(port_id)) {
-		printf("invalid port id %u\n", port_id);
+	if (port_id_is_invalid(port_id, ENABLED_WARN))
 		return;
-	}
+
 	if (test_done == 0) {
 		printf("Before enable/disable GRO,"
 				" please stop forwarding first\n");
@@ -2574,10 +2577,9 @@  struct igb_ring_desc_16_bytes {
 
 	param = &gro_ports[port_id].param;
 
-	if (!rte_eth_dev_is_valid_port(port_id)) {
-		printf("Invalid port id %u.\n", port_id);
+	if (port_id_is_invalid(port_id, ENABLED_WARN))
 		return;
-	}
+
 	if (gro_ports[port_id].enable) {
 		printf("GRO type: TCP/IPv4\n");
 		if (gro_flush_cycles == GRO_DEFAULT_FLUSH_CYCLES) {
@@ -2595,10 +2597,9 @@  struct igb_ring_desc_16_bytes {
 void
 setup_gso(const char *mode, portid_t port_id)
 {
-	if (!rte_eth_dev_is_valid_port(port_id)) {
-		printf("invalid port id %u\n", port_id);
+	if (port_id_is_invalid(port_id, ENABLED_WARN))
 		return;
-	}
+
 	if (strcmp(mode, "on") == 0) {
 		if (test_done == 0) {
 			printf("before enabling GSO,"
diff --git a/app/test-pmd/parameters.c b/app/test-pmd/parameters.c
index 878c112..0e57b46 100644
--- a/app/test-pmd/parameters.c
+++ b/app/test-pmd/parameters.c
@@ -398,7 +398,7 @@ 
 		if (port_id_is_invalid(port_id, ENABLED_WARN) ||
 			port_id == (portid_t)RTE_PORT_ALL) {
 			printf("Valid port range is [0");
-			RTE_ETH_FOREACH_DEV(pid)
+			RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id)
 				printf(", %d", pid);
 			printf("]\n");
 			return -1;
@@ -459,7 +459,7 @@ 
 		if (port_id_is_invalid(port_id, ENABLED_WARN) ||
 			port_id == (portid_t)RTE_PORT_ALL) {
 			printf("Valid port range is [0");
-			RTE_ETH_FOREACH_DEV(pid)
+			RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id)
 				printf(", %d", pid);
 			printf("]\n");
 			return -1;
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index c066cf9..83f5e84 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -108,6 +108,11 @@ 
 lcoreid_t nb_lcores;           /**< Number of probed logical cores. */
 
 /*
+ * My port owner structure used to own Ethernet ports.
+ */
+struct rte_eth_dev_owner my_owner; /**< Unique owner. */
+
+/*
  * Test Forwarding Configuration.
  *    nb_fwd_lcores <= nb_cfg_lcores <= nb_lcores
  *    nb_fwd_ports  <= nb_cfg_ports  <= nb_ports
@@ -449,7 +454,7 @@  static int eth_event_callback(portid_t port_id,
 	portid_t pt_id;
 	int i = 0;
 
-	RTE_ETH_FOREACH_DEV(pt_id)
+	RTE_ETH_FOREACH_DEV_OWNED_BY(pt_id, my_owner.id)
 		fwd_ports_ids[i++] = pt_id;
 
 	nb_cfg_ports = nb_ports;
@@ -573,7 +578,7 @@  static int eth_event_callback(portid_t port_id,
 		fwd_lcores[lc_id]->cpuid_idx = lc_id;
 	}
 
-	RTE_ETH_FOREACH_DEV(pid) {
+	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
 		port = &ports[pid];
 		/* Apply default Tx configuration for all ports */
 		port->dev_conf.txmode = tx_mode;
@@ -706,7 +711,7 @@  static int eth_event_callback(portid_t port_id,
 	queueid_t q;
 
 	/* set socket id according to numa or not */
-	RTE_ETH_FOREACH_DEV(pid) {
+	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
 		port = &ports[pid];
 		if (nb_rxq > port->dev_info.max_rx_queues) {
 			printf("Fail: nb_rxq(%d) is greater than "
@@ -1000,9 +1005,8 @@  static int eth_event_callback(portid_t port_id,
 	uint64_t tics_per_1sec;
 	uint64_t tics_datum;
 	uint64_t tics_current;
-	uint8_t idx_port, cnt_ports;
+	uint16_t idx_port;
 
-	cnt_ports = rte_eth_dev_count();
 	tics_datum = rte_rdtsc();
 	tics_per_1sec = rte_get_timer_hz();
 #endif
@@ -1017,11 +1021,10 @@  static int eth_event_callback(portid_t port_id,
 			tics_current = rte_rdtsc();
 			if (tics_current - tics_datum >= tics_per_1sec) {
 				/* Periodic bitrate calculation */
-				for (idx_port = 0;
-						idx_port < cnt_ports;
-						idx_port++)
+				RTE_ETH_FOREACH_DEV_OWNED_BY(idx_port,
+							     my_owner.id)
 					rte_stats_bitrate_calc(bitrate_data,
-						idx_port);
+							       idx_port);
 				tics_datum = tics_current;
 			}
 		}
@@ -1359,7 +1362,7 @@  static int eth_event_callback(portid_t port_id,
 	portid_t pi;
 	struct rte_port *port;
 
-	RTE_ETH_FOREACH_DEV(pi) {
+	RTE_ETH_FOREACH_DEV_OWNED_BY(pi, my_owner.id) {
 		port = &ports[pi];
 		/* Check if there is a port which is not started */
 		if ((port->port_status != RTE_PORT_STARTED) &&
@@ -1387,7 +1390,7 @@  static int eth_event_callback(portid_t port_id,
 {
 	portid_t pi;
 
-	RTE_ETH_FOREACH_DEV(pi) {
+	RTE_ETH_FOREACH_DEV_OWNED_BY(pi, my_owner.id) {
 		if (!port_is_stopped(pi))
 			return 0;
 	}
@@ -1434,7 +1437,7 @@  static int eth_event_callback(portid_t port_id,
 
 	if(dcb_config)
 		dcb_test = 1;
-	RTE_ETH_FOREACH_DEV(pi) {
+	RTE_ETH_FOREACH_DEV_OWNED_BY(pi, my_owner.id) {
 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
 			continue;
 
@@ -1620,7 +1623,7 @@  static int eth_event_callback(portid_t port_id,
 
 	printf("Stopping ports...\n");
 
-	RTE_ETH_FOREACH_DEV(pi) {
+	RTE_ETH_FOREACH_DEV_OWNED_BY(pi, my_owner.id) {
 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
 			continue;
 
@@ -1663,7 +1666,7 @@  static int eth_event_callback(portid_t port_id,
 
 	printf("Closing ports...\n");
 
-	RTE_ETH_FOREACH_DEV(pi) {
+	RTE_ETH_FOREACH_DEV_OWNED_BY(pi, my_owner.id) {
 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
 			continue;
 
@@ -1714,7 +1717,7 @@  static int eth_event_callback(portid_t port_id,
 
 	printf("Resetting ports...\n");
 
-	RTE_ETH_FOREACH_DEV(pi) {
+	RTE_ETH_FOREACH_DEV_OWNED_BY(pi, my_owner.id) {
 		if (pid != pi && pid != (portid_t)RTE_PORT_ALL)
 			continue;
 
@@ -1759,6 +1762,12 @@  static int eth_event_callback(portid_t port_id,
 	if (rte_eth_dev_attach(identifier, &pi))
 		return;
 
+	if (rte_eth_dev_owner_set(pi, &my_owner) != 0) {
+		printf("Error: cannot own new attached port %d\n", pi);
+		return;
+	}
+	nb_ports++;
+
 	socket_id = (unsigned)rte_eth_dev_socket_id(pi);
 	/* if socket_id is invalid, set to 0 */
 	if (check_socket_id(socket_id) < 0)
@@ -1766,8 +1775,6 @@  static int eth_event_callback(portid_t port_id,
 	reconfig(pi, socket_id);
 	rte_eth_promiscuous_enable(pi);
 
-	nb_ports = rte_eth_dev_count();
-
 	ports[pi].port_status = RTE_PORT_STOPPED;
 
 	printf("Port %d is attached. Now total ports is %d\n", pi, nb_ports);
@@ -1781,6 +1788,9 @@  static int eth_event_callback(portid_t port_id,
 
 	printf("Detaching a port...\n");
 
+	if (port_id_is_invalid(port_id, ENABLED_WARN))
+		return;
+
 	if (!port_is_closed(port_id)) {
 		printf("Please close port first\n");
 		return;
@@ -1794,7 +1804,7 @@  static int eth_event_callback(portid_t port_id,
 		return;
 	}
 
-	nb_ports = rte_eth_dev_count();
+	nb_ports--;
 
 	printf("Port '%s' is detached. Now total ports is %d\n",
 			name, nb_ports);
@@ -1812,7 +1822,7 @@  static int eth_event_callback(portid_t port_id,
 
 	if (ports != NULL) {
 		no_link_check = 1;
-		RTE_ETH_FOREACH_DEV(pt_id) {
+		RTE_ETH_FOREACH_DEV_OWNED_BY(pt_id, my_owner.id) {
 			printf("\nShutting down port %d...\n", pt_id);
 			fflush(stdout);
 			stop_port(pt_id);
@@ -1844,7 +1854,7 @@  struct pmd_test_command {
 	fflush(stdout);
 	for (count = 0; count <= MAX_CHECK_TIME; count++) {
 		all_ports_up = 1;
-		RTE_ETH_FOREACH_DEV(portid) {
+		RTE_ETH_FOREACH_DEV_OWNED_BY(portid, my_owner.id) {
 			if ((port_mask & (1 << portid)) == 0)
 				continue;
 			memset(&link, 0, sizeof(link));
@@ -1936,6 +1946,8 @@  struct pmd_test_command {
 
 	switch (type) {
 	case RTE_ETH_EVENT_INTR_RMV:
+		if (port_id_is_invalid(port_id, ENABLED_WARN))
+			break;
 		if (rte_eal_alarm_set(100000,
 				rmv_event_callback, (void *)(intptr_t)port_id))
 			fprintf(stderr, "Could not set up deferred device removal\n");
@@ -2068,7 +2080,7 @@  struct pmd_test_command {
 	portid_t pid;
 	struct rte_port *port;
 
-	RTE_ETH_FOREACH_DEV(pid) {
+	RTE_ETH_FOREACH_DEV_OWNED_BY(pid, my_owner.id) {
 		port = &ports[pid];
 		port->dev_conf.fdir_conf = fdir_conf;
 		if (nb_rxq > 1) {
@@ -2383,7 +2395,12 @@  uint8_t port_is_bonding_slave(portid_t slave_pid)
 	rte_pdump_init(NULL);
 #endif
 
-	nb_ports = (portid_t) rte_eth_dev_count();
+	if (rte_eth_dev_owner_new(&my_owner.id))
+		rte_panic("Failed to get unique owner identifier\n");
+	snprintf(my_owner.name, sizeof(my_owner.name), TESTPMD_OWNER_NAME);
+	RTE_ETH_FOREACH_DEV(port_id)
+		if (rte_eth_dev_owner_set(port_id, &my_owner) == 0)
+			nb_ports++;
 	if (nb_ports == 0)
 		TESTPMD_LOG(WARNING, "No probed ethernet devices\n");
 
@@ -2431,7 +2448,7 @@  uint8_t port_is_bonding_slave(portid_t slave_pid)
 		rte_exit(EXIT_FAILURE, "Start ports failed\n");
 
 	/* set all ports to promiscuous mode by default */
-	RTE_ETH_FOREACH_DEV(port_id)
+	RTE_ETH_FOREACH_DEV_OWNED_BY(port_id, my_owner.id)
 		rte_eth_promiscuous_enable(port_id);
 
 	/* Init metrics library */
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 9c739e5..2d253b9 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -50,6 +50,8 @@ 
 #define NUMA_NO_CONFIG 0xFF
 #define UMA_NO_CONFIG  0xFF
 
+#define TESTPMD_OWNER_NAME "TestPMD"
+
 typedef uint8_t  lcoreid_t;
 typedef uint16_t portid_t;
 typedef uint16_t queueid_t;
@@ -361,6 +363,7 @@  struct queue_stats_mappings {
  * nb_fwd_ports <= nb_cfg_ports <= nb_ports
  */
 extern portid_t nb_ports; /**< Number of ethernet ports probed at init time. */
+extern struct rte_eth_dev_owner my_owner; /**< Unique owner. */
 extern portid_t nb_cfg_ports; /**< Number of configured ports. */
 extern portid_t nb_fwd_ports; /**< Number of forwarding ports. */
 extern portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];