[dpdk-dev] [PATCH 1/8] bond: dynamic rss configuration

Tomasz Kulasek tomaszx.kulasek at intel.com
Wed Jun 3 12:59:00 CEST 2015


The bonding device implements independent management of RSS settings. It stores
its own copies of the settings, i.e. the RETA, the RSS hash function and the RSS
key. This is required to ensure consistency.

1) The RSS hash function set for the bonding device is the maximal set of RSS
hash functions supported by all bonded devices. That means that, to have RSS
support for bonding, all slaves should be RSS-capable.

2) The RSS key is propagated to the slaves "as is", and is always 40 bytes long.

3) The RETA for bonding is an internal table managed by the bonding API, and is
used as a pattern to set up the slaves. Its size is the GCD of all slave RETA
sizes, so it can easily be used as a pattern providing the expected behavior,
even if the slaves' RETA sizes are different.

Signed-off-by: Tomasz Kulasek <tomaszx.kulasek at intel.com>
---
 drivers/net/bonding/rte_eth_bond_api.c     |   22 ++++
 drivers/net/bonding/rte_eth_bond_pmd.c     |  192 ++++++++++++++++++++++++++--
 drivers/net/bonding/rte_eth_bond_private.h |   11 ++
 3 files changed, 211 insertions(+), 14 deletions(-)

diff --git a/drivers/net/bonding/rte_eth_bond_api.c b/drivers/net/bonding/rte_eth_bond_api.c
index e91a623..a31b098 100644
--- a/drivers/net/bonding/rte_eth_bond_api.c
+++ b/drivers/net/bonding/rte_eth_bond_api.c
@@ -301,6 +301,7 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id)
 	internals->active_slave_count = 0;
 	internals->rx_offload_capa = 0;
 	internals->tx_offload_capa = 0;
+	internals->flow_type_rss_offloads = 0;
 
 	memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
 	memset(internals->slaves, 0, sizeof(internals->slaves));
@@ -366,6 +367,11 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
 	memset(&dev_info, 0, sizeof(dev_info));
 	rte_eth_dev_info_get(slave_port_id, &dev_info);
 
+	/* We need to store each slave's reta_size to be able to synchronize the
+	 * RETA for all slave devices even if their sizes are different.
+	 */
+	internals->slaves[internals->slave_count].reta_size = dev_info.reta_size;
+
 	if (internals->slave_count < 1) {
 		/* if MAC is not user defined then use MAC of first slave add to
 		 * bonded device */
@@ -379,9 +385,16 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
 		/* Make primary slave */
 		internals->primary_port = slave_port_id;
 
+		/* Inherit queues settings from first slave */
+		internals->nb_rx_queues = slave_eth_dev->data->nb_rx_queues;
+		internals->nb_tx_queues = slave_eth_dev->data->nb_tx_queues;
+
+		internals->reta_size = dev_info.reta_size;
+
 		/* Take the first dev's offload capabilities */
 		internals->rx_offload_capa = dev_info.rx_offload_capa;
 		internals->tx_offload_capa = dev_info.tx_offload_capa;
+		internals->flow_type_rss_offloads = dev_info.flow_type_rss_offloads;
 
 	} else {
 		/* Check slave link properties are supported if props are set,
@@ -400,6 +413,13 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
 		}
 		internals->rx_offload_capa &= dev_info.rx_offload_capa;
 		internals->tx_offload_capa &= dev_info.tx_offload_capa;
+		internals->flow_type_rss_offloads &= dev_info.flow_type_rss_offloads;
+
+		/* RETA size is the GCD of all slaves' RETA sizes,
+		 * so if all sizes are powers of 2, the smallest one is the GCD */
+		if (internals->reta_size > dev_info.reta_size)
+			internals->reta_size = dev_info.reta_size;
+
 	}
 
 	internals->slave_count++;
@@ -528,6 +548,8 @@ __eth_bond_slave_remove_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id)
 	if (internals->slave_count == 0) {
 		internals->rx_offload_capa = 0;
 		internals->tx_offload_capa = 0;
+		internals->flow_type_rss_offloads = 0;
+		internals->reta_size = 0;
 	}
 	return 0;
 }
diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c
index c937e6b..4f6fc68 100644
--- a/drivers/net/bonding/rte_eth_bond_pmd.c
+++ b/drivers/net/bonding/rte_eth_bond_pmd.c
@@ -1306,6 +1306,15 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev,
 	if (slave_eth_dev->driver->pci_drv.drv_flags & RTE_PCI_DRV_INTR_LSC)
 		slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
 
+	/* If RSS is enabled for bonding, try to enable it for slaves  */
+	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
+		memcpy(
+			&(slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf),
+			&(bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf),
+			sizeof(bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf));
+		slave_eth_dev->data->dev_conf.rxmode.mq_mode |= ETH_MQ_RX_RSS;
+	}
+
 	/* Configure device */
 	errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
 			bonded_eth_dev->data->nb_rx_queues,
@@ -1349,6 +1358,29 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev,
 		}
 	}
 
+	/* If RSS is enabled for bonding, synchronize RETA */
+	if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
+		int i;
+		struct bond_dev_private *internals;
+		internals = bonded_eth_dev->data->dev_private;
+
+		for (i = 0; i < internals->slave_count; i++) {
+			if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
+				errval = rte_eth_dev_rss_reta_update(
+						slave_eth_dev->data->port_id,
+						&(internals->reta_conf[0]),
+						internals->slaves[i].reta_size);
+				if (errval != 0) {
+					RTE_BOND_LOG(ERR,
+							"rte_eth_dev_rss_reta_update: port=%d, err (%d)",
+							slave_eth_dev->data->port_id, errval);
+					return -1;
+				}
+				break;
+			}
+		}
+	}
+
 	/* Start device */
 	errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
 	if (errval != 0) {
@@ -1569,6 +1601,9 @@ bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 
 	dev_info->rx_offload_capa = internals->rx_offload_capa;
 	dev_info->tx_offload_capa = internals->tx_offload_capa;
+	dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
+
+	dev_info->reta_size = internals->reta_size;
 }
 
 static int
@@ -1954,21 +1989,126 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type,
 	}
 }
 
+static int
+bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
+			  struct rte_eth_rss_reta_entry64 *reta_conf,
+			  uint16_t reta_size)
+{
+	int i, j, result = 0;
+	int slave_reta_size;
+	int reta_count;
+	struct bond_dev_private *internals = dev->data->dev_private;
+
+	if (reta_size != internals->reta_size)
+		return -EINVAL;
+
+	 /* Copy RETA table */
+	reta_count = reta_size / RTE_RETA_GROUP_SIZE;
+
+	for (i = 0; i < reta_count; i++) {
+		internals->reta_conf[i].mask = reta_conf[i].mask;
+		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
+			if ((reta_conf[i].mask >> j) & 0x01)
+				internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
+	}
+
+	/* Fill rest of array (512 - max slaves RETA size) */
+	for (; i < 512 / RTE_RETA_GROUP_SIZE; i += reta_count)
+		memcpy(&(internals->reta_conf[i]), &(internals->reta_conf[0]),
+			sizeof(internals->reta_conf[0]) * reta_count);
+
+	/* Propagate RETA over slaves */
+	for (i = 0; i < internals->slave_count; i++) {
+		slave_reta_size = internals->slaves[i].reta_size;
+		result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
+			&(internals->reta_conf[0]), slave_reta_size);
+		if (result < 0)
+			return result;
+	}
+
+	return 0;
+}
+
+static int
+bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
+		struct rte_eth_rss_reta_entry64 *reta_conf,
+		uint16_t reta_size)
+{
+	int i, j;
+	struct bond_dev_private *internals = dev->data->dev_private;
+
+	if (reta_size != internals->reta_size)
+		return -EINVAL;
+
+	 /* Copy RETA table */
+	for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
+		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
+			if ((reta_conf[i].mask >> j) & 0x01)
+				reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
+
+	return 0;
+}
+
+static int
+bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
+			  struct rte_eth_rss_conf *rss_conf)
+{
+	int i, result = 0;
+	struct bond_dev_private *internals = dev->data->dev_private;
+	struct rte_eth_rss_conf bond_rss_conf;
+
+	memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
+
+	bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
+
+	if (bond_rss_conf.rss_hf != 0)
+		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
+
+	if (bond_rss_conf.rss_key != NULL)
+		memcpy(internals->rss_key, bond_rss_conf.rss_key, 40);
+
+	for (i = 0; i < internals->slave_count; i++) {
+		result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
+				&bond_rss_conf);
+		if (result < 0)
+			return result;
+	}
+
+	return 0;
+}
+
+static int
+bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
+		struct rte_eth_rss_conf *rss_conf)
+{
+	struct bond_dev_private *internals = dev->data->dev_private;
+
+	rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
+	if (rss_conf->rss_key != NULL)
+		memcpy(rss_conf->rss_key, internals->rss_key, 40);
+
+	return 0;
+}
+
 struct eth_dev_ops default_dev_ops = {
-		.dev_start = bond_ethdev_start,
-		.dev_stop = bond_ethdev_stop,
-		.dev_close = bond_ethdev_close,
-		.dev_configure = bond_ethdev_configure,
-		.dev_infos_get = bond_ethdev_info,
-		.rx_queue_setup = bond_ethdev_rx_queue_setup,
-		.tx_queue_setup = bond_ethdev_tx_queue_setup,
-		.rx_queue_release = bond_ethdev_rx_queue_release,
-		.tx_queue_release = bond_ethdev_tx_queue_release,
-		.link_update = bond_ethdev_link_update,
-		.stats_get = bond_ethdev_stats_get,
-		.stats_reset = bond_ethdev_stats_reset,
-		.promiscuous_enable = bond_ethdev_promiscuous_enable,
-		.promiscuous_disable = bond_ethdev_promiscuous_disable
+		.dev_start            = bond_ethdev_start,
+		.dev_stop             = bond_ethdev_stop,
+		.dev_close            = bond_ethdev_close,
+		.dev_configure        = bond_ethdev_configure,
+		.dev_infos_get        = bond_ethdev_info,
+		.rx_queue_setup       = bond_ethdev_rx_queue_setup,
+		.tx_queue_setup       = bond_ethdev_tx_queue_setup,
+		.rx_queue_release     = bond_ethdev_rx_queue_release,
+		.tx_queue_release     = bond_ethdev_tx_queue_release,
+		.link_update          = bond_ethdev_link_update,
+		.stats_get            = bond_ethdev_stats_get,
+		.stats_reset          = bond_ethdev_stats_reset,
+		.promiscuous_enable   = bond_ethdev_promiscuous_enable,
+		.promiscuous_disable  = bond_ethdev_promiscuous_disable,
+		.reta_update          = bond_ethdev_rss_reta_update,
+		.reta_query           = bond_ethdev_rss_reta_query,
+		.rss_hash_update      = bond_ethdev_rss_hash_update,
+		.rss_hash_conf_get    = bond_ethdev_rss_hash_conf_get
 };
 
 static int
@@ -2049,6 +2189,30 @@ bond_ethdev_configure(struct rte_eth_dev *dev)
 	int arg_count;
 	uint8_t port_id = dev - rte_eth_devices;
 
+	static const uint8_t default_rss_key[40] = {
+		0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
+		0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
+		0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
+		0xBE, 0xAC, 0x01, 0xFA
+	};
+
+	int i, j;
+
+	/* If RSS is enabled, fill table and key with default values */
+	if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
+
+		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
+		dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 40;
+		memcpy(internals->rss_key, default_rss_key, 40);
+
+		for (i = 0; i < 512 / RTE_RETA_GROUP_SIZE; i++) {
+			internals->reta_conf[i].mask = ~0LL;
+			for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
+				internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
+		}
+
+	}
+
 	/*
 	 * if no kvlist, it means that this bonded device has been created
 	 * through the bonding api.
diff --git a/drivers/net/bonding/rte_eth_bond_private.h b/drivers/net/bonding/rte_eth_bond_private.h
index 45e5c65..26f4b28 100644
--- a/drivers/net/bonding/rte_eth_bond_private.h
+++ b/drivers/net/bonding/rte_eth_bond_private.h
@@ -103,6 +103,8 @@ struct bond_slave_details {
 	uint8_t last_link_status;
 	/**< Port Id of slave eth_dev */
 	struct ether_addr persisted_mac_addr;
+
+	uint16_t reta_size;
 };
 
 
@@ -155,6 +157,15 @@ struct bond_dev_private {
 	uint32_t rx_offload_capa;            /** Rx offload capability */
 	uint32_t tx_offload_capa;            /** Tx offload capability */
 
+	/** Bit mask of RSS offloads, the bit offset also means flow type */
+	uint64_t flow_type_rss_offloads;
+
+	uint16_t reta_size;
+	struct rte_eth_rss_reta_entry64 reta_conf[512 / RTE_RETA_GROUP_SIZE];
+
+	uint8_t rss_key[40];				/**< 40-byte hash key. */
+	uint8_t rss_key_len;				/**< hash key length in bytes. */
+
 	struct rte_kvargs *kvlist;
 	uint8_t slave_update_idx;
 };
-- 
1.7.9.5



More information about the dev mailing list