net/cxgbe: fix races while executing rte_flow operations

Message ID 0f6ca29b43ee22150098ff83ea11bcff37927869.1570198875.git.rahul.lakkireddy@chelsio.com (mailing list archive)
State Accepted, archived
Delegated to: Ferruh Yigit
Headers
Series net/cxgbe: fix races while executing rte_flow operations

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/iol-compilation success Compile Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/Intel-compilation success Compilation OK
ci/iol-mellanox-Performance success Performance Testing PASS

Commit Message

Rahul Lakkireddy Oct. 4, 2019, 2:24 p.m. UTC
  When rules are being inserted from multiple cores, there are several
race conditions during rte_flow operations.

For example, when inserting rules from 2 cores simultaneously, both
the cores try to fetch a free available filter entry and they both
end up fetching the same entry. Both of them start overwriting the
same filter entry before sending to firmware, which results in the
wrong rule being inserted into hardware.

Fix the races by adding a spinlock to serialize the rte_flow operations.

Fixes: ee61f5113b17 ("net/cxgbe: parse and validate flows")
Fixes: 9eb2c9a48072 ("net/cxgbe: implement flow create operation")
Fixes: da23bc9d33f4 ("net/cxgbe: implement flow destroy operation")
Fixes: 8d3c12e19368 ("net/cxgbe: implement flow query operation")
Fixes: 86910379d335 ("net/cxgbe: implement flow flush operation")
Cc: stable@dpdk.org

Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
---
 drivers/net/cxgbe/base/adapter.h |  2 +
 drivers/net/cxgbe/cxgbe_flow.c   | 68 ++++++++++++++++++++++----------
 drivers/net/cxgbe/cxgbe_main.c   |  2 +
 3 files changed, 51 insertions(+), 21 deletions(-)
  

Comments

Ferruh Yigit Oct. 8, 2019, 9:36 a.m. UTC | #1
On 10/4/2019 3:24 PM, Rahul Lakkireddy wrote:
> When rules are being inserted from multiple cores, there are several
> race conditions during rte_flow operations.
> 
> For example, when inserting rules from 2 cores simultaneously, both
> the cores try to fetch a free available filter entry and they both
> end up fetching the same entry. Both of them start overwriting the
> same filter entry before sending to firmware, which results in wrong
> rule being inserted to hardware.
> 
> Fix the races by adding spinlock to serialize the rte_flow operations.
> 
> Fixes: ee61f5113b17 ("net/cxgbe: parse and validate flows")
> Fixes: 9eb2c9a48072 ("net/cxgbe: implement flow create operation")
> Fixes: da23bc9d33f4 ("net/cxgbe: implement flow destroy operation")
> Fixes: 8d3c12e19368 ("net/cxgbe: implement flow query operation")
> Fixes: 86910379d335 ("net/cxgbe: implement flow flush operation")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>

Applied to dpdk-next-net/master, thanks.
  

Patch

diff --git a/drivers/net/cxgbe/base/adapter.h b/drivers/net/cxgbe/base/adapter.h
index 6758364c7..db654ad9c 100644
--- a/drivers/net/cxgbe/base/adapter.h
+++ b/drivers/net/cxgbe/base/adapter.h
@@ -328,6 +328,8 @@  struct adapter {
 	int use_unpacked_mode; /* unpacked rx mode state */
 	rte_spinlock_t win0_lock;
 
+	rte_spinlock_t flow_lock; /* Serialize access for rte_flow ops */
+
 	unsigned int clipt_start; /* CLIP table start */
 	unsigned int clipt_end;   /* CLIP table end */
 	unsigned int l2t_start;   /* Layer 2 table start */
diff --git a/drivers/net/cxgbe/cxgbe_flow.c b/drivers/net/cxgbe/cxgbe_flow.c
index 9ee8353ae..9070f4960 100644
--- a/drivers/net/cxgbe/cxgbe_flow.c
+++ b/drivers/net/cxgbe/cxgbe_flow.c
@@ -1014,6 +1014,7 @@  cxgbe_flow_create(struct rte_eth_dev *dev,
 		  const struct rte_flow_action action[],
 		  struct rte_flow_error *e)
 {
+	struct adapter *adap = ethdev2adap(dev);
 	struct rte_flow *flow;
 	int ret;
 
@@ -1034,8 +1035,10 @@  cxgbe_flow_create(struct rte_eth_dev *dev,
 		return NULL;
 	}
 
+	t4_os_lock(&adap->flow_lock);
 	/* go, interact with cxgbe_filter */
 	ret = __cxgbe_flow_create(dev, flow);
+	t4_os_unlock(&adap->flow_lock);
 	if (ret) {
 		rte_flow_error_set(e, ret, RTE_FLOW_ERROR_TYPE_HANDLE,
 				   NULL, "Unable to create flow rule");
@@ -1100,9 +1103,12 @@  static int
 cxgbe_flow_destroy(struct rte_eth_dev *dev, struct rte_flow *flow,
 		   struct rte_flow_error *e)
 {
+	struct adapter *adap = ethdev2adap(dev);
 	int ret;
 
+	t4_os_lock(&adap->flow_lock);
 	ret = __cxgbe_flow_destroy(dev, flow);
+	t4_os_unlock(&adap->flow_lock);
 	if (ret)
 		return rte_flow_error_set(e, ret, RTE_FLOW_ERROR_TYPE_HANDLE,
 					  flow, "error destroying filter.");
@@ -1159,11 +1165,14 @@  cxgbe_flow_query(struct rte_eth_dev *dev, struct rte_flow *flow,
 					  " enabled during filter creation");
 
 	c = (struct rte_flow_query_count *)data;
+
+	t4_os_lock(&adap->flow_lock);
 	ret = __cxgbe_flow_query(flow, &c->hits, &c->bytes);
-	if (ret)
-		return rte_flow_error_set(e, -ret, RTE_FLOW_ERROR_TYPE_ACTION,
-					  f, "cxgbe pmd failed to"
-					  " perform query");
+	if (ret) {
+		rte_flow_error_set(e, -ret, RTE_FLOW_ERROR_TYPE_ACTION,
+				   f, "cxgbe pmd failed to perform query");
+		goto out;
+	}
 
 	/* Query was successful */
 	c->bytes_set = 1;
@@ -1171,7 +1180,9 @@  cxgbe_flow_query(struct rte_eth_dev *dev, struct rte_flow *flow,
 	if (c->reset)
 		cxgbe_clear_filter_count(adap, flow->fidx, f->fs.cap, true);
 
-	return 0; /* success / partial_success */
+out:
+	t4_os_unlock(&adap->flow_lock);
+	return ret;
 }
 
 static int
@@ -1184,7 +1195,7 @@  cxgbe_flow_validate(struct rte_eth_dev *dev,
 	struct adapter *adap = ethdev2adap(dev);
 	struct rte_flow *flow;
 	unsigned int fidx;
-	int ret;
+	int ret = 0;
 
 	flow = t4_os_alloc(sizeof(struct rte_flow));
 	if (!flow)
@@ -1208,20 +1219,23 @@  cxgbe_flow_validate(struct rte_eth_dev *dev,
 				"validation failed. Check f/w config file.");
 	}
 
+	t4_os_lock(&adap->flow_lock);
 	if (cxgbe_get_fidx(flow, &fidx)) {
-		t4_os_free(flow);
-		return rte_flow_error_set(e, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
-					  NULL, "no memory in tcam.");
+		ret = rte_flow_error_set(e, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
+					 NULL, "no memory in tcam.");
+		goto out;
 	}
 
 	if (cxgbe_verify_fidx(flow, fidx, 0)) {
-		t4_os_free(flow);
-		return rte_flow_error_set(e, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
-					  NULL, "validation failed");
+		ret = rte_flow_error_set(e, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE,
+					 NULL, "validation failed");
+		goto out;
 	}
 
+out:
+	t4_os_unlock(&adap->flow_lock);
 	t4_os_free(flow);
-	return 0;
+	return ret;
 }
 
 /*
@@ -1230,14 +1244,12 @@  cxgbe_flow_validate(struct rte_eth_dev *dev,
  *        == 1 filter not active / not found
  */
 static int
-cxgbe_check_n_destroy(struct filter_entry *f, struct rte_eth_dev *dev,
-		      struct rte_flow_error *e)
+cxgbe_check_n_destroy(struct filter_entry *f, struct rte_eth_dev *dev)
 {
 	if (f && (f->valid || f->pending) &&
 	    f->dev == dev && /* Only if user has asked for this port */
 	     f->private) /* We (rte_flow) created this filter */
-		return cxgbe_flow_destroy(dev, (struct rte_flow *)f->private,
-					  e);
+		return __cxgbe_flow_destroy(dev, (struct rte_flow *)f->private);
 	return 1;
 }
 
@@ -1247,13 +1259,20 @@  static int cxgbe_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *e)
 	unsigned int i;
 	int ret = 0;
 
+	t4_os_lock(&adap->flow_lock);
 	if (adap->tids.ftid_tab) {
 		struct filter_entry *f = &adap->tids.ftid_tab[0];
 
 		for (i = 0; i < adap->tids.nftids; i++, f++) {
-			ret = cxgbe_check_n_destroy(f, dev, e);
-			if (ret < 0)
+			ret = cxgbe_check_n_destroy(f, dev);
+			if (ret < 0) {
+				rte_flow_error_set(e, ret,
+						   RTE_FLOW_ERROR_TYPE_HANDLE,
+						   f->private,
+						   "error destroying TCAM "
+						   "filter.");
 				goto out;
+			}
 		}
 	}
 
@@ -1263,13 +1282,20 @@  static int cxgbe_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *e)
 		for (i = adap->tids.hash_base; i <= adap->tids.ntids; i++) {
 			f = (struct filter_entry *)adap->tids.tid_tab[i];
 
-			ret = cxgbe_check_n_destroy(f, dev, e);
-			if (ret < 0)
+			ret = cxgbe_check_n_destroy(f, dev);
+			if (ret < 0) {
+				rte_flow_error_set(e, ret,
+						   RTE_FLOW_ERROR_TYPE_HANDLE,
+						   f->private,
+						   "error destroying HASH "
+						   "filter.");
 				goto out;
+			}
 		}
 	}
 
 out:
+	t4_os_unlock(&adap->flow_lock);
 	return ret >= 0 ? 0 : ret;
 }
 
diff --git a/drivers/net/cxgbe/cxgbe_main.c b/drivers/net/cxgbe/cxgbe_main.c
index f6967a3e4..cc5f4df3a 100644
--- a/drivers/net/cxgbe/cxgbe_main.c
+++ b/drivers/net/cxgbe/cxgbe_main.c
@@ -1906,6 +1906,8 @@  int cxgbe_probe(struct adapter *adapter)
 			 "filter support disabled. Continuing\n");
 	}
 
+	t4_os_lock_init(&adapter->flow_lock);
+
 	adapter->mpstcam = t4_init_mpstcam(adapter);
 	if (!adapter->mpstcam)
 		dev_warn(adapter, "could not allocate mps tcam table."