[1/3] regex/mlx5: fix memory region unregistration
Checks
Commit Message
The issue can cause an illegal physical address access when huge-page A
is released and huge-page B is allocated at the same virtual address.
The old MR can be matched using the virtual address of huge-page B, but
the HW will access the physical address of huge-page A, which is no longer
part of the DPDK process.
Register a driver callback for memory events in order to free all the
MRs of memory that is going to be freed from the DPDK process.
Fixes: cda883bbb655 ("regex/mlx5: add dynamic memory registration to datapath")
Cc: stable@dpdk.org
Signed-off-by: Michael Baum <michaelba@nvidia.com>
---
This series depends on this patch:
https://patchwork.dpdk.org/project/dpdk/patch/20210628150614.1769507-1-michaelba@nvidia.com/
Please don't apply it before that patch is integrated.
drivers/regex/mlx5/mlx5_regex.c | 55 ++++++++++++++++++++++++
drivers/regex/mlx5/mlx5_regex.h | 2 +
drivers/regex/mlx5/mlx5_regex_fastpath.c | 39 +++++++++++++++--
3 files changed, 92 insertions(+), 4 deletions(-)
Comments
From: Michael Baum
> The issue can cause illegal physical address access while a huge-page A is
> released and huge-page B is allocated on the same virtual address.
> The old MR can be matched using the virtual address of huge-page B but the
> HW will access the physical address of huge-page A which is no more part of
> the DPDK process.
>
> Register a driver callback for memory event in order to free out all the MRs of
> memory that is going to be freed from the dpdk process.
>
> Fixes: cda883bbb655 ("regex/mlx5: add dynamic memory registration to
> datapath")
> Cc: stable@dpdk.org
>
> Signed-off-by: Michael Baum <michaelba@nvidia.com>
> ---
>
> This series depends on this patch:
> https://patchwork.dpdk.org/project/dpdk/patch/20210628150614.1769507-
> 1-michaelba@nvidia.com/
> Please don't apply it only before this patch is integrated.
>
> drivers/regex/mlx5/mlx5_regex.c | 55 ++++++++++++++++++++++++
> drivers/regex/mlx5/mlx5_regex.h | 2 +
> drivers/regex/mlx5/mlx5_regex_fastpath.c | 39 +++++++++++++++--
> 3 files changed, 92 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/regex/mlx5/mlx5_regex.c
> b/drivers/regex/mlx5/mlx5_regex.c index dcb2ced88e..0f12d94d7e 100644
> --- a/drivers/regex/mlx5/mlx5_regex.c
> +++ b/drivers/regex/mlx5/mlx5_regex.c
> @@ -11,6 +11,7 @@
> #include <rte_regexdev_driver.h>
>
> #include <mlx5_common_pci.h>
> +#include <mlx5_common_mr.h>
> #include <mlx5_common.h>
> #include <mlx5_glue.h>
> #include <mlx5_devx_cmds.h>
> @@ -24,6 +25,10 @@
>
> int mlx5_regex_logtype;
>
> +TAILQ_HEAD(regex_mem_event, mlx5_regex_priv) mlx5_mem_event_list
> =
> +
> TAILQ_HEAD_INITIALIZER(mlx5_mem_event_list);
> +static pthread_mutex_t mem_event_list_lock =
> PTHREAD_MUTEX_INITIALIZER;
> +
> const struct rte_regexdev_ops mlx5_regexdev_ops = {
> .dev_info_get = mlx5_regex_info_get,
> .dev_configure = mlx5_regex_configure, @@ -82,6 +87,40 @@
> mlx5_regex_get_name(char *name, struct rte_pci_device *pci_dev
> __rte_unused)
> pci_dev->addr.devid, pci_dev->addr.function); }
>
> +/**
> + * Callback for memory event.
> + *
> + * @param event_type
> + * Memory event type.
> + * @param addr
> + * Address of memory.
> + * @param len
> + * Size of memory.
> + */
> +static void
> +mlx5_regex_mr_mem_event_cb(enum rte_mem_event event_type,
> const void *addr,
> + size_t len, void *arg __rte_unused) {
> + struct mlx5_regex_priv *priv;
> +
> + /* Must be called from the primary process. */
> + MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
> + switch (event_type) {
> + case RTE_MEM_EVENT_FREE:
> + pthread_mutex_lock(&mem_event_list_lock);
> + /* Iterate all the existing mlx5 devices. */
> + TAILQ_FOREACH(priv, &mlx5_mem_event_list,
> mem_event_cb)
> + mlx5_free_mr_by_addr(&priv->mr_scache,
> + priv->ctx->device->name,
> + addr, len);
> + pthread_mutex_unlock(&mem_event_list_lock);
> + break;
> + case RTE_MEM_EVENT_ALLOC:
> + default:
> + break;
> + }
> +}
> +
> static int
> mlx5_regex_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
> struct rte_pci_device *pci_dev)
> @@ -193,6 +232,15 @@ mlx5_regex_pci_probe(struct rte_pci_driver
> *pci_drv __rte_unused,
> rte_errno = ENOMEM;
> goto error;
> }
> + /* Register callback function for global shared MR cache
> management. */
> + if (TAILQ_EMPTY(&mlx5_mem_event_list))
> +
> rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
> +
> mlx5_regex_mr_mem_event_cb,
> + NULL);
> + /* Add device to memory callback list. */
> + pthread_mutex_lock(&mem_event_list_lock);
> + TAILQ_INSERT_TAIL(&mlx5_mem_event_list, priv, mem_event_cb);
> + pthread_mutex_unlock(&mem_event_list_lock);
> DRV_LOG(INFO, "RegEx GGA is %s.",
> priv->has_umr ? "supported" : "unsupported");
> return 0;
> @@ -225,6 +273,13 @@ mlx5_regex_pci_remove(struct rte_pci_device
> *pci_dev)
> return 0;
> priv = dev->data->dev_private;
> if (priv) {
> + /* Remove from memory callback device list. */
> + pthread_mutex_lock(&mem_event_list_lock);
> + TAILQ_REMOVE(&mlx5_mem_event_list, priv,
> mem_event_cb);
> + pthread_mutex_unlock(&mem_event_list_lock);
> + if (TAILQ_EMPTY(&mlx5_mem_event_list))
> +
> rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB",
> + NULL);
> if (priv->pd)
> mlx5_glue->dealloc_pd(priv->pd);
> if (priv->uar)
> diff --git a/drivers/regex/mlx5/mlx5_regex.h
> b/drivers/regex/mlx5/mlx5_regex.h index 51a2101e53..61f59ba873 100644
> --- a/drivers/regex/mlx5/mlx5_regex.h
> +++ b/drivers/regex/mlx5/mlx5_regex.h
> @@ -70,6 +70,8 @@ struct mlx5_regex_priv {
> uint32_t nb_engines; /* Number of RegEx engines. */
> struct mlx5dv_devx_uar *uar; /* UAR object. */
> struct ibv_pd *pd;
> + TAILQ_ENTRY(mlx5_regex_priv) mem_event_cb;
> + /**< Called by memory event callback. */
> struct mlx5_mr_share_cache mr_scache; /* Global shared MR cache.
> */
> uint8_t is_bf2; /* The device is BF2 device. */
> uint8_t sq_ts_format; /* Whether SQ supports timestamp formats.
> */ diff --git a/drivers/regex/mlx5/mlx5_regex_fastpath.c
> b/drivers/regex/mlx5/mlx5_regex_fastpath.c
> index b57e7d7794..437009dcb6 100644
> --- a/drivers/regex/mlx5/mlx5_regex_fastpath.c
> +++ b/drivers/regex/mlx5/mlx5_regex_fastpath.c
> @@ -109,6 +109,40 @@ set_wqe_ctrl_seg(struct mlx5_wqe_ctrl_seg *seg,
> uint16_t pi, uint8_t opcode,
> seg->imm = imm;
> }
>
> +/**
> + * Query LKey from a packet buffer for QP. If not found, add the mempool.
> + *
> + * @param priv
> + * Pointer to the priv object.
> + * @param mr_ctrl
> + * Pointer to per-queue MR control structure.
> + * @param op
> + * Pointer to the RegEx operations object.
> + *
> + * @return
> + * Searched LKey on success, UINT32_MAX on no match.
> + */
> +static inline uint32_t
> +mlx5_regex_addr2mr(struct mlx5_regex_priv *priv, struct mlx5_mr_ctrl
> *mr_ctrl,
> + struct rte_regex_ops *op)
> +{
> + uintptr_t addr = rte_pktmbuf_mtod(op->mbuf, uintptr_t);
> + uint32_t lkey;
> +
> + /* Check generation bit to see if there's any change on existing MRs.
> */
> + if (unlikely(*mr_ctrl->dev_gen_ptr != mr_ctrl->cur_gen))
> + mlx5_mr_flush_local_cache(mr_ctrl);
Where is dev_gen_ptr initialized?
> + /* Linear search on MR cache array. */
> + lkey = mlx5_mr_lookup_lkey(mr_ctrl->cache, &mr_ctrl->mru,
> + MLX5_MR_CACHE_N, addr);
> + if (likely(lkey != UINT32_MAX))
> + return lkey;
> + /* Take slower bottom-half on miss. */
> + return mlx5_mr_addr2mr_bh(priv->pd, 0, &priv->mr_scache,
> mr_ctrl, addr,
> + !!(op->mbuf->ol_flags &
> EXT_ATTACHED_MBUF)); }
> +
> +
> static inline void
> __prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_sq *sq,
> struct rte_regex_ops *op, struct mlx5_regex_job *job, @@ -160,10
> +194,7 @@ prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_qp
> *qp,
> struct mlx5_klm klm;
>
> klm.byte_count = rte_pktmbuf_data_len(op->mbuf);
> - klm.mkey = mlx5_mr_addr2mr_bh(priv->pd, 0,
> - &priv->mr_scache, &qp->mr_ctrl,
> - rte_pktmbuf_mtod(op->mbuf, uintptr_t),
> - !!(op->mbuf->ol_flags &
> EXT_ATTACHED_MBUF));
> + klm.mkey = mlx5_regex_addr2mr(priv, &qp->mr_ctrl, op);
> klm.address = rte_pktmbuf_mtod(op->mbuf, uintptr_t);
> __prep_one(priv, sq, op, job, sq->pi, &klm);
> sq->db_pi = sq->pi;
> --
> 2.25.1
@@ -11,6 +11,7 @@
#include <rte_regexdev_driver.h>
#include <mlx5_common_pci.h>
+#include <mlx5_common_mr.h>
#include <mlx5_common.h>
#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
@@ -24,6 +25,10 @@
int mlx5_regex_logtype;
+TAILQ_HEAD(regex_mem_event, mlx5_regex_priv) mlx5_mem_event_list =
+ TAILQ_HEAD_INITIALIZER(mlx5_mem_event_list);
+static pthread_mutex_t mem_event_list_lock = PTHREAD_MUTEX_INITIALIZER;
+
const struct rte_regexdev_ops mlx5_regexdev_ops = {
.dev_info_get = mlx5_regex_info_get,
.dev_configure = mlx5_regex_configure,
@@ -82,6 +87,40 @@ mlx5_regex_get_name(char *name, struct rte_pci_device *pci_dev __rte_unused)
pci_dev->addr.devid, pci_dev->addr.function);
}
+/**
+ * Memory event callback: release MRs of memory that is being freed.
+ *
+ * @param event_type
+ * Memory event type; only RTE_MEM_EVENT_FREE is acted upon.
+ * @param addr
+ * Start address of the memory range, passed to mlx5_free_mr_by_addr().
+ * @param len
+ * Size of the memory range. The fourth argument (arg) is unused.
+ */
+static void
+mlx5_regex_mr_mem_event_cb(enum rte_mem_event event_type, const void *addr,
+ size_t len, void *arg __rte_unused)
+{
+ struct mlx5_regex_priv *priv;
+
+ /* Must be called from the primary process. */
+ MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
+ switch (event_type) {
+ case RTE_MEM_EVENT_FREE:
+ pthread_mutex_lock(&mem_event_list_lock);
+ /* Visit every device on the list while the list lock is held. */
+ TAILQ_FOREACH(priv, &mlx5_mem_event_list, mem_event_cb)
+ mlx5_free_mr_by_addr(&priv->mr_scache,
+ priv->ctx->device->name,
+ addr, len);
+ pthread_mutex_unlock(&mem_event_list_lock);
+ break;
+ case RTE_MEM_EVENT_ALLOC:
+ default:
+ break;
+ }
+}
+
static int
mlx5_regex_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
struct rte_pci_device *pci_dev)
@@ -193,6 +232,15 @@ mlx5_regex_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
rte_errno = ENOMEM;
goto error;
}
+ /* Register callback function for global shared MR cache management. */
+ if (TAILQ_EMPTY(&mlx5_mem_event_list))
+ rte_mem_event_callback_register("MLX5_MEM_EVENT_CB",
+ mlx5_regex_mr_mem_event_cb,
+ NULL);
+ /* Add device to memory callback list. */
+ pthread_mutex_lock(&mem_event_list_lock);
+ TAILQ_INSERT_TAIL(&mlx5_mem_event_list, priv, mem_event_cb);
+ pthread_mutex_unlock(&mem_event_list_lock);
DRV_LOG(INFO, "RegEx GGA is %s.",
priv->has_umr ? "supported" : "unsupported");
return 0;
@@ -225,6 +273,13 @@ mlx5_regex_pci_remove(struct rte_pci_device *pci_dev)
return 0;
priv = dev->data->dev_private;
if (priv) {
+ /* Remove from memory callback device list. */
+ pthread_mutex_lock(&mem_event_list_lock);
+ TAILQ_REMOVE(&mlx5_mem_event_list, priv, mem_event_cb);
+ pthread_mutex_unlock(&mem_event_list_lock);
+ if (TAILQ_EMPTY(&mlx5_mem_event_list))
+ rte_mem_event_callback_unregister("MLX5_MEM_EVENT_CB",
+ NULL);
if (priv->pd)
mlx5_glue->dealloc_pd(priv->pd);
if (priv->uar)
@@ -70,6 +70,8 @@ struct mlx5_regex_priv {
uint32_t nb_engines; /* Number of RegEx engines. */
struct mlx5dv_devx_uar *uar; /* UAR object. */
struct ibv_pd *pd;
+ TAILQ_ENTRY(mlx5_regex_priv) mem_event_cb;
+ /**< Called by memory event callback. */
struct mlx5_mr_share_cache mr_scache; /* Global shared MR cache. */
uint8_t is_bf2; /* The device is BF2 device. */
uint8_t sq_ts_format; /* Whether SQ supports timestamp formats. */
@@ -109,6 +109,40 @@ set_wqe_ctrl_seg(struct mlx5_wqe_ctrl_seg *seg, uint16_t pi, uint8_t opcode,
seg->imm = imm;
}
+/**
+ * Get the LKey for the data address of op->mbuf, per-queue cache first.
+ *
+ * @param priv
+ * Pointer to the priv object (supplies the PD and the shared MR cache).
+ * @param mr_ctrl
+ * Pointer to per-queue MR control structure.
+ * @param op
+ * Pointer to the RegEx operations object whose mbuf address is looked up.
+ *
+ * @return
+ * LKey from the local cache on a hit, else the mlx5_mr_addr2mr_bh() result.
+ */
+static inline uint32_t
+mlx5_regex_addr2mr(struct mlx5_regex_priv *priv, struct mlx5_mr_ctrl *mr_ctrl,
+ struct rte_regex_ops *op)
+{
+ uintptr_t addr = rte_pktmbuf_mtod(op->mbuf, uintptr_t);
+ uint32_t lkey;
+
+ /* Flush on generation change (NOTE: dev_gen_ptr must be set elsewhere). */
+ if (unlikely(*mr_ctrl->dev_gen_ptr != mr_ctrl->cur_gen))
+ mlx5_mr_flush_local_cache(mr_ctrl);
+ /* Linear search on MR cache array. */
+ lkey = mlx5_mr_lookup_lkey(mr_ctrl->cache, &mr_ctrl->mru,
+ MLX5_MR_CACHE_N, addr);
+ if (likely(lkey != UINT32_MAX))
+ return lkey;
+ /* Take slower bottom-half on miss. */
+ return mlx5_mr_addr2mr_bh(priv->pd, 0, &priv->mr_scache, mr_ctrl, addr,
+ !!(op->mbuf->ol_flags & EXT_ATTACHED_MBUF));
+}
+
+
static inline void
__prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_sq *sq,
struct rte_regex_ops *op, struct mlx5_regex_job *job,
@@ -160,10 +194,7 @@ prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
struct mlx5_klm klm;
klm.byte_count = rte_pktmbuf_data_len(op->mbuf);
- klm.mkey = mlx5_mr_addr2mr_bh(priv->pd, 0,
- &priv->mr_scache, &qp->mr_ctrl,
- rte_pktmbuf_mtod(op->mbuf, uintptr_t),
- !!(op->mbuf->ol_flags & EXT_ATTACHED_MBUF));
+ klm.mkey = mlx5_regex_addr2mr(priv, &qp->mr_ctrl, op);
klm.address = rte_pktmbuf_mtod(op->mbuf, uintptr_t);
__prep_one(priv, sq, op, job, sq->pi, &klm);
sq->db_pi = sq->pi;